diff --git a/contrib/llvm-project/FREEBSD-Xlist b/contrib/llvm-project/FREEBSD-Xlist
index 4f1ddd272910..0e3705aa52b7 100644
--- a/contrib/llvm-project/FREEBSD-Xlist
+++ b/contrib/llvm-project/FREEBSD-Xlist
@@ -1,913 +1,917 @@
.arcconfig
.arclint
.clang-format
.clang-tidy
.git-blame-ignore-revs
.github/
.gitignore
.mailmap
CONTRIBUTING.md
README.md
SECURITY.md
+bolt/
clang/.clang-format
clang/.clang-tidy
clang/.gitignore
clang/CMakeLists.txt
clang/CODE_OWNERS.TXT
clang/INPUTS/
clang/INSTALL.txt
clang/ModuleInfo.txt
clang/NOTES.txt
clang/README.txt
clang/bindings/
clang/cmake/
clang/docs/
clang/examples/
clang/include/CMakeLists.txt
clang/include/clang/AST/CMakeLists.txt
clang/include/clang/Basic/CMakeLists.txt
clang/include/clang/Basic/Version.inc.in
clang/include/clang/CMakeLists.txt
clang/include/clang/Config/
clang/include/clang/Driver/CMakeLists.txt
clang/include/clang/Parse/CMakeLists.txt
clang/include/clang/Sema/CMakeLists.txt
clang/include/clang/Serialization/CMakeLists.txt
clang/include/clang/StaticAnalyzer/Checkers/CMakeLists.txt
clang/include/clang/Tooling/Syntax/CMakeLists.txt
clang/lib/APINotes/CMakeLists.txt
clang/lib/ARCMigrate/CMakeLists.txt
clang/lib/AST/CMakeLists.txt
clang/lib/ASTMatchers/CMakeLists.txt
clang/lib/ASTMatchers/Dynamic/CMakeLists.txt
clang/lib/Analysis/CMakeLists.txt
clang/lib/Analysis/FlowSensitive/CMakeLists.txt
clang/lib/Analysis/plugins/CMakeLists.txt
clang/lib/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt
clang/lib/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt
clang/lib/Analysis/plugins/SampleAnalyzer/CMakeLists.txt
clang/lib/Basic/CMakeLists.txt
clang/lib/CMakeLists.txt
clang/lib/CodeGen/CMakeLists.txt
clang/lib/CodeGen/README.txt
clang/lib/CrossTU/CMakeLists.txt
clang/lib/DirectoryWatcher/CMakeLists.txt
clang/lib/Driver/CMakeLists.txt
clang/lib/Edit/CMakeLists.txt
clang/lib/Format/CMakeLists.txt
clang/lib/Frontend/CMakeLists.txt
clang/lib/Frontend/Rewrite/CMakeLists.txt
clang/lib/FrontendTool/CMakeLists.txt
clang/lib/Headers/CMakeLists.txt
clang/lib/Index/CMakeLists.txt
clang/lib/IndexSerialization/CMakeLists.txt
clang/lib/Interpreter/CMakeLists.txt
clang/lib/Lex/CMakeLists.txt
clang/lib/Parse/CMakeLists.txt
clang/lib/Rewrite/CMakeLists.txt
clang/lib/Sema/CMakeLists.txt
clang/lib/Serialization/CMakeLists.txt
clang/lib/StaticAnalyzer/CMakeLists.txt
clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt
clang/lib/StaticAnalyzer/Core/CMakeLists.txt
clang/lib/StaticAnalyzer/Frontend/CMakeLists.txt
clang/lib/StaticAnalyzer/README.txt
clang/lib/Testing/CMakeLists.txt
clang/lib/Tooling/ASTDiff/CMakeLists.txt
clang/lib/Tooling/CMakeLists.txt
clang/lib/Tooling/Core/CMakeLists.txt
clang/lib/Tooling/DependencyScanning/CMakeLists.txt
clang/lib/Tooling/DumpTool/CMakeLists.txt
clang/lib/Tooling/DumpTool/generate_cxx_src_locs.py
clang/lib/Tooling/Inclusions/CMakeLists.txt
clang/lib/Tooling/Refactoring/CMakeLists.txt
clang/lib/Tooling/Syntax/CMakeLists.txt
clang/lib/Tooling/Transformer/CMakeLists.txt
clang/runtime/
clang/test/
clang/tools/CMakeLists.txt
clang/tools/amdgpu-arch/CMakeLists.txt
clang/tools/apinotes-test/
clang/tools/arcmt-test/
clang/tools/c-arcmt-test/
clang/tools/c-index-test/
clang/tools/clang-check/
clang/tools/clang-diff/
clang/tools/clang-extdef-mapping/
clang/tools/clang-format/CMakeLists.txt
clang/tools/clang-format/clang-format-bbedit.applescript
clang/tools/clang-format/clang-format-diff.py
clang/tools/clang-format/clang-format-sublime.py
clang/tools/clang-format/clang-format-test.el
clang/tools/clang-format/clang-format.el
clang/tools/clang-format/clang-format.py
clang/tools/clang-format/fuzzer/
clang/tools/clang-format/git-clang-format
clang/tools/clang-format-vs/
clang/tools/clang-fuzzer/
clang/tools/clang-import-test/
+clang/tools/clang-linker-wrapper/
clang/tools/clang-nvlink-wrapper/
clang/tools/clang-offload-bundler/
clang/tools/clang-offload-wrapper/
clang/tools/clang-refactor/
clang/tools/clang-rename/
clang/tools/clang-repl/CMakeLists.txt
clang/tools/clang-scan-deps/
clang/tools/clang-shlib/
clang/tools/diag-build/
clang/tools/diagtool/
clang/tools/driver/CMakeLists.txt
clang/tools/driver/Info.plist.in
clang/tools/libclang/
clang/tools/scan-build/
clang/tools/scan-build-py/
clang/tools/scan-view/
clang/unittests/
clang/utils/ABITest/
clang/utils/CIndex/
clang/utils/CaptureCmd
clang/utils/ClangDataFormat.py
clang/utils/ClangVisualizers/
clang/utils/CmpDriver
clang/utils/FindSpecRefs
clang/utils/FuzzTest
clang/utils/TableGen/CMakeLists.txt
clang/utils/TestUtils/
clang/utils/VtableTest/
clang/utils/analyzer/
clang/utils/bash-autocomplete.sh
clang/utils/builtin-defines.c
clang/utils/check_cfc/
clang/utils/clangdiag.py
clang/utils/convert_arm_neon.py
clang/utils/creduce-clang-crash.py
clang/utils/find-unused-diagnostics.sh
clang/utils/hmaptool/
clang/utils/make-ast-dump-check.sh
clang/utils/modfuzz.py
clang/utils/module-deps-to-rsp.py
clang/utils/perf-training/
clang/utils/token-delta.py
clang/utils/valgrind/
clang/www/
clang-tools-extra/
cmake/
compiler-rt/.gitignore
compiler-rt/CMakeLists.txt
compiler-rt/CODE_OWNERS.TXT
compiler-rt/cmake/
compiler-rt/docs/
compiler-rt/include/CMakeLists.txt
compiler-rt/lib/CMakeLists.txt
compiler-rt/lib/asan/.clang-format
compiler-rt/lib/asan/CMakeLists.txt
compiler-rt/lib/asan/scripts/
compiler-rt/lib/asan/tests/
compiler-rt/lib/builtins/CMakeLists.txt
compiler-rt/lib/builtins/Darwin-excludes/
compiler-rt/lib/builtins/macho_embedded/
compiler-rt/lib/cfi/CMakeLists.txt
compiler-rt/lib/crt/CMakeLists.txt
compiler-rt/lib/dfsan/.clang-format
compiler-rt/lib/dfsan/CMakeLists.txt
compiler-rt/lib/dfsan/scripts/
compiler-rt/lib/fuzzer/CMakeLists.txt
compiler-rt/lib/fuzzer/afl/
compiler-rt/lib/fuzzer/build.sh
compiler-rt/lib/fuzzer/dataflow/
compiler-rt/lib/fuzzer/scripts/
compiler-rt/lib/fuzzer/standalone/
compiler-rt/lib/fuzzer/tests/
compiler-rt/lib/gwp_asan/CMakeLists.txt
compiler-rt/lib/gwp_asan/scripts/
compiler-rt/lib/gwp_asan/tests/
compiler-rt/lib/hwasan/.clang-format
compiler-rt/lib/hwasan/CMakeLists.txt
compiler-rt/lib/hwasan/scripts/
compiler-rt/lib/interception/.clang-format
compiler-rt/lib/interception/CMakeLists.txt
compiler-rt/lib/interception/tests/
compiler-rt/lib/lsan/.clang-format
compiler-rt/lib/lsan/CMakeLists.txt
compiler-rt/lib/memprof/CMakeLists.txt
compiler-rt/lib/memprof/tests/CMakeLists.txt
compiler-rt/lib/msan/.clang-format
compiler-rt/lib/msan/CMakeLists.txt
compiler-rt/lib/msan/tests/
compiler-rt/lib/orc/CMakeLists.txt
compiler-rt/lib/orc/unittests/
compiler-rt/lib/profile/CMakeLists.txt
compiler-rt/lib/safestack/.clang-format
compiler-rt/lib/safestack/CMakeLists.txt
compiler-rt/lib/sanitizer_common/.clang-format
compiler-rt/lib/sanitizer_common/CMakeLists.txt
compiler-rt/lib/sanitizer_common/scripts/
compiler-rt/lib/sanitizer_common/tests/
compiler-rt/lib/scudo/CMakeLists.txt
compiler-rt/lib/scudo/standalone/CMakeLists.txt
compiler-rt/lib/scudo/standalone/benchmarks/
compiler-rt/lib/scudo/standalone/fuzz/CMakeLists.txt
compiler-rt/lib/scudo/standalone/tests/
compiler-rt/lib/scudo/standalone/tools/
compiler-rt/lib/stats/CMakeLists.txt
compiler-rt/lib/tsan/.clang-format
compiler-rt/lib/tsan/CMakeLists.txt
compiler-rt/lib/tsan/analyze_libtsan.sh
compiler-rt/lib/tsan/check_analyze.sh
compiler-rt/lib/tsan/check_cmake.sh
compiler-rt/lib/tsan/dd/CMakeLists.txt
compiler-rt/lib/tsan/go/build.bat
compiler-rt/lib/tsan/go/buildgo.sh
+compiler-rt/lib/tsan/rtl/CMakeLists.txt
+compiler-rt/lib/tsan/rtl-old/CMakeLists.txt
compiler-rt/lib/tsan/tests/
compiler-rt/lib/ubsan/CMakeLists.txt
compiler-rt/lib/ubsan_minimal/CMakeLists.txt
compiler-rt/lib/xray/CMakeLists.txt
compiler-rt/lib/xray/tests/
compiler-rt/test/
compiler-rt/tools/
compiler-rt/unittests/
compiler-rt/utils/
compiler-rt/www/
cross-project-tests/
flang/
libc/
libclc/
libcxx/.clang-format
libcxx/.clang-tidy
libcxx/.gitignore
libcxx/CMakeLists.txt
libcxx/TODO.TXT
libcxx/appveyor-reqs-install.cmd
libcxx/appveyor.yml
libcxx/benchmarks/
libcxx/cmake/
libcxx/docs/
libcxx/include/CMakeLists.txt
libcxx/include/__config_site.in
libcxx/include/__support/
libcxx/lib/
libcxx/src/CMakeLists.txt
libcxx/src/support/solaris/
libcxx/src/support/win32/
libcxx/test/
libcxx/utils/
libcxxabi/
libunwind/.clang-format
libunwind/CMakeLists.txt
libunwind/cmake/
libunwind/docs/
+libunwind/include/CMakeLists.txt
libunwind/src/CMakeLists.txt
libunwind/test/
lld/CMakeLists.txt
lld/COFF/CMakeLists.txt
lld/Common/CMakeLists.txt
lld/ELF/CMakeLists.txt
lld/MachO/CMakeLists.txt
lld/MinGW/
lld/cmake/
lld/docs/CMakeLists.txt
-lld/lib/CMakeLists.txt
-lld/lib/Core/CMakeLists.txt
-lld/lib/Driver/CMakeLists.txt
-lld/lib/ReaderWriter/CMakeLists.txt
-lld/lib/ReaderWriter/MachO/CMakeLists.txt
-lld/lib/ReaderWriter/YAML/CMakeLists.txt
lld/test/
lld/tools/lld/CMakeLists.txt
-lld/unittests/
lld/utils/
lld/wasm/
lldb/.clang-format
lldb/.clang-tidy
lldb/.gitignore
lldb/CMakeLists.txt
lldb/CODE_OWNERS.txt
lldb/bindings/CMakeLists.txt
lldb/bindings/lua/CMakeLists.txt
lldb/bindings/python/CMakeLists.txt
lldb/bindings/python/get-python-config.py
lldb/cmake/
lldb/docs/.htaccess
lldb/docs/CMakeLists.txt
lldb/docs/_static/
lldb/docs/conf.py
lldb/docs/doxygen-mainpage.dox
lldb/docs/doxygen.cfg.in
lldb/docs/index.rst
lldb/docs/lldb-for-gdb-users.txt
lldb/docs/lldb-gdb-remote.txt
lldb/docs/lldb-platform-packets.txt
lldb/docs/resources/
lldb/docs/status/
lldb/docs/testsuite/
lldb/docs/use/
lldb/examples/
lldb/include/lldb/Host/android/
lldb/include/lldb/Host/linux/
lldb/include/lldb/Host/macosx/
lldb/include/lldb/Host/windows/
lldb/packages/
lldb/resources/
lldb/scripts/
lldb/source/API/CMakeLists.txt
lldb/source/Breakpoint/CMakeLists.txt
lldb/source/CMakeLists.txt
lldb/source/Commands/CMakeLists.txt
lldb/source/Core/CMakeLists.txt
lldb/source/DataFormatters/CMakeLists.txt
lldb/source/Expression/CMakeLists.txt
lldb/source/Host/CMakeLists.txt
lldb/source/Host/android/
lldb/source/Host/linux/
lldb/source/Host/macosx/
lldb/source/Host/windows/
lldb/source/Initialization/CMakeLists.txt
lldb/source/Interpreter/CMakeLists.txt
lldb/source/Plugins/ABI/AArch64/CMakeLists.txt
lldb/source/Plugins/ABI/ARC/CMakeLists.txt
lldb/source/Plugins/ABI/ARM/CMakeLists.txt
lldb/source/Plugins/ABI/CMakeLists.txt
lldb/source/Plugins/ABI/Hexagon/CMakeLists.txt
lldb/source/Plugins/ABI/Mips/CMakeLists.txt
lldb/source/Plugins/ABI/PowerPC/CMakeLists.txt
lldb/source/Plugins/ABI/SystemZ/CMakeLists.txt
lldb/source/Plugins/ABI/X86/CMakeLists.txt
lldb/source/Plugins/Architecture/AArch64/CMakeLists.txt
lldb/source/Plugins/Architecture/Arm/CMakeLists.txt
lldb/source/Plugins/Architecture/CMakeLists.txt
lldb/source/Plugins/Architecture/Mips/CMakeLists.txt
lldb/source/Plugins/Architecture/PPC64/CMakeLists.txt
lldb/source/Plugins/CMakeLists.txt
lldb/source/Plugins/Disassembler/CMakeLists.txt
lldb/source/Plugins/Disassembler/LLVMC/CMakeLists.txt
lldb/source/Plugins/DynamicLoader/CMakeLists.txt
lldb/source/Plugins/DynamicLoader/Darwin-Kernel/
lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/CMakeLists.txt
lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/
lldb/source/Plugins/DynamicLoader/POSIX-DYLD/CMakeLists.txt
lldb/source/Plugins/DynamicLoader/Static/CMakeLists.txt
lldb/source/Plugins/DynamicLoader/Windows-DYLD/CMakeLists.txt
lldb/source/Plugins/DynamicLoader/wasm-DYLD/CMakeLists.txt
lldb/source/Plugins/ExpressionParser/CMakeLists.txt
lldb/source/Plugins/ExpressionParser/Clang/CMakeLists.txt
lldb/source/Plugins/Instruction/ARM/CMakeLists.txt
lldb/source/Plugins/Instruction/ARM64/CMakeLists.txt
lldb/source/Plugins/Instruction/CMakeLists.txt
lldb/source/Plugins/Instruction/MIPS/CMakeLists.txt
lldb/source/Plugins/Instruction/MIPS64/CMakeLists.txt
lldb/source/Plugins/Instruction/PPC64/CMakeLists.txt
lldb/source/Plugins/InstrumentationRuntime/ASan/CMakeLists.txt
lldb/source/Plugins/InstrumentationRuntime/CMakeLists.txt
lldb/source/Plugins/InstrumentationRuntime/MainThreadChecker/CMakeLists.txt
lldb/source/Plugins/InstrumentationRuntime/TSan/CMakeLists.txt
lldb/source/Plugins/InstrumentationRuntime/UBSan/CMakeLists.txt
lldb/source/Plugins/JITLoader/CMakeLists.txt
lldb/source/Plugins/JITLoader/GDB/CMakeLists.txt
lldb/source/Plugins/Language/CMakeLists.txt
lldb/source/Plugins/Language/CPlusPlus/CMakeLists.txt
lldb/source/Plugins/Language/ClangCommon/CMakeLists.txt
lldb/source/Plugins/Language/ObjC/CMakeLists.txt
lldb/source/Plugins/Language/ObjCPlusPlus/CMakeLists.txt
lldb/source/Plugins/LanguageRuntime/CMakeLists.txt
lldb/source/Plugins/LanguageRuntime/CPlusPlus/CMakeLists.txt
lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI/CMakeLists.txt
lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/CMakeLists.txt
lldb/source/Plugins/LanguageRuntime/ObjC/CMakeLists.txt
lldb/source/Plugins/LanguageRuntime/RenderScript/CMakeLists.txt
lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/CMakeLists.txt
lldb/source/Plugins/MemoryHistory/CMakeLists.txt
lldb/source/Plugins/MemoryHistory/asan/CMakeLists.txt
lldb/source/Plugins/ObjectContainer/BSD-Archive/CMakeLists.txt
lldb/source/Plugins/ObjectContainer/CMakeLists.txt
lldb/source/Plugins/ObjectContainer/Universal-Mach-O/
lldb/source/Plugins/ObjectFile/Breakpad/CMakeLists.txt
lldb/source/Plugins/ObjectFile/CMakeLists.txt
lldb/source/Plugins/ObjectFile/ELF/CMakeLists.txt
lldb/source/Plugins/ObjectFile/JIT/CMakeLists.txt
lldb/source/Plugins/ObjectFile/Mach-O/
lldb/source/Plugins/ObjectFile/Minidump/CMakeLists.txt
lldb/source/Plugins/ObjectFile/PDB/CMakeLists.txt
lldb/source/Plugins/ObjectFile/PECOFF/
lldb/source/Plugins/ObjectFile/wasm/CMakeLists.txt
lldb/source/Plugins/OperatingSystem/CMakeLists.txt
lldb/source/Plugins/OperatingSystem/Python/CMakeLists.txt
lldb/source/Plugins/Platform/Android/
lldb/source/Plugins/Platform/CMakeLists.txt
lldb/source/Plugins/Platform/FreeBSD/CMakeLists.txt
lldb/source/Plugins/Platform/Linux/
lldb/source/Plugins/Platform/MacOSX/
lldb/source/Plugins/Platform/NetBSD/CMakeLists.txt
lldb/source/Plugins/Platform/OpenBSD/CMakeLists.txt
lldb/source/Plugins/Platform/POSIX/CMakeLists.txt
lldb/source/Plugins/Platform/QemuUser/CMakeLists.txt
lldb/source/Plugins/Platform/Windows/
lldb/source/Plugins/Platform/gdb-server/CMakeLists.txt
lldb/source/Plugins/Process/CMakeLists.txt
lldb/source/Plugins/Process/FreeBSD/CMakeLists.txt
+lldb/source/Plugins/Process/FreeBSDKernel/CMakeLists.txt
lldb/source/Plugins/Process/Linux/
lldb/source/Plugins/Process/MacOSX-Kernel/
lldb/source/Plugins/Process/NetBSD/CMakeLists.txt
lldb/source/Plugins/Process/POSIX/CMakeLists.txt
lldb/source/Plugins/Process/Utility/CMakeLists.txt
lldb/source/Plugins/Process/Windows/
lldb/source/Plugins/Process/elf-core/CMakeLists.txt
lldb/source/Plugins/Process/gdb-remote/CMakeLists.txt
lldb/source/Plugins/Process/mach-core/
lldb/source/Plugins/Process/minidump/CMakeLists.txt
lldb/source/Plugins/Process/scripted/CMakeLists.txt
lldb/source/Plugins/REPL/CMakeLists.txt
lldb/source/Plugins/REPL/Clang/CMakeLists.txt
lldb/source/Plugins/ScriptInterpreter/CMakeLists.txt
lldb/source/Plugins/ScriptInterpreter/Lua/CMakeLists.txt
lldb/source/Plugins/ScriptInterpreter/None/CMakeLists.txt
lldb/source/Plugins/ScriptInterpreter/Python/CMakeLists.txt
lldb/source/Plugins/StructuredData/CMakeLists.txt
lldb/source/Plugins/StructuredData/DarwinLog/CMakeLists.txt
lldb/source/Plugins/SymbolFile/Breakpad/CMakeLists.txt
lldb/source/Plugins/SymbolFile/CMakeLists.txt
lldb/source/Plugins/SymbolFile/DWARF/CMakeLists.txt
lldb/source/Plugins/SymbolFile/NativePDB/CMakeLists.txt
lldb/source/Plugins/SymbolFile/PDB/CMakeLists.txt
lldb/source/Plugins/SymbolFile/Symtab/CMakeLists.txt
lldb/source/Plugins/SymbolVendor/CMakeLists.txt
lldb/source/Plugins/SymbolVendor/ELF/CMakeLists.txt
lldb/source/Plugins/SymbolVendor/MacOSX/
lldb/source/Plugins/SymbolVendor/wasm/CMakeLists.txt
lldb/source/Plugins/SystemRuntime/
lldb/source/Plugins/Trace/CMakeLists.txt
lldb/source/Plugins/Trace/common/CMakeLists.txt
lldb/source/Plugins/Trace/intel-pt/CMakeLists.txt
lldb/source/Plugins/TraceExporter/CMakeLists.txt
lldb/source/Plugins/TraceExporter/common/CMakeLists.txt
lldb/source/Plugins/TraceExporter/ctf/CMakeLists.txt
lldb/source/Plugins/TypeSystem/CMakeLists.txt
lldb/source/Plugins/TypeSystem/Clang/CMakeLists.txt
lldb/source/Plugins/UnwindAssembly/CMakeLists.txt
lldb/source/Plugins/UnwindAssembly/InstEmulation/CMakeLists.txt
lldb/source/Plugins/UnwindAssembly/x86/CMakeLists.txt
lldb/source/Symbol/CMakeLists.txt
lldb/source/Target/CMakeLists.txt
lldb/source/Utility/CMakeLists.txt
+lldb/source/Version/CMakeLists.txt
lldb/test/
lldb/third_party/
lldb/tools/CMakeLists.txt
lldb/tools/argdumper/CMakeLists.txt
lldb/tools/darwin-debug/
lldb/tools/darwin-threads/
lldb/tools/debugserver/
lldb/tools/driver/CMakeLists.txt
lldb/tools/driver/lldb-Info.plist.in
lldb/tools/intel-features/
lldb/tools/lldb-instr/CMakeLists.txt
lldb/tools/lldb-server/CMakeLists.txt
lldb/tools/lldb-test/
lldb/tools/lldb-vscode/
lldb/unittests/
lldb/use_lldb_suite_root.py
lldb/utils/CMakeLists.txt
lldb/utils/TableGen/CMakeLists.txt
lldb/utils/lit-cpuid/
lldb/utils/lldb-dotest/
lldb/utils/lldb-repro/
lldb/utils/lui/
llvm/.clang-format
llvm/.clang-tidy
llvm/.gitattributes
llvm/.gitignore
llvm/CMakeLists.txt
llvm/CODE_OWNERS.TXT
llvm/CREDITS.TXT
llvm/README.txt
llvm/RELEASE_TESTERS.TXT
llvm/benchmarks/
llvm/bindings/
llvm/cmake/
llvm/configure
llvm/docs/
llvm/examples/
llvm/include/llvm/CMakeLists.txt
llvm/include/llvm/Config/
llvm/include/llvm/Frontend/CMakeLists.txt
llvm/include/llvm/Frontend/OpenACC/CMakeLists.txt
llvm/include/llvm/Frontend/OpenMP/CMakeLists.txt
llvm/include/llvm/IR/CMakeLists.txt
llvm/include/llvm/Support/CMakeLists.txt
llvm/include/llvm/Support/LICENSE.TXT
llvm/lib/Analysis/CMakeLists.txt
llvm/lib/Analysis/README.txt
llvm/lib/Analysis/models/
llvm/lib/AsmParser/CMakeLists.txt
llvm/lib/BinaryFormat/CMakeLists.txt
llvm/lib/Bitcode/CMakeLists.txt
llvm/lib/Bitcode/Reader/CMakeLists.txt
llvm/lib/Bitcode/Writer/CMakeLists.txt
llvm/lib/Bitstream/CMakeLists.txt
llvm/lib/Bitstream/Reader/CMakeLists.txt
llvm/lib/CMakeLists.txt
llvm/lib/CodeGen/AsmPrinter/CMakeLists.txt
llvm/lib/CodeGen/CMakeLists.txt
llvm/lib/CodeGen/GlobalISel/CMakeLists.txt
llvm/lib/CodeGen/MIRParser/CMakeLists.txt
llvm/lib/CodeGen/README.txt
llvm/lib/CodeGen/SelectionDAG/CMakeLists.txt
llvm/lib/DWARFLinker/CMakeLists.txt
llvm/lib/DWP/CMakeLists.txt
llvm/lib/DebugInfo/CMakeLists.txt
llvm/lib/DebugInfo/CodeView/CMakeLists.txt
llvm/lib/DebugInfo/DWARF/CMakeLists.txt
llvm/lib/DebugInfo/GSYM/CMakeLists.txt
llvm/lib/DebugInfo/MSF/CMakeLists.txt
llvm/lib/DebugInfo/PDB/CMakeLists.txt
llvm/lib/DebugInfo/Symbolize/CMakeLists.txt
+llvm/lib/Debuginfod/CMakeLists.txt
llvm/lib/Demangle/CMakeLists.txt
llvm/lib/ExecutionEngine/CMakeLists.txt
llvm/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
llvm/lib/ExecutionEngine/Interpreter/CMakeLists.txt
llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt
llvm/lib/ExecutionEngine/MCJIT/CMakeLists.txt
llvm/lib/ExecutionEngine/OProfileJIT/CMakeLists.txt
llvm/lib/ExecutionEngine/Orc/CMakeLists.txt
llvm/lib/ExecutionEngine/Orc/Shared/CMakeLists.txt
llvm/lib/ExecutionEngine/Orc/TargetProcess/CMakeLists.txt
llvm/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt
llvm/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
llvm/lib/Extensions/
llvm/lib/FileCheck/CMakeLists.txt
llvm/lib/Frontend/CMakeLists.txt
llvm/lib/Frontend/OpenACC/CMakeLists.txt
llvm/lib/Frontend/OpenMP/CMakeLists.txt
llvm/lib/FuzzMutate/CMakeLists.txt
llvm/lib/Fuzzer/
llvm/lib/IR/CMakeLists.txt
llvm/lib/IRReader/CMakeLists.txt
llvm/lib/InterfaceStub/CMakeLists.txt
llvm/lib/LTO/CMakeLists.txt
llvm/lib/LineEditor/CMakeLists.txt
llvm/lib/Linker/CMakeLists.txt
llvm/lib/MC/CMakeLists.txt
llvm/lib/MC/MCDisassembler/CMakeLists.txt
llvm/lib/MC/MCParser/CMakeLists.txt
llvm/lib/MCA/CMakeLists.txt
llvm/lib/Object/CMakeLists.txt
llvm/lib/ObjectYAML/CMakeLists.txt
llvm/lib/Option/CMakeLists.txt
llvm/lib/Passes/CMakeLists.txt
llvm/lib/ProfileData/CMakeLists.txt
llvm/lib/ProfileData/Coverage/CMakeLists.txt
llvm/lib/Remarks/CMakeLists.txt
llvm/lib/Support/CMakeLists.txt
llvm/lib/TableGen/CMakeLists.txt
llvm/lib/Target/AArch64/AsmParser/CMakeLists.txt
llvm/lib/Target/AArch64/CMakeLists.txt
llvm/lib/Target/AArch64/Disassembler/CMakeLists.txt
llvm/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
llvm/lib/Target/AArch64/TargetInfo/CMakeLists.txt
llvm/lib/Target/AArch64/Utils/CMakeLists.txt
llvm/lib/Target/AMDGPU/AsmParser/CMakeLists.txt
llvm/lib/Target/AMDGPU/CMakeLists.txt
llvm/lib/Target/AMDGPU/Disassembler/CMakeLists.txt
llvm/lib/Target/AMDGPU/MCA/CMakeLists.txt
llvm/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
llvm/lib/Target/AMDGPU/TargetInfo/CMakeLists.txt
llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt
llvm/lib/Target/ARC/CMakeLists.txt
llvm/lib/Target/ARC/Disassembler/CMakeLists.txt
llvm/lib/Target/ARC/MCTargetDesc/CMakeLists.txt
llvm/lib/Target/ARC/TargetInfo/CMakeLists.txt
llvm/lib/Target/ARM/AsmParser/CMakeLists.txt
llvm/lib/Target/ARM/CMakeLists.txt
llvm/lib/Target/ARM/Disassembler/CMakeLists.txt
llvm/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
llvm/lib/Target/ARM/README-Thumb.txt
llvm/lib/Target/ARM/README-Thumb2.txt
llvm/lib/Target/ARM/README.txt
llvm/lib/Target/ARM/TargetInfo/CMakeLists.txt
llvm/lib/Target/ARM/Utils/CMakeLists.txt
llvm/lib/Target/AVR/AsmParser/CMakeLists.txt
llvm/lib/Target/AVR/CMakeLists.txt
llvm/lib/Target/AVR/Disassembler/CMakeLists.txt
llvm/lib/Target/AVR/MCTargetDesc/CMakeLists.txt
llvm/lib/Target/AVR/TargetInfo/CMakeLists.txt
llvm/lib/Target/BPF/AsmParser/CMakeLists.txt
llvm/lib/Target/BPF/CMakeLists.txt
llvm/lib/Target/BPF/Disassembler/CMakeLists.txt
llvm/lib/Target/BPF/MCTargetDesc/CMakeLists.txt
llvm/lib/Target/BPF/TargetInfo/CMakeLists.txt
llvm/lib/Target/CMakeLists.txt
llvm/lib/Target/CSKY/AsmParser/CMakeLists.txt
llvm/lib/Target/CSKY/CMakeLists.txt
llvm/lib/Target/CSKY/MCTargetDesc/CMakeLists.txt
llvm/lib/Target/CSKY/TargetInfo/CMakeLists.txt
llvm/lib/Target/Hexagon/AsmParser/CMakeLists.txt
llvm/lib/Target/Hexagon/CMakeLists.txt
llvm/lib/Target/Hexagon/Disassembler/CMakeLists.txt
llvm/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
llvm/lib/Target/Hexagon/TargetInfo/CMakeLists.txt
llvm/lib/Target/Lanai/AsmParser/CMakeLists.txt
llvm/lib/Target/Lanai/CMakeLists.txt
llvm/lib/Target/Lanai/Disassembler/CMakeLists.txt
llvm/lib/Target/Lanai/MCTargetDesc/CMakeLists.txt
llvm/lib/Target/Lanai/TargetInfo/CMakeLists.txt
llvm/lib/Target/M68k/AsmParser/CMakeLists.txt
llvm/lib/Target/M68k/CMakeLists.txt
llvm/lib/Target/M68k/Disassembler/CMakeLists.txt
llvm/lib/Target/M68k/MCTargetDesc/CMakeLists.txt
llvm/lib/Target/M68k/TargetInfo/CMakeLists.txt
llvm/lib/Target/MSP430/AsmParser/CMakeLists.txt
llvm/lib/Target/MSP430/CMakeLists.txt
llvm/lib/Target/MSP430/Disassembler/CMakeLists.txt
llvm/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
llvm/lib/Target/MSP430/README.txt
llvm/lib/Target/MSP430/TargetInfo/CMakeLists.txt
llvm/lib/Target/Mips/AsmParser/CMakeLists.txt
llvm/lib/Target/Mips/CMakeLists.txt
llvm/lib/Target/Mips/Disassembler/CMakeLists.txt
llvm/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
llvm/lib/Target/Mips/TargetInfo/CMakeLists.txt
llvm/lib/Target/NVPTX/CMakeLists.txt
llvm/lib/Target/NVPTX/MCTargetDesc/CMakeLists.txt
llvm/lib/Target/NVPTX/TargetInfo/CMakeLists.txt
llvm/lib/Target/PowerPC/AsmParser/CMakeLists.txt
llvm/lib/Target/PowerPC/CMakeLists.txt
llvm/lib/Target/PowerPC/Disassembler/CMakeLists.txt
llvm/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
llvm/lib/Target/PowerPC/README.txt
llvm/lib/Target/PowerPC/README_ALTIVEC.txt
llvm/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
llvm/lib/Target/README.txt
llvm/lib/Target/RISCV/AsmParser/CMakeLists.txt
llvm/lib/Target/RISCV/CMakeLists.txt
llvm/lib/Target/RISCV/Disassembler/CMakeLists.txt
llvm/lib/Target/RISCV/MCTargetDesc/CMakeLists.txt
llvm/lib/Target/RISCV/TargetInfo/CMakeLists.txt
llvm/lib/Target/Sparc/AsmParser/CMakeLists.txt
llvm/lib/Target/Sparc/CMakeLists.txt
llvm/lib/Target/Sparc/Disassembler/CMakeLists.txt
llvm/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
llvm/lib/Target/Sparc/README.txt
llvm/lib/Target/Sparc/TargetInfo/CMakeLists.txt
llvm/lib/Target/SystemZ/AsmParser/CMakeLists.txt
llvm/lib/Target/SystemZ/CMakeLists.txt
llvm/lib/Target/SystemZ/Disassembler/CMakeLists.txt
llvm/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt
llvm/lib/Target/SystemZ/TargetInfo/CMakeLists.txt
llvm/lib/Target/VE/AsmParser/CMakeLists.txt
llvm/lib/Target/VE/CMakeLists.txt
llvm/lib/Target/VE/Disassembler/CMakeLists.txt
llvm/lib/Target/VE/MCTargetDesc/CMakeLists.txt
llvm/lib/Target/VE/TargetInfo/CMakeLists.txt
llvm/lib/Target/WebAssembly/AsmParser/CMakeLists.txt
llvm/lib/Target/WebAssembly/CMakeLists.txt
llvm/lib/Target/WebAssembly/Disassembler/CMakeLists.txt
llvm/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt
llvm/lib/Target/WebAssembly/TargetInfo/CMakeLists.txt
llvm/lib/Target/WebAssembly/Utils/CMakeLists.txt
llvm/lib/Target/X86/AsmParser/CMakeLists.txt
llvm/lib/Target/X86/CMakeLists.txt
llvm/lib/Target/X86/Disassembler/CMakeLists.txt
+llvm/lib/Target/X86/MCA/CMakeLists.txt
llvm/lib/Target/X86/MCTargetDesc/CMakeLists.txt
llvm/lib/Target/X86/README-FPStack.txt
llvm/lib/Target/X86/README-SSE.txt
llvm/lib/Target/X86/README-X86-64.txt
llvm/lib/Target/X86/README.txt
llvm/lib/Target/X86/TargetInfo/CMakeLists.txt
llvm/lib/Target/XCore/CMakeLists.txt
llvm/lib/Target/XCore/Disassembler/CMakeLists.txt
llvm/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
llvm/lib/Target/XCore/README.txt
llvm/lib/Target/XCore/TargetInfo/CMakeLists.txt
llvm/lib/Testing/CMakeLists.txt
llvm/lib/Testing/Support/CMakeLists.txt
llvm/lib/TextAPI/CMakeLists.txt
llvm/lib/ToolDrivers/CMakeLists.txt
llvm/lib/ToolDrivers/llvm-dlltool/CMakeLists.txt
llvm/lib/ToolDrivers/llvm-lib/CMakeLists.txt
llvm/lib/Transforms/AggressiveInstCombine/CMakeLists.txt
llvm/lib/Transforms/CFGuard/CMakeLists.txt
llvm/lib/Transforms/CMakeLists.txt
llvm/lib/Transforms/Coroutines/CMakeLists.txt
llvm/lib/Transforms/Hello/
llvm/lib/Transforms/IPO/CMakeLists.txt
llvm/lib/Transforms/InstCombine/CMakeLists.txt
llvm/lib/Transforms/Instrumentation/CMakeLists.txt
llvm/lib/Transforms/ObjCARC/CMakeLists.txt
llvm/lib/Transforms/Scalar/CMakeLists.txt
llvm/lib/Transforms/Utils/CMakeLists.txt
llvm/lib/Transforms/Vectorize/CMakeLists.txt
llvm/lib/WindowsManifest/CMakeLists.txt
llvm/lib/XRay/CMakeLists.txt
llvm/llvm.spec.in
llvm/projects/
llvm/resources/
llvm/runtimes/
llvm/test/
llvm/tools/CMakeLists.txt
llvm/tools/bugpoint/CMakeLists.txt
llvm/tools/bugpoint-passes/
llvm/tools/dsymutil/
llvm/tools/gold/
llvm/tools/llc/CMakeLists.txt
llvm/tools/lli/CMakeLists.txt
llvm/tools/lli/ChildTarget/CMakeLists.txt
llvm/tools/llvm-ar/CMakeLists.txt
llvm/tools/llvm-as/CMakeLists.txt
llvm/tools/llvm-as-fuzzer/
llvm/tools/llvm-bcanalyzer/CMakeLists.txt
llvm/tools/llvm-c-test/
llvm/tools/llvm-cat/
llvm/tools/llvm-cfi-verify/
llvm/tools/llvm-config/
llvm/tools/llvm-cov/CMakeLists.txt
llvm/tools/llvm-cvtres/
llvm/tools/llvm-cxxdump/CMakeLists.txt
llvm/tools/llvm-cxxfilt/CMakeLists.txt
llvm/tools/llvm-cxxmap/CMakeLists.txt
+llvm/tools/llvm-debuginfod-find/
llvm/tools/llvm-diff/CMakeLists.txt
llvm/tools/llvm-diff/lib/CMakeLists.txt
llvm/tools/llvm-dis/CMakeLists.txt
llvm/tools/llvm-dlang-demangle-fuzzer/
llvm/tools/llvm-dwarfdump/CMakeLists.txt
llvm/tools/llvm-dwarfdump/fuzzer/
llvm/tools/llvm-dwp/CMakeLists.txt
llvm/tools/llvm-exegesis/
llvm/tools/llvm-extract/CMakeLists.txt
llvm/tools/llvm-go/
llvm/tools/llvm-gsymutil/
llvm/tools/llvm-ifs/
llvm/tools/llvm-isel-fuzzer/
llvm/tools/llvm-itanium-demangle-fuzzer/
llvm/tools/llvm-jitlink/
llvm/tools/llvm-jitlistener/
llvm/tools/llvm-libtool-darwin/
llvm/tools/llvm-link/CMakeLists.txt
llvm/tools/llvm-lipo/
llvm/tools/llvm-lto/CMakeLists.txt
llvm/tools/llvm-lto2/CMakeLists.txt
llvm/tools/llvm-mc/CMakeLists.txt
llvm/tools/llvm-mc-assemble-fuzzer/
llvm/tools/llvm-mc-disassemble-fuzzer/
llvm/tools/llvm-mca/CMakeLists.txt
llvm/tools/llvm-microsoft-demangle-fuzzer/
llvm/tools/llvm-ml/
llvm/tools/llvm-modextract/CMakeLists.txt
llvm/tools/llvm-mt/
llvm/tools/llvm-nm/CMakeLists.txt
llvm/tools/llvm-objcopy/CMakeLists.txt
llvm/tools/llvm-objdump/CMakeLists.txt
llvm/tools/llvm-opt-fuzzer/
llvm/tools/llvm-opt-report/
llvm/tools/llvm-pdbutil/CMakeLists.txt
llvm/tools/llvm-profdata/CMakeLists.txt
llvm/tools/llvm-profgen/
llvm/tools/llvm-rc/
llvm/tools/llvm-readobj/CMakeLists.txt
llvm/tools/llvm-reduce/
llvm/tools/llvm-rtdyld/CMakeLists.txt
llvm/tools/llvm-rust-demangle-fuzzer/
llvm/tools/llvm-shlib/
llvm/tools/llvm-sim/CMakeLists.txt
llvm/tools/llvm-size/CMakeLists.txt
llvm/tools/llvm-special-case-list-fuzzer/
llvm/tools/llvm-split/
llvm/tools/llvm-stress/CMakeLists.txt
llvm/tools/llvm-strings/CMakeLists.txt
llvm/tools/llvm-symbolizer/CMakeLists.txt
llvm/tools/llvm-tapi-diff/CMakeLists.txt
llvm/tools/llvm-tli-checker/CMakeLists.txt
llvm/tools/llvm-undname/
llvm/tools/llvm-xray/CMakeLists.txt
llvm/tools/llvm-yaml-numeric-parser-fuzzer/
llvm/tools/llvm-yaml-parser-fuzzer/
llvm/tools/lto/
llvm/tools/msbuild/
llvm/tools/obj2yaml/
llvm/tools/opt/CMakeLists.txt
llvm/tools/opt-viewer/
llvm/tools/remarks-shlib/
llvm/tools/sancov/
llvm/tools/sanstats/
llvm/tools/split-file/
llvm/tools/verify-uselistorder/
llvm/tools/vfabi-demangle-fuzzer/
llvm/tools/xcode-toolchain/
llvm/tools/yaml2obj/
llvm/unittests/
llvm/utils/DSAclean.py
llvm/utils/DSAextract.py
llvm/utils/FileCheck/
llvm/utils/GenLibDeps.pl
llvm/utils/GetSourceVersion
llvm/utils/KillTheDoctor/
llvm/utils/LLVMVisualizers/
llvm/utils/Misc/
llvm/utils/PerfectShuffle/
llvm/utils/Reviewing/
llvm/utils/TableGen/CMakeLists.txt
llvm/utils/TableGen/GlobalISel/CMakeLists.txt
llvm/utils/TableGen/tdtags
llvm/utils/Target/
llvm/utils/UpdateCMakeLists.pl
llvm/utils/UpdateTestChecks/
llvm/utils/abtest.py
llvm/utils/add_argument_names.py
-llvm/utils/benchmark/
llvm/utils/bisect
llvm/utils/bisect-skip-count
llvm/utils/bugpoint/
llvm/utils/bugpoint_gisel_reducer.py
llvm/utils/check-each-file
llvm/utils/check_ninja_deps.py
llvm/utils/chunk-print-before-all.py
llvm/utils/clang-parse-diagnostics-file
llvm/utils/codegen-diff
llvm/utils/collect_and_build_with_pgo.py
llvm/utils/convert-constraint-log-to-z3.py
llvm/utils/count/
llvm/utils/countloc.sh
llvm/utils/create_ladder_graph.py
llvm/utils/crosstool/
llvm/utils/demangle_tree.py
llvm/utils/docker/
llvm/utils/emacs/
llvm/utils/extract-section.py
llvm/utils/extract_symbols.py
llvm/utils/extract_vplan.py
llvm/utils/findmisopt
llvm/utils/findoptdiff
llvm/utils/findsym.pl
llvm/utils/fpcmp/
llvm/utils/gdb-scripts/
llvm/utils/getsrcs.sh
llvm/utils/git/
llvm/utils/gn/
llvm/utils/indirect_calls.py
llvm/utils/jedit/
llvm/utils/kate/
llvm/utils/lint/
llvm/utils/lit/
llvm/utils/lldbDataFormatters.py
llvm/utils/llvm-compilers-check
llvm/utils/llvm-gisel-cov.py
llvm/utils/llvm-lit/
llvm/utils/llvm-locstats/
+llvm/utils/llvm-mca-compare.py
llvm/utils/llvm-native-gxx
llvm/utils/llvm-original-di-preservation.py
llvm/utils/llvm.grm
llvm/utils/llvmdo
llvm/utils/llvmgrep
llvm/utils/merge-stats.py
llvm/utils/not/
llvm/utils/pipeline.py
llvm/utils/prepare-code-coverage-artifact.py
llvm/utils/reduce_pipeline.py
llvm/utils/reduce_pipeline_test/
llvm/utils/release/
llvm/utils/remote-exec.py
llvm/utils/revert_checker.py
llvm/utils/revert_checker_test.py
llvm/utils/rsp_bisect.py
llvm/utils/rsp_bisect_test/
llvm/utils/sanitizers/
llvm/utils/schedcover.py
llvm/utils/shuffle_fuzz.py
llvm/utils/shuffle_select_fuzz_tester.py
llvm/utils/sort_includes.py
llvm/utils/sysroot.py
llvm/utils/testgen/
llvm/utils/textmate/
llvm/utils/unicode-case-fold.py
llvm/utils/unittest/
llvm/utils/update_analyze_test_checks.py
llvm/utils/update_cc_test_checks.py
llvm/utils/update_llc_test_checks.py
llvm/utils/update_mca_test_checks.py
llvm/utils/update_mir_test_checks.py
llvm/utils/update_test_checks.py
llvm/utils/update_test_prefix.py
llvm/utils/valgrind/
llvm/utils/vim/
llvm/utils/vscode/
llvm/utils/wciia.py
llvm/utils/yaml-bench/
mlir/
openmp/.gitignore
openmp/CMakeLists.txt
openmp/README.rst
openmp/cmake/
openmp/docs/
openmp/libompd/
openmp/libomptarget/
openmp/runtime/.clang-format
openmp/runtime/.clang-tidy
openmp/runtime/CMakeLists.txt
openmp/runtime/README.txt
openmp/runtime/cmake/
openmp/runtime/doc/
openmp/runtime/src/CMakeLists.txt
openmp/runtime/test/
openmp/runtime/tools/
openmp/tools/
polly/
pstl/
runtimes/
+third-party/
utils/
diff --git a/contrib/llvm-project/clang/include/clang/AST/DeclTemplate.h b/contrib/llvm-project/clang/include/clang/AST/DeclTemplate.h
index d216b359816e..319e605a8a1c 100755
--- a/contrib/llvm-project/clang/include/clang/AST/DeclTemplate.h
+++ b/contrib/llvm-project/clang/include/clang/AST/DeclTemplate.h
@@ -1,3393 +1,3393 @@
//===- DeclTemplate.h - Classes for representing C++ templates --*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Defines the C++ template declaration subclasses.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_AST_DECLTEMPLATE_H
#define LLVM_CLANG_AST_DECLTEMPLATE_H
#include "clang/AST/ASTConcept.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclBase.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclarationName.h"
#include "clang/AST/Redeclarable.h"
#include "clang/AST/TemplateBase.h"
#include "clang/AST/Type.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/Specifiers.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/TrailingObjects.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <utility>
namespace clang {
enum BuiltinTemplateKind : int;
class ClassTemplateDecl;
class ClassTemplatePartialSpecializationDecl;
class Expr;
class FunctionTemplateDecl;
class IdentifierInfo;
class NonTypeTemplateParmDecl;
class TemplateDecl;
class TemplateTemplateParmDecl;
class TemplateTypeParmDecl;
class ConceptDecl;
class UnresolvedSetImpl;
class VarTemplateDecl;
class VarTemplatePartialSpecializationDecl;
/// Stores a template parameter of any kind.
using TemplateParameter =
llvm::PointerUnion<TemplateTypeParmDecl *, NonTypeTemplateParmDecl *,
TemplateTemplateParmDecl *>;
NamedDecl *getAsNamedDecl(TemplateParameter P);
/// Stores a list of template parameters for a TemplateDecl and its
/// derived classes.
class TemplateParameterList final
: private llvm::TrailingObjects<TemplateParameterList, NamedDecl *,
Expr *> {
/// The location of the 'template' keyword.
SourceLocation TemplateLoc;
/// The locations of the '<' and '>' angle brackets.
SourceLocation LAngleLoc, RAngleLoc;
/// The number of template parameters in this template
/// parameter list.
unsigned NumParams : 29;
/// Whether this template parameter list contains an unexpanded parameter
/// pack.
unsigned ContainsUnexpandedParameterPack : 1;
/// Whether this template parameter list has a requires clause.
unsigned HasRequiresClause : 1;
/// Whether any of the template parameters has constrained-parameter
/// constraint-expression.
unsigned HasConstrainedParameters : 1;
protected:
TemplateParameterList(const ASTContext& C, SourceLocation TemplateLoc,
SourceLocation LAngleLoc, ArrayRef<NamedDecl *> Params,
SourceLocation RAngleLoc, Expr *RequiresClause);
size_t numTrailingObjects(OverloadToken<NamedDecl *>) const {
return NumParams;
}
size_t numTrailingObjects(OverloadToken<Expr *>) const {
return HasRequiresClause ? 1 : 0;
}
public:
template <size_t N, bool HasRequiresClause>
friend class FixedSizeTemplateParameterListStorage;
friend TrailingObjects;
static TemplateParameterList *Create(const ASTContext &C,
SourceLocation TemplateLoc,
SourceLocation LAngleLoc,
ArrayRef<NamedDecl *> Params,
SourceLocation RAngleLoc,
Expr *RequiresClause);
/// Iterates through the template parameters in this list.
using iterator = NamedDecl **;
/// Iterates through the template parameters in this list.
using const_iterator = NamedDecl * const *;
iterator begin() { return getTrailingObjects<NamedDecl *>(); }
const_iterator begin() const { return getTrailingObjects<NamedDecl *>(); }
iterator end() { return begin() + NumParams; }
const_iterator end() const { return begin() + NumParams; }
unsigned size() const { return NumParams; }
ArrayRef<NamedDecl*> asArray() {
return llvm::makeArrayRef(begin(), end());
}
ArrayRef<const NamedDecl*> asArray() const {
return llvm::makeArrayRef(begin(), size());
}
NamedDecl* getParam(unsigned Idx) {
assert(Idx < size() && "Template parameter index out-of-range");
return begin()[Idx];
}
const NamedDecl* getParam(unsigned Idx) const {
assert(Idx < size() && "Template parameter index out-of-range");
return begin()[Idx];
}
/// Returns the minimum number of arguments needed to form a
/// template specialization.
///
/// This may be fewer than the number of template parameters, if some of
/// the parameters have default arguments or if there is a parameter pack.
unsigned getMinRequiredArguments() const;
/// Get the depth of this template parameter list in the set of
/// template parameter lists.
///
/// The first template parameter list in a declaration will have depth 0,
/// the second template parameter list will have depth 1, etc.
unsigned getDepth() const;
/// Determine whether this template parameter list contains an
/// unexpanded parameter pack.
bool containsUnexpandedParameterPack() const;
/// Determine whether this template parameter list contains a parameter pack.
bool hasParameterPack() const {
for (const NamedDecl *P : asArray())
if (P->isParameterPack())
return true;
return false;
}
/// The constraint-expression of the associated requires-clause.
Expr *getRequiresClause() {
return HasRequiresClause ? getTrailingObjects<Expr *>()[0] : nullptr;
}
/// The constraint-expression of the associated requires-clause.
const Expr *getRequiresClause() const {
return HasRequiresClause ? getTrailingObjects<Expr *>()[0] : nullptr;
}
/// \brief All associated constraints derived from this template parameter
/// list, including the requires clause and any constraints derived from
/// constrained-parameters.
///
/// The constraints in the resulting list are to be treated as if in a
/// conjunction ("and").
void getAssociatedConstraints(llvm::SmallVectorImpl<const Expr *> &AC) const;
bool hasAssociatedConstraints() const;
SourceLocation getTemplateLoc() const { return TemplateLoc; }
SourceLocation getLAngleLoc() const { return LAngleLoc; }
SourceLocation getRAngleLoc() const { return RAngleLoc; }
SourceRange getSourceRange() const LLVM_READONLY {
return SourceRange(TemplateLoc, RAngleLoc);
}
void print(raw_ostream &Out, const ASTContext &Context,
bool OmitTemplateKW = false) const;
void print(raw_ostream &Out, const ASTContext &Context,
const PrintingPolicy &Policy, bool OmitTemplateKW = false) const;
static bool shouldIncludeTypeForArgument(const PrintingPolicy &Policy,
const TemplateParameterList *TPL,
unsigned Idx);
};
/// Stores a list of template parameters and the associated
/// requires-clause (if any) for a TemplateDecl and its derived classes.
/// Suitable for creating on the stack.
template <size_t N, bool HasRequiresClause>
class FixedSizeTemplateParameterListStorage
: public TemplateParameterList::FixedSizeStorageOwner {
typename TemplateParameterList::FixedSizeStorage<
NamedDecl *, Expr *>::with_counts<
N, HasRequiresClause ? 1u : 0u
>::type storage;
public:
FixedSizeTemplateParameterListStorage(const ASTContext &C,
SourceLocation TemplateLoc,
SourceLocation LAngleLoc,
ArrayRef<NamedDecl *> Params,
SourceLocation RAngleLoc,
Expr *RequiresClause)
: FixedSizeStorageOwner(
(assert(N == Params.size()),
assert(HasRequiresClause == (RequiresClause != nullptr)),
new (static_cast<void *>(&storage)) TemplateParameterList(C,
TemplateLoc, LAngleLoc, Params, RAngleLoc, RequiresClause))) {}
};
/// A template argument list.
class TemplateArgumentList final
: private llvm::TrailingObjects<TemplateArgumentList, TemplateArgument> {
/// The template argument list.
const TemplateArgument *Arguments;
/// The number of template arguments in this template
/// argument list.
unsigned NumArguments;
// Constructs an instance with an internal Argument list, containing
// a copy of the Args array. (Called by CreateCopy)
TemplateArgumentList(ArrayRef<TemplateArgument> Args);
public:
friend TrailingObjects;
TemplateArgumentList(const TemplateArgumentList &) = delete;
TemplateArgumentList &operator=(const TemplateArgumentList &) = delete;
/// Type used to indicate that the template argument list itself is a
/// stack object. It does not own its template arguments.
enum OnStackType { OnStack };
/// Create a new template argument list that copies the given set of
/// template arguments.
static TemplateArgumentList *CreateCopy(ASTContext &Context,
ArrayRef<TemplateArgument> Args);
/// Construct a new, temporary template argument list on the stack.
///
/// The template argument list does not own the template arguments
/// provided.
explicit TemplateArgumentList(OnStackType, ArrayRef<TemplateArgument> Args)
: Arguments(Args.data()), NumArguments(Args.size()) {}
/// Produces a shallow copy of the given template argument list.
///
/// This operation assumes that the input argument list outlives it.
/// This takes the list as a pointer to avoid looking like a copy
/// constructor, since this really really isn't safe to use that
/// way.
explicit TemplateArgumentList(const TemplateArgumentList *Other)
: Arguments(Other->data()), NumArguments(Other->size()) {}
/// Retrieve the template argument at a given index.
const TemplateArgument &get(unsigned Idx) const {
assert(Idx < NumArguments && "Invalid template argument index");
return data()[Idx];
}
/// Retrieve the template argument at a given index.
const TemplateArgument &operator[](unsigned Idx) const { return get(Idx); }
/// Produce this as an array ref.
ArrayRef<TemplateArgument> asArray() const {
return llvm::makeArrayRef(data(), size());
}
/// Retrieve the number of template arguments in this
/// template argument list.
unsigned size() const { return NumArguments; }
/// Retrieve a pointer to the template argument list.
const TemplateArgument *data() const { return Arguments; }
};
void *allocateDefaultArgStorageChain(const ASTContext &C);
/// Storage for a default argument. This is conceptually either empty, or an
/// argument value, or a pointer to a previous declaration that had a default
/// argument.
///
/// However, this is complicated by modules: while we require all the default
/// arguments for a template to be equivalent, there may be more than one, and
/// we need to track all the originating parameters to determine if the default
/// argument is visible.
template<typename ParmDecl, typename ArgType>
class DefaultArgStorage {
/// Storage for both the value *and* another parameter from which we inherit
/// the default argument. This is used when multiple default arguments for a
/// parameter are merged together from different modules.
struct Chain {
ParmDecl *PrevDeclWithDefaultArg;
ArgType Value;
};
static_assert(sizeof(Chain) == sizeof(void *) * 2,
"non-pointer argument type?");
llvm::PointerUnion<ArgType, ParmDecl*, Chain*> ValueOrInherited;
static ParmDecl *getParmOwningDefaultArg(ParmDecl *Parm) {
const DefaultArgStorage &Storage = Parm->getDefaultArgStorage();
if (auto *Prev = Storage.ValueOrInherited.template dyn_cast<ParmDecl *>())
Parm = Prev;
assert(!Parm->getDefaultArgStorage()
.ValueOrInherited.template is<ParmDecl *>() &&
"should only be one level of indirection");
return Parm;
}
public:
DefaultArgStorage() : ValueOrInherited(ArgType()) {}
/// Determine whether there is a default argument for this parameter.
bool isSet() const { return !ValueOrInherited.isNull(); }
/// Determine whether the default argument for this parameter was inherited
/// from a previous declaration of the same entity.
bool isInherited() const { return ValueOrInherited.template is<ParmDecl*>(); }
/// Get the default argument's value. This does not consider whether the
/// default argument is visible.
ArgType get() const {
const DefaultArgStorage *Storage = this;
if (const auto *Prev = ValueOrInherited.template dyn_cast<ParmDecl *>())
Storage = &Prev->getDefaultArgStorage();
if (const auto *C = Storage->ValueOrInherited.template dyn_cast<Chain *>())
return C->Value;
return Storage->ValueOrInherited.template get<ArgType>();
}
/// Get the parameter from which we inherit the default argument, if any.
/// This is the parameter on which the default argument was actually written.
const ParmDecl *getInheritedFrom() const {
if (const auto *D = ValueOrInherited.template dyn_cast<ParmDecl *>())
return D;
if (const auto *C = ValueOrInherited.template dyn_cast<Chain *>())
return C->PrevDeclWithDefaultArg;
return nullptr;
}
/// Set the default argument.
void set(ArgType Arg) {
assert(!isSet() && "default argument already set");
ValueOrInherited = Arg;
}
/// Set that the default argument was inherited from another parameter.
void setInherited(const ASTContext &C, ParmDecl *InheritedFrom) {
assert(!isInherited() && "default argument already inherited");
InheritedFrom = getParmOwningDefaultArg(InheritedFrom);
if (!isSet())
ValueOrInherited = InheritedFrom;
else
ValueOrInherited = new (allocateDefaultArgStorageChain(C))
Chain{InheritedFrom, ValueOrInherited.template get<ArgType>()};
}
/// Remove the default argument, even if it was inherited.
void clear() {
ValueOrInherited = ArgType();
}
};
//===----------------------------------------------------------------------===//
// Kinds of Templates
//===----------------------------------------------------------------------===//
/// \brief The base class of all kinds of template declarations (e.g.,
/// class, function, etc.).
///
/// The TemplateDecl class stores the list of template parameters and a
/// reference to the templated scoped declaration: the underlying AST node.
class TemplateDecl : public NamedDecl {
void anchor() override;
protected:
// Construct a template decl with name, parameters, and templated element.
TemplateDecl(Kind DK, DeclContext *DC, SourceLocation L, DeclarationName Name,
TemplateParameterList *Params, NamedDecl *Decl);
// Construct a template decl with the given name and parameters.
// Used when there is no templated element (e.g., for tt-params).
TemplateDecl(Kind DK, DeclContext *DC, SourceLocation L, DeclarationName Name,
TemplateParameterList *Params)
: TemplateDecl(DK, DC, L, Name, Params, nullptr) {}
public:
friend class ASTDeclReader;
friend class ASTDeclWriter;
/// Get the list of template parameters
TemplateParameterList *getTemplateParameters() const {
return TemplateParams;
}
/// \brief Get the total constraint-expression associated with this template,
/// including constraint-expressions derived from the requires-clause,
/// trailing requires-clause (for functions and methods) and constrained
/// template parameters.
void getAssociatedConstraints(llvm::SmallVectorImpl<const Expr *> &AC) const;
bool hasAssociatedConstraints() const;
/// Get the underlying, templated declaration.
NamedDecl *getTemplatedDecl() const { return TemplatedDecl; }
// Implement isa/cast/dyncast/etc.
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) {
return K >= firstTemplate && K <= lastTemplate;
}
SourceRange getSourceRange() const override LLVM_READONLY {
return SourceRange(getTemplateParameters()->getTemplateLoc(),
TemplatedDecl->getSourceRange().getEnd());
}
protected:
NamedDecl *TemplatedDecl;
TemplateParameterList *TemplateParams;
void setTemplateParameters(TemplateParameterList *TParams) {
TemplateParams = TParams;
}
public:
/// Initialize the underlying templated declaration and
/// template parameters.
void init(NamedDecl *templatedDecl, TemplateParameterList* templateParams) {
assert(!TemplatedDecl && "TemplatedDecl already set!");
assert(!TemplateParams && "TemplateParams already set!");
TemplatedDecl = templatedDecl;
TemplateParams = templateParams;
}
};
/// Provides information about a function template specialization,
/// which is a FunctionDecl that has been explicitly specialized or
/// instantiated from a function template.
class FunctionTemplateSpecializationInfo final
: public llvm::FoldingSetNode,
private llvm::TrailingObjects<FunctionTemplateSpecializationInfo,
MemberSpecializationInfo *> {
/// The function template specialization that this structure describes and a
/// flag indicating if the function is a member specialization.
llvm::PointerIntPair<FunctionDecl *, 1, bool> Function;
/// The function template from which this function template
/// specialization was generated.
///
/// The two bits contain the top 4 values of TemplateSpecializationKind.
llvm::PointerIntPair<FunctionTemplateDecl *, 2> Template;
public:
/// The template arguments used to produce the function template
/// specialization from the function template.
const TemplateArgumentList *TemplateArguments;
/// The template arguments as written in the sources, if provided.
/// FIXME: Normally null; tail-allocate this.
const ASTTemplateArgumentListInfo *TemplateArgumentsAsWritten;
/// The point at which this function template specialization was
/// first instantiated.
SourceLocation PointOfInstantiation;
private:
FunctionTemplateSpecializationInfo(
FunctionDecl *FD, FunctionTemplateDecl *Template,
TemplateSpecializationKind TSK, const TemplateArgumentList *TemplateArgs,
const ASTTemplateArgumentListInfo *TemplateArgsAsWritten,
SourceLocation POI, MemberSpecializationInfo *MSInfo)
: Function(FD, MSInfo ? true : false), Template(Template, TSK - 1),
TemplateArguments(TemplateArgs),
TemplateArgumentsAsWritten(TemplateArgsAsWritten),
PointOfInstantiation(POI) {
if (MSInfo)
getTrailingObjects<MemberSpecializationInfo *>()[0] = MSInfo;
}
size_t numTrailingObjects(OverloadToken<MemberSpecializationInfo*>) const {
return Function.getInt();
}
public:
friend TrailingObjects;
static FunctionTemplateSpecializationInfo *
Create(ASTContext &C, FunctionDecl *FD, FunctionTemplateDecl *Template,
TemplateSpecializationKind TSK,
const TemplateArgumentList *TemplateArgs,
const TemplateArgumentListInfo *TemplateArgsAsWritten,
SourceLocation POI, MemberSpecializationInfo *MSInfo);
/// Retrieve the declaration of the function template specialization.
FunctionDecl *getFunction() const { return Function.getPointer(); }
/// Retrieve the template from which this function was specialized.
FunctionTemplateDecl *getTemplate() const { return Template.getPointer(); }
/// Determine what kind of template specialization this is.
TemplateSpecializationKind getTemplateSpecializationKind() const {
return (TemplateSpecializationKind)(Template.getInt() + 1);
}
bool isExplicitSpecialization() const {
return getTemplateSpecializationKind() == TSK_ExplicitSpecialization;
}
/// True if this declaration is an explicit specialization,
/// explicit instantiation declaration, or explicit instantiation
/// definition.
bool isExplicitInstantiationOrSpecialization() const {
return isTemplateExplicitInstantiationOrSpecialization(
getTemplateSpecializationKind());
}
/// Set the template specialization kind.
void setTemplateSpecializationKind(TemplateSpecializationKind TSK) {
assert(TSK != TSK_Undeclared &&
"Cannot encode TSK_Undeclared for a function template specialization");
Template.setInt(TSK - 1);
}
/// Retrieve the first point of instantiation of this function
/// template specialization.
///
/// The point of instantiation may be an invalid source location if this
/// function has yet to be instantiated.
SourceLocation getPointOfInstantiation() const {
return PointOfInstantiation;
}
/// Set the (first) point of instantiation of this function template
/// specialization.
void setPointOfInstantiation(SourceLocation POI) {
PointOfInstantiation = POI;
}
/// Get the specialization info if this function template specialization is
/// also a member specialization:
///
/// \code
/// template<typename> struct A {
/// template<typename> void f();
/// template<> void f<int>(); // ClassScopeFunctionSpecializationDecl
/// };
/// \endcode
///
/// Here, A<int>::f<int> is a function template specialization that is
/// an explicit specialization of A<int>::f, but it's also a member
/// specialization (an implicit instantiation in this case) of A::f<int>.
/// Further:
///
/// \code
/// template<> template<> void A<int>::f<int>() {}
/// \endcode
///
/// ... declares a function template specialization that is an explicit
/// specialization of A<int>::f, and is also an explicit member
/// specialization of A::f<int>.
///
/// Note that the TemplateSpecializationKind of the MemberSpecializationInfo
/// need not be the same as that returned by getTemplateSpecializationKind(),
/// and represents the relationship between the function and the class-scope
/// explicit specialization in the original templated class -- whereas our
/// TemplateSpecializationKind represents the relationship between the
/// function and the function template, and should always be
/// TSK_ExplicitSpecialization whenever we have MemberSpecializationInfo.
MemberSpecializationInfo *getMemberSpecializationInfo() const {
return numTrailingObjects(OverloadToken<MemberSpecializationInfo *>())
? getTrailingObjects<MemberSpecializationInfo *>()[0]
: nullptr;
}
void Profile(llvm::FoldingSetNodeID &ID) {
Profile(ID, TemplateArguments->asArray(), getFunction()->getASTContext());
}
static void
Profile(llvm::FoldingSetNodeID &ID, ArrayRef<TemplateArgument> TemplateArgs,
ASTContext &Context) {
ID.AddInteger(TemplateArgs.size());
for (const TemplateArgument &TemplateArg : TemplateArgs)
TemplateArg.Profile(ID, Context);
}
};
/// Provides information about a specialization of a member of a class
/// template, which may be a member function, static data member,
/// member class or member enumeration.
class MemberSpecializationInfo {
// The member declaration from which this member was instantiated, and the
// manner in which the instantiation occurred (in the lower two bits).
llvm::PointerIntPair<NamedDecl *, 2> MemberAndTSK;
// The point at which this member was first instantiated.
SourceLocation PointOfInstantiation;
public:
explicit
MemberSpecializationInfo(NamedDecl *IF, TemplateSpecializationKind TSK,
SourceLocation POI = SourceLocation())
: MemberAndTSK(IF, TSK - 1), PointOfInstantiation(POI) {
assert(TSK != TSK_Undeclared &&
"Cannot encode undeclared template specializations for members");
}
/// Retrieve the member declaration from which this member was
/// instantiated.
NamedDecl *getInstantiatedFrom() const { return MemberAndTSK.getPointer(); }
/// Determine what kind of template specialization this is.
TemplateSpecializationKind getTemplateSpecializationKind() const {
return (TemplateSpecializationKind)(MemberAndTSK.getInt() + 1);
}
bool isExplicitSpecialization() const {
return getTemplateSpecializationKind() == TSK_ExplicitSpecialization;
}
/// Set the template specialization kind.
void setTemplateSpecializationKind(TemplateSpecializationKind TSK) {
assert(TSK != TSK_Undeclared &&
"Cannot encode undeclared template specializations for members");
MemberAndTSK.setInt(TSK - 1);
}
/// Retrieve the first point of instantiation of this member.
/// If the point of instantiation is an invalid location, then this member
/// has not yet been instantiated.
SourceLocation getPointOfInstantiation() const {
return PointOfInstantiation;
}
/// Set the first point of instantiation.
void setPointOfInstantiation(SourceLocation POI) {
PointOfInstantiation = POI;
}
};
/// Provides information about a dependent function-template
/// specialization declaration.
///
/// Since explicit function template specialization and instantiation
/// declarations can only appear in namespace scope, and you can only
/// specialize a member of a fully-specialized class, the only way to
/// get one of these is in a friend declaration like the following:
///
/// \code
/// template \<class T> void foo(T);
/// template \<class T> class A {
/// friend void foo<>(T);
/// };
/// \endcode
class DependentFunctionTemplateSpecializationInfo final
: private llvm::TrailingObjects<DependentFunctionTemplateSpecializationInfo,
TemplateArgumentLoc,
FunctionTemplateDecl *> {
/// The number of potential template candidates.
unsigned NumTemplates;
/// The number of template arguments.
unsigned NumArgs;
/// The locations of the left and right angle brackets.
SourceRange AngleLocs;
size_t numTrailingObjects(OverloadToken<TemplateArgumentLoc>) const {
return NumArgs;
}
size_t numTrailingObjects(OverloadToken<FunctionTemplateDecl *>) const {
return NumTemplates;
}
DependentFunctionTemplateSpecializationInfo(
const UnresolvedSetImpl &Templates,
const TemplateArgumentListInfo &TemplateArgs);
public:
friend TrailingObjects;
static DependentFunctionTemplateSpecializationInfo *
Create(ASTContext &Context, const UnresolvedSetImpl &Templates,
const TemplateArgumentListInfo &TemplateArgs);
/// Returns the number of function templates that this might
/// be a specialization of.
unsigned getNumTemplates() const { return NumTemplates; }
/// Returns the i'th template candidate.
FunctionTemplateDecl *getTemplate(unsigned I) const {
assert(I < getNumTemplates() && "template index out of range");
return getTrailingObjects<FunctionTemplateDecl *>()[I];
}
/// Returns the explicit template arguments that were given.
const TemplateArgumentLoc *getTemplateArgs() const {
return getTrailingObjects<TemplateArgumentLoc>();
}
/// Returns the number of explicit template arguments that were given.
unsigned getNumTemplateArgs() const { return NumArgs; }
llvm::ArrayRef<TemplateArgumentLoc> arguments() const {
return llvm::makeArrayRef(getTemplateArgs(), getNumTemplateArgs());
}
/// Returns the nth template argument.
const TemplateArgumentLoc &getTemplateArg(unsigned I) const {
assert(I < getNumTemplateArgs() && "template arg index out of range");
return getTemplateArgs()[I];
}
SourceLocation getLAngleLoc() const {
return AngleLocs.getBegin();
}
SourceLocation getRAngleLoc() const {
return AngleLocs.getEnd();
}
};
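// Usage sketch (illustrative; `DFTSI` is an assumed
// DependentFunctionTemplateSpecializationInfo*): enumerate the candidate
// templates and the explicit arguments written between the angle brackets.
//
//   for (unsigned I = 0, N = DFTSI->getNumTemplates(); I != N; ++I) {
//     FunctionTemplateDecl *Candidate = DFTSI->getTemplate(I);
//     (void)Candidate;
//   }
//   for (const TemplateArgumentLoc &ArgLoc : DFTSI->arguments())
//     (void)ArgLoc.getArgument();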
/// Declaration of a redeclarable template.
class RedeclarableTemplateDecl : public TemplateDecl,
public Redeclarable<RedeclarableTemplateDecl>
{
using redeclarable_base = Redeclarable<RedeclarableTemplateDecl>;
RedeclarableTemplateDecl *getNextRedeclarationImpl() override {
return getNextRedeclaration();
}
RedeclarableTemplateDecl *getPreviousDeclImpl() override {
return getPreviousDecl();
}
RedeclarableTemplateDecl *getMostRecentDeclImpl() override {
return getMostRecentDecl();
}
void anchor() override;
protected:
template <typename EntryType> struct SpecEntryTraits {
using DeclType = EntryType;
static DeclType *getDecl(EntryType *D) {
return D;
}
static ArrayRef<TemplateArgument> getTemplateArgs(EntryType *D) {
return D->getTemplateArgs().asArray();
}
};
template <typename EntryType, typename SETraits = SpecEntryTraits<EntryType>,
typename DeclType = typename SETraits::DeclType>
struct SpecIterator
: llvm::iterator_adaptor_base<
SpecIterator<EntryType, SETraits, DeclType>,
typename llvm::FoldingSetVector<EntryType>::iterator,
typename std::iterator_traits<typename llvm::FoldingSetVector<
EntryType>::iterator>::iterator_category,
DeclType *, ptrdiff_t, DeclType *, DeclType *> {
SpecIterator() = default;
explicit SpecIterator(
typename llvm::FoldingSetVector<EntryType>::iterator SetIter)
: SpecIterator::iterator_adaptor_base(std::move(SetIter)) {}
DeclType *operator*() const {
return SETraits::getDecl(&*this->I)->getMostRecentDecl();
}
DeclType *operator->() const { return **this; }
};
template <typename EntryType>
static SpecIterator<EntryType>
makeSpecIterator(llvm::FoldingSetVector<EntryType> &Specs, bool isEnd) {
return SpecIterator<EntryType>(isEnd ? Specs.end() : Specs.begin());
}
void loadLazySpecializationsImpl() const;
template <class EntryType, typename ...ProfileArguments>
typename SpecEntryTraits<EntryType>::DeclType*
findSpecializationImpl(llvm::FoldingSetVector<EntryType> &Specs,
void *&InsertPos, ProfileArguments &&...ProfileArgs);
template <class Derived, class EntryType>
void addSpecializationImpl(llvm::FoldingSetVector<EntryType> &Specs,
EntryType *Entry, void *InsertPos);
struct CommonBase {
CommonBase() : InstantiatedFromMember(nullptr, false) {}
/// The template from which this was most
/// directly instantiated (or null).
///
/// The boolean value indicates whether this template
/// was explicitly specialized.
llvm::PointerIntPair<RedeclarableTemplateDecl*, 1, bool>
InstantiatedFromMember;
/// If non-null, points to an array of specializations (including
/// partial specializations) known only by their external declaration IDs.
///
/// The first value in the array is the number of specializations/partial
/// specializations that follow.
uint32_t *LazySpecializations = nullptr;
};
/// Pointer to the common data shared by all declarations of this
/// template.
mutable CommonBase *Common = nullptr;
/// Retrieves the "common" pointer shared by all (re-)declarations of
/// the same template. Calling this routine may implicitly allocate memory
/// for the common pointer.
CommonBase *getCommonPtr() const;
virtual CommonBase *newCommon(ASTContext &C) const = 0;
// Construct a template decl with name, parameters, and templated element.
RedeclarableTemplateDecl(Kind DK, ASTContext &C, DeclContext *DC,
SourceLocation L, DeclarationName Name,
TemplateParameterList *Params, NamedDecl *Decl)
: TemplateDecl(DK, DC, L, Name, Params, Decl), redeclarable_base(C) {}
public:
friend class ASTDeclReader;
friend class ASTDeclWriter;
friend class ASTReader;
template <class decl_type> friend class RedeclarableTemplate;
/// Retrieves the canonical declaration of this template.
RedeclarableTemplateDecl *getCanonicalDecl() override {
return getFirstDecl();
}
const RedeclarableTemplateDecl *getCanonicalDecl() const {
return getFirstDecl();
}
/// Determines whether this template was a specialization of a
/// member template.
///
/// In the following example, the function template \c X<int>::f and the
/// member template \c X<int>::Inner are member specializations.
///
/// \code
/// template<typename T>
/// struct X {
/// template<typename U> void f(T, U);
/// template<typename U> struct Inner;
/// };
///
/// template<> template<typename T>
/// void X<int>::f(int, T);
/// template<> template<typename T>
/// struct X<int>::Inner { /* ... */ };
/// \endcode
bool isMemberSpecialization() const {
return getCommonPtr()->InstantiatedFromMember.getInt();
}
/// Note that this member template is a specialization.
void setMemberSpecialization() {
assert(getCommonPtr()->InstantiatedFromMember.getPointer() &&
"Only member templates can be member template specializations");
getCommonPtr()->InstantiatedFromMember.setInt(true);
}
/// Retrieve the member template from which this template was
/// instantiated, or nullptr if this template was not instantiated from a
/// member template.
///
/// A template is instantiated from a member template when the member
/// template itself is part of a class template (or member thereof). For
/// example, given
///
/// \code
/// template<typename T>
/// struct X {
/// template<typename U> void f(T, U);
/// };
///
/// void test(X<int> x) {
/// x.f(1, 'a');
/// };
/// \endcode
///
/// \c X<int>::f is a FunctionTemplateDecl that describes the function
/// template
///
/// \code
/// template<typename U> void X<int>::f(int, U);
/// \endcode
///
/// which was itself created during the instantiation of \c X<int>. Calling
/// getInstantiatedFromMemberTemplate() on this FunctionTemplateDecl will
/// retrieve the FunctionTemplateDecl for the original template \c f within
/// the class template \c X<T>, i.e.,
///
/// \code
/// template<typename T>
/// template<typename U>
/// void X<T>::f(T, U);
/// \endcode
RedeclarableTemplateDecl *getInstantiatedFromMemberTemplate() const {
return getCommonPtr()->InstantiatedFromMember.getPointer();
}
void setInstantiatedFromMemberTemplate(RedeclarableTemplateDecl *TD) {
assert(!getCommonPtr()->InstantiatedFromMember.getPointer());
getCommonPtr()->InstantiatedFromMember.setPointer(TD);
}
using redecl_range = redeclarable_base::redecl_range;
using redecl_iterator = redeclarable_base::redecl_iterator;
using redeclarable_base::redecls_begin;
using redeclarable_base::redecls_end;
using redeclarable_base::redecls;
using redeclarable_base::getPreviousDecl;
using redeclarable_base::getMostRecentDecl;
using redeclarable_base::isFirstDecl;
// Implement isa/cast/dyncast/etc.
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) {
return K >= firstRedeclarableTemplate && K <= lastRedeclarableTemplate;
}
};
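// Usage sketch (illustrative; `RTD` is an assumed RedeclarableTemplateDecl*):
// the canonical declaration is the first declaration, and the member-template
// pattern, if any, is shared by every redeclaration through the common data.
//
//   RedeclarableTemplateDecl *Canon = RTD->getCanonicalDecl();
//   for (RedeclarableTemplateDecl *Redecl : RTD->redecls())
//     assert(Redecl->getCanonicalDecl() == Canon);
//   if (RedeclarableTemplateDecl *Pattern =
//           RTD->getInstantiatedFromMemberTemplate()) {
//     // RTD was instantiated from a member template; it may additionally be
//     // an explicit member specialization.
//     bool IsMemberSpec = RTD->isMemberSpecialization();
//     (void)Pattern; (void)IsMemberSpec;
//   }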
template <> struct RedeclarableTemplateDecl::
SpecEntryTraits<FunctionTemplateSpecializationInfo> {
using DeclType = FunctionDecl;
static DeclType *getDecl(FunctionTemplateSpecializationInfo *I) {
return I->getFunction();
}
static ArrayRef<TemplateArgument>
getTemplateArgs(FunctionTemplateSpecializationInfo *I) {
return I->TemplateArguments->asArray();
}
};
/// Declaration of a template function.
class FunctionTemplateDecl : public RedeclarableTemplateDecl {
protected:
friend class FunctionDecl;
/// Data that is common to all of the declarations of a given
/// function template.
struct Common : CommonBase {
/// The function template specializations for this function
/// template, including explicit specializations and instantiations.
llvm::FoldingSetVector<FunctionTemplateSpecializationInfo> Specializations;
/// The set of "injected" template arguments used within this
/// function template.
///
/// This pointer refers to the template arguments (there are as
  /// many template arguments as template parameters) for the function
/// template, and is allocated lazily, since most function templates do not
/// require the use of this information.
TemplateArgument *InjectedArgs = nullptr;
Common() = default;
};
FunctionTemplateDecl(ASTContext &C, DeclContext *DC, SourceLocation L,
DeclarationName Name, TemplateParameterList *Params,
NamedDecl *Decl)
: RedeclarableTemplateDecl(FunctionTemplate, C, DC, L, Name, Params,
Decl) {}
CommonBase *newCommon(ASTContext &C) const override;
Common *getCommonPtr() const {
return static_cast<Common *>(RedeclarableTemplateDecl::getCommonPtr());
}
/// Retrieve the set of function template specializations of this
/// function template.
llvm::FoldingSetVector<FunctionTemplateSpecializationInfo> &
getSpecializations() const;
/// Add a specialization of this function template.
///
/// \param InsertPos Insert position in the FoldingSetVector, must have been
/// retrieved by an earlier call to findSpecialization().
void addSpecialization(FunctionTemplateSpecializationInfo* Info,
void *InsertPos);
public:
friend class ASTDeclReader;
friend class ASTDeclWriter;
/// Load any lazily-loaded specializations from the external source.
void LoadLazySpecializations() const;
/// Get the underlying function declaration of the template.
FunctionDecl *getTemplatedDecl() const {
return static_cast<FunctionDecl *>(TemplatedDecl);
}
/// Returns whether this template declaration defines the primary
/// pattern.
bool isThisDeclarationADefinition() const {
return getTemplatedDecl()->isThisDeclarationADefinition();
}
/// Return the specialization with the provided arguments if it exists,
/// otherwise return the insertion point.
FunctionDecl *findSpecialization(ArrayRef<TemplateArgument> Args,
void *&InsertPos);
FunctionTemplateDecl *getCanonicalDecl() override {
return cast<FunctionTemplateDecl>(
RedeclarableTemplateDecl::getCanonicalDecl());
}
const FunctionTemplateDecl *getCanonicalDecl() const {
return cast<FunctionTemplateDecl>(
RedeclarableTemplateDecl::getCanonicalDecl());
}
/// Retrieve the previous declaration of this function template, or
/// nullptr if no such declaration exists.
FunctionTemplateDecl *getPreviousDecl() {
return cast_or_null<FunctionTemplateDecl>(
static_cast<RedeclarableTemplateDecl *>(this)->getPreviousDecl());
}
const FunctionTemplateDecl *getPreviousDecl() const {
return cast_or_null<FunctionTemplateDecl>(
static_cast<const RedeclarableTemplateDecl *>(this)->getPreviousDecl());
}
FunctionTemplateDecl *getMostRecentDecl() {
return cast<FunctionTemplateDecl>(
static_cast<RedeclarableTemplateDecl *>(this)
->getMostRecentDecl());
}
const FunctionTemplateDecl *getMostRecentDecl() const {
return const_cast<FunctionTemplateDecl*>(this)->getMostRecentDecl();
}
FunctionTemplateDecl *getInstantiatedFromMemberTemplate() const {
return cast_or_null<FunctionTemplateDecl>(
RedeclarableTemplateDecl::getInstantiatedFromMemberTemplate());
}
using spec_iterator = SpecIterator<FunctionTemplateSpecializationInfo>;
using spec_range = llvm::iterator_range<spec_iterator>;
spec_range specializations() const {
return spec_range(spec_begin(), spec_end());
}
spec_iterator spec_begin() const {
return makeSpecIterator(getSpecializations(), false);
}
spec_iterator spec_end() const {
return makeSpecIterator(getSpecializations(), true);
}
/// Retrieve the "injected" template arguments that correspond to the
/// template parameters of this function template.
///
/// Although the C++ standard has no notion of the "injected" template
/// arguments for a function template, the notion is convenient when
/// we need to perform substitutions inside the definition of a function
/// template.
ArrayRef<TemplateArgument> getInjectedTemplateArgs();
/// Return whether this function template is an abbreviated function template,
/// e.g. `void foo(auto x)` or `template<typename T> void foo(auto x)`
bool isAbbreviated() const {
// Since the invented template parameters generated from 'auto' parameters
// are either appended to the end of the explicit template parameter list or
    // form a new template parameter list, we can simply observe the last
// parameter to determine if such a thing happened.
const TemplateParameterList *TPL = getTemplateParameters();
return TPL->getParam(TPL->size() - 1)->isImplicit();
}
/// Merge \p Prev with our RedeclarableTemplateDecl::Common.
void mergePrevDecl(FunctionTemplateDecl *Prev);
/// Create a function template node.
static FunctionTemplateDecl *Create(ASTContext &C, DeclContext *DC,
SourceLocation L,
DeclarationName Name,
TemplateParameterList *Params,
NamedDecl *Decl);
/// Create an empty function template node.
static FunctionTemplateDecl *CreateDeserialized(ASTContext &C, unsigned ID);
// Implement isa/cast/dyncast support
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == FunctionTemplate; }
};
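// Usage sketch (illustrative; `FTD` is an assumed FunctionTemplateDecl* and
// `Args` an ArrayRef<TemplateArgument>): look up a specialization by its
// argument list, falling back to walking every known specialization.
//
//   void *InsertPos = nullptr;
//   if (FunctionDecl *Spec = FTD->findSpecialization(Args, InsertPos)) {
//     // Already instantiated or explicitly specialized.
//     (void)Spec;
//   } else {
//     for (FunctionDecl *Known : FTD->specializations())
//       (void)Known;
//   }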
//===----------------------------------------------------------------------===//
// Kinds of Template Parameters
//===----------------------------------------------------------------------===//
/// Defines the position of a template parameter within a template
/// parameter list.
///
/// Because template parameters can be listed
/// sequentially for out-of-line template members, each template parameter is
/// given a Depth - the nesting of template parameter scopes - and a Position -
/// the occurrence within the parameter list.
/// This class is inherited privately by the different kinds of template
/// parameters; it is not part of the Decl hierarchy, just a facility.
class TemplateParmPosition {
protected:
// FIXME: These probably don't need to be ints. int:5 for depth, int:8 for
// position? Maybe?
unsigned Depth;
unsigned Position;
TemplateParmPosition(unsigned D, unsigned P) : Depth(D), Position(P) {}
public:
TemplateParmPosition() = delete;
/// Get the nesting depth of the template parameter.
unsigned getDepth() const { return Depth; }
void setDepth(unsigned D) { Depth = D; }
/// Get the position of the template parameter within its parameter list.
unsigned getPosition() const { return Position; }
void setPosition(unsigned P) { Position = P; }
/// Get the index of the template parameter within its parameter list.
unsigned getIndex() const { return Position; }
};
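// Illustrative note on depth and position: in
//
//   template<typename T>          // T: depth 0, index 0
//   struct X {
//     template<typename U, int N> // U: depth 1, index 0;  N: depth 1, index 1
//     void f();
//   };
//
// getDepth() reports how many template parameter scopes enclose the
// parameter's own list, and getPosition()/getIndex() report its position
// within that list.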
/// Declaration of a template type parameter.
///
/// For example, "T" in
/// \code
/// template<typename T> class vector;
/// \endcode
class TemplateTypeParmDecl final : public TypeDecl,
private llvm::TrailingObjects<TemplateTypeParmDecl, TypeConstraint> {
/// Sema creates these on the stack during auto type deduction.
friend class Sema;
friend TrailingObjects;
friend class ASTDeclReader;
  /// Whether this template type parameter was declared with
/// the 'typename' keyword.
///
/// If false, it was declared with the 'class' keyword.
bool Typename : 1;
/// Whether this template type parameter has a type-constraint construct.
bool HasTypeConstraint : 1;
/// Whether the type constraint has been initialized. This can be false if the
/// constraint was not initialized yet or if there was an error forming the
/// type constraint.
bool TypeConstraintInitialized : 1;
  /// Whether this template type parameter is an "expanded"
  /// parameter pack, meaning that it is a pack expansion and we
  /// already know the set of parameters that the expansion expands to.
bool ExpandedParameterPack : 1;
/// The number of type parameters in an expanded parameter pack.
unsigned NumExpanded = 0;
/// The default template argument, if any.
using DefArgStorage =
DefaultArgStorage<TemplateTypeParmDecl, TypeSourceInfo *>;
DefArgStorage DefaultArgument;
TemplateTypeParmDecl(DeclContext *DC, SourceLocation KeyLoc,
SourceLocation IdLoc, IdentifierInfo *Id, bool Typename,
bool HasTypeConstraint, Optional<unsigned> NumExpanded)
: TypeDecl(TemplateTypeParm, DC, IdLoc, Id, KeyLoc), Typename(Typename),
HasTypeConstraint(HasTypeConstraint), TypeConstraintInitialized(false),
ExpandedParameterPack(NumExpanded),
NumExpanded(NumExpanded.getValueOr(0)) {}
public:
static TemplateTypeParmDecl *Create(const ASTContext &C, DeclContext *DC,
SourceLocation KeyLoc,
SourceLocation NameLoc,
unsigned D, unsigned P,
IdentifierInfo *Id, bool Typename,
bool ParameterPack,
bool HasTypeConstraint = false,
Optional<unsigned> NumExpanded = None);
static TemplateTypeParmDecl *CreateDeserialized(const ASTContext &C,
unsigned ID);
static TemplateTypeParmDecl *CreateDeserialized(const ASTContext &C,
unsigned ID,
bool HasTypeConstraint);
/// Whether this template type parameter was declared with
/// the 'typename' keyword.
///
/// If not, it was either declared with the 'class' keyword or with a
/// type-constraint (see hasTypeConstraint()).
bool wasDeclaredWithTypename() const {
return Typename && !HasTypeConstraint;
}
const DefArgStorage &getDefaultArgStorage() const { return DefaultArgument; }
/// Determine whether this template parameter has a default
/// argument.
bool hasDefaultArgument() const { return DefaultArgument.isSet(); }
/// Retrieve the default argument, if any.
QualType getDefaultArgument() const {
return DefaultArgument.get()->getType();
}
/// Retrieves the default argument's source information, if any.
TypeSourceInfo *getDefaultArgumentInfo() const {
return DefaultArgument.get();
}
/// Retrieves the location of the default argument declaration.
SourceLocation getDefaultArgumentLoc() const;
/// Determines whether the default argument was inherited
/// from a previous declaration of this template.
bool defaultArgumentWasInherited() const {
return DefaultArgument.isInherited();
}
/// Set the default argument for this template parameter.
void setDefaultArgument(TypeSourceInfo *DefArg) {
DefaultArgument.set(DefArg);
}
/// Set that this default argument was inherited from another
/// parameter.
void setInheritedDefaultArgument(const ASTContext &C,
TemplateTypeParmDecl *Prev) {
DefaultArgument.setInherited(C, Prev);
}
/// Removes the default argument of this template parameter.
void removeDefaultArgument() {
DefaultArgument.clear();
}
/// Set whether this template type parameter was declared with
/// the 'typename' or 'class' keyword.
void setDeclaredWithTypename(bool withTypename) { Typename = withTypename; }
/// Retrieve the depth of the template parameter.
unsigned getDepth() const;
/// Retrieve the index of the template parameter.
unsigned getIndex() const;
/// Returns whether this is a parameter pack.
bool isParameterPack() const;
/// Whether this parameter pack is a pack expansion.
///
  /// A template type parameter pack can be a pack expansion if its
/// type-constraint contains an unexpanded parameter pack.
bool isPackExpansion() const {
if (!isParameterPack())
return false;
if (const TypeConstraint *TC = getTypeConstraint())
if (TC->hasExplicitTemplateArgs())
for (const auto &ArgLoc : TC->getTemplateArgsAsWritten()->arguments())
if (ArgLoc.getArgument().containsUnexpandedParameterPack())
return true;
return false;
}
/// Whether this parameter is a template type parameter pack that has a known
/// list of different type-constraints at different positions.
///
/// A parameter pack is an expanded parameter pack when the original
/// parameter pack's type-constraint was itself a pack expansion, and that
/// expansion has already been expanded. For example, given:
///
/// \code
/// template<typename ...Types>
/// struct X {
/// template<convertible_to<Types> ...Convertibles>
/// struct Y { /* ... */ };
/// };
/// \endcode
///
/// The parameter pack \c Convertibles has (convertible_to<Types> && ...) as
/// its type-constraint. When \c Types is supplied with template arguments by
/// instantiating \c X, the instantiation of \c Convertibles becomes an
/// expanded parameter pack. For example, instantiating
/// \c X<int, unsigned int> results in \c Convertibles being an expanded
  /// parameter pack of size 2 (use getNumExpansionParameters() to get this
  /// number).
bool isExpandedParameterPack() const { return ExpandedParameterPack; }
/// Retrieves the number of parameters in an expanded parameter pack.
unsigned getNumExpansionParameters() const {
assert(ExpandedParameterPack && "Not an expansion parameter pack");
return NumExpanded;
}
/// Returns the type constraint associated with this template parameter (if
/// any).
const TypeConstraint *getTypeConstraint() const {
return TypeConstraintInitialized ? getTrailingObjects<TypeConstraint>() :
nullptr;
}
void setTypeConstraint(NestedNameSpecifierLoc NNS,
DeclarationNameInfo NameInfo, NamedDecl *FoundDecl,
ConceptDecl *CD,
const ASTTemplateArgumentListInfo *ArgsAsWritten,
Expr *ImmediatelyDeclaredConstraint);
/// Determine whether this template parameter has a type-constraint.
bool hasTypeConstraint() const {
return HasTypeConstraint;
}
/// \brief Get the associated-constraints of this template parameter.
/// This will either be the immediately-introduced constraint or empty.
///
/// Use this instead of getConstraintExpression for concepts APIs that
/// accept an ArrayRef of constraint expressions.
void getAssociatedConstraints(llvm::SmallVectorImpl<const Expr *> &AC) const {
if (HasTypeConstraint)
AC.push_back(getTypeConstraint()->getImmediatelyDeclaredConstraint());
}
SourceRange getSourceRange() const override LLVM_READONLY;
// Implement isa/cast/dyncast/etc.
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == TemplateTypeParm; }
};
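// Usage sketch (illustrative; `TTP` is an assumed TemplateTypeParmDecl*):
// distinguish `typename T`, `class T`, and a constrained parameter such as
// `std::integral T`, and read back any default argument.
//
//   if (TTP->hasTypeConstraint()) {
//     const TypeConstraint *TC = TTP->getTypeConstraint(); // may be null on error
//     (void)TC;
//   } else if (TTP->wasDeclaredWithTypename()) {
//     // Written as `typename T` rather than `class T`.
//   }
//   if (TTP->hasDefaultArgument() && !TTP->defaultArgumentWasInherited()) {
//     QualType Default = TTP->getDefaultArgument();
//     (void)Default;
//   }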
/// NonTypeTemplateParmDecl - Declares a non-type template parameter,
/// e.g., "Size" in
/// @code
/// template<int Size> class array { };
/// @endcode
class NonTypeTemplateParmDecl final
: public DeclaratorDecl,
protected TemplateParmPosition,
private llvm::TrailingObjects<NonTypeTemplateParmDecl,
std::pair<QualType, TypeSourceInfo *>,
Expr *> {
friend class ASTDeclReader;
friend TrailingObjects;
/// The default template argument, if any, and whether or not
/// it was inherited.
using DefArgStorage = DefaultArgStorage<NonTypeTemplateParmDecl, Expr *>;
DefArgStorage DefaultArgument;
// FIXME: Collapse this into TemplateParamPosition; or, just move depth/index
// down here to save memory.
/// Whether this non-type template parameter is a parameter pack.
bool ParameterPack;
/// Whether this non-type template parameter is an "expanded"
/// parameter pack, meaning that its type is a pack expansion and we
/// already know the set of types that expansion expands to.
bool ExpandedParameterPack = false;
/// The number of types in an expanded parameter pack.
unsigned NumExpandedTypes = 0;
size_t numTrailingObjects(
OverloadToken<std::pair<QualType, TypeSourceInfo *>>) const {
return NumExpandedTypes;
}
NonTypeTemplateParmDecl(DeclContext *DC, SourceLocation StartLoc,
SourceLocation IdLoc, unsigned D, unsigned P,
IdentifierInfo *Id, QualType T,
bool ParameterPack, TypeSourceInfo *TInfo)
: DeclaratorDecl(NonTypeTemplateParm, DC, IdLoc, Id, T, TInfo, StartLoc),
TemplateParmPosition(D, P), ParameterPack(ParameterPack) {}
NonTypeTemplateParmDecl(DeclContext *DC, SourceLocation StartLoc,
SourceLocation IdLoc, unsigned D, unsigned P,
IdentifierInfo *Id, QualType T,
TypeSourceInfo *TInfo,
ArrayRef<QualType> ExpandedTypes,
ArrayRef<TypeSourceInfo *> ExpandedTInfos);
public:
static NonTypeTemplateParmDecl *
Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc,
SourceLocation IdLoc, unsigned D, unsigned P, IdentifierInfo *Id,
QualType T, bool ParameterPack, TypeSourceInfo *TInfo);
static NonTypeTemplateParmDecl *
Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc,
SourceLocation IdLoc, unsigned D, unsigned P, IdentifierInfo *Id,
QualType T, TypeSourceInfo *TInfo, ArrayRef<QualType> ExpandedTypes,
ArrayRef<TypeSourceInfo *> ExpandedTInfos);
static NonTypeTemplateParmDecl *CreateDeserialized(ASTContext &C,
unsigned ID,
bool HasTypeConstraint);
static NonTypeTemplateParmDecl *CreateDeserialized(ASTContext &C,
unsigned ID,
unsigned NumExpandedTypes,
bool HasTypeConstraint);
using TemplateParmPosition::getDepth;
using TemplateParmPosition::setDepth;
using TemplateParmPosition::getPosition;
using TemplateParmPosition::setPosition;
using TemplateParmPosition::getIndex;
SourceRange getSourceRange() const override LLVM_READONLY;
const DefArgStorage &getDefaultArgStorage() const { return DefaultArgument; }
/// Determine whether this template parameter has a default
/// argument.
bool hasDefaultArgument() const { return DefaultArgument.isSet(); }
/// Retrieve the default argument, if any.
Expr *getDefaultArgument() const { return DefaultArgument.get(); }
/// Retrieve the location of the default argument, if any.
SourceLocation getDefaultArgumentLoc() const;
/// Determines whether the default argument was inherited
/// from a previous declaration of this template.
bool defaultArgumentWasInherited() const {
return DefaultArgument.isInherited();
}
/// Set the default argument for this template parameter, and
/// whether that default argument was inherited from another
/// declaration.
void setDefaultArgument(Expr *DefArg) { DefaultArgument.set(DefArg); }
void setInheritedDefaultArgument(const ASTContext &C,
NonTypeTemplateParmDecl *Parm) {
DefaultArgument.setInherited(C, Parm);
}
/// Removes the default argument of this template parameter.
void removeDefaultArgument() { DefaultArgument.clear(); }
/// Whether this parameter is a non-type template parameter pack.
///
/// If the parameter is a parameter pack, the type may be a
/// \c PackExpansionType. In the following example, the \c Dims parameter
/// is a parameter pack (whose type is 'unsigned').
///
/// \code
/// template<typename T, unsigned ...Dims> struct multi_array;
/// \endcode
bool isParameterPack() const { return ParameterPack; }
/// Whether this parameter pack is a pack expansion.
///
/// A non-type template parameter pack is a pack expansion if its type
/// contains an unexpanded parameter pack. In this case, we will have
/// built a PackExpansionType wrapping the type.
bool isPackExpansion() const {
return ParameterPack && getType()->getAs<PackExpansionType>();
}
/// Whether this parameter is a non-type template parameter pack
/// that has a known list of different types at different positions.
///
/// A parameter pack is an expanded parameter pack when the original
/// parameter pack's type was itself a pack expansion, and that expansion
/// has already been expanded. For example, given:
///
/// \code
/// template<typename ...Types>
/// struct X {
/// template<Types ...Values>
/// struct Y { /* ... */ };
/// };
/// \endcode
///
/// The parameter pack \c Values has a \c PackExpansionType as its type,
/// which expands \c Types. When \c Types is supplied with template arguments
/// by instantiating \c X, the instantiation of \c Values becomes an
/// expanded parameter pack. For example, instantiating
/// \c X<int, unsigned int> results in \c Values being an expanded parameter
/// pack with expansion types \c int and \c unsigned int.
///
/// The \c getExpansionType() and \c getExpansionTypeSourceInfo() functions
/// return the expansion types.
bool isExpandedParameterPack() const { return ExpandedParameterPack; }
/// Retrieves the number of expansion types in an expanded parameter
/// pack.
unsigned getNumExpansionTypes() const {
assert(ExpandedParameterPack && "Not an expansion parameter pack");
return NumExpandedTypes;
}
/// Retrieve a particular expansion type within an expanded parameter
/// pack.
QualType getExpansionType(unsigned I) const {
assert(I < NumExpandedTypes && "Out-of-range expansion type index");
auto TypesAndInfos =
getTrailingObjects<std::pair<QualType, TypeSourceInfo *>>();
return TypesAndInfos[I].first;
}
/// Retrieve a particular expansion type source info within an
/// expanded parameter pack.
TypeSourceInfo *getExpansionTypeSourceInfo(unsigned I) const {
assert(I < NumExpandedTypes && "Out-of-range expansion type index");
auto TypesAndInfos =
getTrailingObjects<std::pair<QualType, TypeSourceInfo *>>();
return TypesAndInfos[I].second;
}
/// Return the constraint introduced by the placeholder type of this non-type
/// template parameter (if any).
Expr *getPlaceholderTypeConstraint() const {
return hasPlaceholderTypeConstraint() ? *getTrailingObjects<Expr *>() :
nullptr;
}
void setPlaceholderTypeConstraint(Expr *E) {
*getTrailingObjects<Expr *>() = E;
}
/// Determine whether this non-type template parameter's type has a
/// placeholder with a type-constraint.
bool hasPlaceholderTypeConstraint() const {
auto *AT = getType()->getContainedAutoType();
return AT && AT->isConstrained();
}
/// \brief Get the associated-constraints of this template parameter.
/// This will either be a vector of size 1 containing the immediately-declared
/// constraint introduced by the placeholder type, or an empty vector.
///
/// Use this instead of getPlaceholderImmediatelyDeclaredConstraint for
/// concepts APIs that accept an ArrayRef of constraint expressions.
void getAssociatedConstraints(llvm::SmallVectorImpl<const Expr *> &AC) const {
if (Expr *E = getPlaceholderTypeConstraint())
AC.push_back(E);
}
// Implement isa/cast/dyncast/etc.
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == NonTypeTemplateParm; }
};
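// Usage sketch (illustrative; `NTTP` is an assumed NonTypeTemplateParmDecl*):
// for an already-expanded pack (like `Values` in the example above), each
// position's type is recorded and can be read back directly.
//
//   if (NTTP->isExpandedParameterPack()) {
//     for (unsigned I = 0, N = NTTP->getNumExpansionTypes(); I != N; ++I) {
//       QualType Ty = NTTP->getExpansionType(I);
//       (void)Ty;
//     }
//   } else if (Expr *Constraint = NTTP->getPlaceholderTypeConstraint()) {
//     // The parameter was written with a constrained placeholder type,
//     // e.g. `std::integral auto N`.
//     (void)Constraint;
//   }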
/// TemplateTemplateParmDecl - Declares a template template parameter,
/// e.g., "T" in
/// @code
/// template <template <typename> class T> class container { };
/// @endcode
/// A template template parameter is a TemplateDecl because it defines the
/// name of a template and the template parameters allowable for substitution.
class TemplateTemplateParmDecl final
: public TemplateDecl,
protected TemplateParmPosition,
private llvm::TrailingObjects<TemplateTemplateParmDecl,
TemplateParameterList *> {
/// The default template argument, if any.
using DefArgStorage =
DefaultArgStorage<TemplateTemplateParmDecl, TemplateArgumentLoc *>;
DefArgStorage DefaultArgument;
/// Whether this parameter is a parameter pack.
bool ParameterPack;
/// Whether this template template parameter is an "expanded"
/// parameter pack, meaning that it is a pack expansion and we
/// already know the set of template parameters that expansion expands to.
bool ExpandedParameterPack = false;
/// The number of parameters in an expanded parameter pack.
unsigned NumExpandedParams = 0;
TemplateTemplateParmDecl(DeclContext *DC, SourceLocation L,
unsigned D, unsigned P, bool ParameterPack,
IdentifierInfo *Id, TemplateParameterList *Params)
: TemplateDecl(TemplateTemplateParm, DC, L, Id, Params),
TemplateParmPosition(D, P), ParameterPack(ParameterPack) {}
TemplateTemplateParmDecl(DeclContext *DC, SourceLocation L,
unsigned D, unsigned P,
IdentifierInfo *Id, TemplateParameterList *Params,
ArrayRef<TemplateParameterList *> Expansions);
void anchor() override;
public:
friend class ASTDeclReader;
friend class ASTDeclWriter;
friend TrailingObjects;
static TemplateTemplateParmDecl *Create(const ASTContext &C, DeclContext *DC,
SourceLocation L, unsigned D,
unsigned P, bool ParameterPack,
IdentifierInfo *Id,
TemplateParameterList *Params);
static TemplateTemplateParmDecl *Create(const ASTContext &C, DeclContext *DC,
SourceLocation L, unsigned D,
unsigned P,
IdentifierInfo *Id,
TemplateParameterList *Params,
ArrayRef<TemplateParameterList *> Expansions);
static TemplateTemplateParmDecl *CreateDeserialized(ASTContext &C,
unsigned ID);
static TemplateTemplateParmDecl *CreateDeserialized(ASTContext &C,
unsigned ID,
unsigned NumExpansions);
using TemplateParmPosition::getDepth;
using TemplateParmPosition::setDepth;
using TemplateParmPosition::getPosition;
using TemplateParmPosition::setPosition;
using TemplateParmPosition::getIndex;
/// Whether this template template parameter is a template
/// parameter pack.
///
/// \code
/// template<template <class T> ...MetaFunctions> struct Apply;
/// \endcode
bool isParameterPack() const { return ParameterPack; }
/// Whether this parameter pack is a pack expansion.
///
/// A template template parameter pack is a pack expansion if its template
/// parameter list contains an unexpanded parameter pack.
bool isPackExpansion() const {
return ParameterPack &&
getTemplateParameters()->containsUnexpandedParameterPack();
}
/// Whether this parameter is a template template parameter pack that
/// has a known list of different template parameter lists at different
/// positions.
///
/// A parameter pack is an expanded parameter pack when the original parameter
/// pack's template parameter list was itself a pack expansion, and that
  /// expansion has already been expanded. For example, given:
///
/// \code
/// template<typename...Types> struct Outer {
/// template<template<Types> class...Templates> struct Inner;
/// };
/// \endcode
///
/// The parameter pack \c Templates is a pack expansion, which expands the
/// pack \c Types. When \c Types is supplied with template arguments by
/// instantiating \c Outer, the instantiation of \c Templates is an expanded
/// parameter pack.
bool isExpandedParameterPack() const { return ExpandedParameterPack; }
/// Retrieves the number of expansion template parameters in
/// an expanded parameter pack.
unsigned getNumExpansionTemplateParameters() const {
assert(ExpandedParameterPack && "Not an expansion parameter pack");
return NumExpandedParams;
}
/// Retrieve a particular expansion type within an expanded parameter
/// pack.
TemplateParameterList *getExpansionTemplateParameters(unsigned I) const {
assert(I < NumExpandedParams && "Out-of-range expansion type index");
return getTrailingObjects<TemplateParameterList *>()[I];
}
const DefArgStorage &getDefaultArgStorage() const { return DefaultArgument; }
/// Determine whether this template parameter has a default
/// argument.
bool hasDefaultArgument() const { return DefaultArgument.isSet(); }
/// Retrieve the default argument, if any.
const TemplateArgumentLoc &getDefaultArgument() const {
static const TemplateArgumentLoc NoneLoc;
return DefaultArgument.isSet() ? *DefaultArgument.get() : NoneLoc;
}
/// Retrieve the location of the default argument, if any.
SourceLocation getDefaultArgumentLoc() const;
/// Determines whether the default argument was inherited
/// from a previous declaration of this template.
bool defaultArgumentWasInherited() const {
return DefaultArgument.isInherited();
}
/// Set the default argument for this template parameter, and
/// whether that default argument was inherited from another
/// declaration.
void setDefaultArgument(const ASTContext &C,
const TemplateArgumentLoc &DefArg);
void setInheritedDefaultArgument(const ASTContext &C,
TemplateTemplateParmDecl *Prev) {
DefaultArgument.setInherited(C, Prev);
}
/// Removes the default argument of this template parameter.
void removeDefaultArgument() { DefaultArgument.clear(); }
SourceRange getSourceRange() const override LLVM_READONLY {
SourceLocation End = getLocation();
if (hasDefaultArgument() && !defaultArgumentWasInherited())
End = getDefaultArgument().getSourceRange().getEnd();
return SourceRange(getTemplateParameters()->getTemplateLoc(), End);
}
// Implement isa/cast/dyncast/etc.
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == TemplateTemplateParm; }
};
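// Usage sketch (illustrative; `TTPD` is an assumed TemplateTemplateParmDecl*):
// inspect an expanded pack and any non-inherited default argument.
//
//   if (TTPD->isExpandedParameterPack()) {
//     unsigned N = TTPD->getNumExpansionTemplateParameters();
//     for (unsigned I = 0; I != N; ++I)
//       (void)TTPD->getExpansionTemplateParameters(I);
//   }
//   if (TTPD->hasDefaultArgument() && !TTPD->defaultArgumentWasInherited()) {
//     const TemplateArgumentLoc &Def = TTPD->getDefaultArgument();
//     (void)Def;
//   }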
/// Represents the builtin template declaration which is used to
/// implement __make_integer_seq and other builtin templates. It serves
/// no real purpose beyond existing as a place to hold template parameters.
class BuiltinTemplateDecl : public TemplateDecl {
BuiltinTemplateKind BTK;
BuiltinTemplateDecl(const ASTContext &C, DeclContext *DC,
DeclarationName Name, BuiltinTemplateKind BTK);
void anchor() override;
public:
// Implement isa/cast/dyncast support
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == BuiltinTemplate; }
static BuiltinTemplateDecl *Create(const ASTContext &C, DeclContext *DC,
DeclarationName Name,
BuiltinTemplateKind BTK) {
return new (C, DC) BuiltinTemplateDecl(C, DC, Name, BTK);
}
SourceRange getSourceRange() const override LLVM_READONLY {
return {};
}
BuiltinTemplateKind getBuiltinTemplateKind() const { return BTK; }
};
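// Illustrative note: __make_integer_seq is the kind of builtin this models.
// For example, `__make_integer_seq<std::integer_sequence, int, 3>` names the
// type `std::integer_sequence<int, 0, 1, 2>`, which some standard library
// implementations use to implement std::make_integer_sequence.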
/// Represents a class template specialization, which refers to
/// a class template with a given set of template arguments.
///
/// Class template specializations represent both explicit
/// specialization of class templates, as in the example below, and
/// implicit instantiations of class templates.
///
/// \code
/// template<typename T> class array;
///
/// template<>
/// class array<bool> { }; // class template specialization array<bool>
/// \endcode
class ClassTemplateSpecializationDecl
: public CXXRecordDecl, public llvm::FoldingSetNode {
/// Structure that stores information about a class template
/// specialization that was instantiated from a class template partial
/// specialization.
struct SpecializedPartialSpecialization {
/// The class template partial specialization from which this
/// class template specialization was instantiated.
ClassTemplatePartialSpecializationDecl *PartialSpecialization;
/// The template argument list deduced for the class template
/// partial specialization itself.
const TemplateArgumentList *TemplateArgs;
};
/// The template that this specialization specializes
llvm::PointerUnion<ClassTemplateDecl *, SpecializedPartialSpecialization *>
SpecializedTemplate;
/// Further info for explicit template specialization/instantiation.
struct ExplicitSpecializationInfo {
/// The type-as-written.
TypeSourceInfo *TypeAsWritten = nullptr;
/// The location of the extern keyword.
SourceLocation ExternLoc;
/// The location of the template keyword.
SourceLocation TemplateKeywordLoc;
ExplicitSpecializationInfo() = default;
};
/// Further info for explicit template specialization/instantiation.
/// Does not apply to implicit specializations.
ExplicitSpecializationInfo *ExplicitInfo = nullptr;
/// The template arguments used to describe this specialization.
const TemplateArgumentList *TemplateArgs;
/// The point where this template was instantiated (if any)
SourceLocation PointOfInstantiation;
/// The kind of specialization this declaration refers to.
/// Really a value of type TemplateSpecializationKind.
unsigned SpecializationKind : 3;
protected:
ClassTemplateSpecializationDecl(ASTContext &Context, Kind DK, TagKind TK,
DeclContext *DC, SourceLocation StartLoc,
SourceLocation IdLoc,
ClassTemplateDecl *SpecializedTemplate,
ArrayRef<TemplateArgument> Args,
ClassTemplateSpecializationDecl *PrevDecl);
explicit ClassTemplateSpecializationDecl(ASTContext &C, Kind DK);
public:
friend class ASTDeclReader;
friend class ASTDeclWriter;
static ClassTemplateSpecializationDecl *
Create(ASTContext &Context, TagKind TK, DeclContext *DC,
SourceLocation StartLoc, SourceLocation IdLoc,
ClassTemplateDecl *SpecializedTemplate,
ArrayRef<TemplateArgument> Args,
ClassTemplateSpecializationDecl *PrevDecl);
static ClassTemplateSpecializationDecl *
CreateDeserialized(ASTContext &C, unsigned ID);
void getNameForDiagnostic(raw_ostream &OS, const PrintingPolicy &Policy,
bool Qualified) const override;
// FIXME: This is broken. CXXRecordDecl::getMostRecentDecl() returns a
// different "most recent" declaration from this function for the same
// declaration, because we don't override getMostRecentDeclImpl(). But
// it's not clear that we should override that, because the most recent
// declaration as a CXXRecordDecl sometimes is the injected-class-name.
ClassTemplateSpecializationDecl *getMostRecentDecl() {
return cast<ClassTemplateSpecializationDecl>(
getMostRecentNonInjectedDecl());
}
/// Retrieve the template that this specialization specializes.
ClassTemplateDecl *getSpecializedTemplate() const;
/// Retrieve the template arguments of the class template
/// specialization.
const TemplateArgumentList &getTemplateArgs() const {
return *TemplateArgs;
}
void setTemplateArgs(TemplateArgumentList *Args) {
TemplateArgs = Args;
}
/// Determine the kind of specialization that this
/// declaration represents.
TemplateSpecializationKind getSpecializationKind() const {
return static_cast<TemplateSpecializationKind>(SpecializationKind);
}
bool isExplicitSpecialization() const {
return getSpecializationKind() == TSK_ExplicitSpecialization;
}
/// Is this an explicit specialization at class scope (within the class that
/// owns the primary template)? For example:
///
/// \code
/// template<typename T> struct Outer {
/// template<typename U> struct Inner;
/// template<> struct Inner; // class-scope explicit specialization
/// };
/// \endcode
bool isClassScopeExplicitSpecialization() const {
return isExplicitSpecialization() &&
isa<CXXRecordDecl>(getLexicalDeclContext());
}
/// True if this declaration is an explicit specialization,
/// explicit instantiation declaration, or explicit instantiation
/// definition.
bool isExplicitInstantiationOrSpecialization() const {
return isTemplateExplicitInstantiationOrSpecialization(
getTemplateSpecializationKind());
}
void setSpecializedTemplate(ClassTemplateDecl *Specialized) {
SpecializedTemplate = Specialized;
}
void setSpecializationKind(TemplateSpecializationKind TSK) {
SpecializationKind = TSK;
}
/// Get the point of instantiation (if any), or null if none.
SourceLocation getPointOfInstantiation() const {
return PointOfInstantiation;
}
void setPointOfInstantiation(SourceLocation Loc) {
assert(Loc.isValid() && "point of instantiation must be valid!");
PointOfInstantiation = Loc;
}
/// If this class template specialization is an instantiation of
/// a template (rather than an explicit specialization), return the
/// class template or class template partial specialization from which it
/// was instantiated.
llvm::PointerUnion<ClassTemplateDecl *,
ClassTemplatePartialSpecializationDecl *>
getInstantiatedFrom() const {
if (!isTemplateInstantiation(getSpecializationKind()))
return llvm::PointerUnion<ClassTemplateDecl *,
ClassTemplatePartialSpecializationDecl *>();
return getSpecializedTemplateOrPartial();
}
/// Retrieve the class template or class template partial
/// specialization which was specialized by this.
llvm::PointerUnion<ClassTemplateDecl *,
ClassTemplatePartialSpecializationDecl *>
getSpecializedTemplateOrPartial() const {
if (const auto *PartialSpec =
SpecializedTemplate.dyn_cast<SpecializedPartialSpecialization *>())
return PartialSpec->PartialSpecialization;
return SpecializedTemplate.get<ClassTemplateDecl*>();
}
/// Retrieve the set of template arguments that should be used
/// to instantiate members of the class template or class template partial
/// specialization from which this class template specialization was
/// instantiated.
///
/// \returns For a class template specialization instantiated from the primary
/// template, this function will return the same template arguments as
/// getTemplateArgs(). For a class template specialization instantiated from
/// a class template partial specialization, this function will return the
/// deduced template arguments for the class template partial specialization
/// itself.
const TemplateArgumentList &getTemplateInstantiationArgs() const {
if (const auto *PartialSpec =
SpecializedTemplate.dyn_cast<SpecializedPartialSpecialization *>())
return *PartialSpec->TemplateArgs;
return getTemplateArgs();
}
/// Note that this class template specialization is actually an
/// instantiation of the given class template partial specialization whose
/// template arguments have been deduced.
void setInstantiationOf(ClassTemplatePartialSpecializationDecl *PartialSpec,
const TemplateArgumentList *TemplateArgs) {
assert(!SpecializedTemplate.is<SpecializedPartialSpecialization*>() &&
"Already set to a class template partial specialization!");
auto *PS = new (getASTContext()) SpecializedPartialSpecialization();
PS->PartialSpecialization = PartialSpec;
PS->TemplateArgs = TemplateArgs;
SpecializedTemplate = PS;
}
/// Note that this class template specialization is an instantiation
/// of the given class template.
void setInstantiationOf(ClassTemplateDecl *TemplDecl) {
assert(!SpecializedTemplate.is<SpecializedPartialSpecialization*>() &&
"Previously set to a class template partial specialization!");
SpecializedTemplate = TemplDecl;
}
/// Sets the type of this specialization as it was written by
/// the user. This will be a class template specialization type.
void setTypeAsWritten(TypeSourceInfo *T) {
if (!ExplicitInfo)
ExplicitInfo = new (getASTContext()) ExplicitSpecializationInfo;
ExplicitInfo->TypeAsWritten = T;
}
/// Gets the type of this specialization as it was written by
/// the user, if it was so written.
TypeSourceInfo *getTypeAsWritten() const {
return ExplicitInfo ? ExplicitInfo->TypeAsWritten : nullptr;
}
/// Gets the location of the extern keyword, if present.
SourceLocation getExternLoc() const {
return ExplicitInfo ? ExplicitInfo->ExternLoc : SourceLocation();
}
/// Sets the location of the extern keyword.
void setExternLoc(SourceLocation Loc) {
if (!ExplicitInfo)
ExplicitInfo = new (getASTContext()) ExplicitSpecializationInfo;
ExplicitInfo->ExternLoc = Loc;
}
/// Sets the location of the template keyword.
void setTemplateKeywordLoc(SourceLocation Loc) {
if (!ExplicitInfo)
ExplicitInfo = new (getASTContext()) ExplicitSpecializationInfo;
ExplicitInfo->TemplateKeywordLoc = Loc;
}
/// Gets the location of the template keyword, if present.
SourceLocation getTemplateKeywordLoc() const {
return ExplicitInfo ? ExplicitInfo->TemplateKeywordLoc : SourceLocation();
}
SourceRange getSourceRange() const override LLVM_READONLY;
void Profile(llvm::FoldingSetNodeID &ID) const {
Profile(ID, TemplateArgs->asArray(), getASTContext());
}
static void
Profile(llvm::FoldingSetNodeID &ID, ArrayRef<TemplateArgument> TemplateArgs,
ASTContext &Context) {
ID.AddInteger(TemplateArgs.size());
for (const TemplateArgument &TemplateArg : TemplateArgs)
TemplateArg.Profile(ID, Context);
}
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) {
return K >= firstClassTemplateSpecialization &&
K <= lastClassTemplateSpecialization;
}
};
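// Usage sketch (illustrative; `CTSD` is an assumed
// ClassTemplateSpecializationDecl*): pick the template argument list that
// should drive instantiation of members, which differs depending on whether
// the specialization came from the primary template or from a partial
// specialization.
//
//   auto From = CTSD->getInstantiatedFrom();
//   if (From.isNull()) {
//     // Explicit specialization: nothing was instantiated.
//   } else if (From.is<ClassTemplatePartialSpecializationDecl *>()) {
//     const TemplateArgumentList &Deduced =
//         CTSD->getTemplateInstantiationArgs(); // deduced for the partial spec
//     (void)Deduced;
//   } else {
//     const TemplateArgumentList &Written = CTSD->getTemplateArgs();
//     (void)Written;
//   }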
class ClassTemplatePartialSpecializationDecl
: public ClassTemplateSpecializationDecl {
/// The list of template parameters
TemplateParameterList* TemplateParams = nullptr;
/// The source info for the template arguments as written.
/// FIXME: redundant with TypeAsWritten?
const ASTTemplateArgumentListInfo *ArgsAsWritten = nullptr;
/// The class template partial specialization from which this
/// class template partial specialization was instantiated.
///
/// The boolean value will be true to indicate that this class template
/// partial specialization was specialized at this level.
llvm::PointerIntPair<ClassTemplatePartialSpecializationDecl *, 1, bool>
InstantiatedFromMember;
ClassTemplatePartialSpecializationDecl(ASTContext &Context, TagKind TK,
DeclContext *DC,
SourceLocation StartLoc,
SourceLocation IdLoc,
TemplateParameterList *Params,
ClassTemplateDecl *SpecializedTemplate,
ArrayRef<TemplateArgument> Args,
const ASTTemplateArgumentListInfo *ArgsAsWritten,
ClassTemplatePartialSpecializationDecl *PrevDecl);
ClassTemplatePartialSpecializationDecl(ASTContext &C)
: ClassTemplateSpecializationDecl(C, ClassTemplatePartialSpecialization),
InstantiatedFromMember(nullptr, false) {}
void anchor() override;
public:
friend class ASTDeclReader;
friend class ASTDeclWriter;
static ClassTemplatePartialSpecializationDecl *
Create(ASTContext &Context, TagKind TK, DeclContext *DC,
SourceLocation StartLoc, SourceLocation IdLoc,
TemplateParameterList *Params,
ClassTemplateDecl *SpecializedTemplate,
ArrayRef<TemplateArgument> Args,
const TemplateArgumentListInfo &ArgInfos,
QualType CanonInjectedType,
ClassTemplatePartialSpecializationDecl *PrevDecl);
static ClassTemplatePartialSpecializationDecl *
CreateDeserialized(ASTContext &C, unsigned ID);
ClassTemplatePartialSpecializationDecl *getMostRecentDecl() {
return cast<ClassTemplatePartialSpecializationDecl>(
static_cast<ClassTemplateSpecializationDecl *>(
this)->getMostRecentDecl());
}
/// Get the list of template parameters
TemplateParameterList *getTemplateParameters() const {
return TemplateParams;
}
/// \brief All associated constraints of this partial specialization,
/// including the requires clause and any constraints derived from
/// constrained-parameters.
///
/// The constraints in the resulting list are to be treated as if in a
/// conjunction ("and").
void getAssociatedConstraints(llvm::SmallVectorImpl<const Expr *> &AC) const {
TemplateParams->getAssociatedConstraints(AC);
}
bool hasAssociatedConstraints() const {
return TemplateParams->hasAssociatedConstraints();
}
/// Get the template arguments as written.
const ASTTemplateArgumentListInfo *getTemplateArgsAsWritten() const {
return ArgsAsWritten;
}
/// Retrieve the member class template partial specialization from
/// which this particular class template partial specialization was
/// instantiated.
///
/// \code
/// template<typename T>
/// struct Outer {
/// template<typename U> struct Inner;
/// template<typename U> struct Inner<U*> { }; // #1
/// };
///
/// Outer<float>::Inner<int*> ii;
/// \endcode
///
/// In this example, the instantiation of \c Outer<float>::Inner<int*> will
/// end up instantiating the partial specialization
/// \c Outer<float>::Inner<U*>, which itself was instantiated from the class
/// template partial specialization \c Outer<T>::Inner<U*>. Given
/// \c Outer<float>::Inner<U*>, this function would return
/// \c Outer<T>::Inner<U*>.
ClassTemplatePartialSpecializationDecl *getInstantiatedFromMember() const {
const auto *First =
cast<ClassTemplatePartialSpecializationDecl>(getFirstDecl());
return First->InstantiatedFromMember.getPointer();
}
ClassTemplatePartialSpecializationDecl *
getInstantiatedFromMemberTemplate() const {
return getInstantiatedFromMember();
}
void setInstantiatedFromMember(
ClassTemplatePartialSpecializationDecl *PartialSpec) {
auto *First = cast<ClassTemplatePartialSpecializationDecl>(getFirstDecl());
First->InstantiatedFromMember.setPointer(PartialSpec);
}
/// Determines whether this class template partial specialization
/// template was a specialization of a member partial specialization.
///
/// In the following example, the member template partial specialization
/// \c X<int>::Inner<T*> is a member specialization.
///
/// \code
/// template<typename T>
/// struct X {
/// template<typename U> struct Inner;
/// template<typename U> struct Inner<U*>;
/// };
///
/// template<> template<typename T>
/// struct X<int>::Inner<T*> { /* ... */ };
/// \endcode
bool isMemberSpecialization() {
const auto *First =
cast<ClassTemplatePartialSpecializationDecl>(getFirstDecl());
return First->InstantiatedFromMember.getInt();
}
/// Note that this member template is a specialization.
void setMemberSpecialization() {
auto *First = cast<ClassTemplatePartialSpecializationDecl>(getFirstDecl());
assert(First->InstantiatedFromMember.getPointer() &&
"Only member templates can be member template specializations");
return First->InstantiatedFromMember.setInt(true);
}
/// Retrieves the injected specialization type for this partial
/// specialization. This is not the same as the type-decl-type for
/// this partial specialization, which is an InjectedClassNameType.
QualType getInjectedSpecializationType() const {
assert(getTypeForDecl() && "partial specialization has no type set!");
return cast<InjectedClassNameType>(getTypeForDecl())
->getInjectedSpecializationType();
}
void Profile(llvm::FoldingSetNodeID &ID) const {
Profile(ID, getTemplateArgs().asArray(), getTemplateParameters(),
getASTContext());
}
static void
Profile(llvm::FoldingSetNodeID &ID, ArrayRef<TemplateArgument> TemplateArgs,
TemplateParameterList *TPL, ASTContext &Context);
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) {
return K == ClassTemplatePartialSpecialization;
}
};
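// Usage sketch (illustrative; `Partial` is an assumed
// ClassTemplatePartialSpecializationDecl*): gather the constraints that
// participate in partial ordering, treated as a conjunction.
//
//   llvm::SmallVector<const Expr *, 3> AC;
//   Partial->getAssociatedConstraints(AC);
//   if (Partial->hasAssociatedConstraints()) {
//     // At least one requires-clause or constrained-parameter applies.
//   }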
/// Declaration of a class template.
class ClassTemplateDecl : public RedeclarableTemplateDecl {
protected:
/// Data that is common to all of the declarations of a given
/// class template.
struct Common : CommonBase {
/// The class template specializations for this class
/// template, including explicit specializations and instantiations.
llvm::FoldingSetVector<ClassTemplateSpecializationDecl> Specializations;
/// The class template partial specializations for this class
/// template.
llvm::FoldingSetVector<ClassTemplatePartialSpecializationDecl>
PartialSpecializations;
/// The injected-class-name type for this class template.
QualType InjectedClassNameType;
Common() = default;
};
/// Retrieve the set of specializations of this class template.
llvm::FoldingSetVector<ClassTemplateSpecializationDecl> &
getSpecializations() const;
/// Retrieve the set of partial specializations of this class
/// template.
llvm::FoldingSetVector<ClassTemplatePartialSpecializationDecl> &
getPartialSpecializations() const;
ClassTemplateDecl(ASTContext &C, DeclContext *DC, SourceLocation L,
DeclarationName Name, TemplateParameterList *Params,
NamedDecl *Decl)
: RedeclarableTemplateDecl(ClassTemplate, C, DC, L, Name, Params, Decl) {}
CommonBase *newCommon(ASTContext &C) const override;
Common *getCommonPtr() const {
return static_cast<Common *>(RedeclarableTemplateDecl::getCommonPtr());
}
public:
friend class ASTDeclReader;
friend class ASTDeclWriter;
/// Load any lazily-loaded specializations from the external source.
void LoadLazySpecializations() const;
  /// Get the underlying class declaration of the template.
CXXRecordDecl *getTemplatedDecl() const {
return static_cast<CXXRecordDecl *>(TemplatedDecl);
}
/// Returns whether this template declaration defines the primary
/// class pattern.
bool isThisDeclarationADefinition() const {
return getTemplatedDecl()->isThisDeclarationADefinition();
}
/// \brief Create a class template node.
static ClassTemplateDecl *Create(ASTContext &C, DeclContext *DC,
SourceLocation L,
DeclarationName Name,
TemplateParameterList *Params,
NamedDecl *Decl);
/// Create an empty class template node.
static ClassTemplateDecl *CreateDeserialized(ASTContext &C, unsigned ID);
/// Return the specialization with the provided arguments if it exists,
/// otherwise return the insertion point.
ClassTemplateSpecializationDecl *
findSpecialization(ArrayRef<TemplateArgument> Args, void *&InsertPos);
/// Insert the specified specialization knowing that it is not already
/// in. InsertPos must be obtained from findSpecialization.
void AddSpecialization(ClassTemplateSpecializationDecl *D, void *InsertPos);
ClassTemplateDecl *getCanonicalDecl() override {
return cast<ClassTemplateDecl>(
RedeclarableTemplateDecl::getCanonicalDecl());
}
const ClassTemplateDecl *getCanonicalDecl() const {
return cast<ClassTemplateDecl>(
RedeclarableTemplateDecl::getCanonicalDecl());
}
/// Retrieve the previous declaration of this class template, or
/// nullptr if no such declaration exists.
ClassTemplateDecl *getPreviousDecl() {
return cast_or_null<ClassTemplateDecl>(
static_cast<RedeclarableTemplateDecl *>(this)->getPreviousDecl());
}
const ClassTemplateDecl *getPreviousDecl() const {
return cast_or_null<ClassTemplateDecl>(
static_cast<const RedeclarableTemplateDecl *>(
this)->getPreviousDecl());
}
ClassTemplateDecl *getMostRecentDecl() {
return cast<ClassTemplateDecl>(
static_cast<RedeclarableTemplateDecl *>(this)->getMostRecentDecl());
}
const ClassTemplateDecl *getMostRecentDecl() const {
return const_cast<ClassTemplateDecl*>(this)->getMostRecentDecl();
}
ClassTemplateDecl *getInstantiatedFromMemberTemplate() const {
return cast_or_null<ClassTemplateDecl>(
RedeclarableTemplateDecl::getInstantiatedFromMemberTemplate());
}
/// Return the partial specialization with the provided arguments if it
/// exists, otherwise return the insertion point.
ClassTemplatePartialSpecializationDecl *
findPartialSpecialization(ArrayRef<TemplateArgument> Args,
TemplateParameterList *TPL, void *&InsertPos);
/// Insert the specified partial specialization knowing that it is not
/// already in. InsertPos must be obtained from findPartialSpecialization.
void AddPartialSpecialization(ClassTemplatePartialSpecializationDecl *D,
void *InsertPos);
/// Retrieve the partial specializations as an ordered list.
void getPartialSpecializations(
SmallVectorImpl<ClassTemplatePartialSpecializationDecl *> &PS) const;
/// Find a class template partial specialization with the given
/// type T.
///
/// \param T a dependent type that names a specialization of this class
/// template.
///
/// \returns the class template partial specialization that exactly matches
/// the type \p T, or nullptr if no such partial specialization exists.
ClassTemplatePartialSpecializationDecl *findPartialSpecialization(QualType T);
/// Find a class template partial specialization which was instantiated
/// from the given member partial specialization.
///
/// \param D a member class template partial specialization.
///
/// \returns the class template partial specialization which was instantiated
/// from the given member partial specialization, or nullptr if no such
/// partial specialization exists.
ClassTemplatePartialSpecializationDecl *
findPartialSpecInstantiatedFromMember(
ClassTemplatePartialSpecializationDecl *D);
/// Retrieve the template specialization type of the
/// injected-class-name for this class template.
///
/// The injected-class-name for a class template \c X is \c
/// X<template-args>, where \c template-args is formed from the
/// template arguments that correspond to the template parameters of
/// \c X. For example:
///
/// \code
/// template<typename T, int N>
/// struct array {
/// typedef array this_type; // "array" is equivalent to "array<T, N>"
/// };
/// \endcode
QualType getInjectedClassNameSpecialization();
using spec_iterator = SpecIterator<ClassTemplateSpecializationDecl>;
using spec_range = llvm::iterator_range<spec_iterator>;
spec_range specializations() const {
return spec_range(spec_begin(), spec_end());
}
spec_iterator spec_begin() const {
return makeSpecIterator(getSpecializations(), false);
}
spec_iterator spec_end() const {
return makeSpecIterator(getSpecializations(), true);
}
// Implement isa/cast/dyncast support
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == ClassTemplate; }
};
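// Illustrative sketch (not part of the upstream header): one way a client
// could walk the specializations of a class template through the accessors
// declared above; the helper name `countSpecializations` is hypothetical.
//
// \code
// #include "clang/AST/DeclTemplate.h"
//
// static unsigned countSpecializations(clang::ClassTemplateDecl *CTD) {
//   unsigned N = 0;
//   // specializations() goes through getSpecializations(), which loads any
//   // lazily-deserialized specializations first.
//   for (clang::ClassTemplateSpecializationDecl *Spec : CTD->specializations()) {
//     (void)Spec;
//     ++N;
//   }
//   return N;
// }
// \endcode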
/// Declaration of a friend template.
///
/// For example:
/// \code
/// template \<typename T> class A {
/// friend class MyVector<T>; // not a friend template
/// template \<typename U> friend class B; // not a friend template
/// template \<typename U> friend class Foo<T>::Nested; // friend template
/// };
/// \endcode
///
/// \note This class is not currently in use. All of the above
/// will yield a FriendDecl, not a FriendTemplateDecl.
class FriendTemplateDecl : public Decl {
virtual void anchor();
public:
using FriendUnion = llvm::PointerUnion<NamedDecl *,TypeSourceInfo *>;
private:
// The number of template parameters; always non-zero.
unsigned NumParams = 0;
// The parameter list.
TemplateParameterList **Params = nullptr;
// The declaration that's a friend of this class.
FriendUnion Friend;
// Location of the 'friend' specifier.
SourceLocation FriendLoc;
FriendTemplateDecl(DeclContext *DC, SourceLocation Loc,
- MutableArrayRef<TemplateParameterList *> Params,
+ TemplateParameterList **Params, unsigned NumParams,
FriendUnion Friend, SourceLocation FriendLoc)
- : Decl(Decl::FriendTemplate, DC, Loc), NumParams(Params.size()),
- Params(Params.data()), Friend(Friend), FriendLoc(FriendLoc) {}
+ : Decl(Decl::FriendTemplate, DC, Loc), NumParams(NumParams),
+ Params(Params), Friend(Friend), FriendLoc(FriendLoc) {}
FriendTemplateDecl(EmptyShell Empty) : Decl(Decl::FriendTemplate, Empty) {}
public:
friend class ASTDeclReader;
static FriendTemplateDecl *
Create(ASTContext &Context, DeclContext *DC, SourceLocation Loc,
MutableArrayRef<TemplateParameterList *> Params, FriendUnion Friend,
SourceLocation FriendLoc);
static FriendTemplateDecl *CreateDeserialized(ASTContext &C, unsigned ID);
/// If this friend declaration names a templated type (or
/// a dependent member type of a templated type), return that
/// type; otherwise return null.
TypeSourceInfo *getFriendType() const {
return Friend.dyn_cast<TypeSourceInfo*>();
}
/// If this friend declaration names a templated function (or
/// a member function of a templated type), return that declaration;
/// otherwise return null.
NamedDecl *getFriendDecl() const {
return Friend.dyn_cast<NamedDecl*>();
}
/// Retrieves the location of the 'friend' keyword.
SourceLocation getFriendLoc() const {
return FriendLoc;
}
TemplateParameterList *getTemplateParameterList(unsigned i) const {
assert(i < NumParams);
return Params[i];
}
unsigned getNumTemplateParameters() const {
return NumParams;
}
// Implement isa/cast/dyncast/etc.
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == Decl::FriendTemplate; }
};
/// Declaration of an alias template.
///
/// For example:
/// \code
/// template \<typename T> using V = std::map<T*, int, MyCompare<T>>;
/// \endcode
class TypeAliasTemplateDecl : public RedeclarableTemplateDecl {
protected:
using Common = CommonBase;
TypeAliasTemplateDecl(ASTContext &C, DeclContext *DC, SourceLocation L,
DeclarationName Name, TemplateParameterList *Params,
NamedDecl *Decl)
: RedeclarableTemplateDecl(TypeAliasTemplate, C, DC, L, Name, Params,
Decl) {}
CommonBase *newCommon(ASTContext &C) const override;
Common *getCommonPtr() {
return static_cast<Common *>(RedeclarableTemplateDecl::getCommonPtr());
}
public:
friend class ASTDeclReader;
friend class ASTDeclWriter;
/// Get the underlying type alias declaration of the template.
TypeAliasDecl *getTemplatedDecl() const {
return static_cast<TypeAliasDecl *>(TemplatedDecl);
}
TypeAliasTemplateDecl *getCanonicalDecl() override {
return cast<TypeAliasTemplateDecl>(
RedeclarableTemplateDecl::getCanonicalDecl());
}
const TypeAliasTemplateDecl *getCanonicalDecl() const {
return cast<TypeAliasTemplateDecl>(
RedeclarableTemplateDecl::getCanonicalDecl());
}
/// Retrieve the previous declaration of this alias template, or
/// nullptr if no such declaration exists.
TypeAliasTemplateDecl *getPreviousDecl() {
return cast_or_null<TypeAliasTemplateDecl>(
static_cast<RedeclarableTemplateDecl *>(this)->getPreviousDecl());
}
const TypeAliasTemplateDecl *getPreviousDecl() const {
return cast_or_null<TypeAliasTemplateDecl>(
static_cast<const RedeclarableTemplateDecl *>(
this)->getPreviousDecl());
}
TypeAliasTemplateDecl *getInstantiatedFromMemberTemplate() const {
return cast_or_null<TypeAliasTemplateDecl>(
RedeclarableTemplateDecl::getInstantiatedFromMemberTemplate());
}
/// Create an alias template node.
static TypeAliasTemplateDecl *Create(ASTContext &C, DeclContext *DC,
SourceLocation L,
DeclarationName Name,
TemplateParameterList *Params,
NamedDecl *Decl);
/// Create an empty alias template node.
static TypeAliasTemplateDecl *CreateDeserialized(ASTContext &C, unsigned ID);
// Implement isa/cast/dyncast support
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == TypeAliasTemplate; }
};
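// Illustrative sketch (not part of the upstream header): retrieving the
// aliased type pattern through the templated TypeAliasDecl. The helper name
// is hypothetical; getUnderlyingType() is inherited from TypedefNameDecl.
//
// \code
// static clang::QualType aliasedPattern(clang::TypeAliasTemplateDecl *TATD) {
//   return TATD->getTemplatedDecl()->getUnderlyingType();
// }
// \endcode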
/// Declaration of a function specialization at template class scope.
///
/// For example:
/// \code
/// template <class T>
/// class A {
/// template <class U> void foo(U a) { }
/// template<> void foo(int a) { }
/// }
/// \endcode
///
/// "template<> foo(int a)" will be saved in Specialization as a normal
/// CXXMethodDecl. Then during an instantiation of class A, it will be
/// transformed into an actual function specialization.
///
/// FIXME: This is redundant; we could store the same information directly on
/// the CXXMethodDecl as a DependentFunctionTemplateSpecializationInfo.
class ClassScopeFunctionSpecializationDecl : public Decl {
CXXMethodDecl *Specialization;
const ASTTemplateArgumentListInfo *TemplateArgs;
ClassScopeFunctionSpecializationDecl(
DeclContext *DC, SourceLocation Loc, CXXMethodDecl *FD,
const ASTTemplateArgumentListInfo *TemplArgs)
: Decl(Decl::ClassScopeFunctionSpecialization, DC, Loc),
Specialization(FD), TemplateArgs(TemplArgs) {}
ClassScopeFunctionSpecializationDecl(EmptyShell Empty)
: Decl(Decl::ClassScopeFunctionSpecialization, Empty) {}
virtual void anchor();
public:
friend class ASTDeclReader;
friend class ASTDeclWriter;
CXXMethodDecl *getSpecialization() const { return Specialization; }
bool hasExplicitTemplateArgs() const { return TemplateArgs; }
const ASTTemplateArgumentListInfo *getTemplateArgsAsWritten() const {
return TemplateArgs;
}
static ClassScopeFunctionSpecializationDecl *
Create(ASTContext &C, DeclContext *DC, SourceLocation Loc, CXXMethodDecl *FD,
bool HasExplicitTemplateArgs,
const TemplateArgumentListInfo &TemplateArgs) {
return new (C, DC) ClassScopeFunctionSpecializationDecl(
DC, Loc, FD,
HasExplicitTemplateArgs
? ASTTemplateArgumentListInfo::Create(C, TemplateArgs)
: nullptr);
}
static ClassScopeFunctionSpecializationDecl *
CreateDeserialized(ASTContext &Context, unsigned ID);
// Implement isa/cast/dyncast/etc.
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) {
return K == Decl::ClassScopeFunctionSpecialization;
}
};
/// Represents a variable template specialization, which refers to
/// a variable template with a given set of template arguments.
///
/// Variable template specializations represent both explicit
/// specializations of variable templates, as in the example below, and
/// implicit instantiations of variable templates.
///
/// \code
/// template<typename T> constexpr T pi = T(3.1415926535897932385);
///
/// template<>
/// constexpr float pi<float>; // variable template specialization pi<float>
/// \endcode
class VarTemplateSpecializationDecl : public VarDecl,
public llvm::FoldingSetNode {
/// Structure that stores information about a variable template
/// specialization that was instantiated from a variable template partial
/// specialization.
struct SpecializedPartialSpecialization {
/// The variable template partial specialization from which this
/// variable template specialization was instantiated.
VarTemplatePartialSpecializationDecl *PartialSpecialization;
/// The template argument list deduced for the variable template
/// partial specialization itself.
const TemplateArgumentList *TemplateArgs;
};
/// The template that this specialization specializes.
llvm::PointerUnion<VarTemplateDecl *, SpecializedPartialSpecialization *>
SpecializedTemplate;
/// Further info for explicit template specialization/instantiation.
struct ExplicitSpecializationInfo {
/// The type-as-written.
TypeSourceInfo *TypeAsWritten = nullptr;
/// The location of the extern keyword.
SourceLocation ExternLoc;
/// The location of the template keyword.
SourceLocation TemplateKeywordLoc;
ExplicitSpecializationInfo() = default;
};
/// Further info for explicit template specialization/instantiation.
/// Does not apply to implicit specializations.
ExplicitSpecializationInfo *ExplicitInfo = nullptr;
/// The template arguments used to describe this specialization.
const TemplateArgumentList *TemplateArgs;
TemplateArgumentListInfo TemplateArgsInfo;
/// The point where this template was instantiated (if any).
SourceLocation PointOfInstantiation;
/// The kind of specialization this declaration refers to.
/// Really a value of type TemplateSpecializationKind.
unsigned SpecializationKind : 3;
/// Whether this declaration is a complete definition of the
/// variable template specialization. We can't otherwise tell apart
/// an instantiated declaration from an instantiated definition with
/// no initializer.
unsigned IsCompleteDefinition : 1;
protected:
VarTemplateSpecializationDecl(Kind DK, ASTContext &Context, DeclContext *DC,
SourceLocation StartLoc, SourceLocation IdLoc,
VarTemplateDecl *SpecializedTemplate,
QualType T, TypeSourceInfo *TInfo,
StorageClass S,
ArrayRef<TemplateArgument> Args);
explicit VarTemplateSpecializationDecl(Kind DK, ASTContext &Context);
public:
friend class ASTDeclReader;
friend class ASTDeclWriter;
friend class VarDecl;
static VarTemplateSpecializationDecl *
Create(ASTContext &Context, DeclContext *DC, SourceLocation StartLoc,
SourceLocation IdLoc, VarTemplateDecl *SpecializedTemplate, QualType T,
TypeSourceInfo *TInfo, StorageClass S,
ArrayRef<TemplateArgument> Args);
static VarTemplateSpecializationDecl *CreateDeserialized(ASTContext &C,
unsigned ID);
void getNameForDiagnostic(raw_ostream &OS, const PrintingPolicy &Policy,
bool Qualified) const override;
VarTemplateSpecializationDecl *getMostRecentDecl() {
VarDecl *Recent = static_cast<VarDecl *>(this)->getMostRecentDecl();
return cast<VarTemplateSpecializationDecl>(Recent);
}
/// Retrieve the template that this specialization specializes.
VarTemplateDecl *getSpecializedTemplate() const;
/// Retrieve the template arguments of the variable template
/// specialization.
const TemplateArgumentList &getTemplateArgs() const { return *TemplateArgs; }
// TODO: Always set this when creating the new specialization?
void setTemplateArgsInfo(const TemplateArgumentListInfo &ArgsInfo);
const TemplateArgumentListInfo &getTemplateArgsInfo() const {
return TemplateArgsInfo;
}
/// Determine the kind of specialization that this
/// declaration represents.
TemplateSpecializationKind getSpecializationKind() const {
return static_cast<TemplateSpecializationKind>(SpecializationKind);
}
bool isExplicitSpecialization() const {
return getSpecializationKind() == TSK_ExplicitSpecialization;
}
bool isClassScopeExplicitSpecialization() const {
return isExplicitSpecialization() &&
isa<CXXRecordDecl>(getLexicalDeclContext());
}
/// True if this declaration is an explicit specialization,
/// explicit instantiation declaration, or explicit instantiation
/// definition.
bool isExplicitInstantiationOrSpecialization() const {
return isTemplateExplicitInstantiationOrSpecialization(
getTemplateSpecializationKind());
}
void setSpecializationKind(TemplateSpecializationKind TSK) {
SpecializationKind = TSK;
}
/// Get the point of instantiation (if any), or null if none.
SourceLocation getPointOfInstantiation() const {
return PointOfInstantiation;
}
void setPointOfInstantiation(SourceLocation Loc) {
assert(Loc.isValid() && "point of instantiation must be valid!");
PointOfInstantiation = Loc;
}
void setCompleteDefinition() { IsCompleteDefinition = true; }
/// If this variable template specialization is an instantiation of
/// a template (rather than an explicit specialization), return the
/// variable template or variable template partial specialization from which
/// it was instantiated.
llvm::PointerUnion<VarTemplateDecl *, VarTemplatePartialSpecializationDecl *>
getInstantiatedFrom() const {
if (!isTemplateInstantiation(getSpecializationKind()))
return llvm::PointerUnion<VarTemplateDecl *,
VarTemplatePartialSpecializationDecl *>();
return getSpecializedTemplateOrPartial();
}
/// Retrieve the variable template or variable template partial
/// specialization which was specialized by this.
llvm::PointerUnion<VarTemplateDecl *, VarTemplatePartialSpecializationDecl *>
getSpecializedTemplateOrPartial() const {
if (const auto *PartialSpec =
SpecializedTemplate.dyn_cast<SpecializedPartialSpecialization *>())
return PartialSpec->PartialSpecialization;
return SpecializedTemplate.get<VarTemplateDecl *>();
}
/// Retrieve the set of template arguments that should be used
/// to instantiate the initializer of the variable template or variable
/// template partial specialization from which this variable template
/// specialization was instantiated.
///
/// \returns For a variable template specialization instantiated from the
/// primary template, this function will return the same template arguments
/// as getTemplateArgs(). For a variable template specialization instantiated
/// from a variable template partial specialization, this function will return
/// the deduced template arguments for the variable template partial
/// specialization itself.
const TemplateArgumentList &getTemplateInstantiationArgs() const {
if (const auto *PartialSpec =
SpecializedTemplate.dyn_cast<SpecializedPartialSpecialization *>())
return *PartialSpec->TemplateArgs;
return getTemplateArgs();
}
/// Note that this variable template specialization is actually an
/// instantiation of the given variable template partial specialization whose
/// template arguments have been deduced.
void setInstantiationOf(VarTemplatePartialSpecializationDecl *PartialSpec,
const TemplateArgumentList *TemplateArgs) {
assert(!SpecializedTemplate.is<SpecializedPartialSpecialization *>() &&
"Already set to a variable template partial specialization!");
auto *PS = new (getASTContext()) SpecializedPartialSpecialization();
PS->PartialSpecialization = PartialSpec;
PS->TemplateArgs = TemplateArgs;
SpecializedTemplate = PS;
}
/// Note that this variable template specialization is an instantiation
/// of the given variable template.
void setInstantiationOf(VarTemplateDecl *TemplDecl) {
assert(!SpecializedTemplate.is<SpecializedPartialSpecialization *>() &&
"Previously set to a variable template partial specialization!");
SpecializedTemplate = TemplDecl;
}
/// Sets the type of this specialization as it was written by
/// the user.
void setTypeAsWritten(TypeSourceInfo *T) {
if (!ExplicitInfo)
ExplicitInfo = new (getASTContext()) ExplicitSpecializationInfo;
ExplicitInfo->TypeAsWritten = T;
}
/// Gets the type of this specialization as it was written by
/// the user, if it was so written.
TypeSourceInfo *getTypeAsWritten() const {
return ExplicitInfo ? ExplicitInfo->TypeAsWritten : nullptr;
}
/// Gets the location of the extern keyword, if present.
SourceLocation getExternLoc() const {
return ExplicitInfo ? ExplicitInfo->ExternLoc : SourceLocation();
}
/// Sets the location of the extern keyword.
void setExternLoc(SourceLocation Loc) {
if (!ExplicitInfo)
ExplicitInfo = new (getASTContext()) ExplicitSpecializationInfo;
ExplicitInfo->ExternLoc = Loc;
}
/// Sets the location of the template keyword.
void setTemplateKeywordLoc(SourceLocation Loc) {
if (!ExplicitInfo)
ExplicitInfo = new (getASTContext()) ExplicitSpecializationInfo;
ExplicitInfo->TemplateKeywordLoc = Loc;
}
/// Gets the location of the template keyword, if present.
SourceLocation getTemplateKeywordLoc() const {
return ExplicitInfo ? ExplicitInfo->TemplateKeywordLoc : SourceLocation();
}
void Profile(llvm::FoldingSetNodeID &ID) const {
Profile(ID, TemplateArgs->asArray(), getASTContext());
}
static void Profile(llvm::FoldingSetNodeID &ID,
ArrayRef<TemplateArgument> TemplateArgs,
ASTContext &Context) {
ID.AddInteger(TemplateArgs.size());
for (const TemplateArgument &TemplateArg : TemplateArgs)
TemplateArg.Profile(ID, Context);
}
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) {
return K >= firstVarTemplateSpecialization &&
K <= lastVarTemplateSpecialization;
}
};
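// Illustrative sketch (not part of the upstream header): distinguishing an
// explicit specialization from an implicit instantiation and, for the latter,
// recovering what it was instantiated from. The helper name is hypothetical.
//
// \code
// static void describe(const clang::VarTemplateSpecializationDecl *VTSD) {
//   if (VTSD->isExplicitSpecialization())
//     return; // written by the user; getInstantiatedFrom() would be empty
//   auto From = VTSD->getInstantiatedFrom();
//   if (auto *Partial =
//           From.dyn_cast<clang::VarTemplatePartialSpecializationDecl *>())
//     (void)Partial; // instantiated from a partial specialization; its deduced
//                    // arguments come from getTemplateInstantiationArgs()
//   else if (auto *Primary = From.dyn_cast<clang::VarTemplateDecl *>())
//     (void)Primary; // instantiated directly from the primary template
// }
// \endcode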
class VarTemplatePartialSpecializationDecl
: public VarTemplateSpecializationDecl {
/// The list of template parameters
TemplateParameterList *TemplateParams = nullptr;
/// The source info for the template arguments as written.
/// FIXME: redundant with TypeAsWritten?
const ASTTemplateArgumentListInfo *ArgsAsWritten = nullptr;
/// The variable template partial specialization from which this
/// variable template partial specialization was instantiated.
///
/// The boolean value will be true to indicate that this variable template
/// partial specialization was specialized at this level.
llvm::PointerIntPair<VarTemplatePartialSpecializationDecl *, 1, bool>
InstantiatedFromMember;
VarTemplatePartialSpecializationDecl(
ASTContext &Context, DeclContext *DC, SourceLocation StartLoc,
SourceLocation IdLoc, TemplateParameterList *Params,
VarTemplateDecl *SpecializedTemplate, QualType T, TypeSourceInfo *TInfo,
StorageClass S, ArrayRef<TemplateArgument> Args,
const ASTTemplateArgumentListInfo *ArgInfos);
VarTemplatePartialSpecializationDecl(ASTContext &Context)
: VarTemplateSpecializationDecl(VarTemplatePartialSpecialization,
Context),
InstantiatedFromMember(nullptr, false) {}
void anchor() override;
public:
friend class ASTDeclReader;
friend class ASTDeclWriter;
static VarTemplatePartialSpecializationDecl *
Create(ASTContext &Context, DeclContext *DC, SourceLocation StartLoc,
SourceLocation IdLoc, TemplateParameterList *Params,
VarTemplateDecl *SpecializedTemplate, QualType T,
TypeSourceInfo *TInfo, StorageClass S, ArrayRef<TemplateArgument> Args,
const TemplateArgumentListInfo &ArgInfos);
static VarTemplatePartialSpecializationDecl *CreateDeserialized(ASTContext &C,
unsigned ID);
VarTemplatePartialSpecializationDecl *getMostRecentDecl() {
return cast<VarTemplatePartialSpecializationDecl>(
static_cast<VarTemplateSpecializationDecl *>(
this)->getMostRecentDecl());
}
/// Get the list of template parameters
TemplateParameterList *getTemplateParameters() const {
return TemplateParams;
}
/// Get the template arguments as written.
const ASTTemplateArgumentListInfo *getTemplateArgsAsWritten() const {
return ArgsAsWritten;
}
/// \brief All associated constraints of this partial specialization,
/// including the requires clause and any constraints derived from
/// constrained-parameters.
///
/// The constraints in the resulting list are to be treated as if in a
/// conjunction ("and").
void getAssociatedConstraints(llvm::SmallVectorImpl<const Expr *> &AC) const {
TemplateParams->getAssociatedConstraints(AC);
}
bool hasAssociatedConstraints() const {
return TemplateParams->hasAssociatedConstraints();
}
/// \brief Retrieve the member variable template partial specialization from
/// which this particular variable template partial specialization was
/// instantiated.
///
/// \code
/// template<typename T>
/// struct Outer {
/// template<typename U> U Inner;
/// template<typename U> U* Inner<U*> = (U*)(0); // #1
/// };
///
/// template int* Outer<float>::Inner<int*>;
/// \endcode
///
/// In this example, the instantiation of \c Outer<float>::Inner<int*> will
/// end up instantiating the partial specialization
/// \c Outer<float>::Inner<U*>, which itself was instantiated from the
/// variable template partial specialization \c Outer<T>::Inner<U*>. Given
/// \c Outer<float>::Inner<U*>, this function would return
/// \c Outer<T>::Inner<U*>.
VarTemplatePartialSpecializationDecl *getInstantiatedFromMember() const {
const auto *First =
cast<VarTemplatePartialSpecializationDecl>(getFirstDecl());
return First->InstantiatedFromMember.getPointer();
}
void
setInstantiatedFromMember(VarTemplatePartialSpecializationDecl *PartialSpec) {
auto *First = cast<VarTemplatePartialSpecializationDecl>(getFirstDecl());
First->InstantiatedFromMember.setPointer(PartialSpec);
}
/// Determines whether this variable template partial specialization
/// was a specialization of a member partial specialization.
///
/// In the following example, the member template partial specialization
/// \c X<int>::Inner<T*> is a member specialization.
///
/// \code
/// template<typename T>
/// struct X {
/// template<typename U> U Inner;
/// template<typename U> U* Inner<U*> = (U*)(0);
/// };
///
/// template<> template<typename T>
/// T* X<int>::Inner<T*> = (T*)(0) + 1;
/// \endcode
bool isMemberSpecialization() {
const auto *First =
cast<VarTemplatePartialSpecializationDecl>(getFirstDecl());
return First->InstantiatedFromMember.getInt();
}
/// Note that this member template is a specialization.
void setMemberSpecialization() {
auto *First = cast<VarTemplatePartialSpecializationDecl>(getFirstDecl());
assert(First->InstantiatedFromMember.getPointer() &&
"Only member templates can be member template specializations");
return First->InstantiatedFromMember.setInt(true);
}
void Profile(llvm::FoldingSetNodeID &ID) const {
Profile(ID, getTemplateArgs().asArray(), getTemplateParameters(),
getASTContext());
}
static void
Profile(llvm::FoldingSetNodeID &ID, ArrayRef<TemplateArgument> TemplateArgs,
TemplateParameterList *TPL, ASTContext &Context);
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) {
return K == VarTemplatePartialSpecialization;
}
};
/// Declaration of a variable template.
class VarTemplateDecl : public RedeclarableTemplateDecl {
protected:
/// Data that is common to all of the declarations of a given
/// variable template.
struct Common : CommonBase {
/// The variable template specializations for this variable
/// template, including explicit specializations and instantiations.
llvm::FoldingSetVector<VarTemplateSpecializationDecl> Specializations;
/// The variable template partial specializations for this variable
/// template.
llvm::FoldingSetVector<VarTemplatePartialSpecializationDecl>
PartialSpecializations;
Common() = default;
};
/// Retrieve the set of specializations of this variable template.
llvm::FoldingSetVector<VarTemplateSpecializationDecl> &
getSpecializations() const;
/// Retrieve the set of partial specializations of this variable
/// template.
llvm::FoldingSetVector<VarTemplatePartialSpecializationDecl> &
getPartialSpecializations() const;
VarTemplateDecl(ASTContext &C, DeclContext *DC, SourceLocation L,
DeclarationName Name, TemplateParameterList *Params,
NamedDecl *Decl)
: RedeclarableTemplateDecl(VarTemplate, C, DC, L, Name, Params, Decl) {}
CommonBase *newCommon(ASTContext &C) const override;
Common *getCommonPtr() const {
return static_cast<Common *>(RedeclarableTemplateDecl::getCommonPtr());
}
public:
friend class ASTDeclReader;
friend class ASTDeclWriter;
/// Load any lazily-loaded specializations from the external source.
void LoadLazySpecializations() const;
/// Get the underlying variable declaration of the template.
VarDecl *getTemplatedDecl() const {
return static_cast<VarDecl *>(TemplatedDecl);
}
/// Returns whether this template declaration defines the primary
/// variable pattern.
bool isThisDeclarationADefinition() const {
return getTemplatedDecl()->isThisDeclarationADefinition();
}
VarTemplateDecl *getDefinition();
/// Create a variable template node.
static VarTemplateDecl *Create(ASTContext &C, DeclContext *DC,
SourceLocation L, DeclarationName Name,
TemplateParameterList *Params,
VarDecl *Decl);
/// Create an empty variable template node.
static VarTemplateDecl *CreateDeserialized(ASTContext &C, unsigned ID);
/// Return the specialization with the provided arguments if it exists,
/// otherwise return the insertion point.
VarTemplateSpecializationDecl *
findSpecialization(ArrayRef<TemplateArgument> Args, void *&InsertPos);
/// Insert the specified specialization knowing that it is not already
/// in. InsertPos must be obtained from findSpecialization.
void AddSpecialization(VarTemplateSpecializationDecl *D, void *InsertPos);
VarTemplateDecl *getCanonicalDecl() override {
return cast<VarTemplateDecl>(RedeclarableTemplateDecl::getCanonicalDecl());
}
const VarTemplateDecl *getCanonicalDecl() const {
return cast<VarTemplateDecl>(RedeclarableTemplateDecl::getCanonicalDecl());
}
/// Retrieve the previous declaration of this variable template, or
/// nullptr if no such declaration exists.
VarTemplateDecl *getPreviousDecl() {
return cast_or_null<VarTemplateDecl>(
static_cast<RedeclarableTemplateDecl *>(this)->getPreviousDecl());
}
const VarTemplateDecl *getPreviousDecl() const {
return cast_or_null<VarTemplateDecl>(
static_cast<const RedeclarableTemplateDecl *>(
this)->getPreviousDecl());
}
VarTemplateDecl *getMostRecentDecl() {
return cast<VarTemplateDecl>(
static_cast<RedeclarableTemplateDecl *>(this)->getMostRecentDecl());
}
const VarTemplateDecl *getMostRecentDecl() const {
return const_cast<VarTemplateDecl *>(this)->getMostRecentDecl();
}
VarTemplateDecl *getInstantiatedFromMemberTemplate() const {
return cast_or_null<VarTemplateDecl>(
RedeclarableTemplateDecl::getInstantiatedFromMemberTemplate());
}
/// Return the partial specialization with the provided arguments if it
/// exists, otherwise return the insertion point.
VarTemplatePartialSpecializationDecl *
findPartialSpecialization(ArrayRef<TemplateArgument> Args,
TemplateParameterList *TPL, void *&InsertPos);
/// Insert the specified partial specialization knowing that it is not
/// already in. InsertPos must be obtained from findPartialSpecialization.
void AddPartialSpecialization(VarTemplatePartialSpecializationDecl *D,
void *InsertPos);
/// Retrieve the partial specializations as an ordered list.
void getPartialSpecializations(
SmallVectorImpl<VarTemplatePartialSpecializationDecl *> &PS) const;
/// Find a variable template partial specialization which was instantiated
/// from the given member partial specialization.
///
/// \param D a member variable template partial specialization.
///
/// \returns the variable template partial specialization which was instantiated
/// from the given member partial specialization, or nullptr if no such
/// partial specialization exists.
VarTemplatePartialSpecializationDecl *findPartialSpecInstantiatedFromMember(
VarTemplatePartialSpecializationDecl *D);
using spec_iterator = SpecIterator<VarTemplateSpecializationDecl>;
using spec_range = llvm::iterator_range<spec_iterator>;
spec_range specializations() const {
return spec_range(spec_begin(), spec_end());
}
spec_iterator spec_begin() const {
return makeSpecIterator(getSpecializations(), false);
}
spec_iterator spec_end() const {
return makeSpecIterator(getSpecializations(), true);
}
// Implement isa/cast/dyncast support
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == VarTemplate; }
};
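// Illustrative sketch (not part of the upstream header): the find/insert
// protocol described in the comments above. `Args` and `NewSpec` are assumed
// to be supplied by the caller (for example, by Sema during instantiation).
//
// \code
// static void registerSpec(clang::VarTemplateDecl *VTD,
//                          llvm::ArrayRef<clang::TemplateArgument> Args,
//                          clang::VarTemplateSpecializationDecl *NewSpec) {
//   void *InsertPos = nullptr;
//   if (VTD->findSpecialization(Args, InsertPos))
//     return; // a specialization with these arguments already exists
//   // The InsertPos obtained from findSpecialization() is reused here, as the
//   // AddSpecialization() contract requires.
//   VTD->AddSpecialization(NewSpec, InsertPos);
// }
// \endcode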
/// Declaration of a C++2a concept.
class ConceptDecl : public TemplateDecl, public Mergeable<ConceptDecl> {
protected:
Expr *ConstraintExpr;
ConceptDecl(DeclContext *DC, SourceLocation L, DeclarationName Name,
TemplateParameterList *Params, Expr *ConstraintExpr)
: TemplateDecl(Concept, DC, L, Name, Params),
ConstraintExpr(ConstraintExpr) {};
public:
static ConceptDecl *Create(ASTContext &C, DeclContext *DC,
SourceLocation L, DeclarationName Name,
TemplateParameterList *Params,
Expr *ConstraintExpr);
static ConceptDecl *CreateDeserialized(ASTContext &C, unsigned ID);
Expr *getConstraintExpr() const {
return ConstraintExpr;
}
SourceRange getSourceRange() const override LLVM_READONLY {
return SourceRange(getTemplateParameters()->getTemplateLoc(),
ConstraintExpr->getEndLoc());
}
bool isTypeConcept() const {
return isa<TemplateTypeParmDecl>(getTemplateParameters()->getParam(0));
}
ConceptDecl *getCanonicalDecl() override { return getFirstDecl(); }
const ConceptDecl *getCanonicalDecl() const { return getFirstDecl(); }
// Implement isa/cast/dyncast/etc.
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == Concept; }
friend class ASTReader;
friend class ASTDeclReader;
friend class ASTDeclWriter;
};
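// Illustrative sketch (not part of the upstream header): the kind of source
// construct a ConceptDecl models. For
//
// \code
// template<typename T>
// concept Small = sizeof(T) <= sizeof(void *);
// \endcode
//
// the template parameter list holds T, getConstraintExpr() returns the
// constraint-expression `sizeof(T) <= sizeof(void *)`, and isTypeConcept() is
// true because the first parameter is a type parameter.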
/// A template parameter object.
///
/// Template parameter objects represent values of class type used as template
/// arguments. There is one template parameter object for each such distinct
/// value used as a template argument across the program.
///
/// \code
/// struct A { int x, y; };
/// template<A> struct S;
/// S<A{1, 2}> s1;
/// S<A{1, 2}> s2; // same type, argument is same TemplateParamObjectDecl.
/// \endcode
class TemplateParamObjectDecl : public ValueDecl,
public Mergeable<TemplateParamObjectDecl>,
public llvm::FoldingSetNode {
private:
/// The value of this template parameter object.
APValue Value;
TemplateParamObjectDecl(DeclContext *DC, QualType T, const APValue &V)
: ValueDecl(TemplateParamObject, DC, SourceLocation(), DeclarationName(),
T),
Value(V) {}
static TemplateParamObjectDecl *Create(const ASTContext &C, QualType T,
const APValue &V);
static TemplateParamObjectDecl *CreateDeserialized(ASTContext &C,
unsigned ID);
/// Only ASTContext::getTemplateParamObjectDecl and deserialization
/// create these.
friend class ASTContext;
friend class ASTReader;
friend class ASTDeclReader;
public:
/// Print this template parameter object in a human-readable format.
void printName(llvm::raw_ostream &OS) const override;
/// Print this object as an equivalent expression.
void printAsExpr(llvm::raw_ostream &OS) const;
/// Print this object as an initializer suitable for a variable of the
/// object's type.
void printAsInit(llvm::raw_ostream &OS) const;
const APValue &getValue() const { return Value; }
static void Profile(llvm::FoldingSetNodeID &ID, QualType T,
const APValue &V) {
ID.AddPointer(T.getCanonicalType().getAsOpaquePtr());
V.Profile(ID);
}
void Profile(llvm::FoldingSetNodeID &ID) {
Profile(ID, getType(), getValue());
}
TemplateParamObjectDecl *getCanonicalDecl() override {
return getFirstDecl();
}
const TemplateParamObjectDecl *getCanonicalDecl() const {
return getFirstDecl();
}
static bool classof(const Decl *D) { return classofKind(D->getKind()); }
static bool classofKind(Kind K) { return K == TemplateParamObject; }
};
inline NamedDecl *getAsNamedDecl(TemplateParameter P) {
if (auto *PD = P.dyn_cast<TemplateTypeParmDecl *>())
return PD;
if (auto *PD = P.dyn_cast<NonTypeTemplateParmDecl *>())
return PD;
return P.get<TemplateTemplateParmDecl *>();
}
inline TemplateDecl *getAsTypeTemplateDecl(Decl *D) {
auto *TD = dyn_cast<TemplateDecl>(D);
return TD && (isa<ClassTemplateDecl>(TD) ||
isa<ClassTemplatePartialSpecializationDecl>(TD) ||
isa<TypeAliasTemplateDecl>(TD) ||
isa<TemplateTemplateParmDecl>(TD))
? TD
: nullptr;
}
/// Check whether the template parameter is a pack expansion, and if so,
/// determine the number of parameters produced by that expansion. For instance:
///
/// \code
/// template<typename ...Ts> struct A {
/// template<Ts ...NTs, template<Ts> class ...TTs, typename ...Us> struct B;
/// };
/// \endcode
///
/// In \c A<int,int>::B, \c NTs and \c TTs have expanded pack size 2, and \c Us
/// is not a pack expansion, so this function returns an empty Optional for it.
inline Optional<unsigned> getExpandedPackSize(const NamedDecl *Param) {
if (const auto *TTP = dyn_cast<TemplateTypeParmDecl>(Param)) {
if (TTP->isExpandedParameterPack())
return TTP->getNumExpansionParameters();
}
if (const auto *NTTP = dyn_cast<NonTypeTemplateParmDecl>(Param)) {
if (NTTP->isExpandedParameterPack())
return NTTP->getNumExpansionTypes();
}
if (const auto *TTP = dyn_cast<TemplateTemplateParmDecl>(Param)) {
if (TTP->isExpandedParameterPack())
return TTP->getNumExpansionTemplateParameters();
}
return None;
}
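// Illustrative sketch (not part of the upstream header): a hypothetical helper
// that treats a parameter with no known expansion (a non-pack, or a pack whose
// size is not fixed) as occupying a single slot.
//
// \code
// static unsigned expansionsOrOne(const clang::NamedDecl *Param) {
//   if (llvm::Optional<unsigned> N = clang::getExpandedPackSize(Param))
//     return *N;
//   return 1;
// }
// \endcode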
} // namespace clang
#endif // LLVM_CLANG_AST_DECLTEMPLATE_H
diff --git a/contrib/llvm-project/clang/lib/AST/DeclTemplate.cpp b/contrib/llvm-project/clang/lib/AST/DeclTemplate.cpp
index 223f06b9db1c..d9ff3517a589 100755
--- a/contrib/llvm-project/clang/lib/AST/DeclTemplate.cpp
+++ b/contrib/llvm-project/clang/lib/AST/DeclTemplate.cpp
@@ -1,1518 +1,1525 @@
//===- DeclTemplate.cpp - Template Declaration AST Node Implementation ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the C++ related Decl classes for templates.
//
//===----------------------------------------------------------------------===//
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTMutationListener.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclarationName.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExternalASTSource.h"
#include "clang/AST/TemplateBase.h"
#include "clang/AST/TemplateName.h"
#include "clang/AST/Type.h"
#include "clang/AST/TypeLoc.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/SourceLocation.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <memory>
#include <utility>
using namespace clang;
//===----------------------------------------------------------------------===//
// TemplateParameterList Implementation
//===----------------------------------------------------------------------===//
TemplateParameterList::TemplateParameterList(const ASTContext& C,
SourceLocation TemplateLoc,
SourceLocation LAngleLoc,
ArrayRef<NamedDecl *> Params,
SourceLocation RAngleLoc,
Expr *RequiresClause)
: TemplateLoc(TemplateLoc), LAngleLoc(LAngleLoc), RAngleLoc(RAngleLoc),
NumParams(Params.size()), ContainsUnexpandedParameterPack(false),
HasRequiresClause(RequiresClause != nullptr),
HasConstrainedParameters(false) {
for (unsigned Idx = 0; Idx < NumParams; ++Idx) {
NamedDecl *P = Params[Idx];
begin()[Idx] = P;
bool IsPack = P->isTemplateParameterPack();
if (const auto *NTTP = dyn_cast<NonTypeTemplateParmDecl>(P)) {
if (!IsPack && NTTP->getType()->containsUnexpandedParameterPack())
ContainsUnexpandedParameterPack = true;
if (NTTP->hasPlaceholderTypeConstraint())
HasConstrainedParameters = true;
} else if (const auto *TTP = dyn_cast<TemplateTemplateParmDecl>(P)) {
if (!IsPack &&
TTP->getTemplateParameters()->containsUnexpandedParameterPack())
ContainsUnexpandedParameterPack = true;
} else if (const auto *TTP = dyn_cast<TemplateTypeParmDecl>(P)) {
if (const TypeConstraint *TC = TTP->getTypeConstraint()) {
if (TC->getImmediatelyDeclaredConstraint()
->containsUnexpandedParameterPack())
ContainsUnexpandedParameterPack = true;
}
if (TTP->hasTypeConstraint())
HasConstrainedParameters = true;
} else {
llvm_unreachable("unexpected template parameter type");
}
// FIXME: If a default argument contains an unexpanded parameter pack, the
// template parameter list does too.
}
if (HasRequiresClause) {
if (RequiresClause->containsUnexpandedParameterPack())
ContainsUnexpandedParameterPack = true;
*getTrailingObjects<Expr *>() = RequiresClause;
}
}
bool TemplateParameterList::containsUnexpandedParameterPack() const {
if (ContainsUnexpandedParameterPack)
return true;
if (!HasConstrainedParameters)
return false;
// An implicit constrained parameter might have had a use of an unexpanded
// pack added to it after the template parameter list was created. All
// implicit parameters are at the end of the parameter list.
for (const NamedDecl *Param : llvm::reverse(asArray())) {
if (!Param->isImplicit())
break;
if (const auto *TTP = dyn_cast<TemplateTypeParmDecl>(Param)) {
const auto *TC = TTP->getTypeConstraint();
if (TC && TC->getImmediatelyDeclaredConstraint()
->containsUnexpandedParameterPack())
return true;
}
}
return false;
}
TemplateParameterList *
TemplateParameterList::Create(const ASTContext &C, SourceLocation TemplateLoc,
SourceLocation LAngleLoc,
ArrayRef<NamedDecl *> Params,
SourceLocation RAngleLoc, Expr *RequiresClause) {
void *Mem = C.Allocate(totalSizeToAlloc<NamedDecl *, Expr *>(
Params.size(), RequiresClause ? 1u : 0u),
alignof(TemplateParameterList));
return new (Mem) TemplateParameterList(C, TemplateLoc, LAngleLoc, Params,
RAngleLoc, RequiresClause);
}
unsigned TemplateParameterList::getMinRequiredArguments() const {
unsigned NumRequiredArgs = 0;
for (const NamedDecl *P : asArray()) {
if (P->isTemplateParameterPack()) {
if (Optional<unsigned> Expansions = getExpandedPackSize(P)) {
NumRequiredArgs += *Expansions;
continue;
}
break;
}
if (const auto *TTP = dyn_cast<TemplateTypeParmDecl>(P)) {
if (TTP->hasDefaultArgument())
break;
} else if (const auto *NTTP = dyn_cast<NonTypeTemplateParmDecl>(P)) {
if (NTTP->hasDefaultArgument())
break;
} else if (cast<TemplateTemplateParmDecl>(P)->hasDefaultArgument())
break;
++NumRequiredArgs;
}
return NumRequiredArgs;
}
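// Illustrative examples (not upstream): template heads and the minimum
// argument count the loop above computes for them.
//
//   template<typename T, typename U = int, typename ...Vs> struct A; // -> 1
//     T is counted; U has a default argument, so the walk stops before the
//     trailing pack is reached.
//
//   template<typename ...Ts> struct B;                               // -> 0
//     Ts is a pack with no fixed expansion size, so the walk stops at once.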
unsigned TemplateParameterList::getDepth() const {
if (size() == 0)
return 0;
const NamedDecl *FirstParm = getParam(0);
if (const auto *TTP = dyn_cast<TemplateTypeParmDecl>(FirstParm))
return TTP->getDepth();
else if (const auto *NTTP = dyn_cast<NonTypeTemplateParmDecl>(FirstParm))
return NTTP->getDepth();
else
return cast<TemplateTemplateParmDecl>(FirstParm)->getDepth();
}
static bool AdoptTemplateParameterList(TemplateParameterList *Params,
DeclContext *Owner) {
bool Invalid = false;
for (NamedDecl *P : *Params) {
P->setDeclContext(Owner);
if (const auto *TTP = dyn_cast<TemplateTemplateParmDecl>(P))
if (AdoptTemplateParameterList(TTP->getTemplateParameters(), Owner))
Invalid = true;
if (P->isInvalidDecl())
Invalid = true;
}
return Invalid;
}
void TemplateParameterList::
getAssociatedConstraints(llvm::SmallVectorImpl<const Expr *> &AC) const {
if (HasConstrainedParameters)
for (const NamedDecl *Param : *this) {
if (const auto *TTP = dyn_cast<TemplateTypeParmDecl>(Param)) {
if (const auto *TC = TTP->getTypeConstraint())
AC.push_back(TC->getImmediatelyDeclaredConstraint());
} else if (const auto *NTTP = dyn_cast<NonTypeTemplateParmDecl>(Param)) {
if (const Expr *E = NTTP->getPlaceholderTypeConstraint())
AC.push_back(E);
}
}
if (HasRequiresClause)
AC.push_back(getRequiresClause());
}
bool TemplateParameterList::hasAssociatedConstraints() const {
return HasRequiresClause || HasConstrainedParameters;
}
bool TemplateParameterList::shouldIncludeTypeForArgument(
const PrintingPolicy &Policy, const TemplateParameterList *TPL,
unsigned Idx) {
if (!TPL || Idx >= TPL->size() || Policy.AlwaysIncludeTypeForTemplateArgument)
return true;
const NamedDecl *TemplParam = TPL->getParam(Idx);
if (const auto *ParamValueDecl =
dyn_cast<NonTypeTemplateParmDecl>(TemplParam))
if (ParamValueDecl->getType()->getContainedDeducedType())
return true;
return false;
}
namespace clang {
void *allocateDefaultArgStorageChain(const ASTContext &C) {
return new (C) char[sizeof(void*) * 2];
}
} // namespace clang
//===----------------------------------------------------------------------===//
// TemplateDecl Implementation
//===----------------------------------------------------------------------===//
TemplateDecl::TemplateDecl(Kind DK, DeclContext *DC, SourceLocation L,
DeclarationName Name, TemplateParameterList *Params,
NamedDecl *Decl)
: NamedDecl(DK, DC, L, Name), TemplatedDecl(Decl), TemplateParams(Params) {}
void TemplateDecl::anchor() {}
void TemplateDecl::
getAssociatedConstraints(llvm::SmallVectorImpl<const Expr *> &AC) const {
TemplateParams->getAssociatedConstraints(AC);
if (auto *FD = dyn_cast_or_null<FunctionDecl>(getTemplatedDecl()))
if (const Expr *TRC = FD->getTrailingRequiresClause())
AC.push_back(TRC);
}
bool TemplateDecl::hasAssociatedConstraints() const {
if (TemplateParams->hasAssociatedConstraints())
return true;
if (auto *FD = dyn_cast_or_null<FunctionDecl>(getTemplatedDecl()))
return FD->getTrailingRequiresClause();
return false;
}
//===----------------------------------------------------------------------===//
// RedeclarableTemplateDecl Implementation
//===----------------------------------------------------------------------===//
void RedeclarableTemplateDecl::anchor() {}
RedeclarableTemplateDecl::CommonBase *
RedeclarableTemplateDecl::getCommonPtr() const {
if (Common)
return Common;
// Walk the previous-declaration chain until we either find a declaration
// with a common pointer or we run out of previous declarations.
SmallVector<const RedeclarableTemplateDecl *, 2> PrevDecls;
for (const RedeclarableTemplateDecl *Prev = getPreviousDecl(); Prev;
Prev = Prev->getPreviousDecl()) {
if (Prev->Common) {
Common = Prev->Common;
break;
}
PrevDecls.push_back(Prev);
}
// If we never found a common pointer, allocate one now.
if (!Common) {
// FIXME: If any of the declarations is from an AST file, we probably
// need an update record to add the common data.
Common = newCommon(getASTContext());
}
// Update any previous declarations we saw with the common pointer.
for (const RedeclarableTemplateDecl *Prev : PrevDecls)
Prev->Common = Common;
return Common;
}
void RedeclarableTemplateDecl::loadLazySpecializationsImpl() const {
// Grab the most recent declaration to ensure we've loaded any lazy
// redeclarations of this template.
CommonBase *CommonBasePtr = getMostRecentDecl()->getCommonPtr();
if (CommonBasePtr->LazySpecializations) {
ASTContext &Context = getASTContext();
uint32_t *Specs = CommonBasePtr->LazySpecializations;
CommonBasePtr->LazySpecializations = nullptr;
for (uint32_t I = 0, N = *Specs++; I != N; ++I)
(void)Context.getExternalSource()->GetExternalDecl(Specs[I]);
}
}
template<class EntryType, typename... ProfileArguments>
typename RedeclarableTemplateDecl::SpecEntryTraits<EntryType>::DeclType *
RedeclarableTemplateDecl::findSpecializationImpl(
llvm::FoldingSetVector<EntryType> &Specs, void *&InsertPos,
ProfileArguments&&... ProfileArgs) {
using SETraits = SpecEntryTraits<EntryType>;
llvm::FoldingSetNodeID ID;
EntryType::Profile(ID, std::forward<ProfileArguments>(ProfileArgs)...,
getASTContext());
EntryType *Entry = Specs.FindNodeOrInsertPos(ID, InsertPos);
return Entry ? SETraits::getDecl(Entry)->getMostRecentDecl() : nullptr;
}
template<class Derived, class EntryType>
void RedeclarableTemplateDecl::addSpecializationImpl(
llvm::FoldingSetVector<EntryType> &Specializations, EntryType *Entry,
void *InsertPos) {
using SETraits = SpecEntryTraits<EntryType>;
if (InsertPos) {
#ifndef NDEBUG
void *CorrectInsertPos;
assert(!findSpecializationImpl(Specializations,
CorrectInsertPos,
SETraits::getTemplateArgs(Entry)) &&
InsertPos == CorrectInsertPos &&
"given incorrect InsertPos for specialization");
#endif
Specializations.InsertNode(Entry, InsertPos);
} else {
EntryType *Existing = Specializations.GetOrInsertNode(Entry);
(void)Existing;
assert(SETraits::getDecl(Existing)->isCanonicalDecl() &&
"non-canonical specialization?");
}
if (ASTMutationListener *L = getASTMutationListener())
L->AddedCXXTemplateSpecialization(cast<Derived>(this),
SETraits::getDecl(Entry));
}
//===----------------------------------------------------------------------===//
// FunctionTemplateDecl Implementation
//===----------------------------------------------------------------------===//
FunctionTemplateDecl *
FunctionTemplateDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L,
DeclarationName Name,
TemplateParameterList *Params, NamedDecl *Decl) {
bool Invalid = AdoptTemplateParameterList(Params, cast<DeclContext>(Decl));
auto *TD = new (C, DC) FunctionTemplateDecl(C, DC, L, Name, Params, Decl);
if (Invalid)
TD->setInvalidDecl();
return TD;
}
FunctionTemplateDecl *FunctionTemplateDecl::CreateDeserialized(ASTContext &C,
unsigned ID) {
return new (C, ID) FunctionTemplateDecl(C, nullptr, SourceLocation(),
DeclarationName(), nullptr, nullptr);
}
RedeclarableTemplateDecl::CommonBase *
FunctionTemplateDecl::newCommon(ASTContext &C) const {
auto *CommonPtr = new (C) Common;
C.addDestruction(CommonPtr);
return CommonPtr;
}
void FunctionTemplateDecl::LoadLazySpecializations() const {
loadLazySpecializationsImpl();
}
llvm::FoldingSetVector<FunctionTemplateSpecializationInfo> &
FunctionTemplateDecl::getSpecializations() const {
LoadLazySpecializations();
return getCommonPtr()->Specializations;
}
FunctionDecl *
FunctionTemplateDecl::findSpecialization(ArrayRef<TemplateArgument> Args,
void *&InsertPos) {
return findSpecializationImpl(getSpecializations(), InsertPos, Args);
}
void FunctionTemplateDecl::addSpecialization(
FunctionTemplateSpecializationInfo *Info, void *InsertPos) {
addSpecializationImpl<FunctionTemplateDecl>(getSpecializations(), Info,
InsertPos);
}
ArrayRef<TemplateArgument> FunctionTemplateDecl::getInjectedTemplateArgs() {
TemplateParameterList *Params = getTemplateParameters();
Common *CommonPtr = getCommonPtr();
if (!CommonPtr->InjectedArgs) {
auto &Context = getASTContext();
SmallVector<TemplateArgument, 16> TemplateArgs;
Context.getInjectedTemplateArgs(Params, TemplateArgs);
CommonPtr->InjectedArgs =
new (Context) TemplateArgument[TemplateArgs.size()];
std::copy(TemplateArgs.begin(), TemplateArgs.end(),
CommonPtr->InjectedArgs);
}
return llvm::makeArrayRef(CommonPtr->InjectedArgs, Params->size());
}
void FunctionTemplateDecl::mergePrevDecl(FunctionTemplateDecl *Prev) {
using Base = RedeclarableTemplateDecl;
// If we haven't created a common pointer yet, then it can just be created
// with the usual method.
if (!Base::Common)
return;
Common *ThisCommon = static_cast<Common *>(Base::Common);
Common *PrevCommon = nullptr;
SmallVector<FunctionTemplateDecl *, 8> PreviousDecls;
for (; Prev; Prev = Prev->getPreviousDecl()) {
if (Prev->Base::Common) {
PrevCommon = static_cast<Common *>(Prev->Base::Common);
break;
}
PreviousDecls.push_back(Prev);
}
// If the previous redecl chain hasn't created a common pointer yet, then just
// use this common pointer.
if (!PrevCommon) {
for (auto *D : PreviousDecls)
D->Base::Common = ThisCommon;
return;
}
// Ensure we don't leak any important state.
assert(ThisCommon->Specializations.size() == 0 &&
"Can't merge incompatible declarations!");
Base::Common = PrevCommon;
}
//===----------------------------------------------------------------------===//
// ClassTemplateDecl Implementation
//===----------------------------------------------------------------------===//
ClassTemplateDecl *ClassTemplateDecl::Create(ASTContext &C, DeclContext *DC,
SourceLocation L,
DeclarationName Name,
TemplateParameterList *Params,
NamedDecl *Decl) {
bool Invalid = AdoptTemplateParameterList(Params, cast<DeclContext>(Decl));
auto *TD = new (C, DC) ClassTemplateDecl(C, DC, L, Name, Params, Decl);
if (Invalid)
TD->setInvalidDecl();
return TD;
}
ClassTemplateDecl *ClassTemplateDecl::CreateDeserialized(ASTContext &C,
unsigned ID) {
return new (C, ID) ClassTemplateDecl(C, nullptr, SourceLocation(),
DeclarationName(), nullptr, nullptr);
}
void ClassTemplateDecl::LoadLazySpecializations() const {
loadLazySpecializationsImpl();
}
llvm::FoldingSetVector<ClassTemplateSpecializationDecl> &
ClassTemplateDecl::getSpecializations() const {
LoadLazySpecializations();
return getCommonPtr()->Specializations;
}
llvm::FoldingSetVector<ClassTemplatePartialSpecializationDecl> &
ClassTemplateDecl::getPartialSpecializations() const {
LoadLazySpecializations();
return getCommonPtr()->PartialSpecializations;
}
RedeclarableTemplateDecl::CommonBase *
ClassTemplateDecl::newCommon(ASTContext &C) const {
auto *CommonPtr = new (C) Common;
C.addDestruction(CommonPtr);
return CommonPtr;
}
ClassTemplateSpecializationDecl *
ClassTemplateDecl::findSpecialization(ArrayRef<TemplateArgument> Args,
void *&InsertPos) {
return findSpecializationImpl(getSpecializations(), InsertPos, Args);
}
void ClassTemplateDecl::AddSpecialization(ClassTemplateSpecializationDecl *D,
void *InsertPos) {
addSpecializationImpl<ClassTemplateDecl>(getSpecializations(), D, InsertPos);
}
ClassTemplatePartialSpecializationDecl *
ClassTemplateDecl::findPartialSpecialization(
ArrayRef<TemplateArgument> Args,
TemplateParameterList *TPL, void *&InsertPos) {
return findSpecializationImpl(getPartialSpecializations(), InsertPos, Args,
TPL);
}
static void ProfileTemplateParameterList(ASTContext &C,
llvm::FoldingSetNodeID &ID, const TemplateParameterList *TPL) {
const Expr *RC = TPL->getRequiresClause();
ID.AddBoolean(RC != nullptr);
if (RC)
RC->Profile(ID, C, /*Canonical=*/true);
ID.AddInteger(TPL->size());
for (NamedDecl *D : *TPL) {
if (const auto *NTTP = dyn_cast<NonTypeTemplateParmDecl>(D)) {
ID.AddInteger(0);
ID.AddBoolean(NTTP->isParameterPack());
NTTP->getType().getCanonicalType().Profile(ID);
continue;
}
if (const auto *TTP = dyn_cast<TemplateTypeParmDecl>(D)) {
ID.AddInteger(1);
ID.AddBoolean(TTP->isParameterPack());
ID.AddBoolean(TTP->hasTypeConstraint());
if (const TypeConstraint *TC = TTP->getTypeConstraint())
TC->getImmediatelyDeclaredConstraint()->Profile(ID, C,
/*Canonical=*/true);
continue;
}
const auto *TTP = cast<TemplateTemplateParmDecl>(D);
ID.AddInteger(2);
ID.AddBoolean(TTP->isParameterPack());
ProfileTemplateParameterList(C, ID, TTP->getTemplateParameters());
}
}
void
ClassTemplatePartialSpecializationDecl::Profile(llvm::FoldingSetNodeID &ID,
ArrayRef<TemplateArgument> TemplateArgs, TemplateParameterList *TPL,
ASTContext &Context) {
ID.AddInteger(TemplateArgs.size());
for (const TemplateArgument &TemplateArg : TemplateArgs)
TemplateArg.Profile(ID, Context);
ProfileTemplateParameterList(Context, ID, TPL);
}
void ClassTemplateDecl::AddPartialSpecialization(
ClassTemplatePartialSpecializationDecl *D,
void *InsertPos) {
if (InsertPos)
getPartialSpecializations().InsertNode(D, InsertPos);
else {
ClassTemplatePartialSpecializationDecl *Existing
= getPartialSpecializations().GetOrInsertNode(D);
(void)Existing;
assert(Existing->isCanonicalDecl() && "Non-canonical specialization?");
}
if (ASTMutationListener *L = getASTMutationListener())
L->AddedCXXTemplateSpecialization(this, D);
}
void ClassTemplateDecl::getPartialSpecializations(
SmallVectorImpl<ClassTemplatePartialSpecializationDecl *> &PS) const {
llvm::FoldingSetVector<ClassTemplatePartialSpecializationDecl> &PartialSpecs
= getPartialSpecializations();
PS.clear();
PS.reserve(PartialSpecs.size());
for (ClassTemplatePartialSpecializationDecl &P : PartialSpecs)
PS.push_back(P.getMostRecentDecl());
}
ClassTemplatePartialSpecializationDecl *
ClassTemplateDecl::findPartialSpecialization(QualType T) {
ASTContext &Context = getASTContext();
for (ClassTemplatePartialSpecializationDecl &P :
getPartialSpecializations()) {
if (Context.hasSameType(P.getInjectedSpecializationType(), T))
return P.getMostRecentDecl();
}
return nullptr;
}
ClassTemplatePartialSpecializationDecl *
ClassTemplateDecl::findPartialSpecInstantiatedFromMember(
ClassTemplatePartialSpecializationDecl *D) {
Decl *DCanon = D->getCanonicalDecl();
for (ClassTemplatePartialSpecializationDecl &P : getPartialSpecializations()) {
if (P.getInstantiatedFromMember()->getCanonicalDecl() == DCanon)
return P.getMostRecentDecl();
}
return nullptr;
}
QualType
ClassTemplateDecl::getInjectedClassNameSpecialization() {
Common *CommonPtr = getCommonPtr();
if (!CommonPtr->InjectedClassNameType.isNull())
return CommonPtr->InjectedClassNameType;
// C++0x [temp.dep.type]p2:
// The template argument list of a primary template is a template argument
// list in which the nth template argument has the value of the nth template
// parameter of the class template. If the nth template parameter is a
// template parameter pack (14.5.3), the nth template argument is a pack
// expansion (14.5.3) whose pattern is the name of the template parameter
// pack.
ASTContext &Context = getASTContext();
TemplateParameterList *Params = getTemplateParameters();
SmallVector<TemplateArgument, 16> TemplateArgs;
Context.getInjectedTemplateArgs(Params, TemplateArgs);
CommonPtr->InjectedClassNameType
= Context.getTemplateSpecializationType(TemplateName(this),
TemplateArgs);
return CommonPtr->InjectedClassNameType;
}
//===----------------------------------------------------------------------===//
// TemplateTypeParm Allocation/Deallocation Method Implementations
//===----------------------------------------------------------------------===//
TemplateTypeParmDecl *
TemplateTypeParmDecl::Create(const ASTContext &C, DeclContext *DC,
SourceLocation KeyLoc, SourceLocation NameLoc,
unsigned D, unsigned P, IdentifierInfo *Id,
bool Typename, bool ParameterPack,
bool HasTypeConstraint,
Optional<unsigned> NumExpanded) {
auto *TTPDecl =
new (C, DC,
additionalSizeToAlloc<TypeConstraint>(HasTypeConstraint ? 1 : 0))
TemplateTypeParmDecl(DC, KeyLoc, NameLoc, Id, Typename,
HasTypeConstraint, NumExpanded);
QualType TTPType = C.getTemplateTypeParmType(D, P, ParameterPack, TTPDecl);
TTPDecl->setTypeForDecl(TTPType.getTypePtr());
return TTPDecl;
}
TemplateTypeParmDecl *
TemplateTypeParmDecl::CreateDeserialized(const ASTContext &C, unsigned ID) {
return new (C, ID) TemplateTypeParmDecl(nullptr, SourceLocation(),
SourceLocation(), nullptr, false,
false, None);
}
TemplateTypeParmDecl *
TemplateTypeParmDecl::CreateDeserialized(const ASTContext &C, unsigned ID,
bool HasTypeConstraint) {
return new (C, ID,
additionalSizeToAlloc<TypeConstraint>(HasTypeConstraint ? 1 : 0))
TemplateTypeParmDecl(nullptr, SourceLocation(), SourceLocation(),
nullptr, false, HasTypeConstraint, None);
}
SourceLocation TemplateTypeParmDecl::getDefaultArgumentLoc() const {
return hasDefaultArgument()
? getDefaultArgumentInfo()->getTypeLoc().getBeginLoc()
: SourceLocation();
}
SourceRange TemplateTypeParmDecl::getSourceRange() const {
if (hasDefaultArgument() && !defaultArgumentWasInherited())
return SourceRange(getBeginLoc(),
getDefaultArgumentInfo()->getTypeLoc().getEndLoc());
// TypeDecl::getSourceRange returns a range containing name location, which is
// wrong for unnamed template parameters. e.g:
// it will return <[[typename>]] instead of <[[typename]]>
else if (getDeclName().isEmpty())
return SourceRange(getBeginLoc());
return TypeDecl::getSourceRange();
}
unsigned TemplateTypeParmDecl::getDepth() const {
return getTypeForDecl()->castAs<TemplateTypeParmType>()->getDepth();
}
unsigned TemplateTypeParmDecl::getIndex() const {
return getTypeForDecl()->castAs<TemplateTypeParmType>()->getIndex();
}
bool TemplateTypeParmDecl::isParameterPack() const {
return getTypeForDecl()->castAs<TemplateTypeParmType>()->isParameterPack();
}
void TemplateTypeParmDecl::setTypeConstraint(NestedNameSpecifierLoc NNS,
DeclarationNameInfo NameInfo, NamedDecl *FoundDecl, ConceptDecl *CD,
const ASTTemplateArgumentListInfo *ArgsAsWritten,
Expr *ImmediatelyDeclaredConstraint) {
assert(HasTypeConstraint &&
"HasTypeConstraint=true must be passed at construction in order to "
"call setTypeConstraint");
assert(!TypeConstraintInitialized &&
"TypeConstraint was already initialized!");
new (getTrailingObjects<TypeConstraint>()) TypeConstraint(NNS, NameInfo,
FoundDecl, CD, ArgsAsWritten, ImmediatelyDeclaredConstraint);
TypeConstraintInitialized = true;
}
//===----------------------------------------------------------------------===//
// NonTypeTemplateParmDecl Method Implementations
//===----------------------------------------------------------------------===//
NonTypeTemplateParmDecl::NonTypeTemplateParmDecl(
DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, unsigned D,
unsigned P, IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo,
ArrayRef<QualType> ExpandedTypes, ArrayRef<TypeSourceInfo *> ExpandedTInfos)
: DeclaratorDecl(NonTypeTemplateParm, DC, IdLoc, Id, T, TInfo, StartLoc),
TemplateParmPosition(D, P), ParameterPack(true),
ExpandedParameterPack(true), NumExpandedTypes(ExpandedTypes.size()) {
if (!ExpandedTypes.empty() && !ExpandedTInfos.empty()) {
auto TypesAndInfos =
getTrailingObjects<std::pair<QualType, TypeSourceInfo *>>();
for (unsigned I = 0; I != NumExpandedTypes; ++I) {
new (&TypesAndInfos[I].first) QualType(ExpandedTypes[I]);
TypesAndInfos[I].second = ExpandedTInfos[I];
}
}
}
NonTypeTemplateParmDecl *
NonTypeTemplateParmDecl::Create(const ASTContext &C, DeclContext *DC,
SourceLocation StartLoc, SourceLocation IdLoc,
unsigned D, unsigned P, IdentifierInfo *Id,
QualType T, bool ParameterPack,
TypeSourceInfo *TInfo) {
AutoType *AT =
C.getLangOpts().CPlusPlus20 ? T->getContainedAutoType() : nullptr;
return new (C, DC,
additionalSizeToAlloc<std::pair<QualType, TypeSourceInfo *>,
Expr *>(0,
AT && AT->isConstrained() ? 1 : 0))
NonTypeTemplateParmDecl(DC, StartLoc, IdLoc, D, P, Id, T, ParameterPack,
TInfo);
}
NonTypeTemplateParmDecl *NonTypeTemplateParmDecl::Create(
const ASTContext &C, DeclContext *DC, SourceLocation StartLoc,
SourceLocation IdLoc, unsigned D, unsigned P, IdentifierInfo *Id,
QualType T, TypeSourceInfo *TInfo, ArrayRef<QualType> ExpandedTypes,
ArrayRef<TypeSourceInfo *> ExpandedTInfos) {
AutoType *AT = TInfo->getType()->getContainedAutoType();
return new (C, DC,
additionalSizeToAlloc<std::pair<QualType, TypeSourceInfo *>,
Expr *>(
ExpandedTypes.size(), AT && AT->isConstrained() ? 1 : 0))
NonTypeTemplateParmDecl(DC, StartLoc, IdLoc, D, P, Id, T, TInfo,
ExpandedTypes, ExpandedTInfos);
}
NonTypeTemplateParmDecl *
NonTypeTemplateParmDecl::CreateDeserialized(ASTContext &C, unsigned ID,
bool HasTypeConstraint) {
return new (C, ID, additionalSizeToAlloc<std::pair<QualType,
TypeSourceInfo *>,
Expr *>(0,
HasTypeConstraint ? 1 : 0))
NonTypeTemplateParmDecl(nullptr, SourceLocation(), SourceLocation(),
0, 0, nullptr, QualType(), false, nullptr);
}
NonTypeTemplateParmDecl *
NonTypeTemplateParmDecl::CreateDeserialized(ASTContext &C, unsigned ID,
unsigned NumExpandedTypes,
bool HasTypeConstraint) {
auto *NTTP =
new (C, ID, additionalSizeToAlloc<std::pair<QualType, TypeSourceInfo *>,
Expr *>(
NumExpandedTypes, HasTypeConstraint ? 1 : 0))
NonTypeTemplateParmDecl(nullptr, SourceLocation(), SourceLocation(),
0, 0, nullptr, QualType(), nullptr, None,
None);
NTTP->NumExpandedTypes = NumExpandedTypes;
return NTTP;
}
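// Illustrative note: the "expanded" overloads above cover a non-type template
// parameter pack whose declared type is itself a pack expansion, e.g.
// (hypothetical example)
//   template <typename ...Ts> struct Outer {
//     template <Ts ...Vals> struct Inner;
//   };
// once Outer<int, char> is instantiated, Vals has a known type per element,
// and those QualType/TypeSourceInfo pairs live in the trailing objects.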
SourceRange NonTypeTemplateParmDecl::getSourceRange() const {
if (hasDefaultArgument() && !defaultArgumentWasInherited())
return SourceRange(getOuterLocStart(),
getDefaultArgument()->getSourceRange().getEnd());
return DeclaratorDecl::getSourceRange();
}
SourceLocation NonTypeTemplateParmDecl::getDefaultArgumentLoc() const {
return hasDefaultArgument()
? getDefaultArgument()->getSourceRange().getBegin()
: SourceLocation();
}
//===----------------------------------------------------------------------===//
// TemplateTemplateParmDecl Method Implementations
//===----------------------------------------------------------------------===//
void TemplateTemplateParmDecl::anchor() {}
TemplateTemplateParmDecl::TemplateTemplateParmDecl(
DeclContext *DC, SourceLocation L, unsigned D, unsigned P,
IdentifierInfo *Id, TemplateParameterList *Params,
ArrayRef<TemplateParameterList *> Expansions)
: TemplateDecl(TemplateTemplateParm, DC, L, Id, Params),
TemplateParmPosition(D, P), ParameterPack(true),
ExpandedParameterPack(true), NumExpandedParams(Expansions.size()) {
if (!Expansions.empty())
std::uninitialized_copy(Expansions.begin(), Expansions.end(),
getTrailingObjects<TemplateParameterList *>());
}
TemplateTemplateParmDecl *
TemplateTemplateParmDecl::Create(const ASTContext &C, DeclContext *DC,
SourceLocation L, unsigned D, unsigned P,
bool ParameterPack, IdentifierInfo *Id,
TemplateParameterList *Params) {
return new (C, DC) TemplateTemplateParmDecl(DC, L, D, P, ParameterPack, Id,
Params);
}
TemplateTemplateParmDecl *
TemplateTemplateParmDecl::Create(const ASTContext &C, DeclContext *DC,
SourceLocation L, unsigned D, unsigned P,
IdentifierInfo *Id,
TemplateParameterList *Params,
ArrayRef<TemplateParameterList *> Expansions) {
return new (C, DC,
additionalSizeToAlloc<TemplateParameterList *>(Expansions.size()))
TemplateTemplateParmDecl(DC, L, D, P, Id, Params, Expansions);
}
TemplateTemplateParmDecl *
TemplateTemplateParmDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID) TemplateTemplateParmDecl(nullptr, SourceLocation(), 0, 0,
false, nullptr, nullptr);
}
TemplateTemplateParmDecl *
TemplateTemplateParmDecl::CreateDeserialized(ASTContext &C, unsigned ID,
unsigned NumExpansions) {
auto *TTP =
new (C, ID, additionalSizeToAlloc<TemplateParameterList *>(NumExpansions))
TemplateTemplateParmDecl(nullptr, SourceLocation(), 0, 0, nullptr,
nullptr, None);
TTP->NumExpandedParams = NumExpansions;
return TTP;
}
SourceLocation TemplateTemplateParmDecl::getDefaultArgumentLoc() const {
return hasDefaultArgument() ? getDefaultArgument().getLocation()
: SourceLocation();
}
void TemplateTemplateParmDecl::setDefaultArgument(
const ASTContext &C, const TemplateArgumentLoc &DefArg) {
if (DefArg.getArgument().isNull())
DefaultArgument.set(nullptr);
else
DefaultArgument.set(new (C) TemplateArgumentLoc(DefArg));
}
//===----------------------------------------------------------------------===//
// TemplateArgumentList Implementation
//===----------------------------------------------------------------------===//
TemplateArgumentList::TemplateArgumentList(ArrayRef<TemplateArgument> Args)
: Arguments(getTrailingObjects<TemplateArgument>()),
NumArguments(Args.size()) {
std::uninitialized_copy(Args.begin(), Args.end(),
getTrailingObjects<TemplateArgument>());
}
TemplateArgumentList *
TemplateArgumentList::CreateCopy(ASTContext &Context,
ArrayRef<TemplateArgument> Args) {
void *Mem = Context.Allocate(totalSizeToAlloc<TemplateArgument>(Args.size()));
return new (Mem) TemplateArgumentList(Args);
}
FunctionTemplateSpecializationInfo *FunctionTemplateSpecializationInfo::Create(
ASTContext &C, FunctionDecl *FD, FunctionTemplateDecl *Template,
TemplateSpecializationKind TSK, const TemplateArgumentList *TemplateArgs,
const TemplateArgumentListInfo *TemplateArgsAsWritten, SourceLocation POI,
MemberSpecializationInfo *MSInfo) {
const ASTTemplateArgumentListInfo *ArgsAsWritten = nullptr;
if (TemplateArgsAsWritten)
ArgsAsWritten = ASTTemplateArgumentListInfo::Create(C,
*TemplateArgsAsWritten);
void *Mem =
C.Allocate(totalSizeToAlloc<MemberSpecializationInfo *>(MSInfo ? 1 : 0));
return new (Mem) FunctionTemplateSpecializationInfo(
FD, Template, TSK, TemplateArgs, ArgsAsWritten, POI, MSInfo);
}
//===----------------------------------------------------------------------===//
// ClassTemplateSpecializationDecl Implementation
//===----------------------------------------------------------------------===//
ClassTemplateSpecializationDecl::
ClassTemplateSpecializationDecl(ASTContext &Context, Kind DK, TagKind TK,
DeclContext *DC, SourceLocation StartLoc,
SourceLocation IdLoc,
ClassTemplateDecl *SpecializedTemplate,
ArrayRef<TemplateArgument> Args,
ClassTemplateSpecializationDecl *PrevDecl)
: CXXRecordDecl(DK, TK, Context, DC, StartLoc, IdLoc,
SpecializedTemplate->getIdentifier(), PrevDecl),
SpecializedTemplate(SpecializedTemplate),
TemplateArgs(TemplateArgumentList::CreateCopy(Context, Args)),
SpecializationKind(TSK_Undeclared) {
}
ClassTemplateSpecializationDecl::ClassTemplateSpecializationDecl(ASTContext &C,
Kind DK)
: CXXRecordDecl(DK, TTK_Struct, C, nullptr, SourceLocation(),
SourceLocation(), nullptr, nullptr),
SpecializationKind(TSK_Undeclared) {}
ClassTemplateSpecializationDecl *
ClassTemplateSpecializationDecl::Create(ASTContext &Context, TagKind TK,
DeclContext *DC,
SourceLocation StartLoc,
SourceLocation IdLoc,
ClassTemplateDecl *SpecializedTemplate,
ArrayRef<TemplateArgument> Args,
ClassTemplateSpecializationDecl *PrevDecl) {
auto *Result =
new (Context, DC) ClassTemplateSpecializationDecl(
Context, ClassTemplateSpecialization, TK, DC, StartLoc, IdLoc,
SpecializedTemplate, Args, PrevDecl);
Result->setMayHaveOutOfDateDef(false);
Context.getTypeDeclType(Result, PrevDecl);
return Result;
}
ClassTemplateSpecializationDecl *
ClassTemplateSpecializationDecl::CreateDeserialized(ASTContext &C,
unsigned ID) {
auto *Result =
new (C, ID) ClassTemplateSpecializationDecl(C, ClassTemplateSpecialization);
Result->setMayHaveOutOfDateDef(false);
return Result;
}
void ClassTemplateSpecializationDecl::getNameForDiagnostic(
raw_ostream &OS, const PrintingPolicy &Policy, bool Qualified) const {
NamedDecl::getNameForDiagnostic(OS, Policy, Qualified);
const auto *PS = dyn_cast<ClassTemplatePartialSpecializationDecl>(this);
if (const ASTTemplateArgumentListInfo *ArgsAsWritten =
PS ? PS->getTemplateArgsAsWritten() : nullptr) {
printTemplateArgumentList(
OS, ArgsAsWritten->arguments(), Policy,
getSpecializedTemplate()->getTemplateParameters());
} else {
const TemplateArgumentList &TemplateArgs = getTemplateArgs();
printTemplateArgumentList(
OS, TemplateArgs.asArray(), Policy,
getSpecializedTemplate()->getTemplateParameters());
}
}
ClassTemplateDecl *
ClassTemplateSpecializationDecl::getSpecializedTemplate() const {
if (const auto *PartialSpec =
SpecializedTemplate.dyn_cast<SpecializedPartialSpecialization*>())
return PartialSpec->PartialSpecialization->getSpecializedTemplate();
return SpecializedTemplate.get<ClassTemplateDecl*>();
}
SourceRange
ClassTemplateSpecializationDecl::getSourceRange() const {
if (ExplicitInfo) {
SourceLocation Begin = getTemplateKeywordLoc();
if (Begin.isValid()) {
// Here we have an explicit (partial) specialization or instantiation.
assert(getSpecializationKind() == TSK_ExplicitSpecialization ||
getSpecializationKind() == TSK_ExplicitInstantiationDeclaration ||
getSpecializationKind() == TSK_ExplicitInstantiationDefinition);
if (getExternLoc().isValid())
Begin = getExternLoc();
SourceLocation End = getBraceRange().getEnd();
if (End.isInvalid())
End = getTypeAsWritten()->getTypeLoc().getEndLoc();
return SourceRange(Begin, End);
}
// An implicit instantiation of a class template partial specialization
// uses ExplicitInfo to record the TypeAsWritten, but the source
// locations should be retrieved from the instantiation pattern.
using CTPSDecl = ClassTemplatePartialSpecializationDecl;
auto *ctpsd = const_cast<CTPSDecl *>(cast<CTPSDecl>(this));
CTPSDecl *inst_from = ctpsd->getInstantiatedFromMember();
assert(inst_from != nullptr);
return inst_from->getSourceRange();
}
else {
// No explicit info available.
llvm::PointerUnion<ClassTemplateDecl *,
ClassTemplatePartialSpecializationDecl *>
inst_from = getInstantiatedFrom();
if (inst_from.isNull())
return getSpecializedTemplate()->getSourceRange();
if (const auto *ctd = inst_from.dyn_cast<ClassTemplateDecl *>())
return ctd->getSourceRange();
return inst_from.get<ClassTemplatePartialSpecializationDecl *>()
->getSourceRange();
}
}
//===----------------------------------------------------------------------===//
// ConceptDecl Implementation
//===----------------------------------------------------------------------===//
ConceptDecl *ConceptDecl::Create(ASTContext &C, DeclContext *DC,
SourceLocation L, DeclarationName Name,
TemplateParameterList *Params,
Expr *ConstraintExpr) {
bool Invalid = AdoptTemplateParameterList(Params, DC);
auto *TD = new (C, DC) ConceptDecl(DC, L, Name, Params, ConstraintExpr);
if (Invalid)
TD->setInvalidDecl();
return TD;
}
ConceptDecl *ConceptDecl::CreateDeserialized(ASTContext &C,
unsigned ID) {
ConceptDecl *Result = new (C, ID) ConceptDecl(nullptr, SourceLocation(),
DeclarationName(),
nullptr, nullptr);
return Result;
}
//===----------------------------------------------------------------------===//
// ClassTemplatePartialSpecializationDecl Implementation
//===----------------------------------------------------------------------===//
void ClassTemplatePartialSpecializationDecl::anchor() {}
ClassTemplatePartialSpecializationDecl::
ClassTemplatePartialSpecializationDecl(ASTContext &Context, TagKind TK,
DeclContext *DC,
SourceLocation StartLoc,
SourceLocation IdLoc,
TemplateParameterList *Params,
ClassTemplateDecl *SpecializedTemplate,
ArrayRef<TemplateArgument> Args,
const ASTTemplateArgumentListInfo *ArgInfos,
ClassTemplatePartialSpecializationDecl *PrevDecl)
: ClassTemplateSpecializationDecl(Context,
ClassTemplatePartialSpecialization,
TK, DC, StartLoc, IdLoc,
SpecializedTemplate, Args, PrevDecl),
TemplateParams(Params), ArgsAsWritten(ArgInfos),
InstantiatedFromMember(nullptr, false) {
if (AdoptTemplateParameterList(Params, this))
setInvalidDecl();
}
ClassTemplatePartialSpecializationDecl *
ClassTemplatePartialSpecializationDecl::
Create(ASTContext &Context, TagKind TK,DeclContext *DC,
SourceLocation StartLoc, SourceLocation IdLoc,
TemplateParameterList *Params,
ClassTemplateDecl *SpecializedTemplate,
ArrayRef<TemplateArgument> Args,
const TemplateArgumentListInfo &ArgInfos,
QualType CanonInjectedType,
ClassTemplatePartialSpecializationDecl *PrevDecl) {
const ASTTemplateArgumentListInfo *ASTArgInfos =
ASTTemplateArgumentListInfo::Create(Context, ArgInfos);
auto *Result = new (Context, DC)
ClassTemplatePartialSpecializationDecl(Context, TK, DC, StartLoc, IdLoc,
Params, SpecializedTemplate, Args,
ASTArgInfos, PrevDecl);
Result->setSpecializationKind(TSK_ExplicitSpecialization);
Result->setMayHaveOutOfDateDef(false);
Context.getInjectedClassNameType(Result, CanonInjectedType);
return Result;
}
ClassTemplatePartialSpecializationDecl *
ClassTemplatePartialSpecializationDecl::CreateDeserialized(ASTContext &C,
unsigned ID) {
auto *Result = new (C, ID) ClassTemplatePartialSpecializationDecl(C);
Result->setMayHaveOutOfDateDef(false);
return Result;
}
//===----------------------------------------------------------------------===//
// FriendTemplateDecl Implementation
//===----------------------------------------------------------------------===//
void FriendTemplateDecl::anchor() {}
FriendTemplateDecl *
FriendTemplateDecl::Create(ASTContext &Context, DeclContext *DC,
SourceLocation L,
MutableArrayRef<TemplateParameterList *> Params,
FriendUnion Friend, SourceLocation FLoc) {
- return new (Context, DC) FriendTemplateDecl(DC, L, Params, Friend, FLoc);
+ TemplateParameterList **TPL = nullptr;
+ if (!Params.empty()) {
+ TPL = new (Context) TemplateParameterList *[Params.size()];
+ llvm::copy(Params, TPL);
+ }
+ return new (Context, DC)
+ FriendTemplateDecl(DC, L, TPL, Params.size(), Friend, FLoc);
}
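// Note on the variant above: the template parameter lists are first copied
// into ASTContext-allocated storage, so the resulting FriendTemplateDecl does
// not depend on the caller-owned Params array staying alive.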
FriendTemplateDecl *FriendTemplateDecl::CreateDeserialized(ASTContext &C,
unsigned ID) {
return new (C, ID) FriendTemplateDecl(EmptyShell());
}
//===----------------------------------------------------------------------===//
// TypeAliasTemplateDecl Implementation
//===----------------------------------------------------------------------===//
TypeAliasTemplateDecl *
TypeAliasTemplateDecl::Create(ASTContext &C, DeclContext *DC, SourceLocation L,
DeclarationName Name,
TemplateParameterList *Params, NamedDecl *Decl) {
bool Invalid = AdoptTemplateParameterList(Params, DC);
auto *TD = new (C, DC) TypeAliasTemplateDecl(C, DC, L, Name, Params, Decl);
if (Invalid)
TD->setInvalidDecl();
return TD;
}
TypeAliasTemplateDecl *TypeAliasTemplateDecl::CreateDeserialized(ASTContext &C,
unsigned ID) {
return new (C, ID) TypeAliasTemplateDecl(C, nullptr, SourceLocation(),
DeclarationName(), nullptr, nullptr);
}
RedeclarableTemplateDecl::CommonBase *
TypeAliasTemplateDecl::newCommon(ASTContext &C) const {
auto *CommonPtr = new (C) Common;
C.addDestruction(CommonPtr);
return CommonPtr;
}
//===----------------------------------------------------------------------===//
// ClassScopeFunctionSpecializationDecl Implementation
//===----------------------------------------------------------------------===//
void ClassScopeFunctionSpecializationDecl::anchor() {}
ClassScopeFunctionSpecializationDecl *
ClassScopeFunctionSpecializationDecl::CreateDeserialized(ASTContext &C,
unsigned ID) {
return new (C, ID) ClassScopeFunctionSpecializationDecl(
nullptr, SourceLocation(), nullptr, nullptr);
}
//===----------------------------------------------------------------------===//
// VarTemplateDecl Implementation
//===----------------------------------------------------------------------===//
VarTemplateDecl *VarTemplateDecl::getDefinition() {
VarTemplateDecl *CurD = this;
while (CurD) {
if (CurD->isThisDeclarationADefinition())
return CurD;
CurD = CurD->getPreviousDecl();
}
return nullptr;
}
VarTemplateDecl *VarTemplateDecl::Create(ASTContext &C, DeclContext *DC,
SourceLocation L, DeclarationName Name,
TemplateParameterList *Params,
VarDecl *Decl) {
bool Invalid = AdoptTemplateParameterList(Params, DC);
auto *TD = new (C, DC) VarTemplateDecl(C, DC, L, Name, Params, Decl);
if (Invalid)
TD->setInvalidDecl();
return TD;
}
VarTemplateDecl *VarTemplateDecl::CreateDeserialized(ASTContext &C,
unsigned ID) {
return new (C, ID) VarTemplateDecl(C, nullptr, SourceLocation(),
DeclarationName(), nullptr, nullptr);
}
void VarTemplateDecl::LoadLazySpecializations() const {
loadLazySpecializationsImpl();
}
llvm::FoldingSetVector<VarTemplateSpecializationDecl> &
VarTemplateDecl::getSpecializations() const {
LoadLazySpecializations();
return getCommonPtr()->Specializations;
}
llvm::FoldingSetVector<VarTemplatePartialSpecializationDecl> &
VarTemplateDecl::getPartialSpecializations() const {
LoadLazySpecializations();
return getCommonPtr()->PartialSpecializations;
}
RedeclarableTemplateDecl::CommonBase *
VarTemplateDecl::newCommon(ASTContext &C) const {
auto *CommonPtr = new (C) Common;
C.addDestruction(CommonPtr);
return CommonPtr;
}
VarTemplateSpecializationDecl *
VarTemplateDecl::findSpecialization(ArrayRef<TemplateArgument> Args,
void *&InsertPos) {
return findSpecializationImpl(getSpecializations(), InsertPos, Args);
}
void VarTemplateDecl::AddSpecialization(VarTemplateSpecializationDecl *D,
void *InsertPos) {
addSpecializationImpl<VarTemplateDecl>(getSpecializations(), D, InsertPos);
}
VarTemplatePartialSpecializationDecl *
VarTemplateDecl::findPartialSpecialization(ArrayRef<TemplateArgument> Args,
TemplateParameterList *TPL, void *&InsertPos) {
return findSpecializationImpl(getPartialSpecializations(), InsertPos, Args,
TPL);
}
void
VarTemplatePartialSpecializationDecl::Profile(llvm::FoldingSetNodeID &ID,
ArrayRef<TemplateArgument> TemplateArgs, TemplateParameterList *TPL,
ASTContext &Context) {
ID.AddInteger(TemplateArgs.size());
for (const TemplateArgument &TemplateArg : TemplateArgs)
TemplateArg.Profile(ID, Context);
ProfileTemplateParameterList(Context, ID, TPL);
}
void VarTemplateDecl::AddPartialSpecialization(
VarTemplatePartialSpecializationDecl *D, void *InsertPos) {
if (InsertPos)
getPartialSpecializations().InsertNode(D, InsertPos);
else {
VarTemplatePartialSpecializationDecl *Existing =
getPartialSpecializations().GetOrInsertNode(D);
(void)Existing;
assert(Existing->isCanonicalDecl() && "Non-canonical specialization?");
}
if (ASTMutationListener *L = getASTMutationListener())
L->AddedCXXTemplateSpecialization(this, D);
}
void VarTemplateDecl::getPartialSpecializations(
SmallVectorImpl<VarTemplatePartialSpecializationDecl *> &PS) const {
llvm::FoldingSetVector<VarTemplatePartialSpecializationDecl> &PartialSpecs =
getPartialSpecializations();
PS.clear();
PS.reserve(PartialSpecs.size());
for (VarTemplatePartialSpecializationDecl &P : PartialSpecs)
PS.push_back(P.getMostRecentDecl());
}
VarTemplatePartialSpecializationDecl *
VarTemplateDecl::findPartialSpecInstantiatedFromMember(
VarTemplatePartialSpecializationDecl *D) {
Decl *DCanon = D->getCanonicalDecl();
for (VarTemplatePartialSpecializationDecl &P : getPartialSpecializations()) {
if (P.getInstantiatedFromMember()->getCanonicalDecl() == DCanon)
return P.getMostRecentDecl();
}
return nullptr;
}
//===----------------------------------------------------------------------===//
// VarTemplateSpecializationDecl Implementation
//===----------------------------------------------------------------------===//
VarTemplateSpecializationDecl::VarTemplateSpecializationDecl(
Kind DK, ASTContext &Context, DeclContext *DC, SourceLocation StartLoc,
SourceLocation IdLoc, VarTemplateDecl *SpecializedTemplate, QualType T,
TypeSourceInfo *TInfo, StorageClass S, ArrayRef<TemplateArgument> Args)
: VarDecl(DK, Context, DC, StartLoc, IdLoc,
SpecializedTemplate->getIdentifier(), T, TInfo, S),
SpecializedTemplate(SpecializedTemplate),
TemplateArgs(TemplateArgumentList::CreateCopy(Context, Args)),
SpecializationKind(TSK_Undeclared), IsCompleteDefinition(false) {}
VarTemplateSpecializationDecl::VarTemplateSpecializationDecl(Kind DK,
ASTContext &C)
: VarDecl(DK, C, nullptr, SourceLocation(), SourceLocation(), nullptr,
QualType(), nullptr, SC_None),
SpecializationKind(TSK_Undeclared), IsCompleteDefinition(false) {}
VarTemplateSpecializationDecl *VarTemplateSpecializationDecl::Create(
ASTContext &Context, DeclContext *DC, SourceLocation StartLoc,
SourceLocation IdLoc, VarTemplateDecl *SpecializedTemplate, QualType T,
TypeSourceInfo *TInfo, StorageClass S, ArrayRef<TemplateArgument> Args) {
return new (Context, DC) VarTemplateSpecializationDecl(
VarTemplateSpecialization, Context, DC, StartLoc, IdLoc,
SpecializedTemplate, T, TInfo, S, Args);
}
VarTemplateSpecializationDecl *
VarTemplateSpecializationDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
return new (C, ID)
VarTemplateSpecializationDecl(VarTemplateSpecialization, C);
}
void VarTemplateSpecializationDecl::getNameForDiagnostic(
raw_ostream &OS, const PrintingPolicy &Policy, bool Qualified) const {
NamedDecl::getNameForDiagnostic(OS, Policy, Qualified);
const auto *PS = dyn_cast<VarTemplatePartialSpecializationDecl>(this);
if (const ASTTemplateArgumentListInfo *ArgsAsWritten =
PS ? PS->getTemplateArgsAsWritten() : nullptr) {
printTemplateArgumentList(
OS, ArgsAsWritten->arguments(), Policy,
getSpecializedTemplate()->getTemplateParameters());
} else {
const TemplateArgumentList &TemplateArgs = getTemplateArgs();
printTemplateArgumentList(
OS, TemplateArgs.asArray(), Policy,
getSpecializedTemplate()->getTemplateParameters());
}
}
VarTemplateDecl *VarTemplateSpecializationDecl::getSpecializedTemplate() const {
if (const auto *PartialSpec =
SpecializedTemplate.dyn_cast<SpecializedPartialSpecialization *>())
return PartialSpec->PartialSpecialization->getSpecializedTemplate();
return SpecializedTemplate.get<VarTemplateDecl *>();
}
void VarTemplateSpecializationDecl::setTemplateArgsInfo(
const TemplateArgumentListInfo &ArgsInfo) {
TemplateArgsInfo.setLAngleLoc(ArgsInfo.getLAngleLoc());
TemplateArgsInfo.setRAngleLoc(ArgsInfo.getRAngleLoc());
for (const TemplateArgumentLoc &Loc : ArgsInfo.arguments())
TemplateArgsInfo.addArgument(Loc);
}
//===----------------------------------------------------------------------===//
// VarTemplatePartialSpecializationDecl Implementation
//===----------------------------------------------------------------------===//
void VarTemplatePartialSpecializationDecl::anchor() {}
VarTemplatePartialSpecializationDecl::VarTemplatePartialSpecializationDecl(
ASTContext &Context, DeclContext *DC, SourceLocation StartLoc,
SourceLocation IdLoc, TemplateParameterList *Params,
VarTemplateDecl *SpecializedTemplate, QualType T, TypeSourceInfo *TInfo,
StorageClass S, ArrayRef<TemplateArgument> Args,
const ASTTemplateArgumentListInfo *ArgInfos)
: VarTemplateSpecializationDecl(VarTemplatePartialSpecialization, Context,
DC, StartLoc, IdLoc, SpecializedTemplate, T,
TInfo, S, Args),
TemplateParams(Params), ArgsAsWritten(ArgInfos),
InstantiatedFromMember(nullptr, false) {
if (AdoptTemplateParameterList(Params, DC))
setInvalidDecl();
}
VarTemplatePartialSpecializationDecl *
VarTemplatePartialSpecializationDecl::Create(
ASTContext &Context, DeclContext *DC, SourceLocation StartLoc,
SourceLocation IdLoc, TemplateParameterList *Params,
VarTemplateDecl *SpecializedTemplate, QualType T, TypeSourceInfo *TInfo,
StorageClass S, ArrayRef<TemplateArgument> Args,
const TemplateArgumentListInfo &ArgInfos) {
const ASTTemplateArgumentListInfo *ASTArgInfos
= ASTTemplateArgumentListInfo::Create(Context, ArgInfos);
auto *Result =
new (Context, DC) VarTemplatePartialSpecializationDecl(
Context, DC, StartLoc, IdLoc, Params, SpecializedTemplate, T, TInfo,
S, Args, ASTArgInfos);
Result->setSpecializationKind(TSK_ExplicitSpecialization);
return Result;
}
VarTemplatePartialSpecializationDecl *
VarTemplatePartialSpecializationDecl::CreateDeserialized(ASTContext &C,
unsigned ID) {
return new (C, ID) VarTemplatePartialSpecializationDecl(C);
}
static TemplateParameterList *
createMakeIntegerSeqParameterList(const ASTContext &C, DeclContext *DC) {
// typename T
auto *T = TemplateTypeParmDecl::Create(
C, DC, SourceLocation(), SourceLocation(), /*Depth=*/1, /*Position=*/0,
/*Id=*/nullptr, /*Typename=*/true, /*ParameterPack=*/false,
/*HasTypeConstraint=*/false);
T->setImplicit(true);
// T ...Ints
TypeSourceInfo *TI =
C.getTrivialTypeSourceInfo(QualType(T->getTypeForDecl(), 0));
auto *N = NonTypeTemplateParmDecl::Create(
C, DC, SourceLocation(), SourceLocation(), /*Depth=*/0, /*Position=*/1,
/*Id=*/nullptr, TI->getType(), /*ParameterPack=*/true, TI);
N->setImplicit(true);
// <typename T, T ...Ints>
NamedDecl *P[2] = {T, N};
auto *TPL = TemplateParameterList::Create(
C, SourceLocation(), SourceLocation(), P, SourceLocation(), nullptr);
// template <typename T, ...Ints> class IntSeq
auto *TemplateTemplateParm = TemplateTemplateParmDecl::Create(
C, DC, SourceLocation(), /*Depth=*/0, /*Position=*/0,
/*ParameterPack=*/false, /*Id=*/nullptr, TPL);
TemplateTemplateParm->setImplicit(true);
// typename T
auto *TemplateTypeParm = TemplateTypeParmDecl::Create(
C, DC, SourceLocation(), SourceLocation(), /*Depth=*/0, /*Position=*/1,
/*Id=*/nullptr, /*Typename=*/true, /*ParameterPack=*/false,
/*HasTypeConstraint=*/false);
TemplateTypeParm->setImplicit(true);
// T N
TypeSourceInfo *TInfo = C.getTrivialTypeSourceInfo(
QualType(TemplateTypeParm->getTypeForDecl(), 0));
auto *NonTypeTemplateParm = NonTypeTemplateParmDecl::Create(
C, DC, SourceLocation(), SourceLocation(), /*Depth=*/0, /*Position=*/2,
/*Id=*/nullptr, TInfo->getType(), /*ParameterPack=*/false, TInfo);
NamedDecl *Params[] = {TemplateTemplateParm, TemplateTypeParm,
NonTypeTemplateParm};
// template <template <typename T, T ...Ints> class IntSeq, typename T, T N>
return TemplateParameterList::Create(C, SourceLocation(), SourceLocation(),
Params, SourceLocation(), nullptr);
}
static TemplateParameterList *
createTypePackElementParameterList(const ASTContext &C, DeclContext *DC) {
// std::size_t Index
TypeSourceInfo *TInfo = C.getTrivialTypeSourceInfo(C.getSizeType());
auto *Index = NonTypeTemplateParmDecl::Create(
C, DC, SourceLocation(), SourceLocation(), /*Depth=*/0, /*Position=*/0,
/*Id=*/nullptr, TInfo->getType(), /*ParameterPack=*/false, TInfo);
// typename ...T
auto *Ts = TemplateTypeParmDecl::Create(
C, DC, SourceLocation(), SourceLocation(), /*Depth=*/0, /*Position=*/1,
/*Id=*/nullptr, /*Typename=*/true, /*ParameterPack=*/true,
/*HasTypeConstraint=*/false);
Ts->setImplicit(true);
// template <std::size_t Index, typename ...T>
NamedDecl *Params[] = {Index, Ts};
return TemplateParameterList::Create(C, SourceLocation(), SourceLocation(),
llvm::makeArrayRef(Params),
SourceLocation(), nullptr);
}
static TemplateParameterList *createBuiltinTemplateParameterList(
const ASTContext &C, DeclContext *DC, BuiltinTemplateKind BTK) {
switch (BTK) {
case BTK__make_integer_seq:
return createMakeIntegerSeqParameterList(C, DC);
case BTK__type_pack_element:
return createTypePackElementParameterList(C, DC);
}
llvm_unreachable("unhandled BuiltinTemplateKind!");
}
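// Illustrative note: these parameter lists describe Clang's builtin templates.
// Roughly, __make_integer_seq<std::integer_sequence, int, 3> names
// std::integer_sequence<int, 0, 1, 2>, and __type_pack_element<1, int, char,
// float> names char; the depths and positions created above mirror those
// signatures.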
void BuiltinTemplateDecl::anchor() {}
BuiltinTemplateDecl::BuiltinTemplateDecl(const ASTContext &C, DeclContext *DC,
DeclarationName Name,
BuiltinTemplateKind BTK)
: TemplateDecl(BuiltinTemplate, DC, SourceLocation(), Name,
createBuiltinTemplateParameterList(C, DC, BTK)),
BTK(BTK) {}
void TypeConstraint::print(llvm::raw_ostream &OS, PrintingPolicy Policy) const {
if (NestedNameSpec)
NestedNameSpec.getNestedNameSpecifier()->print(OS, Policy);
ConceptName.printName(OS, Policy);
if (hasExplicitTemplateArgs()) {
OS << "<";
// FIXME: Find corresponding parameter for argument
for (auto &ArgLoc : ArgsAsWritten->arguments())
ArgLoc.getArgument().print(Policy, OS, /*IncludeType*/ false);
OS << ">";
}
}
TemplateParamObjectDecl *TemplateParamObjectDecl::Create(const ASTContext &C,
QualType T,
const APValue &V) {
DeclContext *DC = C.getTranslationUnitDecl();
auto *TPOD = new (C, DC) TemplateParamObjectDecl(DC, T, V);
C.addDestruction(&TPOD->Value);
return TPOD;
}
TemplateParamObjectDecl *
TemplateParamObjectDecl::CreateDeserialized(ASTContext &C, unsigned ID) {
auto *TPOD = new (C, ID) TemplateParamObjectDecl(nullptr, QualType(), APValue());
C.addDestruction(&TPOD->Value);
return TPOD;
}
void TemplateParamObjectDecl::printName(llvm::raw_ostream &OS) const {
OS << "<template param ";
printAsExpr(OS);
OS << ">";
}
void TemplateParamObjectDecl::printAsExpr(llvm::raw_ostream &OS) const {
const ASTContext &Ctx = getASTContext();
getType().getUnqualifiedType().print(OS, Ctx.getPrintingPolicy());
printAsInit(OS);
}
void TemplateParamObjectDecl::printAsInit(llvm::raw_ostream &OS) const {
const ASTContext &Ctx = getASTContext();
getValue().printPretty(OS, Ctx, getType());
}
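// Illustrative note: TemplateParamObjectDecl models the unique object behind a
// C++20 class-type non-type template argument, e.g. for
//   struct Tag { int v; };
//   template <Tag t> struct Use {};
// the argument in Use<Tag{42}> is printed by printName above as something
// like "<template param Tag{42}>".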
diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/CommonArgs.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 6364cd133e0b..dfcef2304040 100644
--- a/contrib/llvm-project/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -1,2053 +1,2053 @@
//===--- CommonArgs.cpp - Args handling for multiple toolchains -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "CommonArgs.h"
#include "Arch/AArch64.h"
#include "Arch/ARM.h"
#include "Arch/M68k.h"
#include "Arch/Mips.h"
#include "Arch/PPC.h"
#include "Arch/SystemZ.h"
#include "Arch/VE.h"
#include "Arch/X86.h"
#include "HIPAMD.h"
#include "Hexagon.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/ObjCRuntime.h"
#include "clang/Basic/Version.h"
#include "clang/Config/config.h"
#include "clang/Driver/Action.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/InputInfo.h"
#include "clang/Driver/Job.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/SanitizerArgs.h"
#include "clang/Driver/ToolChain.h"
#include "clang/Driver/Util.h"
#include "clang/Driver/XRayArgs.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/YAMLParser.h"
using namespace clang::driver;
using namespace clang::driver::tools;
using namespace clang;
using namespace llvm::opt;
static void renderRpassOptions(const ArgList &Args, ArgStringList &CmdArgs) {
if (const Arg *A = Args.getLastArg(options::OPT_Rpass_EQ))
CmdArgs.push_back(Args.MakeArgString(Twine("--plugin-opt=-pass-remarks=") +
A->getValue()));
if (const Arg *A = Args.getLastArg(options::OPT_Rpass_missed_EQ))
CmdArgs.push_back(Args.MakeArgString(
Twine("--plugin-opt=-pass-remarks-missed=") + A->getValue()));
if (const Arg *A = Args.getLastArg(options::OPT_Rpass_analysis_EQ))
CmdArgs.push_back(Args.MakeArgString(
Twine("--plugin-opt=-pass-remarks-analysis=") + A->getValue()));
}
static void renderRemarksOptions(const ArgList &Args, ArgStringList &CmdArgs,
const llvm::Triple &Triple,
const InputInfo &Input,
const InputInfo &Output) {
StringRef Format = "yaml";
if (const Arg *A = Args.getLastArg(options::OPT_fsave_optimization_record_EQ))
Format = A->getValue();
SmallString<128> F;
const Arg *A = Args.getLastArg(options::OPT_foptimization_record_file_EQ);
if (A)
F = A->getValue();
else if (Output.isFilename())
F = Output.getFilename();
assert(!F.empty() && "Cannot determine remarks output name.");
// Append "opt.ld.<format>" to the end of the file name.
CmdArgs.push_back(
Args.MakeArgString(Twine("--plugin-opt=opt-remarks-filename=") + F +
Twine(".opt.ld.") + Format));
if (const Arg *A =
Args.getLastArg(options::OPT_foptimization_record_passes_EQ))
CmdArgs.push_back(Args.MakeArgString(
Twine("--plugin-opt=opt-remarks-passes=") + A->getValue()));
CmdArgs.push_back(Args.MakeArgString(
Twine("--plugin-opt=opt-remarks-format=") + Format.data()));
}
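// Illustrative example: with -fsave-optimization-record and an output file
// named foo.o, the code above emits
//   --plugin-opt=opt-remarks-filename=foo.o.opt.ld.yaml
//   --plugin-opt=opt-remarks-format=yaml
// plus an opt-remarks-passes option when -foptimization-record-passes= is
// given.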
static void renderRemarksHotnessOptions(const ArgList &Args,
ArgStringList &CmdArgs) {
if (Args.hasFlag(options::OPT_fdiagnostics_show_hotness,
options::OPT_fno_diagnostics_show_hotness, false))
CmdArgs.push_back("--plugin-opt=opt-remarks-with-hotness");
if (const Arg *A =
Args.getLastArg(options::OPT_fdiagnostics_hotness_threshold_EQ))
CmdArgs.push_back(Args.MakeArgString(
Twine("--plugin-opt=opt-remarks-hotness-threshold=") + A->getValue()));
}
void tools::addPathIfExists(const Driver &D, const Twine &Path,
ToolChain::path_list &Paths) {
if (D.getVFS().exists(Path))
Paths.push_back(Path.str());
}
void tools::handleTargetFeaturesGroup(const ArgList &Args,
std::vector<StringRef> &Features,
OptSpecifier Group) {
for (const Arg *A : Args.filtered(Group)) {
StringRef Name = A->getOption().getName();
A->claim();
// Skip over "-m".
assert(Name.startswith("m") && "Invalid feature name.");
Name = Name.substr(1);
bool IsNegative = Name.startswith("no-");
if (IsNegative)
Name = Name.substr(3);
Features.push_back(Args.MakeArgString((IsNegative ? "-" : "+") + Name));
}
}
std::vector<StringRef>
tools::unifyTargetFeatures(const std::vector<StringRef> &Features) {
std::vector<StringRef> UnifiedFeatures;
// Find the last of each feature.
llvm::StringMap<unsigned> LastOpt;
for (unsigned I = 0, N = Features.size(); I < N; ++I) {
StringRef Name = Features[I];
assert(Name[0] == '-' || Name[0] == '+');
LastOpt[Name.drop_front(1)] = I;
}
for (unsigned I = 0, N = Features.size(); I < N; ++I) {
// If this feature was overridden, ignore it.
StringRef Name = Features[I];
llvm::StringMap<unsigned>::iterator LastI = LastOpt.find(Name.drop_front(1));
assert(LastI != LastOpt.end());
unsigned Last = LastI->second;
if (Last != I)
continue;
UnifiedFeatures.push_back(Name);
}
return UnifiedFeatures;
}
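// Illustrative example: for Features = {"+sse4.2", "-sse4.2", "+avx"} only the
// last occurrence of each feature name survives, so the unified list is
// {"-sse4.2", "+avx"}.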
void tools::addDirectoryList(const ArgList &Args, ArgStringList &CmdArgs,
const char *ArgName, const char *EnvVar) {
const char *DirList = ::getenv(EnvVar);
bool CombinedArg = false;
if (!DirList)
return; // Nothing to do.
StringRef Name(ArgName);
if (Name.equals("-I") || Name.equals("-L") || Name.empty())
CombinedArg = true;
StringRef Dirs(DirList);
if (Dirs.empty()) // Empty string should not add '.'.
return;
StringRef::size_type Delim;
while ((Delim = Dirs.find(llvm::sys::EnvPathSeparator)) != StringRef::npos) {
if (Delim == 0) { // Leading colon.
if (CombinedArg) {
CmdArgs.push_back(Args.MakeArgString(std::string(ArgName) + "."));
} else {
CmdArgs.push_back(ArgName);
CmdArgs.push_back(".");
}
} else {
if (CombinedArg) {
CmdArgs.push_back(
Args.MakeArgString(std::string(ArgName) + Dirs.substr(0, Delim)));
} else {
CmdArgs.push_back(ArgName);
CmdArgs.push_back(Args.MakeArgString(Dirs.substr(0, Delim)));
}
}
Dirs = Dirs.substr(Delim + 1);
}
if (Dirs.empty()) { // Trailing colon.
if (CombinedArg) {
CmdArgs.push_back(Args.MakeArgString(std::string(ArgName) + "."));
} else {
CmdArgs.push_back(ArgName);
CmdArgs.push_back(".");
}
} else { // Add the last path.
if (CombinedArg) {
CmdArgs.push_back(Args.MakeArgString(std::string(ArgName) + Dirs));
} else {
CmdArgs.push_back(ArgName);
CmdArgs.push_back(Args.MakeArgString(Dirs));
}
}
}
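// Illustrative example: on a Unix-style host with LIBRARY_PATH="/a::/b" and
// ArgName "-L", the loop above emits "-L/a", "-L." (for the empty middle
// entry) and "-L/b"; a non-combinable ArgName is instead pushed as separate
// argument/value pairs.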
void tools::AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs,
const ArgList &Args, ArgStringList &CmdArgs,
const JobAction &JA) {
const Driver &D = TC.getDriver();
// Add extra linker input arguments which are not treated as inputs
// (constructed via -Xarch_).
Args.AddAllArgValues(CmdArgs, options::OPT_Zlinker_input);
  // LIBRARY_PATH directories are included before user inputs and are only
  // supported on native toolchains.
if (!TC.isCrossCompiling())
addDirectoryList(Args, CmdArgs, "-L", "LIBRARY_PATH");
for (const auto &II : Inputs) {
// If the current tool chain refers to an OpenMP offloading host, we
// should ignore inputs that refer to OpenMP offloading devices -
// they will be embedded according to a proper linker script.
if (auto *IA = II.getAction())
if ((JA.isHostOffloading(Action::OFK_OpenMP) &&
IA->isDeviceOffloading(Action::OFK_OpenMP)))
continue;
if (!TC.HasNativeLLVMSupport() && types::isLLVMIR(II.getType()))
// Don't try to pass LLVM inputs unless we have native support.
D.Diag(diag::err_drv_no_linker_llvm_support) << TC.getTripleString();
// Add filenames immediately.
if (II.isFilename()) {
CmdArgs.push_back(II.getFilename());
continue;
}
// Otherwise, this is a linker input argument.
const Arg &A = II.getInputArg();
// Handle reserved library options.
if (A.getOption().matches(options::OPT_Z_reserved_lib_stdcxx))
TC.AddCXXStdlibLibArgs(Args, CmdArgs);
else if (A.getOption().matches(options::OPT_Z_reserved_lib_cckext))
TC.AddCCKextLibArgs(Args, CmdArgs);
else if (A.getOption().matches(options::OPT_z)) {
// Pass -z prefix for gcc linker compatibility.
A.claim();
A.render(Args, CmdArgs);
} else if (A.getOption().matches(options::OPT_b)) {
const llvm::Triple &T = TC.getTriple();
if (!T.isOSAIX()) {
TC.getDriver().Diag(diag::err_drv_unsupported_opt_for_target)
<< A.getSpelling() << T.str();
}
// Pass -b prefix for AIX linker.
A.claim();
A.render(Args, CmdArgs);
} else {
A.renderAsInput(Args, CmdArgs);
}
}
}
void tools::addLinkerCompressDebugSectionsOption(
const ToolChain &TC, const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) {
// GNU ld supports --compress-debug-sections=none|zlib|zlib-gnu|zlib-gabi
// whereas zlib is an alias to zlib-gabi and zlib-gnu is obsoleted. Therefore
// -gz=none|zlib are translated to --compress-debug-sections=none|zlib. -gz
// is not translated since ld --compress-debug-sections option requires an
// argument.
if (const Arg *A = Args.getLastArg(options::OPT_gz_EQ)) {
StringRef V = A->getValue();
if (V == "none" || V == "zlib")
CmdArgs.push_back(Args.MakeArgString("--compress-debug-sections=" + V));
else
TC.getDriver().Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << V;
}
}
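// Illustrative example: -gz=zlib is forwarded to the linker as
// --compress-debug-sections=zlib, while any other -gz= value is rejected with
// err_drv_unsupported_option_argument.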
void tools::AddTargetFeature(const ArgList &Args,
std::vector<StringRef> &Features,
OptSpecifier OnOpt, OptSpecifier OffOpt,
StringRef FeatureName) {
if (Arg *A = Args.getLastArg(OnOpt, OffOpt)) {
if (A->getOption().matches(OnOpt))
Features.push_back(Args.MakeArgString("+" + FeatureName));
else
Features.push_back(Args.MakeArgString("-" + FeatureName));
}
}
/// Get the (LLVM) name of the AMDGPU gpu we are targeting.
static std::string getAMDGPUTargetGPU(const llvm::Triple &T,
const ArgList &Args) {
if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
auto GPUName = getProcessorFromTargetID(T, A->getValue());
return llvm::StringSwitch<std::string>(GPUName)
.Cases("rv630", "rv635", "r600")
.Cases("rv610", "rv620", "rs780", "rs880")
.Case("rv740", "rv770")
.Case("palm", "cedar")
.Cases("sumo", "sumo2", "sumo")
.Case("hemlock", "cypress")
.Case("aruba", "cayman")
.Default(GPUName.str());
}
return "";
}
static std::string getLanaiTargetCPU(const ArgList &Args) {
if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
return A->getValue();
}
return "";
}
/// Get the (LLVM) name of the WebAssembly cpu we are targeting.
static StringRef getWebAssemblyTargetCPU(const ArgList &Args) {
// If we have -mcpu=, use that.
if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
StringRef CPU = A->getValue();
#ifdef __wasm__
// Handle "native" by examining the host. "native" isn't meaningful when
// cross compiling, so only support this when the host is also WebAssembly.
if (CPU == "native")
return llvm::sys::getHostCPUName();
#endif
return CPU;
}
return "generic";
}
std::string tools::getCPUName(const Driver &D, const ArgList &Args,
const llvm::Triple &T, bool FromAs) {
Arg *A;
switch (T.getArch()) {
default:
return "";
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_32:
case llvm::Triple::aarch64_be:
return aarch64::getAArch64TargetCPU(Args, T, A);
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb: {
StringRef MArch, MCPU;
arm::getARMArchCPUFromArgs(Args, MArch, MCPU, FromAs);
return arm::getARMTargetCPU(MCPU, MArch, T);
}
case llvm::Triple::avr:
if (const Arg *A = Args.getLastArg(options::OPT_mmcu_EQ))
return A->getValue();
return "";
case llvm::Triple::m68k:
return m68k::getM68kTargetCPU(Args);
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el: {
StringRef CPUName;
StringRef ABIName;
mips::getMipsCPUAndABI(Args, T, CPUName, ABIName);
return std::string(CPUName);
}
case llvm::Triple::nvptx:
case llvm::Triple::nvptx64:
if (const Arg *A = Args.getLastArg(options::OPT_march_EQ))
return A->getValue();
return "";
case llvm::Triple::ppc:
case llvm::Triple::ppcle:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le: {
std::string TargetCPUName = ppc::getPPCTargetCPU(Args);
    // LLVM may default to generating code for the native CPU, but, like gcc,
    // we default to a more generic option for each architecture
    // (except on AIX).
if (!TargetCPUName.empty())
return TargetCPUName;
if (T.isOSAIX())
TargetCPUName = "pwr7";
else if (T.getArch() == llvm::Triple::ppc64le)
TargetCPUName = "ppc64le";
else if (T.getArch() == llvm::Triple::ppc64)
TargetCPUName = "ppc64";
else
TargetCPUName = "ppc";
return TargetCPUName;
}
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ))
return A->getValue();
return "";
case llvm::Triple::bpfel:
case llvm::Triple::bpfeb:
case llvm::Triple::sparc:
case llvm::Triple::sparcel:
case llvm::Triple::sparcv9:
if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ))
return A->getValue();
if (T.getArch() == llvm::Triple::sparc && T.isOSSolaris())
return "v9";
return "";
case llvm::Triple::x86:
case llvm::Triple::x86_64:
return x86::getX86TargetCPU(D, Args, T);
case llvm::Triple::hexagon:
return "hexagon" +
toolchains::HexagonToolChain::GetTargetCPUVersion(Args).str();
case llvm::Triple::lanai:
return getLanaiTargetCPU(Args);
case llvm::Triple::systemz:
return systemz::getSystemZTargetCPU(Args);
case llvm::Triple::r600:
case llvm::Triple::amdgcn:
return getAMDGPUTargetGPU(T, Args);
case llvm::Triple::wasm32:
case llvm::Triple::wasm64:
return std::string(getWebAssemblyTargetCPU(Args));
}
}
llvm::StringRef tools::getLTOParallelism(const ArgList &Args, const Driver &D) {
Arg *LtoJobsArg = Args.getLastArg(options::OPT_flto_jobs_EQ);
if (!LtoJobsArg)
return {};
if (!llvm::get_threadpool_strategy(LtoJobsArg->getValue()))
D.Diag(diag::err_drv_invalid_int_value)
<< LtoJobsArg->getAsString(Args) << LtoJobsArg->getValue();
return LtoJobsArg->getValue();
}
// CloudABI uses -ffunction-sections and -fdata-sections by default.
bool tools::isUseSeparateSections(const llvm::Triple &Triple) {
return Triple.getOS() == llvm::Triple::CloudABI;
}
void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args,
ArgStringList &CmdArgs, const InputInfo &Output,
const InputInfo &Input, bool IsThinLTO) {
const char *Linker = Args.MakeArgString(ToolChain.GetLinkerPath());
const Driver &D = ToolChain.getDriver();
if (llvm::sys::path::filename(Linker) != "ld.lld" &&
llvm::sys::path::stem(Linker) != "ld.lld") {
// Tell the linker to load the plugin. This has to come before
// AddLinkerInputs as gold requires -plugin to come before any -plugin-opt
// that -Wl might forward.
CmdArgs.push_back("-plugin");
#if defined(_WIN32)
const char *Suffix = ".dll";
#elif defined(__APPLE__)
const char *Suffix = ".dylib";
#else
const char *Suffix = ".so";
#endif
SmallString<1024> Plugin;
llvm::sys::path::native(
Twine(D.Dir) + "/../lib" CLANG_LIBDIR_SUFFIX "/LLVMgold" + Suffix,
Plugin);
CmdArgs.push_back(Args.MakeArgString(Plugin));
}
// Try to pass driver level flags relevant to LTO code generation down to
// the plugin.
// Handle flags for selecting CPU variants.
std::string CPU = getCPUName(D, Args, ToolChain.getTriple());
if (!CPU.empty())
CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=mcpu=") + CPU));
if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
// The optimization level matches
// CompilerInvocation.cpp:getOptimizationLevel().
StringRef OOpt;
if (A->getOption().matches(options::OPT_O4) ||
A->getOption().matches(options::OPT_Ofast))
OOpt = "3";
else if (A->getOption().matches(options::OPT_O)) {
OOpt = A->getValue();
if (OOpt == "g")
OOpt = "1";
else if (OOpt == "s" || OOpt == "z")
OOpt = "2";
} else if (A->getOption().matches(options::OPT_O0))
OOpt = "0";
if (!OOpt.empty())
CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=O") + OOpt));
}
if (Args.hasArg(options::OPT_gsplit_dwarf)) {
CmdArgs.push_back(
Args.MakeArgString(Twine("-plugin-opt=dwo_dir=") +
Output.getFilename() + "_dwo"));
}
if (IsThinLTO)
CmdArgs.push_back("-plugin-opt=thinlto");
StringRef Parallelism = getLTOParallelism(Args, D);
if (!Parallelism.empty())
CmdArgs.push_back(
Args.MakeArgString("-plugin-opt=jobs=" + Twine(Parallelism)));
// If an explicit debugger tuning argument appeared, pass it along.
if (Arg *A = Args.getLastArg(options::OPT_gTune_Group,
options::OPT_ggdbN_Group)) {
if (A->getOption().matches(options::OPT_glldb))
CmdArgs.push_back("-plugin-opt=-debugger-tune=lldb");
else if (A->getOption().matches(options::OPT_gsce))
CmdArgs.push_back("-plugin-opt=-debugger-tune=sce");
else if (A->getOption().matches(options::OPT_gdbx))
CmdArgs.push_back("-plugin-opt=-debugger-tune=dbx");
else
CmdArgs.push_back("-plugin-opt=-debugger-tune=gdb");
}
bool UseSeparateSections =
isUseSeparateSections(ToolChain.getEffectiveTriple());
if (Args.hasFlag(options::OPT_ffunction_sections,
options::OPT_fno_function_sections, UseSeparateSections)) {
CmdArgs.push_back("-plugin-opt=-function-sections");
}
if (Args.hasFlag(options::OPT_fdata_sections, options::OPT_fno_data_sections,
UseSeparateSections)) {
CmdArgs.push_back("-plugin-opt=-data-sections");
}
if (Arg *A = getLastProfileSampleUseArg(Args)) {
StringRef FName = A->getValue();
if (!llvm::sys::fs::exists(FName))
D.Diag(diag::err_drv_no_such_file) << FName;
else
CmdArgs.push_back(
Args.MakeArgString(Twine("-plugin-opt=sample-profile=") + FName));
}
auto *CSPGOGenerateArg = Args.getLastArg(options::OPT_fcs_profile_generate,
options::OPT_fcs_profile_generate_EQ,
options::OPT_fno_profile_generate);
if (CSPGOGenerateArg &&
CSPGOGenerateArg->getOption().matches(options::OPT_fno_profile_generate))
CSPGOGenerateArg = nullptr;
auto *ProfileUseArg = getLastProfileUseArg(Args);
if (CSPGOGenerateArg) {
CmdArgs.push_back(Args.MakeArgString("-plugin-opt=cs-profile-generate"));
if (CSPGOGenerateArg->getOption().matches(
options::OPT_fcs_profile_generate_EQ)) {
SmallString<128> Path(CSPGOGenerateArg->getValue());
llvm::sys::path::append(Path, "default_%m.profraw");
CmdArgs.push_back(
Args.MakeArgString(Twine("-plugin-opt=cs-profile-path=") + Path));
} else
CmdArgs.push_back(
Args.MakeArgString("-plugin-opt=cs-profile-path=default_%m.profraw"));
} else if (ProfileUseArg) {
SmallString<128> Path(
ProfileUseArg->getNumValues() == 0 ? "" : ProfileUseArg->getValue());
if (Path.empty() || llvm::sys::fs::is_directory(Path))
llvm::sys::path::append(Path, "default.profdata");
CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=cs-profile-path=") +
Path));
}
// Pass an option to enable/disable the new pass manager.
if (auto *A = Args.getLastArg(options::OPT_flegacy_pass_manager,
options::OPT_fno_legacy_pass_manager)) {
if (A->getOption().matches(options::OPT_flegacy_pass_manager))
CmdArgs.push_back("-plugin-opt=legacy-pass-manager");
else
CmdArgs.push_back("-plugin-opt=new-pass-manager");
}
// Setup statistics file output.
SmallString<128> StatsFile = getStatsFileName(Args, Output, Input, D);
if (!StatsFile.empty())
CmdArgs.push_back(
Args.MakeArgString(Twine("-plugin-opt=stats-file=") + StatsFile));
addX86AlignBranchArgs(D, Args, CmdArgs, /*IsLTO=*/true);
// Handle remark diagnostics on screen options: '-Rpass-*'.
renderRpassOptions(Args, CmdArgs);
// Handle serialized remarks options: '-fsave-optimization-record'
// and '-foptimization-record-*'.
if (willEmitRemarks(Args))
renderRemarksOptions(Args, CmdArgs, ToolChain.getEffectiveTriple(), Input,
Output);
// Handle remarks hotness/threshold related options.
renderRemarksHotnessOptions(Args, CmdArgs);
addMachineOutlinerArgs(D, Args, CmdArgs, ToolChain.getEffectiveTriple(),
/*IsLTO=*/true);
}
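// Illustrative example: an LTO link such as
//   clang -flto=thin -O2 -ffunction-sections ...
// driven through a non-lld linker picks up -plugin, -plugin-opt=O2,
// -plugin-opt=thinlto and -plugin-opt=-function-sections from the code above
// (assuming the caller passes IsThinLTO accordingly).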
void tools::addOpenMPRuntimeSpecificRPath(const ToolChain &TC,
const ArgList &Args,
ArgStringList &CmdArgs) {
if (Args.hasFlag(options::OPT_fopenmp_implicit_rpath,
options::OPT_fno_openmp_implicit_rpath, true)) {
    // Default to the clang lib/lib64 folder, i.e. the same location as the
    // device runtime.
SmallString<256> DefaultLibPath =
llvm::sys::path::parent_path(TC.getDriver().Dir);
llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX);
CmdArgs.push_back("-rpath");
CmdArgs.push_back(Args.MakeArgString(DefaultLibPath));
}
}
void tools::addArchSpecificRPath(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs) {
// Enable -frtlib-add-rpath by default for the case of VE.
const bool IsVE = TC.getTriple().isVE();
bool DefaultValue = IsVE;
if (!Args.hasFlag(options::OPT_frtlib_add_rpath,
options::OPT_fno_rtlib_add_rpath, DefaultValue))
return;
std::string CandidateRPath = TC.getArchSpecificLibPath();
if (TC.getVFS().exists(CandidateRPath)) {
CmdArgs.push_back("-rpath");
CmdArgs.push_back(Args.MakeArgString(CandidateRPath));
}
}
bool tools::addOpenMPRuntime(ArgStringList &CmdArgs, const ToolChain &TC,
const ArgList &Args, bool ForceStaticHostRuntime,
bool IsOffloadingHost, bool GompNeedsRT) {
if (!Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
options::OPT_fno_openmp, false))
return false;
Driver::OpenMPRuntimeKind RTKind = TC.getDriver().getOpenMPRuntime(Args);
if (RTKind == Driver::OMPRT_Unknown)
// Already diagnosed.
return false;
if (ForceStaticHostRuntime)
CmdArgs.push_back("-Bstatic");
switch (RTKind) {
case Driver::OMPRT_OMP:
CmdArgs.push_back("-lomp");
break;
case Driver::OMPRT_GOMP:
CmdArgs.push_back("-lgomp");
break;
case Driver::OMPRT_IOMP5:
CmdArgs.push_back("-liomp5");
break;
case Driver::OMPRT_Unknown:
break;
}
if (ForceStaticHostRuntime)
CmdArgs.push_back("-Bdynamic");
if (RTKind == Driver::OMPRT_GOMP && GompNeedsRT)
CmdArgs.push_back("-lrt");
if (IsOffloadingHost)
CmdArgs.push_back("-lomptarget");
addArchSpecificRPath(TC, Args, CmdArgs);
if (RTKind == Driver::OMPRT_OMP)
addOpenMPRuntimeSpecificRPath(TC, Args, CmdArgs);
return true;
}
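// Illustrative example: a plain -fopenmp link with the default LLVM runtime
// adds -lomp (wrapped in -Bstatic/-Bdynamic when a static host runtime is
// forced), -lomptarget when this is the offloading host, and the rpath
// handling above.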
static void addSanitizerRuntime(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs, StringRef Sanitizer,
bool IsShared, bool IsWhole) {
  // Wrap any static runtimes that must be forced into the executable in
  // --whole-archive.
if (IsWhole) CmdArgs.push_back("--whole-archive");
CmdArgs.push_back(TC.getCompilerRTArgString(
Args, Sanitizer, IsShared ? ToolChain::FT_Shared : ToolChain::FT_Static));
if (IsWhole) CmdArgs.push_back("--no-whole-archive");
if (IsShared) {
addArchSpecificRPath(TC, Args, CmdArgs);
}
}
// Tries to use a file with the list of dynamic symbols that need to be exported
// from the runtime library. Returns true if the file was found.
static bool addSanitizerDynamicList(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs,
StringRef Sanitizer) {
// Solaris ld defaults to --export-dynamic behaviour but doesn't support
// the option, so don't try to pass it.
if (TC.getTriple().getOS() == llvm::Triple::Solaris)
return true;
SmallString<128> SanRT(TC.getCompilerRT(Args, Sanitizer));
if (llvm::sys::fs::exists(SanRT + ".syms")) {
CmdArgs.push_back(Args.MakeArgString("--dynamic-list=" + SanRT + ".syms"));
return true;
}
return false;
}
-static const char *getAsNeededOption(const ToolChain &TC, bool as_needed) {
+const char *tools::getAsNeededOption(const ToolChain &TC, bool as_needed) {
assert(!TC.getTriple().isOSAIX() &&
"AIX linker does not support any form of --as-needed option yet.");
// While the Solaris 11.2 ld added --as-needed/--no-as-needed as aliases
// for the native forms -z ignore/-z record, they are missing in Illumos,
// so always use the native form.
if (TC.getTriple().isOSSolaris())
return as_needed ? "-zignore" : "-zrecord";
else
return as_needed ? "--as-needed" : "--no-as-needed";
}
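// Illustrative note: with the signature change above, other toolchain files
// can call tools::getAsNeededOption(TC, false), which yields "--no-as-needed"
// on GNU-style linkers and the native "-zrecord" spelling on Solaris.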
void tools::linkSanitizerRuntimeDeps(const ToolChain &TC,
ArgStringList &CmdArgs) {
// Fuchsia never needs these. Any sanitizer runtimes with system
// dependencies use the `.deplibs` feature instead.
if (TC.getTriple().isOSFuchsia())
return;
// Force linking against the system libraries sanitizers depends on
// (see PR15823 why this is necessary).
CmdArgs.push_back(getAsNeededOption(TC, false));
// There's no libpthread or librt on RTEMS & Android.
if (TC.getTriple().getOS() != llvm::Triple::RTEMS &&
!TC.getTriple().isAndroid()) {
CmdArgs.push_back("-lpthread");
if (!TC.getTriple().isOSOpenBSD())
CmdArgs.push_back("-lrt");
}
CmdArgs.push_back("-lm");
  // Not every OS has libdl.
if (!TC.getTriple().isOSFreeBSD() && !TC.getTriple().isOSNetBSD() &&
!TC.getTriple().isOSOpenBSD() &&
TC.getTriple().getOS() != llvm::Triple::RTEMS)
CmdArgs.push_back("-ldl");
// Required for backtrace on some OSes
if (TC.getTriple().isOSFreeBSD() ||
TC.getTriple().isOSNetBSD() ||
TC.getTriple().isOSOpenBSD())
CmdArgs.push_back("-lexecinfo");
}
static void
collectSanitizerRuntimes(const ToolChain &TC, const ArgList &Args,
SmallVectorImpl<StringRef> &SharedRuntimes,
SmallVectorImpl<StringRef> &StaticRuntimes,
SmallVectorImpl<StringRef> &NonWholeStaticRuntimes,
SmallVectorImpl<StringRef> &HelperStaticRuntimes,
SmallVectorImpl<StringRef> &RequiredSymbols) {
const SanitizerArgs &SanArgs = TC.getSanitizerArgs(Args);
// Collect shared runtimes.
if (SanArgs.needsSharedRt()) {
if (SanArgs.needsAsanRt() && SanArgs.linkRuntimes()) {
SharedRuntimes.push_back("asan");
if (!Args.hasArg(options::OPT_shared) && !TC.getTriple().isAndroid())
HelperStaticRuntimes.push_back("asan-preinit");
}
if (SanArgs.needsMemProfRt() && SanArgs.linkRuntimes()) {
SharedRuntimes.push_back("memprof");
if (!Args.hasArg(options::OPT_shared) && !TC.getTriple().isAndroid())
HelperStaticRuntimes.push_back("memprof-preinit");
}
if (SanArgs.needsUbsanRt() && SanArgs.linkRuntimes()) {
if (SanArgs.requiresMinimalRuntime())
SharedRuntimes.push_back("ubsan_minimal");
else
SharedRuntimes.push_back("ubsan_standalone");
}
if (SanArgs.needsScudoRt() && SanArgs.linkRuntimes()) {
if (SanArgs.requiresMinimalRuntime())
SharedRuntimes.push_back("scudo_minimal");
else
SharedRuntimes.push_back("scudo");
}
if (SanArgs.needsTsanRt() && SanArgs.linkRuntimes())
SharedRuntimes.push_back("tsan");
if (SanArgs.needsHwasanRt() && SanArgs.linkRuntimes()) {
if (SanArgs.needsHwasanAliasesRt())
SharedRuntimes.push_back("hwasan_aliases");
else
SharedRuntimes.push_back("hwasan");
}
}
// The stats_client library is also statically linked into DSOs.
if (SanArgs.needsStatsRt() && SanArgs.linkRuntimes())
StaticRuntimes.push_back("stats_client");
// Always link the static runtime regardless of DSO or executable.
if (SanArgs.needsAsanRt())
HelperStaticRuntimes.push_back("asan_static");
// Collect static runtimes.
if (Args.hasArg(options::OPT_shared)) {
// Don't link static runtimes into DSOs.
return;
}
// Each static runtime that has a DSO counterpart above is excluded below,
// but runtimes that exist only as static are not affected by needsSharedRt.
if (!SanArgs.needsSharedRt() && SanArgs.needsAsanRt() && SanArgs.linkRuntimes()) {
StaticRuntimes.push_back("asan");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("asan_cxx");
}
if (!SanArgs.needsSharedRt() && SanArgs.needsMemProfRt() &&
SanArgs.linkRuntimes()) {
StaticRuntimes.push_back("memprof");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("memprof_cxx");
}
if (!SanArgs.needsSharedRt() && SanArgs.needsHwasanRt() && SanArgs.linkRuntimes()) {
if (SanArgs.needsHwasanAliasesRt()) {
StaticRuntimes.push_back("hwasan_aliases");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("hwasan_aliases_cxx");
} else {
StaticRuntimes.push_back("hwasan");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("hwasan_cxx");
}
}
if (SanArgs.needsDfsanRt() && SanArgs.linkRuntimes())
StaticRuntimes.push_back("dfsan");
if (SanArgs.needsLsanRt() && SanArgs.linkRuntimes())
StaticRuntimes.push_back("lsan");
if (SanArgs.needsMsanRt() && SanArgs.linkRuntimes()) {
StaticRuntimes.push_back("msan");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("msan_cxx");
}
if (!SanArgs.needsSharedRt() && SanArgs.needsTsanRt() &&
SanArgs.linkRuntimes()) {
StaticRuntimes.push_back("tsan");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("tsan_cxx");
}
if (!SanArgs.needsSharedRt() && SanArgs.needsUbsanRt() && SanArgs.linkRuntimes()) {
if (SanArgs.requiresMinimalRuntime()) {
StaticRuntimes.push_back("ubsan_minimal");
} else {
StaticRuntimes.push_back("ubsan_standalone");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("ubsan_standalone_cxx");
}
}
if (SanArgs.needsSafeStackRt() && SanArgs.linkRuntimes()) {
NonWholeStaticRuntimes.push_back("safestack");
RequiredSymbols.push_back("__safestack_init");
}
if (!(SanArgs.needsSharedRt() && SanArgs.needsUbsanRt() && SanArgs.linkRuntimes())) {
if (SanArgs.needsCfiRt() && SanArgs.linkRuntimes())
StaticRuntimes.push_back("cfi");
if (SanArgs.needsCfiDiagRt() && SanArgs.linkRuntimes()) {
StaticRuntimes.push_back("cfi_diag");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("ubsan_standalone_cxx");
}
}
if (SanArgs.needsStatsRt() && SanArgs.linkRuntimes()) {
NonWholeStaticRuntimes.push_back("stats");
RequiredSymbols.push_back("__sanitizer_stats_register");
}
if (!SanArgs.needsSharedRt() && SanArgs.needsScudoRt() && SanArgs.linkRuntimes()) {
if (SanArgs.requiresMinimalRuntime()) {
StaticRuntimes.push_back("scudo_minimal");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("scudo_cxx_minimal");
} else {
StaticRuntimes.push_back("scudo");
if (SanArgs.linkCXXRuntimes())
StaticRuntimes.push_back("scudo_cxx");
}
}
}
// Should be called before we add system libraries (C++ ABI, libstdc++/libc++,
// C runtime, etc). Returns true if sanitizer system deps need to be linked in.
bool tools::addSanitizerRuntimes(const ToolChain &TC, const ArgList &Args,
ArgStringList &CmdArgs) {
SmallVector<StringRef, 4> SharedRuntimes, StaticRuntimes,
NonWholeStaticRuntimes, HelperStaticRuntimes, RequiredSymbols;
collectSanitizerRuntimes(TC, Args, SharedRuntimes, StaticRuntimes,
NonWholeStaticRuntimes, HelperStaticRuntimes,
RequiredSymbols);
const SanitizerArgs &SanArgs = TC.getSanitizerArgs(Args);
// Inject libfuzzer dependencies.
if (SanArgs.needsFuzzer() && SanArgs.linkRuntimes() &&
!Args.hasArg(options::OPT_shared)) {
addSanitizerRuntime(TC, Args, CmdArgs, "fuzzer", false, true);
if (SanArgs.needsFuzzerInterceptors())
addSanitizerRuntime(TC, Args, CmdArgs, "fuzzer_interceptors", false,
true);
if (!Args.hasArg(clang::driver::options::OPT_nostdlibxx)) {
bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) &&
!Args.hasArg(options::OPT_static);
if (OnlyLibstdcxxStatic)
CmdArgs.push_back("-Bstatic");
TC.AddCXXStdlibLibArgs(Args, CmdArgs);
if (OnlyLibstdcxxStatic)
CmdArgs.push_back("-Bdynamic");
}
}
for (auto RT : SharedRuntimes)
addSanitizerRuntime(TC, Args, CmdArgs, RT, true, false);
for (auto RT : HelperStaticRuntimes)
addSanitizerRuntime(TC, Args, CmdArgs, RT, false, true);
bool AddExportDynamic = false;
for (auto RT : StaticRuntimes) {
addSanitizerRuntime(TC, Args, CmdArgs, RT, false, true);
AddExportDynamic |= !addSanitizerDynamicList(TC, Args, CmdArgs, RT);
}
for (auto RT : NonWholeStaticRuntimes) {
addSanitizerRuntime(TC, Args, CmdArgs, RT, false, false);
AddExportDynamic |= !addSanitizerDynamicList(TC, Args, CmdArgs, RT);
}
for (auto S : RequiredSymbols) {
CmdArgs.push_back("-u");
CmdArgs.push_back(Args.MakeArgString(S));
}
// If there is a static runtime with no dynamic list, force all the symbols
// to be dynamic to be sure we export sanitizer interface functions.
if (AddExportDynamic)
CmdArgs.push_back("--export-dynamic");
if (SanArgs.hasCrossDsoCfi() && !AddExportDynamic)
CmdArgs.push_back("--export-dynamic-symbol=__cfi_check");
return !StaticRuntimes.empty() || !NonWholeStaticRuntimes.empty();
}
bool tools::addXRayRuntime(const ToolChain &TC, const ArgList &Args,
                           ArgStringList &CmdArgs) {
if (Args.hasArg(options::OPT_shared))
return false;
if (TC.getXRayArgs().needsXRayRt()) {
CmdArgs.push_back("-whole-archive");
CmdArgs.push_back(TC.getCompilerRTArgString(Args, "xray"));
for (const auto &Mode : TC.getXRayArgs().modeList())
CmdArgs.push_back(TC.getCompilerRTArgString(Args, Mode));
CmdArgs.push_back("-no-whole-archive");
return true;
}
return false;
}
void tools::linkXRayRuntimeDeps(const ToolChain &TC, ArgStringList &CmdArgs) {
CmdArgs.push_back(getAsNeededOption(TC, false));
CmdArgs.push_back("-lpthread");
if (!TC.getTriple().isOSOpenBSD())
CmdArgs.push_back("-lrt");
CmdArgs.push_back("-lm");
if (!TC.getTriple().isOSFreeBSD() &&
!TC.getTriple().isOSNetBSD() &&
!TC.getTriple().isOSOpenBSD())
CmdArgs.push_back("-ldl");
}
bool tools::areOptimizationsEnabled(const ArgList &Args) {
// Find the last -O arg and see if it is non-zero.
if (Arg *A = Args.getLastArg(options::OPT_O_Group))
return !A->getOption().matches(options::OPT_O0);
// Defaults to -O0.
return false;
}
const char *tools::SplitDebugName(const JobAction &JA, const ArgList &Args,
const InputInfo &Input,
const InputInfo &Output) {
auto AddPostfix = [JA](auto &F) {
if (JA.getOffloadingDeviceKind() == Action::OFK_HIP)
F += (Twine("_") + JA.getOffloadingArch()).str();
F += ".dwo";
};
if (Arg *A = Args.getLastArg(options::OPT_gsplit_dwarf_EQ))
if (StringRef(A->getValue()) == "single")
return Args.MakeArgString(Output.getFilename());
Arg *FinalOutput = Args.getLastArg(options::OPT_o);
if (FinalOutput && Args.hasArg(options::OPT_c)) {
SmallString<128> T(FinalOutput->getValue());
llvm::sys::path::remove_filename(T);
llvm::sys::path::append(T, llvm::sys::path::stem(FinalOutput->getValue()));
AddPostfix(T);
return Args.MakeArgString(T);
} else {
// Use the compilation dir.
Arg *A = Args.getLastArg(options::OPT_ffile_compilation_dir_EQ,
options::OPT_fdebug_compilation_dir_EQ);
SmallString<128> T(A ? A->getValue() : "");
SmallString<128> F(llvm::sys::path::stem(Input.getBaseInput()));
AddPostfix(F);
T += F;
return Args.MakeArgString(T);
}
}
void tools::SplitDebugInfo(const ToolChain &TC, Compilation &C, const Tool &T,
const JobAction &JA, const ArgList &Args,
const InputInfo &Output, const char *OutFile) {
ArgStringList ExtractArgs;
ExtractArgs.push_back("--extract-dwo");
ArgStringList StripArgs;
StripArgs.push_back("--strip-dwo");
// Grabbing the output of the earlier compile step.
StripArgs.push_back(Output.getFilename());
ExtractArgs.push_back(Output.getFilename());
ExtractArgs.push_back(OutFile);
const char *Exec =
Args.MakeArgString(TC.GetProgramPath(CLANG_DEFAULT_OBJCOPY));
InputInfo II(types::TY_Object, Output.getFilename(), Output.getFilename());
// First extract the dwo sections.
C.addCommand(std::make_unique<Command>(JA, T,
ResponseFileSupport::AtFileCurCP(),
Exec, ExtractArgs, II, Output));
// Then remove them from the original .o file.
C.addCommand(std::make_unique<Command>(
JA, T, ResponseFileSupport::AtFileCurCP(), Exec, StripArgs, II, Output));
}
// Claim options we don't want to warn if they are unused. We do this for
// options that build systems might add but are unused when assembling or only
// running the preprocessor, for example.
void tools::claimNoWarnArgs(const ArgList &Args) {
// Don't warn about unused -f(no-)?lto. This can happen when we're
// preprocessing, precompiling or assembling.
Args.ClaimAllArgs(options::OPT_flto_EQ);
Args.ClaimAllArgs(options::OPT_flto);
Args.ClaimAllArgs(options::OPT_fno_lto);
}
Arg *tools::getLastProfileUseArg(const ArgList &Args) {
auto *ProfileUseArg = Args.getLastArg(
options::OPT_fprofile_instr_use, options::OPT_fprofile_instr_use_EQ,
options::OPT_fprofile_use, options::OPT_fprofile_use_EQ,
options::OPT_fno_profile_instr_use);
if (ProfileUseArg &&
ProfileUseArg->getOption().matches(options::OPT_fno_profile_instr_use))
ProfileUseArg = nullptr;
return ProfileUseArg;
}
Arg *tools::getLastProfileSampleUseArg(const ArgList &Args) {
auto *ProfileSampleUseArg = Args.getLastArg(
options::OPT_fprofile_sample_use, options::OPT_fprofile_sample_use_EQ,
options::OPT_fauto_profile, options::OPT_fauto_profile_EQ,
options::OPT_fno_profile_sample_use, options::OPT_fno_auto_profile);
if (ProfileSampleUseArg &&
(ProfileSampleUseArg->getOption().matches(
options::OPT_fno_profile_sample_use) ||
ProfileSampleUseArg->getOption().matches(options::OPT_fno_auto_profile)))
return nullptr;
return Args.getLastArg(options::OPT_fprofile_sample_use_EQ,
options::OPT_fauto_profile_EQ);
}
/// Parses the various -fpic/-fPIC/-fpie/-fPIE arguments. Then,
/// smooshes them together with platform defaults, to decide whether
/// this compile should be using PIC mode or not. Returns a tuple of
/// (RelocationModel, PICLevel, IsPIE).
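///
/// Illustrative examples (assuming a target where neither PIC nor PIE is the
/// default, the toolchain does not force PIC, and ignoring the target-specific
/// MIPS/Darwin adjustments below): "-fpie" yields (PIC_, 1, true), "-fPIE"
/// yields (PIC_, 2, true), and a trailing "-fno-pic" yields (Static, 0, false).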
std::tuple<llvm::Reloc::Model, unsigned, bool>
tools::ParsePICArgs(const ToolChain &ToolChain, const ArgList &Args) {
const llvm::Triple &EffectiveTriple = ToolChain.getEffectiveTriple();
const llvm::Triple &Triple = ToolChain.getTriple();
bool PIE = ToolChain.isPIEDefault(Args);
bool PIC = PIE || ToolChain.isPICDefault();
// The Darwin/MachO default to use PIC does not apply when using -static.
if (Triple.isOSBinFormatMachO() && Args.hasArg(options::OPT_static))
PIE = PIC = false;
bool IsPICLevelTwo = PIC;
bool KernelOrKext =
Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext);
// Android-specific defaults for PIC/PIE
if (Triple.isAndroid()) {
switch (Triple.getArch()) {
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
case llvm::Triple::aarch64:
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
PIC = true; // "-fpic"
break;
case llvm::Triple::x86:
case llvm::Triple::x86_64:
PIC = true; // "-fPIC"
IsPICLevelTwo = true;
break;
default:
break;
}
}
// OpenBSD-specific defaults for PIE
if (Triple.isOSOpenBSD()) {
switch (ToolChain.getArch()) {
case llvm::Triple::arm:
case llvm::Triple::aarch64:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
case llvm::Triple::x86:
case llvm::Triple::x86_64:
IsPICLevelTwo = false; // "-fpie"
break;
case llvm::Triple::ppc:
case llvm::Triple::sparcv9:
IsPICLevelTwo = true; // "-fPIE"
break;
default:
break;
}
}
// AMDGPU-specific defaults for PIC.
if (Triple.getArch() == llvm::Triple::amdgcn)
PIC = true;
// The last argument relating to either PIC or PIE wins, and no
// other argument is used. If the last argument is any flavor of the
// '-fno-...' arguments, both PIC and PIE are disabled. Any PIE
// option implicitly enables PIC at the same level.
Arg *LastPICArg = Args.getLastArg(options::OPT_fPIC, options::OPT_fno_PIC,
options::OPT_fpic, options::OPT_fno_pic,
options::OPT_fPIE, options::OPT_fno_PIE,
options::OPT_fpie, options::OPT_fno_pie);
if (Triple.isOSWindows() && !Triple.isOSCygMing() && LastPICArg &&
LastPICArg == Args.getLastArg(options::OPT_fPIC, options::OPT_fpic,
options::OPT_fPIE, options::OPT_fpie)) {
ToolChain.getDriver().Diag(diag::err_drv_unsupported_opt_for_target)
<< LastPICArg->getSpelling() << Triple.str();
if (Triple.getArch() == llvm::Triple::x86_64)
return std::make_tuple(llvm::Reloc::PIC_, 2U, false);
return std::make_tuple(llvm::Reloc::Static, 0U, false);
}
// Check whether the tool chain trumps the PIC-ness decision. If the PIC-ness
// is forced, then neither PIC nor PIE flags will have any effect.
if (!ToolChain.isPICDefaultForced()) {
if (LastPICArg) {
Option O = LastPICArg->getOption();
if (O.matches(options::OPT_fPIC) || O.matches(options::OPT_fpic) ||
O.matches(options::OPT_fPIE) || O.matches(options::OPT_fpie)) {
PIE = O.matches(options::OPT_fPIE) || O.matches(options::OPT_fpie);
PIC =
PIE || O.matches(options::OPT_fPIC) || O.matches(options::OPT_fpic);
IsPICLevelTwo =
O.matches(options::OPT_fPIE) || O.matches(options::OPT_fPIC);
} else {
PIE = PIC = false;
if (EffectiveTriple.isPS4CPU()) {
Arg *ModelArg = Args.getLastArg(options::OPT_mcmodel_EQ);
StringRef Model = ModelArg ? ModelArg->getValue() : "";
if (Model != "kernel") {
PIC = true;
ToolChain.getDriver().Diag(diag::warn_drv_ps4_force_pic)
<< LastPICArg->getSpelling();
}
}
}
}
}
// Introduce a Darwin and PS4-specific hack. If the default is PIC, but the
// PIC level would've been set to level 1, force it back to level 2 PIC
// instead.
if (PIC && (Triple.isOSDarwin() || EffectiveTriple.isPS4CPU()))
IsPICLevelTwo |= ToolChain.isPICDefault();
// These kernel flags are a trump card: they will disable PIC/PIE
// generation, independent of the argument order.
if (KernelOrKext &&
((!EffectiveTriple.isiOS() || EffectiveTriple.isOSVersionLT(6)) &&
!EffectiveTriple.isWatchOS()))
PIC = PIE = false;
if (Arg *A = Args.getLastArg(options::OPT_mdynamic_no_pic)) {
// This is a very special mode. It trumps the other modes, almost no one
// uses it, and it isn't even valid on any OS but Darwin.
if (!Triple.isOSDarwin())
ToolChain.getDriver().Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getSpelling() << Triple.str();
// FIXME: Warn when this flag trumps some other PIC or PIE flag.
// Only a forced PIC mode can cause the actual compile to have PIC defines
// etc., no flags are sufficient. This behavior was selected to closely
// match that of llvm-gcc and Apple GCC before that.
PIC = ToolChain.isPICDefault() && ToolChain.isPICDefaultForced();
return std::make_tuple(llvm::Reloc::DynamicNoPIC, PIC ? 2U : 0U, false);
}
bool EmbeddedPISupported;
switch (Triple.getArch()) {
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
EmbeddedPISupported = true;
break;
default:
EmbeddedPISupported = false;
break;
}
bool ROPI = false, RWPI = false;
Arg *LastROPIArg = Args.getLastArg(options::OPT_fropi, options::OPT_fno_ropi);
if (LastROPIArg && LastROPIArg->getOption().matches(options::OPT_fropi)) {
if (!EmbeddedPISupported)
ToolChain.getDriver().Diag(diag::err_drv_unsupported_opt_for_target)
<< LastROPIArg->getSpelling() << Triple.str();
ROPI = true;
}
Arg *LastRWPIArg = Args.getLastArg(options::OPT_frwpi, options::OPT_fno_rwpi);
if (LastRWPIArg && LastRWPIArg->getOption().matches(options::OPT_frwpi)) {
if (!EmbeddedPISupported)
ToolChain.getDriver().Diag(diag::err_drv_unsupported_opt_for_target)
<< LastRWPIArg->getSpelling() << Triple.str();
RWPI = true;
}
// ROPI and RWPI are not compatible with PIC or PIE.
if ((ROPI || RWPI) && (PIC || PIE))
ToolChain.getDriver().Diag(diag::err_drv_ropi_rwpi_incompatible_with_pic);
if (Triple.isMIPS()) {
StringRef CPUName;
StringRef ABIName;
mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName);
// When targeting the N64 ABI, PIC is the default, except in the case
// when the -mno-abicalls option is used. In that case we exit
// at the next check regardless of PIC being set below.
if (ABIName == "n64")
PIC = true;
// When targeting MIPS with -mno-abicalls, it's always static.
if (Args.hasArg(options::OPT_mno_abicalls))
return std::make_tuple(llvm::Reloc::Static, 0U, false);
// Unlike other architectures, MIPS, even with -fPIC/-mxgot/multigot,
// does not use PIC level 2 for historical reasons.
IsPICLevelTwo = false;
}
if (PIC)
return std::make_tuple(llvm::Reloc::PIC_, IsPICLevelTwo ? 2U : 1U, PIE);
llvm::Reloc::Model RelocM = llvm::Reloc::Static;
if (ROPI && RWPI)
RelocM = llvm::Reloc::ROPI_RWPI;
else if (ROPI)
RelocM = llvm::Reloc::ROPI;
else if (RWPI)
RelocM = llvm::Reloc::RWPI;
return std::make_tuple(RelocM, 0U, false);
}
// `-falign-functions` indicates that the functions should be aligned to a
// 16-byte boundary.
//
// `-falign-functions=1` is the same as `-fno-align-functions`.
//
// The scalar `n` in `-falign-functions=n` must be an integral value between
// [0, 65536]. If the value is not a power-of-two, it will be rounded up to
// the nearest power-of-two.
//
// If we return `0`, the frontend will default to the backend's preferred
// alignment.
//
// NOTE: icc only allows values between [0, 4096]. icc uses `-falign-functions`
// to mean `-falign-functions=16`. GCC defaults to the backend's preferred
// alignment. For unaligned functions, we default to the backend's preferred
// alignment.
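//
// Illustrative example (derived from the logic below, not normative): with
// `-falign-functions=12` the value is rounded up to 16 and this returns
// log2(16) == 4, while `-falign-functions=1` returns 0, which behaves the
// same as `-fno-align-functions`.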
unsigned tools::ParseFunctionAlignment(const ToolChain &TC,
const ArgList &Args) {
const Arg *A = Args.getLastArg(options::OPT_falign_functions,
options::OPT_falign_functions_EQ,
options::OPT_fno_align_functions);
if (!A || A->getOption().matches(options::OPT_fno_align_functions))
return 0;
if (A->getOption().matches(options::OPT_falign_functions))
return 0;
unsigned Value = 0;
if (StringRef(A->getValue()).getAsInteger(10, Value) || Value > 65536)
TC.getDriver().Diag(diag::err_drv_invalid_int_value)
<< A->getAsString(Args) << A->getValue();
return Value ? llvm::Log2_32_Ceil(std::min(Value, 65536u)) : Value;
}
unsigned tools::ParseDebugDefaultVersion(const ToolChain &TC,
const ArgList &Args) {
const Arg *A = Args.getLastArg(options::OPT_fdebug_default_version);
if (!A)
return 0;
unsigned Value = 0;
if (StringRef(A->getValue()).getAsInteger(10, Value) || Value > 5 ||
Value < 2)
TC.getDriver().Diag(diag::err_drv_invalid_int_value)
<< A->getAsString(Args) << A->getValue();
return Value;
}
void tools::AddAssemblerKPIC(const ToolChain &ToolChain, const ArgList &Args,
ArgStringList &CmdArgs) {
llvm::Reloc::Model RelocationModel;
unsigned PICLevel;
bool IsPIE;
std::tie(RelocationModel, PICLevel, IsPIE) = ParsePICArgs(ToolChain, Args);
if (RelocationModel != llvm::Reloc::Static)
CmdArgs.push_back("-KPIC");
}
/// Determine whether Objective-C automated reference counting is
/// enabled.
bool tools::isObjCAutoRefCount(const ArgList &Args) {
return Args.hasFlag(options::OPT_fobjc_arc, options::OPT_fno_objc_arc, false);
}
enum class LibGccType { UnspecifiedLibGcc, StaticLibGcc, SharedLibGcc };
static LibGccType getLibGccType(const ToolChain &TC, const Driver &D,
const ArgList &Args) {
if (Args.hasArg(options::OPT_static_libgcc) ||
Args.hasArg(options::OPT_static) || Args.hasArg(options::OPT_static_pie))
return LibGccType::StaticLibGcc;
if (Args.hasArg(options::OPT_shared_libgcc))
return LibGccType::SharedLibGcc;
// The Android NDK only provides libunwind.a, not libunwind.so.
if (TC.getTriple().isAndroid())
return LibGccType::StaticLibGcc;
// For MinGW, don't imply a shared libgcc here; we only want to return
// SharedLibGcc if that was explicitly requested.
if (D.CCCIsCXX() && !TC.getTriple().isOSCygMing())
return LibGccType::SharedLibGcc;
return LibGccType::UnspecifiedLibGcc;
}
// Gcc adds libgcc arguments in various ways:
//
// gcc <none>: -lgcc --as-needed -lgcc_s --no-as-needed
// g++ <none>: -lgcc_s -lgcc
// gcc shared: -lgcc_s -lgcc
// g++ shared: -lgcc_s -lgcc
// gcc static: -lgcc -lgcc_eh
// g++ static: -lgcc -lgcc_eh
// gcc static-pie: -lgcc -lgcc_eh
// g++ static-pie: -lgcc -lgcc_eh
//
// Also, certain targets need additional adjustments.
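//
// Illustrative example of the logic below (assumed flags, not part of this
// change): with --rtlib=compiler-rt --unwindlib=libunwind on a typical ELF
// target, a plain C link gets "--as-needed -l:libunwind.so --no-as-needed",
// while adding -static-libgcc (or -static) switches that to "-l:libunwind.a".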
static void AddUnwindLibrary(const ToolChain &TC, const Driver &D,
ArgStringList &CmdArgs, const ArgList &Args) {
ToolChain::UnwindLibType UNW = TC.GetUnwindLibType(Args);
// Targets that don't use unwind libraries.
if ((TC.getTriple().isAndroid() && UNW == ToolChain::UNW_Libgcc) ||
TC.getTriple().isOSIAMCU() || TC.getTriple().isOSBinFormatWasm() ||
UNW == ToolChain::UNW_None)
return;
LibGccType LGT = getLibGccType(TC, D, Args);
bool AsNeeded = LGT == LibGccType::UnspecifiedLibGcc &&
!TC.getTriple().isAndroid() &&
!TC.getTriple().isOSCygMing() && !TC.getTriple().isOSAIX();
if (AsNeeded)
CmdArgs.push_back(getAsNeededOption(TC, true));
switch (UNW) {
case ToolChain::UNW_None:
return;
case ToolChain::UNW_Libgcc: {
if (LGT == LibGccType::StaticLibGcc)
CmdArgs.push_back("-lgcc_eh");
else
CmdArgs.push_back("-lgcc_s");
break;
}
case ToolChain::UNW_CompilerRT:
if (TC.getTriple().isOSAIX()) {
// AIX only has libunwind as a shared library. So do not pass
// anything in if -static is specified.
if (LGT != LibGccType::StaticLibGcc)
CmdArgs.push_back("-lunwind");
} else if (LGT == LibGccType::StaticLibGcc) {
CmdArgs.push_back("-l:libunwind.a");
} else if (TC.getTriple().isOSCygMing()) {
if (LGT == LibGccType::SharedLibGcc)
CmdArgs.push_back("-l:libunwind.dll.a");
else
// Let the linker choose between libunwind.dll.a and libunwind.a
// depending on what's available, and depending on the -static flag
CmdArgs.push_back("-lunwind");
} else {
CmdArgs.push_back("-l:libunwind.so");
}
break;
}
if (AsNeeded)
CmdArgs.push_back(getAsNeededOption(TC, false));
}
static void AddLibgcc(const ToolChain &TC, const Driver &D,
ArgStringList &CmdArgs, const ArgList &Args) {
LibGccType LGT = getLibGccType(TC, D, Args);
if (LGT != LibGccType::SharedLibGcc)
CmdArgs.push_back("-lgcc");
AddUnwindLibrary(TC, D, CmdArgs, Args);
if (LGT == LibGccType::SharedLibGcc)
CmdArgs.push_back("-lgcc");
}
void tools::AddRunTimeLibs(const ToolChain &TC, const Driver &D,
ArgStringList &CmdArgs, const ArgList &Args) {
// Make use of compiler-rt if --rtlib option is used
ToolChain::RuntimeLibType RLT = TC.GetRuntimeLibType(Args);
switch (RLT) {
case ToolChain::RLT_CompilerRT:
CmdArgs.push_back(TC.getCompilerRTArgString(Args, "builtins"));
AddUnwindLibrary(TC, D, CmdArgs, Args);
break;
case ToolChain::RLT_Libgcc:
// Make sure libgcc is not used under MSVC environment by default
if (TC.getTriple().isKnownWindowsMSVCEnvironment()) {
// Issue error diagnostic if libgcc is explicitly specified
// through command line as --rtlib option argument.
if (Args.hasArg(options::OPT_rtlib_EQ)) {
TC.getDriver().Diag(diag::err_drv_unsupported_rtlib_for_platform)
<< Args.getLastArg(options::OPT_rtlib_EQ)->getValue() << "MSVC";
}
} else
AddLibgcc(TC, D, CmdArgs, Args);
break;
}
// On Android, the unwinder uses dl_iterate_phdr (or one of
// dl_unwind_find_exidx/__gnu_Unwind_Find_exidx on arm32) from libdl.so. For
// statically-linked executables, these functions come from libc.a instead.
if (TC.getTriple().isAndroid() && !Args.hasArg(options::OPT_static) &&
!Args.hasArg(options::OPT_static_pie))
CmdArgs.push_back("-ldl");
}
SmallString<128> tools::getStatsFileName(const llvm::opt::ArgList &Args,
const InputInfo &Output,
const InputInfo &Input,
const Driver &D) {
const Arg *A = Args.getLastArg(options::OPT_save_stats_EQ);
if (!A)
return {};
StringRef SaveStats = A->getValue();
SmallString<128> StatsFile;
if (SaveStats == "obj" && Output.isFilename()) {
StatsFile.assign(Output.getFilename());
llvm::sys::path::remove_filename(StatsFile);
} else if (SaveStats != "cwd") {
D.Diag(diag::err_drv_invalid_value) << A->getAsString(Args) << SaveStats;
return {};
}
StringRef BaseName = llvm::sys::path::filename(Input.getBaseInput());
llvm::sys::path::append(StatsFile, BaseName);
llvm::sys::path::replace_extension(StatsFile, "stats");
return StatsFile;
}
void tools::addMultilibFlag(bool Enabled, const char *const Flag,
Multilib::flags_list &Flags) {
Flags.push_back(std::string(Enabled ? "+" : "-") + Flag);
}
void tools::addX86AlignBranchArgs(const Driver &D, const ArgList &Args,
ArgStringList &CmdArgs, bool IsLTO) {
auto addArg = [&, IsLTO](const Twine &Arg) {
if (IsLTO) {
CmdArgs.push_back(Args.MakeArgString("-plugin-opt=" + Arg));
} else {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString(Arg));
}
};
if (Args.hasArg(options::OPT_mbranches_within_32B_boundaries)) {
addArg(Twine("-x86-branches-within-32B-boundaries"));
}
if (const Arg *A = Args.getLastArg(options::OPT_malign_branch_boundary_EQ)) {
StringRef Value = A->getValue();
unsigned Boundary;
if (Value.getAsInteger(10, Boundary) || Boundary < 16 ||
!llvm::isPowerOf2_64(Boundary)) {
D.Diag(diag::err_drv_invalid_argument_to_option)
<< Value << A->getOption().getName();
} else {
addArg("-x86-align-branch-boundary=" + Twine(Boundary));
}
}
if (const Arg *A = Args.getLastArg(options::OPT_malign_branch_EQ)) {
std::string AlignBranch;
for (StringRef T : A->getValues()) {
if (T != "fused" && T != "jcc" && T != "jmp" && T != "call" &&
T != "ret" && T != "indirect")
D.Diag(diag::err_drv_invalid_malign_branch_EQ)
<< T << "fused, jcc, jmp, call, ret, indirect";
if (!AlignBranch.empty())
AlignBranch += '+';
AlignBranch += T;
}
addArg("-x86-align-branch=" + Twine(AlignBranch));
}
if (const Arg *A = Args.getLastArg(options::OPT_mpad_max_prefix_size_EQ)) {
StringRef Value = A->getValue();
unsigned PrefixSize;
if (Value.getAsInteger(10, PrefixSize)) {
D.Diag(diag::err_drv_invalid_argument_to_option)
<< Value << A->getOption().getName();
} else {
addArg("-x86-pad-max-prefix-size=" + Twine(PrefixSize));
}
}
}
/// SDLSearch: Search for Static Device Library
/// The search for SDL bitcode files is consistent with how static host
/// libraries are discovered. That is, the -l option triggers a search for
/// files in a set of directories called the LINKPATH. The host library search
/// procedure looks for a specific filename in the LINKPATH. The filename for
/// a host library is lib<libname>.a or lib<libname>.so. For SDLs, there is an
/// ordered set of filenames that are searched. We call this ordered set of
/// filenames the SEARCH-ORDER. Since an SDL can either be device-type specific,
/// architecture specific, or generic across all architectures, a naming
/// convention and search order is used where the file name embeds the
/// architecture name <arch-name> (nvptx or amdgcn) and the GPU device type
/// <device-name> such as sm_30 and gfx906. <device-name> is absent in case of
/// device-independent SDLs. To reduce congestion in host library directories,
/// the search first looks for files in the “libdevice” subdirectory. SDLs that
/// are bc files begin with the prefix “lib”.
///
/// Machine-code SDLs can also be managed as an archive (*.a file). The
/// convention has been to use the prefix “lib”. To avoid confusion with host
/// archive libraries, we use prefix "libbc-" for the bitcode SDL archives.
///
bool tools::SDLSearch(const Driver &D, const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
SmallVector<std::string, 8> LibraryPaths, std::string Lib,
StringRef Arch, StringRef Target, bool isBitCodeSDL,
bool postClangLink) {
SmallVector<std::string, 12> SDLs;
std::string LibDeviceLoc = "/libdevice";
std::string LibBcPrefix = "/libbc-";
std::string LibPrefix = "/lib";
if (isBitCodeSDL) {
// SEARCH-ORDER for Bitcode SDLs:
// libdevice/libbc-<libname>-<arch-name>-<device-type>.a
// libbc-<libname>-<arch-name>-<device-type>.a
// libdevice/libbc-<libname>-<arch-name>.a
// libbc-<libname>-<arch-name>.a
// libdevice/libbc-<libname>.a
// libbc-<libname>.a
// libdevice/lib<libname>-<arch-name>-<device-type>.bc
// lib<libname>-<arch-name>-<device-type>.bc
// libdevice/lib<libname>-<arch-name>.bc
// lib<libname>-<arch-name>.bc
// libdevice/lib<libname>.bc
// lib<libname>.bc
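//
// Illustrative example (hypothetical library name): for "-lfoo" with Arch
// "amdgcn" and Target "gfx906", candidates include
// libdevice/libbc-foo-amdgcn-gfx906.a and libdevice/libfoo-amdgcn-gfx906.bc,
// falling back to libbc-foo.a and libfoo.bc in each LINKPATH directory.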
for (StringRef Base : {LibBcPrefix, LibPrefix}) {
const auto *Ext = Base.contains(LibBcPrefix) ? ".a" : ".bc";
for (auto Suffix : {Twine(Lib + "-" + Arch + "-" + Target).str(),
Twine(Lib + "-" + Arch).str(), Twine(Lib).str()}) {
SDLs.push_back(Twine(LibDeviceLoc + Base + Suffix + Ext).str());
SDLs.push_back(Twine(Base + Suffix + Ext).str());
}
}
} else {
// SEARCH-ORDER for Machine-code SDLs:
// libdevice/lib<libname>-<arch-name>-<device-type>.a
// lib<libname>-<arch-name>-<device-type>.a
// libdevice/lib<libname>-<arch-name>.a
// lib<libname>-<arch-name>.a
const auto *Ext = ".a";
for (auto Suffix : {Twine(Lib + "-" + Arch + "-" + Target).str(),
Twine(Lib + "-" + Arch).str()}) {
SDLs.push_back(Twine(LibDeviceLoc + LibPrefix + Suffix + Ext).str());
SDLs.push_back(Twine(LibPrefix + Suffix + Ext).str());
}
}
// The CUDA toolchain does not use a global device llvm-link before the LLVM
// backend generates ptx. So currently, the use of bitcode SDL for nvptx is
// only possible with post-clang-cc1 linking. Clang cc1 has a feature that
// will link libraries after clang compilation while the LLVM IR is still in
// memory. This utilizes a clang cc1 option called “-mlink-builtin-bitcode”.
// This is a clang -cc1 option that is generated by the clang driver. The
// option value must be a full path to an existing file.
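// Illustrative result (hypothetical path): when
// libdevice/libbc-foo-nvptx-sm_70.a is found under a LINKPATH entry, the
// driver appends "-mlink-builtin-bitcode <LINKPATH>/libdevice/libbc-foo-nvptx-sm_70.a"
// to the cc1 invocation.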
bool FoundSDL = false;
for (auto LPath : LibraryPaths) {
for (auto SDL : SDLs) {
auto FullName = Twine(LPath + SDL).str();
if (llvm::sys::fs::exists(FullName)) {
if (postClangLink)
CC1Args.push_back("-mlink-builtin-bitcode");
CC1Args.push_back(DriverArgs.MakeArgString(FullName));
FoundSDL = true;
break;
}
}
if (FoundSDL)
break;
}
return FoundSDL;
}
/// Search whether a user-provided archive file lib<libname>.a exists in any of
/// the library paths. If so, add a new command to clang-offload-bundler to
/// unbundle this archive and create a temporary device specific archive. Name
/// of this SDL is passed to the llvm-link (for amdgcn) or to the
/// clang-nvlink-wrapper (for nvptx) commands by the driver.
bool tools::GetSDLFromOffloadArchive(
Compilation &C, const Driver &D, const Tool &T, const JobAction &JA,
const InputInfoList &Inputs, const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args, SmallVector<std::string, 8> LibraryPaths,
StringRef Lib, StringRef Arch, StringRef Target, bool isBitCodeSDL,
bool postClangLink) {
// We don't support bitcode archive bundles for nvptx
if (isBitCodeSDL && Arch.contains("nvptx"))
return false;
bool FoundAOB = false;
SmallVector<std::string, 2> AOBFileNames;
std::string ArchiveOfBundles;
for (auto LPath : LibraryPaths) {
ArchiveOfBundles.clear();
AOBFileNames.push_back(Twine(LPath + "/libdevice/lib" + Lib + ".a").str());
AOBFileNames.push_back(Twine(LPath + "/lib" + Lib + ".a").str());
for (auto AOB : AOBFileNames) {
if (llvm::sys::fs::exists(AOB)) {
ArchiveOfBundles = AOB;
FoundAOB = true;
break;
}
}
if (!FoundAOB)
continue;
StringRef Prefix = isBitCodeSDL ? "libbc-" : "lib";
std::string OutputLib = D.GetTemporaryPath(
Twine(Prefix + Lib + "-" + Arch + "-" + Target).str(), "a");
C.addTempFile(C.getArgs().MakeArgString(OutputLib));
ArgStringList CmdArgs;
SmallString<128> DeviceTriple;
DeviceTriple += Action::GetOffloadKindName(JA.getOffloadingDeviceKind());
DeviceTriple += '-';
std::string NormalizedTriple = T.getToolChain().getTriple().normalize();
DeviceTriple += NormalizedTriple;
if (!Target.empty()) {
DeviceTriple += '-';
DeviceTriple += Target;
}
std::string UnbundleArg("-unbundle");
std::string TypeArg("-type=a");
std::string InputArg("-inputs=" + ArchiveOfBundles);
std::string OffloadArg("-targets=" + std::string(DeviceTriple));
std::string OutputArg("-outputs=" + OutputLib);
const char *UBProgram = DriverArgs.MakeArgString(
T.getToolChain().GetProgramPath("clang-offload-bundler"));
ArgStringList UBArgs;
UBArgs.push_back(C.getArgs().MakeArgString(UnbundleArg));
UBArgs.push_back(C.getArgs().MakeArgString(TypeArg));
UBArgs.push_back(C.getArgs().MakeArgString(InputArg));
UBArgs.push_back(C.getArgs().MakeArgString(OffloadArg));
UBArgs.push_back(C.getArgs().MakeArgString(OutputArg));
// Add this flag to not exit from clang-offload-bundler if no compatible
// code object is found in a heterogeneous archive library.
std::string AdditionalArgs("-allow-missing-bundles");
UBArgs.push_back(C.getArgs().MakeArgString(AdditionalArgs));
C.addCommand(std::make_unique<Command>(
JA, T, ResponseFileSupport::AtFileCurCP(), UBProgram, UBArgs, Inputs,
InputInfo(&JA, C.getArgs().MakeArgString(OutputLib))));
if (postClangLink)
CC1Args.push_back("-mlink-builtin-bitcode");
CC1Args.push_back(DriverArgs.MakeArgString(OutputLib));
break;
}
return FoundAOB;
}
// Wrapper function used by the driver for adding SDLs during the link phase.
void tools::AddStaticDeviceLibsLinking(Compilation &C, const Tool &T,
const JobAction &JA,
const InputInfoList &Inputs,
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
StringRef Arch, StringRef Target,
bool isBitCodeSDL, bool postClangLink) {
AddStaticDeviceLibs(&C, &T, &JA, &Inputs, C.getDriver(), DriverArgs, CC1Args,
Arch, Target, isBitCodeSDL, postClangLink);
}
// Wrapper function used for post-clang linking of bitcode SDLs for nvptx by
// the CUDA toolchain.
void tools::AddStaticDeviceLibsPostLinking(const Driver &D,
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
StringRef Arch, StringRef Target,
bool isBitCodeSDL, bool postClangLink) {
AddStaticDeviceLibs(nullptr, nullptr, nullptr, nullptr, D, DriverArgs,
CC1Args, Arch, Target, isBitCodeSDL, postClangLink);
}
// User-defined Static Device Libraries (SDLs) can be passed to clang for
// GPU offload compilation. Like static host libraries, the use of an SDL is
// specified with the -l command line option. The primary difference between
// host libraries and SDLs is the filenames for SDLs (refer to SEARCH-ORDER for Bitcode SDLs
// and SEARCH-ORDER for Machine-code SDLs for the naming convention).
// SDLs are of following types:
//
// * Bitcode SDLs: They can either be a *.bc file or an archive of *.bc files.
// For NVPTX, these libraries are post-clang linked following each
// compilation. For AMDGPU, these libraries are linked one time
// during the application link phase.
//
// * Machine-code SDLs: They are archive files. For NVPTX, the archive members
// contain cubin for Nvidia GPUs and are linked one time during the
// link phase by the CUDA SDK linker called nvlink. For AMDGPU, the
// process for machine code SDLs is still in development. But they
// will be linked by the LLVM tool lld.
//
// * Bundled objects that contain both host and device codes: Bundled objects
// may also contain library code compiled from source. For NVPTX, the
// bundle contains cubin. For AMDGPU, the bundle contains bitcode.
//
// For Bitcode and Machine-code SDLs, current compiler toolchains hardcode the
// inclusion of specific SDLs such as math libraries and the OpenMP device
// library libomptarget.
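//
// Illustrative example (hypothetical link line): given "clang -fopenmp ...
// -lfoo -lm", "foo" is added to SDLNames and searched as an SDL below, while
// names listed in HostOnlyArchives are intended to be filtered out as
// host-only libraries.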
void tools::AddStaticDeviceLibs(Compilation *C, const Tool *T,
const JobAction *JA,
const InputInfoList *Inputs, const Driver &D,
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
StringRef Arch, StringRef Target,
bool isBitCodeSDL, bool postClangLink) {
SmallVector<std::string, 8> LibraryPaths;
// Add search directories from LIBRARY_PATH env variable
llvm::Optional<std::string> LibPath =
llvm::sys::Process::GetEnv("LIBRARY_PATH");
if (LibPath) {
SmallVector<StringRef, 8> Frags;
const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
for (StringRef Path : Frags)
LibraryPaths.emplace_back(Path.trim());
}
// Add directories from user-specified -L options
for (std::string Search_Dir : DriverArgs.getAllArgValues(options::OPT_L))
LibraryPaths.emplace_back(Search_Dir);
// Add path to lib-debug folders
SmallString<256> DefaultLibPath = llvm::sys::path::parent_path(D.Dir);
llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX);
LibraryPaths.emplace_back(DefaultLibPath.c_str());
// Build the list of Static Device Libraries (SDLs) specified by the -l option
llvm::SmallSet<std::string, 16> SDLNames;
static const StringRef HostOnlyArchives[] = {
"omp", "cudart", "m", "gcc", "gcc_s", "pthread", "hip_hcc"};
for (auto SDLName : DriverArgs.getAllArgValues(options::OPT_l)) {
if (!HostOnlyArchives->contains(SDLName)) {
SDLNames.insert(SDLName);
}
}
// The search stops as soon as an SDL file is found. The driver then provides
// the full filename of the SDL to the llvm-link or clang-nvlink-wrapper
// command. If no SDL is found after searching each LINKPATH with
// SEARCH-ORDER, it is possible that an archive file lib<libname>.a exists
// and may contain bundled object files.
for (auto SDLName : SDLNames) {
// This is the only call to SDLSearch
if (!SDLSearch(D, DriverArgs, CC1Args, LibraryPaths, SDLName, Arch, Target,
isBitCodeSDL, postClangLink)) {
GetSDLFromOffloadArchive(*C, D, *T, *JA, *Inputs, DriverArgs, CC1Args,
LibraryPaths, SDLName, Arch, Target,
isBitCodeSDL, postClangLink);
}
}
}
static llvm::opt::Arg *
getAMDGPUCodeObjectArgument(const Driver &D, const llvm::opt::ArgList &Args) {
// The last of -mcode-object-v3, -mno-code-object-v3 and
// -mcode-object-version=<version> wins.
return Args.getLastArg(options::OPT_mcode_object_v3_legacy,
options::OPT_mno_code_object_v3_legacy,
options::OPT_mcode_object_version_EQ);
}
void tools::checkAMDGPUCodeObjectVersion(const Driver &D,
const llvm::opt::ArgList &Args) {
const unsigned MinCodeObjVer = 2;
const unsigned MaxCodeObjVer = 4;
// Emit warnings for legacy options even if they are overridden.
if (Args.hasArg(options::OPT_mno_code_object_v3_legacy))
D.Diag(diag::warn_drv_deprecated_arg) << "-mno-code-object-v3"
<< "-mcode-object-version=2";
if (Args.hasArg(options::OPT_mcode_object_v3_legacy))
D.Diag(diag::warn_drv_deprecated_arg) << "-mcode-object-v3"
<< "-mcode-object-version=3";
if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) {
if (CodeObjArg->getOption().getID() ==
options::OPT_mcode_object_version_EQ) {
unsigned CodeObjVer = MaxCodeObjVer;
auto Remnant =
StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer);
if (Remnant || CodeObjVer < MinCodeObjVer || CodeObjVer > MaxCodeObjVer)
D.Diag(diag::err_drv_invalid_int_value)
<< CodeObjArg->getAsString(Args) << CodeObjArg->getValue();
}
}
}
unsigned tools::getAMDGPUCodeObjectVersion(const Driver &D,
const llvm::opt::ArgList &Args) {
unsigned CodeObjVer = 4; // default
if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) {
if (CodeObjArg->getOption().getID() ==
options::OPT_mno_code_object_v3_legacy) {
CodeObjVer = 2;
} else if (CodeObjArg->getOption().getID() ==
options::OPT_mcode_object_v3_legacy) {
CodeObjVer = 3;
} else {
StringRef(CodeObjArg->getValue()).getAsInteger(0, CodeObjVer);
}
}
return CodeObjVer;
}
bool tools::haveAMDGPUCodeObjectVersionArgument(
const Driver &D, const llvm::opt::ArgList &Args) {
return getAMDGPUCodeObjectArgument(D, Args) != nullptr;
}
void tools::addMachineOutlinerArgs(const Driver &D,
const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs,
const llvm::Triple &Triple, bool IsLTO) {
auto addArg = [&, IsLTO](const Twine &Arg) {
if (IsLTO) {
CmdArgs.push_back(Args.MakeArgString("-plugin-opt=" + Arg));
} else {
CmdArgs.push_back("-mllvm");
CmdArgs.push_back(Args.MakeArgString(Arg));
}
};
if (Arg *A = Args.getLastArg(options::OPT_moutline,
options::OPT_mno_outline)) {
if (A->getOption().matches(options::OPT_moutline)) {
// We only support -moutline in AArch64 and ARM targets right now. If
// we're not compiling for these, emit a warning and ignore the flag.
// Otherwise, add the proper mllvm flags.
if (!(Triple.isARM() || Triple.isThumb() ||
Triple.getArch() == llvm::Triple::aarch64 ||
Triple.getArch() == llvm::Triple::aarch64_32)) {
D.Diag(diag::warn_drv_moutline_unsupported_opt) << Triple.getArchName();
} else {
addArg(Twine("-enable-machine-outliner"));
}
} else {
// Disable all outlining behaviour.
addArg(Twine("-enable-machine-outliner=never"));
}
}
}
void tools::addOpenMPDeviceRTL(const Driver &D,
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
StringRef BitcodeSuffix,
const llvm::Triple &Triple) {
SmallVector<StringRef, 8> LibraryPaths;
// Add path to clang lib / lib64 folder.
SmallString<256> DefaultLibPath = llvm::sys::path::parent_path(D.Dir);
llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX);
LibraryPaths.emplace_back(DefaultLibPath.c_str());
// Add user defined library paths from LIBRARY_PATH.
llvm::Optional<std::string> LibPath =
llvm::sys::Process::GetEnv("LIBRARY_PATH");
if (LibPath) {
SmallVector<StringRef, 8> Frags;
const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
for (StringRef Path : Frags)
LibraryPaths.emplace_back(Path.trim());
}
OptSpecifier LibomptargetBCPathOpt =
Triple.isAMDGCN() ? options::OPT_libomptarget_amdgcn_bc_path_EQ
: options::OPT_libomptarget_nvptx_bc_path_EQ;
StringRef ArchPrefix = Triple.isAMDGCN() ? "amdgcn" : "nvptx";
std::string LibOmpTargetName = "libomptarget-" + BitcodeSuffix.str() + ".bc";
// First check whether the user specified a bc library.
if (const Arg *A = DriverArgs.getLastArg(LibomptargetBCPathOpt)) {
SmallString<128> LibOmpTargetFile(A->getValue());
if (llvm::sys::fs::exists(LibOmpTargetFile) &&
llvm::sys::fs::is_directory(LibOmpTargetFile)) {
llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName);
}
if (llvm::sys::fs::exists(LibOmpTargetFile)) {
CC1Args.push_back("-mlink-builtin-bitcode");
CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
} else {
D.Diag(diag::err_drv_omp_offload_target_bcruntime_not_found)
<< LibOmpTargetFile;
}
} else {
bool FoundBCLibrary = false;
for (StringRef LibraryPath : LibraryPaths) {
SmallString<128> LibOmpTargetFile(LibraryPath);
llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName);
if (llvm::sys::fs::exists(LibOmpTargetFile)) {
CC1Args.push_back("-mlink-builtin-bitcode");
CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
FoundBCLibrary = true;
break;
}
}
if (!FoundBCLibrary)
D.Diag(diag::err_drv_omp_offload_target_missingbcruntime)
<< LibOmpTargetName << ArchPrefix;
}
}
diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/CommonArgs.h b/contrib/llvm-project/clang/lib/Driver/ToolChains/CommonArgs.h
index 646fa76949b7..23012dc247e4 100644
--- a/contrib/llvm-project/clang/lib/Driver/ToolChains/CommonArgs.h
+++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/CommonArgs.h
@@ -1,197 +1,199 @@
//===--- CommonArgs.h - Args handling for multiple toolchains ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_COMMONARGS_H
#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_COMMONARGS_H
#include "clang/Driver/Driver.h"
#include "clang/Driver/InputInfo.h"
#include "clang/Driver/Multilib.h"
#include "clang/Driver/Tool.h"
#include "clang/Driver/ToolChain.h"
#include "llvm/Support/CodeGen.h"
namespace clang {
namespace driver {
namespace tools {
void addPathIfExists(const Driver &D, const Twine &Path,
ToolChain::path_list &Paths);
void AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs,
const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs, const JobAction &JA);
void addLinkerCompressDebugSectionsOption(const ToolChain &TC,
const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs);
void claimNoWarnArgs(const llvm::opt::ArgList &Args);
bool addSanitizerRuntimes(const ToolChain &TC, const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs);
void linkSanitizerRuntimeDeps(const ToolChain &TC,
llvm::opt::ArgStringList &CmdArgs);
bool addXRayRuntime(const ToolChain &TC, const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs);
void linkXRayRuntimeDeps(const ToolChain &TC,
llvm::opt::ArgStringList &CmdArgs);
void AddRunTimeLibs(const ToolChain &TC, const Driver &D,
llvm::opt::ArgStringList &CmdArgs,
const llvm::opt::ArgList &Args);
void AddStaticDeviceLibsLinking(Compilation &C, const Tool &T,
const JobAction &JA,
const InputInfoList &Inputs,
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CmdArgs,
StringRef Arch, StringRef Target,
bool isBitCodeSDL, bool postClangLink);
void AddStaticDeviceLibsPostLinking(const Driver &D,
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CmdArgs,
StringRef Arch, StringRef Target,
bool isBitCodeSDL, bool postClangLink);
void AddStaticDeviceLibs(Compilation *C, const Tool *T, const JobAction *JA,
const InputInfoList *Inputs, const Driver &D,
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CmdArgs, StringRef Arch,
StringRef Target, bool isBitCodeSDL,
bool postClangLink);
bool SDLSearch(const Driver &D, const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CmdArgs,
SmallVector<std::string, 8> LibraryPaths, std::string Lib,
StringRef Arch, StringRef Target, bool isBitCodeSDL,
bool postClangLink);
bool GetSDLFromOffloadArchive(Compilation &C, const Driver &D, const Tool &T,
const JobAction &JA, const InputInfoList &Inputs,
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
SmallVector<std::string, 8> LibraryPaths,
StringRef Lib, StringRef Arch, StringRef Target,
bool isBitCodeSDL, bool postClangLink);
const char *SplitDebugName(const JobAction &JA, const llvm::opt::ArgList &Args,
const InputInfo &Input, const InputInfo &Output);
void SplitDebugInfo(const ToolChain &TC, Compilation &C, const Tool &T,
const JobAction &JA, const llvm::opt::ArgList &Args,
const InputInfo &Output, const char *OutFile);
void addLTOOptions(const ToolChain &ToolChain, const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs, const InputInfo &Output,
const InputInfo &Input, bool IsThinLTO);
std::tuple<llvm::Reloc::Model, unsigned, bool>
ParsePICArgs(const ToolChain &ToolChain, const llvm::opt::ArgList &Args);
unsigned ParseFunctionAlignment(const ToolChain &TC,
const llvm::opt::ArgList &Args);
unsigned ParseDebugDefaultVersion(const ToolChain &TC,
const llvm::opt::ArgList &Args);
void AddAssemblerKPIC(const ToolChain &ToolChain,
const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs);
void addOpenMPRuntimeSpecificRPath(const ToolChain &TC,
const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs);
void addArchSpecificRPath(const ToolChain &TC, const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs);
/// Returns true, if an OpenMP runtime has been added.
bool addOpenMPRuntime(llvm::opt::ArgStringList &CmdArgs, const ToolChain &TC,
const llvm::opt::ArgList &Args,
bool ForceStaticHostRuntime = false,
bool IsOffloadingHost = false, bool GompNeedsRT = false);
+const char *getAsNeededOption(const ToolChain &TC, bool as_needed);
+
llvm::opt::Arg *getLastProfileUseArg(const llvm::opt::ArgList &Args);
llvm::opt::Arg *getLastProfileSampleUseArg(const llvm::opt::ArgList &Args);
bool isObjCAutoRefCount(const llvm::opt::ArgList &Args);
llvm::StringRef getLTOParallelism(const llvm::opt::ArgList &Args,
const Driver &D);
bool areOptimizationsEnabled(const llvm::opt::ArgList &Args);
bool isUseSeparateSections(const llvm::Triple &Triple);
/// \p EnvVar is split by system delimiter for environment variables.
/// If \p ArgName is "-I", "-L", or an empty string, each entry from \p EnvVar
/// is prefixed by \p ArgName then added to \p Args. Otherwise, for each
/// entry of \p EnvVar, \p ArgName is added to \p Args first, then the entry
/// itself is added.
void addDirectoryList(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs, const char *ArgName,
const char *EnvVar);
void AddTargetFeature(const llvm::opt::ArgList &Args,
std::vector<StringRef> &Features,
llvm::opt::OptSpecifier OnOpt,
llvm::opt::OptSpecifier OffOpt, StringRef FeatureName);
std::string getCPUName(const Driver &D, const llvm::opt::ArgList &Args,
const llvm::Triple &T, bool FromAs = false);
/// Iterate \p Args and convert -mxxx to +xxx and -mno-xxx to -xxx and
/// append it to \p Features.
///
/// Note: Since \p Features may contain default values before calling
/// this function, or may be appended with entries to override arguments,
/// entries in \p Features are not unique.
void handleTargetFeaturesGroup(const llvm::opt::ArgList &Args,
std::vector<StringRef> &Features,
llvm::opt::OptSpecifier Group);
/// If there are multiple +xxx or -xxx features, keep the last one.
std::vector<StringRef>
unifyTargetFeatures(const std::vector<StringRef> &Features);
/// Handles the -save-stats option and returns the filename to save statistics
/// to.
SmallString<128> getStatsFileName(const llvm::opt::ArgList &Args,
const InputInfo &Output,
const InputInfo &Input, const Driver &D);
/// \p Flag must be a flag accepted by the driver with its leading '-' removed,
/// otherwise '-print-multi-lib' will not emit them correctly.
void addMultilibFlag(bool Enabled, const char *const Flag,
Multilib::flags_list &Flags);
void addX86AlignBranchArgs(const Driver &D, const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs, bool IsLTO);
void checkAMDGPUCodeObjectVersion(const Driver &D,
const llvm::opt::ArgList &Args);
unsigned getAMDGPUCodeObjectVersion(const Driver &D,
const llvm::opt::ArgList &Args);
bool haveAMDGPUCodeObjectVersionArgument(const Driver &D,
const llvm::opt::ArgList &Args);
void addMachineOutlinerArgs(const Driver &D, const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs,
const llvm::Triple &Triple, bool IsLTO);
void addOpenMPDeviceRTL(const Driver &D, const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
StringRef BitcodeSuffix, const llvm::Triple &Triple);
} // end namespace tools
} // end namespace driver
} // end namespace clang
#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_COMMONARGS_H
diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/DragonFly.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/DragonFly.cpp
index 9568b47e89e6..8cfec6a6c4e0 100644
--- a/contrib/llvm-project/clang/lib/Driver/ToolChains/DragonFly.cpp
+++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/DragonFly.cpp
@@ -1,201 +1,204 @@
//===--- DragonFly.cpp - DragonFly ToolChain Implementations ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "DragonFly.h"
#include "CommonArgs.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/Options.h"
#include "llvm/Option/ArgList.h"
using namespace clang::driver;
using namespace clang::driver::tools;
using namespace clang::driver::toolchains;
using namespace clang;
using namespace llvm::opt;
/// DragonFly Tools
// For now, the DragonFly Assembler does just about the same as for
// FreeBSD, but this may change soon.
void dragonfly::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
claimNoWarnArgs(Args);
ArgStringList CmdArgs;
// When building 32-bit code on DragonFly/pc64, we have to explicitly
// instruct as in the base system to assemble 32-bit code.
if (getToolChain().getArch() == llvm::Triple::x86)
CmdArgs.push_back("--32");
Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
for (const auto &II : Inputs)
CmdArgs.push_back(II.getFilename());
const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as"));
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileCurCP(),
Exec, CmdArgs, Inputs, Output));
}
void dragonfly::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
const Driver &D = getToolChain().getDriver();
ArgStringList CmdArgs;
if (!D.SysRoot.empty())
CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));
CmdArgs.push_back("--eh-frame-hdr");
if (Args.hasArg(options::OPT_static)) {
CmdArgs.push_back("-Bstatic");
} else {
if (Args.hasArg(options::OPT_rdynamic))
CmdArgs.push_back("-export-dynamic");
if (Args.hasArg(options::OPT_shared))
CmdArgs.push_back("-Bshareable");
else {
CmdArgs.push_back("-dynamic-linker");
CmdArgs.push_back("/usr/libexec/ld-elf.so.2");
}
CmdArgs.push_back("--hash-style=gnu");
CmdArgs.push_back("--enable-new-dtags");
}
// When building 32-bit code on DragonFly/pc64, we have to explicitly
// instruct ld in the base system to link 32-bit code.
if (getToolChain().getArch() == llvm::Triple::x86) {
CmdArgs.push_back("-m");
CmdArgs.push_back("elf_i386");
}
if (Output.isFilename()) {
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
} else {
assert(Output.isNothing() && "Invalid output.");
}
- if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
+ if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles,
+ options::OPT_r)) {
if (!Args.hasArg(options::OPT_shared)) {
if (Args.hasArg(options::OPT_pg))
CmdArgs.push_back(
Args.MakeArgString(getToolChain().GetFilePath("gcrt1.o")));
else {
if (Args.hasArg(options::OPT_pie))
CmdArgs.push_back(
Args.MakeArgString(getToolChain().GetFilePath("Scrt1.o")));
else
CmdArgs.push_back(
Args.MakeArgString(getToolChain().GetFilePath("crt1.o")));
}
}
CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crti.o")));
if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie))
CmdArgs.push_back(
Args.MakeArgString(getToolChain().GetFilePath("crtbeginS.o")));
else
CmdArgs.push_back(
Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o")));
}
Args.AddAllArgs(CmdArgs,
{options::OPT_L, options::OPT_T_Group, options::OPT_e});
AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
- if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
+ if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs,
+ options::OPT_r)) {
CmdArgs.push_back("-L/usr/lib/gcc80");
if (!Args.hasArg(options::OPT_static)) {
CmdArgs.push_back("-rpath");
CmdArgs.push_back("/usr/lib/gcc80");
}
if (D.CCCIsCXX()) {
if (getToolChain().ShouldLinkCXXStdlib(Args))
getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs);
CmdArgs.push_back("-lm");
}
if (Args.hasArg(options::OPT_pthread))
CmdArgs.push_back("-lpthread");
if (!Args.hasArg(options::OPT_nolibc)) {
CmdArgs.push_back("-lc");
}
if (Args.hasArg(options::OPT_static) ||
Args.hasArg(options::OPT_static_libgcc)) {
CmdArgs.push_back("-lgcc");
CmdArgs.push_back("-lgcc_eh");
} else {
if (Args.hasArg(options::OPT_shared_libgcc)) {
CmdArgs.push_back("-lgcc_pic");
if (!Args.hasArg(options::OPT_shared))
CmdArgs.push_back("-lgcc");
} else {
CmdArgs.push_back("-lgcc");
CmdArgs.push_back("--as-needed");
CmdArgs.push_back("-lgcc_pic");
CmdArgs.push_back("--no-as-needed");
}
}
}
- if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
+ if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles,
+ options::OPT_r)) {
if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie))
CmdArgs.push_back(
Args.MakeArgString(getToolChain().GetFilePath("crtendS.o")));
else
CmdArgs.push_back(
Args.MakeArgString(getToolChain().GetFilePath("crtend.o")));
CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtn.o")));
}
getToolChain().addProfileRTLibs(Args, CmdArgs);
const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath());
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileCurCP(),
Exec, CmdArgs, Inputs, Output));
}
/// DragonFly - DragonFly tool chain which can call as(1) and ld(1) directly.
DragonFly::DragonFly(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
: Generic_ELF(D, Triple, Args) {
// Path mangling to find libexec
getProgramPaths().push_back(getDriver().getInstalledDir());
if (getDriver().getInstalledDir() != getDriver().Dir)
getProgramPaths().push_back(getDriver().Dir);
getFilePaths().push_back(getDriver().Dir + "/../lib");
getFilePaths().push_back("/usr/lib");
getFilePaths().push_back("/usr/lib/gcc80");
}
Tool *DragonFly::buildAssembler() const {
return new tools::dragonfly::Assembler(*this);
}
Tool *DragonFly::buildLinker() const {
return new tools::dragonfly::Linker(*this);
}
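The recurring change in this DragonFly linker job and the NetBSD one that follows is the extra options::OPT_r in the Args.hasArg() guards: hasArg() is satisfied by any of the listed options, so a relocatable link requested with -r now skips the crt*.o startup objects and the default libraries, the same way -nostdlib already did. A minimal sketch of the guard pattern, with a hypothetical helper name and only two startup objects shown:
static void maybeAddStartupObjects(const llvm::opt::ArgList &Args,
                                   llvm::opt::ArgStringList &CmdArgs,
                                   const ToolChain &TC) {
  // Any of -nostdlib, -nostartfiles or -r suppresses the startup objects;
  // the output of -r is linked again later, where crt files and default
  // libraries would otherwise be duplicated.
  if (Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles,
                  options::OPT_r))
    return;
  CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crt1.o")));
  CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath("crti.o")));
}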
diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/NetBSD.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/NetBSD.cpp
index 37b1fc5215ff..d1eda14a51f0 100644
--- a/contrib/llvm-project/clang/lib/Driver/ToolChains/NetBSD.cpp
+++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/NetBSD.cpp
@@ -1,517 +1,520 @@
//===--- NetBSD.cpp - NetBSD ToolChain Implementations ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "NetBSD.h"
#include "Arch/ARM.h"
#include "Arch/Mips.h"
#include "Arch/Sparc.h"
#include "CommonArgs.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/SanitizerArgs.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/VirtualFileSystem.h"
using namespace clang::driver;
using namespace clang::driver::tools;
using namespace clang::driver::toolchains;
using namespace clang;
using namespace llvm::opt;
void netbsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
const toolchains::NetBSD &ToolChain =
static_cast<const toolchains::NetBSD &>(getToolChain());
const Driver &D = ToolChain.getDriver();
const llvm::Triple &Triple = ToolChain.getTriple();
claimNoWarnArgs(Args);
ArgStringList CmdArgs;
// GNU as needs different flags for creating the correct output format
// on architectures with different ABIs or optional feature sets.
switch (ToolChain.getArch()) {
case llvm::Triple::x86:
CmdArgs.push_back("--32");
break;
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb: {
StringRef MArch, MCPU;
arm::getARMArchCPUFromArgs(Args, MArch, MCPU, /*FromAs*/ true);
std::string Arch = arm::getARMTargetCPU(MCPU, MArch, Triple);
CmdArgs.push_back(Args.MakeArgString("-mcpu=" + Arch));
break;
}
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el: {
StringRef CPUName;
StringRef ABIName;
mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName);
CmdArgs.push_back("-march");
CmdArgs.push_back(CPUName.data());
CmdArgs.push_back("-mabi");
CmdArgs.push_back(mips::getGnuCompatibleMipsABIName(ABIName).data());
if (Triple.isLittleEndian())
CmdArgs.push_back("-EL");
else
CmdArgs.push_back("-EB");
AddAssemblerKPIC(ToolChain, Args, CmdArgs);
break;
}
case llvm::Triple::sparc:
case llvm::Triple::sparcel: {
CmdArgs.push_back("-32");
std::string CPU = getCPUName(D, Args, Triple);
CmdArgs.push_back(sparc::getSparcAsmModeForCPU(CPU, Triple));
AddAssemblerKPIC(ToolChain, Args, CmdArgs);
break;
}
case llvm::Triple::sparcv9: {
CmdArgs.push_back("-64");
std::string CPU = getCPUName(D, Args, Triple);
CmdArgs.push_back(sparc::getSparcAsmModeForCPU(CPU, Triple));
AddAssemblerKPIC(ToolChain, Args, CmdArgs);
break;
}
default:
break;
}
Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
for (const auto &II : Inputs)
CmdArgs.push_back(II.getFilename());
const char *Exec = Args.MakeArgString((ToolChain.GetProgramPath("as")));
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileCurCP(),
Exec, CmdArgs, Inputs, Output));
}
void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
const toolchains::NetBSD &ToolChain =
static_cast<const toolchains::NetBSD &>(getToolChain());
const Driver &D = ToolChain.getDriver();
const llvm::Triple &Triple = ToolChain.getTriple();
ArgStringList CmdArgs;
if (!D.SysRoot.empty())
CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));
CmdArgs.push_back("--eh-frame-hdr");
if (Args.hasArg(options::OPT_static)) {
CmdArgs.push_back("-Bstatic");
if (Args.hasArg(options::OPT_pie)) {
Args.AddAllArgs(CmdArgs, options::OPT_pie);
CmdArgs.push_back("--no-dynamic-linker");
}
} else {
if (Args.hasArg(options::OPT_rdynamic))
CmdArgs.push_back("-export-dynamic");
if (Args.hasArg(options::OPT_shared)) {
CmdArgs.push_back("-Bshareable");
} else {
Args.AddAllArgs(CmdArgs, options::OPT_pie);
CmdArgs.push_back("-dynamic-linker");
CmdArgs.push_back("/libexec/ld.elf_so");
}
}
// Many NetBSD architectures support more than one ABI.
// Determine the correct emulation for ld.
switch (ToolChain.getArch()) {
case llvm::Triple::x86:
CmdArgs.push_back("-m");
CmdArgs.push_back("elf_i386");
break;
case llvm::Triple::arm:
case llvm::Triple::thumb:
CmdArgs.push_back("-m");
switch (Triple.getEnvironment()) {
case llvm::Triple::EABI:
case llvm::Triple::GNUEABI:
CmdArgs.push_back("armelf_nbsd_eabi");
break;
case llvm::Triple::EABIHF:
case llvm::Triple::GNUEABIHF:
CmdArgs.push_back("armelf_nbsd_eabihf");
break;
default:
CmdArgs.push_back("armelf_nbsd");
break;
}
break;
case llvm::Triple::armeb:
case llvm::Triple::thumbeb:
arm::appendBE8LinkFlag(Args, CmdArgs, ToolChain.getEffectiveTriple());
CmdArgs.push_back("-m");
switch (Triple.getEnvironment()) {
case llvm::Triple::EABI:
case llvm::Triple::GNUEABI:
CmdArgs.push_back("armelfb_nbsd_eabi");
break;
case llvm::Triple::EABIHF:
case llvm::Triple::GNUEABIHF:
CmdArgs.push_back("armelfb_nbsd_eabihf");
break;
default:
CmdArgs.push_back("armelfb_nbsd");
break;
}
break;
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
if (mips::hasMipsAbiArg(Args, "32")) {
CmdArgs.push_back("-m");
if (ToolChain.getArch() == llvm::Triple::mips64)
CmdArgs.push_back("elf32btsmip");
else
CmdArgs.push_back("elf32ltsmip");
} else if (mips::hasMipsAbiArg(Args, "64")) {
CmdArgs.push_back("-m");
if (ToolChain.getArch() == llvm::Triple::mips64)
CmdArgs.push_back("elf64btsmip");
else
CmdArgs.push_back("elf64ltsmip");
}
break;
case llvm::Triple::ppc:
CmdArgs.push_back("-m");
CmdArgs.push_back("elf32ppc_nbsd");
break;
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
CmdArgs.push_back("-m");
CmdArgs.push_back("elf64ppc");
break;
case llvm::Triple::sparc:
CmdArgs.push_back("-m");
CmdArgs.push_back("elf32_sparc");
break;
case llvm::Triple::sparcv9:
CmdArgs.push_back("-m");
CmdArgs.push_back("elf64_sparc");
break;
default:
break;
}
if (Output.isFilename()) {
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
} else {
assert(Output.isNothing() && "Invalid output.");
}
- if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
+ if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles,
+ options::OPT_r)) {
if (!Args.hasArg(options::OPT_shared)) {
CmdArgs.push_back(
Args.MakeArgString(ToolChain.GetFilePath("crt0.o")));
}
CmdArgs.push_back(
Args.MakeArgString(ToolChain.GetFilePath("crti.o")));
if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie)) {
CmdArgs.push_back(
Args.MakeArgString(ToolChain.GetFilePath("crtbeginS.o")));
} else {
CmdArgs.push_back(
Args.MakeArgString(ToolChain.GetFilePath("crtbegin.o")));
}
}
Args.AddAllArgs(CmdArgs, options::OPT_L);
Args.AddAllArgs(CmdArgs, options::OPT_T_Group);
Args.AddAllArgs(CmdArgs, options::OPT_e);
Args.AddAllArgs(CmdArgs, options::OPT_s);
Args.AddAllArgs(CmdArgs, options::OPT_t);
Args.AddAllArgs(CmdArgs, options::OPT_Z_Flag);
Args.AddAllArgs(CmdArgs, options::OPT_r);
bool NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs);
bool NeedsXRayDeps = addXRayRuntime(ToolChain, Args, CmdArgs);
AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA);
const SanitizerArgs &SanArgs = ToolChain.getSanitizerArgs(Args);
if (SanArgs.needsSharedRt()) {
CmdArgs.push_back("-rpath");
CmdArgs.push_back(Args.MakeArgString(ToolChain.getCompilerRTPath()));
}
VersionTuple OsVersion = Triple.getOSVersion();
bool useLibgcc = true;
if (OsVersion >= VersionTuple(7) || OsVersion.getMajor() == 0) {
switch (ToolChain.getArch()) {
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_be:
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
case llvm::Triple::ppc:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
case llvm::Triple::sparc:
case llvm::Triple::sparcv9:
case llvm::Triple::x86:
case llvm::Triple::x86_64:
useLibgcc = false;
break;
default:
break;
}
}
- if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
+ if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs,
+ options::OPT_r)) {
// Use the static OpenMP runtime with -static-openmp
bool StaticOpenMP = Args.hasArg(options::OPT_static_openmp) &&
!Args.hasArg(options::OPT_static);
addOpenMPRuntime(CmdArgs, ToolChain, Args, StaticOpenMP);
if (D.CCCIsCXX()) {
if (ToolChain.ShouldLinkCXXStdlib(Args))
ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs);
CmdArgs.push_back("-lm");
}
if (NeedsSanitizerDeps)
linkSanitizerRuntimeDeps(ToolChain, CmdArgs);
if (NeedsXRayDeps)
linkXRayRuntimeDeps(ToolChain, CmdArgs);
if (Args.hasArg(options::OPT_pthread))
CmdArgs.push_back("-lpthread");
CmdArgs.push_back("-lc");
if (useLibgcc) {
if (Args.hasArg(options::OPT_static)) {
// libgcc_eh depends on libc, so resolve as much as possible,
// pull in any new requirements from libc and then get the rest
// of libgcc.
CmdArgs.push_back("-lgcc_eh");
CmdArgs.push_back("-lc");
CmdArgs.push_back("-lgcc");
} else {
CmdArgs.push_back("-lgcc");
CmdArgs.push_back("--as-needed");
CmdArgs.push_back("-lgcc_s");
CmdArgs.push_back("--no-as-needed");
}
}
}
- if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
+ if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles,
+ options::OPT_r)) {
if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_pie))
CmdArgs.push_back(
Args.MakeArgString(ToolChain.GetFilePath("crtendS.o")));
else
CmdArgs.push_back(
Args.MakeArgString(ToolChain.GetFilePath("crtend.o")));
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o")));
}
ToolChain.addProfileRTLibs(Args, CmdArgs);
const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath());
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileCurCP(),
Exec, CmdArgs, Inputs, Output));
}
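// For orientation, the job built above for a plain dynamic C link on
// x86_64 NetBSD comes out roughly as (illustrative; crt paths are resolved
// through GetFilePath()):
//   ld --eh-frame-hdr -dynamic-linker /libexec/ld.elf_so -o a.out
//      crt0.o crti.o crtbegin.o <inputs> -lc crtend.o crtn.o
// With -r or -nostdlib in Args the newly extended guards drop both the
// crt*.o objects and the default libraries; -nostartfiles and
// -nodefaultlibs each drop only their half.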
/// NetBSD - NetBSD tool chain which can call as(1) and ld(1) directly.
NetBSD::NetBSD(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
: Generic_ELF(D, Triple, Args) {
if (!Args.hasArg(options::OPT_nostdlib)) {
// When targeting a 32-bit platform, try the special directory used on
// 64-bit hosts, and only fall back to the main library directory if that
// doesn't work.
// FIXME: It'd be nicer to test if this directory exists, but I'm not sure
// what all logic is needed to emulate the '=' prefix here.
switch (Triple.getArch()) {
case llvm::Triple::x86:
getFilePaths().push_back("=/usr/lib/i386");
break;
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
switch (Triple.getEnvironment()) {
case llvm::Triple::EABI:
case llvm::Triple::GNUEABI:
getFilePaths().push_back("=/usr/lib/eabi");
break;
case llvm::Triple::EABIHF:
case llvm::Triple::GNUEABIHF:
getFilePaths().push_back("=/usr/lib/eabihf");
break;
default:
getFilePaths().push_back("=/usr/lib/oabi");
break;
}
break;
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
if (tools::mips::hasMipsAbiArg(Args, "o32"))
getFilePaths().push_back("=/usr/lib/o32");
else if (tools::mips::hasMipsAbiArg(Args, "64"))
getFilePaths().push_back("=/usr/lib/64");
break;
case llvm::Triple::ppc:
getFilePaths().push_back("=/usr/lib/powerpc");
break;
case llvm::Triple::sparc:
getFilePaths().push_back("=/usr/lib/sparc");
break;
default:
break;
}
getFilePaths().push_back("=/usr/lib");
}
}
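// The '=' prefix on the library directories above follows GNU ld's
// convention of a sysroot-relative path: when the driver looks up files
// such as crt0.o in these directories it replaces '=' with the configured
// sysroot, so "=/usr/lib/i386" effectively names "<sysroot>/usr/lib/i386".
// The FIXME above explains why the driver does not try to test these
// directories for existence.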
Tool *NetBSD::buildAssembler() const {
return new tools::netbsd::Assembler(*this);
}
Tool *NetBSD::buildLinker() const { return new tools::netbsd::Linker(*this); }
ToolChain::CXXStdlibType NetBSD::GetDefaultCXXStdlibType() const {
VersionTuple OsVersion = getTriple().getOSVersion();
if (OsVersion >= VersionTuple(7) || OsVersion.getMajor() == 0) {
switch (getArch()) {
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_be:
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
case llvm::Triple::ppc:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
case llvm::Triple::sparc:
case llvm::Triple::sparcv9:
case llvm::Triple::x86:
case llvm::Triple::x86_64:
return ToolChain::CST_Libcxx;
default:
break;
}
}
return ToolChain::CST_Libstdcxx;
}
void NetBSD::addLibCxxIncludePaths(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const {
const std::string Candidates[] = {
// directory relative to build tree
getDriver().Dir + "/../include/c++/v1",
// system install with full upstream path
getDriver().SysRoot + "/usr/include/c++/v1",
// system install from src
getDriver().SysRoot + "/usr/include/c++",
};
for (const auto &IncludePath : Candidates) {
if (!getVFS().exists(IncludePath + "/__config"))
continue;
// Use the first candidate that looks valid.
addSystemInclude(DriverArgs, CC1Args, IncludePath);
return;
}
}
void NetBSD::addLibStdCxxIncludePaths(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const {
addLibStdCXXIncludePaths(getDriver().SysRoot + "/usr/include/g++", "", "",
DriverArgs, CC1Args);
}
llvm::ExceptionHandling NetBSD::GetExceptionModel(const ArgList &Args) const {
// NetBSD uses Dwarf exceptions on ARM.
llvm::Triple::ArchType TArch = getTriple().getArch();
if (TArch == llvm::Triple::arm || TArch == llvm::Triple::armeb ||
TArch == llvm::Triple::thumb || TArch == llvm::Triple::thumbeb)
return llvm::ExceptionHandling::DwarfCFI;
return llvm::ExceptionHandling::None;
}
SanitizerMask NetBSD::getSupportedSanitizers() const {
const bool IsX86 = getTriple().getArch() == llvm::Triple::x86;
const bool IsX86_64 = getTriple().getArch() == llvm::Triple::x86_64;
SanitizerMask Res = ToolChain::getSupportedSanitizers();
if (IsX86 || IsX86_64) {
Res |= SanitizerKind::Address;
Res |= SanitizerKind::PointerCompare;
Res |= SanitizerKind::PointerSubtract;
Res |= SanitizerKind::Function;
Res |= SanitizerKind::Leak;
Res |= SanitizerKind::SafeStack;
Res |= SanitizerKind::Scudo;
Res |= SanitizerKind::Vptr;
}
if (IsX86_64) {
Res |= SanitizerKind::DataFlow;
Res |= SanitizerKind::Fuzzer;
Res |= SanitizerKind::FuzzerNoLink;
Res |= SanitizerKind::HWAddress;
Res |= SanitizerKind::KernelAddress;
Res |= SanitizerKind::KernelHWAddress;
Res |= SanitizerKind::KernelMemory;
Res |= SanitizerKind::Memory;
Res |= SanitizerKind::Thread;
}
return Res;
}
void NetBSD::addClangTargetOptions(const ArgList &DriverArgs,
ArgStringList &CC1Args,
Action::OffloadKind) const {
const SanitizerArgs &SanArgs = getSanitizerArgs(DriverArgs);
if (SanArgs.hasAnySanitizer())
CC1Args.push_back("-D_REENTRANT");
VersionTuple OsVersion = getTriple().getOSVersion();
bool UseInitArrayDefault =
OsVersion >= VersionTuple(9) || OsVersion.getMajor() == 0 ||
getTriple().getArch() == llvm::Triple::aarch64 ||
getTriple().getArch() == llvm::Triple::aarch64_be ||
getTriple().getArch() == llvm::Triple::arm ||
getTriple().getArch() == llvm::Triple::armeb;
if (!DriverArgs.hasFlag(options::OPT_fuse_init_array,
options::OPT_fno_use_init_array, UseInitArrayDefault))
CC1Args.push_back("-fno-use-init-array");
}
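Both the linker job and GetDefaultCXXStdlibType() above key off the same version gate, in which an unversioned *-netbsd triple (reported major version 0) is treated as a current release; only NetBSD 7 and newer on the listed architectures get libc++ and skip libgcc. A minimal sketch of that gate under those assumptions, with a hypothetical helper name:
static bool isNetBSD7OrNewer(const llvm::Triple &T) {
  // An unversioned "netbsd" OS component reports major version 0 and is
  // assumed to be a current release.
  llvm::VersionTuple OsVersion = T.getOSVersion();
  return OsVersion >= llvm::VersionTuple(7) || OsVersion.getMajor() == 0;
}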
diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/OpenBSD.h b/contrib/llvm-project/clang/lib/Driver/ToolChains/OpenBSD.h
index 95c10cc62316..9d668711b91b 100644
--- a/contrib/llvm-project/clang/lib/Driver/ToolChains/OpenBSD.h
+++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/OpenBSD.h
@@ -1,102 +1,106 @@
//===--- OpenBSD.h - OpenBSD ToolChain Implementations ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_OPENBSD_H
#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_OPENBSD_H
#include "Gnu.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Driver/Tool.h"
#include "clang/Driver/ToolChain.h"
namespace clang {
namespace driver {
namespace tools {
/// openbsd -- Directly call GNU Binutils assembler and linker
namespace openbsd {
class LLVM_LIBRARY_VISIBILITY Assembler : public Tool {
public:
Assembler(const ToolChain &TC)
: Tool("openbsd::Assembler", "assembler", TC) {}
bool hasIntegratedCPP() const override { return false; }
void ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output, const InputInfoList &Inputs,
const llvm::opt::ArgList &TCArgs,
const char *LinkingOutput) const override;
};
class LLVM_LIBRARY_VISIBILITY Linker : public Tool {
public:
Linker(const ToolChain &TC) : Tool("openbsd::Linker", "linker", TC) {}
bool hasIntegratedCPP() const override { return false; }
bool isLinkJob() const override { return true; }
void ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output, const InputInfoList &Inputs,
const llvm::opt::ArgList &TCArgs,
const char *LinkingOutput) const override;
};
} // end namespace openbsd
} // end namespace tools
namespace toolchains {
class LLVM_LIBRARY_VISIBILITY OpenBSD : public Generic_ELF {
public:
OpenBSD(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
bool HasNativeLLVMSupport() const override;
bool IsMathErrnoDefault() const override { return false; }
bool IsObjCNonFragileABIDefault() const override { return true; }
bool isPIEDefault(const llvm::opt::ArgList &Args) const override {
return true;
}
RuntimeLibType GetDefaultRuntimeLibType() const override {
return ToolChain::RLT_CompilerRT;
}
CXXStdlibType GetDefaultCXXStdlibType() const override {
return ToolChain::CST_Libcxx;
}
void
AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
void addLibCxxIncludePaths(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
void AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const override;
std::string getCompilerRT(const llvm::opt::ArgList &Args, StringRef Component,
FileType Type = ToolChain::FT_Static) const override;
+ bool IsUnwindTablesDefault(const llvm::opt::ArgList &Args) const override {
+ return true;
+ }
+
LangOptions::StackProtectorMode
GetDefaultStackProtectorLevel(bool KernelOrKext) const override {
return LangOptions::SSPStrong;
}
unsigned GetDefaultDwarfVersion() const override { return 2; }
SanitizerMask getSupportedSanitizers() const override;
protected:
Tool *buildAssembler() const override;
Tool *buildLinker() const override;
};
} // end namespace toolchains
} // end namespace driver
} // end namespace clang
#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_OPENBSD_H
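The functional addition to OpenBSD.h is the IsUnwindTablesDefault() override: with it returning true, the driver on OpenBSD now defaults to emitting unwind tables instead of only doing so when a language mode requires them. A hedged sketch of how a toolchain default like this is typically consulted; the surrounding variable names are illustrative, not taken from this diff:
// Explicit -funwind-tables / -fno-unwind-tables flags win; otherwise the
// toolchain's default applies.
bool UnwindTables =
    Args.hasFlag(options::OPT_funwind_tables, options::OPT_fno_unwind_tables,
                 TC.IsUnwindTablesDefault(Args));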
diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/Solaris.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/Solaris.cpp
index 4d1af094f481..24f18b92dd66 100644
--- a/contrib/llvm-project/clang/lib/Driver/ToolChains/Solaris.cpp
+++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/Solaris.cpp
@@ -1,289 +1,296 @@
//===--- Solaris.cpp - Solaris ToolChain Implementations --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "Solaris.h"
#include "CommonArgs.h"
#include "clang/Basic/LangStandard.h"
#include "clang/Config/config.h"
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/Options.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
using namespace clang::driver;
using namespace clang::driver::tools;
using namespace clang::driver::toolchains;
using namespace clang;
using namespace llvm::opt;
void solaris::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
claimNoWarnArgs(Args);
ArgStringList CmdArgs;
Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
for (const auto &II : Inputs)
CmdArgs.push_back(II.getFilename());
const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath("as"));
C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
Exec, CmdArgs, Inputs, Output));
}
void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
ArgStringList CmdArgs;
// Demangle C++ names in errors
CmdArgs.push_back("-C");
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_shared)) {
CmdArgs.push_back("-e");
CmdArgs.push_back("_start");
}
if (Args.hasArg(options::OPT_static)) {
CmdArgs.push_back("-Bstatic");
CmdArgs.push_back("-dn");
} else {
CmdArgs.push_back("-Bdynamic");
if (Args.hasArg(options::OPT_shared)) {
CmdArgs.push_back("-shared");
}
// libpthread has been folded into libc since Solaris 10, no need to do
// anything for pthreads. Claim argument to avoid warning.
Args.ClaimAllArgs(options::OPT_pthread);
Args.ClaimAllArgs(options::OPT_pthreads);
}
if (Output.isFilename()) {
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
} else {
assert(Output.isNothing() && "Invalid output.");
}
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
if (!Args.hasArg(options::OPT_shared))
CmdArgs.push_back(
Args.MakeArgString(getToolChain().GetFilePath("crt1.o")));
CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crti.o")));
const Arg *Std = Args.getLastArg(options::OPT_std_EQ, options::OPT_ansi);
bool HaveAnsi = false;
const LangStandard *LangStd = nullptr;
if (Std) {
HaveAnsi = Std->getOption().matches(options::OPT_ansi);
if (!HaveAnsi)
LangStd = LangStandard::getLangStandardForName(Std->getValue());
}
const char *values_X = "values-Xa.o";
// Use values-Xc.o for -ansi, -std=c*, -std=iso9899:199409.
if (HaveAnsi || (LangStd && !LangStd->isGNUMode()))
values_X = "values-Xc.o";
CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath(values_X)));
const char *values_xpg = "values-xpg6.o";
// Use values-xpg4.o for -std=c90, -std=gnu90, -std=iso9899:199409.
if (LangStd && LangStd->getLanguage() == Language::C && !LangStd->isC99())
values_xpg = "values-xpg4.o";
CmdArgs.push_back(
Args.MakeArgString(getToolChain().GetFilePath(values_xpg)));
CmdArgs.push_back(
Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o")));
}
getToolChain().AddFilePathLibArgs(Args, CmdArgs);
Args.AddAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group,
options::OPT_e, options::OPT_r});
bool NeedsSanitizerDeps = addSanitizerRuntimes(getToolChain(), Args, CmdArgs);
AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
if (getToolChain().ShouldLinkCXXStdlib(Args))
getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs);
if (Args.hasArg(options::OPT_fstack_protector) ||
Args.hasArg(options::OPT_fstack_protector_strong) ||
Args.hasArg(options::OPT_fstack_protector_all)) {
// Explicitly link ssp libraries, not folded into Solaris libc.
CmdArgs.push_back("-lssp_nonshared");
CmdArgs.push_back("-lssp");
}
+ // LLVM support for atomics on 32-bit SPARC V8+ is incomplete, so
+ // forcibly link with libatomic as a workaround.
+ if (getToolChain().getTriple().getArch() == llvm::Triple::sparc) {
+ CmdArgs.push_back(getAsNeededOption(getToolChain(), true));
+ CmdArgs.push_back("-latomic");
+ CmdArgs.push_back(getAsNeededOption(getToolChain(), false));
+ }
CmdArgs.push_back("-lgcc_s");
CmdArgs.push_back("-lc");
if (!Args.hasArg(options::OPT_shared)) {
CmdArgs.push_back("-lgcc");
CmdArgs.push_back("-lm");
}
if (NeedsSanitizerDeps)
linkSanitizerRuntimeDeps(getToolChain(), CmdArgs);
}
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) {
CmdArgs.push_back(
Args.MakeArgString(getToolChain().GetFilePath("crtend.o")));
}
CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath("crtn.o")));
getToolChain().addProfileRTLibs(Args, CmdArgs);
const char *Exec = Args.MakeArgString(getToolChain().GetLinkerPath());
C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
Exec, CmdArgs, Inputs, Output));
}
static StringRef getSolarisLibSuffix(const llvm::Triple &Triple) {
switch (Triple.getArch()) {
case llvm::Triple::x86:
case llvm::Triple::sparc:
break;
case llvm::Triple::x86_64:
return "/amd64";
case llvm::Triple::sparcv9:
return "/sparcv9";
default:
llvm_unreachable("Unsupported architecture");
}
return "";
}
/// Solaris - Solaris tool chain which can call as(1) and ld(1) directly.
Solaris::Solaris(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
: Generic_ELF(D, Triple, Args) {
GCCInstallation.init(Triple, Args);
StringRef LibSuffix = getSolarisLibSuffix(Triple);
path_list &Paths = getFilePaths();
if (GCCInstallation.isValid()) {
// On Solaris gcc uses both an architecture-specific path with the triple in
// it and a more generic lib path (+arch suffix).
addPathIfExists(D,
GCCInstallation.getInstallPath() +
GCCInstallation.getMultilib().gccSuffix(),
Paths);
addPathIfExists(D, GCCInstallation.getParentLibPath() + LibSuffix, Paths);
}
// If we are currently running Clang inside of the requested system root,
// add its parent library path to those searched.
if (StringRef(D.Dir).startswith(D.SysRoot))
addPathIfExists(D, D.Dir + "/../lib", Paths);
addPathIfExists(D, D.SysRoot + "/usr/lib" + LibSuffix, Paths);
}
SanitizerMask Solaris::getSupportedSanitizers() const {
const bool IsX86 = getTriple().getArch() == llvm::Triple::x86;
const bool IsX86_64 = getTriple().getArch() == llvm::Triple::x86_64;
SanitizerMask Res = ToolChain::getSupportedSanitizers();
// FIXME: Omit X86_64 until 64-bit support is figured out.
if (IsX86) {
Res |= SanitizerKind::Address;
Res |= SanitizerKind::PointerCompare;
Res |= SanitizerKind::PointerSubtract;
}
if (IsX86 || IsX86_64)
Res |= SanitizerKind::Function;
Res |= SanitizerKind::Vptr;
return Res;
}
Tool *Solaris::buildAssembler() const {
return new tools::solaris::Assembler(*this);
}
Tool *Solaris::buildLinker() const { return new tools::solaris::Linker(*this); }
void Solaris::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
const Driver &D = getDriver();
if (DriverArgs.hasArg(clang::driver::options::OPT_nostdinc))
return;
if (!DriverArgs.hasArg(options::OPT_nostdlibinc))
addSystemInclude(DriverArgs, CC1Args, D.SysRoot + "/usr/local/include");
if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
SmallString<128> P(D.ResourceDir);
llvm::sys::path::append(P, "include");
addSystemInclude(DriverArgs, CC1Args, P);
}
if (DriverArgs.hasArg(options::OPT_nostdlibinc))
return;
// Check for configure-time C include directories.
StringRef CIncludeDirs(C_INCLUDE_DIRS);
if (CIncludeDirs != "") {
SmallVector<StringRef, 5> dirs;
CIncludeDirs.split(dirs, ":");
for (StringRef dir : dirs) {
StringRef Prefix =
llvm::sys::path::is_absolute(dir) ? "" : StringRef(D.SysRoot);
addExternCSystemInclude(DriverArgs, CC1Args, Prefix + dir);
}
return;
}
// Add include directories specific to the selected multilib set and multilib.
if (GCCInstallation.isValid()) {
const MultilibSet::IncludeDirsFunc &Callback =
Multilibs.includeDirsCallback();
if (Callback) {
for (const auto &Path : Callback(GCCInstallation.getMultilib()))
addExternCSystemIncludeIfExists(
DriverArgs, CC1Args, GCCInstallation.getInstallPath() + Path);
}
}
addExternCSystemInclude(DriverArgs, CC1Args, D.SysRoot + "/usr/include");
}
void Solaris::addLibStdCxxIncludePaths(
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const {
// We need a detected GCC installation on Solaris (similar to Linux)
// to provide libstdc++'s headers.
if (!GCCInstallation.isValid())
return;
// By default, look for the C++ headers in an include directory adjacent to
// the lib directory of the GCC installation.
// On Solaris this usually looks like /usr/gcc/X.Y/include/c++/X.Y.Z
StringRef LibDir = GCCInstallation.getParentLibPath();
StringRef TripleStr = GCCInstallation.getTriple().str();
const Multilib &Multilib = GCCInstallation.getMultilib();
const GCCVersion &Version = GCCInstallation.getVersion();
// The primary search for libstdc++ supports multiarch variants.
addLibStdCXXIncludePaths(LibDir.str() + "/../include/c++/" + Version.Text,
TripleStr, Multilib.includeSuffix(), DriverArgs,
CC1Args);
}
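The Solaris change force-links libatomic on 32-bit SPARC because, as its comment notes, LLVM's support for atomics on SPARC V8+ is incomplete; wrapping -latomic in getAsNeededOption() keeps it out of the dependencies of binaries that never reference its symbols and lets the helper pick the spelling the active linker understands. Approximately, the added fragment of the link line looks like this (GNU ld spelling shown; Solaris ld uses its -z ignore / -z record forms):
  ... --as-needed -latomic --no-as-needed -lgcc_s -lc ...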
diff --git a/contrib/llvm-project/clang/lib/Serialization/ASTReaderDecl.cpp b/contrib/llvm-project/clang/lib/Serialization/ASTReaderDecl.cpp
index 1ab26e58a404..5d63a26132b7 100644
--- a/contrib/llvm-project/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/contrib/llvm-project/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -1,4417 +1,4417 @@
//===- ASTReaderDecl.cpp - Decl Deserialization ---------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the ASTReader::readDeclRecord method, which is the
// entrypoint for loading a decl.
//
//===----------------------------------------------------------------------===//
#include "ASTCommon.h"
#include "ASTReaderInternals.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/AttrIterator.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclBase.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclFriend.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/DeclVisitor.h"
#include "clang/AST/DeclarationName.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExternalASTSource.h"
#include "clang/AST/LambdaCapture.h"
#include "clang/AST/NestedNameSpecifier.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Redeclarable.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/TemplateBase.h"
#include "clang/AST/Type.h"
#include "clang/AST/UnresolvedSet.h"
#include "clang/Basic/AttrKinds.h"
#include "clang/Basic/ExceptionSpecificationType.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/Lambda.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/Linkage.h"
#include "clang/Basic/Module.h"
#include "clang/Basic/PragmaKinds.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/Specifiers.h"
#include "clang/Sema/IdentifierResolver.h"
#include "clang/Serialization/ASTBitCodes.h"
#include "clang/Serialization/ASTRecordReader.h"
#include "clang/Serialization/ContinuousRangeMap.h"
#include "clang/Serialization/ModuleFile.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Bitstream/BitstreamReader.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SaveAndRestore.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <string>
#include <utility>
using namespace clang;
using namespace serialization;
//===----------------------------------------------------------------------===//
// Declaration deserialization
//===----------------------------------------------------------------------===//
namespace clang {
class ASTDeclReader : public DeclVisitor<ASTDeclReader, void> {
ASTReader &Reader;
ASTRecordReader &Record;
ASTReader::RecordLocation Loc;
const DeclID ThisDeclID;
const SourceLocation ThisDeclLoc;
using RecordData = ASTReader::RecordData;
TypeID DeferredTypeID = 0;
unsigned AnonymousDeclNumber;
GlobalDeclID NamedDeclForTagDecl = 0;
IdentifierInfo *TypedefNameForLinkage = nullptr;
bool HasPendingBody = false;
/// A flag to carry whether the decl being read was marked used in the
/// entity it came from. We use it to delay marking the canonical decl as
/// used until the entire declaration is deserialized and merged.
bool IsDeclMarkedUsed = false;
uint64_t GetCurrentCursorOffset();
uint64_t ReadLocalOffset() {
uint64_t LocalOffset = Record.readInt();
assert(LocalOffset < Loc.Offset && "offset point after current record");
return LocalOffset ? Loc.Offset - LocalOffset : 0;
}
uint64_t ReadGlobalOffset() {
uint64_t Local = ReadLocalOffset();
return Local ? Record.getGlobalBitOffset(Local) : 0;
}
SourceLocation readSourceLocation() {
return Record.readSourceLocation();
}
SourceRange readSourceRange() {
return Record.readSourceRange();
}
TypeSourceInfo *readTypeSourceInfo() {
return Record.readTypeSourceInfo();
}
serialization::DeclID readDeclID() {
return Record.readDeclID();
}
std::string readString() {
return Record.readString();
}
void readDeclIDList(SmallVectorImpl<DeclID> &IDs) {
for (unsigned I = 0, Size = Record.readInt(); I != Size; ++I)
IDs.push_back(readDeclID());
}
Decl *readDecl() {
return Record.readDecl();
}
template<typename T>
T *readDeclAs() {
return Record.readDeclAs<T>();
}
serialization::SubmoduleID readSubmoduleID() {
if (Record.getIdx() == Record.size())
return 0;
return Record.getGlobalSubmoduleID(Record.readInt());
}
Module *readModule() {
return Record.getSubmodule(readSubmoduleID());
}
void ReadCXXRecordDefinition(CXXRecordDecl *D, bool Update);
void ReadCXXDefinitionData(struct CXXRecordDecl::DefinitionData &Data,
const CXXRecordDecl *D);
void MergeDefinitionData(CXXRecordDecl *D,
struct CXXRecordDecl::DefinitionData &&NewDD);
void ReadObjCDefinitionData(struct ObjCInterfaceDecl::DefinitionData &Data);
void MergeDefinitionData(ObjCInterfaceDecl *D,
struct ObjCInterfaceDecl::DefinitionData &&NewDD);
void ReadObjCDefinitionData(struct ObjCProtocolDecl::DefinitionData &Data);
void MergeDefinitionData(ObjCProtocolDecl *D,
struct ObjCProtocolDecl::DefinitionData &&NewDD);
static DeclContext *getPrimaryDCForAnonymousDecl(DeclContext *LexicalDC);
static NamedDecl *getAnonymousDeclForMerging(ASTReader &Reader,
DeclContext *DC,
unsigned Index);
static void setAnonymousDeclForMerging(ASTReader &Reader, DeclContext *DC,
unsigned Index, NamedDecl *D);
/// Results from loading a RedeclarableDecl.
class RedeclarableResult {
Decl *MergeWith;
GlobalDeclID FirstID;
bool IsKeyDecl;
public:
RedeclarableResult(Decl *MergeWith, GlobalDeclID FirstID, bool IsKeyDecl)
: MergeWith(MergeWith), FirstID(FirstID), IsKeyDecl(IsKeyDecl) {}
/// Retrieve the first ID.
GlobalDeclID getFirstID() const { return FirstID; }
/// Is this declaration a key declaration?
bool isKeyDecl() const { return IsKeyDecl; }
/// Get a known declaration that this should be merged with, if
/// any.
Decl *getKnownMergeTarget() const { return MergeWith; }
};
/// Class used to capture the result of searching for an existing
/// declaration of a specific kind and name, along with the ability
/// to update the place where this result was found (the declaration
/// chain hanging off an identifier or the DeclContext we searched in)
/// if requested.
class FindExistingResult {
ASTReader &Reader;
NamedDecl *New = nullptr;
NamedDecl *Existing = nullptr;
bool AddResult = false;
unsigned AnonymousDeclNumber = 0;
IdentifierInfo *TypedefNameForLinkage = nullptr;
public:
FindExistingResult(ASTReader &Reader) : Reader(Reader) {}
FindExistingResult(ASTReader &Reader, NamedDecl *New, NamedDecl *Existing,
unsigned AnonymousDeclNumber,
IdentifierInfo *TypedefNameForLinkage)
: Reader(Reader), New(New), Existing(Existing), AddResult(true),
AnonymousDeclNumber(AnonymousDeclNumber),
TypedefNameForLinkage(TypedefNameForLinkage) {}
FindExistingResult(FindExistingResult &&Other)
: Reader(Other.Reader), New(Other.New), Existing(Other.Existing),
AddResult(Other.AddResult),
AnonymousDeclNumber(Other.AnonymousDeclNumber),
TypedefNameForLinkage(Other.TypedefNameForLinkage) {
Other.AddResult = false;
}
FindExistingResult &operator=(FindExistingResult &&) = delete;
~FindExistingResult();
/// Suppress the addition of this result into the known set of
/// names.
void suppress() { AddResult = false; }
operator NamedDecl*() const { return Existing; }
template<typename T>
operator T*() const { return dyn_cast_or_null<T>(Existing); }
};
static DeclContext *getPrimaryContextForMerging(ASTReader &Reader,
DeclContext *DC);
FindExistingResult findExisting(NamedDecl *D);
public:
ASTDeclReader(ASTReader &Reader, ASTRecordReader &Record,
ASTReader::RecordLocation Loc,
DeclID thisDeclID, SourceLocation ThisDeclLoc)
: Reader(Reader), Record(Record), Loc(Loc), ThisDeclID(thisDeclID),
ThisDeclLoc(ThisDeclLoc) {}
template <typename T> static
void AddLazySpecializations(T *D,
SmallVectorImpl<serialization::DeclID>& IDs) {
if (IDs.empty())
return;
// FIXME: We should avoid this pattern of getting the ASTContext.
ASTContext &C = D->getASTContext();
auto *&LazySpecializations = D->getCommonPtr()->LazySpecializations;
if (auto &Old = LazySpecializations) {
IDs.insert(IDs.end(), Old + 1, Old + 1 + Old[0]);
llvm::sort(IDs);
IDs.erase(std::unique(IDs.begin(), IDs.end()), IDs.end());
}
auto *Result = new (C) serialization::DeclID[1 + IDs.size()];
*Result = IDs.size();
std::copy(IDs.begin(), IDs.end(), Result + 1);
LazySpecializations = Result;
}
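// The array built above stores its own length in slot 0, followed by the
// sorted, uniqued specialization IDs:
//   LazySpecializations -> [ N | ID_1 | ID_2 | ... | ID_N ]
// which is why the merge step reads Old + 1 .. Old + 1 + Old[0].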
template <typename DeclT>
static Decl *getMostRecentDeclImpl(Redeclarable<DeclT> *D);
static Decl *getMostRecentDeclImpl(...);
static Decl *getMostRecentDecl(Decl *D);
static void mergeInheritableAttributes(ASTReader &Reader, Decl *D,
Decl *Previous);
template <typename DeclT>
static void attachPreviousDeclImpl(ASTReader &Reader,
Redeclarable<DeclT> *D, Decl *Previous,
Decl *Canon);
static void attachPreviousDeclImpl(ASTReader &Reader, ...);
static void attachPreviousDecl(ASTReader &Reader, Decl *D, Decl *Previous,
Decl *Canon);
template <typename DeclT>
static void attachLatestDeclImpl(Redeclarable<DeclT> *D, Decl *Latest);
static void attachLatestDeclImpl(...);
static void attachLatestDecl(Decl *D, Decl *latest);
template <typename DeclT>
static void markIncompleteDeclChainImpl(Redeclarable<DeclT> *D);
static void markIncompleteDeclChainImpl(...);
/// Determine whether this declaration has a pending body.
bool hasPendingBody() const { return HasPendingBody; }
void ReadFunctionDefinition(FunctionDecl *FD);
void Visit(Decl *D);
void UpdateDecl(Decl *D, SmallVectorImpl<serialization::DeclID> &);
static void setNextObjCCategory(ObjCCategoryDecl *Cat,
ObjCCategoryDecl *Next) {
Cat->NextClassCategory = Next;
}
void VisitDecl(Decl *D);
void VisitPragmaCommentDecl(PragmaCommentDecl *D);
void VisitPragmaDetectMismatchDecl(PragmaDetectMismatchDecl *D);
void VisitTranslationUnitDecl(TranslationUnitDecl *TU);
void VisitNamedDecl(NamedDecl *ND);
void VisitLabelDecl(LabelDecl *LD);
void VisitNamespaceDecl(NamespaceDecl *D);
void VisitUsingDirectiveDecl(UsingDirectiveDecl *D);
void VisitNamespaceAliasDecl(NamespaceAliasDecl *D);
void VisitTypeDecl(TypeDecl *TD);
RedeclarableResult VisitTypedefNameDecl(TypedefNameDecl *TD);
void VisitTypedefDecl(TypedefDecl *TD);
void VisitTypeAliasDecl(TypeAliasDecl *TD);
void VisitUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *D);
void VisitUnresolvedUsingIfExistsDecl(UnresolvedUsingIfExistsDecl *D);
RedeclarableResult VisitTagDecl(TagDecl *TD);
void VisitEnumDecl(EnumDecl *ED);
RedeclarableResult VisitRecordDeclImpl(RecordDecl *RD);
void VisitRecordDecl(RecordDecl *RD);
RedeclarableResult VisitCXXRecordDeclImpl(CXXRecordDecl *D);
void VisitCXXRecordDecl(CXXRecordDecl *D) { VisitCXXRecordDeclImpl(D); }
RedeclarableResult VisitClassTemplateSpecializationDeclImpl(
ClassTemplateSpecializationDecl *D);
void VisitClassTemplateSpecializationDecl(
ClassTemplateSpecializationDecl *D) {
VisitClassTemplateSpecializationDeclImpl(D);
}
void VisitClassTemplatePartialSpecializationDecl(
ClassTemplatePartialSpecializationDecl *D);
void VisitClassScopeFunctionSpecializationDecl(
ClassScopeFunctionSpecializationDecl *D);
RedeclarableResult
VisitVarTemplateSpecializationDeclImpl(VarTemplateSpecializationDecl *D);
void VisitVarTemplateSpecializationDecl(VarTemplateSpecializationDecl *D) {
VisitVarTemplateSpecializationDeclImpl(D);
}
void VisitVarTemplatePartialSpecializationDecl(
VarTemplatePartialSpecializationDecl *D);
void VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D);
void VisitValueDecl(ValueDecl *VD);
void VisitEnumConstantDecl(EnumConstantDecl *ECD);
void VisitUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *D);
void VisitDeclaratorDecl(DeclaratorDecl *DD);
void VisitFunctionDecl(FunctionDecl *FD);
void VisitCXXDeductionGuideDecl(CXXDeductionGuideDecl *GD);
void VisitCXXMethodDecl(CXXMethodDecl *D);
void VisitCXXConstructorDecl(CXXConstructorDecl *D);
void VisitCXXDestructorDecl(CXXDestructorDecl *D);
void VisitCXXConversionDecl(CXXConversionDecl *D);
void VisitFieldDecl(FieldDecl *FD);
void VisitMSPropertyDecl(MSPropertyDecl *FD);
void VisitMSGuidDecl(MSGuidDecl *D);
void VisitTemplateParamObjectDecl(TemplateParamObjectDecl *D);
void VisitIndirectFieldDecl(IndirectFieldDecl *FD);
RedeclarableResult VisitVarDeclImpl(VarDecl *D);
void VisitVarDecl(VarDecl *VD) { VisitVarDeclImpl(VD); }
void VisitImplicitParamDecl(ImplicitParamDecl *PD);
void VisitParmVarDecl(ParmVarDecl *PD);
void VisitDecompositionDecl(DecompositionDecl *DD);
void VisitBindingDecl(BindingDecl *BD);
void VisitNonTypeTemplateParmDecl(NonTypeTemplateParmDecl *D);
DeclID VisitTemplateDecl(TemplateDecl *D);
void VisitConceptDecl(ConceptDecl *D);
void VisitRequiresExprBodyDecl(RequiresExprBodyDecl *D);
RedeclarableResult VisitRedeclarableTemplateDecl(RedeclarableTemplateDecl *D);
void VisitClassTemplateDecl(ClassTemplateDecl *D);
void VisitBuiltinTemplateDecl(BuiltinTemplateDecl *D);
void VisitVarTemplateDecl(VarTemplateDecl *D);
void VisitFunctionTemplateDecl(FunctionTemplateDecl *D);
void VisitTemplateTemplateParmDecl(TemplateTemplateParmDecl *D);
void VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D);
void VisitUsingDecl(UsingDecl *D);
void VisitUsingEnumDecl(UsingEnumDecl *D);
void VisitUsingPackDecl(UsingPackDecl *D);
void VisitUsingShadowDecl(UsingShadowDecl *D);
void VisitConstructorUsingShadowDecl(ConstructorUsingShadowDecl *D);
void VisitLinkageSpecDecl(LinkageSpecDecl *D);
void VisitExportDecl(ExportDecl *D);
void VisitFileScopeAsmDecl(FileScopeAsmDecl *AD);
void VisitImportDecl(ImportDecl *D);
void VisitAccessSpecDecl(AccessSpecDecl *D);
void VisitFriendDecl(FriendDecl *D);
void VisitFriendTemplateDecl(FriendTemplateDecl *D);
void VisitStaticAssertDecl(StaticAssertDecl *D);
void VisitBlockDecl(BlockDecl *BD);
void VisitCapturedDecl(CapturedDecl *CD);
void VisitEmptyDecl(EmptyDecl *D);
void VisitLifetimeExtendedTemporaryDecl(LifetimeExtendedTemporaryDecl *D);
std::pair<uint64_t, uint64_t> VisitDeclContext(DeclContext *DC);
template<typename T>
RedeclarableResult VisitRedeclarable(Redeclarable<T> *D);
template<typename T>
void mergeRedeclarable(Redeclarable<T> *D, RedeclarableResult &Redecl,
DeclID TemplatePatternID = 0);
template<typename T>
void mergeRedeclarable(Redeclarable<T> *D, T *Existing,
RedeclarableResult &Redecl,
DeclID TemplatePatternID = 0);
template<typename T>
void mergeMergeable(Mergeable<T> *D);
void mergeMergeable(LifetimeExtendedTemporaryDecl *D);
void mergeTemplatePattern(RedeclarableTemplateDecl *D,
RedeclarableTemplateDecl *Existing,
DeclID DsID, bool IsKeyDecl);
ObjCTypeParamList *ReadObjCTypeParamList();
// FIXME: Reorder according to DeclNodes.td?
void VisitObjCMethodDecl(ObjCMethodDecl *D);
void VisitObjCTypeParamDecl(ObjCTypeParamDecl *D);
void VisitObjCContainerDecl(ObjCContainerDecl *D);
void VisitObjCInterfaceDecl(ObjCInterfaceDecl *D);
void VisitObjCIvarDecl(ObjCIvarDecl *D);
void VisitObjCProtocolDecl(ObjCProtocolDecl *D);
void VisitObjCAtDefsFieldDecl(ObjCAtDefsFieldDecl *D);
void VisitObjCCategoryDecl(ObjCCategoryDecl *D);
void VisitObjCImplDecl(ObjCImplDecl *D);
void VisitObjCCategoryImplDecl(ObjCCategoryImplDecl *D);
void VisitObjCImplementationDecl(ObjCImplementationDecl *D);
void VisitObjCCompatibleAliasDecl(ObjCCompatibleAliasDecl *D);
void VisitObjCPropertyDecl(ObjCPropertyDecl *D);
void VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D);
void VisitOMPThreadPrivateDecl(OMPThreadPrivateDecl *D);
void VisitOMPAllocateDecl(OMPAllocateDecl *D);
void VisitOMPDeclareReductionDecl(OMPDeclareReductionDecl *D);
void VisitOMPDeclareMapperDecl(OMPDeclareMapperDecl *D);
void VisitOMPRequiresDecl(OMPRequiresDecl *D);
void VisitOMPCapturedExprDecl(OMPCapturedExprDecl *D);
};
} // namespace clang
namespace {
/// Iterator over the redeclarations of a declaration that have already
/// been merged into the same redeclaration chain.
template<typename DeclT>
class MergedRedeclIterator {
DeclT *Start;
DeclT *Canonical = nullptr;
DeclT *Current = nullptr;
public:
MergedRedeclIterator() = default;
MergedRedeclIterator(DeclT *Start) : Start(Start), Current(Start) {}
DeclT *operator*() { return Current; }
MergedRedeclIterator &operator++() {
if (Current->isFirstDecl()) {
Canonical = Current;
Current = Current->getMostRecentDecl();
} else
Current = Current->getPreviousDecl();
// If we started in the merged portion, we'll reach our start position
// eventually. Otherwise, we'll never reach it, but the second declaration
// we reached was the canonical declaration, so stop when we see that one
// again.
if (Current == Start || Current == Canonical)
Current = nullptr;
return *this;
}
friend bool operator!=(const MergedRedeclIterator &A,
const MergedRedeclIterator &B) {
return A.Current != B.Current;
}
};
} // namespace
template <typename DeclT>
static llvm::iterator_range<MergedRedeclIterator<DeclT>>
merged_redecls(DeclT *D) {
return llvm::make_range(MergedRedeclIterator<DeclT>(D),
MergedRedeclIterator<DeclT>());
}
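// merged_redecls() is how later visitors walk only the already-merged part
// of a redeclaration chain, e.g. the search for a local complete definition
// in VisitEnumDecl() and VisitRecordDecl() below:
//   for (auto *D : merged_redecls(Canon))
//     if (!D->isFromASTFile() && D->isCompleteDefinition()) { OldDef = D; break; }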
uint64_t ASTDeclReader::GetCurrentCursorOffset() {
return Loc.F->DeclsCursor.GetCurrentBitNo() + Loc.F->GlobalBitOffset;
}
void ASTDeclReader::ReadFunctionDefinition(FunctionDecl *FD) {
if (Record.readInt()) {
Reader.DefinitionSource[FD] =
Loc.F->Kind == ModuleKind::MK_MainFile ||
Reader.getContext().getLangOpts().BuildingPCHWithObjectFile;
}
if (auto *CD = dyn_cast<CXXConstructorDecl>(FD)) {
CD->setNumCtorInitializers(Record.readInt());
if (CD->getNumCtorInitializers())
CD->CtorInitializers = ReadGlobalOffset();
}
// Store the offset of the body so we can lazily load it later.
Reader.PendingBodies[FD] = GetCurrentCursorOffset();
HasPendingBody = true;
}
void ASTDeclReader::Visit(Decl *D) {
DeclVisitor<ASTDeclReader, void>::Visit(D);
// At this point we have deserialized and merged the decl and it is safe to
// update its canonical decl to signal that the entire entity is used.
D->getCanonicalDecl()->Used |= IsDeclMarkedUsed;
IsDeclMarkedUsed = false;
if (auto *DD = dyn_cast<DeclaratorDecl>(D)) {
if (auto *TInfo = DD->getTypeSourceInfo())
Record.readTypeLoc(TInfo->getTypeLoc());
}
if (auto *TD = dyn_cast<TypeDecl>(D)) {
// We have a fully initialized TypeDecl. Read its type now.
TD->setTypeForDecl(Reader.GetType(DeferredTypeID).getTypePtrOrNull());
// If this is a tag declaration with a typedef name for linkage, it's safe
// to load that typedef now.
if (NamedDeclForTagDecl)
cast<TagDecl>(D)->TypedefNameDeclOrQualifier =
cast<TypedefNameDecl>(Reader.GetDecl(NamedDeclForTagDecl));
} else if (auto *ID = dyn_cast<ObjCInterfaceDecl>(D)) {
// This ObjCInterfaceDecl is fully initialized; it is safe to read its type now.
ID->TypeForDecl = Reader.GetType(DeferredTypeID).getTypePtrOrNull();
} else if (auto *FD = dyn_cast<FunctionDecl>(D)) {
// FunctionDecl's body was written last after all other Stmts/Exprs.
// We only read it if FD doesn't already have a body (e.g., from another
// module).
// FIXME: Can we diagnose ODR violations somehow?
if (Record.readInt())
ReadFunctionDefinition(FD);
}
}
void ASTDeclReader::VisitDecl(Decl *D) {
if (D->isTemplateParameter() || D->isTemplateParameterPack() ||
isa<ParmVarDecl>(D) || isa<ObjCTypeParamDecl>(D)) {
// We don't want to deserialize the DeclContext of a template
// parameter or of a parameter of a function template immediately. These
// entities might be used in the formulation of their DeclContext (for
// example, a function parameter can be used in decltype() in the trailing
// return type of the function). Use the translation unit DeclContext as a
// placeholder.
GlobalDeclID SemaDCIDForTemplateParmDecl = readDeclID();
GlobalDeclID LexicalDCIDForTemplateParmDecl = readDeclID();
if (!LexicalDCIDForTemplateParmDecl)
LexicalDCIDForTemplateParmDecl = SemaDCIDForTemplateParmDecl;
Reader.addPendingDeclContextInfo(D,
SemaDCIDForTemplateParmDecl,
LexicalDCIDForTemplateParmDecl);
D->setDeclContext(Reader.getContext().getTranslationUnitDecl());
} else {
auto *SemaDC = readDeclAs<DeclContext>();
auto *LexicalDC = readDeclAs<DeclContext>();
if (!LexicalDC)
LexicalDC = SemaDC;
DeclContext *MergedSemaDC = Reader.MergedDeclContexts.lookup(SemaDC);
// Avoid calling setLexicalDeclContext() directly because it uses
// Decl::getASTContext() internally which is unsafe during deserialization.
D->setDeclContextsImpl(MergedSemaDC ? MergedSemaDC : SemaDC, LexicalDC,
Reader.getContext());
}
D->setLocation(ThisDeclLoc);
D->InvalidDecl = Record.readInt();
if (Record.readInt()) { // hasAttrs
AttrVec Attrs;
Record.readAttributes(Attrs);
// Avoid calling setAttrs() directly because it uses Decl::getASTContext()
// internally which is unsafe during deserialization.
D->setAttrsImpl(Attrs, Reader.getContext());
}
D->setImplicit(Record.readInt());
D->Used = Record.readInt();
IsDeclMarkedUsed |= D->Used;
D->setReferenced(Record.readInt());
D->setTopLevelDeclInObjCContainer(Record.readInt());
D->setAccess((AccessSpecifier)Record.readInt());
D->FromASTFile = true;
bool ModulePrivate = Record.readInt();
// Determine whether this declaration is part of a (sub)module. If so, it
// may not yet be visible.
if (unsigned SubmoduleID = readSubmoduleID()) {
// Store the owning submodule ID in the declaration.
D->setModuleOwnershipKind(
ModulePrivate ? Decl::ModuleOwnershipKind::ModulePrivate
: Decl::ModuleOwnershipKind::VisibleWhenImported);
D->setOwningModuleID(SubmoduleID);
if (ModulePrivate) {
// Module-private declarations are never visible, so there is no work to
// do.
} else if (Reader.getContext().getLangOpts().ModulesLocalVisibility) {
// If local visibility is being tracked, this declaration will become
// hidden and visible as the owning module does.
} else if (Module *Owner = Reader.getSubmodule(SubmoduleID)) {
// Mark the declaration as visible when its owning module becomes visible.
if (Owner->NameVisibility == Module::AllVisible)
D->setVisibleDespiteOwningModule();
else
Reader.HiddenNamesMap[Owner].push_back(D);
}
} else if (ModulePrivate) {
D->setModuleOwnershipKind(Decl::ModuleOwnershipKind::ModulePrivate);
}
}
void ASTDeclReader::VisitPragmaCommentDecl(PragmaCommentDecl *D) {
VisitDecl(D);
D->setLocation(readSourceLocation());
D->CommentKind = (PragmaMSCommentKind)Record.readInt();
std::string Arg = readString();
memcpy(D->getTrailingObjects<char>(), Arg.data(), Arg.size());
D->getTrailingObjects<char>()[Arg.size()] = '\0';
}
void ASTDeclReader::VisitPragmaDetectMismatchDecl(PragmaDetectMismatchDecl *D) {
VisitDecl(D);
D->setLocation(readSourceLocation());
std::string Name = readString();
memcpy(D->getTrailingObjects<char>(), Name.data(), Name.size());
D->getTrailingObjects<char>()[Name.size()] = '\0';
D->ValueStart = Name.size() + 1;
std::string Value = readString();
memcpy(D->getTrailingObjects<char>() + D->ValueStart, Value.data(),
Value.size());
D->getTrailingObjects<char>()[D->ValueStart + Value.size()] = '\0';
}
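// The trailing character buffer filled in above ends up as two consecutive
// NUL-terminated strings, with ValueStart indexing the second one:
//   [ Name '\0' | Value '\0' ]
//     offset 0    offset ValueStart == Name.size() + 1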
void ASTDeclReader::VisitTranslationUnitDecl(TranslationUnitDecl *TU) {
llvm_unreachable("Translation units are not serialized");
}
void ASTDeclReader::VisitNamedDecl(NamedDecl *ND) {
VisitDecl(ND);
ND->setDeclName(Record.readDeclarationName());
AnonymousDeclNumber = Record.readInt();
}
void ASTDeclReader::VisitTypeDecl(TypeDecl *TD) {
VisitNamedDecl(TD);
TD->setLocStart(readSourceLocation());
// Delay type reading until after we have fully initialized the decl.
DeferredTypeID = Record.getGlobalTypeID(Record.readInt());
}
ASTDeclReader::RedeclarableResult
ASTDeclReader::VisitTypedefNameDecl(TypedefNameDecl *TD) {
RedeclarableResult Redecl = VisitRedeclarable(TD);
VisitTypeDecl(TD);
TypeSourceInfo *TInfo = readTypeSourceInfo();
if (Record.readInt()) { // isModed
QualType modedT = Record.readType();
TD->setModedTypeSourceInfo(TInfo, modedT);
} else
TD->setTypeSourceInfo(TInfo);
// Read and discard the declaration for which this is a typedef name for
// linkage, if it exists. We cannot rely on our type to pull in this decl,
// because it might have been merged with a type from another module and
// thus might not refer to our version of the declaration.
readDecl();
return Redecl;
}
void ASTDeclReader::VisitTypedefDecl(TypedefDecl *TD) {
RedeclarableResult Redecl = VisitTypedefNameDecl(TD);
mergeRedeclarable(TD, Redecl);
}
void ASTDeclReader::VisitTypeAliasDecl(TypeAliasDecl *TD) {
RedeclarableResult Redecl = VisitTypedefNameDecl(TD);
if (auto *Template = readDeclAs<TypeAliasTemplateDecl>())
// Merged when we merge the template.
TD->setDescribedAliasTemplate(Template);
else
mergeRedeclarable(TD, Redecl);
}
ASTDeclReader::RedeclarableResult ASTDeclReader::VisitTagDecl(TagDecl *TD) {
RedeclarableResult Redecl = VisitRedeclarable(TD);
VisitTypeDecl(TD);
TD->IdentifierNamespace = Record.readInt();
TD->setTagKind((TagDecl::TagKind)Record.readInt());
if (!isa<CXXRecordDecl>(TD))
TD->setCompleteDefinition(Record.readInt());
TD->setEmbeddedInDeclarator(Record.readInt());
TD->setFreeStanding(Record.readInt());
TD->setCompleteDefinitionRequired(Record.readInt());
TD->setBraceRange(readSourceRange());
switch (Record.readInt()) {
case 0:
break;
case 1: { // ExtInfo
auto *Info = new (Reader.getContext()) TagDecl::ExtInfo();
Record.readQualifierInfo(*Info);
TD->TypedefNameDeclOrQualifier = Info;
break;
}
case 2: // TypedefNameForAnonDecl
NamedDeclForTagDecl = readDeclID();
TypedefNameForLinkage = Record.readIdentifier();
break;
default:
llvm_unreachable("unexpected tag info kind");
}
if (!isa<CXXRecordDecl>(TD))
mergeRedeclarable(TD, Redecl);
return Redecl;
}
void ASTDeclReader::VisitEnumDecl(EnumDecl *ED) {
VisitTagDecl(ED);
if (TypeSourceInfo *TI = readTypeSourceInfo())
ED->setIntegerTypeSourceInfo(TI);
else
ED->setIntegerType(Record.readType());
ED->setPromotionType(Record.readType());
ED->setNumPositiveBits(Record.readInt());
ED->setNumNegativeBits(Record.readInt());
ED->setScoped(Record.readInt());
ED->setScopedUsingClassTag(Record.readInt());
ED->setFixed(Record.readInt());
ED->setHasODRHash(true);
ED->ODRHash = Record.readInt();
// If this is a definition subject to the ODR, and we already have a
// definition, merge this one into it.
if (ED->isCompleteDefinition() &&
Reader.getContext().getLangOpts().Modules &&
Reader.getContext().getLangOpts().CPlusPlus) {
EnumDecl *&OldDef = Reader.EnumDefinitions[ED->getCanonicalDecl()];
if (!OldDef) {
// This is the first time we've seen an imported definition. Look for a
// local definition before deciding that we are the first definition.
for (auto *D : merged_redecls(ED->getCanonicalDecl())) {
if (!D->isFromASTFile() && D->isCompleteDefinition()) {
OldDef = D;
break;
}
}
}
if (OldDef) {
Reader.MergedDeclContexts.insert(std::make_pair(ED, OldDef));
ED->setCompleteDefinition(false);
Reader.mergeDefinitionVisibility(OldDef, ED);
if (OldDef->getODRHash() != ED->getODRHash())
Reader.PendingEnumOdrMergeFailures[OldDef].push_back(ED);
} else {
OldDef = ED;
}
}
if (auto *InstED = readDeclAs<EnumDecl>()) {
auto TSK = (TemplateSpecializationKind)Record.readInt();
SourceLocation POI = readSourceLocation();
ED->setInstantiationOfMemberEnum(Reader.getContext(), InstED, TSK);
ED->getMemberSpecializationInfo()->setPointOfInstantiation(POI);
}
}
ASTDeclReader::RedeclarableResult
ASTDeclReader::VisitRecordDeclImpl(RecordDecl *RD) {
RedeclarableResult Redecl = VisitTagDecl(RD);
RD->setHasFlexibleArrayMember(Record.readInt());
RD->setAnonymousStructOrUnion(Record.readInt());
RD->setHasObjectMember(Record.readInt());
RD->setHasVolatileMember(Record.readInt());
RD->setNonTrivialToPrimitiveDefaultInitialize(Record.readInt());
RD->setNonTrivialToPrimitiveCopy(Record.readInt());
RD->setNonTrivialToPrimitiveDestroy(Record.readInt());
RD->setHasNonTrivialToPrimitiveDefaultInitializeCUnion(Record.readInt());
RD->setHasNonTrivialToPrimitiveDestructCUnion(Record.readInt());
RD->setHasNonTrivialToPrimitiveCopyCUnion(Record.readInt());
RD->setParamDestroyedInCallee(Record.readInt());
RD->setArgPassingRestrictions((RecordDecl::ArgPassingKind)Record.readInt());
return Redecl;
}
void ASTDeclReader::VisitRecordDecl(RecordDecl *RD) {
VisitRecordDeclImpl(RD);
// Maintain the invariant of a redeclaration chain containing only
// a single definition.
if (RD->isCompleteDefinition()) {
RecordDecl *Canon = static_cast<RecordDecl *>(RD->getCanonicalDecl());
RecordDecl *&OldDef = Reader.RecordDefinitions[Canon];
if (!OldDef) {
// This is the first time we've seen an imported definition. Look for a
// local definition before deciding that we are the first definition.
for (auto *D : merged_redecls(Canon)) {
if (!D->isFromASTFile() && D->isCompleteDefinition()) {
OldDef = D;
break;
}
}
}
if (OldDef) {
Reader.MergedDeclContexts.insert(std::make_pair(RD, OldDef));
RD->setCompleteDefinition(false);
Reader.mergeDefinitionVisibility(OldDef, RD);
} else {
OldDef = RD;
}
}
}
void ASTDeclReader::VisitValueDecl(ValueDecl *VD) {
VisitNamedDecl(VD);
// For function declarations, defer reading the type in case the function has
// a deduced return type that references an entity declared within the
// function.
if (isa<FunctionDecl>(VD))
DeferredTypeID = Record.getGlobalTypeID(Record.readInt());
else
VD->setType(Record.readType());
}
void ASTDeclReader::VisitEnumConstantDecl(EnumConstantDecl *ECD) {
VisitValueDecl(ECD);
if (Record.readInt())
ECD->setInitExpr(Record.readExpr());
ECD->setInitVal(Record.readAPSInt());
mergeMergeable(ECD);
}
void ASTDeclReader::VisitDeclaratorDecl(DeclaratorDecl *DD) {
VisitValueDecl(DD);
DD->setInnerLocStart(readSourceLocation());
if (Record.readInt()) { // hasExtInfo
auto *Info = new (Reader.getContext()) DeclaratorDecl::ExtInfo();
Record.readQualifierInfo(*Info);
Info->TrailingRequiresClause = Record.readExpr();
DD->DeclInfo = Info;
}
QualType TSIType = Record.readType();
DD->setTypeSourceInfo(
TSIType.isNull() ? nullptr
: Reader.getContext().CreateTypeSourceInfo(TSIType));
}
void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) {
RedeclarableResult Redecl = VisitRedeclarable(FD);
VisitDeclaratorDecl(FD);
// Attach a type to this function. Use the real type if possible, but fall
// back to the type as written if it involves a deduced return type.
if (FD->getTypeSourceInfo() &&
FD->getTypeSourceInfo()->getType()->castAs<FunctionType>()
->getReturnType()->getContainedAutoType()) {
// We'll set up the real type in Visit, once we've finished loading the
// function.
FD->setType(FD->getTypeSourceInfo()->getType());
Reader.PendingFunctionTypes.push_back({FD, DeferredTypeID});
} else {
FD->setType(Reader.GetType(DeferredTypeID));
}
DeferredTypeID = 0;
FD->DNLoc = Record.readDeclarationNameLoc(FD->getDeclName());
FD->IdentifierNamespace = Record.readInt();
// FunctionDecl's body is handled last at ASTDeclReader::Visit,
// after everything else is read.
FD->setStorageClass(static_cast<StorageClass>(Record.readInt()));
FD->setInlineSpecified(Record.readInt());
FD->setImplicitlyInline(Record.readInt());
FD->setVirtualAsWritten(Record.readInt());
  // We defer calling `FunctionDecl::setPure()` here because, for methods of
  // `ClassTemplateSpecializationDecl`s, we may not have connected up the
  // definition yet (which `setPure` requires).
const bool Pure = Record.readInt();
FD->setHasInheritedPrototype(Record.readInt());
FD->setHasWrittenPrototype(Record.readInt());
FD->setDeletedAsWritten(Record.readInt());
FD->setTrivial(Record.readInt());
FD->setTrivialForCall(Record.readInt());
FD->setDefaulted(Record.readInt());
FD->setExplicitlyDefaulted(Record.readInt());
FD->setHasImplicitReturnZero(Record.readInt());
FD->setConstexprKind(static_cast<ConstexprSpecKind>(Record.readInt()));
FD->setUsesSEHTry(Record.readInt());
FD->setHasSkippedBody(Record.readInt());
FD->setIsMultiVersion(Record.readInt());
FD->setLateTemplateParsed(Record.readInt());
FD->setCachedLinkage(static_cast<Linkage>(Record.readInt()));
FD->EndRangeLoc = readSourceLocation();
FD->ODRHash = Record.readInt();
FD->setHasODRHash(true);
if (FD->isDefaulted()) {
if (unsigned NumLookups = Record.readInt()) {
SmallVector<DeclAccessPair, 8> Lookups;
for (unsigned I = 0; I != NumLookups; ++I) {
NamedDecl *ND = Record.readDeclAs<NamedDecl>();
AccessSpecifier AS = (AccessSpecifier)Record.readInt();
Lookups.push_back(DeclAccessPair::make(ND, AS));
}
FD->setDefaultedFunctionInfo(FunctionDecl::DefaultedFunctionInfo::Create(
Reader.getContext(), Lookups));
}
}
switch ((FunctionDecl::TemplatedKind)Record.readInt()) {
case FunctionDecl::TK_NonTemplate:
mergeRedeclarable(FD, Redecl);
break;
case FunctionDecl::TK_FunctionTemplate:
// Merged when we merge the template.
FD->setDescribedFunctionTemplate(readDeclAs<FunctionTemplateDecl>());
break;
case FunctionDecl::TK_MemberSpecialization: {
auto *InstFD = readDeclAs<FunctionDecl>();
auto TSK = (TemplateSpecializationKind)Record.readInt();
SourceLocation POI = readSourceLocation();
FD->setInstantiationOfMemberFunction(Reader.getContext(), InstFD, TSK);
FD->getMemberSpecializationInfo()->setPointOfInstantiation(POI);
mergeRedeclarable(FD, Redecl);
break;
}
case FunctionDecl::TK_FunctionTemplateSpecialization: {
auto *Template = readDeclAs<FunctionTemplateDecl>();
auto TSK = (TemplateSpecializationKind)Record.readInt();
// Template arguments.
SmallVector<TemplateArgument, 8> TemplArgs;
Record.readTemplateArgumentList(TemplArgs, /*Canonicalize*/ true);
// Template args as written.
SmallVector<TemplateArgumentLoc, 8> TemplArgLocs;
SourceLocation LAngleLoc, RAngleLoc;
bool HasTemplateArgumentsAsWritten = Record.readInt();
if (HasTemplateArgumentsAsWritten) {
unsigned NumTemplateArgLocs = Record.readInt();
TemplArgLocs.reserve(NumTemplateArgLocs);
for (unsigned i = 0; i != NumTemplateArgLocs; ++i)
TemplArgLocs.push_back(Record.readTemplateArgumentLoc());
LAngleLoc = readSourceLocation();
RAngleLoc = readSourceLocation();
}
SourceLocation POI = readSourceLocation();
ASTContext &C = Reader.getContext();
TemplateArgumentList *TemplArgList
= TemplateArgumentList::CreateCopy(C, TemplArgs);
TemplateArgumentListInfo TemplArgsInfo(LAngleLoc, RAngleLoc);
for (unsigned i = 0, e = TemplArgLocs.size(); i != e; ++i)
TemplArgsInfo.addArgument(TemplArgLocs[i]);
MemberSpecializationInfo *MSInfo = nullptr;
if (Record.readInt()) {
auto *FD = readDeclAs<FunctionDecl>();
auto TSK = (TemplateSpecializationKind)Record.readInt();
SourceLocation POI = readSourceLocation();
MSInfo = new (C) MemberSpecializationInfo(FD, TSK);
MSInfo->setPointOfInstantiation(POI);
}
FunctionTemplateSpecializationInfo *FTInfo =
FunctionTemplateSpecializationInfo::Create(
C, FD, Template, TSK, TemplArgList,
HasTemplateArgumentsAsWritten ? &TemplArgsInfo : nullptr, POI,
MSInfo);
FD->TemplateOrSpecialization = FTInfo;
    if (FD->isCanonicalDecl()) { // If canonical, add to the template's set.
// The template that contains the specializations set. It's not safe to
// use getCanonicalDecl on Template since it may still be initializing.
auto *CanonTemplate = readDeclAs<FunctionTemplateDecl>();
// Get the InsertPos by FindNodeOrInsertPos() instead of calling
// InsertNode(FTInfo) directly to avoid the getASTContext() call in
// FunctionTemplateSpecializationInfo's Profile().
// We avoid getASTContext because a decl in the parent hierarchy may
// be initializing.
llvm::FoldingSetNodeID ID;
FunctionTemplateSpecializationInfo::Profile(ID, TemplArgs, C);
void *InsertPos = nullptr;
FunctionTemplateDecl::Common *CommonPtr = CanonTemplate->getCommonPtr();
FunctionTemplateSpecializationInfo *ExistingInfo =
CommonPtr->Specializations.FindNodeOrInsertPos(ID, InsertPos);
if (InsertPos)
CommonPtr->Specializations.InsertNode(FTInfo, InsertPos);
else {
assert(Reader.getContext().getLangOpts().Modules &&
"already deserialized this template specialization");
mergeRedeclarable(FD, ExistingInfo->getFunction(), Redecl);
}
}
break;
}
case FunctionDecl::TK_DependentFunctionTemplateSpecialization: {
// Templates.
UnresolvedSet<8> TemplDecls;
unsigned NumTemplates = Record.readInt();
while (NumTemplates--)
TemplDecls.addDecl(readDeclAs<NamedDecl>());
    // Template args.
TemplateArgumentListInfo TemplArgs;
unsigned NumArgs = Record.readInt();
while (NumArgs--)
TemplArgs.addArgument(Record.readTemplateArgumentLoc());
TemplArgs.setLAngleLoc(readSourceLocation());
TemplArgs.setRAngleLoc(readSourceLocation());
FD->setDependentTemplateSpecialization(Reader.getContext(),
TemplDecls, TemplArgs);
// These are not merged; we don't need to merge redeclarations of dependent
// template friends.
break;
}
}
// Defer calling `setPure` until merging above has guaranteed we've set
// `DefinitionData` (as this will need to access it).
FD->setPure(Pure);
// Read in the parameters.
unsigned NumParams = Record.readInt();
SmallVector<ParmVarDecl *, 16> Params;
Params.reserve(NumParams);
for (unsigned I = 0; I != NumParams; ++I)
Params.push_back(readDeclAs<ParmVarDecl>());
FD->setParams(Reader.getContext(), Params);
}
void ASTDeclReader::VisitObjCMethodDecl(ObjCMethodDecl *MD) {
VisitNamedDecl(MD);
if (Record.readInt()) {
// Load the body on-demand. Most clients won't care, because method
// definitions rarely show up in headers.
Reader.PendingBodies[MD] = GetCurrentCursorOffset();
HasPendingBody = true;
}
MD->setSelfDecl(readDeclAs<ImplicitParamDecl>());
MD->setCmdDecl(readDeclAs<ImplicitParamDecl>());
MD->setInstanceMethod(Record.readInt());
MD->setVariadic(Record.readInt());
MD->setPropertyAccessor(Record.readInt());
MD->setSynthesizedAccessorStub(Record.readInt());
MD->setDefined(Record.readInt());
MD->setOverriding(Record.readInt());
MD->setHasSkippedBody(Record.readInt());
MD->setIsRedeclaration(Record.readInt());
MD->setHasRedeclaration(Record.readInt());
if (MD->hasRedeclaration())
Reader.getContext().setObjCMethodRedeclaration(MD,
readDeclAs<ObjCMethodDecl>());
MD->setDeclImplementation((ObjCMethodDecl::ImplementationControl)Record.readInt());
MD->setObjCDeclQualifier((Decl::ObjCDeclQualifier)Record.readInt());
MD->setRelatedResultType(Record.readInt());
MD->setReturnType(Record.readType());
MD->setReturnTypeSourceInfo(readTypeSourceInfo());
MD->DeclEndLoc = readSourceLocation();
unsigned NumParams = Record.readInt();
SmallVector<ParmVarDecl *, 16> Params;
Params.reserve(NumParams);
for (unsigned I = 0; I != NumParams; ++I)
Params.push_back(readDeclAs<ParmVarDecl>());
MD->setSelLocsKind((SelectorLocationsKind)Record.readInt());
unsigned NumStoredSelLocs = Record.readInt();
SmallVector<SourceLocation, 16> SelLocs;
SelLocs.reserve(NumStoredSelLocs);
for (unsigned i = 0; i != NumStoredSelLocs; ++i)
SelLocs.push_back(readSourceLocation());
MD->setParamsAndSelLocs(Reader.getContext(), Params, SelLocs);
}
void ASTDeclReader::VisitObjCTypeParamDecl(ObjCTypeParamDecl *D) {
VisitTypedefNameDecl(D);
D->Variance = Record.readInt();
D->Index = Record.readInt();
D->VarianceLoc = readSourceLocation();
D->ColonLoc = readSourceLocation();
}
void ASTDeclReader::VisitObjCContainerDecl(ObjCContainerDecl *CD) {
VisitNamedDecl(CD);
CD->setAtStartLoc(readSourceLocation());
CD->setAtEndRange(readSourceRange());
}
ObjCTypeParamList *ASTDeclReader::ReadObjCTypeParamList() {
unsigned numParams = Record.readInt();
if (numParams == 0)
return nullptr;
SmallVector<ObjCTypeParamDecl *, 4> typeParams;
typeParams.reserve(numParams);
for (unsigned i = 0; i != numParams; ++i) {
auto *typeParam = readDeclAs<ObjCTypeParamDecl>();
if (!typeParam)
return nullptr;
typeParams.push_back(typeParam);
}
SourceLocation lAngleLoc = readSourceLocation();
SourceLocation rAngleLoc = readSourceLocation();
return ObjCTypeParamList::create(Reader.getContext(), lAngleLoc,
typeParams, rAngleLoc);
}
void ASTDeclReader::ReadObjCDefinitionData(
struct ObjCInterfaceDecl::DefinitionData &Data) {
// Read the superclass.
Data.SuperClassTInfo = readTypeSourceInfo();
Data.EndLoc = readSourceLocation();
Data.HasDesignatedInitializers = Record.readInt();
// Read the directly referenced protocols and their SourceLocations.
unsigned NumProtocols = Record.readInt();
SmallVector<ObjCProtocolDecl *, 16> Protocols;
Protocols.reserve(NumProtocols);
for (unsigned I = 0; I != NumProtocols; ++I)
Protocols.push_back(readDeclAs<ObjCProtocolDecl>());
SmallVector<SourceLocation, 16> ProtoLocs;
ProtoLocs.reserve(NumProtocols);
for (unsigned I = 0; I != NumProtocols; ++I)
ProtoLocs.push_back(readSourceLocation());
Data.ReferencedProtocols.set(Protocols.data(), NumProtocols, ProtoLocs.data(),
Reader.getContext());
// Read the transitive closure of protocols referenced by this class.
NumProtocols = Record.readInt();
Protocols.clear();
Protocols.reserve(NumProtocols);
for (unsigned I = 0; I != NumProtocols; ++I)
Protocols.push_back(readDeclAs<ObjCProtocolDecl>());
Data.AllReferencedProtocols.set(Protocols.data(), NumProtocols,
Reader.getContext());
}
void ASTDeclReader::MergeDefinitionData(ObjCInterfaceDecl *D,
struct ObjCInterfaceDecl::DefinitionData &&NewDD) {
struct ObjCInterfaceDecl::DefinitionData &DD = D->data();
if (DD.Definition != NewDD.Definition) {
Reader.MergedDeclContexts.insert(
std::make_pair(NewDD.Definition, DD.Definition));
Reader.mergeDefinitionVisibility(DD.Definition, NewDD.Definition);
}
// FIXME: odr checking?
}
void ASTDeclReader::VisitObjCInterfaceDecl(ObjCInterfaceDecl *ID) {
RedeclarableResult Redecl = VisitRedeclarable(ID);
VisitObjCContainerDecl(ID);
DeferredTypeID = Record.getGlobalTypeID(Record.readInt());
mergeRedeclarable(ID, Redecl);
ID->TypeParamList = ReadObjCTypeParamList();
if (Record.readInt()) {
// Read the definition.
ID->allocateDefinitionData();
ReadObjCDefinitionData(ID->data());
ObjCInterfaceDecl *Canon = ID->getCanonicalDecl();
if (Canon->Data.getPointer()) {
// If we already have a definition, keep the definition invariant and
// merge the data.
MergeDefinitionData(Canon, std::move(ID->data()));
ID->Data = Canon->Data;
} else {
// Set the definition data of the canonical declaration, so other
// redeclarations will see it.
ID->getCanonicalDecl()->Data = ID->Data;
// We will rebuild this list lazily.
ID->setIvarList(nullptr);
}
// Note that we have deserialized a definition.
Reader.PendingDefinitions.insert(ID);
// Note that we've loaded this Objective-C class.
Reader.ObjCClassesLoaded.push_back(ID);
} else {
ID->Data = ID->getCanonicalDecl()->Data;
}
}
void ASTDeclReader::VisitObjCIvarDecl(ObjCIvarDecl *IVD) {
VisitFieldDecl(IVD);
IVD->setAccessControl((ObjCIvarDecl::AccessControl)Record.readInt());
// This field will be built lazily.
IVD->setNextIvar(nullptr);
bool synth = Record.readInt();
IVD->setSynthesize(synth);
}
void ASTDeclReader::ReadObjCDefinitionData(
struct ObjCProtocolDecl::DefinitionData &Data) {
unsigned NumProtoRefs = Record.readInt();
SmallVector<ObjCProtocolDecl *, 16> ProtoRefs;
ProtoRefs.reserve(NumProtoRefs);
for (unsigned I = 0; I != NumProtoRefs; ++I)
ProtoRefs.push_back(readDeclAs<ObjCProtocolDecl>());
SmallVector<SourceLocation, 16> ProtoLocs;
ProtoLocs.reserve(NumProtoRefs);
for (unsigned I = 0; I != NumProtoRefs; ++I)
ProtoLocs.push_back(readSourceLocation());
Data.ReferencedProtocols.set(ProtoRefs.data(), NumProtoRefs,
ProtoLocs.data(), Reader.getContext());
}
void ASTDeclReader::MergeDefinitionData(ObjCProtocolDecl *D,
struct ObjCProtocolDecl::DefinitionData &&NewDD) {
struct ObjCProtocolDecl::DefinitionData &DD = D->data();
if (DD.Definition != NewDD.Definition) {
Reader.MergedDeclContexts.insert(
std::make_pair(NewDD.Definition, DD.Definition));
Reader.mergeDefinitionVisibility(DD.Definition, NewDD.Definition);
}
// FIXME: odr checking?
}
void ASTDeclReader::VisitObjCProtocolDecl(ObjCProtocolDecl *PD) {
RedeclarableResult Redecl = VisitRedeclarable(PD);
VisitObjCContainerDecl(PD);
mergeRedeclarable(PD, Redecl);
if (Record.readInt()) {
// Read the definition.
PD->allocateDefinitionData();
ReadObjCDefinitionData(PD->data());
ObjCProtocolDecl *Canon = PD->getCanonicalDecl();
if (Canon->Data.getPointer()) {
// If we already have a definition, keep the definition invariant and
// merge the data.
MergeDefinitionData(Canon, std::move(PD->data()));
PD->Data = Canon->Data;
} else {
// Set the definition data of the canonical declaration, so other
// redeclarations will see it.
PD->getCanonicalDecl()->Data = PD->Data;
}
// Note that we have deserialized a definition.
Reader.PendingDefinitions.insert(PD);
} else {
PD->Data = PD->getCanonicalDecl()->Data;
}
}
void ASTDeclReader::VisitObjCAtDefsFieldDecl(ObjCAtDefsFieldDecl *FD) {
VisitFieldDecl(FD);
}
void ASTDeclReader::VisitObjCCategoryDecl(ObjCCategoryDecl *CD) {
VisitObjCContainerDecl(CD);
CD->setCategoryNameLoc(readSourceLocation());
CD->setIvarLBraceLoc(readSourceLocation());
CD->setIvarRBraceLoc(readSourceLocation());
// Note that this category has been deserialized. We do this before
// deserializing the interface declaration, so that it will consider this
  // category.
Reader.CategoriesDeserialized.insert(CD);
CD->ClassInterface = readDeclAs<ObjCInterfaceDecl>();
CD->TypeParamList = ReadObjCTypeParamList();
unsigned NumProtoRefs = Record.readInt();
SmallVector<ObjCProtocolDecl *, 16> ProtoRefs;
ProtoRefs.reserve(NumProtoRefs);
for (unsigned I = 0; I != NumProtoRefs; ++I)
ProtoRefs.push_back(readDeclAs<ObjCProtocolDecl>());
SmallVector<SourceLocation, 16> ProtoLocs;
ProtoLocs.reserve(NumProtoRefs);
for (unsigned I = 0; I != NumProtoRefs; ++I)
ProtoLocs.push_back(readSourceLocation());
CD->setProtocolList(ProtoRefs.data(), NumProtoRefs, ProtoLocs.data(),
Reader.getContext());
// Protocols in the class extension belong to the class.
if (NumProtoRefs > 0 && CD->ClassInterface && CD->IsClassExtension())
CD->ClassInterface->mergeClassExtensionProtocolList(
(ObjCProtocolDecl *const *)ProtoRefs.data(), NumProtoRefs,
Reader.getContext());
}
void ASTDeclReader::VisitObjCCompatibleAliasDecl(ObjCCompatibleAliasDecl *CAD) {
VisitNamedDecl(CAD);
CAD->setClassInterface(readDeclAs<ObjCInterfaceDecl>());
}
void ASTDeclReader::VisitObjCPropertyDecl(ObjCPropertyDecl *D) {
VisitNamedDecl(D);
D->setAtLoc(readSourceLocation());
D->setLParenLoc(readSourceLocation());
QualType T = Record.readType();
TypeSourceInfo *TSI = readTypeSourceInfo();
D->setType(T, TSI);
D->setPropertyAttributes((ObjCPropertyAttribute::Kind)Record.readInt());
D->setPropertyAttributesAsWritten(
(ObjCPropertyAttribute::Kind)Record.readInt());
D->setPropertyImplementation(
(ObjCPropertyDecl::PropertyControl)Record.readInt());
DeclarationName GetterName = Record.readDeclarationName();
SourceLocation GetterLoc = readSourceLocation();
D->setGetterName(GetterName.getObjCSelector(), GetterLoc);
DeclarationName SetterName = Record.readDeclarationName();
SourceLocation SetterLoc = readSourceLocation();
D->setSetterName(SetterName.getObjCSelector(), SetterLoc);
D->setGetterMethodDecl(readDeclAs<ObjCMethodDecl>());
D->setSetterMethodDecl(readDeclAs<ObjCMethodDecl>());
D->setPropertyIvarDecl(readDeclAs<ObjCIvarDecl>());
}
void ASTDeclReader::VisitObjCImplDecl(ObjCImplDecl *D) {
VisitObjCContainerDecl(D);
D->setClassInterface(readDeclAs<ObjCInterfaceDecl>());
}
void ASTDeclReader::VisitObjCCategoryImplDecl(ObjCCategoryImplDecl *D) {
VisitObjCImplDecl(D);
D->CategoryNameLoc = readSourceLocation();
}
void ASTDeclReader::VisitObjCImplementationDecl(ObjCImplementationDecl *D) {
VisitObjCImplDecl(D);
D->setSuperClass(readDeclAs<ObjCInterfaceDecl>());
D->SuperLoc = readSourceLocation();
D->setIvarLBraceLoc(readSourceLocation());
D->setIvarRBraceLoc(readSourceLocation());
D->setHasNonZeroConstructors(Record.readInt());
D->setHasDestructors(Record.readInt());
D->NumIvarInitializers = Record.readInt();
if (D->NumIvarInitializers)
D->IvarInitializers = ReadGlobalOffset();
}
void ASTDeclReader::VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D) {
VisitDecl(D);
D->setAtLoc(readSourceLocation());
D->setPropertyDecl(readDeclAs<ObjCPropertyDecl>());
D->PropertyIvarDecl = readDeclAs<ObjCIvarDecl>();
D->IvarLoc = readSourceLocation();
D->setGetterMethodDecl(readDeclAs<ObjCMethodDecl>());
D->setSetterMethodDecl(readDeclAs<ObjCMethodDecl>());
D->setGetterCXXConstructor(Record.readExpr());
D->setSetterCXXAssignment(Record.readExpr());
}
void ASTDeclReader::VisitFieldDecl(FieldDecl *FD) {
VisitDeclaratorDecl(FD);
FD->Mutable = Record.readInt();
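  // A non-zero InitStorageKind is followed by either the captured VLA type or
  // the in-class initializer expression.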
if (auto ISK = static_cast<FieldDecl::InitStorageKind>(Record.readInt())) {
FD->InitStorage.setInt(ISK);
FD->InitStorage.setPointer(ISK == FieldDecl::ISK_CapturedVLAType
? Record.readType().getAsOpaquePtr()
: Record.readExpr());
}
if (auto *BW = Record.readExpr())
FD->setBitWidth(BW);
if (!FD->getDeclName()) {
if (auto *Tmpl = readDeclAs<FieldDecl>())
Reader.getContext().setInstantiatedFromUnnamedFieldDecl(FD, Tmpl);
}
mergeMergeable(FD);
}
void ASTDeclReader::VisitMSPropertyDecl(MSPropertyDecl *PD) {
VisitDeclaratorDecl(PD);
PD->GetterId = Record.readIdentifier();
PD->SetterId = Record.readIdentifier();
}
void ASTDeclReader::VisitMSGuidDecl(MSGuidDecl *D) {
VisitValueDecl(D);
D->PartVal.Part1 = Record.readInt();
D->PartVal.Part2 = Record.readInt();
D->PartVal.Part3 = Record.readInt();
for (auto &C : D->PartVal.Part4And5)
C = Record.readInt();
// Add this GUID to the AST context's lookup structure, and merge if needed.
if (MSGuidDecl *Existing = Reader.getContext().MSGuidDecls.GetOrInsertNode(D))
Reader.getContext().setPrimaryMergedDecl(D, Existing->getCanonicalDecl());
}
void ASTDeclReader::VisitTemplateParamObjectDecl(TemplateParamObjectDecl *D) {
VisitValueDecl(D);
D->Value = Record.readAPValue();
// Add this template parameter object to the AST context's lookup structure,
// and merge if needed.
if (TemplateParamObjectDecl *Existing =
Reader.getContext().TemplateParamObjectDecls.GetOrInsertNode(D))
Reader.getContext().setPrimaryMergedDecl(D, Existing->getCanonicalDecl());
}
void ASTDeclReader::VisitIndirectFieldDecl(IndirectFieldDecl *FD) {
VisitValueDecl(FD);
FD->ChainingSize = Record.readInt();
assert(FD->ChainingSize >= 2 && "Anonymous chaining must be >= 2");
FD->Chaining = new (Reader.getContext())NamedDecl*[FD->ChainingSize];
for (unsigned I = 0; I != FD->ChainingSize; ++I)
FD->Chaining[I] = readDeclAs<NamedDecl>();
mergeMergeable(FD);
}
ASTDeclReader::RedeclarableResult ASTDeclReader::VisitVarDeclImpl(VarDecl *VD) {
RedeclarableResult Redecl = VisitRedeclarable(VD);
VisitDeclaratorDecl(VD);
VD->VarDeclBits.SClass = (StorageClass)Record.readInt();
VD->VarDeclBits.TSCSpec = Record.readInt();
VD->VarDeclBits.InitStyle = Record.readInt();
VD->VarDeclBits.ARCPseudoStrong = Record.readInt();
if (!isa<ParmVarDecl>(VD)) {
VD->NonParmVarDeclBits.IsThisDeclarationADemotedDefinition =
Record.readInt();
VD->NonParmVarDeclBits.ExceptionVar = Record.readInt();
VD->NonParmVarDeclBits.NRVOVariable = Record.readInt();
VD->NonParmVarDeclBits.CXXForRangeDecl = Record.readInt();
VD->NonParmVarDeclBits.ObjCForDecl = Record.readInt();
VD->NonParmVarDeclBits.IsInline = Record.readInt();
VD->NonParmVarDeclBits.IsInlineSpecified = Record.readInt();
VD->NonParmVarDeclBits.IsConstexpr = Record.readInt();
VD->NonParmVarDeclBits.IsInitCapture = Record.readInt();
VD->NonParmVarDeclBits.PreviousDeclInSameBlockScope = Record.readInt();
VD->NonParmVarDeclBits.ImplicitParamKind = Record.readInt();
VD->NonParmVarDeclBits.EscapingByref = Record.readInt();
}
auto VarLinkage = Linkage(Record.readInt());
VD->setCachedLinkage(VarLinkage);
// Reconstruct the one piece of the IdentifierNamespace that we need.
if (VD->getStorageClass() == SC_Extern && VarLinkage != NoLinkage &&
VD->getLexicalDeclContext()->isFunctionOrMethod())
VD->setLocalExternDecl();
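  // The next value encodes the initializer: zero means there is none;
  // otherwise an init expression follows, with the 0x2 bit recording constant
  // initialization and the 0x4 bit recording constant destruction.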
if (uint64_t Val = Record.readInt()) {
VD->setInit(Record.readExpr());
if (Val != 1) {
EvaluatedStmt *Eval = VD->ensureEvaluatedStmt();
Eval->HasConstantInitialization = (Val & 2) != 0;
Eval->HasConstantDestruction = (Val & 4) != 0;
}
}
if (VD->hasAttr<BlocksAttr>() && VD->getType()->getAsCXXRecordDecl()) {
Expr *CopyExpr = Record.readExpr();
if (CopyExpr)
Reader.getContext().setBlockVarCopyInit(VD, CopyExpr, Record.readInt());
}
if (VD->getStorageDuration() == SD_Static && Record.readInt()) {
Reader.DefinitionSource[VD] =
Loc.F->Kind == ModuleKind::MK_MainFile ||
Reader.getContext().getLangOpts().BuildingPCHWithObjectFile;
}
enum VarKind {
VarNotTemplate = 0, VarTemplate, StaticDataMemberSpecialization
};
switch ((VarKind)Record.readInt()) {
case VarNotTemplate:
// Only true variables (not parameters or implicit parameters) can be
// merged; the other kinds are not really redeclarable at all.
if (!isa<ParmVarDecl>(VD) && !isa<ImplicitParamDecl>(VD) &&
!isa<VarTemplateSpecializationDecl>(VD))
mergeRedeclarable(VD, Redecl);
break;
case VarTemplate:
// Merged when we merge the template.
VD->setDescribedVarTemplate(readDeclAs<VarTemplateDecl>());
break;
case StaticDataMemberSpecialization: { // HasMemberSpecializationInfo.
auto *Tmpl = readDeclAs<VarDecl>();
auto TSK = (TemplateSpecializationKind)Record.readInt();
SourceLocation POI = readSourceLocation();
Reader.getContext().setInstantiatedFromStaticDataMember(VD, Tmpl, TSK,POI);
mergeRedeclarable(VD, Redecl);
break;
}
}
return Redecl;
}
void ASTDeclReader::VisitImplicitParamDecl(ImplicitParamDecl *PD) {
VisitVarDecl(PD);
}
void ASTDeclReader::VisitParmVarDecl(ParmVarDecl *PD) {
VisitVarDecl(PD);
unsigned isObjCMethodParam = Record.readInt();
unsigned scopeDepth = Record.readInt();
unsigned scopeIndex = Record.readInt();
unsigned declQualifier = Record.readInt();
if (isObjCMethodParam) {
assert(scopeDepth == 0);
PD->setObjCMethodScopeInfo(scopeIndex);
PD->ParmVarDeclBits.ScopeDepthOrObjCQuals = declQualifier;
} else {
PD->setScopeInfo(scopeDepth, scopeIndex);
}
PD->ParmVarDeclBits.IsKNRPromoted = Record.readInt();
PD->ParmVarDeclBits.HasInheritedDefaultArg = Record.readInt();
if (Record.readInt()) // hasUninstantiatedDefaultArg.
PD->setUninstantiatedDefaultArg(Record.readExpr());
// FIXME: If this is a redeclaration of a function from another module, handle
// inheritance of default arguments.
}
void ASTDeclReader::VisitDecompositionDecl(DecompositionDecl *DD) {
VisitVarDecl(DD);
auto **BDs = DD->getTrailingObjects<BindingDecl *>();
for (unsigned I = 0; I != DD->NumBindings; ++I) {
BDs[I] = readDeclAs<BindingDecl>();
BDs[I]->setDecomposedDecl(DD);
}
}
void ASTDeclReader::VisitBindingDecl(BindingDecl *BD) {
VisitValueDecl(BD);
BD->Binding = Record.readExpr();
}
void ASTDeclReader::VisitFileScopeAsmDecl(FileScopeAsmDecl *AD) {
VisitDecl(AD);
AD->setAsmString(cast<StringLiteral>(Record.readExpr()));
AD->setRParenLoc(readSourceLocation());
}
void ASTDeclReader::VisitBlockDecl(BlockDecl *BD) {
VisitDecl(BD);
BD->setBody(cast_or_null<CompoundStmt>(Record.readStmt()));
BD->setSignatureAsWritten(readTypeSourceInfo());
unsigned NumParams = Record.readInt();
SmallVector<ParmVarDecl *, 16> Params;
Params.reserve(NumParams);
for (unsigned I = 0; I != NumParams; ++I)
Params.push_back(readDeclAs<ParmVarDecl>());
BD->setParams(Params);
BD->setIsVariadic(Record.readInt());
BD->setBlockMissingReturnType(Record.readInt());
BD->setIsConversionFromLambda(Record.readInt());
BD->setDoesNotEscape(Record.readInt());
BD->setCanAvoidCopyToHeap(Record.readInt());
bool capturesCXXThis = Record.readInt();
unsigned numCaptures = Record.readInt();
SmallVector<BlockDecl::Capture, 16> captures;
captures.reserve(numCaptures);
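  // Each capture is a VarDecl plus a packed flags word: 0x1 = captured by
  // reference, 0x2 = nested capture, 0x4 = a copy expression follows.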
for (unsigned i = 0; i != numCaptures; ++i) {
auto *decl = readDeclAs<VarDecl>();
unsigned flags = Record.readInt();
bool byRef = (flags & 1);
bool nested = (flags & 2);
Expr *copyExpr = ((flags & 4) ? Record.readExpr() : nullptr);
captures.push_back(BlockDecl::Capture(decl, byRef, nested, copyExpr));
}
BD->setCaptures(Reader.getContext(), captures, capturesCXXThis);
}
void ASTDeclReader::VisitCapturedDecl(CapturedDecl *CD) {
VisitDecl(CD);
unsigned ContextParamPos = Record.readInt();
CD->setNothrow(Record.readInt() != 0);
// Body is set by VisitCapturedStmt.
for (unsigned I = 0; I < CD->NumParams; ++I) {
if (I != ContextParamPos)
CD->setParam(I, readDeclAs<ImplicitParamDecl>());
else
CD->setContextParam(I, readDeclAs<ImplicitParamDecl>());
}
}
void ASTDeclReader::VisitLinkageSpecDecl(LinkageSpecDecl *D) {
VisitDecl(D);
D->setLanguage((LinkageSpecDecl::LanguageIDs)Record.readInt());
D->setExternLoc(readSourceLocation());
D->setRBraceLoc(readSourceLocation());
}
void ASTDeclReader::VisitExportDecl(ExportDecl *D) {
VisitDecl(D);
D->RBraceLoc = readSourceLocation();
}
void ASTDeclReader::VisitLabelDecl(LabelDecl *D) {
VisitNamedDecl(D);
D->setLocStart(readSourceLocation());
}
void ASTDeclReader::VisitNamespaceDecl(NamespaceDecl *D) {
RedeclarableResult Redecl = VisitRedeclarable(D);
VisitNamedDecl(D);
D->setInline(Record.readInt());
D->LocStart = readSourceLocation();
D->RBraceLoc = readSourceLocation();
// Defer loading the anonymous namespace until we've finished merging
// this namespace; loading it might load a later declaration of the
// same namespace, and we have an invariant that older declarations
// get merged before newer ones try to merge.
GlobalDeclID AnonNamespace = 0;
if (Redecl.getFirstID() == ThisDeclID) {
AnonNamespace = readDeclID();
} else {
// Link this namespace back to the first declaration, which has already
// been deserialized.
D->AnonOrFirstNamespaceAndInline.setPointer(D->getFirstDecl());
}
mergeRedeclarable(D, Redecl);
if (AnonNamespace) {
// Each module has its own anonymous namespace, which is disjoint from
// any other module's anonymous namespaces, so don't attach the anonymous
// namespace at all.
auto *Anon = cast<NamespaceDecl>(Reader.GetDecl(AnonNamespace));
if (!Record.isModule())
D->setAnonymousNamespace(Anon);
}
}
void ASTDeclReader::VisitNamespaceAliasDecl(NamespaceAliasDecl *D) {
RedeclarableResult Redecl = VisitRedeclarable(D);
VisitNamedDecl(D);
D->NamespaceLoc = readSourceLocation();
D->IdentLoc = readSourceLocation();
D->QualifierLoc = Record.readNestedNameSpecifierLoc();
D->Namespace = readDeclAs<NamedDecl>();
mergeRedeclarable(D, Redecl);
}
void ASTDeclReader::VisitUsingDecl(UsingDecl *D) {
VisitNamedDecl(D);
D->setUsingLoc(readSourceLocation());
D->QualifierLoc = Record.readNestedNameSpecifierLoc();
D->DNLoc = Record.readDeclarationNameLoc(D->getDeclName());
D->FirstUsingShadow.setPointer(readDeclAs<UsingShadowDecl>());
D->setTypename(Record.readInt());
if (auto *Pattern = readDeclAs<NamedDecl>())
Reader.getContext().setInstantiatedFromUsingDecl(D, Pattern);
mergeMergeable(D);
}
void ASTDeclReader::VisitUsingEnumDecl(UsingEnumDecl *D) {
VisitNamedDecl(D);
D->setUsingLoc(readSourceLocation());
D->setEnumLoc(readSourceLocation());
D->Enum = readDeclAs<EnumDecl>();
D->FirstUsingShadow.setPointer(readDeclAs<UsingShadowDecl>());
if (auto *Pattern = readDeclAs<UsingEnumDecl>())
Reader.getContext().setInstantiatedFromUsingEnumDecl(D, Pattern);
mergeMergeable(D);
}
void ASTDeclReader::VisitUsingPackDecl(UsingPackDecl *D) {
VisitNamedDecl(D);
D->InstantiatedFrom = readDeclAs<NamedDecl>();
auto **Expansions = D->getTrailingObjects<NamedDecl *>();
for (unsigned I = 0; I != D->NumExpansions; ++I)
Expansions[I] = readDeclAs<NamedDecl>();
mergeMergeable(D);
}
void ASTDeclReader::VisitUsingShadowDecl(UsingShadowDecl *D) {
RedeclarableResult Redecl = VisitRedeclarable(D);
VisitNamedDecl(D);
D->Underlying = readDeclAs<NamedDecl>();
D->IdentifierNamespace = Record.readInt();
D->UsingOrNextShadow = readDeclAs<NamedDecl>();
auto *Pattern = readDeclAs<UsingShadowDecl>();
if (Pattern)
Reader.getContext().setInstantiatedFromUsingShadowDecl(D, Pattern);
mergeRedeclarable(D, Redecl);
}
void ASTDeclReader::VisitConstructorUsingShadowDecl(
ConstructorUsingShadowDecl *D) {
VisitUsingShadowDecl(D);
D->NominatedBaseClassShadowDecl = readDeclAs<ConstructorUsingShadowDecl>();
D->ConstructedBaseClassShadowDecl = readDeclAs<ConstructorUsingShadowDecl>();
D->IsVirtual = Record.readInt();
}
void ASTDeclReader::VisitUsingDirectiveDecl(UsingDirectiveDecl *D) {
VisitNamedDecl(D);
D->UsingLoc = readSourceLocation();
D->NamespaceLoc = readSourceLocation();
D->QualifierLoc = Record.readNestedNameSpecifierLoc();
D->NominatedNamespace = readDeclAs<NamedDecl>();
D->CommonAncestor = readDeclAs<DeclContext>();
}
void ASTDeclReader::VisitUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *D) {
VisitValueDecl(D);
D->setUsingLoc(readSourceLocation());
D->QualifierLoc = Record.readNestedNameSpecifierLoc();
D->DNLoc = Record.readDeclarationNameLoc(D->getDeclName());
D->EllipsisLoc = readSourceLocation();
mergeMergeable(D);
}
void ASTDeclReader::VisitUnresolvedUsingTypenameDecl(
UnresolvedUsingTypenameDecl *D) {
VisitTypeDecl(D);
D->TypenameLocation = readSourceLocation();
D->QualifierLoc = Record.readNestedNameSpecifierLoc();
D->EllipsisLoc = readSourceLocation();
mergeMergeable(D);
}
void ASTDeclReader::VisitUnresolvedUsingIfExistsDecl(
UnresolvedUsingIfExistsDecl *D) {
VisitNamedDecl(D);
}
void ASTDeclReader::ReadCXXDefinitionData(
struct CXXRecordDecl::DefinitionData &Data, const CXXRecordDecl *D) {
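  // Read every definition bit listed in CXXRecordDeclDefinitionBits.def.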
#define FIELD(Name, Width, Merge) \
Data.Name = Record.readInt();
#include "clang/AST/CXXRecordDeclDefinitionBits.def"
// Note: the caller has deserialized the IsLambda bit already.
Data.ODRHash = Record.readInt();
Data.HasODRHash = true;
if (Record.readInt()) {
Reader.DefinitionSource[D] =
Loc.F->Kind == ModuleKind::MK_MainFile ||
Reader.getContext().getLangOpts().BuildingPCHWithObjectFile;
}
Data.NumBases = Record.readInt();
if (Data.NumBases)
Data.Bases = ReadGlobalOffset();
Data.NumVBases = Record.readInt();
if (Data.NumVBases)
Data.VBases = ReadGlobalOffset();
Record.readUnresolvedSet(Data.Conversions);
Data.ComputedVisibleConversions = Record.readInt();
if (Data.ComputedVisibleConversions)
Record.readUnresolvedSet(Data.VisibleConversions);
assert(Data.Definition && "Data.Definition should be already set!");
Data.FirstFriend = readDeclID();
if (Data.IsLambda) {
using Capture = LambdaCapture;
auto &Lambda = static_cast<CXXRecordDecl::LambdaDefinitionData &>(Data);
Lambda.Dependent = Record.readInt();
Lambda.IsGenericLambda = Record.readInt();
Lambda.CaptureDefault = Record.readInt();
Lambda.NumCaptures = Record.readInt();
Lambda.NumExplicitCaptures = Record.readInt();
Lambda.HasKnownInternalLinkage = Record.readInt();
Lambda.ManglingNumber = Record.readInt();
D->setDeviceLambdaManglingNumber(Record.readInt());
Lambda.ContextDecl = readDeclID();
Lambda.Captures = (Capture *)Reader.getContext().Allocate(
sizeof(Capture) * Lambda.NumCaptures);
Capture *ToCapture = Lambda.Captures;
Lambda.MethodTyInfo = readTypeSourceInfo();
for (unsigned I = 0, N = Lambda.NumCaptures; I != N; ++I) {
SourceLocation Loc = readSourceLocation();
bool IsImplicit = Record.readInt();
auto Kind = static_cast<LambdaCaptureKind>(Record.readInt());
switch (Kind) {
case LCK_StarThis:
case LCK_This:
case LCK_VLAType:
*ToCapture++ = Capture(Loc, IsImplicit, Kind, nullptr,SourceLocation());
break;
case LCK_ByCopy:
case LCK_ByRef:
auto *Var = readDeclAs<VarDecl>();
SourceLocation EllipsisLoc = readSourceLocation();
*ToCapture++ = Capture(Loc, IsImplicit, Kind, Var, EllipsisLoc);
break;
}
}
}
}
void ASTDeclReader::MergeDefinitionData(
CXXRecordDecl *D, struct CXXRecordDecl::DefinitionData &&MergeDD) {
assert(D->DefinitionData &&
"merging class definition into non-definition");
auto &DD = *D->DefinitionData;
if (DD.Definition != MergeDD.Definition) {
// Track that we merged the definitions.
Reader.MergedDeclContexts.insert(std::make_pair(MergeDD.Definition,
DD.Definition));
Reader.PendingDefinitions.erase(MergeDD.Definition);
MergeDD.Definition->setCompleteDefinition(false);
Reader.mergeDefinitionVisibility(DD.Definition, MergeDD.Definition);
assert(Reader.Lookups.find(MergeDD.Definition) == Reader.Lookups.end() &&
"already loaded pending lookups for merged definition");
}
auto PFDI = Reader.PendingFakeDefinitionData.find(&DD);
if (PFDI != Reader.PendingFakeDefinitionData.end() &&
PFDI->second == ASTReader::PendingFakeDefinitionKind::Fake) {
// We faked up this definition data because we found a class for which we'd
// not yet loaded the definition. Replace it with the real thing now.
assert(!DD.IsLambda && !MergeDD.IsLambda && "faked up lambda definition?");
PFDI->second = ASTReader::PendingFakeDefinitionKind::FakeLoaded;
// Don't change which declaration is the definition; that is required
// to be invariant once we select it.
auto *Def = DD.Definition;
DD = std::move(MergeDD);
DD.Definition = Def;
return;
}
bool DetectedOdrViolation = false;
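  // Merge each definition bit from the two definitions: MERGE_OR fields are
  // simply OR'd together, while NO_MERGE fields must agree, with any mismatch
  // recorded as an ODR violation before being OR'd as well.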
#define FIELD(Name, Width, Merge) Merge(Name)
#define MERGE_OR(Field) DD.Field |= MergeDD.Field;
#define NO_MERGE(Field) \
DetectedOdrViolation |= DD.Field != MergeDD.Field; \
MERGE_OR(Field)
#include "clang/AST/CXXRecordDeclDefinitionBits.def"
NO_MERGE(IsLambda)
#undef NO_MERGE
#undef MERGE_OR
if (DD.NumBases != MergeDD.NumBases || DD.NumVBases != MergeDD.NumVBases)
DetectedOdrViolation = true;
// FIXME: Issue a diagnostic if the base classes don't match when we come
// to lazily load them.
// FIXME: Issue a diagnostic if the list of conversion functions doesn't
// match when we come to lazily load them.
if (MergeDD.ComputedVisibleConversions && !DD.ComputedVisibleConversions) {
DD.VisibleConversions = std::move(MergeDD.VisibleConversions);
DD.ComputedVisibleConversions = true;
}
// FIXME: Issue a diagnostic if FirstFriend doesn't match when we come to
// lazily load it.
if (DD.IsLambda) {
// FIXME: ODR-checking for merging lambdas (this happens, for instance,
// when they occur within the body of a function template specialization).
}
if (D->getODRHash() != MergeDD.ODRHash) {
DetectedOdrViolation = true;
}
if (DetectedOdrViolation)
Reader.PendingOdrMergeFailures[DD.Definition].push_back(
{MergeDD.Definition, &MergeDD});
}
void ASTDeclReader::ReadCXXRecordDefinition(CXXRecordDecl *D, bool Update) {
struct CXXRecordDecl::DefinitionData *DD;
ASTContext &C = Reader.getContext();
// Determine whether this is a lambda closure type, so that we can
// allocate the appropriate DefinitionData structure.
bool IsLambda = Record.readInt();
if (IsLambda)
DD = new (C) CXXRecordDecl::LambdaDefinitionData(D, nullptr, false, false,
LCD_None);
else
DD = new (C) struct CXXRecordDecl::DefinitionData(D);
CXXRecordDecl *Canon = D->getCanonicalDecl();
  // Set the decl's definition data before reading it, so that while we are
  // deserializing a CXXRecordDecl it already has definition data and we don't
  // set up a fake one.
if (!Canon->DefinitionData)
Canon->DefinitionData = DD;
D->DefinitionData = Canon->DefinitionData;
ReadCXXDefinitionData(*DD, D);
// We might already have a different definition for this record. This can
// happen either because we're reading an update record, or because we've
// already done some merging. Either way, just merge into it.
if (Canon->DefinitionData != DD) {
MergeDefinitionData(Canon, std::move(*DD));
return;
}
// Mark this declaration as being a definition.
D->setCompleteDefinition(true);
// If this is not the first declaration or is an update record, we can have
// other redeclarations already. Make a note that we need to propagate the
// DefinitionData pointer onto them.
if (Update || Canon != D)
Reader.PendingDefinitions.insert(D);
}
ASTDeclReader::RedeclarableResult
ASTDeclReader::VisitCXXRecordDeclImpl(CXXRecordDecl *D) {
RedeclarableResult Redecl = VisitRecordDeclImpl(D);
ASTContext &C = Reader.getContext();
enum CXXRecKind {
CXXRecNotTemplate = 0, CXXRecTemplate, CXXRecMemberSpecialization
};
switch ((CXXRecKind)Record.readInt()) {
case CXXRecNotTemplate:
// Merged when we merge the folding set entry in the primary template.
if (!isa<ClassTemplateSpecializationDecl>(D))
mergeRedeclarable(D, Redecl);
break;
case CXXRecTemplate: {
// Merged when we merge the template.
auto *Template = readDeclAs<ClassTemplateDecl>();
D->TemplateOrInstantiation = Template;
if (!Template->getTemplatedDecl()) {
// We've not actually loaded the ClassTemplateDecl yet, because we're
// currently being loaded as its pattern. Rely on it to set up our
// TypeForDecl (see VisitClassTemplateDecl).
//
// Beware: we do not yet know our canonical declaration, and may still
// get merged once the surrounding class template has got off the ground.
DeferredTypeID = 0;
}
break;
}
case CXXRecMemberSpecialization: {
auto *RD = readDeclAs<CXXRecordDecl>();
auto TSK = (TemplateSpecializationKind)Record.readInt();
SourceLocation POI = readSourceLocation();
MemberSpecializationInfo *MSI = new (C) MemberSpecializationInfo(RD, TSK);
MSI->setPointOfInstantiation(POI);
D->TemplateOrInstantiation = MSI;
mergeRedeclarable(D, Redecl);
break;
}
}
bool WasDefinition = Record.readInt();
if (WasDefinition)
ReadCXXRecordDefinition(D, /*Update*/false);
else
// Propagate DefinitionData pointer from the canonical declaration.
D->DefinitionData = D->getCanonicalDecl()->DefinitionData;
  // Lazily load the key function, to avoid having to deserialize every method
  // just to compute it.
if (WasDefinition) {
DeclID KeyFn = readDeclID();
if (KeyFn && D->isCompleteDefinition())
// FIXME: This is wrong for the ARM ABI, where some other module may have
// made this function no longer be a key function. We need an update
// record or similar for that case.
C.KeyFunctions[D] = KeyFn;
}
return Redecl;
}
void ASTDeclReader::VisitCXXDeductionGuideDecl(CXXDeductionGuideDecl *D) {
D->setExplicitSpecifier(Record.readExplicitSpec());
D->Ctor = readDeclAs<CXXConstructorDecl>();
VisitFunctionDecl(D);
D->setIsCopyDeductionCandidate(Record.readInt());
}
void ASTDeclReader::VisitCXXMethodDecl(CXXMethodDecl *D) {
VisitFunctionDecl(D);
unsigned NumOverridenMethods = Record.readInt();
if (D->isCanonicalDecl()) {
while (NumOverridenMethods--) {
      // Avoid the invariant checking of CXXMethodDecl::addOverriddenMethod,
      // since MD may still be initializing.
if (auto *MD = readDeclAs<CXXMethodDecl>())
Reader.getContext().addOverriddenMethod(D, MD->getCanonicalDecl());
}
} else {
// We don't care about which declarations this used to override; we get
// the relevant information from the canonical declaration.
Record.skipInts(NumOverridenMethods);
}
}
void ASTDeclReader::VisitCXXConstructorDecl(CXXConstructorDecl *D) {
// We need the inherited constructor information to merge the declaration,
// so we have to read it before we call VisitCXXMethodDecl.
D->setExplicitSpecifier(Record.readExplicitSpec());
if (D->isInheritingConstructor()) {
auto *Shadow = readDeclAs<ConstructorUsingShadowDecl>();
auto *Ctor = readDeclAs<CXXConstructorDecl>();
*D->getTrailingObjects<InheritedConstructor>() =
InheritedConstructor(Shadow, Ctor);
}
VisitCXXMethodDecl(D);
}
void ASTDeclReader::VisitCXXDestructorDecl(CXXDestructorDecl *D) {
VisitCXXMethodDecl(D);
if (auto *OperatorDelete = readDeclAs<FunctionDecl>()) {
CXXDestructorDecl *Canon = D->getCanonicalDecl();
auto *ThisArg = Record.readExpr();
// FIXME: Check consistency if we have an old and new operator delete.
if (!Canon->OperatorDelete) {
Canon->OperatorDelete = OperatorDelete;
Canon->OperatorDeleteThisArg = ThisArg;
}
}
}
void ASTDeclReader::VisitCXXConversionDecl(CXXConversionDecl *D) {
D->setExplicitSpecifier(Record.readExplicitSpec());
VisitCXXMethodDecl(D);
}
void ASTDeclReader::VisitImportDecl(ImportDecl *D) {
VisitDecl(D);
D->ImportedModule = readModule();
D->setImportComplete(Record.readInt());
auto *StoredLocs = D->getTrailingObjects<SourceLocation>();
for (unsigned I = 0, N = Record.back(); I != N; ++I)
StoredLocs[I] = readSourceLocation();
Record.skipInts(1); // The number of stored source locations.
}
void ASTDeclReader::VisitAccessSpecDecl(AccessSpecDecl *D) {
VisitDecl(D);
D->setColonLoc(readSourceLocation());
}
void ASTDeclReader::VisitFriendDecl(FriendDecl *D) {
VisitDecl(D);
if (Record.readInt()) // hasFriendDecl
D->Friend = readDeclAs<NamedDecl>();
else
D->Friend = readTypeSourceInfo();
for (unsigned i = 0; i != D->NumTPLists; ++i)
D->getTrailingObjects<TemplateParameterList *>()[i] =
Record.readTemplateParameterList();
D->NextFriend = readDeclID();
D->UnsupportedFriend = (Record.readInt() != 0);
D->FriendLoc = readSourceLocation();
}
void ASTDeclReader::VisitFriendTemplateDecl(FriendTemplateDecl *D) {
VisitDecl(D);
unsigned NumParams = Record.readInt();
D->NumParams = NumParams;
- D->Params = new TemplateParameterList*[NumParams];
+ D->Params = new (Reader.getContext()) TemplateParameterList *[NumParams];
for (unsigned i = 0; i != NumParams; ++i)
D->Params[i] = Record.readTemplateParameterList();
if (Record.readInt()) // HasFriendDecl
D->Friend = readDeclAs<NamedDecl>();
else
D->Friend = readTypeSourceInfo();
D->FriendLoc = readSourceLocation();
}
DeclID ASTDeclReader::VisitTemplateDecl(TemplateDecl *D) {
VisitNamedDecl(D);
DeclID PatternID = readDeclID();
auto *TemplatedDecl = cast_or_null<NamedDecl>(Reader.GetDecl(PatternID));
TemplateParameterList *TemplateParams = Record.readTemplateParameterList();
D->init(TemplatedDecl, TemplateParams);
return PatternID;
}
void ASTDeclReader::VisitConceptDecl(ConceptDecl *D) {
VisitTemplateDecl(D);
D->ConstraintExpr = Record.readExpr();
mergeMergeable(D);
}
void ASTDeclReader::VisitRequiresExprBodyDecl(RequiresExprBodyDecl *D) {
}
ASTDeclReader::RedeclarableResult
ASTDeclReader::VisitRedeclarableTemplateDecl(RedeclarableTemplateDecl *D) {
RedeclarableResult Redecl = VisitRedeclarable(D);
// Make sure we've allocated the Common pointer first. We do this before
// VisitTemplateDecl so that getCommonPtr() can be used during initialization.
RedeclarableTemplateDecl *CanonD = D->getCanonicalDecl();
if (!CanonD->Common) {
CanonD->Common = CanonD->newCommon(Reader.getContext());
Reader.PendingDefinitions.insert(CanonD);
}
D->Common = CanonD->Common;
// If this is the first declaration of the template, fill in the information
// for the 'common' pointer.
if (ThisDeclID == Redecl.getFirstID()) {
if (auto *RTD = readDeclAs<RedeclarableTemplateDecl>()) {
assert(RTD->getKind() == D->getKind() &&
"InstantiatedFromMemberTemplate kind mismatch");
D->setInstantiatedFromMemberTemplate(RTD);
if (Record.readInt())
D->setMemberSpecialization();
}
}
DeclID PatternID = VisitTemplateDecl(D);
D->IdentifierNamespace = Record.readInt();
mergeRedeclarable(D, Redecl, PatternID);
// If we merged the template with a prior declaration chain, merge the common
// pointer.
// FIXME: Actually merge here, don't just overwrite.
D->Common = D->getCanonicalDecl()->Common;
return Redecl;
}
void ASTDeclReader::VisitClassTemplateDecl(ClassTemplateDecl *D) {
RedeclarableResult Redecl = VisitRedeclarableTemplateDecl(D);
if (ThisDeclID == Redecl.getFirstID()) {
// This ClassTemplateDecl owns a CommonPtr; read it to keep track of all of
// the specializations.
SmallVector<serialization::DeclID, 32> SpecIDs;
readDeclIDList(SpecIDs);
ASTDeclReader::AddLazySpecializations(D, SpecIDs);
}
if (D->getTemplatedDecl()->TemplateOrInstantiation) {
// We were loaded before our templated declaration was. We've not set up
// its corresponding type yet (see VisitCXXRecordDeclImpl), so reconstruct
// it now.
Reader.getContext().getInjectedClassNameType(
D->getTemplatedDecl(), D->getInjectedClassNameSpecialization());
}
}
void ASTDeclReader::VisitBuiltinTemplateDecl(BuiltinTemplateDecl *D) {
llvm_unreachable("BuiltinTemplates are not serialized");
}
/// TODO: Unify with ClassTemplateDecl version?
/// May require unifying ClassTemplateDecl and
/// VarTemplateDecl beyond TemplateDecl...
void ASTDeclReader::VisitVarTemplateDecl(VarTemplateDecl *D) {
RedeclarableResult Redecl = VisitRedeclarableTemplateDecl(D);
if (ThisDeclID == Redecl.getFirstID()) {
// This VarTemplateDecl owns a CommonPtr; read it to keep track of all of
// the specializations.
SmallVector<serialization::DeclID, 32> SpecIDs;
readDeclIDList(SpecIDs);
ASTDeclReader::AddLazySpecializations(D, SpecIDs);
}
}
ASTDeclReader::RedeclarableResult
ASTDeclReader::VisitClassTemplateSpecializationDeclImpl(
ClassTemplateSpecializationDecl *D) {
RedeclarableResult Redecl = VisitCXXRecordDeclImpl(D);
ASTContext &C = Reader.getContext();
if (Decl *InstD = readDecl()) {
if (auto *CTD = dyn_cast<ClassTemplateDecl>(InstD)) {
D->SpecializedTemplate = CTD;
} else {
SmallVector<TemplateArgument, 8> TemplArgs;
Record.readTemplateArgumentList(TemplArgs);
TemplateArgumentList *ArgList
= TemplateArgumentList::CreateCopy(C, TemplArgs);
auto *PS =
new (C) ClassTemplateSpecializationDecl::
SpecializedPartialSpecialization();
PS->PartialSpecialization
= cast<ClassTemplatePartialSpecializationDecl>(InstD);
PS->TemplateArgs = ArgList;
D->SpecializedTemplate = PS;
}
}
SmallVector<TemplateArgument, 8> TemplArgs;
Record.readTemplateArgumentList(TemplArgs, /*Canonicalize*/ true);
D->TemplateArgs = TemplateArgumentList::CreateCopy(C, TemplArgs);
D->PointOfInstantiation = readSourceLocation();
D->SpecializationKind = (TemplateSpecializationKind)Record.readInt();
bool writtenAsCanonicalDecl = Record.readInt();
if (writtenAsCanonicalDecl) {
auto *CanonPattern = readDeclAs<ClassTemplateDecl>();
if (D->isCanonicalDecl()) { // It's kept in the folding set.
// Set this as, or find, the canonical declaration for this specialization
ClassTemplateSpecializationDecl *CanonSpec;
if (auto *Partial = dyn_cast<ClassTemplatePartialSpecializationDecl>(D)) {
CanonSpec = CanonPattern->getCommonPtr()->PartialSpecializations
.GetOrInsertNode(Partial);
} else {
CanonSpec =
CanonPattern->getCommonPtr()->Specializations.GetOrInsertNode(D);
}
// If there was already a canonical specialization, merge into it.
if (CanonSpec != D) {
mergeRedeclarable<TagDecl>(D, CanonSpec, Redecl);
// This declaration might be a definition. Merge with any existing
// definition.
if (auto *DDD = D->DefinitionData) {
if (CanonSpec->DefinitionData)
MergeDefinitionData(CanonSpec, std::move(*DDD));
else
CanonSpec->DefinitionData = D->DefinitionData;
}
D->DefinitionData = CanonSpec->DefinitionData;
}
}
}
// Explicit info.
if (TypeSourceInfo *TyInfo = readTypeSourceInfo()) {
auto *ExplicitInfo =
new (C) ClassTemplateSpecializationDecl::ExplicitSpecializationInfo;
ExplicitInfo->TypeAsWritten = TyInfo;
ExplicitInfo->ExternLoc = readSourceLocation();
ExplicitInfo->TemplateKeywordLoc = readSourceLocation();
D->ExplicitInfo = ExplicitInfo;
}
return Redecl;
}
void ASTDeclReader::VisitClassTemplatePartialSpecializationDecl(
ClassTemplatePartialSpecializationDecl *D) {
  // We need to read the template params first because the redeclarable
  // machinery is going to need them for profiling.
TemplateParameterList *Params = Record.readTemplateParameterList();
D->TemplateParams = Params;
D->ArgsAsWritten = Record.readASTTemplateArgumentListInfo();
RedeclarableResult Redecl = VisitClassTemplateSpecializationDeclImpl(D);
// These are read/set from/to the first declaration.
if (ThisDeclID == Redecl.getFirstID()) {
D->InstantiatedFromMember.setPointer(
readDeclAs<ClassTemplatePartialSpecializationDecl>());
D->InstantiatedFromMember.setInt(Record.readInt());
}
}
void ASTDeclReader::VisitClassScopeFunctionSpecializationDecl(
ClassScopeFunctionSpecializationDecl *D) {
VisitDecl(D);
D->Specialization = readDeclAs<CXXMethodDecl>();
if (Record.readInt())
D->TemplateArgs = Record.readASTTemplateArgumentListInfo();
}
void ASTDeclReader::VisitFunctionTemplateDecl(FunctionTemplateDecl *D) {
RedeclarableResult Redecl = VisitRedeclarableTemplateDecl(D);
if (ThisDeclID == Redecl.getFirstID()) {
// This FunctionTemplateDecl owns a CommonPtr; read it.
SmallVector<serialization::DeclID, 32> SpecIDs;
readDeclIDList(SpecIDs);
ASTDeclReader::AddLazySpecializations(D, SpecIDs);
}
}
/// TODO: Unify with ClassTemplateSpecializationDecl version?
/// May require unifying ClassTemplate(Partial)SpecializationDecl and
/// VarTemplate(Partial)SpecializationDecl with a new data
/// structure Template(Partial)SpecializationDecl, and
/// using Template(Partial)SpecializationDecl as input type.
ASTDeclReader::RedeclarableResult
ASTDeclReader::VisitVarTemplateSpecializationDeclImpl(
VarTemplateSpecializationDecl *D) {
RedeclarableResult Redecl = VisitVarDeclImpl(D);
ASTContext &C = Reader.getContext();
if (Decl *InstD = readDecl()) {
if (auto *VTD = dyn_cast<VarTemplateDecl>(InstD)) {
D->SpecializedTemplate = VTD;
} else {
SmallVector<TemplateArgument, 8> TemplArgs;
Record.readTemplateArgumentList(TemplArgs);
TemplateArgumentList *ArgList = TemplateArgumentList::CreateCopy(
C, TemplArgs);
auto *PS =
new (C)
VarTemplateSpecializationDecl::SpecializedPartialSpecialization();
PS->PartialSpecialization =
cast<VarTemplatePartialSpecializationDecl>(InstD);
PS->TemplateArgs = ArgList;
D->SpecializedTemplate = PS;
}
}
// Explicit info.
if (TypeSourceInfo *TyInfo = readTypeSourceInfo()) {
auto *ExplicitInfo =
new (C) VarTemplateSpecializationDecl::ExplicitSpecializationInfo;
ExplicitInfo->TypeAsWritten = TyInfo;
ExplicitInfo->ExternLoc = readSourceLocation();
ExplicitInfo->TemplateKeywordLoc = readSourceLocation();
D->ExplicitInfo = ExplicitInfo;
}
SmallVector<TemplateArgument, 8> TemplArgs;
Record.readTemplateArgumentList(TemplArgs, /*Canonicalize*/ true);
D->TemplateArgs = TemplateArgumentList::CreateCopy(C, TemplArgs);
D->PointOfInstantiation = readSourceLocation();
D->SpecializationKind = (TemplateSpecializationKind)Record.readInt();
D->IsCompleteDefinition = Record.readInt();
bool writtenAsCanonicalDecl = Record.readInt();
if (writtenAsCanonicalDecl) {
auto *CanonPattern = readDeclAs<VarTemplateDecl>();
if (D->isCanonicalDecl()) { // It's kept in the folding set.
// FIXME: If it's already present, merge it.
if (auto *Partial = dyn_cast<VarTemplatePartialSpecializationDecl>(D)) {
CanonPattern->getCommonPtr()->PartialSpecializations
.GetOrInsertNode(Partial);
} else {
CanonPattern->getCommonPtr()->Specializations.GetOrInsertNode(D);
}
}
}
return Redecl;
}
/// TODO: Unify with ClassTemplatePartialSpecializationDecl version?
/// May require unifying ClassTemplate(Partial)SpecializationDecl and
/// VarTemplate(Partial)SpecializationDecl with a new data
/// structure Template(Partial)SpecializationDecl, and
/// using Template(Partial)SpecializationDecl as input type.
void ASTDeclReader::VisitVarTemplatePartialSpecializationDecl(
VarTemplatePartialSpecializationDecl *D) {
TemplateParameterList *Params = Record.readTemplateParameterList();
D->TemplateParams = Params;
D->ArgsAsWritten = Record.readASTTemplateArgumentListInfo();
RedeclarableResult Redecl = VisitVarTemplateSpecializationDeclImpl(D);
// These are read/set from/to the first declaration.
if (ThisDeclID == Redecl.getFirstID()) {
D->InstantiatedFromMember.setPointer(
readDeclAs<VarTemplatePartialSpecializationDecl>());
D->InstantiatedFromMember.setInt(Record.readInt());
}
}
void ASTDeclReader::VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D) {
VisitTypeDecl(D);
D->setDeclaredWithTypename(Record.readInt());
if (Record.readBool()) {
NestedNameSpecifierLoc NNS = Record.readNestedNameSpecifierLoc();
DeclarationNameInfo DN = Record.readDeclarationNameInfo();
ConceptDecl *NamedConcept = Record.readDeclAs<ConceptDecl>();
const ASTTemplateArgumentListInfo *ArgsAsWritten = nullptr;
if (Record.readBool())
ArgsAsWritten = Record.readASTTemplateArgumentListInfo();
Expr *ImmediatelyDeclaredConstraint = Record.readExpr();
D->setTypeConstraint(NNS, DN, /*FoundDecl=*/nullptr, NamedConcept,
ArgsAsWritten, ImmediatelyDeclaredConstraint);
if ((D->ExpandedParameterPack = Record.readInt()))
D->NumExpanded = Record.readInt();
}
if (Record.readInt())
D->setDefaultArgument(readTypeSourceInfo());
}
void ASTDeclReader::VisitNonTypeTemplateParmDecl(NonTypeTemplateParmDecl *D) {
VisitDeclaratorDecl(D);
// TemplateParmPosition.
D->setDepth(Record.readInt());
D->setPosition(Record.readInt());
if (D->hasPlaceholderTypeConstraint())
D->setPlaceholderTypeConstraint(Record.readExpr());
if (D->isExpandedParameterPack()) {
auto TypesAndInfos =
D->getTrailingObjects<std::pair<QualType, TypeSourceInfo *>>();
for (unsigned I = 0, N = D->getNumExpansionTypes(); I != N; ++I) {
new (&TypesAndInfos[I].first) QualType(Record.readType());
TypesAndInfos[I].second = readTypeSourceInfo();
}
} else {
// Rest of NonTypeTemplateParmDecl.
D->ParameterPack = Record.readInt();
if (Record.readInt())
D->setDefaultArgument(Record.readExpr());
}
}
void ASTDeclReader::VisitTemplateTemplateParmDecl(TemplateTemplateParmDecl *D) {
VisitTemplateDecl(D);
// TemplateParmPosition.
D->setDepth(Record.readInt());
D->setPosition(Record.readInt());
if (D->isExpandedParameterPack()) {
auto **Data = D->getTrailingObjects<TemplateParameterList *>();
for (unsigned I = 0, N = D->getNumExpansionTemplateParameters();
I != N; ++I)
Data[I] = Record.readTemplateParameterList();
} else {
// Rest of TemplateTemplateParmDecl.
D->ParameterPack = Record.readInt();
if (Record.readInt())
D->setDefaultArgument(Reader.getContext(),
Record.readTemplateArgumentLoc());
}
}
void ASTDeclReader::VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D) {
VisitRedeclarableTemplateDecl(D);
}
void ASTDeclReader::VisitStaticAssertDecl(StaticAssertDecl *D) {
VisitDecl(D);
D->AssertExprAndFailed.setPointer(Record.readExpr());
D->AssertExprAndFailed.setInt(Record.readInt());
D->Message = cast_or_null<StringLiteral>(Record.readExpr());
D->RParenLoc = readSourceLocation();
}
void ASTDeclReader::VisitEmptyDecl(EmptyDecl *D) {
VisitDecl(D);
}
void ASTDeclReader::VisitLifetimeExtendedTemporaryDecl(
LifetimeExtendedTemporaryDecl *D) {
VisitDecl(D);
D->ExtendingDecl = readDeclAs<ValueDecl>();
D->ExprWithTemporary = Record.readStmt();
if (Record.readInt()) {
D->Value = new (D->getASTContext()) APValue(Record.readAPValue());
D->getASTContext().addDestruction(D->Value);
}
D->ManglingNumber = Record.readInt();
mergeMergeable(D);
}
std::pair<uint64_t, uint64_t>
ASTDeclReader::VisitDeclContext(DeclContext *DC) {
uint64_t LexicalOffset = ReadLocalOffset();
uint64_t VisibleOffset = ReadLocalOffset();
return std::make_pair(LexicalOffset, VisibleOffset);
}
template <typename T>
ASTDeclReader::RedeclarableResult
ASTDeclReader::VisitRedeclarable(Redeclarable<T> *D) {
DeclID FirstDeclID = readDeclID();
Decl *MergeWith = nullptr;
bool IsKeyDecl = ThisDeclID == FirstDeclID;
bool IsFirstLocalDecl = false;
uint64_t RedeclOffset = 0;
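// The record encodes the head of the redeclaration chain in one of three
// ways, matching the branches below: a FirstDeclID of 0 means this is the
// only declaration of its entity; otherwise a count N follows, where a
// non-zero N means this is the first local declaration preceded by N-1
// imported declarations to merge with, and N == 0 means an earlier local
// declaration is the first one and is read to trigger the import.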
// 0 indicates that this declaration was the only declaration of its entity,
// and is used for space optimization.
if (FirstDeclID == 0) {
FirstDeclID = ThisDeclID;
IsKeyDecl = true;
IsFirstLocalDecl = true;
} else if (unsigned N = Record.readInt()) {
// This declaration was the first local declaration, but may have imported
// other declarations.
IsKeyDecl = N == 1;
IsFirstLocalDecl = true;
// We have some declarations that must be before us in our redeclaration
// chain. Read them now, and remember that we ought to merge with one of
// them.
// FIXME: Provide a known merge target to the second and subsequent such
// declarations.
for (unsigned I = 0; I != N - 1; ++I)
MergeWith = readDecl();
RedeclOffset = ReadLocalOffset();
} else {
// This declaration was not the first local declaration. Read the first
// local declaration now, to trigger the import of other redeclarations.
(void)readDecl();
}
auto *FirstDecl = cast_or_null<T>(Reader.GetDecl(FirstDeclID));
if (FirstDecl != D) {
// We delay loading of the redeclaration chain to avoid deeply nested calls.
// We temporarily set the first (canonical) declaration as the previous one,
// since that is the link that matters, and mark the real previous DeclID to
// be loaded and attached later on.
D->RedeclLink = Redeclarable<T>::PreviousDeclLink(FirstDecl);
D->First = FirstDecl->getCanonicalDecl();
}
auto *DAsT = static_cast<T *>(D);
// Note that we need to load local redeclarations of this decl and build a
// decl chain for them. This must happen *after* we perform the preloading
// above; this ensures that the redeclaration chain is built in the correct
// order.
if (IsFirstLocalDecl)
Reader.PendingDeclChains.push_back(std::make_pair(DAsT, RedeclOffset));
return RedeclarableResult(MergeWith, FirstDeclID, IsKeyDecl);
}
/// Attempts to merge the given declaration (D) with another declaration
/// of the same entity.
template<typename T>
void ASTDeclReader::mergeRedeclarable(Redeclarable<T> *DBase,
RedeclarableResult &Redecl,
DeclID TemplatePatternID) {
// If modules are not available, there is no reason to perform this merge.
if (!Reader.getContext().getLangOpts().Modules)
return;
// If we're not the canonical declaration, we don't need to merge.
if (!DBase->isFirstDecl())
return;
auto *D = static_cast<T *>(DBase);
if (auto *Existing = Redecl.getKnownMergeTarget())
// We already know of an existing declaration we should merge with.
mergeRedeclarable(D, cast<T>(Existing), Redecl, TemplatePatternID);
else if (FindExistingResult ExistingRes = findExisting(D))
if (T *Existing = ExistingRes)
mergeRedeclarable(D, Existing, Redecl, TemplatePatternID);
}
/// "Cast" to type T, asserting if we don't have an implicit conversion.
/// We use this to put code in a template that will only be valid for certain
/// instantiations.
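/// For example, mergeRedeclarable below writes
/// assert_cast<NamespaceDecl*>(ExistingCanon): when T is NamespaceDecl the
/// argument converts implicitly and the first overload is chosen, while for
/// any other instantiation overload resolution falls through to the variadic
/// overload, which asserts at runtime if ever reached.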
template<typename T> static T assert_cast(T t) { return t; }
template<typename T> static T assert_cast(...) {
llvm_unreachable("bad assert_cast");
}
/// Merge together the pattern declarations from two template
/// declarations.
void ASTDeclReader::mergeTemplatePattern(RedeclarableTemplateDecl *D,
RedeclarableTemplateDecl *Existing,
DeclID DsID, bool IsKeyDecl) {
auto *DPattern = D->getTemplatedDecl();
auto *ExistingPattern = Existing->getTemplatedDecl();
RedeclarableResult Result(/*MergeWith*/ ExistingPattern,
DPattern->getCanonicalDecl()->getGlobalID(),
IsKeyDecl);
if (auto *DClass = dyn_cast<CXXRecordDecl>(DPattern)) {
// Merge with any existing definition.
// FIXME: This is duplicated in several places. Refactor.
auto *ExistingClass =
cast<CXXRecordDecl>(ExistingPattern)->getCanonicalDecl();
if (auto *DDD = DClass->DefinitionData) {
if (ExistingClass->DefinitionData) {
MergeDefinitionData(ExistingClass, std::move(*DDD));
} else {
ExistingClass->DefinitionData = DClass->DefinitionData;
// We may have skipped this before because we thought that DClass
// was the canonical declaration.
Reader.PendingDefinitions.insert(DClass);
}
}
DClass->DefinitionData = ExistingClass->DefinitionData;
return mergeRedeclarable(DClass, cast<TagDecl>(ExistingPattern),
Result);
}
if (auto *DFunction = dyn_cast<FunctionDecl>(DPattern))
return mergeRedeclarable(DFunction, cast<FunctionDecl>(ExistingPattern),
Result);
if (auto *DVar = dyn_cast<VarDecl>(DPattern))
return mergeRedeclarable(DVar, cast<VarDecl>(ExistingPattern), Result);
if (auto *DAlias = dyn_cast<TypeAliasDecl>(DPattern))
return mergeRedeclarable(DAlias, cast<TypedefNameDecl>(ExistingPattern),
Result);
llvm_unreachable("merged an unknown kind of redeclarable template");
}
/// Attempts to merge the given declaration (D) with another declaration
/// of the same entity.
template<typename T>
void ASTDeclReader::mergeRedeclarable(Redeclarable<T> *DBase, T *Existing,
RedeclarableResult &Redecl,
DeclID TemplatePatternID) {
auto *D = static_cast<T *>(DBase);
T *ExistingCanon = Existing->getCanonicalDecl();
T *DCanon = D->getCanonicalDecl();
if (ExistingCanon != DCanon) {
assert(DCanon->getGlobalID() == Redecl.getFirstID() &&
"already merged this declaration");
// Have our redeclaration link point back at the canonical declaration
// of the existing declaration, so that this declaration has the
// appropriate canonical declaration.
D->RedeclLink = Redeclarable<T>::PreviousDeclLink(ExistingCanon);
D->First = ExistingCanon;
ExistingCanon->Used |= D->Used;
D->Used = false;
// When we merge a namespace, update its pointer to the first namespace.
// We cannot have loaded any redeclarations of this declaration yet, so
// there's nothing else that needs to be updated.
if (auto *Namespace = dyn_cast<NamespaceDecl>(D))
Namespace->AnonOrFirstNamespaceAndInline.setPointer(
assert_cast<NamespaceDecl*>(ExistingCanon));
// When we merge a template, merge its pattern.
if (auto *DTemplate = dyn_cast<RedeclarableTemplateDecl>(D))
mergeTemplatePattern(
DTemplate, assert_cast<RedeclarableTemplateDecl*>(ExistingCanon),
TemplatePatternID, Redecl.isKeyDecl());
// If this declaration is a key declaration, make a note of that.
if (Redecl.isKeyDecl())
Reader.KeyDecls[ExistingCanon].push_back(Redecl.getFirstID());
}
}
/// ODR-like semantics for C/ObjC allow us to merge tag types, and a structural
/// check in Sema guarantees that the types can be merged (see C11 6.2.7/1 or
/// C89 6.1.2.6/1). Although most merging is done in Sema, we need to guarantee
/// that some declarations are mergeable during deserialization; otherwise name
/// lookup fails. This is the case for EnumConstantDecl.
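/// For instance (an illustrative case), a header containing 'enum E { A };'
/// that is textually included by two different modules produces two
/// EnumConstantDecls named A; unless they are merged during deserialization,
/// name lookup of A can fail.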
static bool allowODRLikeMergeInC(NamedDecl *ND) {
if (!ND)
return false;
// TODO: implement merge for other necessary decls.
if (isa<EnumConstantDecl, FieldDecl, IndirectFieldDecl>(ND))
return true;
return false;
}
/// Attempts to merge the given LifetimeExtendedTemporaryDecl (D) with an
/// equivalent lifetime-extended temporary imported from another module.
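/// Two temporaries are treated as the same entity when they have the same
/// extending declaration and mangling number.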
void ASTDeclReader::mergeMergeable(LifetimeExtendedTemporaryDecl *D) {
// If modules are not available, there is no reason to perform this merge.
if (!Reader.getContext().getLangOpts().Modules)
return;
LifetimeExtendedTemporaryDecl *LETDecl = D;
LifetimeExtendedTemporaryDecl *&LookupResult =
Reader.LETemporaryForMerging[std::make_pair(
LETDecl->getExtendingDecl(), LETDecl->getManglingNumber())];
if (LookupResult)
Reader.getContext().setPrimaryMergedDecl(LETDecl,
LookupResult->getCanonicalDecl());
else
LookupResult = LETDecl;
}
/// Attempts to merge the given declaration (D) with another declaration
/// of the same entity, for the case where the entity is not actually
/// redeclarable. This happens, for instance, when merging the fields of
/// identical class definitions from two different modules.
template<typename T>
void ASTDeclReader::mergeMergeable(Mergeable<T> *D) {
// If modules are not available, there is no reason to perform this merge.
if (!Reader.getContext().getLangOpts().Modules)
return;
// ODR-based merging is performed in C++ and in some cases (tag types) in C.
// Note that C identically-named things in different translation units are
// not redeclarations, but may still have compatible types, where ODR-like
// semantics may apply.
if (!Reader.getContext().getLangOpts().CPlusPlus &&
!allowODRLikeMergeInC(dyn_cast<NamedDecl>(static_cast<T*>(D))))
return;
if (FindExistingResult ExistingRes = findExisting(static_cast<T*>(D)))
if (T *Existing = ExistingRes)
Reader.getContext().setPrimaryMergedDecl(static_cast<T *>(D),
Existing->getCanonicalDecl());
}
void ASTDeclReader::VisitOMPThreadPrivateDecl(OMPThreadPrivateDecl *D) {
Record.readOMPChildren(D->Data);
VisitDecl(D);
}
void ASTDeclReader::VisitOMPAllocateDecl(OMPAllocateDecl *D) {
Record.readOMPChildren(D->Data);
VisitDecl(D);
}
void ASTDeclReader::VisitOMPRequiresDecl(OMPRequiresDecl * D) {
Record.readOMPChildren(D->Data);
VisitDecl(D);
}
void ASTDeclReader::VisitOMPDeclareReductionDecl(OMPDeclareReductionDecl *D) {
VisitValueDecl(D);
D->setLocation(readSourceLocation());
Expr *In = Record.readExpr();
Expr *Out = Record.readExpr();
D->setCombinerData(In, Out);
Expr *Combiner = Record.readExpr();
D->setCombiner(Combiner);
Expr *Orig = Record.readExpr();
Expr *Priv = Record.readExpr();
D->setInitializerData(Orig, Priv);
Expr *Init = Record.readExpr();
auto IK = static_cast<OMPDeclareReductionDecl::InitKind>(Record.readInt());
D->setInitializer(Init, IK);
D->PrevDeclInScope = readDeclID();
}
void ASTDeclReader::VisitOMPDeclareMapperDecl(OMPDeclareMapperDecl *D) {
Record.readOMPChildren(D->Data);
VisitValueDecl(D);
D->VarName = Record.readDeclarationName();
D->PrevDeclInScope = readDeclID();
}
void ASTDeclReader::VisitOMPCapturedExprDecl(OMPCapturedExprDecl *D) {
VisitVarDecl(D);
}
//===----------------------------------------------------------------------===//
// Attribute Reading
//===----------------------------------------------------------------------===//
namespace {
class AttrReader {
ASTRecordReader &Reader;
public:
AttrReader(ASTRecordReader &Reader) : Reader(Reader) {}
uint64_t readInt() {
return Reader.readInt();
}
SourceRange readSourceRange() {
return Reader.readSourceRange();
}
SourceLocation readSourceLocation() {
return Reader.readSourceLocation();
}
Expr *readExpr() { return Reader.readExpr(); }
std::string readString() {
return Reader.readString();
}
TypeSourceInfo *readTypeSourceInfo() {
return Reader.readTypeSourceInfo();
}
IdentifierInfo *readIdentifier() {
return Reader.readIdentifier();
}
VersionTuple readVersionTuple() {
return Reader.readVersionTuple();
}
OMPTraitInfo *readOMPTraitInfo() { return Reader.readOMPTraitInfo(); }
template <typename T> T *GetLocalDeclAs(uint32_t LocalID) {
return Reader.GetLocalDeclAs<T>(LocalID);
}
};
}
Attr *ASTRecordReader::readAttr() {
AttrReader Record(*this);
auto V = Record.readInt();
if (!V)
return nullptr;
Attr *New = nullptr;
// Kind is stored as a 1-based integer because 0 is used to indicate a null
// Attr pointer.
auto Kind = static_cast<attr::Kind>(V - 1);
ASTContext &Context = getContext();
IdentifierInfo *AttrName = Record.readIdentifier();
IdentifierInfo *ScopeName = Record.readIdentifier();
SourceRange AttrRange = Record.readSourceRange();
SourceLocation ScopeLoc = Record.readSourceLocation();
unsigned ParsedKind = Record.readInt();
unsigned Syntax = Record.readInt();
unsigned SpellingIndex = Record.readInt();
AttributeCommonInfo Info(AttrName, ScopeName, AttrRange, ScopeLoc,
AttributeCommonInfo::Kind(ParsedKind),
AttributeCommonInfo::Syntax(Syntax), SpellingIndex);
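// AttrPCHRead.inc is generated by clang-tblgen from Attr.td; it expands to a
// switch over Kind that constructs New, reading any attribute-specific
// arguments through the local AttrReader named Record.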
#include "clang/Serialization/AttrPCHRead.inc"
assert(New && "Unable to decode attribute?");
return New;
}
/// Reads attributes from the current stream position.
void ASTRecordReader::readAttributes(AttrVec &Attrs) {
for (unsigned I = 0, E = readInt(); I != E; ++I)
Attrs.push_back(readAttr());
}
//===----------------------------------------------------------------------===//
// ASTReader Implementation
//===----------------------------------------------------------------------===//
/// Note that we have loaded the declaration with the given
/// Index.
///
/// This routine notes that this declaration has already been loaded,
/// so that future GetDecl calls will return this declaration rather
/// than trying to load a new declaration.
inline void ASTReader::LoadedDecl(unsigned Index, Decl *D) {
assert(!DeclsLoaded[Index] && "Decl loaded twice?");
DeclsLoaded[Index] = D;
}
/// Determine whether the consumer will be interested in seeing
/// this declaration (via HandleTopLevelDecl).
///
/// This routine should return true for anything that might affect
/// code generation, e.g., inline function definitions, Objective-C
/// declarations with metadata, etc.
static bool isConsumerInterestedIn(ASTContext &Ctx, Decl *D, bool HasBody) {
// An ObjCMethodDecl is never considered "interesting" on its own, because
// its implementation container always is.
// An ImportDecl or VarDecl imported from a module map module will get
// emitted when we import the relevant module.
if (isPartOfPerModuleInitializer(D)) {
auto *M = D->getImportedOwningModule();
if (M && M->Kind == Module::ModuleMapModule &&
Ctx.DeclMustBeEmitted(D))
return false;
}
if (isa<FileScopeAsmDecl>(D) ||
isa<ObjCProtocolDecl>(D) ||
isa<ObjCImplDecl>(D) ||
isa<ImportDecl>(D) ||
isa<PragmaCommentDecl>(D) ||
isa<PragmaDetectMismatchDecl>(D))
return true;
if (isa<OMPThreadPrivateDecl>(D) || isa<OMPDeclareReductionDecl>(D) ||
isa<OMPDeclareMapperDecl>(D) || isa<OMPAllocateDecl>(D) ||
isa<OMPRequiresDecl>(D))
return !D->getDeclContext()->isFunctionOrMethod();
if (const auto *Var = dyn_cast<VarDecl>(D))
return Var->isFileVarDecl() &&
(Var->isThisDeclarationADefinition() == VarDecl::Definition ||
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(Var));
if (const auto *Func = dyn_cast<FunctionDecl>(D))
return Func->doesThisDeclarationHaveABody() || HasBody;
if (auto *ES = D->getASTContext().getExternalSource())
if (ES->hasExternalDefinitions(D) == ExternalASTSource::EK_Never)
return true;
return false;
}
/// Get the correct cursor and offset for loading a declaration.
ASTReader::RecordLocation
ASTReader::DeclCursorForID(DeclID ID, SourceLocation &Loc) {
GlobalDeclMapType::iterator I = GlobalDeclMap.find(ID);
assert(I != GlobalDeclMap.end() && "Corrupted global declaration map");
ModuleFile *M = I->second;
const DeclOffset &DOffs =
M->DeclOffsets[ID - M->BaseDeclID - NUM_PREDEF_DECL_IDS];
Loc = TranslateSourceLocation(*M, DOffs.getLocation());
return RecordLocation(M, DOffs.getBitOffset(M->DeclsBlockStartOffset));
}
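// Bit offsets stored in a module file are local to that file; the reader
// gives each file a GlobalBitOffset so that offsets from all loaded files
// share one global space. The next two helpers translate between the local
// and global forms.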
ASTReader::RecordLocation ASTReader::getLocalBitOffset(uint64_t GlobalOffset) {
auto I = GlobalBitOffsetsMap.find(GlobalOffset);
assert(I != GlobalBitOffsetsMap.end() && "Corrupted global bit offsets map");
return RecordLocation(I->second, GlobalOffset - I->second->GlobalBitOffset);
}
uint64_t ASTReader::getGlobalBitOffset(ModuleFile &M, uint64_t LocalOffset) {
return LocalOffset + M.GlobalBitOffset;
}
/// Find the context in which we should search for previous declarations when
/// looking for declarations to merge.
DeclContext *ASTDeclReader::getPrimaryContextForMerging(ASTReader &Reader,
DeclContext *DC) {
if (auto *ND = dyn_cast<NamespaceDecl>(DC))
return ND->getOriginalNamespace();
if (auto *RD = dyn_cast<CXXRecordDecl>(DC)) {
// Try to dig out the definition.
auto *DD = RD->DefinitionData;
if (!DD)
DD = RD->getCanonicalDecl()->DefinitionData;
// If there's no definition yet, then DC's definition is added by an update
// record, but we've not yet loaded that update record. In this case, we
// commit to DC being the canonical definition now, and will fix this when
// we load the update record.
if (!DD) {
DD = new (Reader.getContext()) struct CXXRecordDecl::DefinitionData(RD);
RD->setCompleteDefinition(true);
RD->DefinitionData = DD;
RD->getCanonicalDecl()->DefinitionData = DD;
// Track that we did this horrible thing so that we can fix it later.
Reader.PendingFakeDefinitionData.insert(
std::make_pair(DD, ASTReader::PendingFakeDefinitionKind::Fake));
}
return DD->Definition;
}
if (auto *RD = dyn_cast<RecordDecl>(DC))
return RD->getDefinition();
if (auto *ED = dyn_cast<EnumDecl>(DC))
return ED->getASTContext().getLangOpts().CPlusPlus? ED->getDefinition()
: nullptr;
if (auto *OID = dyn_cast<ObjCInterfaceDecl>(DC))
return OID->getDefinition();
// We can see the TU here only if we have no Sema object. In that case,
// there's no TU scope to look in, so using the DC alone is sufficient.
if (auto *TU = dyn_cast<TranslationUnitDecl>(DC))
return TU;
return nullptr;
}
ASTDeclReader::FindExistingResult::~FindExistingResult() {
// Record that we had a typedef name for linkage whether or not we merge
// with that declaration.
if (TypedefNameForLinkage) {
DeclContext *DC = New->getDeclContext()->getRedeclContext();
Reader.ImportedTypedefNamesForLinkage.insert(
std::make_pair(std::make_pair(DC, TypedefNameForLinkage), New));
return;
}
if (!AddResult || Existing)
return;
DeclarationName Name = New->getDeclName();
DeclContext *DC = New->getDeclContext()->getRedeclContext();
if (needsAnonymousDeclarationNumber(New)) {
setAnonymousDeclForMerging(Reader, New->getLexicalDeclContext(),
AnonymousDeclNumber, New);
} else if (DC->isTranslationUnit() &&
!Reader.getContext().getLangOpts().CPlusPlus) {
if (Reader.getIdResolver().tryAddTopLevelDecl(New, Name))
Reader.PendingFakeLookupResults[Name.getAsIdentifierInfo()]
.push_back(New);
} else if (DeclContext *MergeDC = getPrimaryContextForMerging(Reader, DC)) {
// Add the declaration to its redeclaration context so later merging
// lookups will find it.
MergeDC->makeDeclVisibleInContextImpl(New, /*Internal*/true);
}
}
/// Find the declaration that should be merged into, given the declaration found
/// by name lookup. If we're merging an anonymous declaration within a typedef,
/// we need a matching typedef, and we merge with the type inside it.
static NamedDecl *getDeclForMerging(NamedDecl *Found,
bool IsTypedefNameForLinkage) {
if (!IsTypedefNameForLinkage)
return Found;
// If we found a typedef declaration that gives a name to some other
// declaration, then we want that inner declaration. Declarations from
// AST files are handled via ImportedTypedefNamesForLinkage.
if (Found->isFromASTFile())
return nullptr;
if (auto *TND = dyn_cast<TypedefNameDecl>(Found))
return TND->getAnonDeclWithTypedefName(/*AnyRedecl*/true);
return nullptr;
}
/// Find the declaration to use to populate the anonymous declaration table
/// for the given lexical DeclContext. We only care about finding local
/// definitions of the context; we'll merge imported ones as we go.
DeclContext *
ASTDeclReader::getPrimaryDCForAnonymousDecl(DeclContext *LexicalDC) {
// For classes, we track the definition as we merge.
if (auto *RD = dyn_cast<CXXRecordDecl>(LexicalDC)) {
auto *DD = RD->getCanonicalDecl()->DefinitionData;
return DD ? DD->Definition : nullptr;
}
// For anything else, walk its merged redeclarations looking for a definition.
// Note that we can't just call getDefinition here because the redeclaration
// chain isn't wired up.
for (auto *D : merged_redecls(cast<Decl>(LexicalDC))) {
if (auto *FD = dyn_cast<FunctionDecl>(D))
if (FD->isThisDeclarationADefinition())
return FD;
if (auto *MD = dyn_cast<ObjCMethodDecl>(D))
if (MD->isThisDeclarationADefinition())
return MD;
if (auto *RD = dyn_cast<RecordDecl>(D))
if (RD->isThisDeclarationADefinition())
return RD;
}
// No merged definition yet.
return nullptr;
}
NamedDecl *ASTDeclReader::getAnonymousDeclForMerging(ASTReader &Reader,
DeclContext *DC,
unsigned Index) {
// If the lexical context has been merged, look into the now-canonical
// definition.
auto *CanonDC = cast<Decl>(DC)->getCanonicalDecl();
// If we've seen this before, return the canonical declaration.
auto &Previous = Reader.AnonymousDeclarationsForMerging[CanonDC];
if (Index < Previous.size() && Previous[Index])
return Previous[Index];
// If this is the first time, but we have parsed a declaration of the context,
// build the anonymous declaration list from the parsed declaration.
auto *PrimaryDC = getPrimaryDCForAnonymousDecl(DC);
if (PrimaryDC && !cast<Decl>(PrimaryDC)->isFromASTFile()) {
numberAnonymousDeclsWithin(PrimaryDC, [&](NamedDecl *ND, unsigned Number) {
if (Previous.size() == Number)
Previous.push_back(cast<NamedDecl>(ND->getCanonicalDecl()));
else
Previous[Number] = cast<NamedDecl>(ND->getCanonicalDecl());
});
}
return Index < Previous.size() ? Previous[Index] : nullptr;
}
void ASTDeclReader::setAnonymousDeclForMerging(ASTReader &Reader,
DeclContext *DC, unsigned Index,
NamedDecl *D) {
auto *CanonDC = cast<Decl>(DC)->getCanonicalDecl();
auto &Previous = Reader.AnonymousDeclarationsForMerging[CanonDC];
if (Index >= Previous.size())
Previous.resize(Index + 1);
if (!Previous[Index])
Previous[Index] = D;
}
ASTDeclReader::FindExistingResult ASTDeclReader::findExisting(NamedDecl *D) {
DeclarationName Name = TypedefNameForLinkage ? TypedefNameForLinkage
: D->getDeclName();
if (!Name && !needsAnonymousDeclarationNumber(D)) {
// Don't bother trying to find unnamed declarations that are in
// unmergeable contexts.
FindExistingResult Result(Reader, D, /*Existing=*/nullptr,
AnonymousDeclNumber, TypedefNameForLinkage);
Result.suppress();
return Result;
}
ASTContext &C = Reader.getContext();
DeclContext *DC = D->getDeclContext()->getRedeclContext();
if (TypedefNameForLinkage) {
auto It = Reader.ImportedTypedefNamesForLinkage.find(
std::make_pair(DC, TypedefNameForLinkage));
if (It != Reader.ImportedTypedefNamesForLinkage.end())
if (C.isSameEntity(It->second, D))
return FindExistingResult(Reader, D, It->second, AnonymousDeclNumber,
TypedefNameForLinkage);
// Go on to check in other places in case an existing typedef name
// was not imported.
}
if (needsAnonymousDeclarationNumber(D)) {
// This is an anonymous declaration that we may need to merge. Look it up
// in its context by number.
if (auto *Existing = getAnonymousDeclForMerging(
Reader, D->getLexicalDeclContext(), AnonymousDeclNumber))
if (C.isSameEntity(Existing, D))
return FindExistingResult(Reader, D, Existing, AnonymousDeclNumber,
TypedefNameForLinkage);
} else if (DC->isTranslationUnit() &&
!Reader.getContext().getLangOpts().CPlusPlus) {
IdentifierResolver &IdResolver = Reader.getIdResolver();
// Temporarily consider the identifier to be up-to-date. We don't want to
// cause additional lookups here.
class UpToDateIdentifierRAII {
IdentifierInfo *II;
bool WasOutToDate = false;
public:
explicit UpToDateIdentifierRAII(IdentifierInfo *II) : II(II) {
if (II) {
WasOutToDate = II->isOutOfDate();
if (WasOutToDate)
II->setOutOfDate(false);
}
}
~UpToDateIdentifierRAII() {
if (WasOutToDate)
II->setOutOfDate(true);
}
} UpToDate(Name.getAsIdentifierInfo());
for (IdentifierResolver::iterator I = IdResolver.begin(Name),
IEnd = IdResolver.end();
I != IEnd; ++I) {
if (NamedDecl *Existing = getDeclForMerging(*I, TypedefNameForLinkage))
if (C.isSameEntity(Existing, D))
return FindExistingResult(Reader, D, Existing, AnonymousDeclNumber,
TypedefNameForLinkage);
}
} else if (DeclContext *MergeDC = getPrimaryContextForMerging(Reader, DC)) {
DeclContext::lookup_result R = MergeDC->noload_lookup(Name);
for (DeclContext::lookup_iterator I = R.begin(), E = R.end(); I != E; ++I) {
if (NamedDecl *Existing = getDeclForMerging(*I, TypedefNameForLinkage))
if (C.isSameEntity(Existing, D))
return FindExistingResult(Reader, D, Existing, AnonymousDeclNumber,
TypedefNameForLinkage);
}
} else {
// Not in a mergeable context.
return FindExistingResult(Reader);
}
// If this declaration is from a merged context, make a note that we need to
// check that the canonical definition of that context contains the decl.
//
// FIXME: We should do something similar if we merge two definitions of the
// same template specialization into the same CXXRecordDecl.
auto MergedDCIt = Reader.MergedDeclContexts.find(D->getLexicalDeclContext());
if (MergedDCIt != Reader.MergedDeclContexts.end() &&
MergedDCIt->second == D->getDeclContext())
Reader.PendingOdrMergeChecks.push_back(D);
return FindExistingResult(Reader, D, /*Existing=*/nullptr,
AnonymousDeclNumber, TypedefNameForLinkage);
}
template<typename DeclT>
Decl *ASTDeclReader::getMostRecentDeclImpl(Redeclarable<DeclT> *D) {
return D->RedeclLink.getLatestNotUpdated();
}
Decl *ASTDeclReader::getMostRecentDeclImpl(...) {
llvm_unreachable("getMostRecentDecl on non-redeclarable declaration");
}
Decl *ASTDeclReader::getMostRecentDecl(Decl *D) {
assert(D);
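// Dispatch on the dynamic declaration kind: DeclNodes.inc expands the DECL
// macro into one case per concrete declaration class, forwarding to the
// matching getMostRecentDeclImpl overload (redeclarable or not).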
switch (D->getKind()) {
#define ABSTRACT_DECL(TYPE)
#define DECL(TYPE, BASE) \
case Decl::TYPE: \
return getMostRecentDeclImpl(cast<TYPE##Decl>(D));
#include "clang/AST/DeclNodes.inc"
}
llvm_unreachable("unknown decl kind");
}
Decl *ASTReader::getMostRecentExistingDecl(Decl *D) {
return ASTDeclReader::getMostRecentDecl(D->getCanonicalDecl());
}
void ASTDeclReader::mergeInheritableAttributes(ASTReader &Reader, Decl *D,
Decl *Previous) {
InheritableAttr *NewAttr = nullptr;
ASTContext &Context = Reader.getContext();
const auto *IA = Previous->getAttr<MSInheritanceAttr>();
if (IA && !D->hasAttr<MSInheritanceAttr>()) {
NewAttr = cast<InheritableAttr>(IA->clone(Context));
NewAttr->setInherited(true);
D->addAttr(NewAttr);
}
}
template<typename DeclT>
void ASTDeclReader::attachPreviousDeclImpl(ASTReader &Reader,
Redeclarable<DeclT> *D,
Decl *Previous, Decl *Canon) {
D->RedeclLink.setPrevious(cast<DeclT>(Previous));
D->First = cast<DeclT>(Previous)->First;
}
namespace clang {
template<>
void ASTDeclReader::attachPreviousDeclImpl(ASTReader &Reader,
Redeclarable<VarDecl> *D,
Decl *Previous, Decl *Canon) {
auto *VD = static_cast<VarDecl *>(D);
auto *PrevVD = cast<VarDecl>(Previous);
D->RedeclLink.setPrevious(PrevVD);
D->First = PrevVD->First;
// We should keep at most one definition on the chain.
// FIXME: Cache the definition once we've found it. Building a chain with
// N definitions currently takes O(N^2) time here.
if (VD->isThisDeclarationADefinition() == VarDecl::Definition) {
for (VarDecl *CurD = PrevVD; CurD; CurD = CurD->getPreviousDecl()) {
if (CurD->isThisDeclarationADefinition() == VarDecl::Definition) {
Reader.mergeDefinitionVisibility(CurD, VD);
VD->demoteThisDefinitionToDeclaration();
break;
}
}
}
}
static bool isUndeducedReturnType(QualType T) {
auto *DT = T->getContainedDeducedType();
return DT && !DT->isDeduced();
}
template<>
void ASTDeclReader::attachPreviousDeclImpl(ASTReader &Reader,
Redeclarable<FunctionDecl> *D,
Decl *Previous, Decl *Canon) {
auto *FD = static_cast<FunctionDecl *>(D);
auto *PrevFD = cast<FunctionDecl>(Previous);
FD->RedeclLink.setPrevious(PrevFD);
FD->First = PrevFD->First;
// If the previous declaration is an inline function declaration, then this
// declaration is too.
if (PrevFD->isInlined() != FD->isInlined()) {
// FIXME: [dcl.fct.spec]p4:
// If a function with external linkage is declared inline in one
// translation unit, it shall be declared inline in all translation
// units in which it appears.
//
// Be careful of this case:
//
// module A:
// template<typename T> struct X { void f(); };
// template<typename T> inline void X<T>::f() {}
//
// module B instantiates the declaration of X<int>::f
// module C instantiates the definition of X<int>::f
//
// If module B and C are merged, we do not have a violation of this rule.
FD->setImplicitlyInline(true);
}
auto *FPT = FD->getType()->getAs<FunctionProtoType>();
auto *PrevFPT = PrevFD->getType()->getAs<FunctionProtoType>();
if (FPT && PrevFPT) {
// If we need to propagate an exception specification along the redecl
// chain, make a note of that so that we can do so later.
bool IsUnresolved = isUnresolvedExceptionSpec(FPT->getExceptionSpecType());
bool WasUnresolved =
isUnresolvedExceptionSpec(PrevFPT->getExceptionSpecType());
if (IsUnresolved != WasUnresolved)
Reader.PendingExceptionSpecUpdates.insert(
{Canon, IsUnresolved ? PrevFD : FD});
// If we need to propagate a deduced return type along the redecl chain,
// make a note of that so that we can do it later.
bool IsUndeduced = isUndeducedReturnType(FPT->getReturnType());
bool WasUndeduced = isUndeducedReturnType(PrevFPT->getReturnType());
if (IsUndeduced != WasUndeduced)
Reader.PendingDeducedTypeUpdates.insert(
{cast<FunctionDecl>(Canon),
(IsUndeduced ? PrevFPT : FPT)->getReturnType()});
}
}
} // namespace clang
void ASTDeclReader::attachPreviousDeclImpl(ASTReader &Reader, ...) {
llvm_unreachable("attachPreviousDecl on non-redeclarable declaration");
}
/// Inherit the default template argument from \p From to \p To. Returns
/// \c false if \p From has no default template argument.
template <typename ParmDecl>
static bool inheritDefaultTemplateArgument(ASTContext &Context, ParmDecl *From,
Decl *ToD) {
auto *To = cast<ParmDecl>(ToD);
if (!From->hasDefaultArgument())
return false;
To->setInheritedDefaultArgument(Context, From);
return true;
}
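/// Propagate default template arguments parameter-by-parameter from \p From
/// to \p To, so that (for instance, illustratively) a redeclaration such as
/// 'template<typename T> struct X;' merged with a prior
/// 'template<typename T = int> struct X;' can still find the default
/// argument through the parameter it inherits from.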
static void inheritDefaultTemplateArguments(ASTContext &Context,
TemplateDecl *From,
TemplateDecl *To) {
auto *FromTP = From->getTemplateParameters();
auto *ToTP = To->getTemplateParameters();
assert(FromTP->size() == ToTP->size() && "merged mismatched templates?");
for (unsigned I = 0, N = FromTP->size(); I != N; ++I) {
NamedDecl *FromParam = FromTP->getParam(I);
NamedDecl *ToParam = ToTP->getParam(I);
if (auto *FTTP = dyn_cast<TemplateTypeParmDecl>(FromParam))
inheritDefaultTemplateArgument(Context, FTTP, ToParam);
else if (auto *FNTTP = dyn_cast<NonTypeTemplateParmDecl>(FromParam))
inheritDefaultTemplateArgument(Context, FNTTP, ToParam);
else
inheritDefaultTemplateArgument(
Context, cast<TemplateTemplateParmDecl>(FromParam), ToParam);
}
}
void ASTDeclReader::attachPreviousDecl(ASTReader &Reader, Decl *D,
Decl *Previous, Decl *Canon) {
assert(D && Previous);
switch (D->getKind()) {
#define ABSTRACT_DECL(TYPE)
#define DECL(TYPE, BASE) \
case Decl::TYPE: \
attachPreviousDeclImpl(Reader, cast<TYPE##Decl>(D), Previous, Canon); \
break;
#include "clang/AST/DeclNodes.inc"
}
// If the declaration was visible in one module, a redeclaration of it in
// another module remains visible even if it wouldn't be visible by itself.
//
// FIXME: In this case, the declaration should only be visible if a module
// that makes it visible has been imported.
D->IdentifierNamespace |=
Previous->IdentifierNamespace &
(Decl::IDNS_Ordinary | Decl::IDNS_Tag | Decl::IDNS_Type);
// If the declaration declares a template, it may inherit default arguments
// from the previous declaration.
if (auto *TD = dyn_cast<TemplateDecl>(D))
inheritDefaultTemplateArguments(Reader.getContext(),
cast<TemplateDecl>(Previous), TD);
// If any declaration in the chain carries an inheritable attribute, it needs
// to be added to all of the declarations in the redeclaration chain.
// FIXME: Only the logic for merging MSInheritanceAttr is present; it should
// be extended to all inheritable attributes.
mergeInheritableAttributes(Reader, D, Previous);
}
template<typename DeclT>
void ASTDeclReader::attachLatestDeclImpl(Redeclarable<DeclT> *D, Decl *Latest) {
D->RedeclLink.setLatest(cast<DeclT>(Latest));
}
void ASTDeclReader::attachLatestDeclImpl(...) {
llvm_unreachable("attachLatestDecl on non-redeclarable declaration");
}
void ASTDeclReader::attachLatestDecl(Decl *D, Decl *Latest) {
assert(D && Latest);
switch (D->getKind()) {
#define ABSTRACT_DECL(TYPE)
#define DECL(TYPE, BASE) \
case Decl::TYPE: \
attachLatestDeclImpl(cast<TYPE##Decl>(D), Latest); \
break;
#include "clang/AST/DeclNodes.inc"
}
}
template<typename DeclT>
void ASTDeclReader::markIncompleteDeclChainImpl(Redeclarable<DeclT> *D) {
D->RedeclLink.markIncomplete();
}
void ASTDeclReader::markIncompleteDeclChainImpl(...) {
llvm_unreachable("markIncompleteDeclChain on non-redeclarable declaration");
}
void ASTReader::markIncompleteDeclChain(Decl *D) {
switch (D->getKind()) {
#define ABSTRACT_DECL(TYPE)
#define DECL(TYPE, BASE) \
case Decl::TYPE: \
ASTDeclReader::markIncompleteDeclChainImpl(cast<TYPE##Decl>(D)); \
break;
#include "clang/AST/DeclNodes.inc"
}
}
/// Read the declaration at the given offset from the AST file.
Decl *ASTReader::ReadDeclRecord(DeclID ID) {
unsigned Index = ID - NUM_PREDEF_DECL_IDS;
SourceLocation DeclLoc;
RecordLocation Loc = DeclCursorForID(ID, DeclLoc);
llvm::BitstreamCursor &DeclsCursor = Loc.F->DeclsCursor;
// Keep track of where we are in the stream, then jump back there
// after reading this declaration.
SavedStreamPosition SavedPosition(DeclsCursor);
ReadingKindTracker ReadingKind(Read_Decl, *this);
// Note that we are loading a declaration record.
Deserializing ADecl(this);
auto Fail = [](const char *what, llvm::Error &&Err) {
llvm::report_fatal_error(Twine("ASTReader::readDeclRecord failed ") + what +
": " + toString(std::move(Err)));
};
if (llvm::Error JumpFailed = DeclsCursor.JumpToBit(Loc.Offset))
Fail("jumping", std::move(JumpFailed));
ASTRecordReader Record(*this, *Loc.F);
ASTDeclReader Reader(*this, Record, Loc, ID, DeclLoc);
Expected<unsigned> MaybeCode = DeclsCursor.ReadCode();
if (!MaybeCode)
Fail("reading code", MaybeCode.takeError());
unsigned Code = MaybeCode.get();
ASTContext &Context = getContext();
Decl *D = nullptr;
Expected<unsigned> MaybeDeclCode = Record.readRecord(DeclsCursor, Code);
if (!MaybeDeclCode)
llvm::report_fatal_error(
Twine("ASTReader::readDeclRecord failed reading decl code: ") +
toString(MaybeDeclCode.takeError()));
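// Each DECL_* code maps to a CreateDeserialized call that allocates an
// otherwise-empty node of the right dynamic type; Reader.Visit(D) below
// fills in its fields. Codes whose nodes carry trailing objects read the
// required counts from the record up front.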
switch ((DeclCode)MaybeDeclCode.get()) {
case DECL_CONTEXT_LEXICAL:
case DECL_CONTEXT_VISIBLE:
llvm_unreachable("Record cannot be de-serialized with readDeclRecord");
case DECL_TYPEDEF:
D = TypedefDecl::CreateDeserialized(Context, ID);
break;
case DECL_TYPEALIAS:
D = TypeAliasDecl::CreateDeserialized(Context, ID);
break;
case DECL_ENUM:
D = EnumDecl::CreateDeserialized(Context, ID);
break;
case DECL_RECORD:
D = RecordDecl::CreateDeserialized(Context, ID);
break;
case DECL_ENUM_CONSTANT:
D = EnumConstantDecl::CreateDeserialized(Context, ID);
break;
case DECL_FUNCTION:
D = FunctionDecl::CreateDeserialized(Context, ID);
break;
case DECL_LINKAGE_SPEC:
D = LinkageSpecDecl::CreateDeserialized(Context, ID);
break;
case DECL_EXPORT:
D = ExportDecl::CreateDeserialized(Context, ID);
break;
case DECL_LABEL:
D = LabelDecl::CreateDeserialized(Context, ID);
break;
case DECL_NAMESPACE:
D = NamespaceDecl::CreateDeserialized(Context, ID);
break;
case DECL_NAMESPACE_ALIAS:
D = NamespaceAliasDecl::CreateDeserialized(Context, ID);
break;
case DECL_USING:
D = UsingDecl::CreateDeserialized(Context, ID);
break;
case DECL_USING_PACK:
D = UsingPackDecl::CreateDeserialized(Context, ID, Record.readInt());
break;
case DECL_USING_SHADOW:
D = UsingShadowDecl::CreateDeserialized(Context, ID);
break;
case DECL_USING_ENUM:
D = UsingEnumDecl::CreateDeserialized(Context, ID);
break;
case DECL_CONSTRUCTOR_USING_SHADOW:
D = ConstructorUsingShadowDecl::CreateDeserialized(Context, ID);
break;
case DECL_USING_DIRECTIVE:
D = UsingDirectiveDecl::CreateDeserialized(Context, ID);
break;
case DECL_UNRESOLVED_USING_VALUE:
D = UnresolvedUsingValueDecl::CreateDeserialized(Context, ID);
break;
case DECL_UNRESOLVED_USING_TYPENAME:
D = UnresolvedUsingTypenameDecl::CreateDeserialized(Context, ID);
break;
case DECL_UNRESOLVED_USING_IF_EXISTS:
D = UnresolvedUsingIfExistsDecl::CreateDeserialized(Context, ID);
break;
case DECL_CXX_RECORD:
D = CXXRecordDecl::CreateDeserialized(Context, ID);
break;
case DECL_CXX_DEDUCTION_GUIDE:
D = CXXDeductionGuideDecl::CreateDeserialized(Context, ID);
break;
case DECL_CXX_METHOD:
D = CXXMethodDecl::CreateDeserialized(Context, ID);
break;
case DECL_CXX_CONSTRUCTOR:
D = CXXConstructorDecl::CreateDeserialized(Context, ID, Record.readInt());
break;
case DECL_CXX_DESTRUCTOR:
D = CXXDestructorDecl::CreateDeserialized(Context, ID);
break;
case DECL_CXX_CONVERSION:
D = CXXConversionDecl::CreateDeserialized(Context, ID);
break;
case DECL_ACCESS_SPEC:
D = AccessSpecDecl::CreateDeserialized(Context, ID);
break;
case DECL_FRIEND:
D = FriendDecl::CreateDeserialized(Context, ID, Record.readInt());
break;
case DECL_FRIEND_TEMPLATE:
D = FriendTemplateDecl::CreateDeserialized(Context, ID);
break;
case DECL_CLASS_TEMPLATE:
D = ClassTemplateDecl::CreateDeserialized(Context, ID);
break;
case DECL_CLASS_TEMPLATE_SPECIALIZATION:
D = ClassTemplateSpecializationDecl::CreateDeserialized(Context, ID);
break;
case DECL_CLASS_TEMPLATE_PARTIAL_SPECIALIZATION:
D = ClassTemplatePartialSpecializationDecl::CreateDeserialized(Context, ID);
break;
case DECL_VAR_TEMPLATE:
D = VarTemplateDecl::CreateDeserialized(Context, ID);
break;
case DECL_VAR_TEMPLATE_SPECIALIZATION:
D = VarTemplateSpecializationDecl::CreateDeserialized(Context, ID);
break;
case DECL_VAR_TEMPLATE_PARTIAL_SPECIALIZATION:
D = VarTemplatePartialSpecializationDecl::CreateDeserialized(Context, ID);
break;
case DECL_CLASS_SCOPE_FUNCTION_SPECIALIZATION:
D = ClassScopeFunctionSpecializationDecl::CreateDeserialized(Context, ID);
break;
case DECL_FUNCTION_TEMPLATE:
D = FunctionTemplateDecl::CreateDeserialized(Context, ID);
break;
case DECL_TEMPLATE_TYPE_PARM: {
bool HasTypeConstraint = Record.readInt();
D = TemplateTypeParmDecl::CreateDeserialized(Context, ID,
HasTypeConstraint);
break;
}
case DECL_NON_TYPE_TEMPLATE_PARM: {
bool HasTypeConstraint = Record.readInt();
D = NonTypeTemplateParmDecl::CreateDeserialized(Context, ID,
HasTypeConstraint);
break;
}
case DECL_EXPANDED_NON_TYPE_TEMPLATE_PARM_PACK: {
bool HasTypeConstraint = Record.readInt();
D = NonTypeTemplateParmDecl::CreateDeserialized(Context, ID,
Record.readInt(),
HasTypeConstraint);
break;
}
case DECL_TEMPLATE_TEMPLATE_PARM:
D = TemplateTemplateParmDecl::CreateDeserialized(Context, ID);
break;
case DECL_EXPANDED_TEMPLATE_TEMPLATE_PARM_PACK:
D = TemplateTemplateParmDecl::CreateDeserialized(Context, ID,
Record.readInt());
break;
case DECL_TYPE_ALIAS_TEMPLATE:
D = TypeAliasTemplateDecl::CreateDeserialized(Context, ID);
break;
case DECL_CONCEPT:
D = ConceptDecl::CreateDeserialized(Context, ID);
break;
case DECL_REQUIRES_EXPR_BODY:
D = RequiresExprBodyDecl::CreateDeserialized(Context, ID);
break;
case DECL_STATIC_ASSERT:
D = StaticAssertDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_METHOD:
D = ObjCMethodDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_INTERFACE:
D = ObjCInterfaceDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_IVAR:
D = ObjCIvarDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_PROTOCOL:
D = ObjCProtocolDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_AT_DEFS_FIELD:
D = ObjCAtDefsFieldDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_CATEGORY:
D = ObjCCategoryDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_CATEGORY_IMPL:
D = ObjCCategoryImplDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_IMPLEMENTATION:
D = ObjCImplementationDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_COMPATIBLE_ALIAS:
D = ObjCCompatibleAliasDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_PROPERTY:
D = ObjCPropertyDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_PROPERTY_IMPL:
D = ObjCPropertyImplDecl::CreateDeserialized(Context, ID);
break;
case DECL_FIELD:
D = FieldDecl::CreateDeserialized(Context, ID);
break;
case DECL_INDIRECTFIELD:
D = IndirectFieldDecl::CreateDeserialized(Context, ID);
break;
case DECL_VAR:
D = VarDecl::CreateDeserialized(Context, ID);
break;
case DECL_IMPLICIT_PARAM:
D = ImplicitParamDecl::CreateDeserialized(Context, ID);
break;
case DECL_PARM_VAR:
D = ParmVarDecl::CreateDeserialized(Context, ID);
break;
case DECL_DECOMPOSITION:
D = DecompositionDecl::CreateDeserialized(Context, ID, Record.readInt());
break;
case DECL_BINDING:
D = BindingDecl::CreateDeserialized(Context, ID);
break;
case DECL_FILE_SCOPE_ASM:
D = FileScopeAsmDecl::CreateDeserialized(Context, ID);
break;
case DECL_BLOCK:
D = BlockDecl::CreateDeserialized(Context, ID);
break;
case DECL_MS_PROPERTY:
D = MSPropertyDecl::CreateDeserialized(Context, ID);
break;
case DECL_MS_GUID:
D = MSGuidDecl::CreateDeserialized(Context, ID);
break;
case DECL_TEMPLATE_PARAM_OBJECT:
D = TemplateParamObjectDecl::CreateDeserialized(Context, ID);
break;
case DECL_CAPTURED:
D = CapturedDecl::CreateDeserialized(Context, ID, Record.readInt());
break;
case DECL_CXX_BASE_SPECIFIERS:
Error("attempt to read a C++ base-specifier record as a declaration");
return nullptr;
case DECL_CXX_CTOR_INITIALIZERS:
Error("attempt to read a C++ ctor initializer record as a declaration");
return nullptr;
case DECL_IMPORT:
// Note: last entry of the ImportDecl record is the number of stored source
// locations.
D = ImportDecl::CreateDeserialized(Context, ID, Record.back());
break;
case DECL_OMP_THREADPRIVATE: {
Record.skipInts(1);
unsigned NumChildren = Record.readInt();
Record.skipInts(1);
D = OMPThreadPrivateDecl::CreateDeserialized(Context, ID, NumChildren);
break;
}
case DECL_OMP_ALLOCATE: {
unsigned NumClauses = Record.readInt();
unsigned NumVars = Record.readInt();
Record.skipInts(1);
D = OMPAllocateDecl::CreateDeserialized(Context, ID, NumVars, NumClauses);
break;
}
case DECL_OMP_REQUIRES: {
unsigned NumClauses = Record.readInt();
Record.skipInts(2);
D = OMPRequiresDecl::CreateDeserialized(Context, ID, NumClauses);
break;
}
case DECL_OMP_DECLARE_REDUCTION:
D = OMPDeclareReductionDecl::CreateDeserialized(Context, ID);
break;
case DECL_OMP_DECLARE_MAPPER: {
unsigned NumClauses = Record.readInt();
Record.skipInts(2);
D = OMPDeclareMapperDecl::CreateDeserialized(Context, ID, NumClauses);
break;
}
case DECL_OMP_CAPTUREDEXPR:
D = OMPCapturedExprDecl::CreateDeserialized(Context, ID);
break;
case DECL_PRAGMA_COMMENT:
D = PragmaCommentDecl::CreateDeserialized(Context, ID, Record.readInt());
break;
case DECL_PRAGMA_DETECT_MISMATCH:
D = PragmaDetectMismatchDecl::CreateDeserialized(Context, ID,
Record.readInt());
break;
case DECL_EMPTY:
D = EmptyDecl::CreateDeserialized(Context, ID);
break;
case DECL_LIFETIME_EXTENDED_TEMPORARY:
D = LifetimeExtendedTemporaryDecl::CreateDeserialized(Context, ID);
break;
case DECL_OBJC_TYPE_PARAM:
D = ObjCTypeParamDecl::CreateDeserialized(Context, ID);
break;
}
assert(D && "Unknown declaration reading AST file");
LoadedDecl(Index, D);
// Set the DeclContext before doing any deserialization, to make sure internal
// calls to Decl::getASTContext() by Decl's methods will find the
// TranslationUnitDecl without crashing.
D->setDeclContext(Context.getTranslationUnitDecl());
Reader.Visit(D);
// If this declaration is also a declaration context, get the
// offsets for its tables of lexical and visible declarations.
if (auto *DC = dyn_cast<DeclContext>(D)) {
std::pair<uint64_t, uint64_t> Offsets = Reader.VisitDeclContext(DC);
if (Offsets.first &&
ReadLexicalDeclContextStorage(*Loc.F, DeclsCursor, Offsets.first, DC))
return nullptr;
if (Offsets.second &&
ReadVisibleDeclContextStorage(*Loc.F, DeclsCursor, Offsets.second, ID))
return nullptr;
}
assert(Record.getIdx() == Record.size());
// Load any relevant update records.
PendingUpdateRecords.push_back(
PendingUpdateRecord(ID, D, /*JustLoaded=*/true));
// Load the categories after recursive loading is finished.
if (auto *Class = dyn_cast<ObjCInterfaceDecl>(D))
// If we already have a definition when deserializing the ObjCInterfaceDecl,
// we put the Decl in PendingDefinitions so we can pull the categories here.
if (Class->isThisDeclarationADefinition() ||
PendingDefinitions.count(Class))
loadObjCCategories(ID, Class);
// If we have deserialized a declaration that has a definition the
// AST consumer might need to know about, queue it.
// We don't pass it to the consumer immediately because we may be in recursive
// loading, and some declarations may still be initializing.
PotentiallyInterestingDecls.push_back(
InterestingDecl(D, Reader.hasPendingBody()));
return D;
}
void ASTReader::PassInterestingDeclsToConsumer() {
assert(Consumer);
if (PassingDeclsToConsumer)
return;
// Guard variable to avoid recursively redoing the process of passing
// decls to consumer.
SaveAndRestore<bool> GuardPassingDeclsToConsumer(PassingDeclsToConsumer,
true);
// Ensure that we've loaded all potentially-interesting declarations
// that need to be eagerly loaded.
for (auto ID : EagerlyDeserializedDecls)
GetDecl(ID);
EagerlyDeserializedDecls.clear();
while (!PotentiallyInterestingDecls.empty()) {
InterestingDecl D = PotentiallyInterestingDecls.front();
PotentiallyInterestingDecls.pop_front();
if (isConsumerInterestedIn(getContext(), D.getDecl(), D.hasPendingBody()))
PassInterestingDeclToConsumer(D.getDecl());
}
}
void ASTReader::loadDeclUpdateRecords(PendingUpdateRecord &Record) {
// The declaration may have been modified by files later in the chain.
// If this is the case, read the record containing the updates from each file
// and pass it to ASTDeclReader to make the modifications.
serialization::GlobalDeclID ID = Record.ID;
Decl *D = Record.D;
ProcessingUpdatesRAIIObj ProcessingUpdates(*this);
DeclUpdateOffsetsMap::iterator UpdI = DeclUpdateOffsets.find(ID);
SmallVector<serialization::DeclID, 8> PendingLazySpecializationIDs;
if (UpdI != DeclUpdateOffsets.end()) {
auto UpdateOffsets = std::move(UpdI->second);
DeclUpdateOffsets.erase(UpdI);
// Check if this decl was interesting to the consumer. If we just loaded
// the declaration, then we know it was interesting and we skip the call
// to isConsumerInterestedIn because it is unsafe to call in the
// current ASTReader state.
bool WasInteresting =
Record.JustLoaded || isConsumerInterestedIn(getContext(), D, false);
for (auto &FileAndOffset : UpdateOffsets) {
ModuleFile *F = FileAndOffset.first;
uint64_t Offset = FileAndOffset.second;
llvm::BitstreamCursor &Cursor = F->DeclsCursor;
SavedStreamPosition SavedPosition(Cursor);
if (llvm::Error JumpFailed = Cursor.JumpToBit(Offset))
// FIXME: Don't report a fatal error here.
llvm::report_fatal_error(
Twine("ASTReader::loadDeclUpdateRecords failed jumping: ") +
toString(std::move(JumpFailed)));
Expected<unsigned> MaybeCode = Cursor.ReadCode();
if (!MaybeCode)
llvm::report_fatal_error(
Twine("ASTReader::loadDeclUpdateRecords failed reading code: ") +
toString(MaybeCode.takeError()));
unsigned Code = MaybeCode.get();
ASTRecordReader Record(*this, *F);
if (Expected<unsigned> MaybeRecCode = Record.readRecord(Cursor, Code))
assert(MaybeRecCode.get() == DECL_UPDATES &&
"Expected DECL_UPDATES record!");
else
llvm::report_fatal_error(
Twine("ASTReader::loadDeclUpdateRecords failed reading rec code: ") +
toString(MaybeCode.takeError()));
ASTDeclReader Reader(*this, Record, RecordLocation(F, Offset), ID,
SourceLocation());
Reader.UpdateDecl(D, PendingLazySpecializationIDs);
// We might have made this declaration interesting. If so, remember that
// we need to hand it off to the consumer.
if (!WasInteresting &&
isConsumerInterestedIn(getContext(), D, Reader.hasPendingBody())) {
PotentiallyInterestingDecls.push_back(
InterestingDecl(D, Reader.hasPendingBody()));
WasInteresting = true;
}
}
}
// Add the lazy specializations to the template.
assert((PendingLazySpecializationIDs.empty() || isa<ClassTemplateDecl>(D) ||
isa<FunctionTemplateDecl>(D) || isa<VarTemplateDecl>(D)) &&
"Must not have pending specializations");
if (auto *CTD = dyn_cast<ClassTemplateDecl>(D))
ASTDeclReader::AddLazySpecializations(CTD, PendingLazySpecializationIDs);
else if (auto *FTD = dyn_cast<FunctionTemplateDecl>(D))
ASTDeclReader::AddLazySpecializations(FTD, PendingLazySpecializationIDs);
else if (auto *VTD = dyn_cast<VarTemplateDecl>(D))
ASTDeclReader::AddLazySpecializations(VTD, PendingLazySpecializationIDs);
PendingLazySpecializationIDs.clear();
// Load the pending visible updates for this decl context, if it has any.
auto I = PendingVisibleUpdates.find(ID);
if (I != PendingVisibleUpdates.end()) {
auto VisibleUpdates = std::move(I->second);
PendingVisibleUpdates.erase(I);
auto *DC = cast<DeclContext>(D)->getPrimaryContext();
for (const auto &Update : VisibleUpdates)
Lookups[DC].Table.add(
Update.Mod, Update.Data,
reader::ASTDeclContextNameLookupTrait(*this, *Update.Mod));
DC->setHasExternalVisibleStorage(true);
}
}
void ASTReader::loadPendingDeclChain(Decl *FirstLocal, uint64_t LocalOffset) {
// Attach FirstLocal to the end of the decl chain.
Decl *CanonDecl = FirstLocal->getCanonicalDecl();
if (FirstLocal != CanonDecl) {
Decl *PrevMostRecent = ASTDeclReader::getMostRecentDecl(CanonDecl);
ASTDeclReader::attachPreviousDecl(
*this, FirstLocal, PrevMostRecent ? PrevMostRecent : CanonDecl,
CanonDecl);
}
if (!LocalOffset) {
ASTDeclReader::attachLatestDecl(CanonDecl, FirstLocal);
return;
}
// Load the list of other redeclarations from this module file.
ModuleFile *M = getOwningModuleFile(FirstLocal);
assert(M && "imported decl from no module file");
llvm::BitstreamCursor &Cursor = M->DeclsCursor;
SavedStreamPosition SavedPosition(Cursor);
if (llvm::Error JumpFailed = Cursor.JumpToBit(LocalOffset))
llvm::report_fatal_error(
Twine("ASTReader::loadPendingDeclChain failed jumping: ") +
toString(std::move(JumpFailed)));
RecordData Record;
Expected<unsigned> MaybeCode = Cursor.ReadCode();
if (!MaybeCode)
llvm::report_fatal_error(
Twine("ASTReader::loadPendingDeclChain failed reading code: ") +
toString(MaybeCode.takeError()));
unsigned Code = MaybeCode.get();
if (Expected<unsigned> MaybeRecCode = Cursor.readRecord(Code, Record))
assert(MaybeRecCode.get() == LOCAL_REDECLARATIONS &&
"expected LOCAL_REDECLARATIONS record!");
else
llvm::report_fatal_error(
Twine("ASTReader::loadPendingDeclChain failed reading rec code: ") +
toString(MaybeCode.takeError()));
// FIXME: We have several different dispatches on decl kind here; maybe
// we should instead generate one loop per kind and dispatch up-front?
Decl *MostRecent = FirstLocal;
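// Walk the record back to front so that each declaration read is attached
// as a redeclaration newer than everything attached so far; the entry at
// index 0 ends up as the most recent local declaration.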
for (unsigned I = 0, N = Record.size(); I != N; ++I) {
auto *D = GetLocalDecl(*M, Record[N - I - 1]);
ASTDeclReader::attachPreviousDecl(*this, D, MostRecent, CanonDecl);
MostRecent = D;
}
ASTDeclReader::attachLatestDecl(CanonDecl, MostRecent);
}
namespace {
/// Given an ObjC interface, goes through the modules and links to the
/// interface all the categories for it.
class ObjCCategoriesVisitor {
ASTReader &Reader;
ObjCInterfaceDecl *Interface;
llvm::SmallPtrSetImpl<ObjCCategoryDecl *> &Deserialized;
ObjCCategoryDecl *Tail = nullptr;
llvm::DenseMap<DeclarationName, ObjCCategoryDecl *> NameCategoryMap;
serialization::GlobalDeclID InterfaceID;
unsigned PreviousGeneration;
void add(ObjCCategoryDecl *Cat) {
// Only process each category once.
if (!Deserialized.erase(Cat))
return;
// Check for duplicate categories.
if (Cat->getDeclName()) {
ObjCCategoryDecl *&Existing = NameCategoryMap[Cat->getDeclName()];
if (Existing &&
Reader.getOwningModuleFile(Existing)
!= Reader.getOwningModuleFile(Cat)) {
// FIXME: We should not warn for duplicates in diamond:
//
// MT //
// / \ //
// ML MR //
// \ / //
// MB //
//
// If there are duplicates in ML/MR, there will be warning when
// creating MB *and* when importing MB. We should not warn when
// importing.
Reader.Diag(Cat->getLocation(), diag::warn_dup_category_def)
<< Interface->getDeclName() << Cat->getDeclName();
Reader.Diag(Existing->getLocation(), diag::note_previous_definition);
} else if (!Existing) {
// Record this category.
Existing = Cat;
}
}
// Add this category to the end of the chain.
if (Tail)
ASTDeclReader::setNextObjCCategory(Tail, Cat);
else
Interface->setCategoryListRaw(Cat);
Tail = Cat;
}
public:
ObjCCategoriesVisitor(ASTReader &Reader,
ObjCInterfaceDecl *Interface,
llvm::SmallPtrSetImpl<ObjCCategoryDecl *> &Deserialized,
serialization::GlobalDeclID InterfaceID,
unsigned PreviousGeneration)
: Reader(Reader), Interface(Interface), Deserialized(Deserialized),
InterfaceID(InterfaceID), PreviousGeneration(PreviousGeneration) {
// Populate the name -> category map with the set of known categories.
for (auto *Cat : Interface->known_categories()) {
if (Cat->getDeclName())
NameCategoryMap[Cat->getDeclName()] = Cat;
// Keep track of the tail of the category list.
Tail = Cat;
}
}
bool operator()(ModuleFile &M) {
// If we've loaded all of the category information we care about from
// this module file, we're done.
if (M.Generation <= PreviousGeneration)
return true;
// Map global ID of the definition down to the local ID used in this
// module file. If there is no such mapping, we'll find nothing here
// (or in any module it imports).
DeclID LocalID = Reader.mapGlobalIDToModuleFileGlobalID(M, InterfaceID);
if (!LocalID)
return true;
// Perform a binary search to find the local redeclarations for this
// declaration (if any).
const ObjCCategoriesInfo Compare = { LocalID, 0 };
const ObjCCategoriesInfo *Result
= std::lower_bound(M.ObjCCategoriesMap,
M.ObjCCategoriesMap + M.LocalNumObjCCategoriesInMap,
Compare);
if (Result == M.ObjCCategoriesMap + M.LocalNumObjCCategoriesInMap ||
Result->DefinitionID != LocalID) {
// We didn't find anything. If the class definition is in this module
// file, then the module files it depends on cannot have any categories,
// so suppress further lookup.
return Reader.isDeclIDFromModule(InterfaceID, M);
}
// We found something. Dig out all of the categories.
unsigned Offset = Result->Offset;
unsigned N = M.ObjCCategories[Offset];
M.ObjCCategories[Offset++] = 0; // Don't try to deserialize again
for (unsigned I = 0; I != N; ++I)
add(cast_or_null<ObjCCategoryDecl>(
Reader.GetLocalDecl(M, M.ObjCCategories[Offset++])));
return true;
}
};
} // namespace
void ASTReader::loadObjCCategories(serialization::GlobalDeclID ID,
ObjCInterfaceDecl *D,
unsigned PreviousGeneration) {
ObjCCategoriesVisitor Visitor(*this, D, CategoriesDeserialized, ID,
PreviousGeneration);
ModuleMgr.visit(Visitor);
}
template<typename DeclT, typename Fn>
static void forAllLaterRedecls(DeclT *D, Fn F) {
F(D);
// Check whether we've already merged D into its redeclaration chain: walk
// the chain starting from the most recent declaration and see whether D is
// on it.
auto *MostRecent = D->getMostRecentDecl();
bool Found = false;
for (auto *Redecl = MostRecent; Redecl && !Found;
Redecl = Redecl->getPreviousDecl())
Found = (Redecl == D);
// If this declaration is merged, apply the functor to all later decls.
if (Found) {
for (auto *Redecl = MostRecent; Redecl != D;
Redecl = Redecl->getPreviousDecl())
F(Redecl);
}
}
void ASTDeclReader::UpdateDecl(Decl *D,
llvm::SmallVectorImpl<serialization::DeclID> &PendingLazySpecializationIDs) {
while (Record.getIdx() < Record.size()) {
switch ((DeclUpdateKind)Record.readInt()) {
case UPD_CXX_ADDED_IMPLICIT_MEMBER: {
auto *RD = cast<CXXRecordDecl>(D);
// FIXME: If we also have an update record for instantiating the
// definition of D, we need that to happen before we get here.
Decl *MD = Record.readDecl();
assert(MD && "couldn't read decl from update record");
// FIXME: We should call addHiddenDecl instead, to add the member
// to its DeclContext.
RD->addedMember(MD);
break;
}
case UPD_CXX_ADDED_TEMPLATE_SPECIALIZATION:
// It will be added to the template's lazy specialization set.
PendingLazySpecializationIDs.push_back(readDeclID());
break;
case UPD_CXX_ADDED_ANONYMOUS_NAMESPACE: {
auto *Anon = readDeclAs<NamespaceDecl>();
// Each module has its own anonymous namespace, which is disjoint from
// any other module's anonymous namespaces, so don't attach the anonymous
// namespace at all.
if (!Record.isModule()) {
if (auto *TU = dyn_cast<TranslationUnitDecl>(D))
TU->setAnonymousNamespace(Anon);
else
cast<NamespaceDecl>(D)->setAnonymousNamespace(Anon);
}
break;
}
case UPD_CXX_ADDED_VAR_DEFINITION: {
auto *VD = cast<VarDecl>(D);
VD->NonParmVarDeclBits.IsInline = Record.readInt();
VD->NonParmVarDeclBits.IsInlineSpecified = Record.readInt();
uint64_t Val = Record.readInt();
if (Val && !VD->getInit()) {
VD->setInit(Record.readExpr());
if (Val != 1) {
EvaluatedStmt *Eval = VD->ensureEvaluatedStmt();
Eval->HasConstantInitialization = (Val & 2) != 0;
Eval->HasConstantDestruction = (Val & 4) != 0;
}
}
break;
}
case UPD_CXX_POINT_OF_INSTANTIATION: {
SourceLocation POI = Record.readSourceLocation();
if (auto *VTSD = dyn_cast<VarTemplateSpecializationDecl>(D)) {
VTSD->setPointOfInstantiation(POI);
} else if (auto *VD = dyn_cast<VarDecl>(D)) {
VD->getMemberSpecializationInfo()->setPointOfInstantiation(POI);
} else {
auto *FD = cast<FunctionDecl>(D);
if (auto *FTSInfo = FD->TemplateOrSpecialization
.dyn_cast<FunctionTemplateSpecializationInfo *>())
FTSInfo->setPointOfInstantiation(POI);
else
FD->TemplateOrSpecialization.get<MemberSpecializationInfo *>()
->setPointOfInstantiation(POI);
}
break;
}
case UPD_CXX_INSTANTIATED_DEFAULT_ARGUMENT: {
auto *Param = cast<ParmVarDecl>(D);
// We have to read the default argument regardless of whether we use it
// so that hypothetical further update records aren't messed up.
// TODO: Add a function to skip over the next expr record.
auto *DefaultArg = Record.readExpr();
// Only apply the update if the parameter still has an uninstantiated
// default argument.
if (Param->hasUninstantiatedDefaultArg())
Param->setDefaultArg(DefaultArg);
break;
}
case UPD_CXX_INSTANTIATED_DEFAULT_MEMBER_INITIALIZER: {
auto *FD = cast<FieldDecl>(D);
auto *DefaultInit = Record.readExpr();
// Only apply the update if the field still has an uninstantiated
// default member initializer.
if (FD->hasInClassInitializer() && !FD->getInClassInitializer()) {
if (DefaultInit)
FD->setInClassInitializer(DefaultInit);
else
// Instantiation failed. We can get here if we serialized an AST for
// an invalid program.
FD->removeInClassInitializer();
}
break;
}
case UPD_CXX_ADDED_FUNCTION_DEFINITION: {
auto *FD = cast<FunctionDecl>(D);
if (Reader.PendingBodies[FD]) {
// FIXME: Maybe check for ODR violations.
// It's safe to stop now because this update record is always last.
return;
}
if (Record.readInt()) {
// Maintain AST consistency: any later redeclarations of this function
// are inline if this one is. (We might have merged another declaration
// into this one.)
forAllLaterRedecls(FD, [](FunctionDecl *FD) {
FD->setImplicitlyInline();
});
}
FD->setInnerLocStart(readSourceLocation());
ReadFunctionDefinition(FD);
assert(Record.getIdx() == Record.size() && "lazy body must be last");
break;
}
case UPD_CXX_INSTANTIATED_CLASS_DEFINITION: {
auto *RD = cast<CXXRecordDecl>(D);
auto *OldDD = RD->getCanonicalDecl()->DefinitionData;
bool HadRealDefinition =
OldDD && (OldDD->Definition != RD ||
!Reader.PendingFakeDefinitionData.count(OldDD));
RD->setParamDestroyedInCallee(Record.readInt());
RD->setArgPassingRestrictions(
(RecordDecl::ArgPassingKind)Record.readInt());
ReadCXXRecordDefinition(RD, /*Update*/true);
// Visible update is handled separately.
uint64_t LexicalOffset = ReadLocalOffset();
if (!HadRealDefinition && LexicalOffset) {
Record.readLexicalDeclContextStorage(LexicalOffset, RD);
Reader.PendingFakeDefinitionData.erase(OldDD);
}
auto TSK = (TemplateSpecializationKind)Record.readInt();
SourceLocation POI = readSourceLocation();
if (MemberSpecializationInfo *MSInfo =
RD->getMemberSpecializationInfo()) {
MSInfo->setTemplateSpecializationKind(TSK);
MSInfo->setPointOfInstantiation(POI);
} else {
auto *Spec = cast<ClassTemplateSpecializationDecl>(RD);
Spec->setTemplateSpecializationKind(TSK);
Spec->setPointOfInstantiation(POI);
if (Record.readInt()) {
auto *PartialSpec =
readDeclAs<ClassTemplatePartialSpecializationDecl>();
SmallVector<TemplateArgument, 8> TemplArgs;
Record.readTemplateArgumentList(TemplArgs);
auto *TemplArgList = TemplateArgumentList::CreateCopy(
Reader.getContext(), TemplArgs);
// FIXME: If we already have a partial specialization set,
// check that it matches.
if (!Spec->getSpecializedTemplateOrPartial()
.is<ClassTemplatePartialSpecializationDecl *>())
Spec->setInstantiationOf(PartialSpec, TemplArgList);
}
}
RD->setTagKind((TagTypeKind)Record.readInt());
RD->setLocation(readSourceLocation());
RD->setLocStart(readSourceLocation());
RD->setBraceRange(readSourceRange());
if (Record.readInt()) {
AttrVec Attrs;
Record.readAttributes(Attrs);
// If the declaration already has attributes, we assume that some other
// AST file already loaded them.
if (!D->hasAttrs())
D->setAttrsImpl(Attrs, Reader.getContext());
}
break;
}
case UPD_CXX_RESOLVED_DTOR_DELETE: {
// Set the 'operator delete' directly to avoid emitting another update
// record.
auto *Del = readDeclAs<FunctionDecl>();
auto *First = cast<CXXDestructorDecl>(D->getCanonicalDecl());
auto *ThisArg = Record.readExpr();
// FIXME: Check consistency if we have an old and new operator delete.
if (!First->OperatorDelete) {
First->OperatorDelete = Del;
First->OperatorDeleteThisArg = ThisArg;
}
break;
}
case UPD_CXX_RESOLVED_EXCEPTION_SPEC: {
SmallVector<QualType, 8> ExceptionStorage;
auto ESI = Record.readExceptionSpecInfo(ExceptionStorage);
// Update this declaration's exception specification, if needed.
auto *FD = cast<FunctionDecl>(D);
auto *FPT = FD->getType()->castAs<FunctionProtoType>();
// FIXME: If the exception specification is already present, check that it
// matches.
if (isUnresolvedExceptionSpec(FPT->getExceptionSpecType())) {
FD->setType(Reader.getContext().getFunctionType(
FPT->getReturnType(), FPT->getParamTypes(),
FPT->getExtProtoInfo().withExceptionSpec(ESI)));
// When we get to the end of deserializing, see if there are other decls
// that we need to propagate this exception specification onto.
Reader.PendingExceptionSpecUpdates.insert(
std::make_pair(FD->getCanonicalDecl(), FD));
}
break;
}
case UPD_CXX_DEDUCED_RETURN_TYPE: {
auto *FD = cast<FunctionDecl>(D);
QualType DeducedResultType = Record.readType();
Reader.PendingDeducedTypeUpdates.insert(
{FD->getCanonicalDecl(), DeducedResultType});
break;
}
case UPD_DECL_MARKED_USED:
// Maintain AST consistency: any later redeclarations are used too.
D->markUsed(Reader.getContext());
break;
case UPD_MANGLING_NUMBER:
Reader.getContext().setManglingNumber(cast<NamedDecl>(D),
Record.readInt());
break;
case UPD_STATIC_LOCAL_NUMBER:
Reader.getContext().setStaticLocalNumber(cast<VarDecl>(D),
Record.readInt());
break;
case UPD_DECL_MARKED_OPENMP_THREADPRIVATE:
D->addAttr(OMPThreadPrivateDeclAttr::CreateImplicit(
Reader.getContext(), readSourceRange(),
AttributeCommonInfo::AS_Pragma));
break;
case UPD_DECL_MARKED_OPENMP_ALLOCATE: {
auto AllocatorKind =
static_cast<OMPAllocateDeclAttr::AllocatorTypeTy>(Record.readInt());
Expr *Allocator = Record.readExpr();
Expr *Alignment = Record.readExpr();
SourceRange SR = readSourceRange();
D->addAttr(OMPAllocateDeclAttr::CreateImplicit(
Reader.getContext(), AllocatorKind, Allocator, Alignment, SR,
AttributeCommonInfo::AS_Pragma));
break;
}
case UPD_DECL_EXPORTED: {
unsigned SubmoduleID = readSubmoduleID();
auto *Exported = cast<NamedDecl>(D);
Module *Owner = SubmoduleID ? Reader.getSubmodule(SubmoduleID) : nullptr;
Reader.getContext().mergeDefinitionIntoModule(Exported, Owner);
Reader.PendingMergedDefinitionsToDeduplicate.insert(Exported);
break;
}
case UPD_DECL_MARKED_OPENMP_DECLARETARGET: {
auto MapType = Record.readEnum<OMPDeclareTargetDeclAttr::MapTypeTy>();
auto DevType = Record.readEnum<OMPDeclareTargetDeclAttr::DevTypeTy>();
Expr *IndirectE = Record.readExpr();
bool Indirect = Record.readBool();
unsigned Level = Record.readInt();
D->addAttr(OMPDeclareTargetDeclAttr::CreateImplicit(
Reader.getContext(), MapType, DevType, IndirectE, Indirect, Level,
readSourceRange(), AttributeCommonInfo::AS_Pragma));
break;
}
case UPD_ADDED_ATTR_TO_RECORD:
AttrVec Attrs;
Record.readAttributes(Attrs);
assert(Attrs.size() == 1);
D->addAttr(Attrs[0]);
break;
}
}
}
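// A minimal sketch of the record shape the UpdateDecl loop above consumes:
// each update starts with a kind tag, and every case must read exactly its
// own payload so that the following updates stay aligned (which is why the
// default-argument case above reads its expression even when it is
// discarded). ToyUpdateKind, ToyDecl and applyToyUpdates are illustrative.
#include <cstddef>
#include <cstdint>
#include <vector>
enum ToyUpdateKind : uint64_t { TOY_MARK_USED, TOY_SET_NUMBER };
struct ToyDecl {
bool Used = false;
uint64_t Number = 0;
};
void applyToyUpdates(ToyDecl &D, const std::vector<uint64_t> &Record) {
std::size_t Idx = 0;
while (Idx < Record.size()) {
switch (static_cast<ToyUpdateKind>(Record[Idx++])) {
case TOY_MARK_USED:
D.Used = true;            // no payload
break;
case TOY_SET_NUMBER:
D.Number = Record[Idx++]; // one-integer payload
break;
}
}
}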
diff --git a/contrib/llvm-project/libcxx/include/__algorithm/comp_ref_type.h b/contrib/llvm-project/libcxx/include/__algorithm/comp_ref_type.h
index 6cc6405686f5..0802d2496f5c 100644
--- a/contrib/llvm-project/libcxx/include/__algorithm/comp_ref_type.h
+++ b/contrib/llvm-project/libcxx/include/__algorithm/comp_ref_type.h
@@ -1,85 +1,85 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP___ALGORITHM_COMP_REF_TYPE_H
#define _LIBCPP___ALGORITHM_COMP_REF_TYPE_H
#include <__config>
#ifdef _LIBCPP_DEBUG
# include <__debug>
# include <__utility/declval.h>
#endif
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
_LIBCPP_BEGIN_NAMESPACE_STD
#ifdef _LIBCPP_DEBUG
template <class _Compare>
struct __debug_less
{
_Compare &__comp_;
- _LIBCPP_CONSTEXPR_AFTER_CXX17
+ _LIBCPP_CONSTEXPR_AFTER_CXX11
__debug_less(_Compare& __c) : __comp_(__c) {}
template <class _Tp, class _Up>
- _LIBCPP_CONSTEXPR_AFTER_CXX17
+ _LIBCPP_CONSTEXPR_AFTER_CXX11
bool operator()(const _Tp& __x, const _Up& __y)
{
bool __r = __comp_(__x, __y);
if (__r)
__do_compare_assert(0, __y, __x);
return __r;
}
template <class _Tp, class _Up>
- _LIBCPP_CONSTEXPR_AFTER_CXX17
+ _LIBCPP_CONSTEXPR_AFTER_CXX11
bool operator()(_Tp& __x, _Up& __y)
{
bool __r = __comp_(__x, __y);
if (__r)
__do_compare_assert(0, __y, __x);
return __r;
}
template <class _LHS, class _RHS>
- _LIBCPP_CONSTEXPR_AFTER_CXX17
+ _LIBCPP_CONSTEXPR_AFTER_CXX11
inline _LIBCPP_INLINE_VISIBILITY
decltype((void)declval<_Compare&>()(
declval<_LHS &>(), declval<_RHS &>()))
__do_compare_assert(int, _LHS & __l, _RHS & __r) {
_LIBCPP_ASSERT(!__comp_(__l, __r),
"Comparator does not induce a strict weak ordering");
}
template <class _LHS, class _RHS>
- _LIBCPP_CONSTEXPR_AFTER_CXX17
+ _LIBCPP_CONSTEXPR_AFTER_CXX11
inline _LIBCPP_INLINE_VISIBILITY
void __do_compare_assert(long, _LHS &, _RHS &) {}
};
#endif // _LIBCPP_DEBUG
template <class _Comp>
struct __comp_ref_type {
// Pass the comparator by lvalue reference, or, in debug mode, by a
// debugging wrapper that stores a reference.
#ifndef _LIBCPP_DEBUG
typedef _Comp& type;
#else
typedef __debug_less<_Comp> type;
#endif
};
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP___ALGORITHM_COMP_REF_TYPE_H
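// A minimal standalone sketch of the property __debug_less above checks:
// whenever comp(x, y) is true, a strict weak ordering requires !comp(y, x),
// so the wrapper evaluates the reversed comparison and asserts if both hold.
// checked_less is illustrative and not part of libc++.
#include <algorithm>
#include <cassert>
#include <functional>
#include <vector>
template <class Compare>
struct checked_less {
Compare comp;
template <class Tp, class Up>
bool operator()(const Tp &x, const Up &y) {
bool r = comp(x, y);
if (r)
assert(!comp(y, x) && "comparator does not induce a strict weak ordering");
return r;
}
};
int main() {
std::vector<int> v = {3, 1, 2};
// A well-behaved comparator never trips the assertion; something like
// operator<= can, because both orderings hold for equal elements.
std::sort(v.begin(), v.end(), checked_less<std::less<int> >{std::less<int>()});
}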
diff --git a/contrib/llvm-project/libcxx/src/filesystem/operations.cpp b/contrib/llvm-project/libcxx/src/filesystem/operations.cpp
index 7aeeffaae8f3..39fb5739739b 100644
--- a/contrib/llvm-project/libcxx/src/filesystem/operations.cpp
+++ b/contrib/llvm-project/libcxx/src/filesystem/operations.cpp
@@ -1,2098 +1,2100 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "filesystem"
#include "array"
#include "iterator"
#include "string_view"
#include "type_traits"
#include "vector"
#include "cstdlib"
#include "climits"
#include "filesystem_common.h"
#include "posix_compat.h"
#if defined(_LIBCPP_WIN32API)
# define WIN32_LEAN_AND_MEAN
# define NOMINMAX
# include <windows.h>
#else
# include <dirent.h>
# include <sys/stat.h>
# include <sys/statvfs.h>
# include <unistd.h>
#endif
#include <time.h>
#include <fcntl.h> /* values for fchmodat */
#if __has_include(<sys/sendfile.h>)
# include <sys/sendfile.h>
# define _LIBCPP_FILESYSTEM_USE_SENDFILE
#elif defined(__APPLE__) || __has_include(<copyfile.h>)
# include <copyfile.h>
# define _LIBCPP_FILESYSTEM_USE_COPYFILE
#else
# include "fstream"
# define _LIBCPP_FILESYSTEM_USE_FSTREAM
#endif
#if !defined(CLOCK_REALTIME) && !defined(_LIBCPP_WIN32API)
# include <sys/time.h> // for gettimeofday and timeval
#endif
#if defined(__ELF__) && defined(_LIBCPP_LINK_RT_LIB)
# pragma comment(lib, "rt")
#endif
_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
namespace {
bool isSeparator(path::value_type C) {
if (C == '/')
return true;
#if defined(_LIBCPP_WIN32API)
if (C == '\\')
return true;
#endif
return false;
}
bool isDriveLetter(path::value_type C) {
return (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z');
}
namespace parser {
using string_view_t = path::__string_view;
using string_view_pair = pair<string_view_t, string_view_t>;
using PosPtr = path::value_type const*;
struct PathParser {
enum ParserState : unsigned char {
// Zero is a special sentinel value used by default constructed iterators.
PS_BeforeBegin = path::iterator::_BeforeBegin,
PS_InRootName = path::iterator::_InRootName,
PS_InRootDir = path::iterator::_InRootDir,
PS_InFilenames = path::iterator::_InFilenames,
PS_InTrailingSep = path::iterator::_InTrailingSep,
PS_AtEnd = path::iterator::_AtEnd
};
const string_view_t Path;
string_view_t RawEntry;
ParserState State;
private:
PathParser(string_view_t P, ParserState State) noexcept : Path(P),
State(State) {}
public:
PathParser(string_view_t P, string_view_t E, unsigned char S)
: Path(P), RawEntry(E), State(static_cast<ParserState>(S)) {
// S cannot be '0' or PS_BeforeBegin.
}
static PathParser CreateBegin(string_view_t P) noexcept {
PathParser PP(P, PS_BeforeBegin);
PP.increment();
return PP;
}
static PathParser CreateEnd(string_view_t P) noexcept {
PathParser PP(P, PS_AtEnd);
return PP;
}
PosPtr peek() const noexcept {
auto TkEnd = getNextTokenStartPos();
auto End = getAfterBack();
return TkEnd == End ? nullptr : TkEnd;
}
void increment() noexcept {
const PosPtr End = getAfterBack();
const PosPtr Start = getNextTokenStartPos();
if (Start == End)
return makeState(PS_AtEnd);
switch (State) {
case PS_BeforeBegin: {
PosPtr TkEnd = consumeRootName(Start, End);
if (TkEnd)
return makeState(PS_InRootName, Start, TkEnd);
}
_LIBCPP_FALLTHROUGH();
case PS_InRootName: {
PosPtr TkEnd = consumeAllSeparators(Start, End);
if (TkEnd)
return makeState(PS_InRootDir, Start, TkEnd);
else
return makeState(PS_InFilenames, Start, consumeName(Start, End));
}
case PS_InRootDir:
return makeState(PS_InFilenames, Start, consumeName(Start, End));
case PS_InFilenames: {
PosPtr SepEnd = consumeAllSeparators(Start, End);
if (SepEnd != End) {
PosPtr TkEnd = consumeName(SepEnd, End);
if (TkEnd)
return makeState(PS_InFilenames, SepEnd, TkEnd);
}
return makeState(PS_InTrailingSep, Start, SepEnd);
}
case PS_InTrailingSep:
return makeState(PS_AtEnd);
case PS_AtEnd:
_LIBCPP_UNREACHABLE();
}
}
void decrement() noexcept {
const PosPtr REnd = getBeforeFront();
const PosPtr RStart = getCurrentTokenStartPos() - 1;
if (RStart == REnd) // we're decrementing the begin
return makeState(PS_BeforeBegin);
switch (State) {
case PS_AtEnd: {
// Try to consume a trailing separator or root directory first.
if (PosPtr SepEnd = consumeAllSeparators(RStart, REnd)) {
if (SepEnd == REnd)
return makeState(PS_InRootDir, Path.data(), RStart + 1);
PosPtr TkStart = consumeRootName(SepEnd, REnd);
if (TkStart == REnd)
return makeState(PS_InRootDir, RStart, RStart + 1);
return makeState(PS_InTrailingSep, SepEnd + 1, RStart + 1);
} else {
PosPtr TkStart = consumeRootName(RStart, REnd);
if (TkStart == REnd)
return makeState(PS_InRootName, TkStart + 1, RStart + 1);
TkStart = consumeName(RStart, REnd);
return makeState(PS_InFilenames, TkStart + 1, RStart + 1);
}
}
case PS_InTrailingSep:
return makeState(PS_InFilenames, consumeName(RStart, REnd) + 1,
RStart + 1);
case PS_InFilenames: {
PosPtr SepEnd = consumeAllSeparators(RStart, REnd);
if (SepEnd == REnd)
return makeState(PS_InRootDir, Path.data(), RStart + 1);
PosPtr TkStart = consumeRootName(SepEnd ? SepEnd : RStart, REnd);
if (TkStart == REnd) {
if (SepEnd)
return makeState(PS_InRootDir, SepEnd + 1, RStart + 1);
return makeState(PS_InRootName, TkStart + 1, RStart + 1);
}
TkStart = consumeName(SepEnd, REnd);
return makeState(PS_InFilenames, TkStart + 1, SepEnd + 1);
}
case PS_InRootDir:
return makeState(PS_InRootName, Path.data(), RStart + 1);
case PS_InRootName:
case PS_BeforeBegin:
_LIBCPP_UNREACHABLE();
}
}
/// \brief Return a view with the "preferred representation" of the current
/// element. For example trailing separators are represented as a '.'
string_view_t operator*() const noexcept {
switch (State) {
case PS_BeforeBegin:
case PS_AtEnd:
return PS("");
case PS_InRootDir:
if (RawEntry[0] == '\\')
return PS("\\");
else
return PS("/");
case PS_InTrailingSep:
return PS("");
case PS_InRootName:
case PS_InFilenames:
return RawEntry;
}
_LIBCPP_UNREACHABLE();
}
explicit operator bool() const noexcept {
return State != PS_BeforeBegin && State != PS_AtEnd;
}
PathParser& operator++() noexcept {
increment();
return *this;
}
PathParser& operator--() noexcept {
decrement();
return *this;
}
bool atEnd() const noexcept {
return State == PS_AtEnd;
}
bool inRootDir() const noexcept {
return State == PS_InRootDir;
}
bool inRootName() const noexcept {
return State == PS_InRootName;
}
bool inRootPath() const noexcept {
return inRootName() || inRootDir();
}
private:
void makeState(ParserState NewState, PosPtr Start, PosPtr End) noexcept {
State = NewState;
RawEntry = string_view_t(Start, End - Start);
}
void makeState(ParserState NewState) noexcept {
State = NewState;
RawEntry = {};
}
PosPtr getAfterBack() const noexcept { return Path.data() + Path.size(); }
PosPtr getBeforeFront() const noexcept { return Path.data() - 1; }
/// \brief Return a pointer to the first character after the currently
/// lexed element.
PosPtr getNextTokenStartPos() const noexcept {
switch (State) {
case PS_BeforeBegin:
return Path.data();
case PS_InRootName:
case PS_InRootDir:
case PS_InFilenames:
return &RawEntry.back() + 1;
case PS_InTrailingSep:
case PS_AtEnd:
return getAfterBack();
}
_LIBCPP_UNREACHABLE();
}
/// \brief Return a pointer to the first character in the currently lexed
/// element.
PosPtr getCurrentTokenStartPos() const noexcept {
switch (State) {
case PS_BeforeBegin:
case PS_InRootName:
return &Path.front();
case PS_InRootDir:
case PS_InFilenames:
case PS_InTrailingSep:
return &RawEntry.front();
case PS_AtEnd:
return &Path.back() + 1;
}
_LIBCPP_UNREACHABLE();
}
// Consume all consecutive separators.
PosPtr consumeAllSeparators(PosPtr P, PosPtr End) const noexcept {
if (P == nullptr || P == End || !isSeparator(*P))
return nullptr;
const int Inc = P < End ? 1 : -1;
P += Inc;
while (P != End && isSeparator(*P))
P += Inc;
return P;
}
// Consume exactly N separators, or return nullptr.
PosPtr consumeNSeparators(PosPtr P, PosPtr End, int N) const noexcept {
PosPtr Ret = consumeAllSeparators(P, End);
if (Ret == nullptr)
return nullptr;
if (P < End) {
if (Ret == P + N)
return Ret;
} else {
if (Ret == P - N)
return Ret;
}
return nullptr;
}
PosPtr consumeName(PosPtr P, PosPtr End) const noexcept {
PosPtr Start = P;
if (P == nullptr || P == End || isSeparator(*P))
return nullptr;
const int Inc = P < End ? 1 : -1;
P += Inc;
while (P != End && !isSeparator(*P))
P += Inc;
if (P == End && Inc < 0) {
// Iterating backwards and consumed all the rest of the input.
// Check if the start of the string would have been considered
// a root name.
PosPtr RootEnd = consumeRootName(End + 1, Start);
if (RootEnd)
return RootEnd - 1;
}
return P;
}
PosPtr consumeDriveLetter(PosPtr P, PosPtr End) const noexcept {
if (P == End)
return nullptr;
if (P < End) {
if (P + 1 == End || !isDriveLetter(P[0]) || P[1] != ':')
return nullptr;
return P + 2;
} else {
if (P - 1 == End || !isDriveLetter(P[-1]) || P[0] != ':')
return nullptr;
return P - 2;
}
}
PosPtr consumeNetworkRoot(PosPtr P, PosPtr End) const noexcept {
if (P == End)
return nullptr;
if (P < End)
return consumeName(consumeNSeparators(P, End, 2), End);
else
return consumeNSeparators(consumeName(P, End), End, 2);
}
PosPtr consumeRootName(PosPtr P, PosPtr End) const noexcept {
#if defined(_LIBCPP_WIN32API)
if (PosPtr Ret = consumeDriveLetter(P, End))
return Ret;
if (PosPtr Ret = consumeNetworkRoot(P, End))
return Ret;
#endif
return nullptr;
}
};
string_view_pair separate_filename(string_view_t const& s) {
if (s == PS(".") || s == PS("..") || s.empty())
return string_view_pair{s, PS("")};
auto pos = s.find_last_of('.');
if (pos == string_view_t::npos || pos == 0)
return string_view_pair{s, string_view_t{}};
return string_view_pair{s.substr(0, pos), s.substr(pos)};
}
string_view_t createView(PosPtr S, PosPtr E) noexcept {
return {S, static_cast<size_t>(E - S) + 1};
}
} // namespace parser
} // namespace
// POSIX HELPERS
#if defined(_LIBCPP_WIN32API)
namespace detail {
errc __win_err_to_errc(int err) {
constexpr struct {
DWORD win;
errc errc;
} win_error_mapping[] = {
{ERROR_ACCESS_DENIED, errc::permission_denied},
{ERROR_ALREADY_EXISTS, errc::file_exists},
{ERROR_BAD_NETPATH, errc::no_such_file_or_directory},
{ERROR_BAD_PATHNAME, errc::no_such_file_or_directory},
{ERROR_BAD_UNIT, errc::no_such_device},
{ERROR_BROKEN_PIPE, errc::broken_pipe},
{ERROR_BUFFER_OVERFLOW, errc::filename_too_long},
{ERROR_BUSY, errc::device_or_resource_busy},
{ERROR_BUSY_DRIVE, errc::device_or_resource_busy},
{ERROR_CANNOT_MAKE, errc::permission_denied},
{ERROR_CANTOPEN, errc::io_error},
{ERROR_CANTREAD, errc::io_error},
{ERROR_CANTWRITE, errc::io_error},
{ERROR_CURRENT_DIRECTORY, errc::permission_denied},
{ERROR_DEV_NOT_EXIST, errc::no_such_device},
{ERROR_DEVICE_IN_USE, errc::device_or_resource_busy},
{ERROR_DIR_NOT_EMPTY, errc::directory_not_empty},
{ERROR_DIRECTORY, errc::invalid_argument},
{ERROR_DISK_FULL, errc::no_space_on_device},
{ERROR_FILE_EXISTS, errc::file_exists},
{ERROR_FILE_NOT_FOUND, errc::no_such_file_or_directory},
{ERROR_HANDLE_DISK_FULL, errc::no_space_on_device},
{ERROR_INVALID_ACCESS, errc::permission_denied},
{ERROR_INVALID_DRIVE, errc::no_such_device},
{ERROR_INVALID_FUNCTION, errc::function_not_supported},
{ERROR_INVALID_HANDLE, errc::invalid_argument},
{ERROR_INVALID_NAME, errc::no_such_file_or_directory},
{ERROR_INVALID_PARAMETER, errc::invalid_argument},
{ERROR_LOCK_VIOLATION, errc::no_lock_available},
{ERROR_LOCKED, errc::no_lock_available},
{ERROR_NEGATIVE_SEEK, errc::invalid_argument},
{ERROR_NOACCESS, errc::permission_denied},
{ERROR_NOT_ENOUGH_MEMORY, errc::not_enough_memory},
{ERROR_NOT_READY, errc::resource_unavailable_try_again},
{ERROR_NOT_SAME_DEVICE, errc::cross_device_link},
{ERROR_NOT_SUPPORTED, errc::not_supported},
{ERROR_OPEN_FAILED, errc::io_error},
{ERROR_OPEN_FILES, errc::device_or_resource_busy},
{ERROR_OPERATION_ABORTED, errc::operation_canceled},
{ERROR_OUTOFMEMORY, errc::not_enough_memory},
{ERROR_PATH_NOT_FOUND, errc::no_such_file_or_directory},
{ERROR_READ_FAULT, errc::io_error},
{ERROR_REPARSE_TAG_INVALID, errc::invalid_argument},
{ERROR_RETRY, errc::resource_unavailable_try_again},
{ERROR_SEEK, errc::io_error},
{ERROR_SHARING_VIOLATION, errc::permission_denied},
{ERROR_TOO_MANY_OPEN_FILES, errc::too_many_files_open},
{ERROR_WRITE_FAULT, errc::io_error},
{ERROR_WRITE_PROTECT, errc::permission_denied},
};
for (const auto &pair : win_error_mapping)
if (pair.win == static_cast<DWORD>(err))
return pair.errc;
return errc::invalid_argument;
}
} // namespace detail
#endif
namespace detail {
namespace {
using value_type = path::value_type;
using string_type = path::string_type;
struct FileDescriptor {
const path& name;
int fd = -1;
StatT m_stat;
file_status m_status;
template <class... Args>
static FileDescriptor create(const path* p, error_code& ec, Args... args) {
ec.clear();
int fd;
if ((fd = detail::open(p->c_str(), args...)) == -1) {
ec = capture_errno();
return FileDescriptor{p};
}
return FileDescriptor(p, fd);
}
template <class... Args>
static FileDescriptor create_with_status(const path* p, error_code& ec,
Args... args) {
FileDescriptor fd = create(p, ec, args...);
if (!ec)
fd.refresh_status(ec);
return fd;
}
file_status get_status() const { return m_status; }
StatT const& get_stat() const { return m_stat; }
bool status_known() const { return _VSTD_FS::status_known(m_status); }
file_status refresh_status(error_code& ec);
void close() noexcept {
if (fd != -1)
detail::close(fd);
fd = -1;
}
FileDescriptor(FileDescriptor&& other)
: name(other.name), fd(other.fd), m_stat(other.m_stat),
m_status(other.m_status) {
other.fd = -1;
other.m_status = file_status{};
}
~FileDescriptor() { close(); }
FileDescriptor(FileDescriptor const&) = delete;
FileDescriptor& operator=(FileDescriptor const&) = delete;
private:
explicit FileDescriptor(const path* p, int fd = -1) : name(*p), fd(fd) {}
};
perms posix_get_perms(const StatT& st) noexcept {
return static_cast<perms>(st.st_mode) & perms::mask;
}
file_status create_file_status(error_code& m_ec, path const& p,
const StatT& path_stat, error_code* ec) {
if (ec)
*ec = m_ec;
if (m_ec && (m_ec.value() == ENOENT || m_ec.value() == ENOTDIR)) {
return file_status(file_type::not_found);
} else if (m_ec) {
ErrorHandler<void> err("posix_stat", ec, &p);
err.report(m_ec, "failed to determine attributes for the specified path");
return file_status(file_type::none);
}
// else
file_status fs_tmp;
auto const mode = path_stat.st_mode;
if (S_ISLNK(mode))
fs_tmp.type(file_type::symlink);
else if (S_ISREG(mode))
fs_tmp.type(file_type::regular);
else if (S_ISDIR(mode))
fs_tmp.type(file_type::directory);
else if (S_ISBLK(mode))
fs_tmp.type(file_type::block);
else if (S_ISCHR(mode))
fs_tmp.type(file_type::character);
else if (S_ISFIFO(mode))
fs_tmp.type(file_type::fifo);
else if (S_ISSOCK(mode))
fs_tmp.type(file_type::socket);
else
fs_tmp.type(file_type::unknown);
fs_tmp.permissions(detail::posix_get_perms(path_stat));
return fs_tmp;
}
file_status posix_stat(path const& p, StatT& path_stat, error_code* ec) {
error_code m_ec;
if (detail::stat(p.c_str(), &path_stat) == -1)
m_ec = detail::capture_errno();
return create_file_status(m_ec, p, path_stat, ec);
}
file_status posix_stat(path const& p, error_code* ec) {
StatT path_stat;
return posix_stat(p, path_stat, ec);
}
file_status posix_lstat(path const& p, StatT& path_stat, error_code* ec) {
error_code m_ec;
if (detail::lstat(p.c_str(), &path_stat) == -1)
m_ec = detail::capture_errno();
return create_file_status(m_ec, p, path_stat, ec);
}
file_status posix_lstat(path const& p, error_code* ec) {
StatT path_stat;
return posix_lstat(p, path_stat, ec);
}
// http://pubs.opengroup.org/onlinepubs/9699919799/functions/ftruncate.html
bool posix_ftruncate(const FileDescriptor& fd, off_t to_size, error_code& ec) {
if (detail::ftruncate(fd.fd, to_size) == -1) {
ec = capture_errno();
return true;
}
ec.clear();
return false;
}
bool posix_fchmod(const FileDescriptor& fd, const StatT& st, error_code& ec) {
if (detail::fchmod(fd.fd, st.st_mode) == -1) {
ec = capture_errno();
return true;
}
ec.clear();
return false;
}
bool stat_equivalent(const StatT& st1, const StatT& st2) {
return (st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
}
file_status FileDescriptor::refresh_status(error_code& ec) {
// FD must be open and good.
m_status = file_status{};
m_stat = {};
error_code m_ec;
if (detail::fstat(fd, &m_stat) == -1)
m_ec = capture_errno();
m_status = create_file_status(m_ec, name, m_stat, &ec);
return m_status;
}
} // namespace
} // end namespace detail
using detail::capture_errno;
using detail::ErrorHandler;
using detail::StatT;
using detail::TimeSpec;
using parser::createView;
using parser::PathParser;
using parser::string_view_t;
const bool _FilesystemClock::is_steady;
_FilesystemClock::time_point _FilesystemClock::now() noexcept {
typedef chrono::duration<rep> __secs;
#if defined(_LIBCPP_WIN32API)
typedef chrono::duration<rep, nano> __nsecs;
FILETIME time;
GetSystemTimeAsFileTime(&time);
TimeSpec tp = detail::filetime_to_timespec(time);
return time_point(__secs(tp.tv_sec) +
chrono::duration_cast<duration>(__nsecs(tp.tv_nsec)));
#elif defined(CLOCK_REALTIME)
typedef chrono::duration<rep, nano> __nsecs;
struct timespec tp;
if (0 != clock_gettime(CLOCK_REALTIME, &tp))
__throw_system_error(errno, "clock_gettime(CLOCK_REALTIME) failed");
return time_point(__secs(tp.tv_sec) +
chrono::duration_cast<duration>(__nsecs(tp.tv_nsec)));
#else
typedef chrono::duration<rep, micro> __microsecs;
timeval tv;
gettimeofday(&tv, 0);
return time_point(__secs(tv.tv_sec) + __microsecs(tv.tv_usec));
#endif // CLOCK_REALTIME
}
filesystem_error::~filesystem_error() {}
void filesystem_error::__create_what(int __num_paths) {
const char* derived_what = system_error::what();
__storage_->__what_ = [&]() -> string {
switch (__num_paths) {
case 0:
return detail::format_string("filesystem error: %s", derived_what);
case 1:
return detail::format_string("filesystem error: %s [" PATH_CSTR_FMT "]",
derived_what, path1().c_str());
case 2:
return detail::format_string("filesystem error: %s [" PATH_CSTR_FMT "] [" PATH_CSTR_FMT "]",
derived_what, path1().c_str(), path2().c_str());
}
_LIBCPP_UNREACHABLE();
}();
}
static path __do_absolute(const path& p, path* cwd, error_code* ec) {
if (ec)
ec->clear();
if (p.is_absolute())
return p;
*cwd = __current_path(ec);
if (ec && *ec)
return {};
return (*cwd) / p;
}
path __absolute(const path& p, error_code* ec) {
path cwd;
return __do_absolute(p, &cwd, ec);
}
path __canonical(path const& orig_p, error_code* ec) {
path cwd;
ErrorHandler<path> err("canonical", ec, &orig_p, &cwd);
path p = __do_absolute(orig_p, &cwd, ec);
#if (defined(_POSIX_VERSION) && _POSIX_VERSION >= 200112) || defined(_LIBCPP_WIN32API)
std::unique_ptr<path::value_type, decltype(&::free)>
hold(detail::realpath(p.c_str(), nullptr), &::free);
if (hold.get() == nullptr)
return err.report(capture_errno());
return {hold.get()};
#else
#if defined(__MVS__) && !defined(PATH_MAX)
path::value_type buff[ _XOPEN_PATH_MAX + 1 ];
#else
path::value_type buff[PATH_MAX + 1];
#endif
path::value_type* ret;
if ((ret = detail::realpath(p.c_str(), buff)) == nullptr)
return err.report(capture_errno());
return {ret};
#endif
}
void __copy(const path& from, const path& to, copy_options options,
error_code* ec) {
ErrorHandler<void> err("copy", ec, &from, &to);
const bool sym_status = bool(
options & (copy_options::create_symlinks | copy_options::skip_symlinks));
const bool sym_status2 = bool(options & copy_options::copy_symlinks);
error_code m_ec1;
StatT f_st = {};
const file_status f = sym_status || sym_status2
? detail::posix_lstat(from, f_st, &m_ec1)
: detail::posix_stat(from, f_st, &m_ec1);
if (m_ec1)
return err.report(m_ec1);
StatT t_st = {};
const file_status t = sym_status ? detail::posix_lstat(to, t_st, &m_ec1)
: detail::posix_stat(to, t_st, &m_ec1);
if (not status_known(t))
return err.report(m_ec1);
if (!exists(f) || is_other(f) || is_other(t) ||
(is_directory(f) && is_regular_file(t)) ||
detail::stat_equivalent(f_st, t_st)) {
return err.report(errc::function_not_supported);
}
if (ec)
ec->clear();
if (is_symlink(f)) {
if (bool(copy_options::skip_symlinks & options)) {
// do nothing
} else if (not exists(t)) {
__copy_symlink(from, to, ec);
} else {
return err.report(errc::file_exists);
}
return;
} else if (is_regular_file(f)) {
if (bool(copy_options::directories_only & options)) {
// do nothing
} else if (bool(copy_options::create_symlinks & options)) {
__create_symlink(from, to, ec);
} else if (bool(copy_options::create_hard_links & options)) {
__create_hard_link(from, to, ec);
} else if (is_directory(t)) {
__copy_file(from, to / from.filename(), options, ec);
} else {
__copy_file(from, to, options, ec);
}
return;
} else if (is_directory(f) && bool(copy_options::create_symlinks & options)) {
return err.report(errc::is_a_directory);
} else if (is_directory(f) && (bool(copy_options::recursive & options) ||
copy_options::none == options)) {
if (!exists(t)) {
// create directory 'to' with attributes from 'from'.
__create_directory(to, from, ec);
if (ec && *ec) {
return;
}
}
directory_iterator it =
ec ? directory_iterator(from, *ec) : directory_iterator(from);
if (ec && *ec) {
return;
}
error_code m_ec2;
for (; it != directory_iterator(); it.increment(m_ec2)) {
if (m_ec2) {
return err.report(m_ec2);
}
__copy(it->path(), to / it->path().filename(),
options | copy_options::__in_recursive_copy, ec);
if (ec && *ec) {
return;
}
}
}
}
namespace detail {
namespace {
#if defined(_LIBCPP_FILESYSTEM_USE_SENDFILE)
bool copy_file_impl(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) {
size_t count = read_fd.get_stat().st_size;
do {
ssize_t res;
if ((res = ::sendfile(write_fd.fd, read_fd.fd, nullptr, count)) == -1) {
ec = capture_errno();
return false;
}
count -= res;
} while (count > 0);
ec.clear();
return true;
}
#elif defined(_LIBCPP_FILESYSTEM_USE_COPYFILE)
bool copy_file_impl(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) {
struct CopyFileState {
copyfile_state_t state;
CopyFileState() { state = copyfile_state_alloc(); }
~CopyFileState() { copyfile_state_free(state); }
private:
CopyFileState(CopyFileState const&) = delete;
CopyFileState& operator=(CopyFileState const&) = delete;
};
CopyFileState cfs;
if (fcopyfile(read_fd.fd, write_fd.fd, cfs.state, COPYFILE_DATA) < 0) {
ec = capture_errno();
return false;
}
ec.clear();
return true;
}
#elif defined(_LIBCPP_FILESYSTEM_USE_FSTREAM)
bool copy_file_impl(FileDescriptor& read_fd, FileDescriptor& write_fd, error_code& ec) {
ifstream in;
in.__open(read_fd.fd, ios::binary);
if (!in.is_open()) {
// This assumes that __open didn't reset the error code.
ec = capture_errno();
return false;
}
read_fd.fd = -1;
ofstream out;
out.__open(write_fd.fd, ios::binary);
if (!out.is_open()) {
ec = capture_errno();
return false;
}
write_fd.fd = -1;
if (in.good() && out.good()) {
using InIt = istreambuf_iterator<char>;
using OutIt = ostreambuf_iterator<char>;
InIt bin(in);
InIt ein;
OutIt bout(out);
copy(bin, ein, bout);
}
if (out.fail() || in.fail()) {
ec = make_error_code(errc::io_error);
return false;
}
ec.clear();
return true;
}
#else
# error "Unknown implementation for copy_file_impl"
#endif // copy_file_impl implementation
} // end anonymous namespace
} // end namespace detail
bool __copy_file(const path& from, const path& to, copy_options options,
error_code* ec) {
using detail::FileDescriptor;
ErrorHandler<bool> err("copy_file", ec, &to, &from);
error_code m_ec;
FileDescriptor from_fd = FileDescriptor::create_with_status(
&from, m_ec, O_RDONLY | O_NONBLOCK | O_BINARY);
if (m_ec)
return err.report(m_ec);
auto from_st = from_fd.get_status();
StatT const& from_stat = from_fd.get_stat();
if (!is_regular_file(from_st)) {
if (not m_ec)
m_ec = make_error_code(errc::not_supported);
return err.report(m_ec);
}
const bool skip_existing = bool(copy_options::skip_existing & options);
const bool update_existing = bool(copy_options::update_existing & options);
const bool overwrite_existing =
bool(copy_options::overwrite_existing & options);
StatT to_stat_path;
file_status to_st = detail::posix_stat(to, to_stat_path, &m_ec);
if (!status_known(to_st))
return err.report(m_ec);
const bool to_exists = exists(to_st);
if (to_exists && !is_regular_file(to_st))
return err.report(errc::not_supported);
if (to_exists && detail::stat_equivalent(from_stat, to_stat_path))
return err.report(errc::file_exists);
if (to_exists && skip_existing)
return false;
bool ShouldCopy = [&]() {
if (to_exists && update_existing) {
auto from_time = detail::extract_mtime(from_stat);
auto to_time = detail::extract_mtime(to_stat_path);
if (from_time.tv_sec < to_time.tv_sec)
return false;
if (from_time.tv_sec == to_time.tv_sec &&
from_time.tv_nsec <= to_time.tv_nsec)
return false;
return true;
}
if (!to_exists || overwrite_existing)
return true;
return err.report(errc::file_exists);
}();
if (!ShouldCopy)
return false;
// Don't truncate right away. We may not be opening the file we originally
// looked at; we'll check this later.
int to_open_flags = O_WRONLY | O_BINARY;
if (!to_exists)
to_open_flags |= O_CREAT;
FileDescriptor to_fd = FileDescriptor::create_with_status(
&to, m_ec, to_open_flags, from_stat.st_mode);
if (m_ec)
return err.report(m_ec);
if (to_exists) {
// Check that the file we initially stat'ed is equivalent to the one
// we opened.
// FIXME: report this better.
if (!detail::stat_equivalent(to_stat_path, to_fd.get_stat()))
return err.report(errc::bad_file_descriptor);
// Set the permissions and truncate the file we opened.
if (detail::posix_fchmod(to_fd, from_stat, m_ec))
return err.report(m_ec);
if (detail::posix_ftruncate(to_fd, 0, m_ec))
return err.report(m_ec);
}
if (!copy_file_impl(from_fd, to_fd, m_ec)) {
// FIXME: Remove the dest file if we failed, and it didn't exist previously.
return err.report(m_ec);
}
return true;
}
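// A minimal sketch of the update_existing decision above: the copy proceeds
// only when the source mtime is strictly newer than the destination mtime,
// comparing whole seconds first and nanoseconds only as a tie-breaker. The
// helper name is illustrative.
#include <time.h>
bool source_is_strictly_newer(const timespec &from_time,
const timespec &to_time) {
if (from_time.tv_sec != to_time.tv_sec)
return from_time.tv_sec > to_time.tv_sec;
return from_time.tv_nsec > to_time.tv_nsec;
}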
void __copy_symlink(const path& existing_symlink, const path& new_symlink,
error_code* ec) {
const path real_path(__read_symlink(existing_symlink, ec));
if (ec && *ec) {
return;
}
#if defined(_LIBCPP_WIN32API)
error_code local_ec;
if (is_directory(real_path, local_ec))
__create_directory_symlink(real_path, new_symlink, ec);
else
#endif
__create_symlink(real_path, new_symlink, ec);
}
bool __create_directories(const path& p, error_code* ec) {
ErrorHandler<bool> err("create_directories", ec, &p);
error_code m_ec;
auto const st = detail::posix_stat(p, &m_ec);
if (!status_known(st))
return err.report(m_ec);
else if (is_directory(st))
return false;
else if (exists(st))
return err.report(errc::file_exists);
const path parent = p.parent_path();
if (!parent.empty()) {
const file_status parent_st = status(parent, m_ec);
if (not status_known(parent_st))
return err.report(m_ec);
if (not exists(parent_st)) {
if (parent == p)
return err.report(errc::invalid_argument);
__create_directories(parent, ec);
if (ec && *ec) {
return false;
}
} else if (not is_directory(parent_st))
return err.report(errc::not_a_directory);
}
bool ret = __create_directory(p, &m_ec);
if (m_ec)
return err.report(m_ec);
return ret;
}
bool __create_directory(const path& p, error_code* ec) {
ErrorHandler<bool> err("create_directory", ec, &p);
if (detail::mkdir(p.c_str(), static_cast<int>(perms::all)) == 0)
return true;
if (errno != EEXIST)
return err.report(capture_errno());
error_code mec = capture_errno();
error_code ignored_ec;
const file_status st = status(p, ignored_ec);
if (!is_directory(st))
return err.report(mec);
return false;
}
bool __create_directory(path const& p, path const& attributes, error_code* ec) {
ErrorHandler<bool> err("create_directory", ec, &p, &attributes);
StatT attr_stat;
error_code mec;
file_status st = detail::posix_stat(attributes, attr_stat, &mec);
if (!status_known(st))
return err.report(mec);
if (!is_directory(st))
return err.report(errc::not_a_directory,
"the specified attribute path is invalid");
if (detail::mkdir(p.c_str(), attr_stat.st_mode) == 0)
return true;
if (errno != EEXIST)
return err.report(capture_errno());
mec = capture_errno();
error_code ignored_ec;
st = status(p, ignored_ec);
if (!is_directory(st))
return err.report(mec);
return false;
}
void __create_directory_symlink(path const& from, path const& to,
error_code* ec) {
ErrorHandler<void> err("create_directory_symlink", ec, &from, &to);
if (detail::symlink_dir(from.c_str(), to.c_str()) == -1)
return err.report(capture_errno());
}
void __create_hard_link(const path& from, const path& to, error_code* ec) {
ErrorHandler<void> err("create_hard_link", ec, &from, &to);
if (detail::link(from.c_str(), to.c_str()) == -1)
return err.report(capture_errno());
}
void __create_symlink(path const& from, path const& to, error_code* ec) {
ErrorHandler<void> err("create_symlink", ec, &from, &to);
if (detail::symlink_file(from.c_str(), to.c_str()) == -1)
return err.report(capture_errno());
}
path __current_path(error_code* ec) {
ErrorHandler<path> err("current_path", ec);
#if defined(_LIBCPP_WIN32API) || defined(__GLIBC__) || defined(__APPLE__)
// Common extension outside of POSIX getcwd() spec, without needing to
// preallocate a buffer. Also supported by a number of other POSIX libcs.
int size = 0;
path::value_type* ptr = nullptr;
typedef decltype(&::free) Deleter;
Deleter deleter = &::free;
#else
auto size = ::pathconf(".", _PC_PATH_MAX);
_LIBCPP_ASSERT(size >= 0, "pathconf returned a 0 as max size");
auto buff = unique_ptr<path::value_type[]>(new path::value_type[size + 1]);
path::value_type* ptr = buff.get();
// Preallocated buffer, don't free the buffer in the second unique_ptr
// below.
struct Deleter { void operator()(void*) const {} };
Deleter deleter;
#endif
unique_ptr<path::value_type, Deleter> hold(detail::getcwd(ptr, size),
deleter);
if (hold.get() == nullptr)
return err.report(capture_errno(), "call to getcwd failed");
return {hold.get()};
}
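// A minimal sketch of the getcwd() extension relied on above: passing a null
// buffer asks the C library to allocate one of the right size, which the
// caller then frees. This assumes a libc that provides the extension (glibc,
// macOS and friends, per the #if above); strict POSIX instead needs a buffer
// preallocated from pathconf(".", _PC_PATH_MAX), as the #else branch does.
#include <cstdio>
#include <cstdlib>
#include <unistd.h>
int main() {
char *cwd = ::getcwd(nullptr, 0);
if (cwd == nullptr) {
std::perror("getcwd");
return 1;
}
std::printf("%s\n", cwd);
std::free(cwd);
return 0;
}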
void __current_path(const path& p, error_code* ec) {
ErrorHandler<void> err("current_path", ec, &p);
if (detail::chdir(p.c_str()) == -1)
err.report(capture_errno());
}
bool __equivalent(const path& p1, const path& p2, error_code* ec) {
ErrorHandler<bool> err("equivalent", ec, &p1, &p2);
error_code ec1, ec2;
StatT st1 = {}, st2 = {};
auto s1 = detail::posix_stat(p1.native(), st1, &ec1);
if (!exists(s1))
return err.report(errc::not_supported);
auto s2 = detail::posix_stat(p2.native(), st2, &ec2);
if (!exists(s2))
return err.report(errc::not_supported);
return detail::stat_equivalent(st1, st2);
}
uintmax_t __file_size(const path& p, error_code* ec) {
ErrorHandler<uintmax_t> err("file_size", ec, &p);
error_code m_ec;
StatT st;
file_status fst = detail::posix_stat(p, st, &m_ec);
if (!exists(fst) || !is_regular_file(fst)) {
errc error_kind =
is_directory(fst) ? errc::is_a_directory : errc::not_supported;
if (!m_ec)
m_ec = make_error_code(error_kind);
return err.report(m_ec);
}
// is_regular_file(p) == true
return static_cast<uintmax_t>(st.st_size);
}
uintmax_t __hard_link_count(const path& p, error_code* ec) {
ErrorHandler<uintmax_t> err("hard_link_count", ec, &p);
error_code m_ec;
StatT st;
detail::posix_stat(p, st, &m_ec);
if (m_ec)
return err.report(m_ec);
return static_cast<uintmax_t>(st.st_nlink);
}
bool __fs_is_empty(const path& p, error_code* ec) {
ErrorHandler<bool> err("is_empty", ec, &p);
error_code m_ec;
StatT pst;
auto st = detail::posix_stat(p, pst, &m_ec);
if (m_ec)
return err.report(m_ec);
else if (!is_directory(st) && !is_regular_file(st))
return err.report(errc::not_supported);
else if (is_directory(st)) {
auto it = ec ? directory_iterator(p, *ec) : directory_iterator(p);
if (ec && *ec)
return false;
return it == directory_iterator{};
} else if (is_regular_file(st))
return static_cast<uintmax_t>(pst.st_size) == 0;
_LIBCPP_UNREACHABLE();
}
static file_time_type __extract_last_write_time(const path& p, const StatT& st,
error_code* ec) {
using detail::fs_time;
ErrorHandler<file_time_type> err("last_write_time", ec, &p);
auto ts = detail::extract_mtime(st);
if (!fs_time::is_representable(ts))
return err.report(errc::value_too_large);
return fs_time::convert_from_timespec(ts);
}
file_time_type __last_write_time(const path& p, error_code* ec) {
using namespace chrono;
ErrorHandler<file_time_type> err("last_write_time", ec, &p);
error_code m_ec;
StatT st;
detail::posix_stat(p, st, &m_ec);
if (m_ec)
return err.report(m_ec);
return __extract_last_write_time(p, st, ec);
}
void __last_write_time(const path& p, file_time_type new_time, error_code* ec) {
using detail::fs_time;
ErrorHandler<void> err("last_write_time", ec, &p);
#if defined(_LIBCPP_WIN32API)
TimeSpec ts;
if (!fs_time::convert_to_timespec(ts, new_time))
return err.report(errc::value_too_large);
detail::WinHandle h(p.c_str(), FILE_WRITE_ATTRIBUTES, 0);
if (!h)
return err.report(detail::make_windows_error(GetLastError()));
FILETIME last_write = timespec_to_filetime(ts);
if (!SetFileTime(h, nullptr, nullptr, &last_write))
return err.report(detail::make_windows_error(GetLastError()));
#else
error_code m_ec;
array<TimeSpec, 2> tbuf;
#if !defined(_LIBCPP_USE_UTIMENSAT)
// This implementation has a race condition between determining the
// last access time and attempting to set it to the same value using
// ::utimes
StatT st;
file_status fst = detail::posix_stat(p, st, &m_ec);
if (m_ec)
return err.report(m_ec);
tbuf[0] = detail::extract_atime(st);
#else
tbuf[0].tv_sec = 0;
tbuf[0].tv_nsec = UTIME_OMIT;
#endif
if (!fs_time::convert_to_timespec(tbuf[1], new_time))
return err.report(errc::value_too_large);
detail::set_file_times(p, tbuf, m_ec);
if (m_ec)
return err.report(m_ec);
#endif
}
void __permissions(const path& p, perms prms, perm_options opts,
error_code* ec) {
ErrorHandler<void> err("permissions", ec, &p);
auto has_opt = [&](perm_options o) { return bool(o & opts); };
const bool resolve_symlinks = !has_opt(perm_options::nofollow);
const bool add_perms = has_opt(perm_options::add);
const bool remove_perms = has_opt(perm_options::remove);
_LIBCPP_ASSERT(
(add_perms + remove_perms + has_opt(perm_options::replace)) == 1,
"One and only one of the perm_options constants replace, add, or remove "
"is present in opts");
bool set_sym_perms = false;
prms &= perms::mask;
if (!resolve_symlinks || (add_perms || remove_perms)) {
error_code m_ec;
file_status st = resolve_symlinks ? detail::posix_stat(p, &m_ec)
: detail::posix_lstat(p, &m_ec);
set_sym_perms = is_symlink(st);
if (m_ec)
return err.report(m_ec);
_LIBCPP_ASSERT(st.permissions() != perms::unknown,
"Permissions unexpectedly unknown");
if (add_perms)
prms |= st.permissions();
else if (remove_perms)
prms = st.permissions() & ~prms;
}
const auto real_perms = static_cast<detail::ModeT>(prms & perms::mask);
#if defined(AT_SYMLINK_NOFOLLOW) && defined(AT_FDCWD)
const int flags = set_sym_perms ? AT_SYMLINK_NOFOLLOW : 0;
if (detail::fchmodat(AT_FDCWD, p.c_str(), real_perms, flags) == -1) {
return err.report(capture_errno());
}
#else
if (set_sym_perms)
return err.report(errc::operation_not_supported);
if (::chmod(p.c_str(), real_perms) == -1) {
return err.report(capture_errno());
}
#endif
}
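// A minimal sketch of the add/remove/replace arithmetic above, using plain
// mode bits instead of the perms enum: "add" ORs the requested bits into the
// current ones, "remove" clears them, and "replace" uses them as given. The
// function name is illustrative.
unsigned apply_perm_option(unsigned current, unsigned requested,
bool add_perms, bool remove_perms) {
if (add_perms)
return current | requested;
if (remove_perms)
return current & ~requested;
return requested; // perm_options::replace
}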
path __read_symlink(const path& p, error_code* ec) {
ErrorHandler<path> err("read_symlink", ec, &p);
#if defined(PATH_MAX) || defined(MAX_SYMLINK_SIZE)
struct NullDeleter { void operator()(void*) const {} };
#ifdef MAX_SYMLINK_SIZE
const size_t size = MAX_SYMLINK_SIZE + 1;
#else
const size_t size = PATH_MAX + 1;
#endif
path::value_type stack_buff[size];
auto buff = std::unique_ptr<path::value_type[], NullDeleter>(stack_buff);
#else
StatT sb;
if (detail::lstat(p.c_str(), &sb) == -1) {
return err.report(capture_errno());
}
const size_t size = sb.st_size + 1;
auto buff = unique_ptr<path::value_type[]>(new path::value_type[size]);
#endif
detail::SSizeT ret;
if ((ret = detail::readlink(p.c_str(), buff.get(), size)) == -1)
return err.report(capture_errno());
_LIBCPP_ASSERT(ret > 0, "TODO");
if (static_cast<size_t>(ret) >= size)
return err.report(errc::value_too_large);
buff[ret] = 0;
return {buff.get()};
}
bool __remove(const path& p, error_code* ec) {
ErrorHandler<bool> err("remove", ec, &p);
if (detail::remove(p.c_str()) == -1) {
if (errno != ENOENT)
err.report(capture_errno());
return false;
}
return true;
}
// We currently have two implementations of `__remove_all`. The first one is general and
// used on platforms where we don't have access to the `openat()` family of POSIX functions.
// That implementation uses `directory_iterator`; however, it is vulnerable to some race
// conditions; see https://reviews.llvm.org/D118134 for details.
//
// The second implementation is used on platforms where `openat()` & friends are available,
// and it threads file descriptors through recursive calls to avoid such race conditions.
#if defined(_LIBCPP_WIN32API)
# define REMOVE_ALL_USE_DIRECTORY_ITERATOR
#endif
#if defined(REMOVE_ALL_USE_DIRECTORY_ITERATOR)
namespace {
uintmax_t remove_all_impl(path const& p, error_code& ec) {
const auto npos = static_cast<uintmax_t>(-1);
const file_status st = __symlink_status(p, &ec);
if (ec)
return npos;
uintmax_t count = 1;
if (is_directory(st)) {
for (directory_iterator it(p, ec); !ec && it != directory_iterator();
it.increment(ec)) {
auto other_count = remove_all_impl(it->path(), ec);
if (ec)
return npos;
count += other_count;
}
if (ec)
return npos;
}
if (!__remove(p, &ec))
return npos;
return count;
}
} // end namespace
uintmax_t __remove_all(const path& p, error_code* ec) {
ErrorHandler<uintmax_t> err("remove_all", ec, &p);
error_code mec;
auto count = remove_all_impl(p, mec);
if (mec) {
if (mec == errc::no_such_file_or_directory)
return 0;
return err.report(mec);
}
return count;
}
#else // !REMOVE_ALL_USE_DIRECTORY_ITERATOR
namespace {
template <class Cleanup>
struct scope_exit {
explicit scope_exit(Cleanup const& cleanup)
: cleanup_(cleanup)
{ }
~scope_exit() { cleanup_(); }
private:
Cleanup cleanup_;
};
uintmax_t remove_all_impl(int parent_directory, const path& p, error_code& ec) {
// First, try to open the path as a directory.
const int options = O_CLOEXEC | O_RDONLY | O_DIRECTORY | O_NOFOLLOW;
int fd = ::openat(parent_directory, p.c_str(), options);
if (fd != -1) {
// If that worked, iterate over the contents of the directory and
// remove everything in it, recursively.
- scope_exit close_fd([=] { ::close(fd); });
DIR* stream = ::fdopendir(fd);
if (stream == nullptr) {
+ ::close(fd);
ec = detail::capture_errno();
return 0;
}
+ // Note: `::closedir` will also close the associated file descriptor, so
+ // there should be no call to `close(fd)`.
scope_exit close_stream([=] { ::closedir(stream); });
uintmax_t count = 0;
while (true) {
auto [str, type] = detail::posix_readdir(stream, ec);
static_assert(std::is_same_v<decltype(str), std::string_view>);
if (str == "." || str == "..") {
continue;
} else if (ec || str.empty()) {
break; // we're done iterating through the directory
} else {
count += remove_all_impl(fd, str, ec);
}
}
// Then, remove the now-empty directory itself.
if (::unlinkat(parent_directory, p.c_str(), AT_REMOVEDIR) == -1) {
ec = detail::capture_errno();
return count;
}
return count + 1; // the contents of the directory + the directory itself
}
ec = detail::capture_errno();
// If we failed to open `p` because it didn't exist, it's not an
// error -- it might have moved or have been deleted already.
if (ec == errc::no_such_file_or_directory) {
ec.clear();
return 0;
}
// If opening `p` failed because it wasn't a directory, remove it as
// a normal file instead. Note that `openat()` can return either ENOTDIR
// or ELOOP depending on the exact reason of the failure.
if (ec == errc::not_a_directory || ec == errc::too_many_symbolic_link_levels) {
ec.clear();
if (::unlinkat(parent_directory, p.c_str(), /* flags = */0) == -1) {
ec = detail::capture_errno();
return 0;
}
return 1;
}
// Otherwise, it's a real error -- we don't remove anything.
return 0;
}
} // end namespace
uintmax_t __remove_all(const path& p, error_code* ec) {
ErrorHandler<uintmax_t> err("remove_all", ec, &p);
error_code mec;
uintmax_t count = remove_all_impl(AT_FDCWD, p, mec);
if (mec)
return err.report(mec);
return count;
}
#endif // REMOVE_ALL_USE_DIRECTORY_ITERATOR
void __rename(const path& from, const path& to, error_code* ec) {
ErrorHandler<void> err("rename", ec, &from, &to);
if (detail::rename(from.c_str(), to.c_str()) == -1)
err.report(capture_errno());
}
void __resize_file(const path& p, uintmax_t size, error_code* ec) {
ErrorHandler<void> err("resize_file", ec, &p);
if (detail::truncate(p.c_str(), static_cast< ::off_t>(size)) == -1)
return err.report(capture_errno());
}
space_info __space(const path& p, error_code* ec) {
ErrorHandler<void> err("space", ec, &p);
space_info si;
detail::StatVFS m_svfs = {};
if (detail::statvfs(p.c_str(), &m_svfs) == -1) {
err.report(capture_errno());
si.capacity = si.free = si.available = static_cast<uintmax_t>(-1);
return si;
}
// Multiply with overflow checking.
auto do_mult = [&](uintmax_t& out, uintmax_t other) {
out = other * m_svfs.f_frsize;
if (other == 0 || out / other != m_svfs.f_frsize)
out = static_cast<uintmax_t>(-1);
};
do_mult(si.capacity, m_svfs.f_blocks);
do_mult(si.free, m_svfs.f_bfree);
do_mult(si.available, m_svfs.f_bavail);
return si;
}
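// A minimal restatement of the do_mult check above: multiply, then divide the
// product back by one factor; if the division does not recover the other
// factor, the multiplication wrapped around. As in do_mult, a zero factor is
// also mapped to the "unknown" sentinel, which keeps the division defined.
#include <cstdint>
std::uintmax_t mult_with_overflow_check(std::uintmax_t other,
std::uintmax_t frsize) {
std::uintmax_t out = other * frsize;
if (other == 0 || out / other != frsize)
out = static_cast<std::uintmax_t>(-1); // space_info uses -1 for unknown
return out;
}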
file_status __status(const path& p, error_code* ec) {
return detail::posix_stat(p, ec);
}
file_status __symlink_status(const path& p, error_code* ec) {
return detail::posix_lstat(p, ec);
}
path __temp_directory_path(error_code* ec) {
ErrorHandler<path> err("temp_directory_path", ec);
#if defined(_LIBCPP_WIN32API)
wchar_t buf[MAX_PATH];
DWORD retval = GetTempPathW(MAX_PATH, buf);
if (!retval)
return err.report(detail::make_windows_error(GetLastError()));
if (retval > MAX_PATH)
return err.report(errc::filename_too_long);
// GetTempPathW returns a path with a trailing slash, which we
// shouldn't include for consistency.
if (buf[retval-1] == L'\\')
buf[retval-1] = L'\0';
path p(buf);
#else
const char* env_paths[] = {"TMPDIR", "TMP", "TEMP", "TEMPDIR"};
const char* ret = nullptr;
for (auto& ep : env_paths)
if ((ret = getenv(ep)))
break;
if (ret == nullptr)
ret = "/tmp";
path p(ret);
#endif
error_code m_ec;
file_status st = detail::posix_stat(p, &m_ec);
if (!status_known(st))
return err.report(m_ec, "cannot access path " PATH_CSTR_FMT, p.c_str());
if (!exists(st) || !is_directory(st))
return err.report(errc::not_a_directory,
"path " PATH_CSTR_FMT " is not a directory", p.c_str());
return p;
}
path __weakly_canonical(const path& p, error_code* ec) {
ErrorHandler<path> err("weakly_canonical", ec, &p);
if (p.empty())
return __canonical("", ec);
path result;
path tmp;
tmp.__reserve(p.native().size());
auto PP = PathParser::CreateEnd(p.native());
--PP;
vector<string_view_t> DNEParts;
while (PP.State != PathParser::PS_BeforeBegin) {
tmp.assign(createView(p.native().data(), &PP.RawEntry.back()));
error_code m_ec;
file_status st = __status(tmp, &m_ec);
if (!status_known(st)) {
return err.report(m_ec);
} else if (exists(st)) {
result = __canonical(tmp, ec);
break;
}
DNEParts.push_back(*PP);
--PP;
}
if (PP.State == PathParser::PS_BeforeBegin)
result = __canonical("", ec);
if (ec)
ec->clear();
if (DNEParts.empty())
return result;
for (auto It = DNEParts.rbegin(); It != DNEParts.rend(); ++It)
result /= *It;
return result.lexically_normal();
}
///////////////////////////////////////////////////////////////////////////////
// path definitions
///////////////////////////////////////////////////////////////////////////////
constexpr path::value_type path::preferred_separator;
path& path::replace_extension(path const& replacement) {
path p = extension();
if (not p.empty()) {
__pn_.erase(__pn_.size() - p.native().size());
}
if (!replacement.empty()) {
if (replacement.native()[0] != '.') {
__pn_ += PS(".");
}
__pn_.append(replacement.__pn_);
}
return *this;
}
///////////////////////////////////////////////////////////////////////////////
// path.decompose
string_view_t path::__root_name() const {
auto PP = PathParser::CreateBegin(__pn_);
if (PP.State == PathParser::PS_InRootName)
return *PP;
return {};
}
string_view_t path::__root_directory() const {
auto PP = PathParser::CreateBegin(__pn_);
if (PP.State == PathParser::PS_InRootName)
++PP;
if (PP.State == PathParser::PS_InRootDir)
return *PP;
return {};
}
string_view_t path::__root_path_raw() const {
auto PP = PathParser::CreateBegin(__pn_);
if (PP.State == PathParser::PS_InRootName) {
auto NextCh = PP.peek();
if (NextCh && isSeparator(*NextCh)) {
++PP;
return createView(__pn_.data(), &PP.RawEntry.back());
}
return PP.RawEntry;
}
if (PP.State == PathParser::PS_InRootDir)
return *PP;
return {};
}
static bool ConsumeRootName(PathParser *PP) {
static_assert(PathParser::PS_BeforeBegin == 1 &&
PathParser::PS_InRootName == 2,
"Values for enums are incorrect");
while (PP->State <= PathParser::PS_InRootName)
++(*PP);
return PP->State == PathParser::PS_AtEnd;
}
static bool ConsumeRootDir(PathParser* PP) {
static_assert(PathParser::PS_BeforeBegin == 1 &&
PathParser::PS_InRootName == 2 &&
PathParser::PS_InRootDir == 3, "Values for enums are incorrect");
while (PP->State <= PathParser::PS_InRootDir)
++(*PP);
return PP->State == PathParser::PS_AtEnd;
}
string_view_t path::__relative_path() const {
auto PP = PathParser::CreateBegin(__pn_);
if (ConsumeRootDir(&PP))
return {};
return createView(PP.RawEntry.data(), &__pn_.back());
}
string_view_t path::__parent_path() const {
if (empty())
return {};
// Determine if we have a root path but not a relative path. In that case
// return *this.
{
auto PP = PathParser::CreateBegin(__pn_);
if (ConsumeRootDir(&PP))
return __pn_;
}
// Otherwise remove a single element from the end of the path, and return
// a string representing that path
{
auto PP = PathParser::CreateEnd(__pn_);
--PP;
if (PP.RawEntry.data() == __pn_.data())
return {};
--PP;
return createView(__pn_.data(), &PP.RawEntry.back());
}
}
string_view_t path::__filename() const {
if (empty())
return {};
{
PathParser PP = PathParser::CreateBegin(__pn_);
if (ConsumeRootDir(&PP))
return {};
}
return *(--PathParser::CreateEnd(__pn_));
}
string_view_t path::__stem() const {
return parser::separate_filename(__filename()).first;
}
string_view_t path::__extension() const {
return parser::separate_filename(__filename()).second;
}
////////////////////////////////////////////////////////////////////////////
// path.gen
enum PathPartKind : unsigned char {
PK_None,
PK_RootSep,
PK_Filename,
PK_Dot,
PK_DotDot,
PK_TrailingSep
};
static PathPartKind ClassifyPathPart(string_view_t Part) {
if (Part.empty())
return PK_TrailingSep;
if (Part == PS("."))
return PK_Dot;
if (Part == PS(".."))
return PK_DotDot;
if (Part == PS("/"))
return PK_RootSep;
#if defined(_LIBCPP_WIN32API)
if (Part == PS("\\"))
return PK_RootSep;
#endif
return PK_Filename;
}
path path::lexically_normal() const {
if (__pn_.empty())
return *this;
using PartKindPair = pair<string_view_t, PathPartKind>;
vector<PartKindPair> Parts;
// Guess how many elements the path has, to avoid reallocating.
Parts.reserve(32);
// Track the total size of the parts as we collect them. This allows the
// resulting path to reserve the correct amount of memory.
size_t NewPathSize = 0;
auto AddPart = [&](PathPartKind K, string_view_t P) {
NewPathSize += P.size();
Parts.emplace_back(P, K);
};
auto LastPartKind = [&]() {
if (Parts.empty())
return PK_None;
return Parts.back().second;
};
bool MaybeNeedTrailingSep = false;
// Build a stack containing the remaining elements of the path, popping off
// elements which occur before a '..' entry.
for (auto PP = PathParser::CreateBegin(__pn_); PP; ++PP) {
auto Part = *PP;
PathPartKind Kind = ClassifyPathPart(Part);
switch (Kind) {
case PK_Filename:
case PK_RootSep: {
// Add all non-dot and non-dot-dot elements to the stack of elements.
AddPart(Kind, Part);
MaybeNeedTrailingSep = false;
break;
}
case PK_DotDot: {
// Only push a ".." element if there are no elements preceding the "..",
// or if the preceding element is itself "..".
auto LastKind = LastPartKind();
if (LastKind == PK_Filename) {
NewPathSize -= Parts.back().first.size();
Parts.pop_back();
} else if (LastKind != PK_RootSep)
AddPart(PK_DotDot, PS(".."));
MaybeNeedTrailingSep = LastKind == PK_Filename;
break;
}
case PK_Dot:
case PK_TrailingSep: {
MaybeNeedTrailingSep = true;
break;
}
case PK_None:
_LIBCPP_UNREACHABLE();
}
}
// [fs.path.generic]p6.8: If the path is empty, add a dot.
if (Parts.empty())
return PS(".");
// [fs.path.generic]p6.7: If the last filename is dot-dot, remove any
// trailing directory-separator.
bool NeedTrailingSep = MaybeNeedTrailingSep && LastPartKind() == PK_Filename;
path Result;
Result.__pn_.reserve(Parts.size() + NewPathSize + NeedTrailingSep);
for (auto& PK : Parts)
Result /= PK.first;
if (NeedTrailingSep)
Result /= PS("");
Result.make_preferred();
return Result;
}
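A minimal usage sketch of the normalization implemented above, through the public std::filesystem API; the literal paths are examples only, and the expected results follow [fs.path.generic]:
#include <cassert>
#include <filesystem>

int main() {
  namespace fs = std::filesystem;
  // "." and ".." elements collapse; removing a trailing element keeps a
  // trailing separator, matching the rules implemented above.
  assert(fs::path("foo/./bar/..").lexically_normal() == "foo/");
  assert(fs::path("foo/.///bar/../").lexically_normal() == "foo/");
  // A path that collapses to nothing normalizes to ".".
  assert(fs::path("a/..").lexically_normal() == ".");
  return 0;
}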
static int DetermineLexicalElementCount(PathParser PP) {
int Count = 0;
for (; PP; ++PP) {
auto Elem = *PP;
if (Elem == PS(".."))
--Count;
else if (Elem != PS(".") && Elem != PS(""))
++Count;
}
return Count;
}
path path::lexically_relative(const path& base) const {
{ // perform root-name/root-directory mismatch checks
auto PP = PathParser::CreateBegin(__pn_);
auto PPBase = PathParser::CreateBegin(base.__pn_);
auto CheckIterMismatchAtBase = [&]() {
return PP.State != PPBase.State &&
(PP.inRootPath() || PPBase.inRootPath());
};
if (PP.inRootName() && PPBase.inRootName()) {
if (*PP != *PPBase)
return {};
} else if (CheckIterMismatchAtBase())
return {};
if (PP.inRootPath())
++PP;
if (PPBase.inRootPath())
++PPBase;
if (CheckIterMismatchAtBase())
return {};
}
// Find the first mismatching element
auto PP = PathParser::CreateBegin(__pn_);
auto PPBase = PathParser::CreateBegin(base.__pn_);
while (PP && PPBase && PP.State == PPBase.State && *PP == *PPBase) {
++PP;
++PPBase;
}
// If there is no mismatch, return ".".
if (!PP && !PPBase)
return ".";
// Otherwise, determine the number of elements, 'n', which are not dot or
// dot-dot minus the number of dot-dot elements.
int ElemCount = DetermineLexicalElementCount(PPBase);
if (ElemCount < 0)
return {};
// if n == 0 and (a == end() || a->empty()), returns path("."); otherwise
if (ElemCount == 0 && (PP.atEnd() || *PP == PS("")))
return PS(".");
// return a path constructed with 'n' dot-dot elements, followed by the
// elements of '*this' after the mismatch.
path Result;
// FIXME: Reserve enough room in Result that it won't have to re-allocate.
while (ElemCount--)
Result /= PS("..");
for (; PP; ++PP)
Result /= *PP;
return Result;
}
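A hedged usage sketch of the relative-path computation above, mirroring the examples in [fs.path.gen]:
#include <cassert>
#include <filesystem>

int main() {
  namespace fs = std::filesystem;
  assert(fs::path("/a/d").lexically_relative("/a/b/c") == "../../d");
  assert(fs::path("a/b/c").lexically_relative("a") == "b/c");
  assert(fs::path("a/b/c").lexically_relative("a/b/c") == ".");
  // Mismatched root paths yield an empty result, as checked above.
  assert(fs::path("a").lexically_relative("/a").empty());
  return 0;
}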
////////////////////////////////////////////////////////////////////////////
// path.comparisons
static int CompareRootName(PathParser *LHS, PathParser *RHS) {
if (!LHS->inRootName() && !RHS->inRootName())
return 0;
auto GetRootName = [](PathParser *Parser) -> string_view_t {
return Parser->inRootName() ? **Parser : PS("");
};
int res = GetRootName(LHS).compare(GetRootName(RHS));
ConsumeRootName(LHS);
ConsumeRootName(RHS);
return res;
}
static int CompareRootDir(PathParser *LHS, PathParser *RHS) {
if (!LHS->inRootDir() && RHS->inRootDir())
return -1;
else if (LHS->inRootDir() && !RHS->inRootDir())
return 1;
else {
ConsumeRootDir(LHS);
ConsumeRootDir(RHS);
return 0;
}
}
static int CompareRelative(PathParser *LHSPtr, PathParser *RHSPtr) {
auto &LHS = *LHSPtr;
auto &RHS = *RHSPtr;
int res;
while (LHS && RHS) {
if ((res = (*LHS).compare(*RHS)) != 0)
return res;
++LHS;
++RHS;
}
return 0;
}
static int CompareEndState(PathParser *LHS, PathParser *RHS) {
if (LHS->atEnd() && !RHS->atEnd())
return -1;
else if (!LHS->atEnd() && RHS->atEnd())
return 1;
return 0;
}
int path::__compare(string_view_t __s) const {
auto LHS = PathParser::CreateBegin(__pn_);
auto RHS = PathParser::CreateBegin(__s);
int res;
if ((res = CompareRootName(&LHS, &RHS)) != 0)
return res;
if ((res = CompareRootDir(&LHS, &RHS)) != 0)
return res;
if ((res = CompareRelative(&LHS, &RHS)) != 0)
return res;
return CompareEndState(&LHS, &RHS);
}
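The staged comparison above (root name, then root directory, then the relative elements) means ordering is element-wise rather than a raw string comparison; a small sketch with made-up paths:
#include <cassert>
#include <filesystem>

int main() {
  namespace fs = std::filesystem;
  // Redundant separators do not affect the comparison: both paths
  // decompose into the elements {"a", "b"}.
  assert(fs::path("a//b").compare(fs::path("a/b")) == 0);
  // A path with a root directory orders after one without.
  assert(fs::path("/a").compare(fs::path("a")) > 0);
  return 0;
}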
////////////////////////////////////////////////////////////////////////////
// path.nonmembers
size_t hash_value(const path& __p) noexcept {
auto PP = PathParser::CreateBegin(__p.native());
size_t hash_value = 0;
hash<string_view_t> hasher;
while (PP) {
hash_value = __hash_combine(hash_value, hasher(*PP));
++PP;
}
return hash_value;
}
////////////////////////////////////////////////////////////////////////////
// path.itr
path::iterator path::begin() const {
auto PP = PathParser::CreateBegin(__pn_);
iterator it;
it.__path_ptr_ = this;
it.__state_ = static_cast<path::iterator::_ParserState>(PP.State);
it.__entry_ = PP.RawEntry;
it.__stashed_elem_.__assign_view(*PP);
return it;
}
path::iterator path::end() const {
iterator it{};
it.__state_ = path::iterator::_AtEnd;
it.__path_ptr_ = this;
return it;
}
path::iterator& path::iterator::__increment() {
PathParser PP(__path_ptr_->native(), __entry_, __state_);
++PP;
__state_ = static_cast<_ParserState>(PP.State);
__entry_ = PP.RawEntry;
__stashed_elem_.__assign_view(*PP);
return *this;
}
path::iterator& path::iterator::__decrement() {
PathParser PP(__path_ptr_->native(), __entry_, __state_);
--PP;
__state_ = static_cast<_ParserState>(PP.State);
__entry_ = PP.RawEntry;
__stashed_elem_.__assign_view(*PP);
return *this;
}
#if defined(_LIBCPP_WIN32API)
////////////////////////////////////////////////////////////////////////////
// Windows path conversions
size_t __wide_to_char(const wstring &str, char *out, size_t outlen) {
if (str.empty())
return 0;
ErrorHandler<size_t> err("__wide_to_char", nullptr);
UINT codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP;
BOOL used_default = FALSE;
int ret = WideCharToMultiByte(codepage, 0, str.data(), str.size(), out,
outlen, nullptr, &used_default);
if (ret <= 0 || used_default)
return err.report(errc::illegal_byte_sequence);
return ret;
}
size_t __char_to_wide(const string &str, wchar_t *out, size_t outlen) {
if (str.empty())
return 0;
ErrorHandler<size_t> err("__char_to_wide", nullptr);
UINT codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP;
int ret = MultiByteToWideChar(codepage, MB_ERR_INVALID_CHARS, str.data(),
str.size(), out, outlen);
if (ret <= 0)
return err.report(errc::illegal_byte_sequence);
return ret;
}
#endif
///////////////////////////////////////////////////////////////////////////////
// directory entry definitions
///////////////////////////////////////////////////////////////////////////////
error_code directory_entry::__do_refresh() noexcept {
__data_.__reset();
error_code failure_ec;
StatT full_st;
file_status st = detail::posix_lstat(__p_, full_st, &failure_ec);
if (!status_known(st)) {
__data_.__reset();
return failure_ec;
}
if (!_VSTD_FS::exists(st) || !_VSTD_FS::is_symlink(st)) {
__data_.__cache_type_ = directory_entry::_RefreshNonSymlink;
__data_.__type_ = st.type();
__data_.__non_sym_perms_ = st.permissions();
} else { // we have a symlink
__data_.__sym_perms_ = st.permissions();
// Get the information about the linked entity.
// Ignore errors from stat, since we don't want errors regarding symlink
// resolution to be reported to the user.
error_code ignored_ec;
st = detail::posix_stat(__p_, full_st, &ignored_ec);
__data_.__type_ = st.type();
__data_.__non_sym_perms_ = st.permissions();
// If we failed to resolve the link, then only partially populate the
// cache.
if (!status_known(st)) {
__data_.__cache_type_ = directory_entry::_RefreshSymlinkUnresolved;
return error_code{};
}
// Otherwise, we resolved the link, potentially as not existing.
// That's OK.
__data_.__cache_type_ = directory_entry::_RefreshSymlink;
}
if (_VSTD_FS::is_regular_file(st))
__data_.__size_ = static_cast<uintmax_t>(full_st.st_size);
if (_VSTD_FS::exists(st)) {
__data_.__nlink_ = static_cast<uintmax_t>(full_st.st_nlink);
// Attempt to extract the mtime, and fail if it's not representable using
// file_time_type. For now we ignore the error, as we'll report it when
// the value is actually used.
error_code ignored_ec;
__data_.__write_time_ =
__extract_last_write_time(__p_, full_st, &ignored_ec);
}
return failure_ec;
}
_LIBCPP_END_NAMESPACE_FILESYSTEM
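For context, the cache populated by __do_refresh above is what backs the public directory_entry accessors; a minimal sketch, assuming a writable temp directory and an invented file name:
#include <filesystem>
#include <fstream>
#include <iostream>

int main() {
  namespace fs = std::filesystem;
  fs::path p = fs::temp_directory_path() / "refresh-example.txt"; // assumed writable
  std::ofstream(p) << "hello";
  fs::directory_entry ent(p); // construction performs the refresh shown above
  ent.refresh();              // re-runs the lstat()/stat() pass explicitly
  std::cout << ent.file_size()
            << (ent.is_symlink() ? " (symlink)\n" : " (not a symlink)\n");
  fs::remove(p);
  return 0;
}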
diff --git a/contrib/llvm-project/libunwind/src/UnwindRegistersSave.S b/contrib/llvm-project/libunwind/src/UnwindRegistersSave.S
index 9566bb0335fe..b39489235ce6 100644
--- a/contrib/llvm-project/libunwind/src/UnwindRegistersSave.S
+++ b/contrib/llvm-project/libunwind/src/UnwindRegistersSave.S
@@ -1,1175 +1,1177 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "assembly.h"
.text
#if !defined(__USING_SJLJ_EXCEPTIONS__)
#if defined(__i386__)
#
# extern int __unw_getcontext(unw_context_t* thread_state)
#
# On entry:
# + +
# +-----------------------+
# + thread_state pointer +
# +-----------------------+
# + return address +
# +-----------------------+ <-- SP
# + +
#
DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
_LIBUNWIND_CET_ENDBR
push %eax
movl 8(%esp), %eax
movl %ebx, 4(%eax)
movl %ecx, 8(%eax)
movl %edx, 12(%eax)
movl %edi, 16(%eax)
movl %esi, 20(%eax)
movl %ebp, 24(%eax)
movl %esp, %edx
addl $8, %edx
movl %edx, 28(%eax) # store what sp was at call site as esp
# skip ss
# skip eflags
movl 4(%esp), %edx
movl %edx, 40(%eax) # store return address as eip
# skip cs
# skip ds
# skip es
# skip fs
# skip gs
movl (%esp), %edx
movl %edx, (%eax) # store original eax
popl %eax
xorl %eax, %eax # return UNW_ESUCCESS
ret
#elif defined(__x86_64__)
#
# extern int __unw_getcontext(unw_context_t* thread_state)
#
# On entry:
# thread_state pointer is in rdi
#
DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
#if defined(_WIN64)
#define PTR %rcx
#define TMP %rdx
#else
#define PTR %rdi
#define TMP %rsi
#endif
_LIBUNWIND_CET_ENDBR
movq %rax, (PTR)
movq %rbx, 8(PTR)
movq %rcx, 16(PTR)
movq %rdx, 24(PTR)
movq %rdi, 32(PTR)
movq %rsi, 40(PTR)
movq %rbp, 48(PTR)
movq %rsp, 56(PTR)
addq $8, 56(PTR)
movq %r8, 64(PTR)
movq %r9, 72(PTR)
movq %r10, 80(PTR)
movq %r11, 88(PTR)
movq %r12, 96(PTR)
movq %r13,104(PTR)
movq %r14,112(PTR)
movq %r15,120(PTR)
movq (%rsp),TMP
movq TMP,128(PTR) # store return address as rip
# skip rflags
# skip cs
# skip fs
# skip gs
#if defined(_WIN64)
movdqu %xmm0,176(PTR)
movdqu %xmm1,192(PTR)
movdqu %xmm2,208(PTR)
movdqu %xmm3,224(PTR)
movdqu %xmm4,240(PTR)
movdqu %xmm5,256(PTR)
movdqu %xmm6,272(PTR)
movdqu %xmm7,288(PTR)
movdqu %xmm8,304(PTR)
movdqu %xmm9,320(PTR)
movdqu %xmm10,336(PTR)
movdqu %xmm11,352(PTR)
movdqu %xmm12,368(PTR)
movdqu %xmm13,384(PTR)
movdqu %xmm14,400(PTR)
movdqu %xmm15,416(PTR)
#endif
xorl %eax, %eax # return UNW_ESUCCESS
ret
#elif defined(__mips__) && defined(_ABIO32) && _MIPS_SIM == _ABIO32
#
# extern int __unw_getcontext(unw_context_t* thread_state)
#
# On entry:
# thread_state pointer is in a0 ($4)
#
DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
.set push
.set noat
.set noreorder
.set nomacro
sw $1, (4 * 1)($4)
sw $2, (4 * 2)($4)
sw $3, (4 * 3)($4)
sw $4, (4 * 4)($4)
sw $5, (4 * 5)($4)
sw $6, (4 * 6)($4)
sw $7, (4 * 7)($4)
sw $8, (4 * 8)($4)
sw $9, (4 * 9)($4)
sw $10, (4 * 10)($4)
sw $11, (4 * 11)($4)
sw $12, (4 * 12)($4)
sw $13, (4 * 13)($4)
sw $14, (4 * 14)($4)
sw $15, (4 * 15)($4)
sw $16, (4 * 16)($4)
sw $17, (4 * 17)($4)
sw $18, (4 * 18)($4)
sw $19, (4 * 19)($4)
sw $20, (4 * 20)($4)
sw $21, (4 * 21)($4)
sw $22, (4 * 22)($4)
sw $23, (4 * 23)($4)
sw $24, (4 * 24)($4)
sw $25, (4 * 25)($4)
sw $26, (4 * 26)($4)
sw $27, (4 * 27)($4)
sw $28, (4 * 28)($4)
sw $29, (4 * 29)($4)
sw $30, (4 * 30)($4)
sw $31, (4 * 31)($4)
# Store return address to pc
sw $31, (4 * 32)($4)
# hi and lo
mfhi $8
sw $8, (4 * 33)($4)
mflo $8
sw $8, (4 * 34)($4)
#ifdef __mips_hard_float
#if __mips_fpr != 64
sdc1 $f0, (4 * 36 + 8 * 0)($4)
sdc1 $f2, (4 * 36 + 8 * 2)($4)
sdc1 $f4, (4 * 36 + 8 * 4)($4)
sdc1 $f6, (4 * 36 + 8 * 6)($4)
sdc1 $f8, (4 * 36 + 8 * 8)($4)
sdc1 $f10, (4 * 36 + 8 * 10)($4)
sdc1 $f12, (4 * 36 + 8 * 12)($4)
sdc1 $f14, (4 * 36 + 8 * 14)($4)
sdc1 $f16, (4 * 36 + 8 * 16)($4)
sdc1 $f18, (4 * 36 + 8 * 18)($4)
sdc1 $f20, (4 * 36 + 8 * 20)($4)
sdc1 $f22, (4 * 36 + 8 * 22)($4)
sdc1 $f24, (4 * 36 + 8 * 24)($4)
sdc1 $f26, (4 * 36 + 8 * 26)($4)
sdc1 $f28, (4 * 36 + 8 * 28)($4)
sdc1 $f30, (4 * 36 + 8 * 30)($4)
#else
sdc1 $f0, (4 * 36 + 8 * 0)($4)
sdc1 $f1, (4 * 36 + 8 * 1)($4)
sdc1 $f2, (4 * 36 + 8 * 2)($4)
sdc1 $f3, (4 * 36 + 8 * 3)($4)
sdc1 $f4, (4 * 36 + 8 * 4)($4)
sdc1 $f5, (4 * 36 + 8 * 5)($4)
sdc1 $f6, (4 * 36 + 8 * 6)($4)
sdc1 $f7, (4 * 36 + 8 * 7)($4)
sdc1 $f8, (4 * 36 + 8 * 8)($4)
sdc1 $f9, (4 * 36 + 8 * 9)($4)
sdc1 $f10, (4 * 36 + 8 * 10)($4)
sdc1 $f11, (4 * 36 + 8 * 11)($4)
sdc1 $f12, (4 * 36 + 8 * 12)($4)
sdc1 $f13, (4 * 36 + 8 * 13)($4)
sdc1 $f14, (4 * 36 + 8 * 14)($4)
sdc1 $f15, (4 * 36 + 8 * 15)($4)
sdc1 $f16, (4 * 36 + 8 * 16)($4)
sdc1 $f17, (4 * 36 + 8 * 17)($4)
sdc1 $f18, (4 * 36 + 8 * 18)($4)
sdc1 $f19, (4 * 36 + 8 * 19)($4)
sdc1 $f20, (4 * 36 + 8 * 20)($4)
sdc1 $f21, (4 * 36 + 8 * 21)($4)
sdc1 $f22, (4 * 36 + 8 * 22)($4)
sdc1 $f23, (4 * 36 + 8 * 23)($4)
sdc1 $f24, (4 * 36 + 8 * 24)($4)
sdc1 $f25, (4 * 36 + 8 * 25)($4)
sdc1 $f26, (4 * 36 + 8 * 26)($4)
sdc1 $f27, (4 * 36 + 8 * 27)($4)
sdc1 $f28, (4 * 36 + 8 * 28)($4)
sdc1 $f29, (4 * 36 + 8 * 29)($4)
sdc1 $f30, (4 * 36 + 8 * 30)($4)
sdc1 $f31, (4 * 36 + 8 * 31)($4)
#endif
#endif
jr $31
# return UNW_ESUCCESS
or $2, $0, $0
.set pop
#elif defined(__mips64)
#
# extern int __unw_getcontext(unw_context_t* thread_state)
#
# On entry:
# thread_state pointer is in a0 ($4)
#
DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
.set push
.set noat
.set noreorder
.set nomacro
sd $1, (8 * 1)($4)
sd $2, (8 * 2)($4)
sd $3, (8 * 3)($4)
sd $4, (8 * 4)($4)
sd $5, (8 * 5)($4)
sd $6, (8 * 6)($4)
sd $7, (8 * 7)($4)
sd $8, (8 * 8)($4)
sd $9, (8 * 9)($4)
sd $10, (8 * 10)($4)
sd $11, (8 * 11)($4)
sd $12, (8 * 12)($4)
sd $13, (8 * 13)($4)
sd $14, (8 * 14)($4)
sd $15, (8 * 15)($4)
sd $16, (8 * 16)($4)
sd $17, (8 * 17)($4)
sd $18, (8 * 18)($4)
sd $19, (8 * 19)($4)
sd $20, (8 * 20)($4)
sd $21, (8 * 21)($4)
sd $22, (8 * 22)($4)
sd $23, (8 * 23)($4)
sd $24, (8 * 24)($4)
sd $25, (8 * 25)($4)
sd $26, (8 * 26)($4)
sd $27, (8 * 27)($4)
sd $28, (8 * 28)($4)
sd $29, (8 * 29)($4)
sd $30, (8 * 30)($4)
sd $31, (8 * 31)($4)
# Store return address to pc
sd $31, (8 * 32)($4)
# hi and lo
mfhi $8
sd $8, (8 * 33)($4)
mflo $8
sd $8, (8 * 34)($4)
#ifdef __mips_hard_float
sdc1 $f0, (8 * 35)($4)
sdc1 $f1, (8 * 36)($4)
sdc1 $f2, (8 * 37)($4)
sdc1 $f3, (8 * 38)($4)
sdc1 $f4, (8 * 39)($4)
sdc1 $f5, (8 * 40)($4)
sdc1 $f6, (8 * 41)($4)
sdc1 $f7, (8 * 42)($4)
sdc1 $f8, (8 * 43)($4)
sdc1 $f9, (8 * 44)($4)
sdc1 $f10, (8 * 45)($4)
sdc1 $f11, (8 * 46)($4)
sdc1 $f12, (8 * 47)($4)
sdc1 $f13, (8 * 48)($4)
sdc1 $f14, (8 * 49)($4)
sdc1 $f15, (8 * 50)($4)
sdc1 $f16, (8 * 51)($4)
sdc1 $f17, (8 * 52)($4)
sdc1 $f18, (8 * 53)($4)
sdc1 $f19, (8 * 54)($4)
sdc1 $f20, (8 * 55)($4)
sdc1 $f21, (8 * 56)($4)
sdc1 $f22, (8 * 57)($4)
sdc1 $f23, (8 * 58)($4)
sdc1 $f24, (8 * 59)($4)
sdc1 $f25, (8 * 60)($4)
sdc1 $f26, (8 * 61)($4)
sdc1 $f27, (8 * 62)($4)
sdc1 $f28, (8 * 63)($4)
sdc1 $f29, (8 * 64)($4)
sdc1 $f30, (8 * 65)($4)
sdc1 $f31, (8 * 66)($4)
#endif
jr $31
# return UNW_ESUCCESS
or $2, $0, $0
.set pop
# elif defined(__mips__)
#
# extern int __unw_getcontext(unw_context_t* thread_state)
#
# Just trap for the time being.
DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
teq $0, $0
#elif defined(__powerpc64__)
//
// extern int __unw_getcontext(unw_context_t* thread_state)
//
// On entry:
// thread_state pointer is in r3
//
DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
// store register (GPR)
#define PPC64_STR(n) \
std n, (8 * (n + 2))(3)
// save GPRs
PPC64_STR(0)
mflr 0
std 0, PPC64_OFFS_SRR0(3) // store lr as srr0
PPC64_STR(1)
PPC64_STR(2)
PPC64_STR(3)
PPC64_STR(4)
PPC64_STR(5)
PPC64_STR(6)
PPC64_STR(7)
PPC64_STR(8)
PPC64_STR(9)
PPC64_STR(10)
PPC64_STR(11)
PPC64_STR(12)
PPC64_STR(13)
PPC64_STR(14)
PPC64_STR(15)
PPC64_STR(16)
PPC64_STR(17)
PPC64_STR(18)
PPC64_STR(19)
PPC64_STR(20)
PPC64_STR(21)
PPC64_STR(22)
PPC64_STR(23)
PPC64_STR(24)
PPC64_STR(25)
PPC64_STR(26)
PPC64_STR(27)
PPC64_STR(28)
PPC64_STR(29)
PPC64_STR(30)
PPC64_STR(31)
mfcr 0
std 0, PPC64_OFFS_CR(3)
mfxer 0
std 0, PPC64_OFFS_XER(3)
mflr 0
std 0, PPC64_OFFS_LR(3)
mfctr 0
std 0, PPC64_OFFS_CTR(3)
mfvrsave 0
std 0, PPC64_OFFS_VRSAVE(3)
#if defined(__VSX__)
// save VS registers
// (note that this also saves floating point registers and V registers,
// because part of VS is mapped to these registers)
addi 4, 3, PPC64_OFFS_FP
// store VS register
#define PPC64_STVS(n) \
stxvd2x n, 0, 4 ;\
addi 4, 4, 16
PPC64_STVS(0)
PPC64_STVS(1)
PPC64_STVS(2)
PPC64_STVS(3)
PPC64_STVS(4)
PPC64_STVS(5)
PPC64_STVS(6)
PPC64_STVS(7)
PPC64_STVS(8)
PPC64_STVS(9)
PPC64_STVS(10)
PPC64_STVS(11)
PPC64_STVS(12)
PPC64_STVS(13)
PPC64_STVS(14)
PPC64_STVS(15)
PPC64_STVS(16)
PPC64_STVS(17)
PPC64_STVS(18)
PPC64_STVS(19)
PPC64_STVS(20)
PPC64_STVS(21)
PPC64_STVS(22)
PPC64_STVS(23)
PPC64_STVS(24)
PPC64_STVS(25)
PPC64_STVS(26)
PPC64_STVS(27)
PPC64_STVS(28)
PPC64_STVS(29)
PPC64_STVS(30)
PPC64_STVS(31)
PPC64_STVS(32)
PPC64_STVS(33)
PPC64_STVS(34)
PPC64_STVS(35)
PPC64_STVS(36)
PPC64_STVS(37)
PPC64_STVS(38)
PPC64_STVS(39)
PPC64_STVS(40)
PPC64_STVS(41)
PPC64_STVS(42)
PPC64_STVS(43)
PPC64_STVS(44)
PPC64_STVS(45)
PPC64_STVS(46)
PPC64_STVS(47)
PPC64_STVS(48)
PPC64_STVS(49)
PPC64_STVS(50)
PPC64_STVS(51)
PPC64_STVS(52)
PPC64_STVS(53)
PPC64_STVS(54)
PPC64_STVS(55)
PPC64_STVS(56)
PPC64_STVS(57)
PPC64_STVS(58)
PPC64_STVS(59)
PPC64_STVS(60)
PPC64_STVS(61)
PPC64_STVS(62)
PPC64_STVS(63)
#else
// store FP register
#define PPC64_STF(n) \
stfd n, (PPC64_OFFS_FP + n * 16)(3)
// save float registers
PPC64_STF(0)
PPC64_STF(1)
PPC64_STF(2)
PPC64_STF(3)
PPC64_STF(4)
PPC64_STF(5)
PPC64_STF(6)
PPC64_STF(7)
PPC64_STF(8)
PPC64_STF(9)
PPC64_STF(10)
PPC64_STF(11)
PPC64_STF(12)
PPC64_STF(13)
PPC64_STF(14)
PPC64_STF(15)
PPC64_STF(16)
PPC64_STF(17)
PPC64_STF(18)
PPC64_STF(19)
PPC64_STF(20)
PPC64_STF(21)
PPC64_STF(22)
PPC64_STF(23)
PPC64_STF(24)
PPC64_STF(25)
PPC64_STF(26)
PPC64_STF(27)
PPC64_STF(28)
PPC64_STF(29)
PPC64_STF(30)
PPC64_STF(31)
#if defined(__ALTIVEC__)
// save vector registers
// Use the 16 bytes below the stack pointer as an
// aligned buffer to save each vector register.
// Note that the stack pointer is always 16-byte aligned.
subi 4, 1, 16
#define PPC64_STV_UNALIGNED(n) \
stvx n, 0, 4 ;\
ld 5, 0(4) ;\
std 5, (PPC64_OFFS_V + n * 16)(3) ;\
ld 5, 8(4) ;\
std 5, (PPC64_OFFS_V + n * 16 + 8)(3)
PPC64_STV_UNALIGNED(0)
PPC64_STV_UNALIGNED(1)
PPC64_STV_UNALIGNED(2)
PPC64_STV_UNALIGNED(3)
PPC64_STV_UNALIGNED(4)
PPC64_STV_UNALIGNED(5)
PPC64_STV_UNALIGNED(6)
PPC64_STV_UNALIGNED(7)
PPC64_STV_UNALIGNED(8)
PPC64_STV_UNALIGNED(9)
PPC64_STV_UNALIGNED(10)
PPC64_STV_UNALIGNED(11)
PPC64_STV_UNALIGNED(12)
PPC64_STV_UNALIGNED(13)
PPC64_STV_UNALIGNED(14)
PPC64_STV_UNALIGNED(15)
PPC64_STV_UNALIGNED(16)
PPC64_STV_UNALIGNED(17)
PPC64_STV_UNALIGNED(18)
PPC64_STV_UNALIGNED(19)
PPC64_STV_UNALIGNED(20)
PPC64_STV_UNALIGNED(21)
PPC64_STV_UNALIGNED(22)
PPC64_STV_UNALIGNED(23)
PPC64_STV_UNALIGNED(24)
PPC64_STV_UNALIGNED(25)
PPC64_STV_UNALIGNED(26)
PPC64_STV_UNALIGNED(27)
PPC64_STV_UNALIGNED(28)
PPC64_STV_UNALIGNED(29)
PPC64_STV_UNALIGNED(30)
PPC64_STV_UNALIGNED(31)
#endif
#endif
li 3, 0 // return UNW_ESUCCESS
blr
#elif defined(__powerpc__)
//
// extern int __unw_getcontext(unw_context_t* thread_state)
//
// On entry:
// thread_state pointer is in r3
//
DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
stw 0, 8(3)
mflr 0
stw 0, 0(3) // store lr as srr0
stw 1, 12(3)
stw 2, 16(3)
stw 3, 20(3)
stw 4, 24(3)
stw 5, 28(3)
stw 6, 32(3)
stw 7, 36(3)
stw 8, 40(3)
stw 9, 44(3)
stw 10, 48(3)
stw 11, 52(3)
stw 12, 56(3)
stw 13, 60(3)
stw 14, 64(3)
stw 15, 68(3)
stw 16, 72(3)
stw 17, 76(3)
stw 18, 80(3)
stw 19, 84(3)
stw 20, 88(3)
stw 21, 92(3)
stw 22, 96(3)
stw 23,100(3)
stw 24,104(3)
stw 25,108(3)
stw 26,112(3)
stw 27,116(3)
stw 28,120(3)
stw 29,124(3)
stw 30,128(3)
stw 31,132(3)
+#if defined(__ALTIVEC__)
// save VRSave register
mfspr 0, 256
stw 0, 156(3)
+#endif
// save CR registers
mfcr 0
stw 0, 136(3)
// save CTR register
mfctr 0
stw 0, 148(3)
#if !defined(__NO_FPRS__)
// save float registers
stfd 0, 160(3)
stfd 1, 168(3)
stfd 2, 176(3)
stfd 3, 184(3)
stfd 4, 192(3)
stfd 5, 200(3)
stfd 6, 208(3)
stfd 7, 216(3)
stfd 8, 224(3)
stfd 9, 232(3)
stfd 10,240(3)
stfd 11,248(3)
stfd 12,256(3)
stfd 13,264(3)
stfd 14,272(3)
stfd 15,280(3)
stfd 16,288(3)
stfd 17,296(3)
stfd 18,304(3)
stfd 19,312(3)
stfd 20,320(3)
stfd 21,328(3)
stfd 22,336(3)
stfd 23,344(3)
stfd 24,352(3)
stfd 25,360(3)
stfd 26,368(3)
stfd 27,376(3)
stfd 28,384(3)
stfd 29,392(3)
stfd 30,400(3)
stfd 31,408(3)
#endif
#if defined(__ALTIVEC__)
// save vector registers
subi 4, 1, 16
rlwinm 4, 4, 0, 0, 27 // mask low 4-bits
// r4 is now a 16-byte aligned pointer into the red zone
#define SAVE_VECTOR_UNALIGNED(_vec, _offset) \
stvx _vec, 0, 4 SEPARATOR \
lwz 5, 0(4) SEPARATOR \
stw 5, _offset(3) SEPARATOR \
lwz 5, 4(4) SEPARATOR \
stw 5, _offset+4(3) SEPARATOR \
lwz 5, 8(4) SEPARATOR \
stw 5, _offset+8(3) SEPARATOR \
lwz 5, 12(4) SEPARATOR \
stw 5, _offset+12(3)
SAVE_VECTOR_UNALIGNED( 0, 424+0x000)
SAVE_VECTOR_UNALIGNED( 1, 424+0x010)
SAVE_VECTOR_UNALIGNED( 2, 424+0x020)
SAVE_VECTOR_UNALIGNED( 3, 424+0x030)
SAVE_VECTOR_UNALIGNED( 4, 424+0x040)
SAVE_VECTOR_UNALIGNED( 5, 424+0x050)
SAVE_VECTOR_UNALIGNED( 6, 424+0x060)
SAVE_VECTOR_UNALIGNED( 7, 424+0x070)
SAVE_VECTOR_UNALIGNED( 8, 424+0x080)
SAVE_VECTOR_UNALIGNED( 9, 424+0x090)
SAVE_VECTOR_UNALIGNED(10, 424+0x0A0)
SAVE_VECTOR_UNALIGNED(11, 424+0x0B0)
SAVE_VECTOR_UNALIGNED(12, 424+0x0C0)
SAVE_VECTOR_UNALIGNED(13, 424+0x0D0)
SAVE_VECTOR_UNALIGNED(14, 424+0x0E0)
SAVE_VECTOR_UNALIGNED(15, 424+0x0F0)
SAVE_VECTOR_UNALIGNED(16, 424+0x100)
SAVE_VECTOR_UNALIGNED(17, 424+0x110)
SAVE_VECTOR_UNALIGNED(18, 424+0x120)
SAVE_VECTOR_UNALIGNED(19, 424+0x130)
SAVE_VECTOR_UNALIGNED(20, 424+0x140)
SAVE_VECTOR_UNALIGNED(21, 424+0x150)
SAVE_VECTOR_UNALIGNED(22, 424+0x160)
SAVE_VECTOR_UNALIGNED(23, 424+0x170)
SAVE_VECTOR_UNALIGNED(24, 424+0x180)
SAVE_VECTOR_UNALIGNED(25, 424+0x190)
SAVE_VECTOR_UNALIGNED(26, 424+0x1A0)
SAVE_VECTOR_UNALIGNED(27, 424+0x1B0)
SAVE_VECTOR_UNALIGNED(28, 424+0x1C0)
SAVE_VECTOR_UNALIGNED(29, 424+0x1D0)
SAVE_VECTOR_UNALIGNED(30, 424+0x1E0)
SAVE_VECTOR_UNALIGNED(31, 424+0x1F0)
#endif
li 3, 0 // return UNW_ESUCCESS
blr
#elif defined(__aarch64__)
//
// extern int __unw_getcontext(unw_context_t* thread_state)
//
// On entry:
// thread_state pointer is in x0
//
.p2align 2
DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
stp x0, x1, [x0, #0x000]
stp x2, x3, [x0, #0x010]
stp x4, x5, [x0, #0x020]
stp x6, x7, [x0, #0x030]
stp x8, x9, [x0, #0x040]
stp x10,x11, [x0, #0x050]
stp x12,x13, [x0, #0x060]
stp x14,x15, [x0, #0x070]
stp x16,x17, [x0, #0x080]
stp x18,x19, [x0, #0x090]
stp x20,x21, [x0, #0x0A0]
stp x22,x23, [x0, #0x0B0]
stp x24,x25, [x0, #0x0C0]
stp x26,x27, [x0, #0x0D0]
stp x28,x29, [x0, #0x0E0]
str x30, [x0, #0x0F0]
mov x1,sp
str x1, [x0, #0x0F8]
str x30, [x0, #0x100] // store return address as pc
// skip cpsr
stp d0, d1, [x0, #0x110]
stp d2, d3, [x0, #0x120]
stp d4, d5, [x0, #0x130]
stp d6, d7, [x0, #0x140]
stp d8, d9, [x0, #0x150]
stp d10,d11, [x0, #0x160]
stp d12,d13, [x0, #0x170]
stp d14,d15, [x0, #0x180]
stp d16,d17, [x0, #0x190]
stp d18,d19, [x0, #0x1A0]
stp d20,d21, [x0, #0x1B0]
stp d22,d23, [x0, #0x1C0]
stp d24,d25, [x0, #0x1D0]
stp d26,d27, [x0, #0x1E0]
stp d28,d29, [x0, #0x1F0]
str d30, [x0, #0x200]
str d31, [x0, #0x208]
mov x0, #0 // return UNW_ESUCCESS
ret
#elif defined(__arm__) && !defined(__APPLE__)
#if !defined(__ARM_ARCH_ISA_ARM)
#if (__ARM_ARCH_ISA_THUMB == 2)
.syntax unified
#endif
.thumb
#endif
@
@ extern int __unw_getcontext(unw_context_t* thread_state)
@
@ On entry:
@ thread_state pointer is in r0
@
@ Per EHABI #4.7 this only saves the core integer registers.
@ EHABI #7.4.5 notes that in general all VRS registers should be restored;
@ however, this is very hard to do for VFP registers because it is unknown
@ to the library how many registers are implemented by the architecture.
@ Instead, VFP registers are demand saved by logic external to __unw_getcontext.
@
.p2align 2
DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
#if !defined(__ARM_ARCH_ISA_ARM) && __ARM_ARCH_ISA_THUMB == 1
stm r0!, {r0-r7}
mov r1, r8
mov r2, r9
mov r3, r10
stm r0!, {r1-r3}
mov r1, r11
mov r2, sp
mov r3, lr
str r1, [r0, #0] @ r11
@ r12 does not need storing, it is the intra-procedure-call scratch register
str r2, [r0, #8] @ sp
str r3, [r0, #12] @ lr
str r3, [r0, #16] @ store return address as pc
@ T1 does not have a non-cpsr-clobbering register-zeroing instruction.
@ It is safe to use here though because we are about to return, and cpsr is
@ not expected to be preserved.
movs r0, #0 @ return UNW_ESUCCESS
#else
@ 32bit thumb-2 restrictions for stm:
@ . the sp (r13) cannot be in the list
@ . the pc (r15) cannot be in the list in an STM instruction
stm r0, {r0-r12}
str sp, [r0, #52]
str lr, [r0, #56]
str lr, [r0, #60] @ store return address as pc
mov r0, #0 @ return UNW_ESUCCESS
#endif
JMP(lr)
@
@ static void libunwind::Registers_arm::saveVFPWithFSTMD(unw_fpreg_t* values)
@
@ On entry:
@ values pointer is in r0
@
.p2align 2
#if defined(__ELF__)
.fpu vfpv3-d16
#endif
DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm16saveVFPWithFSTMDEPv)
vstmia r0, {d0-d15}
JMP(lr)
@
@ static void libunwind::Registers_arm::saveVFPWithFSTMX(unw_fpreg_t* values)
@
@ On entry:
@ values pointer is in r0
@
.p2align 2
#if defined(__ELF__)
.fpu vfpv3-d16
#endif
DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm16saveVFPWithFSTMXEPv)
vstmia r0, {d0-d15} @ fstmiax is deprecated in ARMv7+ and now behaves like vstmia
JMP(lr)
@
@ static void libunwind::Registers_arm::saveVFPv3(unw_fpreg_t* values)
@
@ On entry:
@ values pointer is in r0
@
.p2align 2
#if defined(__ELF__)
.fpu vfpv3
#endif
DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm9saveVFPv3EPv)
@ VFP and iwMMX instructions are only available when compiling with the flags
@ that enable them. We do not want to do that in the library (because we do not
@ want the compiler to generate instructions that access those) but this is
@ only accessed if the personality routine needs these registers. Use of
@ these registers implies they are, actually, available on the target, so
@ it's ok to execute.
@ So, generate the instructions using the corresponding coprocessor mnemonic.
vstmia r0, {d16-d31}
JMP(lr)
#if defined(_LIBUNWIND_ARM_WMMX)
@
@ static void libunwind::Registers_arm::saveiWMMX(unw_fpreg_t* values)
@
@ On entry:
@ values pointer is in r0
@
.p2align 2
#if defined(__ELF__)
.arch armv5te
#endif
DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm9saveiWMMXEPv)
stcl p1, cr0, [r0], #8 @ wstrd wR0, [r0], #8
stcl p1, cr1, [r0], #8 @ wstrd wR1, [r0], #8
stcl p1, cr2, [r0], #8 @ wstrd wR2, [r0], #8
stcl p1, cr3, [r0], #8 @ wstrd wR3, [r0], #8
stcl p1, cr4, [r0], #8 @ wstrd wR4, [r0], #8
stcl p1, cr5, [r0], #8 @ wstrd wR5, [r0], #8
stcl p1, cr6, [r0], #8 @ wstrd wR6, [r0], #8
stcl p1, cr7, [r0], #8 @ wstrd wR7, [r0], #8
stcl p1, cr8, [r0], #8 @ wstrd wR8, [r0], #8
stcl p1, cr9, [r0], #8 @ wstrd wR9, [r0], #8
stcl p1, cr10, [r0], #8 @ wstrd wR10, [r0], #8
stcl p1, cr11, [r0], #8 @ wstrd wR11, [r0], #8
stcl p1, cr12, [r0], #8 @ wstrd wR12, [r0], #8
stcl p1, cr13, [r0], #8 @ wstrd wR13, [r0], #8
stcl p1, cr14, [r0], #8 @ wstrd wR14, [r0], #8
stcl p1, cr15, [r0], #8 @ wstrd wR15, [r0], #8
JMP(lr)
@
@ static void libunwind::Registers_arm::saveiWMMXControl(unw_uint32_t* values)
@
@ On entry:
@ values pointer is in r0
@
.p2align 2
#if defined(__ELF__)
.arch armv5te
#endif
DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm16saveiWMMXControlEPj)
stc2 p1, cr8, [r0], #4 @ wstrw wCGR0, [r0], #4
stc2 p1, cr9, [r0], #4 @ wstrw wCGR1, [r0], #4
stc2 p1, cr10, [r0], #4 @ wstrw wCGR2, [r0], #4
stc2 p1, cr11, [r0], #4 @ wstrw wCGR3, [r0], #4
JMP(lr)
#endif
#elif defined(__or1k__)
#
# extern int __unw_getcontext(unw_context_t* thread_state)
#
# On entry:
# thread_state pointer is in r3
#
DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
l.sw 0(r3), r0
l.sw 4(r3), r1
l.sw 8(r3), r2
l.sw 12(r3), r3
l.sw 16(r3), r4
l.sw 20(r3), r5
l.sw 24(r3), r6
l.sw 28(r3), r7
l.sw 32(r3), r8
l.sw 36(r3), r9
l.sw 40(r3), r10
l.sw 44(r3), r11
l.sw 48(r3), r12
l.sw 52(r3), r13
l.sw 56(r3), r14
l.sw 60(r3), r15
l.sw 64(r3), r16
l.sw 68(r3), r17
l.sw 72(r3), r18
l.sw 76(r3), r19
l.sw 80(r3), r20
l.sw 84(r3), r21
l.sw 88(r3), r22
l.sw 92(r3), r23
l.sw 96(r3), r24
l.sw 100(r3), r25
l.sw 104(r3), r26
l.sw 108(r3), r27
l.sw 112(r3), r28
l.sw 116(r3), r29
l.sw 120(r3), r30
l.sw 124(r3), r31
# store ra to pc
l.sw 128(r3), r9
# zero epcr
l.sw 132(r3), r0
#elif defined(__hexagon__)
#
# extern int __unw_getcontext(unw_context_t* thread_state)
#
# On entry:
# thread_state pointer is in r0
#
#define OFFSET(offset) (offset/4)
DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
memw(r0+#32) = r8
memw(r0+#36) = r9
memw(r0+#40) = r10
memw(r0+#44) = r11
memw(r0+#48) = r12
memw(r0+#52) = r13
memw(r0+#56) = r14
memw(r0+#60) = r15
memw(r0+#64) = r16
memw(r0+#68) = r17
memw(r0+#72) = r18
memw(r0+#76) = r19
memw(r0+#80) = r20
memw(r0+#84) = r21
memw(r0+#88) = r22
memw(r0+#92) = r23
memw(r0+#96) = r24
memw(r0+#100) = r25
memw(r0+#104) = r26
memw(r0+#108) = r27
memw(r0+#112) = r28
memw(r0+#116) = r29
memw(r0+#120) = r30
memw(r0+#124) = r31
r1 = c4 // Predicate register
memw(r0+#128) = r1
r1 = memw(r30) // *FP == Saved FP
r1 = r31
memw(r0+#132) = r1
jumpr r31
#elif defined(__sparc__) && defined(__arch64__)
#
# extern int __unw_getcontext(unw_context_t* thread_state)
#
# On entry:
# thread_state pointer is in %o0
#
DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
.register %g2, #scratch
.register %g3, #scratch
.register %g6, #scratch
.register %g7, #scratch
stx %g1, [%o0 + 0x08]
stx %g2, [%o0 + 0x10]
stx %g3, [%o0 + 0x18]
stx %g4, [%o0 + 0x20]
stx %g5, [%o0 + 0x28]
stx %g6, [%o0 + 0x30]
stx %g7, [%o0 + 0x38]
stx %o0, [%o0 + 0x40]
stx %o1, [%o0 + 0x48]
stx %o2, [%o0 + 0x50]
stx %o3, [%o0 + 0x58]
stx %o4, [%o0 + 0x60]
stx %o5, [%o0 + 0x68]
stx %o6, [%o0 + 0x70]
stx %o7, [%o0 + 0x78]
stx %l0, [%o0 + 0x80]
stx %l1, [%o0 + 0x88]
stx %l2, [%o0 + 0x90]
stx %l3, [%o0 + 0x98]
stx %l4, [%o0 + 0xa0]
stx %l5, [%o0 + 0xa8]
stx %l6, [%o0 + 0xb0]
stx %l7, [%o0 + 0xb8]
stx %i0, [%o0 + 0xc0]
stx %i1, [%o0 + 0xc8]
stx %i2, [%o0 + 0xd0]
stx %i3, [%o0 + 0xd8]
stx %i4, [%o0 + 0xe0]
stx %i5, [%o0 + 0xe8]
stx %i6, [%o0 + 0xf0]
stx %i7, [%o0 + 0xf8]
# save StackGhost cookie
mov %i7, %g4
save %sp, -176, %sp
# register window flush necessary even without StackGhost
flushw
restore
ldx [%sp + 2047 + 0x78], %g5
xor %g4, %g5, %g4
stx %g4, [%o0 + 0x100]
retl
# return UNW_ESUCCESS
clr %o0
#elif defined(__sparc__)
#
# extern int __unw_getcontext(unw_context_t* thread_state)
#
# On entry:
# thread_state pointer is in o0
#
DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
ta 3
add %o7, 8, %o7
std %g0, [%o0 + 0]
std %g2, [%o0 + 8]
std %g4, [%o0 + 16]
std %g6, [%o0 + 24]
std %o0, [%o0 + 32]
std %o2, [%o0 + 40]
std %o4, [%o0 + 48]
std %o6, [%o0 + 56]
std %l0, [%o0 + 64]
std %l2, [%o0 + 72]
std %l4, [%o0 + 80]
std %l6, [%o0 + 88]
std %i0, [%o0 + 96]
std %i2, [%o0 + 104]
std %i4, [%o0 + 112]
std %i6, [%o0 + 120]
jmp %o7
clr %o0 // return UNW_ESUCCESS
#elif defined(__riscv)
#
# extern int __unw_getcontext(unw_context_t* thread_state)
#
# On entry:
# thread_state pointer is in a0
#
DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext)
ISTORE x1, (RISCV_ISIZE * 0)(a0) // store ra as pc
ISTORE x1, (RISCV_ISIZE * 1)(a0)
ISTORE x2, (RISCV_ISIZE * 2)(a0)
ISTORE x3, (RISCV_ISIZE * 3)(a0)
ISTORE x4, (RISCV_ISIZE * 4)(a0)
ISTORE x5, (RISCV_ISIZE * 5)(a0)
ISTORE x6, (RISCV_ISIZE * 6)(a0)
ISTORE x7, (RISCV_ISIZE * 7)(a0)
ISTORE x8, (RISCV_ISIZE * 8)(a0)
ISTORE x9, (RISCV_ISIZE * 9)(a0)
ISTORE x10, (RISCV_ISIZE * 10)(a0)
ISTORE x11, (RISCV_ISIZE * 11)(a0)
ISTORE x12, (RISCV_ISIZE * 12)(a0)
ISTORE x13, (RISCV_ISIZE * 13)(a0)
ISTORE x14, (RISCV_ISIZE * 14)(a0)
ISTORE x15, (RISCV_ISIZE * 15)(a0)
ISTORE x16, (RISCV_ISIZE * 16)(a0)
ISTORE x17, (RISCV_ISIZE * 17)(a0)
ISTORE x18, (RISCV_ISIZE * 18)(a0)
ISTORE x19, (RISCV_ISIZE * 19)(a0)
ISTORE x20, (RISCV_ISIZE * 20)(a0)
ISTORE x21, (RISCV_ISIZE * 21)(a0)
ISTORE x22, (RISCV_ISIZE * 22)(a0)
ISTORE x23, (RISCV_ISIZE * 23)(a0)
ISTORE x24, (RISCV_ISIZE * 24)(a0)
ISTORE x25, (RISCV_ISIZE * 25)(a0)
ISTORE x26, (RISCV_ISIZE * 26)(a0)
ISTORE x27, (RISCV_ISIZE * 27)(a0)
ISTORE x28, (RISCV_ISIZE * 28)(a0)
ISTORE x29, (RISCV_ISIZE * 29)(a0)
ISTORE x30, (RISCV_ISIZE * 30)(a0)
ISTORE x31, (RISCV_ISIZE * 31)(a0)
# if defined(__riscv_flen)
FSTORE f0, (RISCV_FOFFSET + RISCV_FSIZE * 0)(a0)
FSTORE f1, (RISCV_FOFFSET + RISCV_FSIZE * 1)(a0)
FSTORE f2, (RISCV_FOFFSET + RISCV_FSIZE * 2)(a0)
FSTORE f3, (RISCV_FOFFSET + RISCV_FSIZE * 3)(a0)
FSTORE f4, (RISCV_FOFFSET + RISCV_FSIZE * 4)(a0)
FSTORE f5, (RISCV_FOFFSET + RISCV_FSIZE * 5)(a0)
FSTORE f6, (RISCV_FOFFSET + RISCV_FSIZE * 6)(a0)
FSTORE f7, (RISCV_FOFFSET + RISCV_FSIZE * 7)(a0)
FSTORE f8, (RISCV_FOFFSET + RISCV_FSIZE * 8)(a0)
FSTORE f9, (RISCV_FOFFSET + RISCV_FSIZE * 9)(a0)
FSTORE f10, (RISCV_FOFFSET + RISCV_FSIZE * 10)(a0)
FSTORE f11, (RISCV_FOFFSET + RISCV_FSIZE * 11)(a0)
FSTORE f12, (RISCV_FOFFSET + RISCV_FSIZE * 12)(a0)
FSTORE f13, (RISCV_FOFFSET + RISCV_FSIZE * 13)(a0)
FSTORE f14, (RISCV_FOFFSET + RISCV_FSIZE * 14)(a0)
FSTORE f15, (RISCV_FOFFSET + RISCV_FSIZE * 15)(a0)
FSTORE f16, (RISCV_FOFFSET + RISCV_FSIZE * 16)(a0)
FSTORE f17, (RISCV_FOFFSET + RISCV_FSIZE * 17)(a0)
FSTORE f18, (RISCV_FOFFSET + RISCV_FSIZE * 18)(a0)
FSTORE f19, (RISCV_FOFFSET + RISCV_FSIZE * 19)(a0)
FSTORE f20, (RISCV_FOFFSET + RISCV_FSIZE * 20)(a0)
FSTORE f21, (RISCV_FOFFSET + RISCV_FSIZE * 21)(a0)
FSTORE f22, (RISCV_FOFFSET + RISCV_FSIZE * 22)(a0)
FSTORE f23, (RISCV_FOFFSET + RISCV_FSIZE * 23)(a0)
FSTORE f24, (RISCV_FOFFSET + RISCV_FSIZE * 24)(a0)
FSTORE f25, (RISCV_FOFFSET + RISCV_FSIZE * 25)(a0)
FSTORE f26, (RISCV_FOFFSET + RISCV_FSIZE * 26)(a0)
FSTORE f27, (RISCV_FOFFSET + RISCV_FSIZE * 27)(a0)
FSTORE f28, (RISCV_FOFFSET + RISCV_FSIZE * 28)(a0)
FSTORE f29, (RISCV_FOFFSET + RISCV_FSIZE * 29)(a0)
FSTORE f30, (RISCV_FOFFSET + RISCV_FSIZE * 30)(a0)
FSTORE f31, (RISCV_FOFFSET + RISCV_FSIZE * 31)(a0)
# endif
li a0, 0 // return UNW_ESUCCESS
ret // jump to ra
#endif
WEAK_ALIAS(__unw_getcontext, unw_getcontext)
#endif /* !defined(__USING_SJLJ_EXCEPTIONS__) */
NO_EXEC_STACK_DIRECTIVE
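The register snapshots saved by __unw_getcontext above are normally consumed through libunwind's public C API; a minimal backtrace sketch, assuming the program links against libunwind and is built with unwind tables:
#define UNW_LOCAL_ONLY
#include <libunwind.h>
#include <cstdio>

static void print_backtrace() {
  unw_context_t ctx;
  unw_cursor_t cursor;
  unw_getcontext(&ctx);          // captures registers as implemented above
  unw_init_local(&cursor, &ctx); // start unwinding from that snapshot
  while (unw_step(&cursor) > 0) {
    unw_word_t ip = 0;
    unw_get_reg(&cursor, UNW_REG_IP, &ip);
    std::printf("ip=%#lx\n", (unsigned long)ip);
  }
}

int main() { print_backtrace(); return 0; }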
diff --git a/contrib/llvm-project/libunwind/src/assembly.h b/contrib/llvm-project/libunwind/src/assembly.h
index 978f6bd619bd..89293a555bfc 100644
--- a/contrib/llvm-project/libunwind/src/assembly.h
+++ b/contrib/llvm-project/libunwind/src/assembly.h
@@ -1,241 +1,241 @@
/* ===-- assembly.h - libUnwind assembler support macros -------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
* ===----------------------------------------------------------------------===
*
* This file defines macros for use in libUnwind assembler source.
* This file is not part of the interface of this library.
*
* ===----------------------------------------------------------------------===
*/
#ifndef UNWIND_ASSEMBLY_H
#define UNWIND_ASSEMBLY_H
-#if (defined(__i386__) || defined(__x86_64__)) && defined(__linux__)
+#if defined(__linux__) && defined(__CET__)
#include <cet.h>
#define _LIBUNWIND_CET_ENDBR _CET_ENDBR
#else
#define _LIBUNWIND_CET_ENDBR
#endif
#if defined(__powerpc64__)
#define SEPARATOR ;
#define PPC64_OFFS_SRR0 0
#define PPC64_OFFS_CR 272
#define PPC64_OFFS_XER 280
#define PPC64_OFFS_LR 288
#define PPC64_OFFS_CTR 296
#define PPC64_OFFS_VRSAVE 304
#define PPC64_OFFS_FP 312
#define PPC64_OFFS_V 824
#elif defined(__APPLE__) && defined(__aarch64__)
#define SEPARATOR %%
#elif defined(__riscv)
# define RISCV_ISIZE (__riscv_xlen / 8)
# define RISCV_FOFFSET (RISCV_ISIZE * 32)
# if defined(__riscv_flen)
# define RISCV_FSIZE (__riscv_flen / 8)
# endif
# if __riscv_xlen == 64
# define ILOAD ld
# define ISTORE sd
# elif __riscv_xlen == 32
# define ILOAD lw
# define ISTORE sw
# else
# error "Unsupported __riscv_xlen"
# endif
# if defined(__riscv_flen)
# if __riscv_flen == 64
# define FLOAD fld
# define FSTORE fsd
# elif __riscv_flen == 32
# define FLOAD flw
# define FSTORE fsw
# else
# error "Unsupported __riscv_flen"
# endif
# endif
# define SEPARATOR ;
#else
#define SEPARATOR ;
#endif
#if defined(__powerpc64__) && (!defined(_CALL_ELF) || _CALL_ELF == 1)
#define PPC64_OPD1 .section .opd,"aw",@progbits SEPARATOR
#define PPC64_OPD2 SEPARATOR \
.p2align 3 SEPARATOR \
.quad .Lfunc_begin0 SEPARATOR \
.quad .TOC.@tocbase SEPARATOR \
.quad 0 SEPARATOR \
.text SEPARATOR \
.Lfunc_begin0:
#else
#define PPC64_OPD1
#define PPC64_OPD2
#endif
#if defined(__aarch64__) && defined(__ARM_FEATURE_BTI_DEFAULT)
.pushsection ".note.gnu.property", "a" SEPARATOR \
.balign 8 SEPARATOR \
.long 4 SEPARATOR \
.long 0x10 SEPARATOR \
.long 0x5 SEPARATOR \
.asciz "GNU" SEPARATOR \
.long 0xc0000000 SEPARATOR /* GNU_PROPERTY_AARCH64_FEATURE_1_AND */ \
.long 4 SEPARATOR \
.long 3 SEPARATOR /* GNU_PROPERTY_AARCH64_FEATURE_1_BTI AND */ \
/* GNU_PROPERTY_AARCH64_FEATURE_1_PAC */ \
.long 0 SEPARATOR \
.popsection SEPARATOR
#define AARCH64_BTI bti c
#else
#define AARCH64_BTI
#endif
#if !defined(__aarch64__)
#ifdef __ARM_FEATURE_PAC_DEFAULT
.eabi_attribute Tag_PAC_extension, 2
.eabi_attribute Tag_PACRET_use, 1
#endif
#ifdef __ARM_FEATURE_BTI_DEFAULT
.eabi_attribute Tag_BTI_extension, 1
.eabi_attribute Tag_BTI_use, 1
#endif
#endif
#define GLUE2(a, b) a ## b
#define GLUE(a, b) GLUE2(a, b)
#define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name)
#if defined(__APPLE__)
#define SYMBOL_IS_FUNC(name)
#define HIDDEN_SYMBOL(name) .private_extern name
#if defined(_LIBUNWIND_HIDE_SYMBOLS)
#define EXPORT_SYMBOL(name) HIDDEN_SYMBOL(name)
#else
#define EXPORT_SYMBOL(name)
#endif
#define WEAK_ALIAS(name, aliasname) \
.globl SYMBOL_NAME(aliasname) SEPARATOR \
EXPORT_SYMBOL(SYMBOL_NAME(aliasname)) SEPARATOR \
SYMBOL_NAME(aliasname) = SYMBOL_NAME(name)
#define NO_EXEC_STACK_DIRECTIVE
#elif defined(__ELF__)
#if defined(__arm__)
#define SYMBOL_IS_FUNC(name) .type name,%function
#else
#define SYMBOL_IS_FUNC(name) .type name,@function
#endif
#define HIDDEN_SYMBOL(name) .hidden name
#if defined(_LIBUNWIND_HIDE_SYMBOLS)
#define EXPORT_SYMBOL(name) HIDDEN_SYMBOL(name)
#else
#define EXPORT_SYMBOL(name)
#endif
#define WEAK_SYMBOL(name) .weak name
#if defined(__hexagon__)
#define WEAK_ALIAS(name, aliasname) \
EXPORT_SYMBOL(SYMBOL_NAME(aliasname)) SEPARATOR \
WEAK_SYMBOL(SYMBOL_NAME(aliasname)) SEPARATOR \
.equiv SYMBOL_NAME(aliasname), SYMBOL_NAME(name)
#else
#define WEAK_ALIAS(name, aliasname) \
EXPORT_SYMBOL(SYMBOL_NAME(aliasname)) SEPARATOR \
WEAK_SYMBOL(SYMBOL_NAME(aliasname)) SEPARATOR \
SYMBOL_NAME(aliasname) = SYMBOL_NAME(name)
#endif
#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \
defined(__linux__)
#define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits
#else
#define NO_EXEC_STACK_DIRECTIVE
#endif
#elif defined(_WIN32)
#define SYMBOL_IS_FUNC(name) \
.def name SEPARATOR \
.scl 2 SEPARATOR \
.type 32 SEPARATOR \
.endef
#define EXPORT_SYMBOL2(name) \
.section .drectve,"yn" SEPARATOR \
.ascii "-export:", #name, "\0" SEPARATOR \
.text
#if defined(_LIBUNWIND_HIDE_SYMBOLS)
#define EXPORT_SYMBOL(name)
#else
#define EXPORT_SYMBOL(name) EXPORT_SYMBOL2(name)
#endif
#define HIDDEN_SYMBOL(name)
#if defined(__MINGW32__)
#define WEAK_ALIAS(name, aliasname) \
.globl SYMBOL_NAME(aliasname) SEPARATOR \
EXPORT_SYMBOL(aliasname) SEPARATOR \
SYMBOL_NAME(aliasname) = SYMBOL_NAME(name)
#else
#define WEAK_ALIAS3(name, aliasname) \
.section .drectve,"yn" SEPARATOR \
.ascii "-alternatename:", #aliasname, "=", #name, "\0" SEPARATOR \
.text
#define WEAK_ALIAS2(name, aliasname) \
WEAK_ALIAS3(name, aliasname)
#define WEAK_ALIAS(name, aliasname) \
EXPORT_SYMBOL(SYMBOL_NAME(aliasname)) SEPARATOR \
WEAK_ALIAS2(SYMBOL_NAME(name), SYMBOL_NAME(aliasname))
#endif
#define NO_EXEC_STACK_DIRECTIVE
#elif defined(__sparc__)
#else
#error Unsupported target
#endif
#define DEFINE_LIBUNWIND_FUNCTION(name) \
.globl SYMBOL_NAME(name) SEPARATOR \
HIDDEN_SYMBOL(SYMBOL_NAME(name)) SEPARATOR \
SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \
PPC64_OPD1 \
SYMBOL_NAME(name): \
PPC64_OPD2 \
AARCH64_BTI
#if defined(__arm__)
#if !defined(__ARM_ARCH)
#define __ARM_ARCH 4
#endif
#if defined(__ARM_ARCH_4T__) || __ARM_ARCH >= 5
#define ARM_HAS_BX
#endif
#ifdef ARM_HAS_BX
#define JMP(r) bx r
#else
#define JMP(r) mov pc, r
#endif
#endif /* __arm__ */
#if defined(__powerpc__)
#define PPC_LEFT_SHIFT(index) << (index)
#endif
#endif /* UNWIND_ASSEMBLY_H */
diff --git a/contrib/llvm-project/lld/ELF/SyntheticSections.cpp b/contrib/llvm-project/lld/ELF/SyntheticSections.cpp
index 986c1308cbaf..7778ae5f78e6 100644
--- a/contrib/llvm-project/lld/ELF/SyntheticSections.cpp
+++ b/contrib/llvm-project/lld/ELF/SyntheticSections.cpp
@@ -1,3931 +1,3934 @@
//===- SyntheticSections.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains linker-synthesized sections. Currently,
// synthetic sections are created as either output sections or input sections,
// but we are rewriting code so that all synthetic sections are created as
// input sections.
//
//===----------------------------------------------------------------------===//
#include "SyntheticSections.h"
#include "Config.h"
#include "InputFiles.h"
#include "LinkerScript.h"
#include "OutputSections.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "Target.h"
#include "Writer.h"
#include "lld/Common/CommonLinkerContext.h"
#include "lld/Common/DWARF.h"
#include "lld/Common/Strings.h"
#include "lld/Common/Version.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/TimeProfiler.h"
#include <cstdlib>
#include <thread>
using namespace llvm;
using namespace llvm::dwarf;
using namespace llvm::ELF;
using namespace llvm::object;
using namespace llvm::support;
using namespace lld;
using namespace lld::elf;
using llvm::support::endian::read32le;
using llvm::support::endian::write32le;
using llvm::support::endian::write64le;
constexpr size_t MergeNoTailSection::numShards;
static uint64_t readUint(uint8_t *buf) {
return config->is64 ? read64(buf) : read32(buf);
}
static void writeUint(uint8_t *buf, uint64_t val) {
if (config->is64)
write64(buf, val);
else
write32(buf, val);
}
// Returns an LLD version string.
static ArrayRef<uint8_t> getVersion() {
// Check LLD_VERSION first for ease of testing.
// You can get consistent output by using the environment variable.
// This is only for testing.
StringRef s = getenv("LLD_VERSION");
if (s.empty())
s = saver().save(Twine("Linker: ") + getLLDVersion());
// +1 to include the terminating '\0'.
return {(const uint8_t *)s.data(), s.size() + 1};
}
// Creates a .comment section containing LLD version info.
// With this feature, you can identify LLD-generated binaries easily
// by "readelf --string-dump .comment <file>".
// The returned object is a mergeable string section.
MergeInputSection *elf::createCommentSection() {
auto *sec = make<MergeInputSection>(SHF_MERGE | SHF_STRINGS, SHT_PROGBITS, 1,
getVersion(), ".comment");
sec->splitIntoPieces();
return sec;
}
// .MIPS.abiflags section.
template <class ELFT>
MipsAbiFlagsSection<ELFT>::MipsAbiFlagsSection(Elf_Mips_ABIFlags flags)
: SyntheticSection(SHF_ALLOC, SHT_MIPS_ABIFLAGS, 8, ".MIPS.abiflags"),
flags(flags) {
this->entsize = sizeof(Elf_Mips_ABIFlags);
}
template <class ELFT> void MipsAbiFlagsSection<ELFT>::writeTo(uint8_t *buf) {
memcpy(buf, &flags, sizeof(flags));
}
template <class ELFT>
std::unique_ptr<MipsAbiFlagsSection<ELFT>> MipsAbiFlagsSection<ELFT>::create() {
Elf_Mips_ABIFlags flags = {};
bool create = false;
for (InputSectionBase *sec : inputSections) {
if (sec->type != SHT_MIPS_ABIFLAGS)
continue;
sec->markDead();
create = true;
std::string filename = toString(sec->file);
const size_t size = sec->data().size();
// Older versions of BFD (such as the default FreeBSD linker) concatenate
// .MIPS.abiflags instead of merging. To allow for this case (or potential
// zero padding) we ignore everything after the first Elf_Mips_ABIFlags
if (size < sizeof(Elf_Mips_ABIFlags)) {
error(filename + ": invalid size of .MIPS.abiflags section: got " +
Twine(size) + " instead of " + Twine(sizeof(Elf_Mips_ABIFlags)));
return nullptr;
}
auto *s = reinterpret_cast<const Elf_Mips_ABIFlags *>(sec->data().data());
if (s->version != 0) {
error(filename + ": unexpected .MIPS.abiflags version " +
Twine(s->version));
return nullptr;
}
// LLD checks ISA compatibility in calcMipsEFlags(). Here we just
// select the highest number of ISA/Rev/Ext.
flags.isa_level = std::max(flags.isa_level, s->isa_level);
flags.isa_rev = std::max(flags.isa_rev, s->isa_rev);
flags.isa_ext = std::max(flags.isa_ext, s->isa_ext);
flags.gpr_size = std::max(flags.gpr_size, s->gpr_size);
flags.cpr1_size = std::max(flags.cpr1_size, s->cpr1_size);
flags.cpr2_size = std::max(flags.cpr2_size, s->cpr2_size);
flags.ases |= s->ases;
flags.flags1 |= s->flags1;
flags.flags2 |= s->flags2;
flags.fp_abi = elf::getMipsFpAbiFlag(flags.fp_abi, s->fp_abi, filename);
};
if (create)
return std::make_unique<MipsAbiFlagsSection<ELFT>>(flags);
return nullptr;
}
// .MIPS.options section.
template <class ELFT>
MipsOptionsSection<ELFT>::MipsOptionsSection(Elf_Mips_RegInfo reginfo)
: SyntheticSection(SHF_ALLOC, SHT_MIPS_OPTIONS, 8, ".MIPS.options"),
reginfo(reginfo) {
this->entsize = sizeof(Elf_Mips_Options) + sizeof(Elf_Mips_RegInfo);
}
template <class ELFT> void MipsOptionsSection<ELFT>::writeTo(uint8_t *buf) {
auto *options = reinterpret_cast<Elf_Mips_Options *>(buf);
options->kind = ODK_REGINFO;
options->size = getSize();
if (!config->relocatable)
reginfo.ri_gp_value = in.mipsGot->getGp();
memcpy(buf + sizeof(Elf_Mips_Options), &reginfo, sizeof(reginfo));
}
template <class ELFT>
std::unique_ptr<MipsOptionsSection<ELFT>> MipsOptionsSection<ELFT>::create() {
// N64 ABI only.
if (!ELFT::Is64Bits)
return nullptr;
SmallVector<InputSectionBase *, 0> sections;
for (InputSectionBase *sec : inputSections)
if (sec->type == SHT_MIPS_OPTIONS)
sections.push_back(sec);
if (sections.empty())
return nullptr;
Elf_Mips_RegInfo reginfo = {};
for (InputSectionBase *sec : sections) {
sec->markDead();
std::string filename = toString(sec->file);
ArrayRef<uint8_t> d = sec->data();
while (!d.empty()) {
if (d.size() < sizeof(Elf_Mips_Options)) {
error(filename + ": invalid size of .MIPS.options section");
break;
}
auto *opt = reinterpret_cast<const Elf_Mips_Options *>(d.data());
if (opt->kind == ODK_REGINFO) {
reginfo.ri_gprmask |= opt->getRegInfo().ri_gprmask;
sec->getFile<ELFT>()->mipsGp0 = opt->getRegInfo().ri_gp_value;
break;
}
if (!opt->size)
fatal(filename + ": zero option descriptor size");
d = d.slice(opt->size);
}
};
return std::make_unique<MipsOptionsSection<ELFT>>(reginfo);
}
// MIPS .reginfo section.
template <class ELFT>
MipsReginfoSection<ELFT>::MipsReginfoSection(Elf_Mips_RegInfo reginfo)
: SyntheticSection(SHF_ALLOC, SHT_MIPS_REGINFO, 4, ".reginfo"),
reginfo(reginfo) {
this->entsize = sizeof(Elf_Mips_RegInfo);
}
template <class ELFT> void MipsReginfoSection<ELFT>::writeTo(uint8_t *buf) {
if (!config->relocatable)
reginfo.ri_gp_value = in.mipsGot->getGp();
memcpy(buf, &reginfo, sizeof(reginfo));
}
template <class ELFT>
std::unique_ptr<MipsReginfoSection<ELFT>> MipsReginfoSection<ELFT>::create() {
// Section should be alive for O32 and N32 ABIs only.
if (ELFT::Is64Bits)
return nullptr;
SmallVector<InputSectionBase *, 0> sections;
for (InputSectionBase *sec : inputSections)
if (sec->type == SHT_MIPS_REGINFO)
sections.push_back(sec);
if (sections.empty())
return nullptr;
Elf_Mips_RegInfo reginfo = {};
for (InputSectionBase *sec : sections) {
sec->markDead();
if (sec->data().size() != sizeof(Elf_Mips_RegInfo)) {
error(toString(sec->file) + ": invalid size of .reginfo section");
return nullptr;
}
auto *r = reinterpret_cast<const Elf_Mips_RegInfo *>(sec->data().data());
reginfo.ri_gprmask |= r->ri_gprmask;
sec->getFile<ELFT>()->mipsGp0 = r->ri_gp_value;
};
return std::make_unique<MipsReginfoSection<ELFT>>(reginfo);
}
InputSection *elf::createInterpSection() {
// StringSaver guarantees that the returned string ends with '\0'.
StringRef s = saver().save(config->dynamicLinker);
ArrayRef<uint8_t> contents = {(const uint8_t *)s.data(), s.size() + 1};
return make<InputSection>(nullptr, SHF_ALLOC, SHT_PROGBITS, 1, contents,
".interp");
}
Defined *elf::addSyntheticLocal(StringRef name, uint8_t type, uint64_t value,
uint64_t size, InputSectionBase &section) {
Defined *s = makeDefined(section.file, name, STB_LOCAL, STV_DEFAULT, type,
value, size, &section);
if (in.symTab)
in.symTab->addSymbol(s);
return s;
}
static size_t getHashSize() {
switch (config->buildId) {
case BuildIdKind::Fast:
return 8;
case BuildIdKind::Md5:
case BuildIdKind::Uuid:
return 16;
case BuildIdKind::Sha1:
return 20;
case BuildIdKind::Hexstring:
return config->buildIdVector.size();
default:
llvm_unreachable("unknown BuildIdKind");
}
}
// This class represents a linker-synthesized .note.gnu.property section.
//
// In x86 and AArch64, object files may contain feature flags indicating the
// features that they have used. The flags are stored in a .note.gnu.property
// section.
//
// lld reads the sections from input files and merges them by computing AND of
// the flags. The result is written as a new .note.gnu.property section.
//
// If the flag is zero (which indicates that the intersection of the feature
// sets is empty, or some input files didn't have .note.gnu.property sections),
// we don't create this section.
GnuPropertySection::GnuPropertySection()
: SyntheticSection(llvm::ELF::SHF_ALLOC, llvm::ELF::SHT_NOTE,
config->wordsize, ".note.gnu.property") {}
void GnuPropertySection::writeTo(uint8_t *buf) {
uint32_t featureAndType = config->emachine == EM_AARCH64
? GNU_PROPERTY_AARCH64_FEATURE_1_AND
: GNU_PROPERTY_X86_FEATURE_1_AND;
write32(buf, 4); // Name size
write32(buf + 4, config->is64 ? 16 : 12); // Content size
write32(buf + 8, NT_GNU_PROPERTY_TYPE_0); // Type
memcpy(buf + 12, "GNU", 4); // Name string
write32(buf + 16, featureAndType); // Feature type
write32(buf + 20, 4); // Feature size
write32(buf + 24, config->andFeatures); // Feature flags
if (config->is64)
write32(buf + 28, 0); // Padding
}
size_t GnuPropertySection::getSize() const { return config->is64 ? 32 : 28; }
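A small sketch of the AND-merge described in the comment above: a feature bit (for example BTI or PAC on AArch64) survives into the merged feature word only if every input object advertised it. The input values here are invented for illustration:
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  const uint32_t kBTI = 1u << 0, kPAC = 1u << 1; // AArch64 feature-1 bits
  std::vector<uint32_t> perObject = {kBTI | kPAC, kBTI, kBTI | kPAC};
  uint32_t andFeatures = ~0u;
  for (uint32_t f : perObject)
    andFeatures &= f; // one object lacking PAC clears the merged PAC bit
  std::printf("merged feature word: %#x\n", andFeatures); // prints 0x1
  return 0;
}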
BuildIdSection::BuildIdSection()
: SyntheticSection(SHF_ALLOC, SHT_NOTE, 4, ".note.gnu.build-id"),
hashSize(getHashSize()) {}
void BuildIdSection::writeTo(uint8_t *buf) {
write32(buf, 4); // Name size
write32(buf + 4, hashSize); // Content size
write32(buf + 8, NT_GNU_BUILD_ID); // Type
memcpy(buf + 12, "GNU", 4); // Name string
hashBuf = buf + 16;
}
void BuildIdSection::writeBuildId(ArrayRef<uint8_t> buf) {
assert(buf.size() == hashSize);
memcpy(hashBuf, buf.data(), hashSize);
}
BssSection::BssSection(StringRef name, uint64_t size, uint32_t alignment)
: SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_NOBITS, alignment, name) {
this->bss = true;
this->size = size;
}
EhFrameSection::EhFrameSection()
: SyntheticSection(SHF_ALLOC, SHT_PROGBITS, 1, ".eh_frame") {}
// Search for an existing CIE record or create a new one.
// CIE records from input object files are uniquified by their contents
// and where their relocations point to.
template <class ELFT, class RelTy>
CieRecord *EhFrameSection::addCie(EhSectionPiece &cie, ArrayRef<RelTy> rels) {
Symbol *personality = nullptr;
unsigned firstRelI = cie.firstRelocation;
if (firstRelI != (unsigned)-1)
personality =
&cie.sec->template getFile<ELFT>()->getRelocTargetSym(rels[firstRelI]);
// Search for an existing CIE by CIE contents/relocation target pair.
CieRecord *&rec = cieMap[{cie.data(), personality}];
// If not found, create a new one.
if (!rec) {
rec = make<CieRecord>();
rec->cie = &cie;
cieRecords.push_back(rec);
}
return rec;
}
// There is one FDE per function. Returns a non-null pointer to the function
// symbol if the given FDE points to a live function.
template <class ELFT, class RelTy>
Defined *EhFrameSection::isFdeLive(EhSectionPiece &fde, ArrayRef<RelTy> rels) {
auto *sec = cast<EhInputSection>(fde.sec);
unsigned firstRelI = fde.firstRelocation;
// An FDE should point to some function because FDEs exist to describe
// functions. However, that is not always the case due to an issue with
// ld.gold -r: ld.gold may discard only the functions and leave their
// corresponding FDEs, which results in creating bad .eh_frame sections.
// To deal with that, we ignore such FDEs.
if (firstRelI == (unsigned)-1)
return nullptr;
const RelTy &rel = rels[firstRelI];
Symbol &b = sec->template getFile<ELFT>()->getRelocTargetSym(rel);
// FDEs for garbage-collected or merged-by-ICF sections, or sections in
// another partition, are dead.
if (auto *d = dyn_cast<Defined>(&b))
if (!d->folded && d->section && d->section->partition == partition)
return d;
return nullptr;
}
// .eh_frame is a sequence of CIE or FDE records. In general, there
// is one CIE record per input object file which is followed by
// a list of FDEs. This function searches for an existing CIE or creates a new
// one and associates FDEs with the CIE.
template <class ELFT, class RelTy>
void EhFrameSection::addRecords(EhInputSection *sec, ArrayRef<RelTy> rels) {
offsetToCie.clear();
for (EhSectionPiece &piece : sec->pieces) {
// The empty record is the end marker.
if (piece.size == 4)
return;
size_t offset = piece.inputOff;
uint32_t id = read32(piece.data().data() + 4);
if (id == 0) {
offsetToCie[offset] = addCie<ELFT>(piece, rels);
continue;
}
uint32_t cieOffset = offset + 4 - id;
CieRecord *rec = offsetToCie[cieOffset];
if (!rec)
fatal(toString(sec) + ": invalid CIE reference");
if (!isFdeLive<ELFT>(piece, rels))
continue;
rec->fdes.push_back(&piece);
numFdes++;
}
}
template <class ELFT>
void EhFrameSection::addSectionAux(EhInputSection *sec) {
if (!sec->isLive())
return;
const RelsOrRelas<ELFT> rels = sec->template relsOrRelas<ELFT>();
if (rels.areRelocsRel())
addRecords<ELFT>(sec, rels.rels);
else
addRecords<ELFT>(sec, rels.relas);
}
void EhFrameSection::addSection(EhInputSection *sec) {
sec->parent = this;
alignment = std::max(alignment, sec->alignment);
sections.push_back(sec);
for (auto *ds : sec->dependentSections)
dependentSections.push_back(ds);
}
// Used by ICF<ELFT>::handleLSDA(). This function is very similar to
// EhFrameSection::addRecords().
template <class ELFT, class RelTy>
void EhFrameSection::iterateFDEWithLSDAAux(
EhInputSection &sec, ArrayRef<RelTy> rels, DenseSet<size_t> &ciesWithLSDA,
llvm::function_ref<void(InputSection &)> fn) {
for (EhSectionPiece &piece : sec.pieces) {
// Skip ZERO terminator.
if (piece.size == 4)
continue;
size_t offset = piece.inputOff;
uint32_t id =
endian::read32<ELFT::TargetEndianness>(piece.data().data() + 4);
if (id == 0) {
if (hasLSDA(piece))
ciesWithLSDA.insert(offset);
continue;
}
uint32_t cieOffset = offset + 4 - id;
if (ciesWithLSDA.count(cieOffset) == 0)
continue;
// The CIE has a LSDA argument. Call fn with d's section.
if (Defined *d = isFdeLive<ELFT>(piece, rels))
if (auto *s = dyn_cast_or_null<InputSection>(d->section))
fn(*s);
}
}
template <class ELFT>
void EhFrameSection::iterateFDEWithLSDA(
llvm::function_ref<void(InputSection &)> fn) {
DenseSet<size_t> ciesWithLSDA;
for (EhInputSection *sec : sections) {
ciesWithLSDA.clear();
const RelsOrRelas<ELFT> rels = sec->template relsOrRelas<ELFT>();
if (rels.areRelocsRel())
iterateFDEWithLSDAAux<ELFT>(*sec, rels.rels, ciesWithLSDA, fn);
else
iterateFDEWithLSDAAux<ELFT>(*sec, rels.relas, ciesWithLSDA, fn);
}
}
static void writeCieFde(uint8_t *buf, ArrayRef<uint8_t> d) {
memcpy(buf, d.data(), d.size());
size_t aligned = alignTo(d.size(), config->wordsize);
assert(std::all_of(buf + d.size(), buf + aligned,
[](uint8_t c) { return c == 0; }));
// Fix the size field. -4 since size does not include the size field itself.
write32(buf, aligned - 4);
}
void EhFrameSection::finalizeContents() {
assert(!this->size); // Not finalized.
switch (config->ekind) {
case ELFNoneKind:
llvm_unreachable("invalid ekind");
case ELF32LEKind:
for (EhInputSection *sec : sections)
addSectionAux<ELF32LE>(sec);
break;
case ELF32BEKind:
for (EhInputSection *sec : sections)
addSectionAux<ELF32BE>(sec);
break;
case ELF64LEKind:
for (EhInputSection *sec : sections)
addSectionAux<ELF64LE>(sec);
break;
case ELF64BEKind:
for (EhInputSection *sec : sections)
addSectionAux<ELF64BE>(sec);
break;
}
size_t off = 0;
for (CieRecord *rec : cieRecords) {
rec->cie->outputOff = off;
off += alignTo(rec->cie->size, config->wordsize);
for (EhSectionPiece *fde : rec->fdes) {
fde->outputOff = off;
off += alignTo(fde->size, config->wordsize);
}
}
// The LSB standard does not allow a .eh_frame section with zero
// Call Frame Information records. glibc unwind-dw2-fde.c
// classify_object_over_fdes expects a CIE record of length 0 as a
// terminator. Thus we add one unconditionally.
off += 4;
this->size = off;
}
// Returns data for .eh_frame_hdr. .eh_frame_hdr is a binary search table
// used to look up the FDE that applies to a given address. This function
// returns a list of such address/FDE pairs.
SmallVector<EhFrameSection::FdeData, 0> EhFrameSection::getFdeData() const {
uint8_t *buf = Out::bufferStart + getParent()->offset + outSecOff;
SmallVector<FdeData, 0> ret;
uint64_t va = getPartition().ehFrameHdr->getVA();
for (CieRecord *rec : cieRecords) {
uint8_t enc = getFdeEncoding(rec->cie);
for (EhSectionPiece *fde : rec->fdes) {
uint64_t pc = getFdePc(buf, fde->outputOff, enc);
uint64_t fdeVA = getParent()->addr + fde->outputOff;
if (!isInt<32>(pc - va))
fatal(toString(fde->sec) + ": PC offset is too large: 0x" +
Twine::utohexstr(pc - va));
ret.push_back({uint32_t(pc - va), uint32_t(fdeVA - va)});
}
}
// Sort the FDE list by PC and remove duplicates. Usually there is only
// one FDE per PC (i.e. per function), but if ICF merges two functions
// into one, more than one FDE can point to the same address.
auto less = [](const FdeData &a, const FdeData &b) {
return a.pcRel < b.pcRel;
};
llvm::stable_sort(ret, less);
auto eq = [](const FdeData &a, const FdeData &b) {
return a.pcRel == b.pcRel;
};
ret.erase(std::unique(ret.begin(), ret.end(), eq), ret.end());
return ret;
}
static uint64_t readFdeAddr(uint8_t *buf, int size) {
switch (size) {
case DW_EH_PE_udata2:
return read16(buf);
case DW_EH_PE_sdata2:
return (int16_t)read16(buf);
case DW_EH_PE_udata4:
return read32(buf);
case DW_EH_PE_sdata4:
return (int32_t)read32(buf);
case DW_EH_PE_udata8:
case DW_EH_PE_sdata8:
return read64(buf);
case DW_EH_PE_absptr:
return readUint(buf);
}
fatal("unknown FDE size encoding");
}
// Returns the VA to which a given FDE (in a mmap'ed buffer) applies.
// We need it to create the .eh_frame_hdr section.
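// As an illustrative example (values derived from the readFdeAddr switch
// above): if enc is DW_EH_PE_pcrel | DW_EH_PE_sdata4 (0x1b), the initial
// location is read as a signed 32-bit value at FDE + 8 and then biased by
// the output address of that field, i.e. getParent()->addr + fdeOff + 8.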
uint64_t EhFrameSection::getFdePc(uint8_t *buf, size_t fdeOff,
uint8_t enc) const {
// The starting address to which this FDE applies is
// stored at FDE + 8 byte.
size_t off = fdeOff + 8;
uint64_t addr = readFdeAddr(buf + off, enc & 0xf);
if ((enc & 0x70) == DW_EH_PE_absptr)
return addr;
if ((enc & 0x70) == DW_EH_PE_pcrel)
return addr + getParent()->addr + off;
fatal("unknown FDE size relative encoding");
}
void EhFrameSection::writeTo(uint8_t *buf) {
// Write CIE and FDE records.
for (CieRecord *rec : cieRecords) {
size_t cieOffset = rec->cie->outputOff;
writeCieFde(buf + cieOffset, rec->cie->data());
for (EhSectionPiece *fde : rec->fdes) {
size_t off = fde->outputOff;
writeCieFde(buf + off, fde->data());
// FDE's second word should have the offset to an associated CIE.
// Write it.
write32(buf + off + 4, off + 4 - cieOffset);
}
}
// Apply relocations. .eh_frame section contents are not contiguous
// in the output buffer, but relocateAlloc() still works because
// getOffset() takes care of discontiguous section pieces.
for (EhInputSection *s : sections)
s->relocateAlloc(buf, nullptr);
if (getPartition().ehFrameHdr && getPartition().ehFrameHdr->getParent())
getPartition().ehFrameHdr->write();
}
GotSection::GotSection()
: SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS,
target->gotEntrySize, ".got") {
numEntries = target->gotHeaderEntriesNum;
}
void GotSection::addEntry(Symbol &sym) {
assert(sym.auxIdx == symAux.size() - 1);
symAux.back().gotIdx = numEntries++;
}
bool GotSection::addTlsDescEntry(Symbol &sym) {
assert(sym.auxIdx == symAux.size() - 1);
symAux.back().tlsDescIdx = numEntries;
numEntries += 2;
return true;
}
bool GotSection::addDynTlsEntry(Symbol &sym) {
assert(sym.auxIdx == symAux.size() - 1);
symAux.back().tlsGdIdx = numEntries;
// Global Dynamic TLS entries take two GOT slots.
numEntries += 2;
return true;
}
// Reserves TLS entries for a TLS module ID and a TLS block offset.
// In total it takes two GOT slots.
bool GotSection::addTlsIndex() {
if (tlsIndexOff != uint32_t(-1))
return false;
tlsIndexOff = numEntries * config->wordsize;
numEntries += 2;
return true;
}
uint32_t GotSection::getTlsDescOffset(const Symbol &sym) const {
return sym.getTlsDescIdx() * config->wordsize;
}
uint64_t GotSection::getTlsDescAddr(const Symbol &sym) const {
return getVA() + getTlsDescOffset(sym);
}
uint64_t GotSection::getGlobalDynAddr(const Symbol &b) const {
return this->getVA() + b.getTlsGdIdx() * config->wordsize;
}
uint64_t GotSection::getGlobalDynOffset(const Symbol &b) const {
return b.getTlsGdIdx() * config->wordsize;
}
void GotSection::finalizeContents() {
if (config->emachine == EM_PPC64 &&
numEntries <= target->gotHeaderEntriesNum && !ElfSym::globalOffsetTable)
size = 0;
else
size = numEntries * config->wordsize;
}
bool GotSection::isNeeded() const {
// Needed if the GOT symbol is used or the number of entries is more than just
// the header. A GOT with just the header may not be needed.
return hasGotOffRel || numEntries > target->gotHeaderEntriesNum;
}
void GotSection::writeTo(uint8_t *buf) {
target->writeGotHeader(buf);
relocateAlloc(buf, buf + size);
}
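// A rough worked example of the page rounding helpers below:
//   getMipsPageAddr(0x00005678) == 0x00000000  // 0x5678 + 0x8000 stays below 0x10000
//   getMipsPageAddr(0x00008000) == 0x00010000  // the +0x8000 bias rounds up
//   getMipsPageCount(1)         == 2           // one page plus slack for sections
//                                              // that cross a page boundary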
static uint64_t getMipsPageAddr(uint64_t addr) {
return (addr + 0x8000) & ~0xffff;
}
static uint64_t getMipsPageCount(uint64_t size) {
return (size + 0xfffe) / 0xffff + 1;
}
MipsGotSection::MipsGotSection()
: SyntheticSection(SHF_ALLOC | SHF_WRITE | SHF_MIPS_GPREL, SHT_PROGBITS, 16,
".got") {}
void MipsGotSection::addEntry(InputFile &file, Symbol &sym, int64_t addend,
RelExpr expr) {
FileGot &g = getGot(file);
if (expr == R_MIPS_GOT_LOCAL_PAGE) {
if (const OutputSection *os = sym.getOutputSection())
g.pagesMap.insert({os, {}});
else
g.local16.insert({{nullptr, getMipsPageAddr(sym.getVA(addend))}, 0});
} else if (sym.isTls())
g.tls.insert({&sym, 0});
else if (sym.isPreemptible && expr == R_ABS)
g.relocs.insert({&sym, 0});
else if (sym.isPreemptible)
g.global.insert({&sym, 0});
else if (expr == R_MIPS_GOT_OFF32)
g.local32.insert({{&sym, addend}, 0});
else
g.local16.insert({{&sym, addend}, 0});
}
void MipsGotSection::addDynTlsEntry(InputFile &file, Symbol &sym) {
getGot(file).dynTlsSymbols.insert({&sym, 0});
}
void MipsGotSection::addTlsIndex(InputFile &file) {
getGot(file).dynTlsSymbols.insert({nullptr, 0});
}
size_t MipsGotSection::FileGot::getEntriesNum() const {
return getPageEntriesNum() + local16.size() + global.size() + relocs.size() +
tls.size() + dynTlsSymbols.size() * 2;
}
size_t MipsGotSection::FileGot::getPageEntriesNum() const {
size_t num = 0;
for (const std::pair<const OutputSection *, FileGot::PageBlock> &p : pagesMap)
num += p.second.count;
return num;
}
size_t MipsGotSection::FileGot::getIndexedEntriesNum() const {
size_t count = getPageEntriesNum() + local16.size() + global.size();
// If there are relocation-only entries in the GOT, TLS entries
// are allocated after them. TLS entries should be addressable
// by a 16-bit index, so count both reloc-only and TLS entries.
if (!tls.empty() || !dynTlsSymbols.empty())
count += relocs.size() + tls.size() + dynTlsSymbols.size() * 2;
return count;
}
MipsGotSection::FileGot &MipsGotSection::getGot(InputFile &f) {
if (f.mipsGotIndex == uint32_t(-1)) {
gots.emplace_back();
gots.back().file = &f;
f.mipsGotIndex = gots.size() - 1;
}
return gots[f.mipsGotIndex];
}
uint64_t MipsGotSection::getPageEntryOffset(const InputFile *f,
const Symbol &sym,
int64_t addend) const {
const FileGot &g = gots[f->mipsGotIndex];
uint64_t index = 0;
if (const OutputSection *outSec = sym.getOutputSection()) {
uint64_t secAddr = getMipsPageAddr(outSec->addr);
uint64_t symAddr = getMipsPageAddr(sym.getVA(addend));
index = g.pagesMap.lookup(outSec).firstIndex + (symAddr - secAddr) / 0xffff;
} else {
index = g.local16.lookup({nullptr, getMipsPageAddr(sym.getVA(addend))});
}
return index * config->wordsize;
}
uint64_t MipsGotSection::getSymEntryOffset(const InputFile *f, const Symbol &s,
int64_t addend) const {
const FileGot &g = gots[f->mipsGotIndex];
Symbol *sym = const_cast<Symbol *>(&s);
if (sym->isTls())
return g.tls.lookup(sym) * config->wordsize;
if (sym->isPreemptible)
return g.global.lookup(sym) * config->wordsize;
return g.local16.lookup({sym, addend}) * config->wordsize;
}
uint64_t MipsGotSection::getTlsIndexOffset(const InputFile *f) const {
const FileGot &g = gots[f->mipsGotIndex];
return g.dynTlsSymbols.lookup(nullptr) * config->wordsize;
}
uint64_t MipsGotSection::getGlobalDynOffset(const InputFile *f,
const Symbol &s) const {
const FileGot &g = gots[f->mipsGotIndex];
Symbol *sym = const_cast<Symbol *>(&s);
return g.dynTlsSymbols.lookup(sym) * config->wordsize;
}
const Symbol *MipsGotSection::getFirstGlobalEntry() const {
if (gots.empty())
return nullptr;
const FileGot &primGot = gots.front();
if (!primGot.global.empty())
return primGot.global.front().first;
if (!primGot.relocs.empty())
return primGot.relocs.front().first;
return nullptr;
}
unsigned MipsGotSection::getLocalEntriesNum() const {
if (gots.empty())
return headerEntriesNum;
return headerEntriesNum + gots.front().getPageEntriesNum() +
gots.front().local16.size();
}
bool MipsGotSection::tryMergeGots(FileGot &dst, FileGot &src, bool isPrimary) {
FileGot tmp = dst;
set_union(tmp.pagesMap, src.pagesMap);
set_union(tmp.local16, src.local16);
set_union(tmp.global, src.global);
set_union(tmp.relocs, src.relocs);
set_union(tmp.tls, src.tls);
set_union(tmp.dynTlsSymbols, src.dynTlsSymbols);
size_t count = isPrimary ? headerEntriesNum : 0;
count += tmp.getIndexedEntriesNum();
if (count * config->wordsize > config->mipsGotSize)
return false;
std::swap(tmp, dst);
return true;
}
void MipsGotSection::finalizeContents() { updateAllocSize(); }
bool MipsGotSection::updateAllocSize() {
size = headerEntriesNum * config->wordsize;
for (const FileGot &g : gots)
size += g.getEntriesNum() * config->wordsize;
return false;
}
void MipsGotSection::build() {
if (gots.empty())
return;
std::vector<FileGot> mergedGots(1);
// For each GOT, move non-preemptible symbols from the `Global` list
// to the `Local16` list. A preemptible symbol might become non-preemptible
// if, for example, it gets a related copy relocation.
for (FileGot &got : gots) {
for (auto &p: got.global)
if (!p.first->isPreemptible)
got.local16.insert({{p.first, 0}, 0});
got.global.remove_if([&](const std::pair<Symbol *, size_t> &p) {
return !p.first->isPreemptible;
});
}
// For each GOT, remove a "reloc-only" entry if there is a "global"
// entry for the same symbol, and append local entries indexed by
// 32-bit values after the 16-bit entries.
for (FileGot &got : gots) {
got.relocs.remove_if([&](const std::pair<Symbol *, size_t> &p) {
return got.global.count(p.first);
});
set_union(got.local16, got.local32);
got.local32.clear();
}
// Evaluate number of "reloc-only" entries in the resulting GOT.
// To do that put all unique "reloc-only" and "global" entries
// from all GOTs to the future primary GOT.
FileGot *primGot = &mergedGots.front();
for (FileGot &got : gots) {
set_union(primGot->relocs, got.global);
set_union(primGot->relocs, got.relocs);
got.relocs.clear();
}
// Evaluate number of "page" entries in each GOT.
for (FileGot &got : gots) {
for (std::pair<const OutputSection *, FileGot::PageBlock> &p :
got.pagesMap) {
const OutputSection *os = p.first;
uint64_t secSize = 0;
for (SectionCommand *cmd : os->commands) {
if (auto *isd = dyn_cast<InputSectionDescription>(cmd))
for (InputSection *isec : isd->sections) {
uint64_t off = alignTo(secSize, isec->alignment);
secSize = off + isec->getSize();
}
}
p.second.count = getMipsPageCount(secSize);
}
}
// Merge GOTs. Try to join as many GOTs as possible, but do not exceed the
// maximum GOT size. First, try to fill the primary GOT because it can be
// accessed in the most efficient way. If that is not possible, try to fill
// the last GOT in the list, and finally create a new GOT if both attempts
// failed.
for (FileGot &srcGot : gots) {
InputFile *file = srcGot.file;
if (tryMergeGots(mergedGots.front(), srcGot, true)) {
file->mipsGotIndex = 0;
} else {
// If this is the first time we failed to merge with the primary GOT,
// MergedGots.back() will also be the primary GOT. We must make sure not
// to try to merge again with isPrimary=false, as otherwise, if the
// inputs are just right, we could allow the primary GOT to become 1 or 2
// words bigger due to ignoring the header size.
if (mergedGots.size() == 1 ||
!tryMergeGots(mergedGots.back(), srcGot, false)) {
mergedGots.emplace_back();
std::swap(mergedGots.back(), srcGot);
}
file->mipsGotIndex = mergedGots.size() - 1;
}
}
std::swap(gots, mergedGots);
// Reduce number of "reloc-only" entries in the primary GOT
// by subtracting "global" entries in the primary GOT.
primGot = &gots.front();
primGot->relocs.remove_if([&](const std::pair<Symbol *, size_t> &p) {
return primGot->global.count(p.first);
});
// Calculate indexes for each GOT entry.
size_t index = headerEntriesNum;
for (FileGot &got : gots) {
got.startIndex = &got == primGot ? 0 : index;
for (std::pair<const OutputSection *, FileGot::PageBlock> &p :
got.pagesMap) {
// For each output section referenced by GOT page relocations, calculate
// and save into pagesMap an upper bound on the number of MIPS GOT entries
// required to store page addresses of local symbols. We assume the worst
// case: each 64kb page of the output section has at least one GOT
// relocation against it. We also take into account the case when the
// section crosses page boundaries.
p.second.firstIndex = index;
index += p.second.count;
}
for (auto &p: got.local16)
p.second = index++;
for (auto &p: got.global)
p.second = index++;
for (auto &p: got.relocs)
p.second = index++;
for (auto &p: got.tls)
p.second = index++;
for (auto &p: got.dynTlsSymbols) {
p.second = index;
index += 2;
}
}
// Update SymbolAux::gotIdx field to use this
// value later in the `sortMipsSymbols` function.
for (auto &p : primGot->global) {
if (p.first->auxIdx == uint32_t(-1))
p.first->allocateAux();
symAux.back().gotIdx = p.second;
}
for (auto &p : primGot->relocs) {
if (p.first->auxIdx == uint32_t(-1))
p.first->allocateAux();
symAux.back().gotIdx = p.second;
}
// Create dynamic relocations.
for (FileGot &got : gots) {
// Create dynamic relocations for TLS entries.
for (std::pair<Symbol *, size_t> &p : got.tls) {
Symbol *s = p.first;
uint64_t offset = p.second * config->wordsize;
// When building a shared library we still need a dynamic relocation
// for the TP-relative offset as we don't know how much other data will
// be allocated before us in the static TLS block.
if (s->isPreemptible || config->shared)
mainPart->relaDyn->addReloc({target->tlsGotRel, this, offset,
DynamicReloc::AgainstSymbolWithTargetVA,
*s, 0, R_ABS});
}
for (std::pair<Symbol *, size_t> &p : got.dynTlsSymbols) {
Symbol *s = p.first;
uint64_t offset = p.second * config->wordsize;
if (s == nullptr) {
if (!config->shared)
continue;
mainPart->relaDyn->addReloc({target->tlsModuleIndexRel, this, offset});
} else {
// When building a shared library we still need a dynamic relocation
// for the module index. Therefore only checking for
// s->isPreemptible is not sufficient (this happens e.g. for
// thread-locals that have been marked as local through a linker script).
if (!s->isPreemptible && !config->shared)
continue;
mainPart->relaDyn->addSymbolReloc(target->tlsModuleIndexRel, *this,
offset, *s);
// However, we can skip writing the TLS offset reloc for non-preemptible
// symbols since it is known even in shared libraries.
if (!s->isPreemptible)
continue;
offset += config->wordsize;
mainPart->relaDyn->addSymbolReloc(target->tlsOffsetRel, *this, offset,
*s);
}
}
// Do not create dynamic relocations for non-TLS
// entries in the primary GOT.
if (&got == primGot)
continue;
// Dynamic relocations for "global" entries.
for (const std::pair<Symbol *, size_t> &p : got.global) {
uint64_t offset = p.second * config->wordsize;
mainPart->relaDyn->addSymbolReloc(target->relativeRel, *this, offset,
*p.first);
}
if (!config->isPic)
continue;
// Dynamic relocations for "local" entries in case of PIC.
for (const std::pair<const OutputSection *, FileGot::PageBlock> &l :
got.pagesMap) {
size_t pageCount = l.second.count;
for (size_t pi = 0; pi < pageCount; ++pi) {
uint64_t offset = (l.second.firstIndex + pi) * config->wordsize;
mainPart->relaDyn->addReloc({target->relativeRel, this, offset, l.first,
int64_t(pi * 0x10000)});
}
}
for (const std::pair<GotEntry, size_t> &p : got.local16) {
uint64_t offset = p.second * config->wordsize;
mainPart->relaDyn->addReloc({target->relativeRel, this, offset,
DynamicReloc::AddendOnlyWithTargetVA,
*p.first.first, p.first.second, R_ABS});
}
}
}
bool MipsGotSection::isNeeded() const {
// We add the .got section to the result for the dynamic MIPS target because
// its address and properties are mentioned in the .dynamic section.
return !config->relocatable;
}
uint64_t MipsGotSection::getGp(const InputFile *f) const {
// For files without related GOT or files refer a primary GOT
// returns "common" _gp value. For secondary GOTs calculate
// individual _gp values.
if (!f || f->mipsGotIndex == uint32_t(-1) || f->mipsGotIndex == 0)
return ElfSym::mipsGp->getVA(0);
return getVA() + gots[f->mipsGotIndex].startIndex * config->wordsize + 0x7ff0;
}
void MipsGotSection::writeTo(uint8_t *buf) {
// Set the MSB of the second GOT slot. This is not required by any
// MIPS ABI documentation, though.
//
// There is a comment in glibc saying that "The MSB of got[1] of a
// gnu object is set to identify gnu objects," and in GNU gold it
// says "the second entry will be used by some runtime loaders".
// But how this field is being used is unclear.
//
// We are not really willing to mimic other linkers' behaviors
// without understanding why they do that, but because all files
// generated by GNU tools have this special GOT value, and because
// we've been doing this for years, it is probably a safe bet to
// keep doing this for now. We really need to revisit this to see
// whether we actually have to do this.
writeUint(buf + config->wordsize, (uint64_t)1 << (config->wordsize * 8 - 1));
for (const FileGot &g : gots) {
auto write = [&](size_t i, const Symbol *s, int64_t a) {
uint64_t va = a;
if (s)
va = s->getVA(a);
writeUint(buf + i * config->wordsize, va);
};
// Write 'page address' entries to the local part of the GOT.
for (const std::pair<const OutputSection *, FileGot::PageBlock> &l :
g.pagesMap) {
size_t pageCount = l.second.count;
uint64_t firstPageAddr = getMipsPageAddr(l.first->addr);
for (size_t pi = 0; pi < pageCount; ++pi)
write(l.second.firstIndex + pi, nullptr, firstPageAddr + pi * 0x10000);
}
// Local, global, TLS, and reloc-only entries.
// If a TLS entry has a corresponding dynamic relocation, leave it
// initialized to zero; otherwise, write the adjusted TLS symbol's value.
// To calculate the adjustments, use the offsets for thread-local storage:
// http://web.archive.org/web/20190324223224/https://www.linux-mips.org/wiki/NPTL
for (const std::pair<GotEntry, size_t> &p : g.local16)
write(p.second, p.first.first, p.first.second);
// Write VA to the primary GOT only. For secondary GOTs that
// will be done by REL32 dynamic relocations.
if (&g == &gots.front())
for (const std::pair<Symbol *, size_t> &p : g.global)
write(p.second, p.first, 0);
for (const std::pair<Symbol *, size_t> &p : g.relocs)
write(p.second, p.first, 0);
for (const std::pair<Symbol *, size_t> &p : g.tls)
write(p.second, p.first,
p.first->isPreemptible || config->shared ? 0 : -0x7000);
for (const std::pair<Symbol *, size_t> &p : g.dynTlsSymbols) {
if (p.first == nullptr && !config->shared)
write(p.second, nullptr, 1);
else if (p.first && !p.first->isPreemptible) {
// If we are emitting a shared library with relocations, we mustn't write
// anything to the GOT here. When using Elf_Rel relocations, the value
// one would be treated as an addend and would cause crashes at runtime.
if (!config->shared)
write(p.second, nullptr, 1);
write(p.second + 1, p.first, -0x8000);
}
}
}
}
// On PowerPC the .plt section is used to hold the table of function addresses
// instead of the .got.plt, and the type is SHT_NOBITS similar to a .bss
// section. I don't know why we have a BSS style type for the section but it is
// consistent across both 64-bit PowerPC ABIs as well as the 32-bit PowerPC ABI.
GotPltSection::GotPltSection()
: SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, config->wordsize,
".got.plt") {
if (config->emachine == EM_PPC) {
name = ".plt";
} else if (config->emachine == EM_PPC64) {
type = SHT_NOBITS;
name = ".plt";
}
}
void GotPltSection::addEntry(Symbol &sym) {
assert(sym.auxIdx == symAux.size() - 1 &&
symAux.back().pltIdx == entries.size());
entries.push_back(&sym);
}
size_t GotPltSection::getSize() const {
return (target->gotPltHeaderEntriesNum + entries.size()) *
target->gotEntrySize;
}
void GotPltSection::writeTo(uint8_t *buf) {
target->writeGotPltHeader(buf);
buf += target->gotPltHeaderEntriesNum * target->gotEntrySize;
for (const Symbol *b : entries) {
target->writeGotPlt(buf, *b);
buf += target->gotEntrySize;
}
}
bool GotPltSection::isNeeded() const {
// We need to emit GOTPLT even if it's empty if there's a relocation relative
// to it.
return !entries.empty() || hasGotPltOffRel;
}
static StringRef getIgotPltName() {
// On ARM the IgotPltSection is part of the GotSection.
if (config->emachine == EM_ARM)
return ".got";
// On PowerPC64 the GotPltSection is renamed to '.plt' so the IgotPltSection
// needs to be named the same.
if (config->emachine == EM_PPC64)
return ".plt";
return ".got.plt";
}
// On PowerPC64 the GotPltSection type is SHT_NOBITS so we have to follow suit
// with the IgotPltSection.
IgotPltSection::IgotPltSection()
: SyntheticSection(SHF_ALLOC | SHF_WRITE,
config->emachine == EM_PPC64 ? SHT_NOBITS : SHT_PROGBITS,
target->gotEntrySize, getIgotPltName()) {}
void IgotPltSection::addEntry(Symbol &sym) {
assert(symAux.back().pltIdx == entries.size());
entries.push_back(&sym);
}
size_t IgotPltSection::getSize() const {
return entries.size() * target->gotEntrySize;
}
void IgotPltSection::writeTo(uint8_t *buf) {
for (const Symbol *b : entries) {
target->writeIgotPlt(buf, *b);
buf += target->gotEntrySize;
}
}
StringTableSection::StringTableSection(StringRef name, bool dynamic)
: SyntheticSection(dynamic ? (uint64_t)SHF_ALLOC : 0, SHT_STRTAB, 1, name),
dynamic(dynamic) {
// ELF string tables start with a NUL byte.
strings.push_back("");
+ stringMap.try_emplace(CachedHashStringRef(""), 0);
size = 1;
}
// Adds a string to the string table. If `hashIt` is true, we hash and check
// for duplicates. It is optional because the names of global symbols are
// already unique, and hashing them again has a big cost for a small value:
// uniquing them with some other string that happens to be the same.
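// As a small illustration of the behavior below: calling
// addString("foo", /*hashIt=*/true) twice returns the same offset for both
// calls, while with hashIt=false the second call appends a duplicate copy
// of "foo" and returns a new offset.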
unsigned StringTableSection::addString(StringRef s, bool hashIt) {
if (hashIt) {
auto r = stringMap.try_emplace(CachedHashStringRef(s), size);
if (!r.second)
return r.first->second;
}
if (s.empty())
return 0;
unsigned ret = this->size;
this->size = this->size + s.size() + 1;
strings.push_back(s);
return ret;
}
void StringTableSection::writeTo(uint8_t *buf) {
for (StringRef s : strings) {
memcpy(buf, s.data(), s.size());
buf[s.size()] = '\0';
buf += s.size() + 1;
}
}
// Returns the number of entries in .gnu.version_d: the number of
// non-VER_NDX_LOCAL-non-VER_NDX_GLOBAL definitions, plus 1.
// Note that we don't support vd_cnt > 1 yet.
static unsigned getVerDefNum() {
return namedVersionDefs().size() + 1;
}
template <class ELFT>
DynamicSection<ELFT>::DynamicSection()
: SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_DYNAMIC, config->wordsize,
".dynamic") {
this->entsize = ELFT::Is64Bits ? 16 : 8;
// The .dynamic section is not writable on MIPS or on Fuchsia OS,
// which passes -z rodynamic.
// See "Special Section" in Chapter 4 in the following document:
// ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
if (config->emachine == EM_MIPS || config->zRodynamic)
this->flags = SHF_ALLOC;
}
// The output section .rela.dyn may include these synthetic sections:
//
// - part.relaDyn
// - in.relaIplt: this is included if in.relaIplt is named .rela.dyn
// - in.relaPlt: this is included if a linker script places .rela.plt inside
// .rela.dyn
//
// DT_RELASZ is the total size of the included sections.
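// For example (a hypothetical linker script fragment), with
// ".rela.dyn : { *(.rela.dyn) *(.rela.plt) }" both sections share a parent
// output section, so addRelaSz() below also counts in.relaPlt's bytes.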
static uint64_t addRelaSz(const RelocationBaseSection &relaDyn) {
size_t size = relaDyn.getSize();
if (in.relaIplt->getParent() == relaDyn.getParent())
size += in.relaIplt->getSize();
if (in.relaPlt->getParent() == relaDyn.getParent())
size += in.relaPlt->getSize();
return size;
}
// A linker script may assign the RELA relocation sections to the same
// output section. When this occurs we cannot just use the output section
// size. Moreover, the range [DT_JMPREL, DT_JMPREL + DT_PLTRELSZ) is
// permitted to overlap with [DT_RELA, DT_RELA + DT_RELASZ).
static uint64_t addPltRelSz() {
size_t size = in.relaPlt->getSize();
if (in.relaIplt->getParent() == in.relaPlt->getParent() &&
in.relaIplt->name == in.relaPlt->name)
size += in.relaIplt->getSize();
return size;
}
// Add remaining entries to complete .dynamic contents.
template <class ELFT>
std::vector<std::pair<int32_t, uint64_t>>
DynamicSection<ELFT>::computeContents() {
elf::Partition &part = getPartition();
bool isMain = part.name.empty();
std::vector<std::pair<int32_t, uint64_t>> entries;
auto addInt = [&](int32_t tag, uint64_t val) {
entries.emplace_back(tag, val);
};
auto addInSec = [&](int32_t tag, const InputSection &sec) {
entries.emplace_back(tag, sec.getVA());
};
for (StringRef s : config->filterList)
addInt(DT_FILTER, part.dynStrTab->addString(s));
for (StringRef s : config->auxiliaryList)
addInt(DT_AUXILIARY, part.dynStrTab->addString(s));
if (!config->rpath.empty())
addInt(config->enableNewDtags ? DT_RUNPATH : DT_RPATH,
part.dynStrTab->addString(config->rpath));
for (SharedFile *file : sharedFiles)
if (file->isNeeded)
addInt(DT_NEEDED, part.dynStrTab->addString(file->soName));
if (isMain) {
if (!config->soName.empty())
addInt(DT_SONAME, part.dynStrTab->addString(config->soName));
} else {
if (!config->soName.empty())
addInt(DT_NEEDED, part.dynStrTab->addString(config->soName));
addInt(DT_SONAME, part.dynStrTab->addString(part.name));
}
// Set DT_FLAGS and DT_FLAGS_1.
uint32_t dtFlags = 0;
uint32_t dtFlags1 = 0;
if (config->bsymbolic == BsymbolicKind::All)
dtFlags |= DF_SYMBOLIC;
if (config->zGlobal)
dtFlags1 |= DF_1_GLOBAL;
if (config->zInitfirst)
dtFlags1 |= DF_1_INITFIRST;
if (config->zInterpose)
dtFlags1 |= DF_1_INTERPOSE;
if (config->zNodefaultlib)
dtFlags1 |= DF_1_NODEFLIB;
if (config->zNodelete)
dtFlags1 |= DF_1_NODELETE;
if (config->zNodlopen)
dtFlags1 |= DF_1_NOOPEN;
if (config->pie)
dtFlags1 |= DF_1_PIE;
if (config->zNow) {
dtFlags |= DF_BIND_NOW;
dtFlags1 |= DF_1_NOW;
}
if (config->zOrigin) {
dtFlags |= DF_ORIGIN;
dtFlags1 |= DF_1_ORIGIN;
}
if (!config->zText)
dtFlags |= DF_TEXTREL;
if (config->hasTlsIe && config->shared)
dtFlags |= DF_STATIC_TLS;
if (dtFlags)
addInt(DT_FLAGS, dtFlags);
if (dtFlags1)
addInt(DT_FLAGS_1, dtFlags1);
// DT_DEBUG is a pointer to debug information used by debuggers at runtime. We
// need it for each process, so we don't write it for DSOs. The loader writes
// the pointer into this entry.
//
// DT_DEBUG is the only .dynamic entry that needs to be written to. Some
// systems (currently only Fuchsia OS) provide other means to give the
// debugger this information. Such systems may choose to make .dynamic
// read-only. If the target is such a system (it used -z rodynamic), don't
// write DT_DEBUG.
if (!config->shared && !config->relocatable && !config->zRodynamic)
addInt(DT_DEBUG, 0);
if (part.relaDyn->isNeeded() ||
(in.relaIplt->isNeeded() &&
part.relaDyn->getParent() == in.relaIplt->getParent())) {
addInSec(part.relaDyn->dynamicTag, *part.relaDyn);
entries.emplace_back(part.relaDyn->sizeDynamicTag,
addRelaSz(*part.relaDyn));
bool isRela = config->isRela;
addInt(isRela ? DT_RELAENT : DT_RELENT,
isRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel));
// The MIPS dynamic loader does not support the RELCOUNT tag.
// The problem is the tight relation between dynamic
// relocations and the GOT, so do not emit this tag on MIPS.
if (config->emachine != EM_MIPS) {
size_t numRelativeRels = part.relaDyn->getRelativeRelocCount();
if (config->zCombreloc && numRelativeRels)
addInt(isRela ? DT_RELACOUNT : DT_RELCOUNT, numRelativeRels);
}
}
if (part.relrDyn && part.relrDyn->getParent() &&
!part.relrDyn->relocs.empty()) {
addInSec(config->useAndroidRelrTags ? DT_ANDROID_RELR : DT_RELR,
*part.relrDyn);
addInt(config->useAndroidRelrTags ? DT_ANDROID_RELRSZ : DT_RELRSZ,
part.relrDyn->getParent()->size);
addInt(config->useAndroidRelrTags ? DT_ANDROID_RELRENT : DT_RELRENT,
sizeof(Elf_Relr));
}
// The .rel[a].plt section usually consists of two parts, containing plt and
// iplt relocations. It is possible to have only iplt relocations in the
// output. In that case relaPlt is empty and has a zero offset, the same
// offset as relaIplt. We still want to emit proper dynamic tags for that
// case, so here we always use relaPlt as the marker for the beginning of
// the .rel[a].plt section.
if (isMain && (in.relaPlt->isNeeded() || in.relaIplt->isNeeded())) {
addInSec(DT_JMPREL, *in.relaPlt);
entries.emplace_back(DT_PLTRELSZ, addPltRelSz());
switch (config->emachine) {
case EM_MIPS:
addInSec(DT_MIPS_PLTGOT, *in.gotPlt);
break;
case EM_SPARCV9:
addInSec(DT_PLTGOT, *in.plt);
break;
case EM_AARCH64:
if (llvm::find_if(in.relaPlt->relocs, [](const DynamicReloc &r) {
return r.type == target->pltRel &&
r.sym->stOther & STO_AARCH64_VARIANT_PCS;
}) != in.relaPlt->relocs.end())
addInt(DT_AARCH64_VARIANT_PCS, 0);
LLVM_FALLTHROUGH;
default:
addInSec(DT_PLTGOT, *in.gotPlt);
break;
}
addInt(DT_PLTREL, config->isRela ? DT_RELA : DT_REL);
}
if (config->emachine == EM_AARCH64) {
if (config->andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI)
addInt(DT_AARCH64_BTI_PLT, 0);
if (config->zPacPlt)
addInt(DT_AARCH64_PAC_PLT, 0);
}
addInSec(DT_SYMTAB, *part.dynSymTab);
addInt(DT_SYMENT, sizeof(Elf_Sym));
addInSec(DT_STRTAB, *part.dynStrTab);
addInt(DT_STRSZ, part.dynStrTab->getSize());
if (!config->zText)
addInt(DT_TEXTREL, 0);
if (part.gnuHashTab && part.gnuHashTab->getParent())
addInSec(DT_GNU_HASH, *part.gnuHashTab);
if (part.hashTab && part.hashTab->getParent())
addInSec(DT_HASH, *part.hashTab);
if (isMain) {
if (Out::preinitArray) {
addInt(DT_PREINIT_ARRAY, Out::preinitArray->addr);
addInt(DT_PREINIT_ARRAYSZ, Out::preinitArray->size);
}
if (Out::initArray) {
addInt(DT_INIT_ARRAY, Out::initArray->addr);
addInt(DT_INIT_ARRAYSZ, Out::initArray->size);
}
if (Out::finiArray) {
addInt(DT_FINI_ARRAY, Out::finiArray->addr);
addInt(DT_FINI_ARRAYSZ, Out::finiArray->size);
}
if (Symbol *b = symtab->find(config->init))
if (b->isDefined())
addInt(DT_INIT, b->getVA());
if (Symbol *b = symtab->find(config->fini))
if (b->isDefined())
addInt(DT_FINI, b->getVA());
}
if (part.verSym && part.verSym->isNeeded())
addInSec(DT_VERSYM, *part.verSym);
if (part.verDef && part.verDef->isLive()) {
addInSec(DT_VERDEF, *part.verDef);
addInt(DT_VERDEFNUM, getVerDefNum());
}
if (part.verNeed && part.verNeed->isNeeded()) {
addInSec(DT_VERNEED, *part.verNeed);
unsigned needNum = 0;
for (SharedFile *f : sharedFiles)
if (!f->vernauxs.empty())
++needNum;
addInt(DT_VERNEEDNUM, needNum);
}
if (config->emachine == EM_MIPS) {
addInt(DT_MIPS_RLD_VERSION, 1);
addInt(DT_MIPS_FLAGS, RHF_NOTPOT);
addInt(DT_MIPS_BASE_ADDRESS, target->getImageBase());
addInt(DT_MIPS_SYMTABNO, part.dynSymTab->getNumSymbols());
addInt(DT_MIPS_LOCAL_GOTNO, in.mipsGot->getLocalEntriesNum());
if (const Symbol *b = in.mipsGot->getFirstGlobalEntry())
addInt(DT_MIPS_GOTSYM, b->dynsymIndex);
else
addInt(DT_MIPS_GOTSYM, part.dynSymTab->getNumSymbols());
addInSec(DT_PLTGOT, *in.mipsGot);
if (in.mipsRldMap) {
if (!config->pie)
addInSec(DT_MIPS_RLD_MAP, *in.mipsRldMap);
// Store the offset to the .rld_map section
// relative to the address of the tag.
addInt(DT_MIPS_RLD_MAP_REL,
in.mipsRldMap->getVA() - (getVA() + entries.size() * entsize));
}
}
// DT_PPC_GOT indicates to glibc that the Secure PLT is used. If DT_PPC_GOT is
// absent, glibc assumes the old-style BSS PLT layout, which we don't support.
if (config->emachine == EM_PPC)
addInSec(DT_PPC_GOT, *in.got);
// The glink dynamic tag is required by the V2 ABI if the .plt section isn't empty.
if (config->emachine == EM_PPC64 && in.plt->isNeeded()) {
// The Glink tag points to 32 bytes before the first lazy symbol resolution
// stub, which starts directly after the header.
addInt(DT_PPC64_GLINK, in.plt->getVA() + target->pltHeaderSize - 32);
}
addInt(DT_NULL, 0);
return entries;
}
template <class ELFT> void DynamicSection<ELFT>::finalizeContents() {
if (OutputSection *sec = getPartition().dynStrTab->getParent())
getParent()->link = sec->sectionIndex;
this->size = computeContents().size() * this->entsize;
}
template <class ELFT> void DynamicSection<ELFT>::writeTo(uint8_t *buf) {
auto *p = reinterpret_cast<Elf_Dyn *>(buf);
for (std::pair<int32_t, uint64_t> kv : computeContents()) {
p->d_tag = kv.first;
p->d_un.d_val = kv.second;
++p;
}
}
uint64_t DynamicReloc::getOffset() const {
return inputSec->getVA(offsetInSec);
}
int64_t DynamicReloc::computeAddend() const {
switch (kind) {
case AddendOnly:
assert(sym == nullptr);
return addend;
case AgainstSymbol:
assert(sym != nullptr);
return addend;
case AddendOnlyWithTargetVA:
case AgainstSymbolWithTargetVA:
return InputSection::getRelocTargetVA(inputSec->file, type, addend,
getOffset(), *sym, expr);
case MipsMultiGotPage:
assert(sym == nullptr);
return getMipsPageAddr(outputSec->addr) + addend;
}
llvm_unreachable("Unknown DynamicReloc::Kind enum");
}
uint32_t DynamicReloc::getSymIndex(SymbolTableBaseSection *symTab) const {
if (needsDynSymIndex())
return symTab->getSymbolIndex(sym);
return 0;
}
RelocationBaseSection::RelocationBaseSection(StringRef name, uint32_t type,
int32_t dynamicTag,
int32_t sizeDynamicTag,
bool combreloc)
: SyntheticSection(SHF_ALLOC, type, config->wordsize, name),
dynamicTag(dynamicTag), sizeDynamicTag(sizeDynamicTag),
combreloc(combreloc) {}
void RelocationBaseSection::addSymbolReloc(RelType dynType,
InputSectionBase &isec,
uint64_t offsetInSec, Symbol &sym,
int64_t addend,
Optional<RelType> addendRelType) {
addReloc(DynamicReloc::AgainstSymbol, dynType, isec, offsetInSec, sym, addend,
R_ADDEND, addendRelType ? *addendRelType : target->noneRel);
}
void RelocationBaseSection::addRelativeReloc(
RelType dynType, InputSectionBase &inputSec, uint64_t offsetInSec,
Symbol &sym, int64_t addend, RelType addendRelType, RelExpr expr) {
// This function should only be called for non-preemptible symbols or
// RelExpr values that refer to an address inside the output file (e.g. the
// address of the GOT entry for a potentially preemptible symbol).
assert((!sym.isPreemptible || expr == R_GOT) &&
"cannot add relative relocation against preemptible symbol");
assert(expr != R_ADDEND && "expected non-addend relocation expression");
addReloc(DynamicReloc::AddendOnlyWithTargetVA, dynType, inputSec, offsetInSec,
sym, addend, expr, addendRelType);
}
void RelocationBaseSection::addAddendOnlyRelocIfNonPreemptible(
RelType dynType, InputSectionBase &isec, uint64_t offsetInSec, Symbol &sym,
RelType addendRelType) {
// No need to write an addend to the section for preemptible symbols.
if (sym.isPreemptible)
addReloc({dynType, &isec, offsetInSec, DynamicReloc::AgainstSymbol, sym, 0,
R_ABS});
else
addReloc(DynamicReloc::AddendOnlyWithTargetVA, dynType, isec, offsetInSec,
sym, 0, R_ABS, addendRelType);
}
void RelocationBaseSection::addReloc(DynamicReloc::Kind kind, RelType dynType,
InputSectionBase &inputSec,
uint64_t offsetInSec, Symbol &sym,
int64_t addend, RelExpr expr,
RelType addendRelType) {
// Write the addends to the relocated address if required. We skip
// it if the written value would be zero.
if (config->writeAddends && (expr != R_ADDEND || addend != 0))
inputSec.relocations.push_back(
{expr, addendRelType, offsetInSec, addend, &sym});
addReloc({dynType, &inputSec, offsetInSec, kind, sym, addend, expr});
}
void RelocationBaseSection::partitionRels() {
if (!combreloc)
return;
const RelType relativeRel = target->relativeRel;
numRelativeRelocs =
llvm::partition(relocs, [=](auto &r) { return r.type == relativeRel; }) -
relocs.begin();
}
void RelocationBaseSection::finalizeContents() {
SymbolTableBaseSection *symTab = getPartition().dynSymTab.get();
// When linking glibc statically, .rel{,a}.plt contains R_*_IRELATIVE
// relocations due to IFUNC (e.g. strcpy). sh_link will be set to 0 in that
// case.
if (symTab && symTab->getParent())
getParent()->link = symTab->getParent()->sectionIndex;
else
getParent()->link = 0;
if (in.relaPlt.get() == this && in.gotPlt->getParent()) {
getParent()->flags |= ELF::SHF_INFO_LINK;
getParent()->info = in.gotPlt->getParent()->sectionIndex;
}
if (in.relaIplt.get() == this && in.igotPlt->getParent()) {
getParent()->flags |= ELF::SHF_INFO_LINK;
getParent()->info = in.igotPlt->getParent()->sectionIndex;
}
}
void DynamicReloc::computeRaw(SymbolTableBaseSection *symtab) {
r_offset = getOffset();
r_sym = getSymIndex(symtab);
addend = computeAddend();
kind = AddendOnly; // Catch errors
}
void RelocationBaseSection::computeRels() {
SymbolTableBaseSection *symTab = getPartition().dynSymTab.get();
parallelForEach(relocs,
[symTab](DynamicReloc &rel) { rel.computeRaw(symTab); });
// Sort by (!IsRelative,SymIndex,r_offset). DT_REL[A]COUNT requires us to
// place R_*_RELATIVE first. SymIndex is to improve locality, while r_offset
// is to make results easier to read.
if (combreloc) {
auto nonRelative = relocs.begin() + numRelativeRelocs;
parallelSort(relocs.begin(), nonRelative,
[&](auto &a, auto &b) { return a.r_offset < b.r_offset; });
// Non-relative relocations are few, so don't bother with parallelSort.
std::sort(nonRelative, relocs.end(), [&](auto &a, auto &b) {
return std::tie(a.r_sym, a.r_offset) < std::tie(b.r_sym, b.r_offset);
});
}
}
template <class ELFT>
RelocationSection<ELFT>::RelocationSection(StringRef name, bool combreloc)
: RelocationBaseSection(name, config->isRela ? SHT_RELA : SHT_REL,
config->isRela ? DT_RELA : DT_REL,
config->isRela ? DT_RELASZ : DT_RELSZ, combreloc) {
this->entsize = config->isRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel);
}
template <class ELFT> void RelocationSection<ELFT>::writeTo(uint8_t *buf) {
computeRels();
for (const DynamicReloc &rel : relocs) {
auto *p = reinterpret_cast<Elf_Rela *>(buf);
p->r_offset = rel.r_offset;
p->setSymbolAndType(rel.r_sym, rel.type, config->isMips64EL);
if (config->isRela)
p->r_addend = rel.addend;
buf += config->isRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel);
}
}
RelrBaseSection::RelrBaseSection()
: SyntheticSection(SHF_ALLOC,
config->useAndroidRelrTags ? SHT_ANDROID_RELR : SHT_RELR,
config->wordsize, ".relr.dyn") {}
template <class ELFT>
AndroidPackedRelocationSection<ELFT>::AndroidPackedRelocationSection(
StringRef name)
: RelocationBaseSection(
name, config->isRela ? SHT_ANDROID_RELA : SHT_ANDROID_REL,
config->isRela ? DT_ANDROID_RELA : DT_ANDROID_REL,
config->isRela ? DT_ANDROID_RELASZ : DT_ANDROID_RELSZ,
/*combreloc=*/false) {
this->entsize = 1;
}
template <class ELFT>
bool AndroidPackedRelocationSection<ELFT>::updateAllocSize() {
// This function computes the contents of an Android-format packed relocation
// section.
//
// This format compresses relocations by using relocation groups to factor out
// fields that are common between relocations and storing deltas from previous
// relocations in SLEB128 format (which has a short representation for small
// numbers). A good example of a relocation type with common fields is
// R_*_RELATIVE, which is normally used to represent function pointers in
// vtables. In the REL format, each relative relocation has the same r_info
// field, and is only different from other relative relocations in terms of
// the r_offset field. By sorting relocations by offset, grouping them by
// r_info and representing each relocation with only the delta from the
// previous offset, each 8-byte relocation can be compressed to as little as 1
// byte (or less with run-length encoding). This relocation packer was able to
// reduce the size of the relocation section in an Android Chromium DSO from
// 2,911,184 bytes to 174,693 bytes, or 6% of the original size.
//
// A relocation section consists of a header containing the literal bytes
// 'APS2' followed by a sequence of SLEB128-encoded integers. The first two
// elements are the total number of relocations in the section and an initial
// r_offset value. The remaining elements define a sequence of relocation
// groups. Each relocation group starts with a header consisting of the
// following elements:
//
// - the number of relocations in the relocation group
// - flags for the relocation group
// - (if RELOCATION_GROUPED_BY_OFFSET_DELTA_FLAG is set) the r_offset delta
// for each relocation in the group.
// - (if RELOCATION_GROUPED_BY_INFO_FLAG is set) the value of the r_info
// field for each relocation in the group.
// - (if RELOCATION_GROUP_HAS_ADDEND_FLAG and
// RELOCATION_GROUPED_BY_ADDEND_FLAG are set) the r_addend delta for
// each relocation in the group.
//
// Following the relocation group header are descriptions of each of the
// relocations in the group. They consist of the following elements:
//
// - (if RELOCATION_GROUPED_BY_OFFSET_DELTA_FLAG is not set) the r_offset
// delta for this relocation.
// - (if RELOCATION_GROUPED_BY_INFO_FLAG is not set) the value of the r_info
// field for this relocation.
// - (if RELOCATION_GROUP_HAS_ADDEND_FLAG is set and
// RELOCATION_GROUPED_BY_ADDEND_FLAG is not set) the r_addend delta for
// this relocation.
size_t oldSize = relocData.size();
relocData = {'A', 'P', 'S', '2'};
raw_svector_ostream os(relocData);
auto add = [&](int64_t v) { encodeSLEB128(v, os); };
// The format header includes the number of relocations and the initial
// offset (we set this to zero because the first relocation group will
// perform the initial adjustment).
add(relocs.size());
add(0);
std::vector<Elf_Rela> relatives, nonRelatives;
for (const DynamicReloc &rel : relocs) {
Elf_Rela r;
r.r_offset = rel.getOffset();
r.setSymbolAndType(rel.getSymIndex(getPartition().dynSymTab.get()),
rel.type, false);
if (config->isRela)
r.r_addend = rel.computeAddend();
if (r.getType(config->isMips64EL) == target->relativeRel)
relatives.push_back(r);
else
nonRelatives.push_back(r);
}
llvm::sort(relatives, [](const Elf_Rel &a, const Elf_Rel &b) {
return a.r_offset < b.r_offset;
});
// Try to find groups of relative relocations which are spaced one word
// apart from one another. These generally correspond to vtable entries. The
// format allows these groups to be encoded using a sort of run-length
// encoding, but each group will cost 7 bytes in addition to the offset from
// the previous group, so it is only profitable to do this for groups of
// size 8 or larger.
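// For example, 10 relative relocations at offsets 0x2000, 0x2008, ...,
// 0x2048 form one group and are run-length encoded, whereas the same 10
// relocations at unrelated offsets stay ungrouped and each costs its own
// offset delta.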
std::vector<Elf_Rela> ungroupedRelatives;
std::vector<std::vector<Elf_Rela>> relativeGroups;
for (auto i = relatives.begin(), e = relatives.end(); i != e;) {
std::vector<Elf_Rela> group;
do {
group.push_back(*i++);
} while (i != e && (i - 1)->r_offset + config->wordsize == i->r_offset);
if (group.size() < 8)
ungroupedRelatives.insert(ungroupedRelatives.end(), group.begin(),
group.end());
else
relativeGroups.emplace_back(std::move(group));
}
// For non-relative relocations, we would like to:
// 1. Have relocations with the same symbol offset be consecutive, so
// that the runtime linker can speed up symbol lookup by implementing a
// 1-entry cache.
// 2. Group relocations by r_info to reduce the size of the relocation
// section.
// Since the symbol offset is the high bits in r_info, sorting by r_info
// allows us to do both.
//
// For Rela, we also want to sort by r_addend when r_info is the same. This
// enables us to group by r_addend as well.
llvm::stable_sort(nonRelatives, [](const Elf_Rela &a, const Elf_Rela &b) {
if (a.r_info != b.r_info)
return a.r_info < b.r_info;
if (config->isRela)
return a.r_addend < b.r_addend;
return false;
});
// Group relocations with the same r_info. Note that each group emits a group
// header and that may make the relocation section larger. It is hard to
// estimate the size of a group header as the encoded size of that varies
// based on r_info. However, we can approximate this trade-off by the number
// of values encoded. Each group header contains 3 values, and each relocation
// in a group encodes one less value, as compared to when it is not grouped.
// Therefore, we only group relocations if there are 3 or more of them with
// the same r_info.
//
// For Rela, the addend for most non-relative relocations is zero, and thus we
// can usually get a smaller relocation section if we group relocations with 0
// addend as well.
std::vector<Elf_Rela> ungroupedNonRelatives;
std::vector<std::vector<Elf_Rela>> nonRelativeGroups;
for (auto i = nonRelatives.begin(), e = nonRelatives.end(); i != e;) {
auto j = i + 1;
while (j != e && i->r_info == j->r_info &&
(!config->isRela || i->r_addend == j->r_addend))
++j;
if (j - i < 3 || (config->isRela && i->r_addend != 0))
ungroupedNonRelatives.insert(ungroupedNonRelatives.end(), i, j);
else
nonRelativeGroups.emplace_back(i, j);
i = j;
}
// Sort ungrouped relocations by offset to minimize the encoded length.
llvm::sort(ungroupedNonRelatives, [](const Elf_Rela &a, const Elf_Rela &b) {
return a.r_offset < b.r_offset;
});
unsigned hasAddendIfRela =
config->isRela ? RELOCATION_GROUP_HAS_ADDEND_FLAG : 0;
uint64_t offset = 0;
uint64_t addend = 0;
// Emit the run-length encoding for the groups of adjacent relative
// relocations. Each group is represented using two groups in the packed
// format. The first is used to set the current offset to the start of the
// group (and also encodes the first relocation), and the second encodes the
// remaining relocations.
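// As a rough illustration: a run of 16 adjacent R_*_RELATIVE relocations
// starting at offset 0x1000 on a 64-bit target is emitted as a packed group
// of size 1 (which moves the current offset to 0x1000) followed by a packed
// group of size 15 whose per-relocation offset delta is the word size (8).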
for (std::vector<Elf_Rela> &g : relativeGroups) {
// The first relocation in the group.
add(1);
add(RELOCATION_GROUPED_BY_OFFSET_DELTA_FLAG |
RELOCATION_GROUPED_BY_INFO_FLAG | hasAddendIfRela);
add(g[0].r_offset - offset);
add(target->relativeRel);
if (config->isRela) {
add(g[0].r_addend - addend);
addend = g[0].r_addend;
}
// The remaining relocations.
add(g.size() - 1);
add(RELOCATION_GROUPED_BY_OFFSET_DELTA_FLAG |
RELOCATION_GROUPED_BY_INFO_FLAG | hasAddendIfRela);
add(config->wordsize);
add(target->relativeRel);
if (config->isRela) {
for (auto i = g.begin() + 1, e = g.end(); i != e; ++i) {
add(i->r_addend - addend);
addend = i->r_addend;
}
}
offset = g.back().r_offset;
}
// Now the ungrouped relatives.
if (!ungroupedRelatives.empty()) {
add(ungroupedRelatives.size());
add(RELOCATION_GROUPED_BY_INFO_FLAG | hasAddendIfRela);
add(target->relativeRel);
for (Elf_Rela &r : ungroupedRelatives) {
add(r.r_offset - offset);
offset = r.r_offset;
if (config->isRela) {
add(r.r_addend - addend);
addend = r.r_addend;
}
}
}
// Grouped non-relatives.
for (ArrayRef<Elf_Rela> g : nonRelativeGroups) {
add(g.size());
add(RELOCATION_GROUPED_BY_INFO_FLAG);
add(g[0].r_info);
for (const Elf_Rela &r : g) {
add(r.r_offset - offset);
offset = r.r_offset;
}
addend = 0;
}
// Finally the ungrouped non-relative relocations.
if (!ungroupedNonRelatives.empty()) {
add(ungroupedNonRelatives.size());
add(hasAddendIfRela);
for (Elf_Rela &r : ungroupedNonRelatives) {
add(r.r_offset - offset);
offset = r.r_offset;
add(r.r_info);
if (config->isRela) {
add(r.r_addend - addend);
addend = r.r_addend;
}
}
}
// Don't allow the section to shrink; otherwise the size of the section can
// oscillate infinitely.
if (relocData.size() < oldSize)
relocData.append(oldSize - relocData.size(), 0);
// Returns whether the section size changed. We need to keep recomputing both
// section layout and the contents of this section until the size converges
// because changing this section's size can affect section layout, which in
// turn can affect the sizes of the LEB-encoded integers stored in this
// section.
return relocData.size() != oldSize;
}
template <class ELFT> RelrSection<ELFT>::RelrSection() {
this->entsize = config->wordsize;
}
template <class ELFT> bool RelrSection<ELFT>::updateAllocSize() {
// This function computes the contents of an SHT_RELR packed relocation
// section.
//
// Proposal for adding SHT_RELR sections to generic-abi is here:
// https://groups.google.com/forum/#!topic/generic-abi/bX460iggiKg
//
// The encoded sequence of Elf64_Relr entries in a SHT_RELR section looks
// like [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ]
//
// i.e. start with an address, followed by any number of bitmaps. The address
// entry encodes 1 relocation. The subsequent bitmap entries encode up to 63
// relocations each, at subsequent offsets following the last address entry.
//
// The bitmap entries must have 1 in the least significant bit. The assumption
// here is that an address cannot have 1 in lsb. Odd addresses are not
// supported.
//
// Excluding the least significant bit in the bitmap, each non-zero bit in
// the bitmap represents a relocation to be applied to a corresponding machine
// word that follows the base address word. The second least significant bit
// represents the machine word immediately following the initial address, and
// each bit that follows represents the next word, in linear order. As such,
// a single bitmap can encode up to 31 relocations in a 32-bit object, and
// 63 relocations in a 64-bit object.
//
// This encoding has a couple of interesting properties:
// 1. Looking at any entry, it is clear whether it's an address or a bitmap:
// even means address, odd means bitmap.
// 2. Just a simple list of addresses is a valid encoding.
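//
// A small worked example, assuming a 64-bit target: relative relocations at
// offsets 0x10000, 0x10008, 0x10010 and 0x10020 encode as the two entries
// [ 0x10000, 0x17 ]. The first entry is the even start address; the second
// is the bitmap 0b1011 for the three follow-on words, shifted left by one
// with the least significant bit set.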
size_t oldSize = relrRelocs.size();
relrRelocs.clear();
// Same as config->wordsize but faster because this is a compile-time
// constant.
const size_t wordsize = sizeof(typename ELFT::uint);
// Number of bits to use for the relocation offsets bitmap.
// Must be either 63 or 31.
const size_t nBits = wordsize * 8 - 1;
// Get offsets for all relative relocations and sort them.
std::unique_ptr<uint64_t[]> offsets(new uint64_t[relocs.size()]);
for (auto it : llvm::enumerate(relocs))
offsets[it.index()] = it.value().getOffset();
std::sort(offsets.get(), offsets.get() + relocs.size());
// For each leading relocation, find following ones that can be folded
// as a bitmap and fold them.
for (size_t i = 0, e = relocs.size(); i != e;) {
// Add a leading relocation.
relrRelocs.push_back(Elf_Relr(offsets[i]));
uint64_t base = offsets[i] + wordsize;
++i;
// Find foldable relocations to construct bitmaps.
for (;;) {
uint64_t bitmap = 0;
for (; i != e; ++i) {
uint64_t d = offsets[i] - base;
if (d >= nBits * wordsize || d % wordsize)
break;
bitmap |= uint64_t(1) << (d / wordsize);
}
if (!bitmap)
break;
relrRelocs.push_back(Elf_Relr((bitmap << 1) | 1));
base += nBits * wordsize;
}
}
// Don't allow the section to shrink; otherwise the size of the section can
// oscillate infinitely. Trailing 1s do not decode to more relocations.
if (relrRelocs.size() < oldSize) {
log(".relr.dyn needs " + Twine(oldSize - relrRelocs.size()) +
" padding word(s)");
relrRelocs.resize(oldSize, Elf_Relr(1));
}
return relrRelocs.size() != oldSize;
}
SymbolTableBaseSection::SymbolTableBaseSection(StringTableSection &strTabSec)
: SyntheticSection(strTabSec.isDynamic() ? (uint64_t)SHF_ALLOC : 0,
strTabSec.isDynamic() ? SHT_DYNSYM : SHT_SYMTAB,
config->wordsize,
strTabSec.isDynamic() ? ".dynsym" : ".symtab"),
strTabSec(strTabSec) {}
// Orders symbols according to their positions in the GOT,
// in compliance with MIPS ABI rules.
// See "Global Offset Table" in Chapter 5 in the following document
// for detailed description:
// ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
static bool sortMipsSymbols(const SymbolTableEntry &l,
const SymbolTableEntry &r) {
// Sort entries related to non-local preemptible symbols by GOT indexes.
// All other entries go to the beginning of a dynsym in arbitrary order.
if (l.sym->isInGot() && r.sym->isInGot())
return l.sym->getGotIdx() < r.sym->getGotIdx();
if (!l.sym->isInGot() && !r.sym->isInGot())
return false;
return !l.sym->isInGot();
}
void SymbolTableBaseSection::finalizeContents() {
if (OutputSection *sec = strTabSec.getParent())
getParent()->link = sec->sectionIndex;
if (this->type != SHT_DYNSYM) {
sortSymTabSymbols();
return;
}
// If it is a .dynsym, there should be no local symbols, but we need
// to do a few things for the dynamic linker.
// Section's Info field has the index of the first non-local symbol.
// Because the first symbol entry is a null entry, 1 is the first.
getParent()->info = 1;
if (getPartition().gnuHashTab) {
// NB: It also sorts Symbols to meet the GNU hash table requirements.
getPartition().gnuHashTab->addSymbols(symbols);
} else if (config->emachine == EM_MIPS) {
llvm::stable_sort(symbols, sortMipsSymbols);
}
// Only the main partition's dynsym indexes are stored in the symbols
// themselves. All other partitions use a lookup table.
if (this == mainPart->dynSymTab.get()) {
size_t i = 0;
for (const SymbolTableEntry &s : symbols)
s.sym->dynsymIndex = ++i;
}
}
// The ELF spec requires that all local symbols precede global symbols, so we
// sort symbol entries in this function. (For .dynsym, we don't do that because
// symbols for dynamic linking are inherently all globals.)
//
// Aside from the above, we put local symbols in groups starting with the
// STT_FILE symbol. That is convenient for identifying where local symbols
// come from.
void SymbolTableBaseSection::sortSymTabSymbols() {
// Move all local symbols before global symbols.
auto e = std::stable_partition(
symbols.begin(), symbols.end(),
[](const SymbolTableEntry &s) { return s.sym->isLocal(); });
size_t numLocals = e - symbols.begin();
getParent()->info = numLocals + 1;
// We want to group the local symbols by file. For that we rebuild the local
// part of the symbols vector. We do not need to care about the STT_FILE
// symbols; they are already naturally placed first in each group. That
// happens because an STT_FILE symbol is always the first symbol in an object
// file and hence precedes all other local symbols we add for that file.
MapVector<InputFile *, SmallVector<SymbolTableEntry, 0>> arr;
for (const SymbolTableEntry &s : llvm::make_range(symbols.begin(), e))
arr[s.sym->file].push_back(s);
auto i = symbols.begin();
for (auto &p : arr)
for (SymbolTableEntry &entry : p.second)
*i++ = entry;
}
void SymbolTableBaseSection::addSymbol(Symbol *b) {
// Adding a local symbol to a .dynsym is a bug.
assert(this->type != SHT_DYNSYM || !b->isLocal());
bool hashIt = b->isLocal() && config->optimize >= 2;
symbols.push_back({b, strTabSec.addString(b->getName(), hashIt)});
}
size_t SymbolTableBaseSection::getSymbolIndex(Symbol *sym) {
if (this == mainPart->dynSymTab.get())
return sym->dynsymIndex;
// Initializes symbol lookup tables lazily. This is used only for -r,
// --emit-relocs and dynsyms in partitions other than the main one.
llvm::call_once(onceFlag, [&] {
symbolIndexMap.reserve(symbols.size());
size_t i = 0;
for (const SymbolTableEntry &e : symbols) {
if (e.sym->type == STT_SECTION)
sectionIndexMap[e.sym->getOutputSection()] = ++i;
else
symbolIndexMap[e.sym] = ++i;
}
});
// Section symbols are mapped based on their output sections
// to maintain their semantics.
if (sym->type == STT_SECTION)
return sectionIndexMap.lookup(sym->getOutputSection());
return symbolIndexMap.lookup(sym);
}
template <class ELFT>
SymbolTableSection<ELFT>::SymbolTableSection(StringTableSection &strTabSec)
: SymbolTableBaseSection(strTabSec) {
this->entsize = sizeof(Elf_Sym);
}
static BssSection *getCommonSec(Symbol *sym) {
if (!config->defineCommon)
if (auto *d = dyn_cast<Defined>(sym))
return dyn_cast_or_null<BssSection>(d->section);
return nullptr;
}
static uint32_t getSymSectionIndex(Symbol *sym) {
assert(!(sym->needsCopy && sym->isObject()));
if (!isa<Defined>(sym) || sym->needsCopy)
return SHN_UNDEF;
if (const OutputSection *os = sym->getOutputSection())
return os->sectionIndex >= SHN_LORESERVE ? (uint32_t)SHN_XINDEX
: os->sectionIndex;
return SHN_ABS;
}
// Write the internal symbol table contents to the output symbol table.
template <class ELFT> void SymbolTableSection<ELFT>::writeTo(uint8_t *buf) {
// The first entry is a null entry as per the ELF spec.
buf += sizeof(Elf_Sym);
auto *eSym = reinterpret_cast<Elf_Sym *>(buf);
for (SymbolTableEntry &ent : symbols) {
Symbol *sym = ent.sym;
bool isDefinedHere = type == SHT_SYMTAB || sym->partition == partition;
// Set st_name, st_info and st_other.
eSym->st_name = ent.strTabOffset;
eSym->setBindingAndType(sym->binding, sym->type);
eSym->st_other = sym->visibility;
// The 3 most significant bits of st_other are used by OpenPOWER ABI.
// See getPPC64GlobalEntryToLocalEntryOffset() for more details.
if (config->emachine == EM_PPC64)
eSym->st_other |= sym->stOther & 0xe0;
// The most significant bit of st_other is used by AArch64 ABI for the
// variant PCS.
else if (config->emachine == EM_AARCH64)
eSym->st_other |= sym->stOther & STO_AARCH64_VARIANT_PCS;
if (BssSection *commonSec = getCommonSec(sym)) {
// st_value is usually an address of a symbol, but that has a special
// meaning for uninstantiated common symbols (--no-define-common).
eSym->st_shndx = SHN_COMMON;
eSym->st_value = commonSec->alignment;
eSym->st_size = cast<Defined>(sym)->size;
} else {
const uint32_t shndx = getSymSectionIndex(sym);
if (isDefinedHere) {
eSym->st_shndx = shndx;
eSym->st_value = sym->getVA();
// Copy the symbol size if it is a defined symbol. st_size is not
// significant for undefined symbols, so in that case it is up to us whether
// to copy it. We leave it as zero so that two sets of input files that
// differ only in the sizes of undefined symbols in DSOs produce identical
// outputs.
eSym->st_size = shndx != SHN_UNDEF ? cast<Defined>(sym)->size : 0;
} else {
eSym->st_shndx = 0;
eSym->st_value = 0;
eSym->st_size = 0;
}
}
++eSym;
}
// On MIPS we need to mark symbols that have a PLT entry and require
// pointer equality with the STO_MIPS_PLT flag. That is necessary to help
// the dynamic linker distinguish such symbols from MIPS lazy-binding stubs.
// https://sourceware.org/ml/binutils/2008-07/txt00000.txt
if (config->emachine == EM_MIPS) {
auto *eSym = reinterpret_cast<Elf_Sym *>(buf);
for (SymbolTableEntry &ent : symbols) {
Symbol *sym = ent.sym;
if (sym->isInPlt() && sym->needsCopy)
eSym->st_other |= STO_MIPS_PLT;
if (isMicroMips()) {
// We already set the least-significant bit for symbols
// marked by the `STO_MIPS_MICROMIPS` flag and for microMIPS PLT
// records. That allows us to distinguish such symbols in
// the `MIPS<ELFT>::relocate()` routine. Now we should
// clear that bit for the non-dynamic symbol table, so tools
// like `objdump` see the correct symbol address.
if (sym->isDefined() &&
((sym->stOther & STO_MIPS_MICROMIPS) || sym->needsCopy)) {
if (!strTabSec.isDynamic())
eSym->st_value &= ~1;
eSym->st_other |= STO_MIPS_MICROMIPS;
}
}
if (config->relocatable)
if (auto *d = dyn_cast<Defined>(sym))
if (isMipsPIC<ELFT>(d))
eSym->st_other |= STO_MIPS_PIC;
++eSym;
}
}
}
SymtabShndxSection::SymtabShndxSection()
: SyntheticSection(0, SHT_SYMTAB_SHNDX, 4, ".symtab_shndx") {
this->entsize = 4;
}
void SymtabShndxSection::writeTo(uint8_t *buf) {
// We write an array of 32-bit values, where each value has a 1:1 association
// with an entry in .symtab. If the corresponding entry contains SHN_XINDEX,
// we need to write the actual section index; otherwise, we write SHN_UNDEF (0).
buf += 4; // Ignore .symtab[0] entry.
for (const SymbolTableEntry &entry : in.symTab->getSymbols()) {
if (!getCommonSec(entry.sym) && getSymSectionIndex(entry.sym) == SHN_XINDEX)
write32(buf, entry.sym->getOutputSection()->sectionIndex);
buf += 4;
}
}
bool SymtabShndxSection::isNeeded() const {
// SHT_SYMTAB can hold symbols with section index values up to
// SHN_LORESERVE. If we need more, we want to use the extension
// SHT_SYMTAB_SHNDX section. The problem is that we learn the final section
// indices a bit too late, and we do not know them here. For simplicity, we
// just always create a .symtab_shndx section when the number of output
// sections is huge.
size_t size = 0;
for (SectionCommand *cmd : script->sectionCommands)
if (isa<OutputSection>(cmd))
++size;
return size >= SHN_LORESERVE;
}
void SymtabShndxSection::finalizeContents() {
getParent()->link = in.symTab->getParent()->sectionIndex;
}
size_t SymtabShndxSection::getSize() const {
return in.symTab->getNumSymbols() * 4;
}
// .hash and .gnu.hash sections contain on-disk hash tables that map
// symbol names to their dynamic symbol table indices. Their purpose
// is to help the dynamic linker resolve symbols quickly. If ELF files
// don't have them, the dynamic linker has to do linear search on all
// dynamic symbols, which makes programs slower. Therefore, a .hash
// section is added to a DSO by default.
//
// The Unix semantics of resolving dynamic symbols is somewhat expensive.
// Each ELF file has a list of DSOs that the ELF file depends on and a
// list of dynamic symbols that need to be resolved from any of the
// DSOs. That means resolving all dynamic symbols takes O(m*n) time,
// where m is the number of DSOs and n is the number of dynamic
// symbols. For modern large programs, both m and n are large. So
// making each step faster by using hash tables substantially
// improves program load time.
//
// (Note that this is not the only way to design the shared library.
// For instance, the Windows DLL takes a different approach. On
// Windows, each dynamic symbol carries the name of the DLL from which the
// symbol has to be resolved. That makes the cost of symbol resolution O(n).
// This disables some hacky techniques you can use on Unix, such as
// LD_PRELOAD, but it is arguably better semantics than the Unix ones.)
//
// Due to historical reasons, we have two different hash tables, .hash
// and .gnu.hash. They are for the same purpose, and .gnu.hash is a new
// and better version of .hash. .hash is just an on-disk hash table, but
// .gnu.hash has a bloom filter in addition to a hash table to skip
// DSOs very quickly. If you are sure that your dynamic linker knows
// about .gnu.hash, you want to specify --hash-style=gnu. Otherwise, a
// safe bet is to specify --hash-style=both for backward compatibility.
GnuHashTableSection::GnuHashTableSection()
: SyntheticSection(SHF_ALLOC, SHT_GNU_HASH, config->wordsize, ".gnu.hash") {
}
void GnuHashTableSection::finalizeContents() {
if (OutputSection *sec = getPartition().dynSymTab->getParent())
getParent()->link = sec->sectionIndex;
// Compute the Bloom filter size in words. We want to allocate 12
// bits for each symbol. The number of words must be a power of two.
if (symbols.empty()) {
maskWords = 1;
} else {
uint64_t numBits = symbols.size() * 12;
maskWords = NextPowerOf2(numBits / (config->wordsize * 8));
}
size = 16; // Header
size += config->wordsize * maskWords; // Bloom filter
size += nBuckets * 4; // Hash buckets
size += symbols.size() * 4; // Hash values
}
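// A worked example of the sizing above (numbers are illustrative): with 1000
// symbols on a 64-bit target (wordsize == 8), numBits == 12000, 12000 / 64 ==
// 187, and NextPowerOf2(187) == 256, so the Bloom filter occupies 256 * 8 ==
// 2048 bytes; the total section size is 16 + 2048 + nBuckets * 4 + 1000 * 4.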
void GnuHashTableSection::writeTo(uint8_t *buf) {
// Write a header.
write32(buf, nBuckets);
write32(buf + 4, getPartition().dynSymTab->getNumSymbols() - symbols.size());
write32(buf + 8, maskWords);
write32(buf + 12, Shift2);
buf += 16;
// Write the Bloom filter: two bits are set per symbol.
const unsigned c = config->is64 ? 64 : 32;
for (const Entry &sym : symbols) {
// When c = 64, we choose a word using hash bits [6:...] and set two bits in
// that word, selected by hash bits [0:5] and [26:31].
size_t i = (sym.hash / c) & (maskWords - 1);
uint64_t val = readUint(buf + i * config->wordsize);
val |= uint64_t(1) << (sym.hash % c);
val |= uint64_t(1) << ((sym.hash >> Shift2) % c);
writeUint(buf + i * config->wordsize, val);
}
buf += config->wordsize * maskWords;
// Write the hash table.
uint32_t *buckets = reinterpret_cast<uint32_t *>(buf);
uint32_t oldBucket = -1;
uint32_t *values = buckets + nBuckets;
for (auto i = symbols.begin(), e = symbols.end(); i != e; ++i) {
// Write a hash value. Values that share the same bucket (hash modulo
// nBuckets) form a chain; the last element of each chain is marked by
// setting its least significant bit to 1.
uint32_t hash = i->hash;
bool isLastInChain = (i + 1) == e || i->bucketIdx != (i + 1)->bucketIdx;
hash = isLastInChain ? hash | 1 : hash & ~1;
write32(values++, hash);
if (i->bucketIdx == oldBucket)
continue;
// Write a hash bucket. Hash buckets contain indices in the following hash
// value table.
write32(buckets + i->bucketIdx,
getPartition().dynSymTab->getSymbolIndex(i->sym));
oldBucket = i->bucketIdx;
}
}
static uint32_t hashGnu(StringRef name) {
uint32_t h = 5381;
for (uint8_t c : name)
h = (h << 5) + h + c;
return h;
}
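// hashGnu() is the standard DJB2-style hash used by .gnu.hash: (h << 5) + h
// is h * 33. A minimal standalone sketch, for reference only (the name below
// is illustrative, not part of lld):
//
//   uint32_t gnuHashSketch(StringRef name) {
//     uint32_t h = 5381;
//     for (uint8_t c : name)
//       h = h * 33 + c;
//     return h;
//   }
//
// e.g. gnuHashSketch("") == 5381 and gnuHashSketch("a") == 5381 * 33 + 'a' ==
// 177670.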
// Add symbols to this symbol hash table. Note that this function
// destructively sorts a given vector -- which is needed because the
// GNU-style hash table imposes sorting requirements on the dynamic symbols.
void GnuHashTableSection::addSymbols(SmallVectorImpl<SymbolTableEntry> &v) {
// Move symbols that are not defined in this partition to the front; they
// will not get .gnu.hash entries.
auto mid =
std::stable_partition(v.begin(), v.end(), [&](const SymbolTableEntry &s) {
return !s.sym->isDefined() || s.sym->partition != partition;
});
// We chose load factor 4 for the on-disk hash table. For each hash
// collision, the dynamic linker will compare a uint32_t hash value.
// Since the integer comparison is quite fast, we believe we can
// make the load factor even larger. 4 is just a conservative choice.
//
// Note that we don't want to create a zero-sized hash table because
// the Android loader as of 2018 doesn't accept a .gnu.hash containing
// such a table. In that case, we create a hash table with one unused
// dummy slot.
nBuckets = std::max<size_t>((v.end() - mid) / 4, 1);
if (mid == v.end())
return;
for (SymbolTableEntry &ent : llvm::make_range(mid, v.end())) {
Symbol *b = ent.sym;
uint32_t hash = hashGnu(b->getName());
uint32_t bucketIdx = hash % nBuckets;
symbols.push_back({b, ent.strTabOffset, hash, bucketIdx});
}
llvm::sort(symbols, [](const Entry &l, const Entry &r) {
return std::tie(l.bucketIdx, l.strTabOffset) <
std::tie(r.bucketIdx, r.strTabOffset);
});
v.erase(mid, v.end());
for (const Entry &ent : symbols)
v.push_back({ent.sym, ent.strTabOffset});
}
HashTableSection::HashTableSection()
: SyntheticSection(SHF_ALLOC, SHT_HASH, 4, ".hash") {
this->entsize = 4;
}
void HashTableSection::finalizeContents() {
SymbolTableBaseSection *symTab = getPartition().dynSymTab.get();
if (OutputSection *sec = symTab->getParent())
getParent()->link = sec->sectionIndex;
unsigned numEntries = 2; // nbucket and nchain.
numEntries += symTab->getNumSymbols(); // The chain entries.
// Create as many buckets as there are symbols.
numEntries += symTab->getNumSymbols();
this->size = numEntries * 4;
}
void HashTableSection::writeTo(uint8_t *buf) {
SymbolTableBaseSection *symTab = getPartition().dynSymTab.get();
unsigned numSymbols = symTab->getNumSymbols();
uint32_t *p = reinterpret_cast<uint32_t *>(buf);
write32(p++, numSymbols); // nbucket
write32(p++, numSymbols); // nchain
uint32_t *buckets = p;
uint32_t *chains = p + numSymbols;
for (const SymbolTableEntry &s : symTab->getSymbols()) {
Symbol *sym = s.sym;
StringRef name = sym->getName();
unsigned i = sym->dynsymIndex;
uint32_t hash = hashSysV(name) % numSymbols;
chains[i] = buckets[hash];
write32(buckets + hash, i);
}
}
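// hashSysV() (declared elsewhere) computes the classic System V ABI ELF hash.
// For reference, the well-known algorithm is roughly:
//
//   uint32_t sysvHashSketch(StringRef name) {
//     uint32_t h = 0;
//     for (uint8_t c : name) {
//       h = (h << 4) + c;
//       uint32_t g = h & 0xf0000000;
//       if (g)
//         h ^= g >> 24;
//       h &= ~g;
//     }
//     return h;
//   }
//
// At load time the dynamic linker starts at buckets[hash % nbucket] and
// follows chains[] until it hits index 0 (STN_UNDEF).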
PltSection::PltSection()
: SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, ".plt"),
headerSize(target->pltHeaderSize) {
// On PowerPC, this section contains lazy symbol resolvers.
if (config->emachine == EM_PPC64) {
name = ".glink";
alignment = 4;
}
// On x86 when IBT is enabled, this section contains the second PLT (lazy
// symbol resolvers).
if ((config->emachine == EM_386 || config->emachine == EM_X86_64) &&
(config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT))
name = ".plt.sec";
// The PLT needs to be writable on SPARC as the dynamic linker will
// modify the instructions in the PLT entries.
if (config->emachine == EM_SPARCV9)
this->flags |= SHF_WRITE;
}
void PltSection::writeTo(uint8_t *buf) {
// At the beginning of the PLT, we have code to call the dynamic
// linker to resolve dynsyms at runtime. Write such code.
target->writePltHeader(buf);
size_t off = headerSize;
for (const Symbol *sym : entries) {
target->writePlt(buf + off, *sym, getVA() + off);
off += target->pltEntrySize;
}
}
void PltSection::addEntry(Symbol &sym) {
assert(sym.auxIdx == symAux.size() - 1);
symAux.back().pltIdx = entries.size();
entries.push_back(&sym);
}
size_t PltSection::getSize() const {
return headerSize + entries.size() * target->pltEntrySize;
}
bool PltSection::isNeeded() const {
// For -z retpolineplt, .iplt needs the .plt header.
return !entries.empty() || (config->zRetpolineplt && in.iplt->isNeeded());
}
// Used by ARM to add mapping symbols in the PLT section, which aid
// disassembly.
void PltSection::addSymbols() {
target->addPltHeaderSymbols(*this);
size_t off = headerSize;
for (size_t i = 0; i < entries.size(); ++i) {
target->addPltSymbols(*this, off);
off += target->pltEntrySize;
}
}
IpltSection::IpltSection()
: SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, ".iplt") {
if (config->emachine == EM_PPC || config->emachine == EM_PPC64) {
name = ".glink";
alignment = 4;
}
}
void IpltSection::writeTo(uint8_t *buf) {
uint32_t off = 0;
for (const Symbol *sym : entries) {
target->writeIplt(buf + off, *sym, getVA() + off);
off += target->ipltEntrySize;
}
}
size_t IpltSection::getSize() const {
return entries.size() * target->ipltEntrySize;
}
void IpltSection::addEntry(Symbol &sym) {
assert(sym.auxIdx == symAux.size() - 1);
symAux.back().pltIdx = entries.size();
entries.push_back(&sym);
}
// ARM uses mapping symbols to aid disassembly.
void IpltSection::addSymbols() {
size_t off = 0;
for (size_t i = 0, e = entries.size(); i != e; ++i) {
target->addPltSymbols(*this, off);
off += target->pltEntrySize;
}
}
PPC32GlinkSection::PPC32GlinkSection() {
name = ".glink";
alignment = 4;
}
void PPC32GlinkSection::writeTo(uint8_t *buf) {
writePPC32GlinkSection(buf, entries.size());
}
size_t PPC32GlinkSection::getSize() const {
return headerSize + entries.size() * target->pltEntrySize + footerSize;
}
// This is an x86-only extra PLT section used only when a security
// enhancement feature called CET is enabled. In this comment, I'll explain what
// the feature is and why we have two PLT sections if CET is enabled.
//
// So, what does CET do? CET introduces a new restriction to indirect jump
// instructions. CET works this way. Assume that CET is enabled. Then, if you
// execute an indirect jump instruction, the processor verifies that a special
// "landing pad" instruction (which is actually a repurposed NOP instruction and
// now called "endbr32" or "endbr64") is at the jump target. If the jump target
// does not start with that instruction, the processor raises an exception
// instead of continuing to execute code.
//
// If CET is enabled, the compiler emits endbr to all locations where indirect
// jumps may jump to.
//
// This mechanism makes it extremely hard to transfer control to the middle of
// a function that is not supposed to be an indirect jump target, preventing
// certain types of attacks such as ROP or JOP.
//
// Note that processors on the market as of 2019 don't actually support the
// feature. Only the spec is available at the moment.
//
// Now, I'll explain why we have this extra PLT section for CET.
//
// Since you can indirectly jump to a PLT entry, we have to make PLT entries
// start with endbr. The problem is there's no extra space for endbr (which is 4
// bytes long), as the PLT entry is only 16 bytes long and all bytes are already
// used.
//
// In order to deal with the issue, we split a PLT entry into two PLT entries.
// Remember that each PLT entry contains code to jump to an address read from
// .got.plt AND code to resolve a dynamic symbol lazily. With the 2-PLT scheme,
// the former code is written to .plt.sec, and the latter code is written to
// .plt.
//
// Lazy symbol resolution in the 2-PLT scheme works in the usual way, except
// that the regular .plt is now called .plt.sec and .plt is repurposed to
// contain only code for lazy symbol resolution.
//
// In other words, this is how the 2-PLT scheme works. Application code is
// supposed to jump to .plt.sec to call an external function. Each .plt.sec
// entry contains code to read an address from a corresponding .got.plt entry
// and jump to that address. Addresses in .got.plt initially point to .plt, so
// when an application calls an external function for the first time, the
// control is transferred to a function that resolves a symbol name from
// external shared object files. That function then rewrites a .got.plt entry
// with a resolved address, so that the subsequent function calls directly jump
// to a desired location from .plt.sec.
//
// There is an open question as to whether the 2-PLT scheme was desirable or
// not. We could have simply extended the PLT entry size to 32 bytes to
// accommodate endbr, and that scheme would have been much simpler than the
// 2-PLT scheme. One reason to split the PLT was that, by doing so, we could
// keep hot code (.plt.sec) separate from cold code (.plt). But as far as I
// know, no one has proved that the optimization actually makes a difference.
//
// That said, the 2-PLT scheme is a part of the ABI, debuggers and other tools
// depend on it, so we implement the ABI.
IBTPltSection::IBTPltSection()
: SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, ".plt") {}
void IBTPltSection::writeTo(uint8_t *buf) {
target->writeIBTPlt(buf, in.plt->getNumEntries());
}
size_t IBTPltSection::getSize() const {
// 16 is the header size of .plt.
return 16 + in.plt->getNumEntries() * target->pltEntrySize;
}
+bool IBTPltSection::isNeeded() const { return in.plt->getNumEntries() > 0; }
+
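// A sketch of the resulting control flow under the 2-PLT scheme described
// above (names are illustrative):
//   1. Application code calls foo@plt, which is an entry in .plt.sec.
//   2. The .plt.sec entry starts with endbr and jumps through foo's .got.plt
//      slot, which initially points back at foo's entry in .plt.
//   3. The .plt entry pushes the relocation index and jumps to the resolver
//      via the .plt header; the resolver patches the .got.plt slot.
//   4. Subsequent calls go .plt.sec -> foo directly.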
// The string hash function for .gdb_index.
static uint32_t computeGdbHash(StringRef s) {
uint32_t h = 0;
for (uint8_t c : s)
h = h * 67 + toLower(c) - 113;
return h;
}
GdbIndexSection::GdbIndexSection()
: SyntheticSection(0, SHT_PROGBITS, 1, ".gdb_index") {}
// Returns the desired size of an on-disk hash table for a .gdb_index section.
// There's a tradeoff between size and collision rate. We aim for at most 75%
// utilization.
size_t GdbIndexSection::computeSymtabSize() const {
return std::max<size_t>(NextPowerOf2(symbols.size() * 4 / 3), 1024);
}
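// For example, 3000 symbols yield NextPowerOf2(3000 * 4 / 3) ==
// NextPowerOf2(4000) == 4096 slots (roughly 73% utilization); inputs with
// fewer than 768 symbols always get the 1024-slot minimum.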
// Compute the output section size.
void GdbIndexSection::initOutputSize() {
size = sizeof(GdbIndexHeader) + computeSymtabSize() * 8;
for (GdbChunk &chunk : chunks)
size += chunk.compilationUnits.size() * 16 + chunk.addressAreas.size() * 20;
// Add the constant pool size if it exists.
if (!symbols.empty()) {
GdbSymbol &sym = symbols.back();
size += sym.nameOff + sym.name.size() + 1;
}
}
static SmallVector<GdbIndexSection::CuEntry, 0>
readCuList(DWARFContext &dwarf) {
SmallVector<GdbIndexSection::CuEntry, 0> ret;
for (std::unique_ptr<DWARFUnit> &cu : dwarf.compile_units())
ret.push_back({cu->getOffset(), cu->getLength() + 4});
return ret;
}
static SmallVector<GdbIndexSection::AddressEntry, 0>
readAddressAreas(DWARFContext &dwarf, InputSection *sec) {
SmallVector<GdbIndexSection::AddressEntry, 0> ret;
uint32_t cuIdx = 0;
for (std::unique_ptr<DWARFUnit> &cu : dwarf.compile_units()) {
if (Error e = cu->tryExtractDIEsIfNeeded(false)) {
warn(toString(sec) + ": " + toString(std::move(e)));
return {};
}
Expected<DWARFAddressRangesVector> ranges = cu->collectAddressRanges();
if (!ranges) {
warn(toString(sec) + ": " + toString(ranges.takeError()));
return {};
}
ArrayRef<InputSectionBase *> sections = sec->file->getSections();
for (DWARFAddressRange &r : *ranges) {
if (r.SectionIndex == -1ULL)
continue;
// Range list with zero size has no effect.
InputSectionBase *s = sections[r.SectionIndex];
if (s && s != &InputSection::discarded && s->isLive())
if (r.LowPC != r.HighPC)
ret.push_back({cast<InputSection>(s), r.LowPC, r.HighPC, cuIdx});
}
++cuIdx;
}
return ret;
}
template <class ELFT>
static SmallVector<GdbIndexSection::NameAttrEntry, 0>
readPubNamesAndTypes(const LLDDwarfObj<ELFT> &obj,
const SmallVectorImpl<GdbIndexSection::CuEntry> &cus) {
const LLDDWARFSection &pubNames = obj.getGnuPubnamesSection();
const LLDDWARFSection &pubTypes = obj.getGnuPubtypesSection();
SmallVector<GdbIndexSection::NameAttrEntry, 0> ret;
for (const LLDDWARFSection *pub : {&pubNames, &pubTypes}) {
DWARFDataExtractor data(obj, *pub, config->isLE, config->wordsize);
DWARFDebugPubTable table;
table.extract(data, /*GnuStyle=*/true, [&](Error e) {
warn(toString(pub->sec) + ": " + toString(std::move(e)));
});
for (const DWARFDebugPubTable::Set &set : table.getData()) {
// The value written into the constant pool is kind << 24 | cuIndex. As we
// don't know how many compilation units precede this object to compute
// cuIndex, we compute (kind << 24 | cuIndexInThisObject) instead, and add
// the number of preceding compilation units later.
uint32_t i = llvm::partition_point(cus,
[&](GdbIndexSection::CuEntry cu) {
return cu.cuOffset < set.Offset;
}) -
cus.begin();
for (const DWARFDebugPubTable::Entry &ent : set.Entries)
ret.push_back({{ent.Name, computeGdbHash(ent.Name)},
(ent.Descriptor.toBits() << 24) | i});
}
}
return ret;
}
// Create a list of symbols from a given list of symbol names and types
// by uniquifying them by name.
static SmallVector<GdbIndexSection::GdbSymbol, 0> createSymbols(
ArrayRef<SmallVector<GdbIndexSection::NameAttrEntry, 0>> nameAttrs,
const SmallVector<GdbIndexSection::GdbChunk, 0> &chunks) {
using GdbSymbol = GdbIndexSection::GdbSymbol;
using NameAttrEntry = GdbIndexSection::NameAttrEntry;
// For each chunk, compute the number of compilation units preceding it.
uint32_t cuIdx = 0;
std::unique_ptr<uint32_t[]> cuIdxs(new uint32_t[chunks.size()]);
for (uint32_t i = 0, e = chunks.size(); i != e; ++i) {
cuIdxs[i] = cuIdx;
cuIdx += chunks[i].compilationUnits.size();
}
// The number of symbols we will handle in this function is of the order
// of millions for very large executables, so we use multi-threading to
// speed it up.
constexpr size_t numShards = 32;
size_t concurrency = PowerOf2Floor(
std::min<size_t>(hardware_concurrency(parallel::strategy.ThreadsRequested)
.compute_thread_count(),
numShards));
// A sharded map to uniquify symbols by name.
auto map =
std::make_unique<DenseMap<CachedHashStringRef, size_t>[]>(numShards);
size_t shift = 32 - countTrailingZeros(numShards);
// Instantiate GdbSymbols while uniquifying them by name.
auto symbols = std::make_unique<SmallVector<GdbSymbol, 0>[]>(numShards);
parallelForEachN(0, concurrency, [&](size_t threadId) {
uint32_t i = 0;
for (ArrayRef<NameAttrEntry> entries : nameAttrs) {
for (const NameAttrEntry &ent : entries) {
size_t shardId = ent.name.hash() >> shift;
if ((shardId & (concurrency - 1)) != threadId)
continue;
uint32_t v = ent.cuIndexAndAttrs + cuIdxs[i];
size_t &idx = map[shardId][ent.name];
if (idx) {
symbols[shardId][idx - 1].cuVector.push_back(v);
continue;
}
idx = symbols[shardId].size() + 1;
symbols[shardId].push_back({ent.name, {v}, 0, 0});
}
++i;
}
});
size_t numSymbols = 0;
for (ArrayRef<GdbSymbol> v : makeArrayRef(symbols.get(), numShards))
numSymbols += v.size();
// The return type is a flattened vector, so we'll copy each vector's
// contents to ret.
SmallVector<GdbSymbol, 0> ret;
ret.reserve(numSymbols);
for (SmallVector<GdbSymbol, 0> &vec :
makeMutableArrayRef(symbols.get(), numShards))
for (GdbSymbol &sym : vec)
ret.push_back(std::move(sym));
// CU vectors and symbol names are adjacent in the output file.
// We can compute their offsets in the output file now.
size_t off = 0;
for (GdbSymbol &sym : ret) {
sym.cuVectorOff = off;
off += (sym.cuVector.size() + 1) * 4;
}
for (GdbSymbol &sym : ret) {
sym.nameOff = off;
off += sym.name.size() + 1;
}
return ret;
}
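// The sharding above uses the top bits of the name hash: with numShards == 32,
// shift == 32 - countTrailingZeros(32) == 27, so shardId == hash >> 27 lies in
// [0, 31], and a worker thread only touches shards whose low bits equal its
// threadId. This keeps each per-shard DenseMap single-threaded without locks.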
// Returns a newly-created .gdb_index section.
template <class ELFT> GdbIndexSection *GdbIndexSection::create() {
// Collect InputFiles with .debug_info. See the comment in
// LLDDwarfObj<ELFT>::LLDDwarfObj. If we do lightweight parsing in the future,
// note that isec->data() may uncompress the full content, which should be
// parallelized.
SetVector<InputFile *> files;
for (InputSectionBase *s : inputSections) {
InputSection *isec = dyn_cast<InputSection>(s);
if (!isec)
continue;
// .debug_gnu_pub{names,types} are useless in executables.
// They are present in input object files solely for creating
// a .gdb_index. So we can remove them from the output.
if (s->name == ".debug_gnu_pubnames" || s->name == ".debug_gnu_pubtypes")
s->markDead();
else if (isec->name == ".debug_info")
files.insert(isec->file);
}
// Drop .rel[a].debug_gnu_pub{names,types} for --emit-relocs.
llvm::erase_if(inputSections, [](InputSectionBase *s) {
if (auto *isec = dyn_cast<InputSection>(s))
if (InputSectionBase *rel = isec->getRelocatedSection())
return !rel->isLive();
return !s->isLive();
});
SmallVector<GdbChunk, 0> chunks(files.size());
SmallVector<SmallVector<NameAttrEntry, 0>, 0> nameAttrs(files.size());
parallelForEachN(0, files.size(), [&](size_t i) {
// To keep memory usage low, we don't want to keep a cached DWARFContext, so
// we avoid getDwarf() here.
ObjFile<ELFT> *file = cast<ObjFile<ELFT>>(files[i]);
DWARFContext dwarf(std::make_unique<LLDDwarfObj<ELFT>>(file));
auto &dobj = static_cast<const LLDDwarfObj<ELFT> &>(dwarf.getDWARFObj());
// If there are multiple compile units in .debug_info (very rare; ld -r
// --unique), this only picks the last one. Other address ranges are lost.
chunks[i].sec = dobj.getInfoSection();
chunks[i].compilationUnits = readCuList(dwarf);
chunks[i].addressAreas = readAddressAreas(dwarf, chunks[i].sec);
nameAttrs[i] = readPubNamesAndTypes<ELFT>(dobj, chunks[i].compilationUnits);
});
auto *ret = make<GdbIndexSection>();
ret->chunks = std::move(chunks);
ret->symbols = createSymbols(nameAttrs, ret->chunks);
ret->initOutputSize();
return ret;
}
void GdbIndexSection::writeTo(uint8_t *buf) {
// Write the header.
auto *hdr = reinterpret_cast<GdbIndexHeader *>(buf);
uint8_t *start = buf;
hdr->version = 7;
buf += sizeof(*hdr);
// Write the CU list.
hdr->cuListOff = buf - start;
for (GdbChunk &chunk : chunks) {
for (CuEntry &cu : chunk.compilationUnits) {
write64le(buf, chunk.sec->outSecOff + cu.cuOffset);
write64le(buf + 8, cu.cuLength);
buf += 16;
}
}
// Write the address area.
hdr->cuTypesOff = buf - start;
hdr->addressAreaOff = buf - start;
uint32_t cuOff = 0;
for (GdbChunk &chunk : chunks) {
for (AddressEntry &e : chunk.addressAreas) {
// In the case of ICF there may be duplicate address range entries.
const uint64_t baseAddr = e.section->repl->getVA(0);
write64le(buf, baseAddr + e.lowAddress);
write64le(buf + 8, baseAddr + e.highAddress);
write32le(buf + 16, e.cuIndex + cuOff);
buf += 20;
}
cuOff += chunk.compilationUnits.size();
}
// Write the on-disk open-addressing hash table containing symbols.
hdr->symtabOff = buf - start;
size_t symtabSize = computeSymtabSize();
uint32_t mask = symtabSize - 1;
for (GdbSymbol &sym : symbols) {
uint32_t h = sym.name.hash();
uint32_t i = h & mask;
uint32_t step = ((h * 17) & mask) | 1;
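// The step is forced to be odd, hence coprime to the power-of-two table size,
// so this probe sequence visits every slot before repeating.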
while (read32le(buf + i * 8))
i = (i + step) & mask;
write32le(buf + i * 8, sym.nameOff);
write32le(buf + i * 8 + 4, sym.cuVectorOff);
}
buf += symtabSize * 8;
// Write the string pool.
hdr->constantPoolOff = buf - start;
parallelForEach(symbols, [&](GdbSymbol &sym) {
memcpy(buf + sym.nameOff, sym.name.data(), sym.name.size());
});
// Write the CU vectors.
for (GdbSymbol &sym : symbols) {
write32le(buf, sym.cuVector.size());
buf += 4;
for (uint32_t val : sym.cuVector) {
write32le(buf, val);
buf += 4;
}
}
}
bool GdbIndexSection::isNeeded() const { return !chunks.empty(); }
EhFrameHeader::EhFrameHeader()
: SyntheticSection(SHF_ALLOC, SHT_PROGBITS, 4, ".eh_frame_hdr") {}
void EhFrameHeader::writeTo(uint8_t *buf) {
// Unlike most sections, the EhFrameHeader section is written while writing
// another section, namely EhFrameSection, which calls the write() function
// below from its writeTo() function. This is necessary because the contents
// of EhFrameHeader depend on the relocated contents of EhFrameSection and we
// don't know which order the sections will be written in.
}
// .eh_frame_hdr contains a binary search table of pointers to FDEs.
// Each entry of the search table consists of two values:
// the starting PC that the FDE covers, and the FDE's address.
// The table is sorted by PC.
void EhFrameHeader::write() {
uint8_t *buf = Out::bufferStart + getParent()->offset + outSecOff;
using FdeData = EhFrameSection::FdeData;
SmallVector<FdeData, 0> fdes = getPartition().ehFrame->getFdeData();
buf[0] = 1;
buf[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4;
buf[2] = DW_EH_PE_udata4;
buf[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4;
write32(buf + 4,
getPartition().ehFrame->getParent()->addr - this->getVA() - 4);
write32(buf + 8, fdes.size());
buf += 12;
for (FdeData &fde : fdes) {
write32(buf, fde.pcRel);
write32(buf + 4, fde.fdeVARel);
buf += 8;
}
}
size_t EhFrameHeader::getSize() const {
// .eh_frame_hdr has a 12-byte header followed by an array of FDE search
// table entries.
return 12 + getPartition().ehFrame->numFdes * 8;
}
bool EhFrameHeader::isNeeded() const {
return isLive() && getPartition().ehFrame->isNeeded();
}
VersionDefinitionSection::VersionDefinitionSection()
: SyntheticSection(SHF_ALLOC, SHT_GNU_verdef, sizeof(uint32_t),
".gnu.version_d") {}
StringRef VersionDefinitionSection::getFileDefName() {
if (!getPartition().name.empty())
return getPartition().name;
if (!config->soName.empty())
return config->soName;
return config->outputFile;
}
void VersionDefinitionSection::finalizeContents() {
fileDefNameOff = getPartition().dynStrTab->addString(getFileDefName());
for (const VersionDefinition &v : namedVersionDefs())
verDefNameOffs.push_back(getPartition().dynStrTab->addString(v.name));
if (OutputSection *sec = getPartition().dynStrTab->getParent())
getParent()->link = sec->sectionIndex;
// sh_info should be set to the number of definitions. This fact is missing
// from the documentation, but has been confirmed by the binutils community:
// https://sourceware.org/ml/binutils/2014-11/msg00355.html
getParent()->info = getVerDefNum();
}
void VersionDefinitionSection::writeOne(uint8_t *buf, uint32_t index,
StringRef name, size_t nameOff) {
uint16_t flags = index == 1 ? VER_FLG_BASE : 0;
// Write a verdef.
write16(buf, 1); // vd_version
write16(buf + 2, flags); // vd_flags
write16(buf + 4, index); // vd_ndx
write16(buf + 6, 1); // vd_cnt
write32(buf + 8, hashSysV(name)); // vd_hash
write32(buf + 12, 20); // vd_aux
write32(buf + 16, 28); // vd_next
// Write a veraux.
write32(buf + 20, nameOff); // vda_name
write32(buf + 24, 0); // vda_next
}
void VersionDefinitionSection::writeTo(uint8_t *buf) {
writeOne(buf, 1, getFileDefName(), fileDefNameOff);
auto nameOffIt = verDefNameOffs.begin();
for (const VersionDefinition &v : namedVersionDefs()) {
buf += EntrySize;
writeOne(buf, v.id, v.name, *nameOffIt++);
}
// Need to terminate the last version definition.
write32(buf + 16, 0); // vd_next
}
size_t VersionDefinitionSection::getSize() const {
return EntrySize * getVerDefNum();
}
// .gnu.version is a table where each entry is 2 bytes long.
VersionTableSection::VersionTableSection()
: SyntheticSection(SHF_ALLOC, SHT_GNU_versym, sizeof(uint16_t),
".gnu.version") {
this->entsize = 2;
}
void VersionTableSection::finalizeContents() {
// As of June 2016, the GNU docs do not mention that the sh_link field
// should be set, but the Sun docs do. Also, readelf relies on this field.
getParent()->link = getPartition().dynSymTab->getParent()->sectionIndex;
}
size_t VersionTableSection::getSize() const {
return (getPartition().dynSymTab->getSymbols().size() + 1) * 2;
}
void VersionTableSection::writeTo(uint8_t *buf) {
buf += 2;
for (const SymbolTableEntry &s : getPartition().dynSymTab->getSymbols()) {
// For an unextracted lazy symbol (undefined weak), it must have been
// converted to Undefined and have VER_NDX_GLOBAL version here.
assert(!s.sym->isLazy());
write16(buf, s.sym->versionId);
buf += 2;
}
}
bool VersionTableSection::isNeeded() const {
return isLive() &&
(getPartition().verDef || getPartition().verNeed->isNeeded());
}
void elf::addVerneed(Symbol *ss) {
auto &file = cast<SharedFile>(*ss->file);
if (ss->verdefIndex == VER_NDX_GLOBAL) {
ss->versionId = VER_NDX_GLOBAL;
return;
}
if (file.vernauxs.empty())
file.vernauxs.resize(file.verdefs.size());
// Select a version identifier for the vernaux data structure, if we haven't
// already allocated one. The verdef identifiers cover the range
// [1..getVerDefNum()]; this causes the vernaux identifiers to start from
// getVerDefNum()+1.
if (file.vernauxs[ss->verdefIndex] == 0)
file.vernauxs[ss->verdefIndex] = ++SharedFile::vernauxNum + getVerDefNum();
ss->versionId = file.vernauxs[ss->verdefIndex];
}
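// A worked example of the numbering above (values are illustrative): with
// getVerDefNum() == 3, the first vernaux allocated gets versionId 4, the next
// one 5, and so on across all shared files, while VER_NDX_LOCAL (0) and
// VER_NDX_GLOBAL (1) keep their reserved meanings.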
template <class ELFT>
VersionNeedSection<ELFT>::VersionNeedSection()
: SyntheticSection(SHF_ALLOC, SHT_GNU_verneed, sizeof(uint32_t),
".gnu.version_r") {}
template <class ELFT> void VersionNeedSection<ELFT>::finalizeContents() {
for (SharedFile *f : sharedFiles) {
if (f->vernauxs.empty())
continue;
verneeds.emplace_back();
Verneed &vn = verneeds.back();
vn.nameStrTab = getPartition().dynStrTab->addString(f->soName);
for (unsigned i = 0; i != f->vernauxs.size(); ++i) {
if (f->vernauxs[i] == 0)
continue;
auto *verdef =
reinterpret_cast<const typename ELFT::Verdef *>(f->verdefs[i]);
vn.vernauxs.push_back(
{verdef->vd_hash, f->vernauxs[i],
getPartition().dynStrTab->addString(f->getStringTable().data() +
verdef->getAux()->vda_name)});
}
}
if (OutputSection *sec = getPartition().dynStrTab->getParent())
getParent()->link = sec->sectionIndex;
getParent()->info = verneeds.size();
}
template <class ELFT> void VersionNeedSection<ELFT>::writeTo(uint8_t *buf) {
// The Elf_Verneeds need to appear first, followed by the Elf_Vernauxs.
auto *verneed = reinterpret_cast<Elf_Verneed *>(buf);
auto *vernaux = reinterpret_cast<Elf_Vernaux *>(verneed + verneeds.size());
for (auto &vn : verneeds) {
// Create an Elf_Verneed for this DSO.
verneed->vn_version = 1;
verneed->vn_cnt = vn.vernauxs.size();
verneed->vn_file = vn.nameStrTab;
verneed->vn_aux =
reinterpret_cast<char *>(vernaux) - reinterpret_cast<char *>(verneed);
verneed->vn_next = sizeof(Elf_Verneed);
++verneed;
// Create the Elf_Vernauxs for this Elf_Verneed.
for (auto &vna : vn.vernauxs) {
vernaux->vna_hash = vna.hash;
vernaux->vna_flags = 0;
vernaux->vna_other = vna.verneedIndex;
vernaux->vna_name = vna.nameStrTab;
vernaux->vna_next = sizeof(Elf_Vernaux);
++vernaux;
}
vernaux[-1].vna_next = 0;
}
verneed[-1].vn_next = 0;
}
template <class ELFT> size_t VersionNeedSection<ELFT>::getSize() const {
return verneeds.size() * sizeof(Elf_Verneed) +
SharedFile::vernauxNum * sizeof(Elf_Vernaux);
}
template <class ELFT> bool VersionNeedSection<ELFT>::isNeeded() const {
return isLive() && SharedFile::vernauxNum != 0;
}
void MergeSyntheticSection::addSection(MergeInputSection *ms) {
ms->parent = this;
sections.push_back(ms);
assert(alignment == ms->alignment || !(ms->flags & SHF_STRINGS));
alignment = std::max(alignment, ms->alignment);
}
MergeTailSection::MergeTailSection(StringRef name, uint32_t type,
uint64_t flags, uint32_t alignment)
: MergeSyntheticSection(name, type, flags, alignment),
builder(StringTableBuilder::RAW, alignment) {}
size_t MergeTailSection::getSize() const { return builder.getSize(); }
void MergeTailSection::writeTo(uint8_t *buf) { builder.write(buf); }
void MergeTailSection::finalizeContents() {
// Add all string pieces to the string table builder to create section
// contents.
for (MergeInputSection *sec : sections)
for (size_t i = 0, e = sec->pieces.size(); i != e; ++i)
if (sec->pieces[i].live)
builder.add(sec->getData(i));
// Fix the string table content. After this, the contents will never change.
builder.finalize();
// finalize() fixed tail-optimized strings, so we can now get
// offsets of strings. Get an offset for each string and save it
// to a corresponding SectionPiece for easy access.
for (MergeInputSection *sec : sections)
for (size_t i = 0, e = sec->pieces.size(); i != e; ++i)
if (sec->pieces[i].live)
sec->pieces[i].outputOff = builder.getOffset(sec->getData(i));
}
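// Tail merging means a string that is a suffix of another can share storage:
// for example, with 1-byte alignment, inputs "bar" and "foobar" are stored as
// a single "foobar\0", and getOffset("bar") points three bytes into it.
// MergeNoTailSection below gives up this size saving in exchange for speed.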
void MergeNoTailSection::writeTo(uint8_t *buf) {
parallelForEachN(0, numShards,
[&](size_t i) { shards[i].write(buf + shardOffsets[i]); });
}
// This function is very hot (i.e. it can take several seconds to finish)
// because the number of inputs can sometimes be on the order of
// millions. So, we use multi-threading.
//
// For any strings S and T, we know S is not mergeable with T if S's hash
// value is different from T's. If that's the case, we can safely put S and
// T into different string builders without worrying about merge misses.
// We do it in parallel.
void MergeNoTailSection::finalizeContents() {
// Initializes string table builders.
for (size_t i = 0; i < numShards; ++i)
shards.emplace_back(StringTableBuilder::RAW, alignment);
// Concurrency level. Must be a power of 2 to avoid expensive modulo
// operations in the following tight loop.
size_t concurrency = PowerOf2Floor(
std::min<size_t>(hardware_concurrency(parallel::strategy.ThreadsRequested)
.compute_thread_count(),
numShards));
// Add section pieces to the builders.
parallelForEachN(0, concurrency, [&](size_t threadId) {
for (MergeInputSection *sec : sections) {
for (size_t i = 0, e = sec->pieces.size(); i != e; ++i) {
if (!sec->pieces[i].live)
continue;
size_t shardId = getShardId(sec->pieces[i].hash);
if ((shardId & (concurrency - 1)) == threadId)
sec->pieces[i].outputOff = shards[shardId].add(sec->getData(i));
}
}
});
// Compute an in-section offset for each shard.
size_t off = 0;
for (size_t i = 0; i < numShards; ++i) {
shards[i].finalizeInOrder();
if (shards[i].getSize() > 0)
off = alignTo(off, alignment);
shardOffsets[i] = off;
off += shards[i].getSize();
}
size = off;
// So far, section pieces have offsets from beginning of shards, but
// we want offsets from beginning of the whole section. Fix them.
parallelForEach(sections, [&](MergeInputSection *sec) {
for (size_t i = 0, e = sec->pieces.size(); i != e; ++i)
if (sec->pieces[i].live)
sec->pieces[i].outputOff +=
shardOffsets[getShardId(sec->pieces[i].hash)];
});
}
template <class ELFT> void elf::splitSections() {
llvm::TimeTraceScope timeScope("Split sections");
// splitIntoPieces needs to be called on each MergeInputSection
// before calling finalizeContents().
parallelForEach(objectFiles, [](ELFFileBase *file) {
for (InputSectionBase *sec : file->getSections()) {
if (!sec)
continue;
if (auto *s = dyn_cast<MergeInputSection>(sec))
s->splitIntoPieces();
else if (auto *eh = dyn_cast<EhInputSection>(sec))
eh->split<ELFT>();
}
});
}
MipsRldMapSection::MipsRldMapSection()
: SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, config->wordsize,
".rld_map") {}
ARMExidxSyntheticSection::ARMExidxSyntheticSection()
: SyntheticSection(SHF_ALLOC | SHF_LINK_ORDER, SHT_ARM_EXIDX,
config->wordsize, ".ARM.exidx") {}
static InputSection *findExidxSection(InputSection *isec) {
for (InputSection *d : isec->dependentSections)
if (d->type == SHT_ARM_EXIDX && d->isLive())
return d;
return nullptr;
}
static bool isValidExidxSectionDep(InputSection *isec) {
return (isec->flags & SHF_ALLOC) && (isec->flags & SHF_EXECINSTR) &&
isec->getSize() > 0;
}
bool ARMExidxSyntheticSection::addSection(InputSection *isec) {
if (isec->type == SHT_ARM_EXIDX) {
if (InputSection *dep = isec->getLinkOrderDep())
if (isValidExidxSectionDep(dep)) {
exidxSections.push_back(isec);
// Every .ARM.exidx table entry is 8 bytes; we add one entry per section as
// an estimate of the size before assignAddresses can be called. The final
// size will only be known after finalize is called.
size += 8;
}
return true;
}
if (isValidExidxSectionDep(isec)) {
executableSections.push_back(isec);
return false;
}
// FIXME: we do not output a relocation section when --emit-relocs is used,
// as we do not have relocation sections for linker-generated table entries
// and we would have to erase, at a late stage, relocations from merged entries.
// Given that exception tables are already position independent and a binary
// analyzer could derive the relocations, we choose to erase the relocations.
if (config->emitRelocs && isec->type == SHT_REL)
if (InputSectionBase *ex = isec->getRelocatedSection())
if (isa<InputSection>(ex) && ex->type == SHT_ARM_EXIDX)
return true;
return false;
}
// References to .ARM.Extab Sections have bit 31 clear and are not the
// special EXIDX_CANTUNWIND bit-pattern.
static bool isExtabRef(uint32_t unwind) {
return (unwind & 0x80000000) == 0 && unwind != 0x1;
}
// Return true if the .ARM.exidx section Cur can be merged into the .ARM.exidx
// section Prev, where Cur follows Prev in the table. This can be done if the
// unwinding instructions in Cur are identical to Prev. Linker generated
// EXIDX_CANTUNWIND entries are represented by nullptr as they do not have an
// InputSection.
static bool isDuplicateArmExidxSec(InputSection *prev, InputSection *cur) {
struct ExidxEntry {
ulittle32_t fn;
ulittle32_t unwind;
};
// Get the last table Entry from the previous .ARM.exidx section. If Prev is
// nullptr then it will be a synthesized EXIDX_CANTUNWIND entry.
ExidxEntry prevEntry = {ulittle32_t(0), ulittle32_t(1)};
if (prev)
prevEntry = prev->getDataAs<ExidxEntry>().back();
if (isExtabRef(prevEntry.unwind))
return false;
// We consider the unwind instructions of an .ARM.exidx table entry a
// duplicate of the previous entry's if either:
// - Both are the special EXIDX_CANTUNWIND.
// - Both are the same inline unwind instructions.
// We do not attempt to follow and check links into .ARM.extab tables as
// consecutive identical entries are rare and the effort to check that they
// are identical is high.
// If Cur is nullptr then this is a synthesized EXIDX_CANTUNWIND entry.
if (cur == nullptr)
return prevEntry.unwind == 1;
for (const ExidxEntry entry : cur->getDataAs<ExidxEntry>())
if (isExtabRef(entry.unwind) || entry.unwind != prevEntry.unwind)
return false;
// All table entries in this .ARM.exidx Section can be merged into the
// previous Section.
return true;
}
// The .ARM.exidx table must be sorted in ascending order of the address of the
// functions the table describes. Optionally, duplicate adjacent table entries
// can be removed. At the end of the function, executableSections must be
// sorted in ascending order of address, sentinel is set to the InputSection
// with the highest address, and any InputSections that have mergeable
// .ARM.exidx table entries are removed from executableSections.
void ARMExidxSyntheticSection::finalizeContents() {
// The executableSections and exidxSections that we use to derive the final
// contents of this SyntheticSection are populated before
// processSectionCommands() and ICF. A /DISCARD/ entry in SECTIONS command or
// ICF may remove executable InputSections and their dependent .ARM.exidx
// section that we recorded earlier.
auto isDiscarded = [](const InputSection *isec) { return !isec->isLive(); };
llvm::erase_if(exidxSections, isDiscarded);
// We need to remove discarded InputSections, as well as InputSections
// without an .ARM.exidx section for which a generated EXIDX_CANTUNWIND
// entry would be out of range.
auto isDiscardedOrOutOfRange = [this](InputSection *isec) {
if (!isec->isLive())
return true;
if (findExidxSection(isec))
return false;
int64_t off = static_cast<int64_t>(isec->getVA() - getVA());
return off != llvm::SignExtend64(off, 31);
};
llvm::erase_if(executableSections, isDiscardedOrOutOfRange);
// Sort the executable sections that may or may not have associated
// .ARM.exidx sections by order of ascending address. This requires the
// relative positions of InputSections and OutputSections to be known.
auto compareByFilePosition = [](const InputSection *a,
const InputSection *b) {
OutputSection *aOut = a->getParent();
OutputSection *bOut = b->getParent();
if (aOut != bOut)
return aOut->addr < bOut->addr;
return a->outSecOff < b->outSecOff;
};
llvm::stable_sort(executableSections, compareByFilePosition);
sentinel = executableSections.back();
// Optionally merge adjacent duplicate entries.
if (config->mergeArmExidx) {
SmallVector<InputSection *, 0> selectedSections;
selectedSections.reserve(executableSections.size());
selectedSections.push_back(executableSections[0]);
size_t prev = 0;
for (size_t i = 1; i < executableSections.size(); ++i) {
InputSection *ex1 = findExidxSection(executableSections[prev]);
InputSection *ex2 = findExidxSection(executableSections[i]);
if (!isDuplicateArmExidxSec(ex1, ex2)) {
selectedSections.push_back(executableSections[i]);
prev = i;
}
}
executableSections = std::move(selectedSections);
}
size_t offset = 0;
size = 0;
for (InputSection *isec : executableSections) {
if (InputSection *d = findExidxSection(isec)) {
d->outSecOff = offset;
d->parent = getParent();
offset += d->getSize();
} else {
offset += 8;
}
}
// Size includes Sentinel.
size = offset + 8;
}
InputSection *ARMExidxSyntheticSection::getLinkOrderDep() const {
return executableSections.front();
}
// To write the .ARM.exidx table from the executableSections we have three cases:
// 1.) The InputSection has a .ARM.exidx InputSection in its dependent sections.
// We write the .ARM.exidx section contents and apply its relocations.
// 2.) The InputSection does not have a dependent .ARM.exidx InputSection. We
// must write the contents of an EXIDX_CANTUNWIND entry directly. We use the
// start address of the InputSection, because the purpose of the linker-generated
// entry is to terminate the address range of the previous entry.
// 3.) A trailing EXIDX_CANTUNWIND sentinel section is required at the end of
// the table to terminate the address range of the final entry.
void ARMExidxSyntheticSection::writeTo(uint8_t *buf) {
const uint8_t cantUnwindData[8] = {0, 0, 0, 0, // PREL31 to target
1, 0, 0, 0}; // EXIDX_CANTUNWIND
uint64_t offset = 0;
for (InputSection *isec : executableSections) {
assert(isec->getParent() != nullptr);
if (InputSection *d = findExidxSection(isec)) {
memcpy(buf + offset, d->data().data(), d->data().size());
d->relocateAlloc(buf + d->outSecOff, buf + d->outSecOff + d->getSize());
offset += d->getSize();
} else {
// A Linker generated CANTUNWIND section.
memcpy(buf + offset, cantUnwindData, sizeof(cantUnwindData));
uint64_t s = isec->getVA();
uint64_t p = getVA() + offset;
target->relocateNoSym(buf + offset, R_ARM_PREL31, s - p);
offset += 8;
}
}
// Write Sentinel.
memcpy(buf + offset, cantUnwindData, sizeof(cantUnwindData));
uint64_t s = sentinel->getVA(sentinel->getSize());
uint64_t p = getVA() + offset;
target->relocateNoSym(buf + offset, R_ARM_PREL31, s - p);
assert(size == offset + 8);
}
bool ARMExidxSyntheticSection::isNeeded() const {
return llvm::any_of(exidxSections,
[](InputSection *isec) { return isec->isLive(); });
}
bool ARMExidxSyntheticSection::classof(const SectionBase *d) {
return d->kind() == InputSectionBase::Synthetic && d->type == SHT_ARM_EXIDX;
}
ThunkSection::ThunkSection(OutputSection *os, uint64_t off)
: SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS,
config->emachine == EM_PPC64 ? 16 : 4, ".text.thunk") {
this->parent = os;
this->outSecOff = off;
}
size_t ThunkSection::getSize() const {
if (roundUpSizeForErrata)
return alignTo(size, 4096);
return size;
}
void ThunkSection::addThunk(Thunk *t) {
thunks.push_back(t);
t->addSymbols(*this);
}
void ThunkSection::writeTo(uint8_t *buf) {
for (Thunk *t : thunks)
t->writeTo(buf + t->offset);
}
InputSection *ThunkSection::getTargetInputSection() const {
if (thunks.empty())
return nullptr;
const Thunk *t = thunks.front();
return t->getTargetInputSection();
}
bool ThunkSection::assignOffsets() {
uint64_t off = 0;
for (Thunk *t : thunks) {
off = alignTo(off, t->alignment);
t->setOffset(off);
uint32_t size = t->size();
t->getThunkTargetSym()->size = size;
off += size;
}
bool changed = off != size;
size = off;
return changed;
}
PPC32Got2Section::PPC32Got2Section()
: SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, 4, ".got2") {}
bool PPC32Got2Section::isNeeded() const {
// See the comment below. This is not needed if there is no other
// InputSection.
for (SectionCommand *cmd : getParent()->commands)
if (auto *isd = dyn_cast<InputSectionDescription>(cmd))
for (InputSection *isec : isd->sections)
if (isec != this)
return true;
return false;
}
void PPC32Got2Section::finalizeContents() {
// PPC32 may create multiple GOT sections for -fPIC/-fPIE, one per file, in
// .got2. This function computes the outSecOff of each .got2 to be used in
// PPC32PltCallStub::writeTo(). The purpose of this empty synthetic section is
// to collect input sections named ".got2".
for (SectionCommand *cmd : getParent()->commands)
if (auto *isd = dyn_cast<InputSectionDescription>(cmd)) {
for (InputSection *isec : isd->sections) {
// isec->file may be nullptr for MergeSyntheticSection.
if (isec != this && isec->file)
isec->file->ppc32Got2 = isec;
}
}
}
// If linking position-dependent code then the table will store the addresses
// directly in the binary so the section has type SHT_PROGBITS. If linking
// position-independent code the section has type SHT_NOBITS since it will be
// allocated and filled in by the dynamic linker.
PPC64LongBranchTargetSection::PPC64LongBranchTargetSection()
: SyntheticSection(SHF_ALLOC | SHF_WRITE,
config->isPic ? SHT_NOBITS : SHT_PROGBITS, 8,
".branch_lt") {}
uint64_t PPC64LongBranchTargetSection::getEntryVA(const Symbol *sym,
int64_t addend) {
return getVA() + entry_index.find({sym, addend})->second * 8;
}
Optional<uint32_t> PPC64LongBranchTargetSection::addEntry(const Symbol *sym,
int64_t addend) {
auto res =
entry_index.try_emplace(std::make_pair(sym, addend), entries.size());
if (!res.second)
return None;
entries.emplace_back(sym, addend);
return res.first->second;
}
size_t PPC64LongBranchTargetSection::getSize() const {
return entries.size() * 8;
}
void PPC64LongBranchTargetSection::writeTo(uint8_t *buf) {
// If linking non-PIC, we have the final addresses of the targets and they get
// written to the table directly. For PIC, the dynamic linker will allocate
// the section and fill it in.
if (config->isPic)
return;
for (auto entry : entries) {
const Symbol *sym = entry.first;
int64_t addend = entry.second;
assert(sym->getVA());
// Need calls to branch to the local entry-point since a long-branch
// must be a local-call.
write64(buf, sym->getVA(addend) +
getPPC64GlobalEntryToLocalEntryOffset(sym->stOther));
buf += 8;
}
}
bool PPC64LongBranchTargetSection::isNeeded() const {
// `removeUnusedSyntheticSections()` is called before thunk allocation which
// is too early to determine if this section will be empty or not. We need
// Finalized to keep the section alive until after thunk creation. Finalized
// only gets set to true once `finalizeSections()` is called after thunk
// creation. Because of this, if we don't create any long-branch thunks we end
// up with an empty .branch_lt section in the binary.
return !finalized || !entries.empty();
}
static uint8_t getAbiVersion() {
// MIPS non-PIC executable gets ABI version 1.
if (config->emachine == EM_MIPS) {
if (!config->isPic && !config->relocatable &&
(config->eflags & (EF_MIPS_PIC | EF_MIPS_CPIC)) == EF_MIPS_CPIC)
return 1;
return 0;
}
if (config->emachine == EM_AMDGPU) {
uint8_t ver = objectFiles[0]->abiVersion;
for (InputFile *file : makeArrayRef(objectFiles).slice(1))
if (file->abiVersion != ver)
error("incompatible ABI version: " + toString(file));
return ver;
}
return 0;
}
template <typename ELFT> void elf::writeEhdr(uint8_t *buf, Partition &part) {
memcpy(buf, "\177ELF", 4);
auto *eHdr = reinterpret_cast<typename ELFT::Ehdr *>(buf);
eHdr->e_ident[EI_CLASS] = config->is64 ? ELFCLASS64 : ELFCLASS32;
eHdr->e_ident[EI_DATA] = config->isLE ? ELFDATA2LSB : ELFDATA2MSB;
eHdr->e_ident[EI_VERSION] = EV_CURRENT;
eHdr->e_ident[EI_OSABI] = config->osabi;
eHdr->e_ident[EI_ABIVERSION] = getAbiVersion();
eHdr->e_machine = config->emachine;
eHdr->e_version = EV_CURRENT;
eHdr->e_flags = config->eflags;
eHdr->e_ehsize = sizeof(typename ELFT::Ehdr);
eHdr->e_phnum = part.phdrs.size();
eHdr->e_shentsize = sizeof(typename ELFT::Shdr);
if (!config->relocatable) {
eHdr->e_phoff = sizeof(typename ELFT::Ehdr);
eHdr->e_phentsize = sizeof(typename ELFT::Phdr);
}
}
template <typename ELFT> void elf::writePhdrs(uint8_t *buf, Partition &part) {
// Write the program header table.
auto *hBuf = reinterpret_cast<typename ELFT::Phdr *>(buf);
for (PhdrEntry *p : part.phdrs) {
hBuf->p_type = p->p_type;
hBuf->p_flags = p->p_flags;
hBuf->p_offset = p->p_offset;
hBuf->p_vaddr = p->p_vaddr;
hBuf->p_paddr = p->p_paddr;
hBuf->p_filesz = p->p_filesz;
hBuf->p_memsz = p->p_memsz;
hBuf->p_align = p->p_align;
++hBuf;
}
}
template <typename ELFT>
PartitionElfHeaderSection<ELFT>::PartitionElfHeaderSection()
: SyntheticSection(SHF_ALLOC, SHT_LLVM_PART_EHDR, 1, "") {}
template <typename ELFT>
size_t PartitionElfHeaderSection<ELFT>::getSize() const {
return sizeof(typename ELFT::Ehdr);
}
template <typename ELFT>
void PartitionElfHeaderSection<ELFT>::writeTo(uint8_t *buf) {
writeEhdr<ELFT>(buf, getPartition());
// Loadable partitions are always ET_DYN.
auto *eHdr = reinterpret_cast<typename ELFT::Ehdr *>(buf);
eHdr->e_type = ET_DYN;
}
template <typename ELFT>
PartitionProgramHeadersSection<ELFT>::PartitionProgramHeadersSection()
: SyntheticSection(SHF_ALLOC, SHT_LLVM_PART_PHDR, 1, ".phdrs") {}
template <typename ELFT>
size_t PartitionProgramHeadersSection<ELFT>::getSize() const {
return sizeof(typename ELFT::Phdr) * getPartition().phdrs.size();
}
template <typename ELFT>
void PartitionProgramHeadersSection<ELFT>::writeTo(uint8_t *buf) {
writePhdrs<ELFT>(buf, getPartition());
}
PartitionIndexSection::PartitionIndexSection()
: SyntheticSection(SHF_ALLOC, SHT_PROGBITS, 4, ".rodata") {}
size_t PartitionIndexSection::getSize() const {
return 12 * (partitions.size() - 1);
}
void PartitionIndexSection::finalizeContents() {
for (size_t i = 1; i != partitions.size(); ++i)
partitions[i].nameStrTab = mainPart->dynStrTab->addString(partitions[i].name);
}
void PartitionIndexSection::writeTo(uint8_t *buf) {
uint64_t va = getVA();
for (size_t i = 1; i != partitions.size(); ++i) {
write32(buf, mainPart->dynStrTab->getVA() + partitions[i].nameStrTab - va);
write32(buf + 4, partitions[i].elfHeader->getVA() - (va + 4));
SyntheticSection *next = i == partitions.size() - 1
? in.partEnd.get()
: partitions[i + 1].elfHeader.get();
write32(buf + 8, next->getVA() - partitions[i].elfHeader->getVA());
va += 12;
buf += 12;
}
}
void InStruct::reset() {
attributes.reset();
bss.reset();
bssRelRo.reset();
got.reset();
gotPlt.reset();
igotPlt.reset();
ppc64LongBranchTarget.reset();
mipsAbiFlags.reset();
mipsGot.reset();
mipsOptions.reset();
mipsReginfo.reset();
mipsRldMap.reset();
partEnd.reset();
partIndex.reset();
plt.reset();
iplt.reset();
ppc32Got2.reset();
ibtPlt.reset();
relaPlt.reset();
relaIplt.reset();
shStrTab.reset();
strTab.reset();
symTab.reset();
symTabShndx.reset();
}
InStruct elf::in;
std::vector<Partition> elf::partitions;
Partition *elf::mainPart;
template GdbIndexSection *GdbIndexSection::create<ELF32LE>();
template GdbIndexSection *GdbIndexSection::create<ELF32BE>();
template GdbIndexSection *GdbIndexSection::create<ELF64LE>();
template GdbIndexSection *GdbIndexSection::create<ELF64BE>();
template void elf::splitSections<ELF32LE>();
template void elf::splitSections<ELF32BE>();
template void elf::splitSections<ELF64LE>();
template void elf::splitSections<ELF64BE>();
template class elf::MipsAbiFlagsSection<ELF32LE>;
template class elf::MipsAbiFlagsSection<ELF32BE>;
template class elf::MipsAbiFlagsSection<ELF64LE>;
template class elf::MipsAbiFlagsSection<ELF64BE>;
template class elf::MipsOptionsSection<ELF32LE>;
template class elf::MipsOptionsSection<ELF32BE>;
template class elf::MipsOptionsSection<ELF64LE>;
template class elf::MipsOptionsSection<ELF64BE>;
template void EhFrameSection::iterateFDEWithLSDA<ELF32LE>(
function_ref<void(InputSection &)>);
template void EhFrameSection::iterateFDEWithLSDA<ELF32BE>(
function_ref<void(InputSection &)>);
template void EhFrameSection::iterateFDEWithLSDA<ELF64LE>(
function_ref<void(InputSection &)>);
template void EhFrameSection::iterateFDEWithLSDA<ELF64BE>(
function_ref<void(InputSection &)>);
template class elf::MipsReginfoSection<ELF32LE>;
template class elf::MipsReginfoSection<ELF32BE>;
template class elf::MipsReginfoSection<ELF64LE>;
template class elf::MipsReginfoSection<ELF64BE>;
template class elf::DynamicSection<ELF32LE>;
template class elf::DynamicSection<ELF32BE>;
template class elf::DynamicSection<ELF64LE>;
template class elf::DynamicSection<ELF64BE>;
template class elf::RelocationSection<ELF32LE>;
template class elf::RelocationSection<ELF32BE>;
template class elf::RelocationSection<ELF64LE>;
template class elf::RelocationSection<ELF64BE>;
template class elf::AndroidPackedRelocationSection<ELF32LE>;
template class elf::AndroidPackedRelocationSection<ELF32BE>;
template class elf::AndroidPackedRelocationSection<ELF64LE>;
template class elf::AndroidPackedRelocationSection<ELF64BE>;
template class elf::RelrSection<ELF32LE>;
template class elf::RelrSection<ELF32BE>;
template class elf::RelrSection<ELF64LE>;
template class elf::RelrSection<ELF64BE>;
template class elf::SymbolTableSection<ELF32LE>;
template class elf::SymbolTableSection<ELF32BE>;
template class elf::SymbolTableSection<ELF64LE>;
template class elf::SymbolTableSection<ELF64BE>;
template class elf::VersionNeedSection<ELF32LE>;
template class elf::VersionNeedSection<ELF32BE>;
template class elf::VersionNeedSection<ELF64LE>;
template class elf::VersionNeedSection<ELF64BE>;
template void elf::writeEhdr<ELF32LE>(uint8_t *Buf, Partition &Part);
template void elf::writeEhdr<ELF32BE>(uint8_t *Buf, Partition &Part);
template void elf::writeEhdr<ELF64LE>(uint8_t *Buf, Partition &Part);
template void elf::writeEhdr<ELF64BE>(uint8_t *Buf, Partition &Part);
template void elf::writePhdrs<ELF32LE>(uint8_t *Buf, Partition &Part);
template void elf::writePhdrs<ELF32BE>(uint8_t *Buf, Partition &Part);
template void elf::writePhdrs<ELF64LE>(uint8_t *Buf, Partition &Part);
template void elf::writePhdrs<ELF64BE>(uint8_t *Buf, Partition &Part);
template class elf::PartitionElfHeaderSection<ELF32LE>;
template class elf::PartitionElfHeaderSection<ELF32BE>;
template class elf::PartitionElfHeaderSection<ELF64LE>;
template class elf::PartitionElfHeaderSection<ELF64BE>;
template class elf::PartitionProgramHeadersSection<ELF32LE>;
template class elf::PartitionProgramHeadersSection<ELF32BE>;
template class elf::PartitionProgramHeadersSection<ELF64LE>;
template class elf::PartitionProgramHeadersSection<ELF64BE>;
diff --git a/contrib/llvm-project/lld/ELF/SyntheticSections.h b/contrib/llvm-project/lld/ELF/SyntheticSections.h
index 1b63a5d29d10..e609b3d7982a 100644
--- a/contrib/llvm-project/lld/ELF/SyntheticSections.h
+++ b/contrib/llvm-project/lld/ELF/SyntheticSections.h
@@ -1,1271 +1,1272 @@
//===- SyntheticSection.h ---------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Synthetic sections represent chunks of linker-created data. If you
// need to create a chunk of data that is to be included in some section
// in the result, you probably want to create that as a synthetic section.
//
// Synthetic sections are designed as input sections as opposed to
// output sections because we want to allow them to be manipulated
// using linker scripts just like other input sections from regular
// files.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_ELF_SYNTHETIC_SECTIONS_H
#define LLD_ELF_SYNTHETIC_SECTIONS_H
#include "DWARF.h"
#include "EhFrame.h"
#include "InputSection.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/Endian.h"
#include <functional>
namespace lld {
namespace elf {
class Defined;
struct PhdrEntry;
class SymbolTableBaseSection;
class SyntheticSection : public InputSection {
public:
SyntheticSection(uint64_t flags, uint32_t type, uint32_t alignment,
StringRef name)
: InputSection(nullptr, flags, type, alignment, {}, name,
InputSectionBase::Synthetic) {}
virtual ~SyntheticSection() = default;
virtual void writeTo(uint8_t *buf) = 0;
virtual size_t getSize() const = 0;
virtual void finalizeContents() {}
// If the section has the SHF_ALLOC flag and its size may change when thunks
// are added, update the section size.
virtual bool updateAllocSize() { return false; }
virtual bool isNeeded() const { return true; }
static bool classof(const SectionBase *d) {
return d->kind() == InputSectionBase::Synthetic;
}
};
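// A minimal sketch (not lld code; the class name and contents below are
// hypothetical) of how the interface above is typically used: a subclass
// reports its size via getSize() and fills its bytes in writeTo().
//
//   class ExampleMarkerSection final : public SyntheticSection {
//   public:
//     ExampleMarkerSection()
//         : SyntheticSection(llvm::ELF::SHF_ALLOC, llvm::ELF::SHT_PROGBITS, 4,
//                            ".example.marker") {}
//     size_t getSize() const override { return 4; }
//     void writeTo(uint8_t *buf) override {
//       // Emit a fixed little-endian marker word into the reserved 4 bytes.
//       llvm::support::endian::write32le(buf, 0x11223344);
//     }
//   };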
struct CieRecord {
EhSectionPiece *cie = nullptr;
SmallVector<EhSectionPiece *, 0> fdes;
};
// Section for .eh_frame.
class EhFrameSection final : public SyntheticSection {
public:
EhFrameSection();
void writeTo(uint8_t *buf) override;
void finalizeContents() override;
bool isNeeded() const override { return !sections.empty(); }
size_t getSize() const override { return size; }
static bool classof(const SectionBase *d) {
return SyntheticSection::classof(d) && d->name == ".eh_frame";
}
void addSection(EhInputSection *sec);
SmallVector<EhInputSection *, 0> sections;
size_t numFdes = 0;
struct FdeData {
uint32_t pcRel;
uint32_t fdeVARel;
};
SmallVector<FdeData, 0> getFdeData() const;
ArrayRef<CieRecord *> getCieRecords() const { return cieRecords; }
template <class ELFT>
void iterateFDEWithLSDA(llvm::function_ref<void(InputSection &)> fn);
private:
// This is used only when parsing EhInputSection. We keep it here to avoid
// allocating one for each EhInputSection.
llvm::DenseMap<size_t, CieRecord *> offsetToCie;
uint64_t size = 0;
template <class ELFT, class RelTy>
void addRecords(EhInputSection *s, llvm::ArrayRef<RelTy> rels);
template <class ELFT> void addSectionAux(EhInputSection *s);
template <class ELFT, class RelTy>
void iterateFDEWithLSDAAux(EhInputSection &sec, ArrayRef<RelTy> rels,
llvm::DenseSet<size_t> &ciesWithLSDA,
llvm::function_ref<void(InputSection &)> fn);
template <class ELFT, class RelTy>
CieRecord *addCie(EhSectionPiece &piece, ArrayRef<RelTy> rels);
template <class ELFT, class RelTy>
Defined *isFdeLive(EhSectionPiece &piece, ArrayRef<RelTy> rels);
uint64_t getFdePc(uint8_t *buf, size_t off, uint8_t enc) const;
SmallVector<CieRecord *, 0> cieRecords;
// CIE records are uniquified by their contents and personality functions.
llvm::DenseMap<std::pair<ArrayRef<uint8_t>, Symbol *>, CieRecord *> cieMap;
};
class GotSection : public SyntheticSection {
public:
GotSection();
size_t getSize() const override { return size; }
void finalizeContents() override;
bool isNeeded() const override;
void writeTo(uint8_t *buf) override;
void addEntry(Symbol &sym);
bool addTlsDescEntry(Symbol &sym);
bool addDynTlsEntry(Symbol &sym);
bool addTlsIndex();
uint32_t getTlsDescOffset(const Symbol &sym) const;
uint64_t getTlsDescAddr(const Symbol &sym) const;
uint64_t getGlobalDynAddr(const Symbol &b) const;
uint64_t getGlobalDynOffset(const Symbol &b) const;
uint64_t getTlsIndexVA() { return this->getVA() + tlsIndexOff; }
uint32_t getTlsIndexOff() const { return tlsIndexOff; }
// Flag to force GOT to be in output if we have relocations
// that rely on its address.
bool hasGotOffRel = false;
protected:
size_t numEntries = 0;
uint32_t tlsIndexOff = -1;
uint64_t size = 0;
};
// .note.GNU-stack section.
class GnuStackSection : public SyntheticSection {
public:
GnuStackSection()
: SyntheticSection(0, llvm::ELF::SHT_PROGBITS, 1, ".note.GNU-stack") {}
void writeTo(uint8_t *buf) override {}
size_t getSize() const override { return 0; }
};
class GnuPropertySection : public SyntheticSection {
public:
GnuPropertySection();
void writeTo(uint8_t *buf) override;
size_t getSize() const override;
};
// .note.gnu.build-id section.
class BuildIdSection : public SyntheticSection {
// First 16 bytes are a header.
static const unsigned headerSize = 16;
public:
const size_t hashSize;
BuildIdSection();
void writeTo(uint8_t *buf) override;
size_t getSize() const override { return headerSize + hashSize; }
void writeBuildId(llvm::ArrayRef<uint8_t> buf);
private:
uint8_t *hashBuf;
};
// BssSection is used to reserve space for copy relocations and common symbols.
// We create three instances of this class for .bss, .bss.rel.ro and "COMMON",
// that are used for writable symbols, read-only symbols and common symbols,
// respectively.
class BssSection final : public SyntheticSection {
public:
BssSection(StringRef name, uint64_t size, uint32_t alignment);
void writeTo(uint8_t *) override {
llvm_unreachable("unexpected writeTo() call for SHT_NOBITS section");
}
bool isNeeded() const override { return size != 0; }
size_t getSize() const override { return size; }
static bool classof(const SectionBase *s) { return s->bss; }
uint64_t size;
};
class MipsGotSection final : public SyntheticSection {
public:
MipsGotSection();
void writeTo(uint8_t *buf) override;
size_t getSize() const override { return size; }
bool updateAllocSize() override;
void finalizeContents() override;
bool isNeeded() const override;
// Join the separate GOTs built for each input file to generate
// a primary GOT and, optionally, multiple secondary GOTs.
void build();
void addEntry(InputFile &file, Symbol &sym, int64_t addend, RelExpr expr);
void addDynTlsEntry(InputFile &file, Symbol &sym);
void addTlsIndex(InputFile &file);
uint64_t getPageEntryOffset(const InputFile *f, const Symbol &s,
int64_t addend) const;
uint64_t getSymEntryOffset(const InputFile *f, const Symbol &s,
int64_t addend) const;
uint64_t getGlobalDynOffset(const InputFile *f, const Symbol &s) const;
uint64_t getTlsIndexOffset(const InputFile *f) const;
// Returns the symbol which corresponds to the first entry of the global part
// of the GOT on MIPS. It is required to fill in MIPS-specific dynamic
// table properties.
// Returns nullptr if the global part is empty.
const Symbol *getFirstGlobalEntry() const;
// Returns the number of entries in the local part of GOT including
// the number of reserved entries.
unsigned getLocalEntriesNum() const;
// Return the _gp value for the primary GOT (f == nullptr) or a particular input file.
uint64_t getGp(const InputFile *f = nullptr) const;
private:
// MIPS GOT consists of three parts: local, global and tls. Each part
// contains different types of entries. Here is a layout of GOT:
// - Header entries |
// - Page entries | Local part
// - Local entries (16-bit access) |
// - Local entries (32-bit access) |
// - Normal global entries || Global part
// - Reloc-only global entries ||
// - TLS entries ||| TLS part
//
// Header:
// Two entries hold predefined value 0x0 and 0x80000000.
// Page entries:
// These entries are created by R_MIPS_GOT_PAGE relocations and R_MIPS_GOT16
// relocations against local symbols. They are initialized with the high 16 bits
// of the corresponding symbol's value. So each 64kb of address space
// requires a single GOT entry.
// Local entries (16-bit access):
// These entries are created by GOT relocations against global non-preemptible
// symbols, so the dynamic linker is not needed to resolve the symbols'
// values. "16-bit access" means that the corresponding relocations address
// GOT using 16-bit index. Each unique Symbol-Addend pair has its own
// GOT entry.
// Local entries (32-bit access):
// These entries are the same as above but created by relocations which
// address GOT using 32-bit index (R_MIPS_GOT_HI16/LO16 etc).
// Normal global entries:
// These entries are created by GOT relocations against preemptible global
// symbols. They need to be initialized by the dynamic linker and are ordered
// exactly as the corresponding entries in the dynamic symbols table.
// Reloc-only global entries:
// These entries are created for symbols referenced by R_MIPS_REL32 dynamic
// relocations. These entries are not accessed with gp-relative
// addressing, but the MIPS ABI requires that they be present in the GOT.
// TLS entries:
// Entries created by TLS relocations.
//
// If the sum of local, global and TLS entries is less than 64K, a single
// GOT is enough. Otherwise, a multi-GOT is created. The series of primary and
// multiple secondary GOTs has the following layout:
// - Primary GOT
// Header
// Local entries
// Global entries
// Relocation only entries
// TLS entries
//
// - Secondary GOT
// Local entries
// Global entries
// TLS entries
// ...
//
// All GOT entries required by relocations from a single input file belong
// entirely to either the primary GOT or one of the secondary GOTs. To reference
// GOT entries, each GOT has its own _gp value that points to the "middle" of
// the GOT. In the code this value is loaded into the register used for GOT
// access.
//
// MIPS 32 function's prologue:
// lui v0,0x0
// 0: R_MIPS_HI16 _gp_disp
// addiu v0,v0,0
// 4: R_MIPS_LO16 _gp_disp
//
// MIPS 64:
// lui at,0x0
// 14: R_MIPS_GPREL16 main
//
// The dynamic linker does not know anything about secondary GOTs and cannot
// use the regular MIPS mechanism for initializing GOT entries. So we have
// to use the approach accepted by other architectures and create R_MIPS_REL32
// dynamic relocations to initialize global entries (and local entries in the
// case of PIC code) in secondary GOTs. Ironically, the MIPS dynamic linker
// requires GOT entries and correspondingly ordered dynamic symbol table
// entries to deal with dynamic relocations. To handle this problem, the
// relocation-only part of the primary GOT contains entries for all symbols
// referenced in the global parts of secondary GOTs. Although the sum of local
// and normal global entries of the primary GOT should be less than 64K, the
// size of the primary GOT (including relocation-only entries) can be greater
// than 64K, because the parts of the primary GOT that overflow the 64K limit
// are used only by the dynamic linker at dynamic link-time and not by 16-bit
// gp-relative addressing at run-time.
//
// For a complete multi-GOT description see the following link:
// https://dmz-portal.mips.com/wiki/MIPS_Multi_GOT
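//
// A worked example (illustrative, not from the source) of the page-entry
// accounting described above: an output section occupying [0x10000, 0x38000)
// spans the 64K pages whose high 16 bits are 0x1, 0x2 and 0x3, so GOT
// relocations against local symbols in that section need three page entries.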
// Number of "Header" entries.
static const unsigned headerEntriesNum = 2;
uint64_t size = 0;
// Symbol and addend.
using GotEntry = std::pair<Symbol *, int64_t>;
struct FileGot {
InputFile *file = nullptr;
size_t startIndex = 0;
struct PageBlock {
size_t firstIndex;
size_t count;
PageBlock() : firstIndex(0), count(0) {}
};
// Maps output sections referenced by MIPS GOT relocations
// to the description (index/count) of the "page" entries allocated
// for that section.
llvm::SmallMapVector<const OutputSection *, PageBlock, 16> pagesMap;
// Maps from Symbol+Addend pair or just Symbol to the GOT entry index.
llvm::MapVector<GotEntry, size_t> local16;
llvm::MapVector<GotEntry, size_t> local32;
llvm::MapVector<Symbol *, size_t> global;
llvm::MapVector<Symbol *, size_t> relocs;
llvm::MapVector<Symbol *, size_t> tls;
// Set of symbols referenced by dynamic TLS relocations.
llvm::MapVector<Symbol *, size_t> dynTlsSymbols;
// Total number of all entries.
size_t getEntriesNum() const;
// Number of "page" entries.
size_t getPageEntriesNum() const;
// Number of entries that require a 16-bit index to access.
size_t getIndexedEntriesNum() const;
};
// Container of the GOTs created for each input file.
// After building the final series of GOTs, this container
// holds the primary and secondary GOTs.
std::vector<FileGot> gots;
// Return (and create if necessary) `FileGot`.
FileGot &getGot(InputFile &f);
// Try to merge two GOTs. In case of success `dst` contains the
// result of the merge and the function returns true. In case of
// overflow `dst` is unchanged and the function returns false.
bool tryMergeGots(FileGot &dst, FileGot &src, bool isPrimary);
};
class GotPltSection final : public SyntheticSection {
public:
GotPltSection();
void addEntry(Symbol &sym);
size_t getSize() const override;
void writeTo(uint8_t *buf) override;
bool isNeeded() const override;
// Flag to force GotPlt to be in output if we have relocations
// that rely on its address.
bool hasGotPltOffRel = false;
private:
SmallVector<const Symbol *, 0> entries;
};
// The IgotPltSection is a Got associated with the PltSection for GNU Ifunc
// Symbols that will be relocated by Target->IRelativeRel.
// On most targets the IgotPltSection will immediately follow the GotPltSection;
// on ARM the IgotPltSection will immediately follow the GotSection.
class IgotPltSection final : public SyntheticSection {
public:
IgotPltSection();
void addEntry(Symbol &sym);
size_t getSize() const override;
void writeTo(uint8_t *buf) override;
bool isNeeded() const override { return !entries.empty(); }
private:
SmallVector<const Symbol *, 0> entries;
};
class StringTableSection final : public SyntheticSection {
public:
StringTableSection(StringRef name, bool dynamic);
unsigned addString(StringRef s, bool hashIt = true);
void writeTo(uint8_t *buf) override;
size_t getSize() const override { return size; }
bool isDynamic() const { return dynamic; }
private:
const bool dynamic;
uint64_t size = 0;
llvm::DenseMap<llvm::CachedHashStringRef, unsigned> stringMap;
SmallVector<StringRef, 0> strings;
};
class DynamicReloc {
public:
enum Kind {
/// The resulting dynamic relocation does not reference a symbol (#sym must
/// be nullptr) and uses #addend as the result of computeAddend().
AddendOnly,
/// The resulting dynamic relocation will not reference a symbol: #sym is
/// only used to compute the addend with InputSection::getRelocTargetVA().
/// Useful for various relative and TLS relocations (e.g. R_X86_64_TPOFF64).
AddendOnlyWithTargetVA,
/// The resulting dynamic relocation references symbol #sym from the dynamic
/// symbol table and uses #addend as the value of computeAddend().
AgainstSymbol,
/// The resulting dynamic relocation references symbol #sym from the dynamic
/// symbol table and uses InputSection::getRelocTargetVA() + #addend for the
/// final addend. It can be used for relocations that write the symbol VA as
/// the addend (e.g. R_MIPS_TLS_TPREL64) but still reference the symbol.
AgainstSymbolWithTargetVA,
/// This is used by the MIPS multi-GOT implementation. It relocates
/// addresses of 64kb pages that lie inside the output section.
MipsMultiGotPage,
};
/// This constructor records a relocation against a symbol.
DynamicReloc(RelType type, const InputSectionBase *inputSec,
uint64_t offsetInSec, Kind kind, Symbol &sym, int64_t addend,
RelExpr expr)
: sym(&sym), inputSec(inputSec), offsetInSec(offsetInSec), type(type),
addend(addend), kind(kind), expr(expr) {}
/// This constructor records a relative relocation with no symbol.
DynamicReloc(RelType type, const InputSectionBase *inputSec,
uint64_t offsetInSec, int64_t addend = 0)
: sym(nullptr), inputSec(inputSec), offsetInSec(offsetInSec), type(type),
addend(addend), kind(AddendOnly), expr(R_ADDEND) {}
/// This constructor records dynamic relocation settings used by the MIPS
/// multi-GOT implementation.
DynamicReloc(RelType type, const InputSectionBase *inputSec,
uint64_t offsetInSec, const OutputSection *outputSec,
int64_t addend)
: sym(nullptr), outputSec(outputSec), inputSec(inputSec),
offsetInSec(offsetInSec), type(type), addend(addend),
kind(MipsMultiGotPage), expr(R_ADDEND) {}
uint64_t getOffset() const;
uint32_t getSymIndex(SymbolTableBaseSection *symTab) const;
bool needsDynSymIndex() const {
return kind == AgainstSymbol || kind == AgainstSymbolWithTargetVA;
}
/// Computes the addend of the dynamic relocation. Note that this is not the
/// same as the #addend member variable as it may also include the symbol
/// address/the address of the corresponding GOT entry/etc.
int64_t computeAddend() const;
void computeRaw(SymbolTableBaseSection *symtab);
Symbol *sym;
const OutputSection *outputSec = nullptr;
const InputSectionBase *inputSec;
uint64_t offsetInSec;
uint64_t r_offset;
RelType type;
uint32_t r_sym;
// Initially input addend, then the output addend after
// RelocationSection<ELFT>::writeTo.
int64_t addend;
private:
Kind kind;
// The kind of expression used to calculate the addend (required e.g. for
// relative GOT relocations).
RelExpr expr;
};
template <class ELFT> class DynamicSection final : public SyntheticSection {
LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
public:
DynamicSection();
void finalizeContents() override;
void writeTo(uint8_t *buf) override;
size_t getSize() const override { return size; }
private:
std::vector<std::pair<int32_t, uint64_t>> computeContents();
uint64_t size = 0;
};
class RelocationBaseSection : public SyntheticSection {
public:
RelocationBaseSection(StringRef name, uint32_t type, int32_t dynamicTag,
int32_t sizeDynamicTag, bool combreloc);
/// Add a dynamic relocation without writing an addend to the output section.
/// This overload can be used if the addends are written directly instead of
/// using relocations on the input section (e.g. MipsGotSection::writeTo()).
void addReloc(const DynamicReloc &reloc) { relocs.push_back(reloc); }
/// Add a dynamic relocation against \p sym with an optional addend.
void addSymbolReloc(RelType dynType, InputSectionBase &isec,
uint64_t offsetInSec, Symbol &sym, int64_t addend = 0,
llvm::Optional<RelType> addendRelType = llvm::None);
/// Add a relative dynamic relocation that uses the target address of \p sym
/// (i.e. InputSection::getRelocTargetVA()) + \p addend as the addend.
void addRelativeReloc(RelType dynType, InputSectionBase &isec,
uint64_t offsetInSec, Symbol &sym, int64_t addend,
RelType addendRelType, RelExpr expr);
/// Add a dynamic relocation using the target address of \p sym as the addend
/// if \p sym is non-preemptible. Otherwise add a relocation against \p sym.
void addAddendOnlyRelocIfNonPreemptible(RelType dynType,
InputSectionBase &isec,
uint64_t offsetInSec, Symbol &sym,
RelType addendRelType);
void addReloc(DynamicReloc::Kind kind, RelType dynType,
InputSectionBase &inputSec, uint64_t offsetInSec, Symbol &sym,
int64_t addend, RelExpr expr, RelType addendRelType);
bool isNeeded() const override { return !relocs.empty(); }
size_t getSize() const override { return relocs.size() * this->entsize; }
size_t getRelativeRelocCount() const { return numRelativeRelocs; }
void partitionRels();
void finalizeContents() override;
static bool classof(const SectionBase *d) {
return SyntheticSection::classof(d) &&
(d->type == llvm::ELF::SHT_RELA || d->type == llvm::ELF::SHT_REL ||
d->type == llvm::ELF::SHT_RELR);
}
int32_t dynamicTag, sizeDynamicTag;
SmallVector<DynamicReloc, 0> relocs;
protected:
void computeRels();
size_t numRelativeRelocs = 0; // used by -z combreloc
bool combreloc;
};
template <class ELFT>
class RelocationSection final : public RelocationBaseSection {
using Elf_Rel = typename ELFT::Rel;
using Elf_Rela = typename ELFT::Rela;
public:
RelocationSection(StringRef name, bool combreloc);
void writeTo(uint8_t *buf) override;
};
template <class ELFT>
class AndroidPackedRelocationSection final : public RelocationBaseSection {
using Elf_Rel = typename ELFT::Rel;
using Elf_Rela = typename ELFT::Rela;
public:
AndroidPackedRelocationSection(StringRef name);
bool updateAllocSize() override;
size_t getSize() const override { return relocData.size(); }
void writeTo(uint8_t *buf) override {
memcpy(buf, relocData.data(), relocData.size());
}
private:
SmallVector<char, 0> relocData;
};
struct RelativeReloc {
uint64_t getOffset() const { return inputSec->getVA(offsetInSec); }
const InputSectionBase *inputSec;
uint64_t offsetInSec;
};
class RelrBaseSection : public SyntheticSection {
public:
RelrBaseSection();
bool isNeeded() const override { return !relocs.empty(); }
SmallVector<RelativeReloc, 0> relocs;
};
// RelrSection is used to encode offsets for relative relocations.
// Proposal for adding SHT_RELR sections to generic-abi is here:
// https://groups.google.com/forum/#!topic/generic-abi/bX460iggiKg
// For more details, see the comment in RelrSection::updateAllocSize().
template <class ELFT> class RelrSection final : public RelrBaseSection {
using Elf_Relr = typename ELFT::Relr;
public:
RelrSection();
bool updateAllocSize() override;
size_t getSize() const override { return relrRelocs.size() * this->entsize; }
void writeTo(uint8_t *buf) override {
memcpy(buf, relrRelocs.data(), getSize());
}
private:
SmallVector<Elf_Relr, 0> relrRelocs;
};
struct SymbolTableEntry {
Symbol *sym;
size_t strTabOffset;
};
class SymbolTableBaseSection : public SyntheticSection {
public:
SymbolTableBaseSection(StringTableSection &strTabSec);
void finalizeContents() override;
size_t getSize() const override { return getNumSymbols() * entsize; }
void addSymbol(Symbol *sym);
unsigned getNumSymbols() const { return symbols.size() + 1; }
size_t getSymbolIndex(Symbol *sym);
ArrayRef<SymbolTableEntry> getSymbols() const { return symbols; }
protected:
void sortSymTabSymbols();
// A vector of symbols and their string table offsets.
SmallVector<SymbolTableEntry, 0> symbols;
StringTableSection &strTabSec;
llvm::once_flag onceFlag;
llvm::DenseMap<Symbol *, size_t> symbolIndexMap;
llvm::DenseMap<OutputSection *, size_t> sectionIndexMap;
};
template <class ELFT>
class SymbolTableSection final : public SymbolTableBaseSection {
using Elf_Sym = typename ELFT::Sym;
public:
SymbolTableSection(StringTableSection &strTabSec);
void writeTo(uint8_t *buf) override;
};
class SymtabShndxSection final : public SyntheticSection {
public:
SymtabShndxSection();
void writeTo(uint8_t *buf) override;
size_t getSize() const override;
bool isNeeded() const override;
void finalizeContents() override;
};
// Outputs GNU Hash section. For detailed explanation see:
// https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections
class GnuHashTableSection final : public SyntheticSection {
public:
GnuHashTableSection();
void finalizeContents() override;
void writeTo(uint8_t *buf) override;
size_t getSize() const override { return size; }
// Adds symbols to the hash table.
// Sorts the input to satisfy GNU hash section requirements.
void addSymbols(llvm::SmallVectorImpl<SymbolTableEntry> &symbols);
private:
// See the comment in writeBloomFilter.
enum { Shift2 = 26 };
struct Entry {
Symbol *sym;
size_t strTabOffset;
uint32_t hash;
uint32_t bucketIdx;
};
SmallVector<Entry, 0> symbols;
size_t maskWords;
size_t nBuckets = 0;
size_t size = 0;
};
class HashTableSection final : public SyntheticSection {
public:
HashTableSection();
void finalizeContents() override;
void writeTo(uint8_t *buf) override;
size_t getSize() const override { return size; }
private:
size_t size = 0;
};
// Used for PLT entries. It usually has a PLT header for lazy binding. Each PLT
// entry is associated with a JUMP_SLOT relocation, which may be resolved lazily
// at runtime.
//
// On PowerPC, this section contains lazy symbol resolvers. A branch instruction
// jumps to a PLT call stub, which will then jump to the target (BIND_NOW) or a
// lazy symbol resolver.
//
// On x86 when IBT is enabled, this section (.plt.sec) contains PLT call stubs.
// A call instruction jumps to a .plt.sec entry, which will then jump to the
// target (BIND_NOW) or a .plt entry.
class PltSection : public SyntheticSection {
public:
PltSection();
void writeTo(uint8_t *buf) override;
size_t getSize() const override;
bool isNeeded() const override;
void addSymbols();
void addEntry(Symbol &sym);
size_t getNumEntries() const { return entries.size(); }
size_t headerSize;
SmallVector<const Symbol *, 0> entries;
};
// Used for non-preemptible ifuncs. It does not have a header. Each entry is
// associated with an IRELATIVE relocation, which will be resolved eagerly at
// runtime. PltSection can only contain entries associated with JUMP_SLOT
// relocations, so IPLT entries are in a separate section.
class IpltSection final : public SyntheticSection {
SmallVector<const Symbol *, 0> entries;
public:
IpltSection();
void writeTo(uint8_t *buf) override;
size_t getSize() const override;
bool isNeeded() const override { return !entries.empty(); }
void addSymbols();
void addEntry(Symbol &sym);
};
class PPC32GlinkSection : public PltSection {
public:
PPC32GlinkSection();
void writeTo(uint8_t *buf) override;
size_t getSize() const override;
SmallVector<const Symbol *, 0> canonical_plts;
static constexpr size_t footerSize = 64;
};
// This is x86-only.
class IBTPltSection : public SyntheticSection {
public:
IBTPltSection();
void writeTo(uint8_t *Buf) override;
+ bool isNeeded() const override;
size_t getSize() const override;
};
class GdbIndexSection final : public SyntheticSection {
public:
struct AddressEntry {
InputSection *section;
uint64_t lowAddress;
uint64_t highAddress;
uint32_t cuIndex;
};
struct CuEntry {
uint64_t cuOffset;
uint64_t cuLength;
};
struct NameAttrEntry {
llvm::CachedHashStringRef name;
uint32_t cuIndexAndAttrs;
};
struct GdbChunk {
InputSection *sec;
SmallVector<AddressEntry, 0> addressAreas;
SmallVector<CuEntry, 0> compilationUnits;
};
struct GdbSymbol {
llvm::CachedHashStringRef name;
SmallVector<uint32_t, 0> cuVector;
uint32_t nameOff;
uint32_t cuVectorOff;
};
GdbIndexSection();
template <typename ELFT> static GdbIndexSection *create();
void writeTo(uint8_t *buf) override;
size_t getSize() const override { return size; }
bool isNeeded() const override;
private:
struct GdbIndexHeader {
llvm::support::ulittle32_t version;
llvm::support::ulittle32_t cuListOff;
llvm::support::ulittle32_t cuTypesOff;
llvm::support::ulittle32_t addressAreaOff;
llvm::support::ulittle32_t symtabOff;
llvm::support::ulittle32_t constantPoolOff;
};
void initOutputSize();
size_t computeSymtabSize() const;
// Each chunk contains information gathered from debug sections of a
// single object file.
SmallVector<GdbChunk, 0> chunks;
// A symbol table for this .gdb_index section.
SmallVector<GdbSymbol, 0> symbols;
size_t size;
};
// The --eh-frame-hdr option tells the linker to construct a header for all the
// .eh_frame sections. This header is placed in a section named .eh_frame_hdr
// and also in a PT_GNU_EH_FRAME segment.
// At runtime the unwinder can then find all the PT_GNU_EH_FRAME segments by
// calling dl_iterate_phdr.
// This section contains a lookup table for quick binary search of FDEs.
// Detailed info about internals can be found in Ian Lance Taylor's blog:
// http://www.airs.com/blog/archives/460 (".eh_frame")
// http://www.airs.com/blog/archives/462 (".eh_frame_hdr")
class EhFrameHeader final : public SyntheticSection {
public:
EhFrameHeader();
void write();
void writeTo(uint8_t *buf) override;
size_t getSize() const override;
bool isNeeded() const override;
};
// For more information about .gnu.version and .gnu.version_r see:
// https://www.akkadia.org/drepper/symbol-versioning
// The .gnu.version_d section which has a section type of SHT_GNU_verdef shall
// contain symbol version definitions. The number of entries in this section
// shall be contained in the DT_VERDEFNUM entry of the .dynamic section.
// The section shall contain an array of Elf_Verdef structures, optionally
// followed by an array of Elf_Verdaux structures.
class VersionDefinitionSection final : public SyntheticSection {
public:
VersionDefinitionSection();
void finalizeContents() override;
size_t getSize() const override;
void writeTo(uint8_t *buf) override;
private:
enum { EntrySize = 28 };
void writeOne(uint8_t *buf, uint32_t index, StringRef name, size_t nameOff);
StringRef getFileDefName();
unsigned fileDefNameOff;
SmallVector<unsigned, 0> verDefNameOffs;
};
// The .gnu.version section specifies the required version of each symbol in the
// dynamic symbol table. It contains one Elf_Versym for each dynamic symbol
// table entry. An Elf_Versym is just a 16-bit integer that refers to a version
// identifier defined in either the .gnu.version_r or .gnu.version_d section.
// The values 0 and 1 are reserved. All other values are used for versions in
// the object itself or in any of its dependencies.
class VersionTableSection final : public SyntheticSection {
public:
VersionTableSection();
void finalizeContents() override;
size_t getSize() const override;
void writeTo(uint8_t *buf) override;
bool isNeeded() const override;
};
// The .gnu.version_r section defines the version identifiers used by
// .gnu.version. It contains a linked list of Elf_Verneed data structures. Each
// Elf_Verneed specifies the version requirements for a single DSO, and contains
// a reference to a linked list of Elf_Vernaux data structures which define the
// mapping from version identifiers to version names.
template <class ELFT>
class VersionNeedSection final : public SyntheticSection {
using Elf_Verneed = typename ELFT::Verneed;
using Elf_Vernaux = typename ELFT::Vernaux;
struct Vernaux {
uint64_t hash;
uint32_t verneedIndex;
uint64_t nameStrTab;
};
struct Verneed {
uint64_t nameStrTab;
std::vector<Vernaux> vernauxs;
};
SmallVector<Verneed, 0> verneeds;
public:
VersionNeedSection();
void finalizeContents() override;
void writeTo(uint8_t *buf) override;
size_t getSize() const override;
bool isNeeded() const override;
};
// MergeSyntheticSection is a class that allows us to put mergeable sections
// with different attributes in a single output section. To do that
// we put them into MergeSyntheticSection synthetic input sections which are
// attached to regular output sections.
class MergeSyntheticSection : public SyntheticSection {
public:
void addSection(MergeInputSection *ms);
SmallVector<MergeInputSection *, 0> sections;
protected:
MergeSyntheticSection(StringRef name, uint32_t type, uint64_t flags,
uint32_t alignment)
: SyntheticSection(flags, type, alignment, name) {}
};
class MergeTailSection final : public MergeSyntheticSection {
public:
MergeTailSection(StringRef name, uint32_t type, uint64_t flags,
uint32_t alignment);
size_t getSize() const override;
void writeTo(uint8_t *buf) override;
void finalizeContents() override;
private:
llvm::StringTableBuilder builder;
};
class MergeNoTailSection final : public MergeSyntheticSection {
public:
MergeNoTailSection(StringRef name, uint32_t type, uint64_t flags,
uint32_t alignment)
: MergeSyntheticSection(name, type, flags, alignment) {}
size_t getSize() const override { return size; }
void writeTo(uint8_t *buf) override;
void finalizeContents() override;
private:
// We use the most significant bits of a hash as a shard ID.
// The reason we don't want to use the least significant bits is
// that DenseMap also uses the lower bits to determine a bucket ID.
// If we used the lower bits, it would significantly increase the probability of
// hash collisions.
size_t getShardId(uint32_t hash) {
assert((hash >> 31) == 0);
return hash >> (31 - llvm::countTrailingZeros(numShards));
}
// Section size
size_t size;
// String table contents
constexpr static size_t numShards = 32;
SmallVector<llvm::StringTableBuilder, 0> shards;
size_t shardOffsets[numShards];
};
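// A worked example (illustrative only) of the shard selection in getShardId()
// above: with numShards == 32, countTrailingZeros(32) == 5, so the function
// computes hash >> 26 and keeps the top five bits of the 31-bit hash. For
// hash == 0x7fffffff that yields 31 (the last shard); for hash == 0x04000000
// it yields 1.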
// .MIPS.abiflags section.
template <class ELFT>
class MipsAbiFlagsSection final : public SyntheticSection {
using Elf_Mips_ABIFlags = llvm::object::Elf_Mips_ABIFlags<ELFT>;
public:
static std::unique_ptr<MipsAbiFlagsSection> create();
MipsAbiFlagsSection(Elf_Mips_ABIFlags flags);
size_t getSize() const override { return sizeof(Elf_Mips_ABIFlags); }
void writeTo(uint8_t *buf) override;
private:
Elf_Mips_ABIFlags flags;
};
// .MIPS.options section.
template <class ELFT> class MipsOptionsSection final : public SyntheticSection {
using Elf_Mips_Options = llvm::object::Elf_Mips_Options<ELFT>;
using Elf_Mips_RegInfo = llvm::object::Elf_Mips_RegInfo<ELFT>;
public:
static std::unique_ptr<MipsOptionsSection<ELFT>> create();
MipsOptionsSection(Elf_Mips_RegInfo reginfo);
void writeTo(uint8_t *buf) override;
size_t getSize() const override {
return sizeof(Elf_Mips_Options) + sizeof(Elf_Mips_RegInfo);
}
private:
Elf_Mips_RegInfo reginfo;
};
// MIPS .reginfo section.
template <class ELFT> class MipsReginfoSection final : public SyntheticSection {
using Elf_Mips_RegInfo = llvm::object::Elf_Mips_RegInfo<ELFT>;
public:
static std::unique_ptr<MipsReginfoSection> create();
MipsReginfoSection(Elf_Mips_RegInfo reginfo);
size_t getSize() const override { return sizeof(Elf_Mips_RegInfo); }
void writeTo(uint8_t *buf) override;
private:
Elf_Mips_RegInfo reginfo;
};
// This is a MIPS-specific section that holds a space within the data segment
// of the executable file; that space is pointed to by the DT_MIPS_RLD_MAP entry.
// See "Dynamic section" in Chapter 5 in the following document:
// ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
class MipsRldMapSection : public SyntheticSection {
public:
MipsRldMapSection();
size_t getSize() const override { return config->wordsize; }
void writeTo(uint8_t *buf) override {}
};
// Representation of the combined .ARM.Exidx input sections. We process these
// as a SyntheticSection like .eh_frame as we need to merge duplicate entries
// and add terminating sentinel entries.
//
// After SHF_LINK_ORDER processing is done, the .ARM.exidx input sections form
// a table that the unwinder can derive (addresses are encoded as offsets from
// the table):
// | Address of function | Unwind instructions for function |
// where the unwind instructions are either a small number of unwind
// instructions or the special EXIDX_CANTUNWIND entry representing no unwinding
// information.
// When an exception is thrown from an address A, the unwinder searches the
// table for the closest table entry with Address of function <= A. This means
// that for two consecutive table entries:
// | A1 | U1 |
// | A2 | U2 |
// The range of addresses described by U1 is [A1, A2)
//
// There are two cases where we need a linker-generated table entry to fix up
// the address ranges in the table.
// Case 1:
// - A sentinel entry added with an address higher than all
// executable sections. This was needed to work around libunwind bug pr31091.
// - After address assignment we need to find the highest addressed executable
// section and use the limit of that section so that the unwinder never
// matches it.
// Case 2:
// - InputSections without a .ARM.exidx section (usually from Assembly)
// need a table entry so that they terminate the range of the previous
// function. This is pr40277.
//
// Instead of storing pointers to the .ARM.exidx InputSections from
// InputObjects, we store pointers to the executable sections that need
// .ARM.exidx sections. We can then use the dependentSections of these to
// either find the .ARM.exidx section or know that we need to generate one.
class ARMExidxSyntheticSection : public SyntheticSection {
public:
ARMExidxSyntheticSection();
// Add an input section to the ARMExidxSyntheticSection. Returns whether the
// section needs to be removed from the main input section list.
bool addSection(InputSection *isec);
size_t getSize() const override { return size; }
void writeTo(uint8_t *buf) override;
bool isNeeded() const override;
// Sort and remove duplicate entries.
void finalizeContents() override;
InputSection *getLinkOrderDep() const;
static bool classof(const SectionBase *d);
// Links to the ARMExidxSections so we can transfer the relocations once the
// layout is known.
SmallVector<InputSection *, 0> exidxSections;
private:
size_t size = 0;
// Instead of storing pointers to the .ARM.exidx InputSections from
// InputObjects, we store pointers to the executable sections that need
// .ARM.exidx sections. We can then use the dependentSections of these to
// either find the .ARM.exidx section or know that we need to generate one.
SmallVector<InputSection *, 0> executableSections;
// The executable InputSection with the highest address to use for the
// sentinel. We store it separately from executableSections as merging of
// duplicate entries may mean this InputSection is removed from
// executableSections.
InputSection *sentinel = nullptr;
};
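// A sketch of the unwinder-side lookup described in the comment above. This is
// illustrative only: the entry type, the helper name, and the availability of
// <algorithm> are assumptions, not lld code.
//
//   struct ExidxEntry { uint64_t fnAddr; uint32_t unwindData; };
//
//   // Entries are sorted by fnAddr; return the last entry with fnAddr <= pc,
//   // so entry i covers the address range [entries[i].fnAddr,
//   // entries[i + 1].fnAddr).
//   static const ExidxEntry *findExidxEntry(llvm::ArrayRef<ExidxEntry> entries,
//                                           uint64_t pc) {
//     auto it = std::upper_bound(
//         entries.begin(), entries.end(), pc,
//         [](uint64_t p, const ExidxEntry &e) { return p < e.fnAddr; });
//     return it == entries.begin() ? nullptr : &*(it - 1);
//   }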
// A container for one or more linker-generated thunks. Instances of these
// thunks include ARM interworking and MIPS LA25 PI-to-non-PI thunks.
class ThunkSection : public SyntheticSection {
public:
// A ThunkSection in the OutputSection os, with the desired outSecOff of off.
ThunkSection(OutputSection *os, uint64_t off);
// Add a newly created Thunk to this container:
// the Thunk is given an offset from the start of this InputSection, and it
// defines a symbol in this InputSection that can be used as the target
// of a relocation.
void addThunk(Thunk *t);
size_t getSize() const override;
void writeTo(uint8_t *buf) override;
InputSection *getTargetInputSection() const;
bool assignOffsets();
// When true, round up reported size of section to 4 KiB. See comment
// in addThunkSection() for more details.
bool roundUpSizeForErrata = false;
private:
SmallVector<Thunk *, 0> thunks;
size_t size = 0;
};
// Used to compute outSecOff of .got2 in each object file. This is needed to
// synthesize PLT entries for PPC32 Secure PLT ABI.
class PPC32Got2Section final : public SyntheticSection {
public:
PPC32Got2Section();
size_t getSize() const override { return 0; }
bool isNeeded() const override;
void finalizeContents() override;
void writeTo(uint8_t *buf) override {}
};
// This section is used to store the addresses of functions that are called
// in range-extending thunks on PowerPC64. When producing position-dependent
// code the addresses are link-time constants and the table is written out to
// the binary. When producing position-independent code the table is allocated and
// filled in by the dynamic linker.
class PPC64LongBranchTargetSection final : public SyntheticSection {
public:
PPC64LongBranchTargetSection();
uint64_t getEntryVA(const Symbol *sym, int64_t addend);
llvm::Optional<uint32_t> addEntry(const Symbol *sym, int64_t addend);
size_t getSize() const override;
void writeTo(uint8_t *buf) override;
bool isNeeded() const override;
void finalizeContents() override { finalized = true; }
private:
SmallVector<std::pair<const Symbol *, int64_t>, 0> entries;
llvm::DenseMap<std::pair<const Symbol *, int64_t>, uint32_t> entry_index;
bool finalized = false;
};
template <typename ELFT>
class PartitionElfHeaderSection : public SyntheticSection {
public:
PartitionElfHeaderSection();
size_t getSize() const override;
void writeTo(uint8_t *buf) override;
};
template <typename ELFT>
class PartitionProgramHeadersSection : public SyntheticSection {
public:
PartitionProgramHeadersSection();
size_t getSize() const override;
void writeTo(uint8_t *buf) override;
};
class PartitionIndexSection : public SyntheticSection {
public:
PartitionIndexSection();
size_t getSize() const override;
void finalizeContents() override;
void writeTo(uint8_t *buf) override;
};
InputSection *createInterpSection();
MergeInputSection *createCommentSection();
template <class ELFT> void splitSections();
template <typename ELFT> void writeEhdr(uint8_t *buf, Partition &part);
template <typename ELFT> void writePhdrs(uint8_t *buf, Partition &part);
Defined *addSyntheticLocal(StringRef name, uint8_t type, uint64_t value,
uint64_t size, InputSectionBase &section);
void addVerneed(Symbol *ss);
// Linker generated per-partition sections.
struct Partition {
StringRef name;
uint64_t nameStrTab;
std::unique_ptr<SyntheticSection> elfHeader;
std::unique_ptr<SyntheticSection> programHeaders;
SmallVector<PhdrEntry *, 0> phdrs;
std::unique_ptr<ARMExidxSyntheticSection> armExidx;
std::unique_ptr<BuildIdSection> buildId;
std::unique_ptr<SyntheticSection> dynamic;
std::unique_ptr<StringTableSection> dynStrTab;
std::unique_ptr<SymbolTableBaseSection> dynSymTab;
std::unique_ptr<EhFrameHeader> ehFrameHdr;
std::unique_ptr<EhFrameSection> ehFrame;
std::unique_ptr<GnuHashTableSection> gnuHashTab;
std::unique_ptr<HashTableSection> hashTab;
std::unique_ptr<RelocationBaseSection> relaDyn;
std::unique_ptr<RelrBaseSection> relrDyn;
std::unique_ptr<VersionDefinitionSection> verDef;
std::unique_ptr<SyntheticSection> verNeed;
std::unique_ptr<VersionTableSection> verSym;
unsigned getNumber() const { return this - &partitions[0] + 1; }
};
extern Partition *mainPart;
inline Partition &SectionBase::getPartition() const {
assert(isLive());
return partitions[partition - 1];
}
// Linker generated sections which can be used as inputs and are not specific to
// a partition.
struct InStruct {
std::unique_ptr<InputSection> attributes;
std::unique_ptr<BssSection> bss;
std::unique_ptr<BssSection> bssRelRo;
std::unique_ptr<GotSection> got;
std::unique_ptr<GotPltSection> gotPlt;
std::unique_ptr<IgotPltSection> igotPlt;
std::unique_ptr<PPC64LongBranchTargetSection> ppc64LongBranchTarget;
std::unique_ptr<SyntheticSection> mipsAbiFlags;
std::unique_ptr<MipsGotSection> mipsGot;
std::unique_ptr<SyntheticSection> mipsOptions;
std::unique_ptr<SyntheticSection> mipsReginfo;
std::unique_ptr<MipsRldMapSection> mipsRldMap;
std::unique_ptr<SyntheticSection> partEnd;
std::unique_ptr<SyntheticSection> partIndex;
std::unique_ptr<PltSection> plt;
std::unique_ptr<IpltSection> iplt;
std::unique_ptr<PPC32Got2Section> ppc32Got2;
std::unique_ptr<IBTPltSection> ibtPlt;
std::unique_ptr<RelocationBaseSection> relaPlt;
std::unique_ptr<RelocationBaseSection> relaIplt;
std::unique_ptr<StringTableSection> shStrTab;
std::unique_ptr<StringTableSection> strTab;
std::unique_ptr<SymbolTableBaseSection> symTab;
std::unique_ptr<SymtabShndxSection> symTabShndx;
void reset();
};
extern InStruct in;
} // namespace elf
} // namespace lld
#endif
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/MemoryBuiltins.h b/contrib/llvm-project/llvm/include/llvm/Analysis/MemoryBuiltins.h
index d5b60ee540e0..ce4413682bdc 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/MemoryBuiltins.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/MemoryBuiltins.h
@@ -1,292 +1,292 @@
//==- llvm/Analysis/MemoryBuiltins.h - Calls to memory builtins --*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This family of functions identifies calls to builtin functions that allocate
// or free memory.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_MEMORYBUILTINS_H
#define LLVM_ANALYSIS_MEMORYBUILTINS_H
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/ValueHandle.h"
#include <cstdint>
#include <utility>
namespace llvm {
class AllocaInst;
class Argument;
class CallInst;
class ConstantPointerNull;
class DataLayout;
class ExtractElementInst;
class ExtractValueInst;
class GEPOperator;
class GlobalAlias;
class GlobalVariable;
class Instruction;
class IntegerType;
class IntrinsicInst;
class IntToPtrInst;
class LLVMContext;
class LoadInst;
class PHINode;
class SelectInst;
class Type;
class UndefValue;
class Value;
/// Tests if a value is a call or invoke to a library function that
/// allocates or reallocates memory (either malloc, calloc, realloc, or strdup
/// like).
bool isAllocationFn(const Value *V, const TargetLibraryInfo *TLI);
bool isAllocationFn(const Value *V,
function_ref<const TargetLibraryInfo &(Function &)> GetTLI);
/// Tests if a value is a call or invoke to a library function that
/// allocates memory similar to malloc or calloc.
bool isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI);
/// Tests if a value is a call or invoke to a library function that
/// allocates memory (either malloc, calloc, or strdup like).
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI);
/// Tests if a value is a call or invoke to a library function that
/// reallocates memory (e.g., realloc).
bool isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI);
/// Tests if a function is a library function that
/// reallocates memory (e.g., realloc).
bool isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI);
//===----------------------------------------------------------------------===//
// free Call Utility Functions.
//
/// isLibFreeFunction - Returns true if the function is a builtin free()
bool isLibFreeFunction(const Function *F, const LibFunc TLIFn);
/// isFreeCall - Returns non-null if the value is a call to the builtin free()
const CallInst *isFreeCall(const Value *I, const TargetLibraryInfo *TLI);
inline CallInst *isFreeCall(Value *I, const TargetLibraryInfo *TLI) {
return const_cast<CallInst*>(isFreeCall((const Value*)I, TLI));
}
//===----------------------------------------------------------------------===//
// Properties of allocation functions
//
/// Return false if the allocation can have side effects on the program state
/// we are required to preserve beyond the effect of allocating a new object.
/// Ex: If our allocation routine has a counter for the number of objects
/// allocated, and the program prints it on exit, can the value change due
/// to optimization? Answer is highly language dependent.
/// Note: *Removable* really does mean removable; it does not mean observable.
/// A language (e.g. C++) can allow removing allocations without allowing
/// insertion or speculative execution of allocation routines.
bool isAllocRemovable(const CallBase *V, const TargetLibraryInfo *TLI);
/// Gets the alignment argument for an aligned_alloc-like function
Value *getAllocAlignment(const CallBase *V, const TargetLibraryInfo *TLI);
/// Return the size of the requested allocation. With a trivial mapper, this is
/// identical to calling getObjectSize(..., Exact). A mapper function can be
/// used to replace one Value* (operand to the allocation) with another. This
/// is useful when doing abstract interpretation.
Optional<APInt> getAllocSize(const CallBase *CB,
const TargetLibraryInfo *TLI,
std::function<const Value*(const Value*)> Mapper);
/// If this allocation function initializes memory to a fixed value, return
/// said value in the requested type. Otherwise, return nullptr.
Constant *getInitialValueOfAllocation(const CallBase *Alloc,
const TargetLibraryInfo *TLI,
Type *Ty);
//===----------------------------------------------------------------------===//
// Utility functions to compute size of objects.
//
/// Various options to control the behavior of getObjectSize.
struct ObjectSizeOpts {
/// Controls how we handle conditional statements with unknown conditions.
enum class Mode : uint8_t {
/// Fail to evaluate an unknown condition.
Exact,
/// Evaluate all branches of an unknown condition. If all evaluations
/// succeed, pick the minimum size.
Min,
/// Same as Min, except we pick the maximum size of all of the branches.
Max
};
/// How we want to evaluate this object's size.
Mode EvalMode = Mode::Exact;
/// Whether to round the result up to the alignment of allocas, byval
/// arguments, and global variables.
bool RoundToAlign = false;
/// If this is true, null pointers in address space 0 will be treated as
/// though they can't be evaluated. Otherwise, null is always considered to
/// point to a 0 byte region of memory.
bool NullIsUnknownSize = false;
};
/// Compute the size of the object pointed by Ptr. Returns true and the
/// object size in Size if successful, and false otherwise. In this context, by
/// object we mean the region of memory from Ptr to the end of the
/// underlying object pointed to by Ptr.
///
/// WARNING: The object size returned is the allocation size. This does not
/// imply dereferenceability at the site of use since the object may be freed in
/// between.
bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL,
const TargetLibraryInfo *TLI, ObjectSizeOpts Opts = {});
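// Illustrative usage sketch: `Ptr`, the enclosing Function `F`, and `TLI` are
// assumed to come from the calling pass and are not defined here.
//
//   ObjectSizeOpts Opts;
//   Opts.EvalMode = ObjectSizeOpts::Mode::Min;
//   Opts.RoundToAlign = true;
//   uint64_t Size;
//   if (getObjectSize(Ptr, Size, F.getParent()->getDataLayout(), TLI, Opts))
//     ; // Size holds the smallest size consistent with any unknown branches.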
/// Try to turn a call to \@llvm.objectsize into an integer value of the given
/// Type. Returns null on failure. If MustSucceed is true, this function will
/// not return null, and may return conservative values governed by the second
/// argument of the call to objectsize.
Value *lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL,
const TargetLibraryInfo *TLI, bool MustSucceed);
using SizeOffsetType = std::pair<APInt, APInt>;
/// Evaluate the size and offset of an object pointed to by a Value*
/// statically. Fails if size or offset are not known at compile time.
class ObjectSizeOffsetVisitor
: public InstVisitor<ObjectSizeOffsetVisitor, SizeOffsetType> {
const DataLayout &DL;
const TargetLibraryInfo *TLI;
ObjectSizeOpts Options;
unsigned IntTyBits;
APInt Zero;
SmallPtrSet<Instruction *, 8> SeenInsts;
APInt align(APInt Size, MaybeAlign Align);
SizeOffsetType unknown() {
return std::make_pair(APInt(), APInt());
}
public:
ObjectSizeOffsetVisitor(const DataLayout &DL, const TargetLibraryInfo *TLI,
LLVMContext &Context, ObjectSizeOpts Options = {});
SizeOffsetType compute(Value *V);
static bool knownSize(const SizeOffsetType &SizeOffset) {
return SizeOffset.first.getBitWidth() > 1;
}
static bool knownOffset(const SizeOffsetType &SizeOffset) {
return SizeOffset.second.getBitWidth() > 1;
}
static bool bothKnown(const SizeOffsetType &SizeOffset) {
return knownSize(SizeOffset) && knownOffset(SizeOffset);
}
// These are "private", except they can't actually be made private. Only
// compute() should be used by external users.
SizeOffsetType visitAllocaInst(AllocaInst &I);
SizeOffsetType visitArgument(Argument &A);
SizeOffsetType visitCallBase(CallBase &CB);
SizeOffsetType visitConstantPointerNull(ConstantPointerNull&);
SizeOffsetType visitExtractElementInst(ExtractElementInst &I);
SizeOffsetType visitExtractValueInst(ExtractValueInst &I);
- SizeOffsetType visitGEPOperator(GEPOperator &GEP);
SizeOffsetType visitGlobalAlias(GlobalAlias &GA);
SizeOffsetType visitGlobalVariable(GlobalVariable &GV);
SizeOffsetType visitIntToPtrInst(IntToPtrInst&);
SizeOffsetType visitLoadInst(LoadInst &I);
SizeOffsetType visitPHINode(PHINode&);
SizeOffsetType visitSelectInst(SelectInst &I);
SizeOffsetType visitUndefValue(UndefValue&);
SizeOffsetType visitInstruction(Instruction &I);
private:
+ SizeOffsetType computeImpl(Value *V);
bool CheckedZextOrTrunc(APInt &I);
};
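// Illustrative usage sketch: `DL`, `TLI`, `Ctx` and `Ptr` are assumed to be
// supplied by the caller and are not defined here.
//
//   ObjectSizeOffsetVisitor Visitor(DL, TLI, Ctx);
//   SizeOffsetType SO = Visitor.compute(Ptr);
//   if (ObjectSizeOffsetVisitor::bothKnown(SO))
//     ; // SO.first is the object's size, SO.second is Ptr's offset into it.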
using SizeOffsetEvalType = std::pair<Value *, Value *>;
/// Evaluate the size and offset of an object pointed to by a Value*.
/// May create code to compute the result at run-time.
class ObjectSizeOffsetEvaluator
: public InstVisitor<ObjectSizeOffsetEvaluator, SizeOffsetEvalType> {
using BuilderTy = IRBuilder<TargetFolder, IRBuilderCallbackInserter>;
using WeakEvalType = std::pair<WeakTrackingVH, WeakTrackingVH>;
using CacheMapTy = DenseMap<const Value *, WeakEvalType>;
using PtrSetTy = SmallPtrSet<const Value *, 8>;
const DataLayout &DL;
const TargetLibraryInfo *TLI;
LLVMContext &Context;
BuilderTy Builder;
IntegerType *IntTy;
Value *Zero;
CacheMapTy CacheMap;
PtrSetTy SeenVals;
ObjectSizeOpts EvalOpts;
SmallPtrSet<Instruction *, 8> InsertedInstructions;
SizeOffsetEvalType compute_(Value *V);
public:
static SizeOffsetEvalType unknown() {
return std::make_pair(nullptr, nullptr);
}
ObjectSizeOffsetEvaluator(const DataLayout &DL, const TargetLibraryInfo *TLI,
LLVMContext &Context, ObjectSizeOpts EvalOpts = {});
SizeOffsetEvalType compute(Value *V);
bool knownSize(SizeOffsetEvalType SizeOffset) {
return SizeOffset.first;
}
bool knownOffset(SizeOffsetEvalType SizeOffset) {
return SizeOffset.second;
}
bool anyKnown(SizeOffsetEvalType SizeOffset) {
return knownSize(SizeOffset) || knownOffset(SizeOffset);
}
bool bothKnown(SizeOffsetEvalType SizeOffset) {
return knownSize(SizeOffset) && knownOffset(SizeOffset);
}
// The individual instruction visitors should be treated as private.
SizeOffsetEvalType visitAllocaInst(AllocaInst &I);
SizeOffsetEvalType visitCallBase(CallBase &CB);
SizeOffsetEvalType visitExtractElementInst(ExtractElementInst &I);
SizeOffsetEvalType visitExtractValueInst(ExtractValueInst &I);
SizeOffsetEvalType visitGEPOperator(GEPOperator &GEP);
SizeOffsetEvalType visitIntToPtrInst(IntToPtrInst&);
SizeOffsetEvalType visitLoadInst(LoadInst &I);
SizeOffsetEvalType visitPHINode(PHINode &PHI);
SizeOffsetEvalType visitSelectInst(SelectInst &I);
SizeOffsetEvalType visitInstruction(Instruction &I);
};
} // end namespace llvm
#endif // LLVM_ANALYSIS_MEMORYBUILTINS_H
diff --git a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/COFF.h b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/COFF.h
index e7dde986784f..016fe0289406 100644
--- a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/COFF.h
+++ b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/COFF.h
@@ -1,737 +1,741 @@
//===-- llvm/BinaryFormat/COFF.h --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains definitions used in Windows COFF files.
//
// Structures and enums defined within this file were created using
// information from Microsoft's publicly available PE/COFF format document:
//
// Microsoft Portable Executable and Common Object File Format Specification
// Revision 8.1 - February 15, 2008
//
// As of 5/2/2010, hosted by Microsoft at:
// http://www.microsoft.com/whdc/system/platform/firmware/pecoff.mspx
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_BINARYFORMAT_COFF_H
#define LLVM_BINARYFORMAT_COFF_H
#include "llvm/Support/DataTypes.h"
#include <cassert>
#include <cstring>
namespace llvm {
namespace COFF {
// The maximum number of sections that a COFF object can have (inclusive).
const int32_t MaxNumberOfSections16 = 65279;
// The PE signature bytes that follow the DOS stub header.
static const char PEMagic[] = {'P', 'E', '\0', '\0'};
static const char BigObjMagic[] = {
'\xc7', '\xa1', '\xba', '\xd1', '\xee', '\xba', '\xa9', '\x4b',
'\xaf', '\x20', '\xfa', '\xf6', '\x6a', '\xa4', '\xdc', '\xb8',
};
static const char ClGlObjMagic[] = {
'\x38', '\xfe', '\xb3', '\x0c', '\xa5', '\xd9', '\xab', '\x4d',
'\xac', '\x9b', '\xd6', '\xb6', '\x22', '\x26', '\x53', '\xc2',
};
// The signature bytes that start a .res file.
static const char WinResMagic[] = {
'\x00', '\x00', '\x00', '\x00', '\x20', '\x00', '\x00', '\x00',
'\xff', '\xff', '\x00', '\x00', '\xff', '\xff', '\x00', '\x00',
};
// Sizes in bytes of various things in the COFF format.
enum {
Header16Size = 20,
Header32Size = 56,
NameSize = 8,
Symbol16Size = 18,
Symbol32Size = 20,
SectionSize = 40,
RelocationSize = 10
};
struct header {
uint16_t Machine;
int32_t NumberOfSections;
uint32_t TimeDateStamp;
uint32_t PointerToSymbolTable;
uint32_t NumberOfSymbols;
uint16_t SizeOfOptionalHeader;
uint16_t Characteristics;
};
struct BigObjHeader {
enum : uint16_t { MinBigObjectVersion = 2 };
uint16_t Sig1; ///< Must be IMAGE_FILE_MACHINE_UNKNOWN (0).
uint16_t Sig2; ///< Must be 0xFFFF.
uint16_t Version;
uint16_t Machine;
uint32_t TimeDateStamp;
uint8_t UUID[16];
uint32_t unused1;
uint32_t unused2;
uint32_t unused3;
uint32_t unused4;
uint32_t NumberOfSections;
uint32_t PointerToSymbolTable;
uint32_t NumberOfSymbols;
};
enum MachineTypes : unsigned {
MT_Invalid = 0xffff,
IMAGE_FILE_MACHINE_UNKNOWN = 0x0,
IMAGE_FILE_MACHINE_AM33 = 0x1D3,
IMAGE_FILE_MACHINE_AMD64 = 0x8664,
IMAGE_FILE_MACHINE_ARM = 0x1C0,
IMAGE_FILE_MACHINE_ARMNT = 0x1C4,
IMAGE_FILE_MACHINE_ARM64 = 0xAA64,
IMAGE_FILE_MACHINE_EBC = 0xEBC,
IMAGE_FILE_MACHINE_I386 = 0x14C,
IMAGE_FILE_MACHINE_IA64 = 0x200,
IMAGE_FILE_MACHINE_M32R = 0x9041,
IMAGE_FILE_MACHINE_MIPS16 = 0x266,
IMAGE_FILE_MACHINE_MIPSFPU = 0x366,
IMAGE_FILE_MACHINE_MIPSFPU16 = 0x466,
IMAGE_FILE_MACHINE_POWERPC = 0x1F0,
IMAGE_FILE_MACHINE_POWERPCFP = 0x1F1,
IMAGE_FILE_MACHINE_R4000 = 0x166,
IMAGE_FILE_MACHINE_RISCV32 = 0x5032,
IMAGE_FILE_MACHINE_RISCV64 = 0x5064,
IMAGE_FILE_MACHINE_RISCV128 = 0x5128,
IMAGE_FILE_MACHINE_SH3 = 0x1A2,
IMAGE_FILE_MACHINE_SH3DSP = 0x1A3,
IMAGE_FILE_MACHINE_SH4 = 0x1A6,
IMAGE_FILE_MACHINE_SH5 = 0x1A8,
IMAGE_FILE_MACHINE_THUMB = 0x1C2,
IMAGE_FILE_MACHINE_WCEMIPSV2 = 0x169
};
enum Characteristics : unsigned {
C_Invalid = 0,
/// The file does not contain base relocations and must be loaded at its
/// preferred base. If this cannot be done, the loader will error.
IMAGE_FILE_RELOCS_STRIPPED = 0x0001,
/// The file is valid and can be run.
IMAGE_FILE_EXECUTABLE_IMAGE = 0x0002,
/// COFF line numbers have been stripped. This is deprecated and should be
/// 0.
IMAGE_FILE_LINE_NUMS_STRIPPED = 0x0004,
/// COFF symbol table entries for local symbols have been removed. This is
/// deprecated and should be 0.
IMAGE_FILE_LOCAL_SYMS_STRIPPED = 0x0008,
/// Aggressively trim working set. This is deprecated and must be 0.
IMAGE_FILE_AGGRESSIVE_WS_TRIM = 0x0010,
/// Image can handle > 2GiB addresses.
IMAGE_FILE_LARGE_ADDRESS_AWARE = 0x0020,
/// Little endian: the LSB precedes the MSB in memory. This is deprecated
/// and should be 0.
IMAGE_FILE_BYTES_REVERSED_LO = 0x0080,
/// Machine is based on a 32bit word architecture.
IMAGE_FILE_32BIT_MACHINE = 0x0100,
/// Debugging info has been removed.
IMAGE_FILE_DEBUG_STRIPPED = 0x0200,
/// If the image is on removable media, fully load it and copy it to swap.
IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP = 0x0400,
/// If the image is on network media, fully load it and copy it to swap.
IMAGE_FILE_NET_RUN_FROM_SWAP = 0x0800,
/// The image file is a system file, not a user program.
IMAGE_FILE_SYSTEM = 0x1000,
/// The image file is a DLL.
IMAGE_FILE_DLL = 0x2000,
/// This file should only be run on a uniprocessor machine.
IMAGE_FILE_UP_SYSTEM_ONLY = 0x4000,
/// Big endian: the MSB precedes the LSB in memory. This is deprecated
/// and should be 0.
IMAGE_FILE_BYTES_REVERSED_HI = 0x8000
};
enum ResourceTypeID : unsigned {
RID_Cursor = 1,
RID_Bitmap = 2,
RID_Icon = 3,
RID_Menu = 4,
RID_Dialog = 5,
RID_String = 6,
RID_FontDir = 7,
RID_Font = 8,
RID_Accelerator = 9,
RID_RCData = 10,
RID_MessageTable = 11,
RID_Group_Cursor = 12,
RID_Group_Icon = 14,
RID_Version = 16,
RID_DLGInclude = 17,
RID_PlugPlay = 19,
RID_VXD = 20,
RID_AniCursor = 21,
RID_AniIcon = 22,
RID_HTML = 23,
RID_Manifest = 24,
};
struct symbol {
char Name[NameSize];
uint32_t Value;
int32_t SectionNumber;
uint16_t Type;
uint8_t StorageClass;
uint8_t NumberOfAuxSymbols;
};
enum SymbolSectionNumber : int32_t {
IMAGE_SYM_DEBUG = -2,
IMAGE_SYM_ABSOLUTE = -1,
IMAGE_SYM_UNDEFINED = 0
};
/// The storage class tells where and what the symbol represents.
enum SymbolStorageClass {
SSC_Invalid = 0xff,
IMAGE_SYM_CLASS_END_OF_FUNCTION = -1, ///< Physical end of function
IMAGE_SYM_CLASS_NULL = 0, ///< No symbol
IMAGE_SYM_CLASS_AUTOMATIC = 1, ///< Stack variable
IMAGE_SYM_CLASS_EXTERNAL = 2, ///< External symbol
IMAGE_SYM_CLASS_STATIC = 3, ///< Static
IMAGE_SYM_CLASS_REGISTER = 4, ///< Register variable
IMAGE_SYM_CLASS_EXTERNAL_DEF = 5, ///< External definition
IMAGE_SYM_CLASS_LABEL = 6, ///< Label
IMAGE_SYM_CLASS_UNDEFINED_LABEL = 7, ///< Undefined label
IMAGE_SYM_CLASS_MEMBER_OF_STRUCT = 8, ///< Member of structure
IMAGE_SYM_CLASS_ARGUMENT = 9, ///< Function argument
IMAGE_SYM_CLASS_STRUCT_TAG = 10, ///< Structure tag
IMAGE_SYM_CLASS_MEMBER_OF_UNION = 11, ///< Member of union
IMAGE_SYM_CLASS_UNION_TAG = 12, ///< Union tag
IMAGE_SYM_CLASS_TYPE_DEFINITION = 13, ///< Type definition
IMAGE_SYM_CLASS_UNDEFINED_STATIC = 14, ///< Undefined static
IMAGE_SYM_CLASS_ENUM_TAG = 15, ///< Enumeration tag
IMAGE_SYM_CLASS_MEMBER_OF_ENUM = 16, ///< Member of enumeration
IMAGE_SYM_CLASS_REGISTER_PARAM = 17, ///< Register parameter
IMAGE_SYM_CLASS_BIT_FIELD = 18, ///< Bit field
/// ".bb" or ".eb" - beginning or end of block
IMAGE_SYM_CLASS_BLOCK = 100,
/// ".bf" or ".ef" - beginning or end of function
IMAGE_SYM_CLASS_FUNCTION = 101,
IMAGE_SYM_CLASS_END_OF_STRUCT = 102, ///< End of structure
IMAGE_SYM_CLASS_FILE = 103, ///< File name
/// Line number, reformatted as symbol
IMAGE_SYM_CLASS_SECTION = 104,
IMAGE_SYM_CLASS_WEAK_EXTERNAL = 105, ///< Duplicate tag
/// External symbol in dmert public lib
IMAGE_SYM_CLASS_CLR_TOKEN = 107
};
enum SymbolBaseType : unsigned {
IMAGE_SYM_TYPE_NULL = 0, ///< No type information or unknown base type.
IMAGE_SYM_TYPE_VOID = 1, ///< Used with void pointers and functions.
IMAGE_SYM_TYPE_CHAR = 2, ///< A character (signed byte).
IMAGE_SYM_TYPE_SHORT = 3, ///< A 2-byte signed integer.
IMAGE_SYM_TYPE_INT = 4, ///< A natural integer type on the target.
IMAGE_SYM_TYPE_LONG = 5, ///< A 4-byte signed integer.
IMAGE_SYM_TYPE_FLOAT = 6, ///< A 4-byte floating-point number.
IMAGE_SYM_TYPE_DOUBLE = 7, ///< An 8-byte floating-point number.
IMAGE_SYM_TYPE_STRUCT = 8, ///< A structure.
IMAGE_SYM_TYPE_UNION = 9, ///< A union.
IMAGE_SYM_TYPE_ENUM = 10, ///< An enumerated type.
IMAGE_SYM_TYPE_MOE = 11, ///< A member of enumeration (a specific value).
IMAGE_SYM_TYPE_BYTE = 12, ///< A byte; unsigned 1-byte integer.
IMAGE_SYM_TYPE_WORD = 13, ///< A word; unsigned 2-byte integer.
IMAGE_SYM_TYPE_UINT = 14, ///< An unsigned integer of natural size.
IMAGE_SYM_TYPE_DWORD = 15 ///< An unsigned 4-byte integer.
};
enum SymbolComplexType : unsigned {
IMAGE_SYM_DTYPE_NULL = 0, ///< No complex type; simple scalar variable.
IMAGE_SYM_DTYPE_POINTER = 1, ///< A pointer to base type.
IMAGE_SYM_DTYPE_FUNCTION = 2, ///< A function that returns a base type.
IMAGE_SYM_DTYPE_ARRAY = 3, ///< An array of base type.
/// Type is formed as (base + (derived << SCT_COMPLEX_TYPE_SHIFT))
SCT_COMPLEX_TYPE_SHIFT = 4
};
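The SCT_COMPLEX_TYPE_SHIFT comment above describes how a symbol's Type field combines a base and a derived (complex) type; a small illustration follows, with a hypothetical helper name.
#include "llvm/BinaryFormat/COFF.h"
#include <cstdint>
// 0x4 (int base type) + (0x2 (function) << 4) == 0x24.
static uint16_t makeFunctionSymbolType() {
  using namespace llvm::COFF;
  return IMAGE_SYM_TYPE_INT +
         (IMAGE_SYM_DTYPE_FUNCTION << SCT_COMPLEX_TYPE_SHIFT);
}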
enum AuxSymbolType { IMAGE_AUX_SYMBOL_TYPE_TOKEN_DEF = 1 };
struct section {
char Name[NameSize];
uint32_t VirtualSize;
uint32_t VirtualAddress;
uint32_t SizeOfRawData;
uint32_t PointerToRawData;
uint32_t PointerToRelocations;
uint32_t PointerToLineNumbers;
uint16_t NumberOfRelocations;
uint16_t NumberOfLineNumbers;
uint32_t Characteristics;
};
enum SectionCharacteristics : uint32_t {
SC_Invalid = 0xffffffff,
IMAGE_SCN_TYPE_NOLOAD = 0x00000002,
IMAGE_SCN_TYPE_NO_PAD = 0x00000008,
IMAGE_SCN_CNT_CODE = 0x00000020,
IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040,
IMAGE_SCN_CNT_UNINITIALIZED_DATA = 0x00000080,
IMAGE_SCN_LNK_OTHER = 0x00000100,
IMAGE_SCN_LNK_INFO = 0x00000200,
IMAGE_SCN_LNK_REMOVE = 0x00000800,
IMAGE_SCN_LNK_COMDAT = 0x00001000,
IMAGE_SCN_GPREL = 0x00008000,
IMAGE_SCN_MEM_PURGEABLE = 0x00020000,
IMAGE_SCN_MEM_16BIT = 0x00020000,
IMAGE_SCN_MEM_LOCKED = 0x00040000,
IMAGE_SCN_MEM_PRELOAD = 0x00080000,
IMAGE_SCN_ALIGN_1BYTES = 0x00100000,
IMAGE_SCN_ALIGN_2BYTES = 0x00200000,
IMAGE_SCN_ALIGN_4BYTES = 0x00300000,
IMAGE_SCN_ALIGN_8BYTES = 0x00400000,
IMAGE_SCN_ALIGN_16BYTES = 0x00500000,
IMAGE_SCN_ALIGN_32BYTES = 0x00600000,
IMAGE_SCN_ALIGN_64BYTES = 0x00700000,
IMAGE_SCN_ALIGN_128BYTES = 0x00800000,
IMAGE_SCN_ALIGN_256BYTES = 0x00900000,
IMAGE_SCN_ALIGN_512BYTES = 0x00A00000,
IMAGE_SCN_ALIGN_1024BYTES = 0x00B00000,
IMAGE_SCN_ALIGN_2048BYTES = 0x00C00000,
IMAGE_SCN_ALIGN_4096BYTES = 0x00D00000,
IMAGE_SCN_ALIGN_8192BYTES = 0x00E00000,
IMAGE_SCN_ALIGN_MASK = 0x00F00000,
IMAGE_SCN_LNK_NRELOC_OVFL = 0x01000000,
IMAGE_SCN_MEM_DISCARDABLE = 0x02000000,
IMAGE_SCN_MEM_NOT_CACHED = 0x04000000,
IMAGE_SCN_MEM_NOT_PAGED = 0x08000000,
IMAGE_SCN_MEM_SHARED = 0x10000000,
IMAGE_SCN_MEM_EXECUTE = 0x20000000,
IMAGE_SCN_MEM_READ = 0x40000000,
IMAGE_SCN_MEM_WRITE = 0x80000000
};
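The IMAGE_SCN_ALIGN_* values above pack log2(alignment) + 1 into bits 20-23, so a section's alignment can be recovered as sketched below (hypothetical helper, not part of the patch).
#include "llvm/BinaryFormat/COFF.h"
#include <cstdint>
static uint32_t getSectionAlignment(uint32_t Characteristics) {
  uint32_t Encoded =
      (Characteristics & llvm::COFF::IMAGE_SCN_ALIGN_MASK) >> 20;
  // e.g. IMAGE_SCN_ALIGN_16BYTES -> Encoded == 5 -> 1 << 4 == 16.
  return Encoded ? 1u << (Encoded - 1) : 0; // 0: no alignment recorded
}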
struct relocation {
uint32_t VirtualAddress;
uint32_t SymbolTableIndex;
uint16_t Type;
};
enum RelocationTypeI386 : unsigned {
IMAGE_REL_I386_ABSOLUTE = 0x0000,
IMAGE_REL_I386_DIR16 = 0x0001,
IMAGE_REL_I386_REL16 = 0x0002,
IMAGE_REL_I386_DIR32 = 0x0006,
IMAGE_REL_I386_DIR32NB = 0x0007,
IMAGE_REL_I386_SEG12 = 0x0009,
IMAGE_REL_I386_SECTION = 0x000A,
IMAGE_REL_I386_SECREL = 0x000B,
IMAGE_REL_I386_TOKEN = 0x000C,
IMAGE_REL_I386_SECREL7 = 0x000D,
IMAGE_REL_I386_REL32 = 0x0014
};
enum RelocationTypeAMD64 : unsigned {
IMAGE_REL_AMD64_ABSOLUTE = 0x0000,
IMAGE_REL_AMD64_ADDR64 = 0x0001,
IMAGE_REL_AMD64_ADDR32 = 0x0002,
IMAGE_REL_AMD64_ADDR32NB = 0x0003,
IMAGE_REL_AMD64_REL32 = 0x0004,
IMAGE_REL_AMD64_REL32_1 = 0x0005,
IMAGE_REL_AMD64_REL32_2 = 0x0006,
IMAGE_REL_AMD64_REL32_3 = 0x0007,
IMAGE_REL_AMD64_REL32_4 = 0x0008,
IMAGE_REL_AMD64_REL32_5 = 0x0009,
IMAGE_REL_AMD64_SECTION = 0x000A,
IMAGE_REL_AMD64_SECREL = 0x000B,
IMAGE_REL_AMD64_SECREL7 = 0x000C,
IMAGE_REL_AMD64_TOKEN = 0x000D,
IMAGE_REL_AMD64_SREL32 = 0x000E,
IMAGE_REL_AMD64_PAIR = 0x000F,
IMAGE_REL_AMD64_SSPAN32 = 0x0010
};
enum RelocationTypesARM : unsigned {
IMAGE_REL_ARM_ABSOLUTE = 0x0000,
IMAGE_REL_ARM_ADDR32 = 0x0001,
IMAGE_REL_ARM_ADDR32NB = 0x0002,
IMAGE_REL_ARM_BRANCH24 = 0x0003,
IMAGE_REL_ARM_BRANCH11 = 0x0004,
IMAGE_REL_ARM_TOKEN = 0x0005,
IMAGE_REL_ARM_BLX24 = 0x0008,
IMAGE_REL_ARM_BLX11 = 0x0009,
IMAGE_REL_ARM_REL32 = 0x000A,
IMAGE_REL_ARM_SECTION = 0x000E,
IMAGE_REL_ARM_SECREL = 0x000F,
IMAGE_REL_ARM_MOV32A = 0x0010,
IMAGE_REL_ARM_MOV32T = 0x0011,
IMAGE_REL_ARM_BRANCH20T = 0x0012,
IMAGE_REL_ARM_BRANCH24T = 0x0014,
IMAGE_REL_ARM_BLX23T = 0x0015,
IMAGE_REL_ARM_PAIR = 0x0016,
};
enum RelocationTypesARM64 : unsigned {
IMAGE_REL_ARM64_ABSOLUTE = 0x0000,
IMAGE_REL_ARM64_ADDR32 = 0x0001,
IMAGE_REL_ARM64_ADDR32NB = 0x0002,
IMAGE_REL_ARM64_BRANCH26 = 0x0003,
IMAGE_REL_ARM64_PAGEBASE_REL21 = 0x0004,
IMAGE_REL_ARM64_REL21 = 0x0005,
IMAGE_REL_ARM64_PAGEOFFSET_12A = 0x0006,
IMAGE_REL_ARM64_PAGEOFFSET_12L = 0x0007,
IMAGE_REL_ARM64_SECREL = 0x0008,
IMAGE_REL_ARM64_SECREL_LOW12A = 0x0009,
IMAGE_REL_ARM64_SECREL_HIGH12A = 0x000A,
IMAGE_REL_ARM64_SECREL_LOW12L = 0x000B,
IMAGE_REL_ARM64_TOKEN = 0x000C,
IMAGE_REL_ARM64_SECTION = 0x000D,
IMAGE_REL_ARM64_ADDR64 = 0x000E,
IMAGE_REL_ARM64_BRANCH19 = 0x000F,
IMAGE_REL_ARM64_BRANCH14 = 0x0010,
IMAGE_REL_ARM64_REL32 = 0x0011,
};
enum COMDATType : uint8_t {
IMAGE_COMDAT_SELECT_NODUPLICATES = 1,
IMAGE_COMDAT_SELECT_ANY,
IMAGE_COMDAT_SELECT_SAME_SIZE,
IMAGE_COMDAT_SELECT_EXACT_MATCH,
IMAGE_COMDAT_SELECT_ASSOCIATIVE,
IMAGE_COMDAT_SELECT_LARGEST,
IMAGE_COMDAT_SELECT_NEWEST
};
// Auxiliary Symbol Formats
struct AuxiliaryFunctionDefinition {
uint32_t TagIndex;
uint32_t TotalSize;
uint32_t PointerToLinenumber;
uint32_t PointerToNextFunction;
char unused[2];
};
struct AuxiliarybfAndefSymbol {
uint8_t unused1[4];
uint16_t Linenumber;
uint8_t unused2[6];
uint32_t PointerToNextFunction;
uint8_t unused3[2];
};
struct AuxiliaryWeakExternal {
uint32_t TagIndex;
uint32_t Characteristics;
uint8_t unused[10];
};
enum WeakExternalCharacteristics : unsigned {
IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY = 1,
IMAGE_WEAK_EXTERN_SEARCH_LIBRARY = 2,
IMAGE_WEAK_EXTERN_SEARCH_ALIAS = 3,
IMAGE_WEAK_EXTERN_ANTI_DEPENDENCY = 4
};
struct AuxiliarySectionDefinition {
uint32_t Length;
uint16_t NumberOfRelocations;
uint16_t NumberOfLinenumbers;
uint32_t CheckSum;
uint32_t Number;
uint8_t Selection;
char unused;
};
struct AuxiliaryCLRToken {
uint8_t AuxType;
uint8_t unused1;
uint32_t SymbolTableIndex;
char unused2[12];
};
union Auxiliary {
AuxiliaryFunctionDefinition FunctionDefinition;
AuxiliarybfAndefSymbol bfAndefSymbol;
AuxiliaryWeakExternal WeakExternal;
AuxiliarySectionDefinition SectionDefinition;
};
/// The Import Directory Table.
///
/// There is a single array of these and one entry per imported DLL.
struct ImportDirectoryTableEntry {
uint32_t ImportLookupTableRVA;
uint32_t TimeDateStamp;
uint32_t ForwarderChain;
uint32_t NameRVA;
uint32_t ImportAddressTableRVA;
};
/// The PE32 Import Lookup Table.
///
/// There is an array of these for each imported DLL. It represents either
/// the ordinal to import from the target DLL, or a name to look up and import
/// from the target DLL.
///
/// This also happens to be the same format used by the Import Address Table
/// when it is initially written out to the image.
struct ImportLookupTableEntry32 {
uint32_t data;
/// Is this entry specified by ordinal, or name?
bool isOrdinal() const { return data & 0x80000000; }
/// Get the ordinal value of this entry. isOrdinal must be true.
uint16_t getOrdinal() const {
assert(isOrdinal() && "ILT entry is not an ordinal!");
return data & 0xFFFF;
}
/// Set the ordinal value and set isOrdinal to true.
void setOrdinal(uint16_t o) {
data = o;
data |= 0x80000000;
}
/// Get the Hint/Name entry RVA. isOrdinal must be false.
uint32_t getHintNameRVA() const {
assert(!isOrdinal() && "ILT entry is not a Hint/Name RVA!");
return data;
}
/// Set the Hint/Name entry RVA and set isOrdinal to false.
void setHintNameRVA(uint32_t rva) { data = rva; }
};
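A short sketch of the ordinal/name distinction encoded in the high bit of an import lookup table entry; the helper name is illustrative and only the accessors declared above are used.
#include "llvm/BinaryFormat/COFF.h"
#include <cassert>
#include <cstdint>
static llvm::COFF::ImportLookupTableEntry32 makeOrdinalEntry(uint16_t Ordinal) {
  llvm::COFF::ImportLookupTableEntry32 E;
  E.setOrdinal(Ordinal); // stores the ordinal and sets bit 31
  assert(E.isOrdinal() && E.getOrdinal() == Ordinal);
  return E;
}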
/// The DOS compatible header at the front of all PEs.
struct DOSHeader {
uint16_t Magic;
uint16_t UsedBytesInTheLastPage;
uint16_t FileSizeInPages;
uint16_t NumberOfRelocationItems;
uint16_t HeaderSizeInParagraphs;
uint16_t MinimumExtraParagraphs;
uint16_t MaximumExtraParagraphs;
uint16_t InitialRelativeSS;
uint16_t InitialSP;
uint16_t Checksum;
uint16_t InitialIP;
uint16_t InitialRelativeCS;
uint16_t AddressOfRelocationTable;
uint16_t OverlayNumber;
uint16_t Reserved[4];
uint16_t OEMid;
uint16_t OEMinfo;
uint16_t Reserved2[10];
uint32_t AddressOfNewExeHeader;
};
struct PE32Header {
enum { PE32 = 0x10b, PE32_PLUS = 0x20b };
uint16_t Magic;
uint8_t MajorLinkerVersion;
uint8_t MinorLinkerVersion;
uint32_t SizeOfCode;
uint32_t SizeOfInitializedData;
uint32_t SizeOfUninitializedData;
uint32_t AddressOfEntryPoint; // RVA
uint32_t BaseOfCode; // RVA
uint32_t BaseOfData; // RVA
uint64_t ImageBase;
uint32_t SectionAlignment;
uint32_t FileAlignment;
uint16_t MajorOperatingSystemVersion;
uint16_t MinorOperatingSystemVersion;
uint16_t MajorImageVersion;
uint16_t MinorImageVersion;
uint16_t MajorSubsystemVersion;
uint16_t MinorSubsystemVersion;
uint32_t Win32VersionValue;
uint32_t SizeOfImage;
uint32_t SizeOfHeaders;
uint32_t CheckSum;
uint16_t Subsystem;
// FIXME: This should be DllCharacteristics to match the COFF spec.
uint16_t DLLCharacteristics;
uint64_t SizeOfStackReserve;
uint64_t SizeOfStackCommit;
uint64_t SizeOfHeapReserve;
uint64_t SizeOfHeapCommit;
uint32_t LoaderFlags;
// FIXME: This should be NumberOfRvaAndSizes to match the COFF spec.
uint32_t NumberOfRvaAndSize;
};
struct DataDirectory {
uint32_t RelativeVirtualAddress;
uint32_t Size;
};
enum DataDirectoryIndex : unsigned {
EXPORT_TABLE = 0,
IMPORT_TABLE,
RESOURCE_TABLE,
EXCEPTION_TABLE,
CERTIFICATE_TABLE,
BASE_RELOCATION_TABLE,
DEBUG_DIRECTORY,
ARCHITECTURE,
GLOBAL_PTR,
TLS_TABLE,
LOAD_CONFIG_TABLE,
BOUND_IMPORT,
IAT,
DELAY_IMPORT_DESCRIPTOR,
CLR_RUNTIME_HEADER,
NUM_DATA_DIRECTORIES
};
enum WindowsSubsystem : unsigned {
IMAGE_SUBSYSTEM_UNKNOWN = 0, ///< An unknown subsystem.
IMAGE_SUBSYSTEM_NATIVE = 1, ///< Device drivers and native Windows processes
IMAGE_SUBSYSTEM_WINDOWS_GUI = 2, ///< The Windows GUI subsystem.
IMAGE_SUBSYSTEM_WINDOWS_CUI = 3, ///< The Windows character subsystem.
IMAGE_SUBSYSTEM_OS2_CUI = 5, ///< The OS/2 character subsystem.
IMAGE_SUBSYSTEM_POSIX_CUI = 7, ///< The POSIX character subsystem.
IMAGE_SUBSYSTEM_NATIVE_WINDOWS = 8, ///< Native Windows 9x driver.
IMAGE_SUBSYSTEM_WINDOWS_CE_GUI = 9, ///< Windows CE.
IMAGE_SUBSYSTEM_EFI_APPLICATION = 10, ///< An EFI application.
IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER = 11, ///< An EFI driver with boot
/// services.
IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER = 12, ///< An EFI driver with run-time
/// services.
IMAGE_SUBSYSTEM_EFI_ROM = 13, ///< An EFI ROM image.
IMAGE_SUBSYSTEM_XBOX = 14, ///< XBOX.
IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION = 16 ///< A BCD application.
};
enum DLLCharacteristics : unsigned {
/// ASLR with 64 bit address space.
IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA = 0x0020,
/// DLL can be relocated at load time.
IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE = 0x0040,
/// Code integrity checks are enforced.
IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY = 0x0080,
/// Image is NX compatible.
IMAGE_DLL_CHARACTERISTICS_NX_COMPAT = 0x0100,
/// Isolation aware, but do not isolate the image.
IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION = 0x0200,
/// Does not use structured exception handling (SEH). No SEH handler may be
/// called in this image.
IMAGE_DLL_CHARACTERISTICS_NO_SEH = 0x0400,
/// Do not bind the image.
IMAGE_DLL_CHARACTERISTICS_NO_BIND = 0x0800,
/// Image should execute in an AppContainer.
IMAGE_DLL_CHARACTERISTICS_APPCONTAINER = 0x1000,
/// A WDM driver.
IMAGE_DLL_CHARACTERISTICS_WDM_DRIVER = 0x2000,
/// Image supports Control Flow Guard.
IMAGE_DLL_CHARACTERISTICS_GUARD_CF = 0x4000,
/// Terminal Server aware.
IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE = 0x8000
};
enum ExtendedDLLCharacteristics : unsigned {
/// Image is CET compatible
IMAGE_DLL_CHARACTERISTICS_EX_CET_COMPAT = 0x0001
};
enum DebugType : unsigned {
IMAGE_DEBUG_TYPE_UNKNOWN = 0,
IMAGE_DEBUG_TYPE_COFF = 1,
IMAGE_DEBUG_TYPE_CODEVIEW = 2,
IMAGE_DEBUG_TYPE_FPO = 3,
IMAGE_DEBUG_TYPE_MISC = 4,
IMAGE_DEBUG_TYPE_EXCEPTION = 5,
IMAGE_DEBUG_TYPE_FIXUP = 6,
IMAGE_DEBUG_TYPE_OMAP_TO_SRC = 7,
IMAGE_DEBUG_TYPE_OMAP_FROM_SRC = 8,
IMAGE_DEBUG_TYPE_BORLAND = 9,
IMAGE_DEBUG_TYPE_RESERVED10 = 10,
IMAGE_DEBUG_TYPE_CLSID = 11,
IMAGE_DEBUG_TYPE_VC_FEATURE = 12,
IMAGE_DEBUG_TYPE_POGO = 13,
IMAGE_DEBUG_TYPE_ILTCG = 14,
IMAGE_DEBUG_TYPE_MPX = 15,
IMAGE_DEBUG_TYPE_REPRO = 16,
IMAGE_DEBUG_TYPE_EX_DLLCHARACTERISTICS = 20,
};
enum BaseRelocationType : unsigned {
IMAGE_REL_BASED_ABSOLUTE = 0,
IMAGE_REL_BASED_HIGH = 1,
IMAGE_REL_BASED_LOW = 2,
IMAGE_REL_BASED_HIGHLOW = 3,
IMAGE_REL_BASED_HIGHADJ = 4,
IMAGE_REL_BASED_MIPS_JMPADDR = 5,
IMAGE_REL_BASED_ARM_MOV32A = 5,
IMAGE_REL_BASED_ARM_MOV32T = 7,
IMAGE_REL_BASED_MIPS_JMPADDR16 = 9,
IMAGE_REL_BASED_DIR64 = 10
};
enum ImportType : unsigned {
IMPORT_CODE = 0,
IMPORT_DATA = 1,
IMPORT_CONST = 2
};
enum ImportNameType : unsigned {
/// Import is by ordinal. This indicates that the value in the Ordinal/Hint
/// field of the import header is the import's ordinal. If this constant is
/// not specified, then the Ordinal/Hint field should always be interpreted
/// as the import's hint.
IMPORT_ORDINAL = 0,
/// The import name is identical to the public symbol name
IMPORT_NAME = 1,
/// The import name is the public symbol name, but skipping the leading ?,
/// @, or optionally _.
IMPORT_NAME_NOPREFIX = 2,
/// The import name is the public symbol name, but skipping the leading ?,
/// @, or optionally _, and truncating at the first @.
IMPORT_NAME_UNDECORATE = 3
};
struct ImportHeader {
uint16_t Sig1; ///< Must be IMAGE_FILE_MACHINE_UNKNOWN (0).
uint16_t Sig2; ///< Must be 0xFFFF.
uint16_t Version;
uint16_t Machine;
uint32_t TimeDateStamp;
uint32_t SizeOfData;
uint16_t OrdinalHint;
uint16_t TypeInfo;
ImportType getType() const { return static_cast<ImportType>(TypeInfo & 0x3); }
ImportNameType getNameType() const {
return static_cast<ImportNameType>((TypeInfo & 0x1C) >> 2);
}
};
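For reference, the two TypeInfo accessors above can be combined to classify a short import library member; the predicate name is hypothetical.
#include "llvm/BinaryFormat/COFF.h"
static bool isDataImportByName(const llvm::COFF::ImportHeader &H) {
  using namespace llvm::COFF;
  // Bits 0-1 give the import type, bits 2-4 the name type.
  return H.getType() == IMPORT_DATA && H.getNameType() == IMPORT_NAME;
}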
enum CodeViewIdentifiers {
DEBUG_SECTION_MAGIC = 0x4,
DEBUG_HASHES_SECTION_MAGIC = 0x133C9C5
};
inline bool isReservedSectionNumber(int32_t SectionNumber) {
return SectionNumber <= 0;
}
+/// Encode section name based on string table offset.
+/// The size of Out must be at least COFF::NameSize.
+bool encodeSectionName(char *Out, uint64_t Offset);
+
} // End namespace COFF.
} // End namespace llvm.
#endif
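The newly declared encodeSectionName can be exercised as below. Only the contract stated in the header is relied on (Out must hold at least COFF::NameSize bytes, success is reported via the return value); the exact encoding is an implementation detail of the .cpp side, and setLongSectionName is a hypothetical wrapper.
#include "llvm/BinaryFormat/COFF.h"
#include <cstdint>
static bool setLongSectionName(llvm::COFF::section &Sec,
                               uint64_t StringTableOffset) {
  // Sec.Name is char[COFF::NameSize], which satisfies the size requirement.
  return llvm::COFF::encodeSectionName(Sec.Name, StringTableOffset);
}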
diff --git a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/DynamicTags.def b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/DynamicTags.def
index 814d8b113ec4..ae25ec53813c 100644
--- a/contrib/llvm-project/llvm/include/llvm/BinaryFormat/DynamicTags.def
+++ b/contrib/llvm-project/llvm/include/llvm/BinaryFormat/DynamicTags.def
@@ -1,257 +1,258 @@
#ifndef DYNAMIC_TAG
#error "DYNAMIC_TAG must be defined"
#endif
// Add separate macros for the architecture-specific tags and the markers
// such as DT_HIOS, etc., to allow using this file in other contexts.
// For example, we can use it to generate a stringification switch statement
// (a sketch of that pattern appears after this hunk).
#ifndef AARCH64_DYNAMIC_TAG
#define AARCH64_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
#define AARCH64_DYNAMIC_TAG_DEFINED
#endif
#ifndef HEXAGON_DYNAMIC_TAG
#define HEXAGON_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
#define HEXAGON_DYNAMIC_TAG_DEFINED
#endif
#ifndef MIPS_DYNAMIC_TAG
#define MIPS_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
#define MIPS_DYNAMIC_TAG_DEFINED
#endif
#ifndef PPC_DYNAMIC_TAG
#define PPC_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
#define PPC_DYNAMIC_TAG_DEFINED
#endif
#ifndef PPC64_DYNAMIC_TAG
#define PPC64_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
#define PPC64_DYNAMIC_TAG_DEFINED
#endif
#ifndef RISCV_DYNAMIC_TAG
#define RISCV_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
#define RISCV_DYNAMIC_TAG_DEFINED
#endif
#ifndef DYNAMIC_TAG_MARKER
#define DYNAMIC_TAG_MARKER(name, value) DYNAMIC_TAG(name, value)
#define DYNAMIC_TAG_MARKER_DEFINED
#endif
DYNAMIC_TAG(NULL, 0) // Marks end of dynamic array.
DYNAMIC_TAG(NEEDED, 1) // String table offset of needed library.
DYNAMIC_TAG(PLTRELSZ, 2) // Size of relocation entries in PLT.
DYNAMIC_TAG(PLTGOT, 3) // Address associated with linkage table.
DYNAMIC_TAG(HASH, 4) // Address of symbolic hash table.
DYNAMIC_TAG(STRTAB, 5) // Address of dynamic string table.
DYNAMIC_TAG(SYMTAB, 6) // Address of dynamic symbol table.
DYNAMIC_TAG(RELA, 7) // Address of relocation table (Rela entries).
DYNAMIC_TAG(RELASZ, 8) // Size of Rela relocation table.
DYNAMIC_TAG(RELAENT, 9) // Size of a Rela relocation entry.
DYNAMIC_TAG(STRSZ, 10) // Total size of the string table.
DYNAMIC_TAG(SYMENT, 11) // Size of a symbol table entry.
DYNAMIC_TAG(INIT, 12) // Address of initialization function.
DYNAMIC_TAG(FINI, 13) // Address of termination function.
DYNAMIC_TAG(SONAME, 14) // String table offset of a shared objects name.
DYNAMIC_TAG(RPATH, 15) // String table offset of library search path.
DYNAMIC_TAG(SYMBOLIC, 16) // Changes symbol resolution algorithm.
DYNAMIC_TAG(REL, 17) // Address of relocation table (Rel entries).
DYNAMIC_TAG(RELSZ, 18) // Size of Rel relocation table.
DYNAMIC_TAG(RELENT, 19) // Size of a Rel relocation entry.
DYNAMIC_TAG(PLTREL, 20) // Type of relocation entry used for linking.
DYNAMIC_TAG(DEBUG, 21) // Reserved for debugger.
DYNAMIC_TAG(TEXTREL, 22) // Relocations exist for non-writable segments.
DYNAMIC_TAG(JMPREL, 23) // Address of relocations associated with PLT.
DYNAMIC_TAG(BIND_NOW, 24) // Process all relocations before execution.
DYNAMIC_TAG(INIT_ARRAY, 25) // Pointer to array of initialization functions.
DYNAMIC_TAG(FINI_ARRAY, 26) // Pointer to array of termination functions.
DYNAMIC_TAG(INIT_ARRAYSZ, 27) // Size of DT_INIT_ARRAY.
DYNAMIC_TAG(FINI_ARRAYSZ, 28) // Size of DT_FINI_ARRAY.
DYNAMIC_TAG(RUNPATH, 29) // String table offset of lib search path.
DYNAMIC_TAG(FLAGS, 30) // Flags.
DYNAMIC_TAG_MARKER(ENCODING, 32) // Values from here to DT_LOOS follow the rules
// for the interpretation of the d_un union.
DYNAMIC_TAG(PREINIT_ARRAY, 32) // Pointer to array of preinit functions.
DYNAMIC_TAG(PREINIT_ARRAYSZ, 33) // Size of the DT_PREINIT_ARRAY array.
DYNAMIC_TAG(SYMTAB_SHNDX, 34) // Address of the SHT_SYMTAB_SHNDX section.
// Experimental support for SHT_RELR sections. For details, see proposal
// at https://groups.google.com/forum/#!topic/generic-abi/bX460iggiKg
DYNAMIC_TAG(RELRSZ, 35) // Size of Relr relocation table.
DYNAMIC_TAG(RELR, 36) // Address of relocation table (Relr entries).
DYNAMIC_TAG(RELRENT, 37) // Size of a Relr relocation entry.
DYNAMIC_TAG_MARKER(LOOS, 0x60000000) // Start of environment specific tags.
DYNAMIC_TAG_MARKER(HIOS, 0x6FFFFFFF) // End of environment specific tags.
DYNAMIC_TAG_MARKER(LOPROC, 0x70000000) // Start of processor specific tags.
DYNAMIC_TAG_MARKER(HIPROC, 0x7FFFFFFF) // End of processor specific tags.
// Android packed relocation section tags.
// https://android.googlesource.com/platform/bionic/+/6f12bfece5dcc01325e0abba56a46b1bcf991c69/tools/relocation_packer/src/elf_file.cc#31
DYNAMIC_TAG(ANDROID_REL, 0x6000000F)
DYNAMIC_TAG(ANDROID_RELSZ, 0x60000010)
DYNAMIC_TAG(ANDROID_RELA, 0x60000011)
DYNAMIC_TAG(ANDROID_RELASZ, 0x60000012)
// Android's experimental support for SHT_RELR sections.
// https://android.googlesource.com/platform/bionic/+/b7feec74547f84559a1467aca02708ff61346d2a/libc/include/elf.h#253
DYNAMIC_TAG(ANDROID_RELR, 0x6FFFE000) // Address of relocation table (Relr entries).
DYNAMIC_TAG(ANDROID_RELRSZ, 0x6FFFE001) // Size of Relr relocation table.
DYNAMIC_TAG(ANDROID_RELRENT, 0x6FFFE003) // Size of a Relr relocation entry.
DYNAMIC_TAG(GNU_HASH, 0x6FFFFEF5) // Reference to the GNU hash table.
DYNAMIC_TAG(TLSDESC_PLT, 0x6FFFFEF6) // Location of PLT entry for TLS
// descriptor resolver calls.
DYNAMIC_TAG(TLSDESC_GOT, 0x6FFFFEF7) // Location of GOT entry used by TLS
// descriptor resolver PLT entry.
DYNAMIC_TAG(RELACOUNT, 0x6FFFFFF9) // ELF32_Rela count.
DYNAMIC_TAG(RELCOUNT, 0x6FFFFFFA) // ELF32_Rel count.
DYNAMIC_TAG(FLAGS_1, 0X6FFFFFFB) // Flags_1.
DYNAMIC_TAG(VERSYM, 0x6FFFFFF0) // The address of .gnu.version section.
DYNAMIC_TAG(VERDEF, 0X6FFFFFFC) // The address of the version definition
// table.
DYNAMIC_TAG(VERDEFNUM, 0X6FFFFFFD) // The number of entries in DT_VERDEF.
DYNAMIC_TAG(VERNEED, 0X6FFFFFFE) // The address of the version dependency
// table.
DYNAMIC_TAG(VERNEEDNUM, 0X6FFFFFFF) // The number of entries in DT_VERNEED.
// AArch64 specific dynamic table entries
AARCH64_DYNAMIC_TAG(AARCH64_BTI_PLT, 0x70000001)
AARCH64_DYNAMIC_TAG(AARCH64_PAC_PLT, 0x70000003)
AARCH64_DYNAMIC_TAG(AARCH64_VARIANT_PCS, 0x70000005)
// Hexagon specific dynamic table entries
HEXAGON_DYNAMIC_TAG(HEXAGON_SYMSZ, 0x70000000)
HEXAGON_DYNAMIC_TAG(HEXAGON_VER, 0x70000001)
HEXAGON_DYNAMIC_TAG(HEXAGON_PLT, 0x70000002)
// Mips specific dynamic table entry tags.
MIPS_DYNAMIC_TAG(MIPS_RLD_VERSION, 0x70000001) // 32 bit version number for
// runtime linker interface.
MIPS_DYNAMIC_TAG(MIPS_TIME_STAMP, 0x70000002) // Time stamp.
MIPS_DYNAMIC_TAG(MIPS_ICHECKSUM, 0x70000003) // Checksum of external strings
// and common sizes.
MIPS_DYNAMIC_TAG(MIPS_IVERSION, 0x70000004) // Index of version string
// in string table.
MIPS_DYNAMIC_TAG(MIPS_FLAGS, 0x70000005) // 32 bits of flags.
MIPS_DYNAMIC_TAG(MIPS_BASE_ADDRESS, 0x70000006) // Base address of the segment.
MIPS_DYNAMIC_TAG(MIPS_MSYM, 0x70000007) // Address of .msym section.
MIPS_DYNAMIC_TAG(MIPS_CONFLICT, 0x70000008) // Address of .conflict section.
MIPS_DYNAMIC_TAG(MIPS_LIBLIST, 0x70000009) // Address of .liblist section.
MIPS_DYNAMIC_TAG(MIPS_LOCAL_GOTNO, 0x7000000a) // Number of local global offset
// table entries.
MIPS_DYNAMIC_TAG(MIPS_CONFLICTNO, 0x7000000b) // Number of entries
// in the .conflict section.
MIPS_DYNAMIC_TAG(MIPS_LIBLISTNO, 0x70000010) // Number of entries
// in the .liblist section.
MIPS_DYNAMIC_TAG(MIPS_SYMTABNO, 0x70000011) // Number of entries
// in the .dynsym section.
MIPS_DYNAMIC_TAG(MIPS_UNREFEXTNO, 0x70000012) // Index of first external dynamic
// symbol not referenced locally.
MIPS_DYNAMIC_TAG(MIPS_GOTSYM, 0x70000013) // Index of first dynamic symbol
// in global offset table.
MIPS_DYNAMIC_TAG(MIPS_HIPAGENO, 0x70000014) // Number of page table entries
// in global offset table.
MIPS_DYNAMIC_TAG(MIPS_RLD_MAP, 0x70000016) // Address of run time loader map
// used for debugging.
MIPS_DYNAMIC_TAG(MIPS_DELTA_CLASS, 0x70000017) // Delta C++ class definition.
MIPS_DYNAMIC_TAG(MIPS_DELTA_CLASS_NO, 0x70000018) // Number of entries
// in DT_MIPS_DELTA_CLASS.
MIPS_DYNAMIC_TAG(MIPS_DELTA_INSTANCE, 0x70000019) // Delta C++ class instances.
MIPS_DYNAMIC_TAG(MIPS_DELTA_INSTANCE_NO, 0x7000001A) // Number of entries
// in DT_MIPS_DELTA_INSTANCE.
MIPS_DYNAMIC_TAG(MIPS_DELTA_RELOC, 0x7000001B) // Delta relocations.
MIPS_DYNAMIC_TAG(MIPS_DELTA_RELOC_NO, 0x7000001C) // Number of entries
// in DT_MIPS_DELTA_RELOC.
MIPS_DYNAMIC_TAG(MIPS_DELTA_SYM, 0x7000001D) // Delta symbols that Delta
// relocations refer to.
MIPS_DYNAMIC_TAG(MIPS_DELTA_SYM_NO, 0x7000001E) // Number of entries
// in DT_MIPS_DELTA_SYM.
MIPS_DYNAMIC_TAG(MIPS_DELTA_CLASSSYM, 0x70000020) // Delta symbols that hold
// class declarations.
MIPS_DYNAMIC_TAG(MIPS_DELTA_CLASSSYM_NO, 0x70000021) // Number of entries
// in DT_MIPS_DELTA_CLASSSYM.
MIPS_DYNAMIC_TAG(MIPS_CXX_FLAGS, 0x70000022) // Flags indicating information
// about C++ flavor.
MIPS_DYNAMIC_TAG(MIPS_PIXIE_INIT, 0x70000023) // Pixie information.
MIPS_DYNAMIC_TAG(MIPS_SYMBOL_LIB, 0x70000024) // Address of .MIPS.symlib
MIPS_DYNAMIC_TAG(MIPS_LOCALPAGE_GOTIDX, 0x70000025) // The GOT index of the first PTE
// for a segment
MIPS_DYNAMIC_TAG(MIPS_LOCAL_GOTIDX, 0x70000026) // The GOT index of the first PTE
// for a local symbol
MIPS_DYNAMIC_TAG(MIPS_HIDDEN_GOTIDX, 0x70000027) // The GOT index of the first PTE
// for a hidden symbol
MIPS_DYNAMIC_TAG(MIPS_PROTECTED_GOTIDX, 0x70000028) // The GOT index of the first PTE
// for a protected symbol
MIPS_DYNAMIC_TAG(MIPS_OPTIONS, 0x70000029) // Address of `.MIPS.options'.
MIPS_DYNAMIC_TAG(MIPS_INTERFACE, 0x7000002A) // Address of `.interface'.
MIPS_DYNAMIC_TAG(MIPS_DYNSTR_ALIGN, 0x7000002B) // Unknown.
MIPS_DYNAMIC_TAG(MIPS_INTERFACE_SIZE, 0x7000002C) // Size of the .interface section.
MIPS_DYNAMIC_TAG(MIPS_RLD_TEXT_RESOLVE_ADDR, 0x7000002D) // Size of rld_text_resolve
// function stored in the GOT.
MIPS_DYNAMIC_TAG(MIPS_PERF_SUFFIX, 0x7000002E) // Default suffix of DSO to be added
// by rld on dlopen() calls.
MIPS_DYNAMIC_TAG(MIPS_COMPACT_SIZE, 0x7000002F) // Size of compact relocation
// section (O32).
MIPS_DYNAMIC_TAG(MIPS_GP_VALUE, 0x70000030) // GP value for auxiliary GOTs.
MIPS_DYNAMIC_TAG(MIPS_AUX_DYNAMIC, 0x70000031) // Address of auxiliary .dynamic.
MIPS_DYNAMIC_TAG(MIPS_PLTGOT, 0x70000032) // Address of the base of the PLTGOT.
MIPS_DYNAMIC_TAG(MIPS_RWPLT, 0x70000034) // Points to the base
// of a writable PLT.
MIPS_DYNAMIC_TAG(MIPS_RLD_MAP_REL, 0x70000035) // Relative offset of run time loader
// map, used for debugging.
+MIPS_DYNAMIC_TAG(MIPS_XHASH, 0x70000036) // GNU-style hash table with xlat.
// PPC specific dynamic table entries.
PPC_DYNAMIC_TAG(PPC_GOT, 0x70000000) // Uses Secure PLT ABI.
PPC_DYNAMIC_TAG(PPC_OPT, 0x70000001) // Has TLS optimization.
// PPC64 specific dynamic table entries.
PPC64_DYNAMIC_TAG(PPC64_GLINK, 0x70000000) // Address of 32 bytes before the
// first glink lazy resolver stub.
// RISC-V specific dynamic array tags.
RISCV_DYNAMIC_TAG(RISCV_VARIANT_CC, 0x70000001)
// Sun machine-independent extensions.
DYNAMIC_TAG(AUXILIARY, 0x7FFFFFFD) // Shared object to load before self
DYNAMIC_TAG(USED, 0x7FFFFFFE) // Same as DT_NEEDED
DYNAMIC_TAG(FILTER, 0x7FFFFFFF) // Shared object to get values from
#ifdef DYNAMIC_TAG_MARKER_DEFINED
#undef DYNAMIC_TAG_MARKER
#undef DYNAMIC_TAG_MARKER_DEFINED
#endif
#ifdef AARCH64_DYNAMIC_TAG_DEFINED
#undef AARCH64_DYNAMIC_TAG
#undef AARCH64_DYNAMIC_TAG_DEFINED
#endif
#ifdef MIPS_DYNAMIC_TAG_DEFINED
#undef MIPS_DYNAMIC_TAG
#undef MIPS_DYNAMIC_TAG_DEFINED
#endif
#ifdef HEXAGON_DYNAMIC_TAG_DEFINED
#undef HEXAGON_DYNAMIC_TAG
#undef HEXAGON_DYNAMIC_TAG_DEFINED
#endif
#ifdef PPC_DYNAMIC_TAG_DEFINED
#undef PPC_DYNAMIC_TAG
#undef PPC_DYNAMIC_TAG_DEFINED
#endif
#ifdef PPC64_DYNAMIC_TAG_DEFINED
#undef PPC64_DYNAMIC_TAG
#undef PPC64_DYNAMIC_TAG_DEFINED
#endif
#ifdef RISCV_DYNAMIC_TAG_DEFINED
#undef RISCV_DYNAMIC_TAG
#undef RISCV_DYNAMIC_TAG_DEFINED
#endif
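As a sketch of the stringification switch mentioned at the top of this hunk (the function name and fallback string are illustrative): only the generic DYNAMIC_TAG entries are expanded, while the marker and per-architecture macros are defined away so overlapping values such as ENCODING and PREINIT_ARRAY (both 32) cannot produce duplicate case labels.
#include <cstdint>
static const char *genericDynamicTagName(uint64_t Tag) {
  switch (Tag) {
#define DYNAMIC_TAG(name, value)                                               \
  case value:                                                                  \
    return "DT_" #name;
#define DYNAMIC_TAG_MARKER(name, value)
#define AARCH64_DYNAMIC_TAG(name, value)
#define HEXAGON_DYNAMIC_TAG(name, value)
#define MIPS_DYNAMIC_TAG(name, value)
#define PPC_DYNAMIC_TAG(name, value)
#define PPC64_DYNAMIC_TAG(name, value)
#define RISCV_DYNAMIC_TAG(name, value)
#include "llvm/BinaryFormat/DynamicTags.def"
#undef DYNAMIC_TAG
#undef DYNAMIC_TAG_MARKER
#undef AARCH64_DYNAMIC_TAG
#undef HEXAGON_DYNAMIC_TAG
#undef MIPS_DYNAMIC_TAG
#undef PPC_DYNAMIC_TAG
#undef PPC64_DYNAMIC_TAG
#undef RISCV_DYNAMIC_TAG
  default:
    return "DT_<unknown>";
  }
}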
diff --git a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h
index 92de5882bafe..354984b540a9 100644
--- a/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h
@@ -1,222 +1,222 @@
//===--- EPCIndirectionUtils.h - EPC based indirection utils ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Indirection utilities (stubs, trampolines, lazy call-throughs) that use the
// ExecutorProcessControl API to interact with the executor process.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_EXECUTIONENGINE_ORC_EPCINDIRECTIONUTILS_H
#define LLVM_EXECUTIONENGINE_ORC_EPCINDIRECTIONUTILS_H
#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
#include "llvm/ExecutionEngine/Orc/LazyReexports.h"
#include <mutex>
namespace llvm {
namespace orc {
class ExecutorProcessControl;
/// Provides ExecutorProcessControl-based indirect stubs, a trampoline pool,
/// and a lazy call-through manager.
class EPCIndirectionUtils {
friend class EPCIndirectionUtilsAccess;
public:
/// ABI support base class. Used to write resolver, stub, and trampoline
/// blocks.
class ABISupport {
protected:
ABISupport(unsigned PointerSize, unsigned TrampolineSize, unsigned StubSize,
unsigned StubToPointerMaxDisplacement, unsigned ResolverCodeSize)
: PointerSize(PointerSize), TrampolineSize(TrampolineSize),
StubSize(StubSize),
StubToPointerMaxDisplacement(StubToPointerMaxDisplacement),
ResolverCodeSize(ResolverCodeSize) {}
public:
virtual ~ABISupport();
unsigned getPointerSize() const { return PointerSize; }
unsigned getTrampolineSize() const { return TrampolineSize; }
unsigned getStubSize() const { return StubSize; }
unsigned getStubToPointerMaxDisplacement() const {
return StubToPointerMaxDisplacement;
}
unsigned getResolverCodeSize() const { return ResolverCodeSize; }
virtual void writeResolverCode(char *ResolverWorkingMem,
JITTargetAddress ResolverTargetAddr,
JITTargetAddress ReentryFnAddr,
JITTargetAddress ReentryCtxAddr) const = 0;
virtual void writeTrampolines(char *TrampolineBlockWorkingMem,
JITTargetAddress TrampolineBlockTargetAddr,
JITTargetAddress ResolverAddr,
unsigned NumTrampolines) const = 0;
virtual void
writeIndirectStubsBlock(char *StubsBlockWorkingMem,
JITTargetAddress StubsBlockTargetAddress,
JITTargetAddress PointersBlockTargetAddress,
unsigned NumStubs) const = 0;
private:
unsigned PointerSize = 0;
unsigned TrampolineSize = 0;
unsigned StubSize = 0;
unsigned StubToPointerMaxDisplacement = 0;
unsigned ResolverCodeSize = 0;
};
/// Create using the given ABI class.
template <typename ORCABI>
static std::unique_ptr<EPCIndirectionUtils>
CreateWithABI(ExecutorProcessControl &EPC);
/// Create based on the ExecutorProcessControl triple.
static Expected<std::unique_ptr<EPCIndirectionUtils>>
Create(ExecutorProcessControl &EPC);
/// Return a reference to the ExecutorProcessControl object.
ExecutorProcessControl &getExecutorProcessControl() const { return EPC; }
/// Return a reference to the ABISupport object for this instance.
ABISupport &getABISupport() const { return *ABI; }
/// Release memory for resources held by this instance. This *must* be called
/// prior to destruction of the class.
Error cleanup();
/// Write resolver code to the executor process and return its address.
/// This must be called before any call to createTrampolinePool or
/// createLazyCallThroughManager.
Expected<JITTargetAddress>
writeResolverBlock(JITTargetAddress ReentryFnAddr,
JITTargetAddress ReentryCtxAddr);
/// Returns the address of the Resolver block. Returns zero if the
/// writeResolverBlock method has not previously been called.
JITTargetAddress getResolverBlockAddress() const { return ResolverBlockAddr; }
/// Create an IndirectStubsManager for the executor process.
std::unique_ptr<IndirectStubsManager> createIndirectStubsManager();
/// Create a TrampolinePool for the executor process.
TrampolinePool &getTrampolinePool();
/// Create a LazyCallThroughManager.
/// This function should only be called once.
LazyCallThroughManager &
createLazyCallThroughManager(ExecutionSession &ES,
JITTargetAddress ErrorHandlerAddr);
/// Create a LazyCallThroughManager for the executor process.
LazyCallThroughManager &getLazyCallThroughManager() {
assert(LCTM && "createLazyCallThroughManager must be called first");
return *LCTM;
}
private:
using FinalizedAlloc = jitlink::JITLinkMemoryManager::FinalizedAlloc;
struct IndirectStubInfo {
IndirectStubInfo() = default;
IndirectStubInfo(JITTargetAddress StubAddress,
JITTargetAddress PointerAddress)
: StubAddress(StubAddress), PointerAddress(PointerAddress) {}
JITTargetAddress StubAddress = 0;
JITTargetAddress PointerAddress = 0;
};
using IndirectStubInfoVector = std::vector<IndirectStubInfo>;
/// Create an EPCIndirectionUtils instance.
EPCIndirectionUtils(ExecutorProcessControl &EPC,
std::unique_ptr<ABISupport> ABI);
Expected<IndirectStubInfoVector> getIndirectStubs(unsigned NumStubs);
std::mutex EPCUIMutex;
ExecutorProcessControl &EPC;
std::unique_ptr<ABISupport> ABI;
- JITTargetAddress ResolverBlockAddr;
+ JITTargetAddress ResolverBlockAddr = 0;
FinalizedAlloc ResolverBlock;
std::unique_ptr<TrampolinePool> TP;
std::unique_ptr<LazyCallThroughManager> LCTM;
std::vector<IndirectStubInfo> AvailableIndirectStubs;
std::vector<FinalizedAlloc> IndirectStubAllocs;
};
/// This will call writeResolverBlock on the given EPCIndirectionUtils instance
/// to set up re-entry via a function that will directly return the trampoline
/// landing address.
///
/// The EPCIndirectionUtils' LazyCallThroughManager must have been previously
/// created via EPCIndirectionUtils::createLazyCallThroughManager.
///
/// The EPCIndirectionUtils' writeResolverBlock method must not have been
/// called.
///
/// This function is experimental and likely subject to revision.
Error setUpInProcessLCTMReentryViaEPCIU(EPCIndirectionUtils &EPCIU);
namespace detail {
template <typename ORCABI>
class ABISupportImpl : public EPCIndirectionUtils::ABISupport {
public:
ABISupportImpl()
: ABISupport(ORCABI::PointerSize, ORCABI::TrampolineSize,
ORCABI::StubSize, ORCABI::StubToPointerMaxDisplacement,
ORCABI::ResolverCodeSize) {}
void writeResolverCode(char *ResolverWorkingMem,
JITTargetAddress ResolverTargetAddr,
JITTargetAddress ReentryFnAddr,
JITTargetAddress ReentryCtxAddr) const override {
ORCABI::writeResolverCode(ResolverWorkingMem, ResolverTargetAddr,
ReentryFnAddr, ReentryCtxAddr);
}
void writeTrampolines(char *TrampolineBlockWorkingMem,
JITTargetAddress TrampolineBlockTargetAddr,
JITTargetAddress ResolverAddr,
unsigned NumTrampolines) const override {
ORCABI::writeTrampolines(TrampolineBlockWorkingMem,
TrampolineBlockTargetAddr, ResolverAddr,
NumTrampolines);
}
void writeIndirectStubsBlock(char *StubsBlockWorkingMem,
JITTargetAddress StubsBlockTargetAddress,
JITTargetAddress PointersBlockTargetAddress,
unsigned NumStubs) const override {
ORCABI::writeIndirectStubsBlock(StubsBlockWorkingMem,
StubsBlockTargetAddress,
PointersBlockTargetAddress, NumStubs);
}
};
} // end namespace detail
template <typename ORCABI>
std::unique_ptr<EPCIndirectionUtils>
EPCIndirectionUtils::CreateWithABI(ExecutorProcessControl &EPC) {
return std::unique_ptr<EPCIndirectionUtils>(new EPCIndirectionUtils(
EPC, std::make_unique<detail::ABISupportImpl<ORCABI>>()));
}
} // end namespace orc
} // end namespace llvm
#endif // LLVM_EXECUTIONENGINE_ORC_EPCINDIRECTIONUTILS_H
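A hedged sketch of how the class above is typically driven; the helper name, its parameters, and the mention of CompileOnDemandLayer are illustrative, and the ExecutionSession/ExecutorProcessControl set-up plus the reentry and error-handler addresses are assumed to exist elsewhere.
#include "llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h"
static llvm::Error
setUpLazyCompilation(llvm::orc::ExecutorProcessControl &EPC,
                     llvm::orc::ExecutionSession &ES,
                     llvm::JITTargetAddress ErrorHandlerAddr,
                     llvm::JITTargetAddress ReentryFnAddr,
                     llvm::JITTargetAddress ReentryCtxAddr) {
  auto EPCIU = llvm::orc::EPCIndirectionUtils::Create(EPC);
  if (!EPCIU)
    return EPCIU.takeError();
  // The resolver block must be written before trampolines or lazy
  // call-throughs are requested.
  auto ResolverAddr =
      (*EPCIU)->writeResolverBlock(ReentryFnAddr, ReentryCtxAddr);
  if (!ResolverAddr)
    return ResolverAddr.takeError();
  auto &LCTM = (*EPCIU)->createLazyCallThroughManager(ES, ErrorHandlerAddr);
  (void)LCTM; // normally wired into a CompileOnDemandLayer or similar
  // cleanup() must run before the EPCIndirectionUtils is destroyed.
  return (*EPCIU)->cleanup();
}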
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Attributor.h b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Attributor.h
index 7eee16f71d64..8677a0ba62f2 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -1,4867 +1,4869 @@
//===- Attributor.h --- Module-wide attribute deduction ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Attributor: An inter procedural (abstract) "attribute" deduction framework.
//
// The Attributor framework is an inter procedural abstract analysis (fixpoint
// iteration analysis). The goal is to allow easy deduction of new attributes as
// well as information exchange between abstract attributes in-flight.
//
// The Attributor class is the driver and the link between the various abstract
// attributes. The Attributor will iterate until a fixpoint state is reached by
// all abstract attributes in-flight, or until it enforces a pessimistic
// fixpoint because an iteration limit is reached.
//
// Abstract attributes, derived from the AbstractAttribute class, actually
// describe properties of the code. They can correspond to actual LLVM-IR
// attributes, or they can be more general, ultimately unrelated to LLVM-IR
// attributes. The latter is useful when an abstract attribute provides
// information to other abstract attributes in-flight but we might not want to
// manifest the information. The Attributor allows querying in-flight abstract
// attributes through the `Attributor::getAAFor` method (see the method
// description for an example). If the method is used by an abstract attribute
// P, and it results in an abstract attribute Q, the Attributor will
// automatically capture a potential dependence from Q to P. This dependence
// will cause P to be reevaluated whenever Q changes in the future.
//
// The Attributor will only reevaluate abstract attributes that might have
// changed since the last iteration. That means that the Attributor will not
// revisit all instructions/blocks/functions in the module but only query
// an update from a subset of the abstract attributes.
//
// The update method `AbstractAttribute::updateImpl` is implemented by the
// specific "abstract attribute" subclasses. The method is invoked whenever the
// currently assumed state (see the AbstractState class) might not be valid
// anymore. This can, for example, happen if the state was dependent on another
// abstract attribute that changed. In every invocation, the update method has
// to adjust the internal state of an abstract attribute to a point that is
// justifiable by the underlying IR and the current state of abstract attributes
// in-flight. Since the IR is given and assumed to be valid, the information
// derived from it can be assumed to hold. However, information derived from
// other abstract attributes is conditional on various things. If the justifying
// state changed, the `updateImpl` has to revisit the situation and potentially
// find another justification or limit the optimistic assumes made.
//
// Change is the key in this framework. Until a state of no-change, thus a
// fixpoint, is reached, the Attributor will query the abstract attributes
// in-flight to re-evaluate their state. If the (current) state is too
// optimistic, that is, it can no longer be justified through other abstract
// attributes or the state of the IR, the state of the abstract attribute will
// have to change.
// height lattice and the update function to be monotone. However, these
// conditions are not enforced because the iteration limit will guarantee
// termination. If an optimistic fixpoint is reached, or a pessimistic
// fixpoint is enforced after a timeout, the abstract attributes are tasked to
// manifest their result in the IR for passes to come.
//
// Attribute manifestation is not mandatory. If desired, there is support to
// generate a single or multiple LLVM-IR attributes already in the helper struct
// IRAttribute. In the simplest case, a subclass inherits from IRAttribute with
// a proper Attribute::AttrKind as template parameter. The Attributor
// manifestation framework will then create and place a new attribute if it is
// allowed to do so (based on the abstract state). Other use cases can be
// achieved by overloading AbstractAttribute or IRAttribute methods.
//
//
// The "mechanics" of adding a new "abstract attribute":
// - Define a class (transitively) inheriting from AbstractAttribute and one
// (which could be the same) that (transitively) inherits from AbstractState.
// For the latter, consider the already available BooleanState and
// {Inc,Dec,Bit}IntegerState if they fit your needs, e.g., you require only a
// number tracking or bit-encoding.
// - Implement all pure methods. Also use overloading if the attribute is not
// conforming with the "default" behavior: A (set of) LLVM-IR attribute(s) for
// an argument, call site argument, function return value, or function. See
// the class and method descriptions for more information on the two
// "Abstract" classes and their respective methods.
// - Register opportunities for the new abstract attribute in the
// `Attributor::identifyDefaultAbstractAttributes` method if it should be
// counted as a 'default' attribute.
// - Add sufficient tests.
// - Add a Statistics object for bookkeeping. If it is a simple (set of)
// attribute(s) manifested through the Attributor manifestation framework, see
// the bookkeeping function in Attributor.cpp.
// - If instructions with a certain opcode are interesting to the attribute, add
// that opcode to the switch in `Attributor::identifyAbstractAttributes`. This
// will make it possible to query all those instructions through the
// `InformationCache::getOpcodeInstMapForFunction` interface and eliminate the
// need to traverse the IR repeatedly.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H
#define LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/AbstractCallSite.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
namespace llvm {
struct AADepGraphNode;
struct AADepGraph;
struct Attributor;
struct AbstractAttribute;
struct InformationCache;
struct AAIsDead;
struct AttributorCallGraph;
struct IRPosition;
class AAResults;
class Function;
/// Abstract Attribute helper functions.
namespace AA {
/// Return true if \p I is a `nosync` instruction. Use generic reasoning and
/// potentially the corresponding AANoSync.
bool isNoSyncInst(Attributor &A, const Instruction &I,
const AbstractAttribute &QueryingAA);
/// Return true if \p V is dynamically unique, that is, there are no two
/// "instances" of \p V at runtime with different values.
bool isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA,
const Value &V);
/// Return true if \p V is a valid value in \p Scope, that is a constant or an
/// instruction/argument of \p Scope.
bool isValidInScope(const Value &V, const Function *Scope);
/// Return true if \p V is a valid value at position \p CtxI, that is a
/// constant, an argument of the same function as \p CtxI, or an instruction in
/// that function that dominates \p CtxI.
bool isValidAtPosition(const Value &V, const Instruction &CtxI,
InformationCache &InfoCache);
/// Try to convert \p V to type \p Ty without introducing new instructions. If
/// this is not possible return `nullptr`. Note: this function basically knows
/// how to cast various constants.
Value *getWithType(Value &V, Type &Ty);
/// Return the combination of \p A and \p B such that the result is a possible
/// value of both. \p B is potentially casted to match the type \p Ty or the
/// type of \p A if \p Ty is null.
///
/// Examples:
/// X + none => X
/// not_none + undef => not_none
/// V1 + V2 => nullptr
Optional<Value *>
combineOptionalValuesInAAValueLatice(const Optional<Value *> &A,
const Optional<Value *> &B, Type *Ty);
/// Return the initial value of \p Obj with type \p Ty if that is a constant.
Constant *getInitialValueForObj(Value &Obj, Type &Ty,
const TargetLibraryInfo *TLI);
/// Collect all potential underlying objects of \p Ptr at position \p CtxI in
/// \p Objects. Assumed information is used and dependences onto \p QueryingAA
/// are added appropriately.
///
/// \returns True if \p Objects contains all assumed underlying objects, and
/// false if something went wrong and the objects could not be
/// determined.
bool getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
SmallVectorImpl<Value *> &Objects,
const AbstractAttribute &QueryingAA,
const Instruction *CtxI,
+ bool &UsedAssumedInformation,
bool Intraprocedural = false);
/// Collect all potential values of the one stored by \p SI into
/// \p PotentialCopies. That is, the only copies that were made via the
/// store are assumed to be known and all in \p PotentialCopies. Dependences
/// onto \p QueryingAA are properly tracked, \p UsedAssumedInformation will
/// inform the caller if assumed information was used.
///
/// \returns True if the assumed potential copies are all in \p PotentialCopies,
/// false if something went wrong and the copies could not be
/// determined.
bool getPotentialCopiesOfStoredValue(
Attributor &A, StoreInst &SI, SmallSetVector<Value *, 4> &PotentialCopies,
const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation);
/// Return true if \p IRP is readonly. This will query respective AAs that
/// deduce the information and introduce dependences for \p QueryingAA.
bool isAssumedReadOnly(Attributor &A, const IRPosition &IRP,
const AbstractAttribute &QueryingAA, bool &IsKnown);
/// Return true if \p IRP is readnone. This will query respective AAs that
/// deduce the information and introduce dependences for \p QueryingAA.
bool isAssumedReadNone(Attributor &A, const IRPosition &IRP,
const AbstractAttribute &QueryingAA, bool &IsKnown);
/// Return true if \p ToI is potentially reachable from \p FromI. The two
/// instructions do not need to be in the same function. \p GoBackwardsCB
/// can be provided to convey domain knowledge about the "lifespan" the user is
/// interested in. By default, the callers of \p FromI are checked as well to
/// determine if \p ToI can be reached. If the query is not interested in
/// callers beyond a certain point, e.g., a GPU kernel entry or the function
/// containing an alloca, the \p GoBackwardsCB should return false.
bool isPotentiallyReachable(
Attributor &A, const Instruction &FromI, const Instruction &ToI,
const AbstractAttribute &QueryingAA,
std::function<bool(const Function &F)> GoBackwardsCB = nullptr);
/// Same as above but it is sufficient to reach any instruction in \p ToFn.
bool isPotentiallyReachable(
Attributor &A, const Instruction &FromI, const Function &ToFn,
const AbstractAttribute &QueryingAA,
std::function<bool(const Function &F)> GoBackwardsCB);
} // namespace AA
/// The value passed to the command line option that defines the maximum
/// initialization chain length.
extern unsigned MaxInitializationChainLength;
///{
enum class ChangeStatus {
CHANGED,
UNCHANGED,
};
ChangeStatus operator|(ChangeStatus l, ChangeStatus r);
ChangeStatus &operator|=(ChangeStatus &l, ChangeStatus r);
ChangeStatus operator&(ChangeStatus l, ChangeStatus r);
ChangeStatus &operator&=(ChangeStatus &l, ChangeStatus r);
enum class DepClassTy {
REQUIRED, ///< The target cannot be valid if the source is not.
OPTIONAL, ///< The target may be valid if the source is not.
NONE, ///< Do not track a dependence between source and target.
};
///}
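The combinators above are conventionally used to accumulate results inside an update step; a tiny illustration follows (the function and its flag are hypothetical), assuming operator| lets CHANGED absorb UNCHANGED.
#include "llvm/Transforms/IPO/Attributor.h"
static llvm::ChangeStatus exampleUpdate(bool SawNewInformation) {
  llvm::ChangeStatus Changed = llvm::ChangeStatus::UNCHANGED;
  if (SawNewInformation)
    Changed |= llvm::ChangeStatus::CHANGED; // CHANGED dominates under |=
  return Changed;
}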
/// The data structure for the nodes of a dependency graph
struct AADepGraphNode {
public:
virtual ~AADepGraphNode() = default;
using DepTy = PointerIntPair<AADepGraphNode *, 1>;
protected:
/// Set of dependency graph nodes which should be updated if this one
/// is updated. The bit encodes if it is optional.
TinyPtrVector<DepTy> Deps;
static AADepGraphNode *DepGetVal(DepTy &DT) { return DT.getPointer(); }
static AbstractAttribute *DepGetValAA(DepTy &DT) {
return cast<AbstractAttribute>(DT.getPointer());
}
operator AbstractAttribute *() { return cast<AbstractAttribute>(this); }
public:
using iterator =
mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>;
using aaiterator =
mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetValAA)>;
aaiterator begin() { return aaiterator(Deps.begin(), &DepGetValAA); }
aaiterator end() { return aaiterator(Deps.end(), &DepGetValAA); }
iterator child_begin() { return iterator(Deps.begin(), &DepGetVal); }
iterator child_end() { return iterator(Deps.end(), &DepGetVal); }
virtual void print(raw_ostream &OS) const { OS << "AADepNode Impl\n"; }
TinyPtrVector<DepTy> &getDeps() { return Deps; }
friend struct Attributor;
friend struct AADepGraph;
};
/// The data structure for the dependency graph
///
/// Note that in this graph if there is an edge from A to B (A -> B),
/// then it means that B depends on A, and when the state of A is
/// updated, node B should also be updated
struct AADepGraph {
AADepGraph() = default;
~AADepGraph() = default;
using DepTy = AADepGraphNode::DepTy;
static AADepGraphNode *DepGetVal(DepTy &DT) { return DT.getPointer(); }
using iterator =
mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>;
/// There is no root node for the dependency graph. But the SCCIterator
/// requires a single entry point, so we maintain a fake ("synthetic") root
/// node that depends on every node.
AADepGraphNode SyntheticRoot;
AADepGraphNode *GetEntryNode() { return &SyntheticRoot; }
iterator begin() { return SyntheticRoot.child_begin(); }
iterator end() { return SyntheticRoot.child_end(); }
void viewGraph();
/// Dump graph to file
void dumpGraph();
/// Print dependency graph
void print();
};
/// Helper to describe and deal with positions in the LLVM-IR.
///
/// A position in the IR is described by an anchor value and an "offset" that
/// could be the argument number, for call sites and arguments, or an indicator
/// of the "position kind". The kinds, specified in the Kind enum below, include
/// the locations in the attribute list, i.a., function scope and return value,
/// as well as a distinction between call sites and functions. Finally, there
/// are floating values that do not have a corresponding attribute list
/// position.
struct IRPosition {
// NOTE: In the future this definition can be changed to support recursive
// functions.
using CallBaseContext = CallBase;
/// The positions we distinguish in the IR.
enum Kind : char {
IRP_INVALID, ///< An invalid position.
IRP_FLOAT, ///< A position that is not associated with a spot suitable
///< for attributes. This could be any value or instruction.
IRP_RETURNED, ///< An attribute for the function return value.
IRP_CALL_SITE_RETURNED, ///< An attribute for a call site return value.
IRP_FUNCTION, ///< An attribute for a function (scope).
IRP_CALL_SITE, ///< An attribute for a call site (function scope).
IRP_ARGUMENT, ///< An attribute for a function argument.
IRP_CALL_SITE_ARGUMENT, ///< An attribute for a call site argument.
};
/// Default constructor available to create invalid positions implicitly. All
/// other positions need to be created explicitly through the appropriate
/// static member function.
IRPosition() : Enc(nullptr, ENC_VALUE) { verify(); }
/// Create a position describing the value of \p V.
static const IRPosition value(const Value &V,
const CallBaseContext *CBContext = nullptr) {
if (auto *Arg = dyn_cast<Argument>(&V))
return IRPosition::argument(*Arg, CBContext);
if (auto *CB = dyn_cast<CallBase>(&V))
return IRPosition::callsite_returned(*CB);
return IRPosition(const_cast<Value &>(V), IRP_FLOAT, CBContext);
}
/// Create a position describing the instruction \p I. This is different from
/// the value version because call sites are treated as instructions rather
/// than their return value in this function.
static const IRPosition inst(const Instruction &I,
const CallBaseContext *CBContext = nullptr) {
return IRPosition(const_cast<Instruction &>(I), IRP_FLOAT, CBContext);
}
/// Create a position describing the function scope of \p F.
/// \p CBContext is used for call base specific analysis.
static const IRPosition function(const Function &F,
const CallBaseContext *CBContext = nullptr) {
return IRPosition(const_cast<Function &>(F), IRP_FUNCTION, CBContext);
}
/// Create a position describing the returned value of \p F.
/// \p CBContext is used for call base specific analysis.
static const IRPosition returned(const Function &F,
const CallBaseContext *CBContext = nullptr) {
return IRPosition(const_cast<Function &>(F), IRP_RETURNED, CBContext);
}
/// Create a position describing the argument \p Arg.
/// \p CBContext is used for call base specific analysis.
static const IRPosition argument(const Argument &Arg,
const CallBaseContext *CBContext = nullptr) {
return IRPosition(const_cast<Argument &>(Arg), IRP_ARGUMENT, CBContext);
}
/// Create a position describing the function scope of \p CB.
static const IRPosition callsite_function(const CallBase &CB) {
return IRPosition(const_cast<CallBase &>(CB), IRP_CALL_SITE);
}
/// Create a position describing the returned value of \p CB.
static const IRPosition callsite_returned(const CallBase &CB) {
return IRPosition(const_cast<CallBase &>(CB), IRP_CALL_SITE_RETURNED);
}
/// Create a position describing the argument of \p CB at position \p ArgNo.
static const IRPosition callsite_argument(const CallBase &CB,
unsigned ArgNo) {
return IRPosition(const_cast<Use &>(CB.getArgOperandUse(ArgNo)),
IRP_CALL_SITE_ARGUMENT);
}
/// Create a position describing the argument of \p ACS at position \p ArgNo.
static const IRPosition callsite_argument(AbstractCallSite ACS,
unsigned ArgNo) {
if (ACS.getNumArgOperands() <= ArgNo)
return IRPosition();
int CSArgNo = ACS.getCallArgOperandNo(ArgNo);
if (CSArgNo >= 0)
return IRPosition::callsite_argument(
cast<CallBase>(*ACS.getInstruction()), CSArgNo);
return IRPosition();
}
/// Create a position with function scope matching the "context" of \p IRP.
/// If \p IRP is a call site (see isAnyCallSitePosition()) then the result
/// will be a call site position, otherwise the function position of the
/// associated function.
static const IRPosition
function_scope(const IRPosition &IRP,
const CallBaseContext *CBContext = nullptr) {
if (IRP.isAnyCallSitePosition()) {
return IRPosition::callsite_function(
cast<CallBase>(IRP.getAnchorValue()));
}
assert(IRP.getAssociatedFunction());
return IRPosition::function(*IRP.getAssociatedFunction(), CBContext);
}
bool operator==(const IRPosition &RHS) const {
return Enc == RHS.Enc && RHS.CBContext == CBContext;
}
bool operator!=(const IRPosition &RHS) const { return !(*this == RHS); }
/// Return the value this abstract attribute is anchored with.
///
/// The anchor value might not be the associated value if the latter is not
/// sufficient to determine where arguments will be manifested. This is, so
/// far, only the case for call site arguments as the value is not sufficient
/// to pinpoint them. Instead, we can use the call site as an anchor.
Value &getAnchorValue() const {
switch (getEncodingBits()) {
case ENC_VALUE:
case ENC_RETURNED_VALUE:
case ENC_FLOATING_FUNCTION:
return *getAsValuePtr();
case ENC_CALL_SITE_ARGUMENT_USE:
return *(getAsUsePtr()->getUser());
default:
llvm_unreachable("Unkown encoding!");
};
}
/// Return the associated function, if any.
Function *getAssociatedFunction() const {
if (auto *CB = dyn_cast<CallBase>(&getAnchorValue())) {
// We reuse the logic that associates callback callees to arguments of a
// call site here to identify the callback callee as the associated
// function.
if (Argument *Arg = getAssociatedArgument())
return Arg->getParent();
return CB->getCalledFunction();
}
return getAnchorScope();
}
/// Return the associated argument, if any.
Argument *getAssociatedArgument() const;
/// Return true if the position refers to a function interface, that is the
/// function scope, the function return, or an argument.
bool isFnInterfaceKind() const {
switch (getPositionKind()) {
case IRPosition::IRP_FUNCTION:
case IRPosition::IRP_RETURNED:
case IRPosition::IRP_ARGUMENT:
return true;
default:
return false;
}
}
/// Return the Function surrounding the anchor value.
Function *getAnchorScope() const {
Value &V = getAnchorValue();
if (isa<Function>(V))
return &cast<Function>(V);
if (isa<Argument>(V))
return cast<Argument>(V).getParent();
if (isa<Instruction>(V))
return cast<Instruction>(V).getFunction();
return nullptr;
}
/// Return the context instruction, if any.
Instruction *getCtxI() const {
Value &V = getAnchorValue();
if (auto *I = dyn_cast<Instruction>(&V))
return I;
if (auto *Arg = dyn_cast<Argument>(&V))
if (!Arg->getParent()->isDeclaration())
return &Arg->getParent()->getEntryBlock().front();
if (auto *F = dyn_cast<Function>(&V))
if (!F->isDeclaration())
return &(F->getEntryBlock().front());
return nullptr;
}
/// Return the value this abstract attribute is associated with.
Value &getAssociatedValue() const {
if (getCallSiteArgNo() < 0 || isa<Argument>(&getAnchorValue()))
return getAnchorValue();
assert(isa<CallBase>(&getAnchorValue()) && "Expected a call base!");
return *cast<CallBase>(&getAnchorValue())
->getArgOperand(getCallSiteArgNo());
}
/// Return the type this abstract attribute is associated with.
Type *getAssociatedType() const {
if (getPositionKind() == IRPosition::IRP_RETURNED)
return getAssociatedFunction()->getReturnType();
return getAssociatedValue().getType();
}
/// Return the callee argument number of the associated value if it is an
/// argument or call site argument, otherwise a negative value. In contrast to
/// `getCallSiteArgNo` this method will always return the "argument number"
/// from the perspective of the callee. This may not be the same as the call site
/// if this is a callback call.
int getCalleeArgNo() const {
return getArgNo(/* CallbackCalleeArgIfApplicable */ true);
}
/// Return the call site argument number of the associated value if it is an
/// argument or call site argument, otherwise a negative value. In contrast to
/// `getCalleeArgNo` this method will always return the "operand number" from
/// the perspective of the call site. This may not be the same as the callee
/// perspective if this is a callback call.
int getCallSiteArgNo() const {
return getArgNo(/* CallbackCalleeArgIfApplicable */ false);
}
/// Return the index in the attribute list for this position.
unsigned getAttrIdx() const {
switch (getPositionKind()) {
case IRPosition::IRP_INVALID:
case IRPosition::IRP_FLOAT:
break;
case IRPosition::IRP_FUNCTION:
case IRPosition::IRP_CALL_SITE:
return AttributeList::FunctionIndex;
case IRPosition::IRP_RETURNED:
case IRPosition::IRP_CALL_SITE_RETURNED:
return AttributeList::ReturnIndex;
case IRPosition::IRP_ARGUMENT:
case IRPosition::IRP_CALL_SITE_ARGUMENT:
return getCallSiteArgNo() + AttributeList::FirstArgIndex;
}
llvm_unreachable(
"There is no attribute index for a floating or invalid position!");
}
/// Return the associated position kind.
Kind getPositionKind() const {
char EncodingBits = getEncodingBits();
if (EncodingBits == ENC_CALL_SITE_ARGUMENT_USE)
return IRP_CALL_SITE_ARGUMENT;
if (EncodingBits == ENC_FLOATING_FUNCTION)
return IRP_FLOAT;
Value *V = getAsValuePtr();
if (!V)
return IRP_INVALID;
if (isa<Argument>(V))
return IRP_ARGUMENT;
if (isa<Function>(V))
return isReturnPosition(EncodingBits) ? IRP_RETURNED : IRP_FUNCTION;
if (isa<CallBase>(V))
return isReturnPosition(EncodingBits) ? IRP_CALL_SITE_RETURNED
: IRP_CALL_SITE;
return IRP_FLOAT;
}
/// TODO: Figure out if the attribute related helper functions should live
/// here or somewhere else.
/// Return true if any kind in \p AKs exists in the IR at a position that
/// will affect this one. See also getAttrs(...).
/// \param IgnoreSubsumingPositions Flag to determine if subsuming positions,
/// e.g., the function position if this is an
/// argument position, should be ignored.
bool hasAttr(ArrayRef<Attribute::AttrKind> AKs,
bool IgnoreSubsumingPositions = false,
Attributor *A = nullptr) const;
/// Return the attributes of any kind in \p AKs existing in the IR at a
/// position that will affect this one. While each position can only have a
/// single attribute of any kind in \p AKs, there are "subsuming" positions
/// that could have an attribute as well. This method returns all attributes
/// found in \p Attrs.
/// \param IgnoreSubsumingPositions Flag to determine if subsuming positions,
/// e.g., the function position if this is an
/// argument position, should be ignored.
void getAttrs(ArrayRef<Attribute::AttrKind> AKs,
SmallVectorImpl<Attribute> &Attrs,
bool IgnoreSubsumingPositions = false,
Attributor *A = nullptr) const;
/// Remove the attributes of kinds \p AKs existing in the IR at this position.
void removeAttrs(ArrayRef<Attribute::AttrKind> AKs) const {
if (getPositionKind() == IRP_INVALID || getPositionKind() == IRP_FLOAT)
return;
AttributeList AttrList;
auto *CB = dyn_cast<CallBase>(&getAnchorValue());
if (CB)
AttrList = CB->getAttributes();
else
AttrList = getAssociatedFunction()->getAttributes();
LLVMContext &Ctx = getAnchorValue().getContext();
for (Attribute::AttrKind AK : AKs)
AttrList = AttrList.removeAttributeAtIndex(Ctx, getAttrIdx(), AK);
if (CB)
CB->setAttributes(AttrList);
else
getAssociatedFunction()->setAttributes(AttrList);
}
bool isAnyCallSitePosition() const {
switch (getPositionKind()) {
case IRPosition::IRP_CALL_SITE:
case IRPosition::IRP_CALL_SITE_RETURNED:
case IRPosition::IRP_CALL_SITE_ARGUMENT:
return true;
default:
return false;
}
}
/// Return true if the position is an argument or call site argument.
bool isArgumentPosition() const {
switch (getPositionKind()) {
case IRPosition::IRP_ARGUMENT:
case IRPosition::IRP_CALL_SITE_ARGUMENT:
return true;
default:
return false;
}
}
/// Return the same position without the call base context.
IRPosition stripCallBaseContext() const {
IRPosition Result = *this;
Result.CBContext = nullptr;
return Result;
}
/// Get the call base context from the position.
const CallBaseContext *getCallBaseContext() const { return CBContext; }
/// Check if the position has any call base context.
bool hasCallBaseContext() const { return CBContext != nullptr; }
/// Special DenseMap key values.
///
///{
static const IRPosition EmptyKey;
static const IRPosition TombstoneKey;
///}
/// Conversion into a void * to allow reuse of pointer hashing.
operator void *() const { return Enc.getOpaqueValue(); }
private:
/// Private constructor for special values only!
explicit IRPosition(void *Ptr, const CallBaseContext *CBContext = nullptr)
: CBContext(CBContext) {
Enc.setFromOpaqueValue(Ptr);
}
/// IRPosition anchored at \p AnchorVal with kind/argument number \p PK.
explicit IRPosition(Value &AnchorVal, Kind PK,
const CallBaseContext *CBContext = nullptr)
: CBContext(CBContext) {
switch (PK) {
case IRPosition::IRP_INVALID:
llvm_unreachable("Cannot create invalid IRP with an anchor value!");
break;
case IRPosition::IRP_FLOAT:
// Special case for floating functions.
if (isa<Function>(AnchorVal) || isa<CallBase>(AnchorVal))
Enc = {&AnchorVal, ENC_FLOATING_FUNCTION};
else
Enc = {&AnchorVal, ENC_VALUE};
break;
case IRPosition::IRP_FUNCTION:
case IRPosition::IRP_CALL_SITE:
Enc = {&AnchorVal, ENC_VALUE};
break;
case IRPosition::IRP_RETURNED:
case IRPosition::IRP_CALL_SITE_RETURNED:
Enc = {&AnchorVal, ENC_RETURNED_VALUE};
break;
case IRPosition::IRP_ARGUMENT:
Enc = {&AnchorVal, ENC_VALUE};
break;
case IRPosition::IRP_CALL_SITE_ARGUMENT:
llvm_unreachable(
"Cannot create call site argument IRP with an anchor value!");
break;
}
verify();
}
/// Return the callee argument number of the associated value if it is an
/// argument or call site argument. See also `getCalleeArgNo` and
/// `getCallSiteArgNo`.
int getArgNo(bool CallbackCalleeArgIfApplicable) const {
if (CallbackCalleeArgIfApplicable)
if (Argument *Arg = getAssociatedArgument())
return Arg->getArgNo();
switch (getPositionKind()) {
case IRPosition::IRP_ARGUMENT:
return cast<Argument>(getAsValuePtr())->getArgNo();
case IRPosition::IRP_CALL_SITE_ARGUMENT: {
Use &U = *getAsUsePtr();
return cast<CallBase>(U.getUser())->getArgOperandNo(&U);
}
default:
return -1;
}
}
/// IRPosition for the use \p U. The position kind \p PK needs to be
/// IRP_CALL_SITE_ARGUMENT, the anchor value is the user, the associated value
/// the used value.
explicit IRPosition(Use &U, Kind PK) {
assert(PK == IRP_CALL_SITE_ARGUMENT &&
"Use constructor is for call site arguments only!");
Enc = {&U, ENC_CALL_SITE_ARGUMENT_USE};
verify();
}
/// Verify internal invariants.
void verify();
/// Return the attributes of kind \p AK existing in the IR as IR attributes.
bool getAttrsFromIRAttr(Attribute::AttrKind AK,
SmallVectorImpl<Attribute> &Attrs) const;
/// Return the attributes of kind \p AK existing in the IR as operand bundles
/// of an llvm.assume.
bool getAttrsFromAssumes(Attribute::AttrKind AK,
SmallVectorImpl<Attribute> &Attrs,
Attributor &A) const;
/// Return the underlying pointer as Value *, valid for all positions but
/// IRP_CALL_SITE_ARGUMENT.
Value *getAsValuePtr() const {
assert(getEncodingBits() != ENC_CALL_SITE_ARGUMENT_USE &&
"Not a value pointer!");
return reinterpret_cast<Value *>(Enc.getPointer());
}
/// Return the underlying pointer as Use *, valid only for
/// IRP_CALL_SITE_ARGUMENT positions.
Use *getAsUsePtr() const {
assert(getEncodingBits() == ENC_CALL_SITE_ARGUMENT_USE &&
"Not a value pointer!");
return reinterpret_cast<Use *>(Enc.getPointer());
}
/// Return true if \p EncodingBits describe a returned or call site returned
/// position.
static bool isReturnPosition(char EncodingBits) {
return EncodingBits == ENC_RETURNED_VALUE;
}
/// Return true if the encoding bits describe a returned or call site returned
/// position.
bool isReturnPosition() const { return isReturnPosition(getEncodingBits()); }
/// The encoding of the IRPosition is a combination of a pointer and two
/// encoding bits. The values of the encoding bits are defined in the enum
/// below. The pointer is either a Value* (for the first three encoding bit
/// combinations) or Use* (for ENC_CALL_SITE_ARGUMENT_USE).
///
///{
enum {
ENC_VALUE = 0b00,
ENC_RETURNED_VALUE = 0b01,
ENC_FLOATING_FUNCTION = 0b10,
ENC_CALL_SITE_ARGUMENT_USE = 0b11,
};
// Reserve the maximal amount of bits so there is no need to mask out the
// remaining ones. We will not encode anything else in the pointer anyway.
static constexpr int NumEncodingBits =
PointerLikeTypeTraits<void *>::NumLowBitsAvailable;
static_assert(NumEncodingBits >= 2, "At least two bits are required!");
/// The pointer with the encoding bits.
PointerIntPair<void *, NumEncodingBits, char> Enc;
///}
/// Call base context. Used for callsite specific analysis.
const CallBaseContext *CBContext = nullptr;
/// Return the encoding bits.
char getEncodingBits() const { return Enc.getInt(); }
};
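// Illustrative sketch (assuming `F`, `Arg`, and `CB` are an existing
// Function, Argument, and CallBase): positions are created through the
// static factory methods above, never by spelling out the encoding.
//
//   IRPosition FnPos  = IRPosition::function(F);
//   IRPosition RetPos = IRPosition::returned(F);
//   IRPosition ArgPos = IRPosition::argument(Arg);
//   IRPosition CSArg  = IRPosition::callsite_argument(CB, /* ArgNo */ 0);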
/// Helper that allows IRPosition as a key in a DenseMap.
template <> struct DenseMapInfo<IRPosition> {
static inline IRPosition getEmptyKey() { return IRPosition::EmptyKey; }
static inline IRPosition getTombstoneKey() {
return IRPosition::TombstoneKey;
}
static unsigned getHashValue(const IRPosition &IRP) {
return (DenseMapInfo<void *>::getHashValue(IRP) << 4) ^
(DenseMapInfo<Value *>::getHashValue(IRP.getCallBaseContext()));
}
static bool isEqual(const IRPosition &a, const IRPosition &b) {
return a == b;
}
};
/// A visitor class for IR positions.
///
/// Given a position P, the SubsumingPositionIterator allows visiting "subsuming
/// positions" wrt. attributes/information. Thus, if a piece of information
/// holds for a subsuming position, it also holds for the position P.
///
/// The subsuming positions always include the initial position and then,
/// depending on the position kind, additionally the following ones:
/// - for IRP_RETURNED:
/// - the function (IRP_FUNCTION)
/// - for IRP_ARGUMENT:
/// - the function (IRP_FUNCTION)
/// - for IRP_CALL_SITE:
/// - the callee (IRP_FUNCTION), if known
/// - for IRP_CALL_SITE_RETURNED:
/// - the callee (IRP_RETURNED), if known
/// - the call site (IRP_FUNCTION)
/// - the callee (IRP_FUNCTION), if known
/// - for IRP_CALL_SITE_ARGUMENT:
/// - the argument of the callee (IRP_ARGUMENT), if known
/// - the callee (IRP_FUNCTION), if known
/// - the position the call site argument is associated with if it is not
/// anchored to the call site, e.g., if it is an argument then the argument
/// (IRP_ARGUMENT)
class SubsumingPositionIterator {
SmallVector<IRPosition, 4> IRPositions;
using iterator = decltype(IRPositions)::iterator;
public:
SubsumingPositionIterator(const IRPosition &IRP);
iterator begin() { return IRPositions.begin(); }
iterator end() { return IRPositions.end(); }
};
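// Illustrative sketch: visiting all subsuming positions of a (hypothetical)
// position `IRP`; information found at any visited position also holds at
// `IRP` itself.
//
//   for (const IRPosition &SubIRP : SubsumingPositionIterator(IRP)) {
//     // Inspect attributes at SubIRP.
//   }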
/// Wrapper for FunctionAnalysisManager.
struct AnalysisGetter {
template <typename Analysis>
typename Analysis::Result *getAnalysis(const Function &F) {
if (!FAM || !F.getParent())
return nullptr;
return &FAM->getResult<Analysis>(const_cast<Function &>(F));
}
AnalysisGetter(FunctionAnalysisManager &FAM) : FAM(&FAM) {}
AnalysisGetter() = default;
private:
FunctionAnalysisManager *FAM = nullptr;
};
/// Data structure to hold cached (LLVM-IR) information.
///
/// All attributes are given an InformationCache object at creation time to
/// avoid inspection of the IR by all of them individually. This default
/// InformationCache will hold information required by 'default' attributes,
/// thus the ones deduced when Attributor::identifyDefaultAbstractAttributes(..)
/// is called.
///
/// If custom abstract attributes, registered manually through
/// Attributor::registerAA(...), need more information, especially if it is not
/// reusable, it is advised to inherit from the InformationCache and cast the
/// instance down in the abstract attributes.
struct InformationCache {
InformationCache(const Module &M, AnalysisGetter &AG,
BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC)
: DL(M.getDataLayout()), Allocator(Allocator),
Explorer(
/* ExploreInterBlock */ true, /* ExploreCFGForward */ true,
/* ExploreCFGBackward */ true,
/* LIGetter */
[&](const Function &F) { return AG.getAnalysis<LoopAnalysis>(F); },
/* DTGetter */
[&](const Function &F) {
return AG.getAnalysis<DominatorTreeAnalysis>(F);
},
/* PDTGetter */
[&](const Function &F) {
return AG.getAnalysis<PostDominatorTreeAnalysis>(F);
}),
AG(AG), TargetTriple(M.getTargetTriple()) {
if (CGSCC)
initializeModuleSlice(*CGSCC);
}
~InformationCache() {
// The FunctionInfo objects are allocated via a BumpPtrAllocator, we call
// the destructor manually.
for (auto &It : FuncInfoMap)
It.getSecond()->~FunctionInfo();
}
/// Apply \p CB to all uses of \p F. If \p LookThroughConstantExprUses is
/// true, constant expression users are not given to \p CB but their uses are
/// traversed transitively.
template <typename CBTy>
static void foreachUse(Function &F, CBTy CB,
bool LookThroughConstantExprUses = true) {
SmallVector<Use *, 8> Worklist(make_pointer_range(F.uses()));
for (unsigned Idx = 0; Idx < Worklist.size(); ++Idx) {
Use &U = *Worklist[Idx];
// Allow use in constant bitcasts and simply look through them.
if (LookThroughConstantExprUses && isa<ConstantExpr>(U.getUser())) {
for (Use &CEU : cast<ConstantExpr>(U.getUser())->uses())
Worklist.push_back(&CEU);
continue;
}
CB(U);
}
}
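// Illustrative sketch (assuming `Fn` is a Function &): count the direct
// call-site uses of `Fn`; constant expression users are looked through by
// default.
//
//   unsigned NumCallUses = 0;
//   InformationCache::foreachUse(Fn, [&](Use &U) {
//     if (auto *CB = dyn_cast<CallBase>(U.getUser()))
//       if (CB->isCallee(&U))
//         ++NumCallUses;
//   });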
/// Initialize the ModuleSlice member based on \p SCC. ModuleSlice contains
/// (a subset of) all functions that we can look at during this SCC traversal.
/// This includes functions (transitively) called from the SCC and the
/// (transitive) callers of SCC functions. We can also look at a function if
/// there is a "reference edge", i.e., if the function somehow uses (!= calls)
/// a function in the SCC or a caller of a function in the SCC.
void initializeModuleSlice(SetVector<Function *> &SCC) {
ModuleSlice.insert(SCC.begin(), SCC.end());
SmallPtrSet<Function *, 16> Seen;
SmallVector<Function *, 16> Worklist(SCC.begin(), SCC.end());
while (!Worklist.empty()) {
Function *F = Worklist.pop_back_val();
ModuleSlice.insert(F);
for (Instruction &I : instructions(*F))
if (auto *CB = dyn_cast<CallBase>(&I))
if (Function *Callee = CB->getCalledFunction())
if (Seen.insert(Callee).second)
Worklist.push_back(Callee);
}
Seen.clear();
Worklist.append(SCC.begin(), SCC.end());
while (!Worklist.empty()) {
Function *F = Worklist.pop_back_val();
ModuleSlice.insert(F);
// Traverse all transitive uses.
foreachUse(*F, [&](Use &U) {
if (auto *UsrI = dyn_cast<Instruction>(U.getUser()))
if (Seen.insert(UsrI->getFunction()).second)
Worklist.push_back(UsrI->getFunction());
});
}
}
/// The slice of the module we are allowed to look at.
SmallPtrSet<Function *, 8> ModuleSlice;
/// A vector type to hold instructions.
using InstructionVectorTy = SmallVector<Instruction *, 8>;
/// A map type from opcodes to instructions with this opcode.
using OpcodeInstMapTy = DenseMap<unsigned, InstructionVectorTy *>;
/// Return the map that relates "interesting" opcodes with all instructions
/// with that opcode in \p F.
OpcodeInstMapTy &getOpcodeInstMapForFunction(const Function &F) {
return getFunctionInfo(F).OpcodeInstMap;
}
/// Return the instructions in \p F that may read or write memory.
InstructionVectorTy &getReadOrWriteInstsForFunction(const Function &F) {
return getFunctionInfo(F).RWInsts;
}
/// Return MustBeExecutedContextExplorer
MustBeExecutedContextExplorer &getMustBeExecutedContextExplorer() {
return Explorer;
}
/// Return TargetLibraryInfo for function \p F.
TargetLibraryInfo *getTargetLibraryInfoForFunction(const Function &F) {
return AG.getAnalysis<TargetLibraryAnalysis>(F);
}
/// Return AliasAnalysis Result for function \p F.
AAResults *getAAResultsForFunction(const Function &F);
/// Return true if \p Arg is involved in a must-tail call, that is, it is an
/// argument of the caller or the callee of such a call.
bool isInvolvedInMustTailCall(const Argument &Arg) {
FunctionInfo &FI = getFunctionInfo(*Arg.getParent());
return FI.CalledViaMustTail || FI.ContainsMustTailCall;
}
/// Return the analysis result from a pass \p AP for function \p F.
template <typename AP>
typename AP::Result *getAnalysisResultForFunction(const Function &F) {
return AG.getAnalysis<AP>(F);
}
/// Return datalayout used in the module.
const DataLayout &getDL() { return DL; }
/// Return the map containing all the knowledge we have from `llvm.assume`s.
const RetainedKnowledgeMap &getKnowledgeMap() const { return KnowledgeMap; }
/// Return whether \p To is potentially reachable from \p From or not.
/// If the same query was answered before, the cached result is returned.
bool getPotentiallyReachable(const Instruction &From, const Instruction &To) {
auto KeyPair = std::make_pair(&From, &To);
auto Iter = PotentiallyReachableMap.find(KeyPair);
if (Iter != PotentiallyReachableMap.end())
return Iter->second;
const Function &F = *From.getFunction();
bool Result = true;
if (From.getFunction() == To.getFunction())
Result = isPotentiallyReachable(&From, &To, nullptr,
AG.getAnalysis<DominatorTreeAnalysis>(F),
AG.getAnalysis<LoopAnalysis>(F));
PotentiallyReachableMap.insert(std::make_pair(KeyPair, Result));
return Result;
}
/// Check whether \p F is part of module slice.
bool isInModuleSlice(const Function &F) {
return ModuleSlice.count(const_cast<Function *>(&F));
}
/// Return true if the stack (llvm::Alloca) can be accessed by other threads.
bool stackIsAccessibleByOtherThreads() { return !targetIsGPU(); }
/// Return true if the target is a GPU.
bool targetIsGPU() {
return TargetTriple.isAMDGPU() || TargetTriple.isNVPTX();
}
private:
struct FunctionInfo {
~FunctionInfo();
/// A nested map that remembers all instructions in a function with a
/// certain instruction opcode (Instruction::getOpcode()).
OpcodeInstMapTy OpcodeInstMap;
/// A map from functions to their instructions that may read or write
/// memory.
InstructionVectorTy RWInsts;
/// Function is called by a `musttail` call.
bool CalledViaMustTail;
/// Function contains a `musttail` call.
bool ContainsMustTailCall;
};
/// A map type from functions to information about them.
DenseMap<const Function *, FunctionInfo *> FuncInfoMap;
/// Return information about the function \p F, potentially by creating it.
FunctionInfo &getFunctionInfo(const Function &F) {
FunctionInfo *&FI = FuncInfoMap[&F];
if (!FI) {
FI = new (Allocator) FunctionInfo();
initializeInformationCache(F, *FI);
}
return *FI;
}
/// Initialize the function information cache \p FI for the function \p F.
///
/// This method needs to be called for all functions that might be looked at
/// through the information cache interface *prior* to looking at them.
void initializeInformationCache(const Function &F, FunctionInfo &FI);
/// The datalayout used in the module.
const DataLayout &DL;
/// The allocator used to allocate memory, e.g. for `FunctionInfo`s.
BumpPtrAllocator &Allocator;
/// MustBeExecutedContextExplorer
MustBeExecutedContextExplorer Explorer;
/// A map with knowledge retained in `llvm.assume` instructions.
RetainedKnowledgeMap KnowledgeMap;
/// Getters for analysis.
AnalysisGetter &AG;
/// Set of inlineable functions
SmallPtrSet<const Function *, 8> InlineableFunctions;
/// A map for caching results of queries for isPotentiallyReachable
DenseMap<std::pair<const Instruction *, const Instruction *>, bool>
PotentiallyReachableMap;
/// The triple describing the target machine.
Triple TargetTriple;
/// Give the Attributor access to the members so
/// Attributor::identifyDefaultAbstractAttributes(...) can initialize them.
friend struct Attributor;
};
/// The fixpoint analysis framework that orchestrates the attribute deduction.
///
/// The Attributor provides a general abstract analysis framework (guided
/// fixpoint iteration) as well as helper functions for the deduction of
/// (LLVM-IR) attributes. However, other code properties can also be deduced,
/// propagated, and ultimately manifested through the Attributor framework. This
/// is particularly useful if these properties interact with attributes and a
/// co-scheduled deduction allows the solution to be improved. Even if not, that
/// is, if attributes/properties are completely isolated, they should use the
/// Attributor framework to reduce the number of fixpoint iteration frameworks
/// in the code base. Note that the Attributor design makes sure that isolated
/// attributes are not impacted, in any way, by others derived at the same time
/// if there is no cross-reasoning performed.
///
/// The public facing interface of the Attributor is kept simple and basically
/// allows abstract attributes to do one thing: query abstract attributes
/// in-flight. There are two reasons to do this:
/// a) The optimistic state of one abstract attribute can justify an
/// optimistic state of another, allowing the framework to end up with an
/// optimistic (=best possible) fixpoint instead of one based solely on
/// information in the IR.
/// b) This avoids reimplementing various kinds of lookups, e.g., to check
/// for existing IR attributes, in favor of a single lookup interface
/// provided by an abstract attribute subclass.
///
/// NOTE: The mechanics of adding a new "concrete" abstract attribute are
/// described in the file comment.
struct Attributor {
using OptimizationRemarkGetter =
function_ref<OptimizationRemarkEmitter &(Function *)>;
/// Constructor
///
/// \param Functions The set of functions we are deriving attributes for.
/// \param InfoCache Cache to hold various information accessible for
/// the abstract attributes.
/// \param CGUpdater Helper to update an underlying call graph.
/// \param Allowed If not null, a set limiting the attribute opportunities.
/// \param DeleteFns Whether to delete functions.
/// \param RewriteSignatures Whether to rewrite function signatures.
Attributor(SetVector<Function *> &Functions, InformationCache &InfoCache,
CallGraphUpdater &CGUpdater,
DenseSet<const char *> *Allowed = nullptr, bool DeleteFns = true,
bool RewriteSignatures = true)
: Allocator(InfoCache.Allocator), Functions(Functions),
InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed),
DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures),
MaxFixpointIterations(None), OREGetter(None), PassName("") {}
/// Constructor
///
/// \param Functions The set of functions we are deriving attributes for.
/// \param InfoCache Cache to hold various information accessible for
/// the abstract attributes.
/// \param CGUpdater Helper to update an underlying call graph.
/// \param Allowed If not null, a set limiting the attribute opportunities.
/// \param DeleteFns Whether to delete functions
/// \param RewriteSignatures Whether to rewrite function signatures.
/// \param MaxFixpointIterations Maximum number of iterations to run until
/// fixpoint.
/// \param OREGetter A callback function that returns an ORE object from a
/// Function pointer.
/// \param PassName The name of the pass emitting remarks.
Attributor(SetVector<Function *> &Functions, InformationCache &InfoCache,
CallGraphUpdater &CGUpdater, DenseSet<const char *> *Allowed,
bool DeleteFns, bool RewriteSignatures,
Optional<unsigned> MaxFixpointIterations,
OptimizationRemarkGetter OREGetter, const char *PassName)
: Allocator(InfoCache.Allocator), Functions(Functions),
InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed),
DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures),
MaxFixpointIterations(MaxFixpointIterations),
OREGetter(Optional<OptimizationRemarkGetter>(OREGetter)),
PassName(PassName) {}
~Attributor();
/// Run the analyses until a fixpoint is reached or enforced (timeout).
///
/// The attributes registered with this Attributor can be used afterwards as long
/// as the Attributor is not destroyed (it owns the attributes now).
///
/// \Returns CHANGED if the IR was changed, otherwise UNCHANGED.
ChangeStatus run();
/// Lookup an abstract attribute of type \p AAType at position \p IRP. While
/// no abstract attribute is found, equivalent positions are checked, see
/// SubsumingPositionIterator. Thus, the returned abstract attribute
/// might be anchored at a different position, e.g., the callee if \p IRP is a
/// call base.
///
/// This method is the only (supported) way an abstract attribute can retrieve
/// information from another abstract attribute. As an example, take an
/// abstract attribute that determines the memory access behavior for an
/// argument (readnone, readonly, ...). It should use `getAAFor` to get the
/// most optimistic information for other abstract attributes in-flight, e.g.
/// the one reasoning about the "captured" state for the argument or the one
/// reasoning on the memory access behavior of the function as a whole.
///
/// If the DepClass enum is set to `DepClassTy::None` the dependence from
/// \p QueryingAA to the returned abstract attribute is not automatically
/// recorded. This should only be used if the caller will record the
/// dependence explicitly if necessary, that is, if the returned abstract
/// attribute is used for reasoning. To record the dependences explicitly use
/// the `Attributor::recordDependence` method.
template <typename AAType>
const AAType &getAAFor(const AbstractAttribute &QueryingAA,
const IRPosition &IRP, DepClassTy DepClass) {
return getOrCreateAAFor<AAType>(IRP, &QueryingAA, DepClass,
/* ForceUpdate */ false);
}
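// Illustrative sketch (assuming this runs inside an AA's update with `A`
// being the Attributor and `V` some llvm::Value of interest): query another
// abstract attribute in-flight, here AANoCapture which is declared further
// down in this header, and use its assumed state.
//
//   const auto &NoCaptureAA = A.getAAFor<AANoCapture>(
//       *this, IRPosition::value(V), DepClassTy::REQUIRED);
//   if (NoCaptureAA.isAssumedNoCapture()) {
//     // Use the optimistic information; the REQUIRED dependence ensures we
//     // are updated again if NoCaptureAA changes.
//   }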
/// Similar to getAAFor but the returned abstract attribute will be updated (via
/// `AbstractAttribute::update`) even if it is found in the cache. This is
/// especially useful for AAIsDead as changes in liveness can make updates
/// possible/useful that were not happening before as the abstract attribute
/// was assumed dead.
template <typename AAType>
const AAType &getAndUpdateAAFor(const AbstractAttribute &QueryingAA,
const IRPosition &IRP, DepClassTy DepClass) {
return getOrCreateAAFor<AAType>(IRP, &QueryingAA, DepClass,
/* ForceUpdate */ true);
}
/// The version of getAAFor that allows omitting a querying abstract
/// attribute. Using this after Attributor started running is restricted to
/// only the Attributor itself. Initial seeding of AAs can be done via this
/// function.
/// NOTE: ForceUpdate is ignored in any stage other than the update stage.
template <typename AAType>
const AAType &getOrCreateAAFor(IRPosition IRP,
const AbstractAttribute *QueryingAA,
DepClassTy DepClass, bool ForceUpdate = false,
bool UpdateAfterInit = true) {
if (!shouldPropagateCallBaseContext(IRP))
IRP = IRP.stripCallBaseContext();
if (AAType *AAPtr = lookupAAFor<AAType>(IRP, QueryingAA, DepClass,
/* AllowInvalidState */ true)) {
if (ForceUpdate && Phase == AttributorPhase::UPDATE)
updateAA(*AAPtr);
return *AAPtr;
}
// No matching attribute found, create one.
// Use the static create method.
auto &AA = AAType::createForPosition(IRP, *this);
// If we are currently seeding attributes, enforce seeding rules.
if (Phase == AttributorPhase::SEEDING && !shouldSeedAttribute(AA)) {
AA.getState().indicatePessimisticFixpoint();
return AA;
}
registerAA(AA);
// For now we ignore naked and optnone functions.
bool Invalidate = Allowed && !Allowed->count(&AAType::ID);
const Function *FnScope = IRP.getAnchorScope();
if (FnScope)
Invalidate |= FnScope->hasFnAttribute(Attribute::Naked) ||
FnScope->hasFnAttribute(Attribute::OptimizeNone);
// Avoid too many nested initializations to prevent a stack overflow.
Invalidate |= InitializationChainLength > MaxInitializationChainLength;
// Bootstrap the new attribute with an initial update to propagate
// information, e.g., function -> call site. If it is not in the Allowed
// set we will not perform updates at all.
if (Invalidate) {
AA.getState().indicatePessimisticFixpoint();
return AA;
}
{
TimeTraceScope TimeScope(AA.getName() + "::initialize");
++InitializationChainLength;
AA.initialize(*this);
--InitializationChainLength;
}
// Initialization and update are allowed for code outside of the current
// function set, but only if it is part of the module slice we are allowed to
// look at. The only exception is AAIsDeadFunction whose initialization is
// prevented directly, since we don't want to compute it twice.
if (FnScope && !Functions.count(const_cast<Function *>(FnScope))) {
if (!getInfoCache().isInModuleSlice(*FnScope)) {
AA.getState().indicatePessimisticFixpoint();
return AA;
}
}
// If this is queried in the manifest stage, we force the AA to indicate
// pessimistic fixpoint immediately.
if (Phase == AttributorPhase::MANIFEST) {
AA.getState().indicatePessimisticFixpoint();
return AA;
}
// Allow seeded attributes to declare dependencies.
// Remember the seeding state.
if (UpdateAfterInit) {
AttributorPhase OldPhase = Phase;
Phase = AttributorPhase::UPDATE;
updateAA(AA);
Phase = OldPhase;
}
if (QueryingAA && AA.getState().isValidState())
recordDependence(AA, const_cast<AbstractAttribute &>(*QueryingAA),
DepClass);
return AA;
}
template <typename AAType>
const AAType &getOrCreateAAFor(const IRPosition &IRP) {
return getOrCreateAAFor<AAType>(IRP, /* QueryingAA */ nullptr,
DepClassTy::NONE);
}
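// Illustrative sketch (seeding phase, e.g., from a custom seeding hook;
// `A` is the Attributor and `F` a Function): create AAs without a querying
// AA, here for the function return position using AAReturnedValues, which is
// declared further down in this header.
//
//   A.getOrCreateAAFor<AAReturnedValues>(IRPosition::returned(F));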
/// Return the attribute of \p AAType for \p IRP if existing and valid. This
/// also allows lookups by non-AA users.
template <typename AAType>
AAType *lookupAAFor(const IRPosition &IRP,
const AbstractAttribute *QueryingAA = nullptr,
DepClassTy DepClass = DepClassTy::OPTIONAL,
bool AllowInvalidState = false) {
static_assert(std::is_base_of<AbstractAttribute, AAType>::value,
"Cannot query an attribute with a type not derived from "
"'AbstractAttribute'!");
// Lookup the abstract attribute of type AAType. If found, return it after
// registering a dependence of QueryingAA on the one returned attribute.
AbstractAttribute *AAPtr = AAMap.lookup({&AAType::ID, IRP});
if (!AAPtr)
return nullptr;
AAType *AA = static_cast<AAType *>(AAPtr);
// Do not register a dependence on an attribute with an invalid state.
if (DepClass != DepClassTy::NONE && QueryingAA &&
AA->getState().isValidState())
recordDependence(*AA, const_cast<AbstractAttribute &>(*QueryingAA),
DepClass);
// Return nullptr if this attribute has an invalid state.
if (!AllowInvalidState && !AA->getState().isValidState())
return nullptr;
return AA;
}
/// Allows a query AA to request an update if a new query was received.
void registerForUpdate(AbstractAttribute &AA);
/// Explicitly record a dependence from \p FromAA to \p ToAA, that is if
/// \p FromAA changes \p ToAA should be updated as well.
///
/// This method should be used in conjunction with the `getAAFor` method and
/// with the DepClass enum passed to the method set to None. This can
/// be beneficial to avoid false dependences but it requires the users of
/// `getAAFor` to explicitly record true dependences through this method.
/// The \p DepClass flag indicates if the dependence is strictly necessary.
/// That means for required dependences, if \p FromAA changes to an invalid
/// state, \p ToAA can be moved to a pessimistic fixpoint because it required
/// information from \p FromAA but none are available anymore.
void recordDependence(const AbstractAttribute &FromAA,
const AbstractAttribute &ToAA, DepClassTy DepClass);
/// Introduce a new abstract attribute into the fixpoint analysis.
///
/// Note that ownership of the attribute is given to the Attributor. It will
/// invoke delete on the attribute when the Attributor itself is destroyed.
///
/// Attributes are identified by their IR position (AAType::getIRPosition())
/// and the address of their static member (see AAType::ID).
template <typename AAType> AAType &registerAA(AAType &AA) {
static_assert(std::is_base_of<AbstractAttribute, AAType>::value,
"Cannot register an attribute with a type not derived from "
"'AbstractAttribute'!");
// Put the attribute in the lookup map structure and the container we use to
// keep track of all attributes.
const IRPosition &IRP = AA.getIRPosition();
AbstractAttribute *&AAPtr = AAMap[{&AAType::ID, IRP}];
assert(!AAPtr && "Attribute already in map!");
AAPtr = &AA;
// Register AA with the synthetic root only before the manifest stage.
if (Phase == AttributorPhase::SEEDING || Phase == AttributorPhase::UPDATE)
DG.SyntheticRoot.Deps.push_back(
AADepGraphNode::DepTy(&AA, unsigned(DepClassTy::REQUIRED)));
return AA;
}
/// Return the internal information cache.
InformationCache &getInfoCache() { return InfoCache; }
/// Return true if this is a module pass, false otherwise.
bool isModulePass() const {
return !Functions.empty() &&
Functions.size() == Functions.front()->getParent()->size();
}
/// Return true if we derive attributes for \p Fn
bool isRunOn(Function &Fn) const {
return Functions.empty() || Functions.count(&Fn);
}
/// Determine opportunities to derive 'default' attributes in \p F and create
/// abstract attribute objects for them.
///
/// \param F The function that is checked for attribute opportunities.
///
/// Note that abstract attribute instances are generally created even if the
/// IR already contains the information they would deduce. The most important
/// reason for this is the single interface, the one of the abstract attribute
/// instance, which can be queried without the need to look at the IR in
/// various places.
void identifyDefaultAbstractAttributes(Function &F);
/// Determine whether the function \p F is IPO amendable
///
/// If a function is exactly defined or it has the alwaysinline attribute
/// and is viable to be inlined, we say it is IPO amendable.
bool isFunctionIPOAmendable(const Function &F) {
return F.hasExactDefinition() || InfoCache.InlineableFunctions.count(&F);
}
/// Mark the internal function \p F as live.
///
/// This will trigger the identification and initialization of attributes for
/// \p F.
void markLiveInternalFunction(const Function &F) {
assert(F.hasLocalLinkage() &&
"Only local linkage is assumed dead initially.");
identifyDefaultAbstractAttributes(const_cast<Function &>(F));
}
/// Helper function to remove callsite.
void removeCallSite(CallInst *CI) {
if (!CI)
return;
CGUpdater.removeCallSite(*CI);
}
/// Record that \p U is to be replaced with \p NV after information was
/// manifested. This also triggers deletion of trivially dead instructions.
bool changeUseAfterManifest(Use &U, Value &NV) {
Value *&V = ToBeChangedUses[&U];
if (V && (V->stripPointerCasts() == NV.stripPointerCasts() ||
isa_and_nonnull<UndefValue>(V)))
return false;
assert((!V || V == &NV || isa<UndefValue>(NV)) &&
"Use was registered twice for replacement with different values!");
V = &NV;
return true;
}
/// Helper function to replace all uses of \p V with \p NV. Return true if
/// there is any change. The flag \p ChangeDroppable indicates if droppable
/// uses should be changed too.
bool changeValueAfterManifest(Value &V, Value &NV,
bool ChangeDroppable = true) {
auto &Entry = ToBeChangedValues[&V];
Value *&CurNV = Entry.first;
if (CurNV && (CurNV->stripPointerCasts() == NV.stripPointerCasts() ||
isa<UndefValue>(CurNV)))
return false;
assert((!CurNV || CurNV == &NV || isa<UndefValue>(NV)) &&
"Value replacement was registered twice with different values!");
CurNV = &NV;
Entry.second = ChangeDroppable;
return true;
}
/// Record that \p I is to be replaced with `unreachable` after information
/// was manifested.
void changeToUnreachableAfterManifest(Instruction *I) {
ToBeChangedToUnreachableInsts.insert(I);
}
/// Record that \p II has at least one dead successor block. This information
/// is used, e.g., to replace \p II with a call, after information was
/// manifested.
void registerInvokeWithDeadSuccessor(InvokeInst &II) {
InvokeWithDeadSuccessor.push_back(&II);
}
/// Record that \p I is deleted after information was manifested. This also
/// triggers deletion of trivially dead instructions.
void deleteAfterManifest(Instruction &I) { ToBeDeletedInsts.insert(&I); }
/// Record that \p BB is deleted after information was manifested. This also
/// triggers deletion of trivially dead instructions.
void deleteAfterManifest(BasicBlock &BB) { ToBeDeletedBlocks.insert(&BB); }
/// Record that \p BB is added during the manifest of an AA. Added basic blocks
/// are preserved in the IR.
void registerManifestAddedBasicBlock(BasicBlock &BB) {
ManifestAddedBlocks.insert(&BB);
}
/// Record that \p F is deleted after information was manifested.
void deleteAfterManifest(Function &F) {
if (DeleteFns)
ToBeDeletedFunctions.insert(&F);
}
/// If \p IRP is assumed to be a constant, return it; if it is unclear yet,
/// return None; otherwise return `nullptr`.
Optional<Constant *> getAssumedConstant(const IRPosition &IRP,
const AbstractAttribute &AA,
bool &UsedAssumedInformation);
Optional<Constant *> getAssumedConstant(const Value &V,
const AbstractAttribute &AA,
bool &UsedAssumedInformation) {
return getAssumedConstant(IRPosition::value(V), AA, UsedAssumedInformation);
}
/// If \p V is assumed simplified, return it; if it is unclear yet,
/// return None; otherwise return `nullptr`.
Optional<Value *> getAssumedSimplified(const IRPosition &IRP,
const AbstractAttribute &AA,
bool &UsedAssumedInformation) {
return getAssumedSimplified(IRP, &AA, UsedAssumedInformation);
}
Optional<Value *> getAssumedSimplified(const Value &V,
const AbstractAttribute &AA,
bool &UsedAssumedInformation) {
return getAssumedSimplified(IRPosition::value(V), AA,
UsedAssumedInformation);
}
/// If \p V is assumed simplified, return it; if it is unclear yet,
/// return None; otherwise return `nullptr`. Same as the public version
/// except that it can be used without recording dependences on any \p AA.
Optional<Value *> getAssumedSimplified(const IRPosition &V,
const AbstractAttribute *AA,
bool &UsedAssumedInformation);
/// Register \p CB as a simplification callback.
/// `Attributor::getAssumedSimplified` will use these callbacks before
/// it will ask `AAValueSimplify`. It is important to ensure this
/// is called before `identifyDefaultAbstractAttributes`, assuming the
/// latter is called at all.
using SimplifictionCallbackTy = std::function<Optional<Value *>(
const IRPosition &, const AbstractAttribute *, bool &)>;
void registerSimplificationCallback(const IRPosition &IRP,
const SimplifictionCallbackTy &CB) {
SimplificationCallbacks[IRP].emplace_back(CB);
}
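// Illustrative sketch (assuming `A` is the Attributor, `V` the value to pin,
// and `Replacement` the value it should simplify to): an external user can
// register a callback before the default AAs are seeded.
//
//   A.registerSimplificationCallback(
//       IRPosition::value(V),
//       [&](const IRPosition &IRP, const AbstractAttribute *AA,
//           bool &UsedAssumedInformation) -> Optional<Value *> {
//         return &Replacement;
//       });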
/// Return true if there is a simplification callback for \p IRP.
bool hasSimplificationCallback(const IRPosition &IRP) {
return SimplificationCallbacks.count(IRP);
}
private:
/// The vector with all simplification callbacks registered by outside AAs.
DenseMap<IRPosition, SmallVector<SimplifictionCallbackTy, 1>>
SimplificationCallbacks;
public:
/// Translate \p V from the callee context into the call site context.
Optional<Value *>
translateArgumentToCallSiteContent(Optional<Value *> V, CallBase &CB,
const AbstractAttribute &AA,
bool &UsedAssumedInformation);
/// Return true if \p AA (or its context instruction) is assumed dead.
///
/// If \p LivenessAA is not provided it is queried.
bool isAssumedDead(const AbstractAttribute &AA, const AAIsDead *LivenessAA,
bool &UsedAssumedInformation,
bool CheckBBLivenessOnly = false,
DepClassTy DepClass = DepClassTy::OPTIONAL);
/// Return true if \p I is assumed dead.
///
/// If \p LivenessAA is not provided it is queried.
bool isAssumedDead(const Instruction &I, const AbstractAttribute *QueryingAA,
const AAIsDead *LivenessAA, bool &UsedAssumedInformation,
bool CheckBBLivenessOnly = false,
DepClassTy DepClass = DepClassTy::OPTIONAL);
/// Return true if \p U is assumed dead.
///
/// If \p FnLivenessAA is not provided it is queried.
bool isAssumedDead(const Use &U, const AbstractAttribute *QueryingAA,
const AAIsDead *FnLivenessAA, bool &UsedAssumedInformation,
bool CheckBBLivenessOnly = false,
DepClassTy DepClass = DepClassTy::OPTIONAL);
/// Return true if \p IRP is assumed dead.
///
/// If \p FnLivenessAA is not provided it is queried.
bool isAssumedDead(const IRPosition &IRP, const AbstractAttribute *QueryingAA,
const AAIsDead *FnLivenessAA, bool &UsedAssumedInformation,
bool CheckBBLivenessOnly = false,
DepClassTy DepClass = DepClassTy::OPTIONAL);
/// Return true if \p BB is assumed dead.
///
/// If \p LivenessAA is not provided it is queried.
bool isAssumedDead(const BasicBlock &BB, const AbstractAttribute *QueryingAA,
const AAIsDead *FnLivenessAA,
DepClassTy DepClass = DepClassTy::OPTIONAL);
/// Check \p Pred on all (transitive) uses of \p V.
///
/// This method will evaluate \p Pred on all (transitive) uses of the
/// associated value and return true if \p Pred holds every time.
/// If uses are skipped in favor of equivalent ones, e.g., if we look through
/// memory, the \p EquivalentUseCB will be used to give the caller an idea
/// what original use was replaced by a new one (or new ones). The visit is
/// cut short if \p EquivalentUseCB returns false and the function will return
/// false as well.
bool checkForAllUses(function_ref<bool(const Use &, bool &)> Pred,
const AbstractAttribute &QueryingAA, const Value &V,
bool CheckBBLivenessOnly = false,
DepClassTy LivenessDepClass = DepClassTy::OPTIONAL,
function_ref<bool(const Use &OldU, const Use &NewU)>
EquivalentUseCB = nullptr);
/// Emit a remark generically.
///
/// This template function can be used to generically emit a remark. The
/// RemarkKind should be one of the following:
/// - OptimizationRemark to indicate a successful optimization attempt
/// - OptimizationRemarkMissed to report a failed optimization attempt
/// - OptimizationRemarkAnalysis to provide additional information about an
/// optimization attempt
///
/// The remark is built using a callback function \p RemarkCB that takes a
/// RemarkKind as input and returns a RemarkKind.
template <typename RemarkKind, typename RemarkCallBack>
void emitRemark(Instruction *I, StringRef RemarkName,
RemarkCallBack &&RemarkCB) const {
if (!OREGetter)
return;
Function *F = I->getFunction();
auto &ORE = OREGetter.getValue()(F);
if (RemarkName.startswith("OMP"))
ORE.emit([&]() {
return RemarkCB(RemarkKind(PassName, RemarkName, I))
<< " [" << RemarkName << "]";
});
else
ORE.emit([&]() { return RemarkCB(RemarkKind(PassName, RemarkName, I)); });
}
/// Emit a remark on a function.
template <typename RemarkKind, typename RemarkCallBack>
void emitRemark(Function *F, StringRef RemarkName,
RemarkCallBack &&RemarkCB) const {
if (!OREGetter)
return;
auto &ORE = OREGetter.getValue()(F);
if (RemarkName.startswith("OMP"))
ORE.emit([&]() {
return RemarkCB(RemarkKind(PassName, RemarkName, F))
<< " [" << RemarkName << "]";
});
else
ORE.emit([&]() { return RemarkCB(RemarkKind(PassName, RemarkName, F)); });
}
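// Illustrative sketch (assuming `A` is the Attributor and `CI` a call
// instruction; the remark name is made up for this example): emit a missed
// optimization remark if remark emission was configured via OREGetter.
//
//   auto Remark = [&](OptimizationRemarkMissed ORM) {
//     return ORM << "call site could not be simplified";
//   };
//   A.emitRemark<OptimizationRemarkMissed>(CI, "ExampleRemark", Remark);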
/// Helper struct used in the communication between an abstract attribute (AA)
/// that wants to change the signature of a function and the Attributor which
/// applies the changes. The struct is partially initialized with the
/// information from the AA (see the constructor). All other members are
/// provided by the Attributor prior to invoking any callbacks.
struct ArgumentReplacementInfo {
/// Callee repair callback type
///
/// The function repair callback is invoked once to rewire the replacement
/// arguments in the body of the new function. The argument replacement info
/// is passed, as built from the registerFunctionSignatureRewrite call, as
/// well as the replacement function and an iterator to the first
/// replacement argument.
using CalleeRepairCBTy = std::function<void(
const ArgumentReplacementInfo &, Function &, Function::arg_iterator)>;
/// Abstract call site (ACS) repair callback type
///
/// The abstract call site repair callback is invoked once on every abstract
/// call site of the replaced function (\see ReplacedFn). The callback needs
/// to provide the operands for the call to the new replacement function.
/// The number and type of the operands appended to the provided vector
/// (second argument) is defined by the number and types determined through
/// the replacement type vector (\see ReplacementTypes). The first argument
/// is the ArgumentReplacementInfo object registered with the Attributor
/// through the registerFunctionSignatureRewrite call.
using ACSRepairCBTy =
std::function<void(const ArgumentReplacementInfo &, AbstractCallSite,
SmallVectorImpl<Value *> &)>;
/// Simple getters, see the corresponding members for details.
///{
Attributor &getAttributor() const { return A; }
const Function &getReplacedFn() const { return ReplacedFn; }
const Argument &getReplacedArg() const { return ReplacedArg; }
unsigned getNumReplacementArgs() const { return ReplacementTypes.size(); }
const SmallVectorImpl<Type *> &getReplacementTypes() const {
return ReplacementTypes;
}
///}
private:
/// Constructor that takes the argument to be replaced, the types of
/// the replacement arguments, as well as callbacks to repair the call sites
/// and new function after the replacement happened.
ArgumentReplacementInfo(Attributor &A, Argument &Arg,
ArrayRef<Type *> ReplacementTypes,
CalleeRepairCBTy &&CalleeRepairCB,
ACSRepairCBTy &&ACSRepairCB)
: A(A), ReplacedFn(*Arg.getParent()), ReplacedArg(Arg),
ReplacementTypes(ReplacementTypes.begin(), ReplacementTypes.end()),
CalleeRepairCB(std::move(CalleeRepairCB)),
ACSRepairCB(std::move(ACSRepairCB)) {}
/// Reference to the attributor to allow access from the callbacks.
Attributor &A;
/// The "old" function replaced by ReplacementFn.
const Function &ReplacedFn;
/// The "old" argument replaced by new ones defined via ReplacementTypes.
const Argument &ReplacedArg;
/// The types of the arguments replacing ReplacedArg.
const SmallVector<Type *, 8> ReplacementTypes;
/// Callee repair callback, see CalleeRepairCBTy.
const CalleeRepairCBTy CalleeRepairCB;
/// Abstract call site (ACS) repair callback, see ACSRepairCBTy.
const ACSRepairCBTy ACSRepairCB;
/// Allow access to the private members from the Attributor.
friend struct Attributor;
};
/// Check if we can rewrite a function signature.
///
/// The argument \p Arg is replaced with new ones defined by the number,
/// order, and types in \p ReplacementTypes.
///
/// \returns True, if the replacement can be registered, via
/// registerFunctionSignatureRewrite, false otherwise.
bool isValidFunctionSignatureRewrite(Argument &Arg,
ArrayRef<Type *> ReplacementTypes);
/// Register a rewrite for a function signature.
///
/// The argument \p Arg is replaced with new ones defined by the number,
/// order, and types in \p ReplacementTypes. The rewiring at the call sites is
/// done through \p ACSRepairCB and at the callee site through
/// \p CalleeRepairCB.
///
/// \returns True, if the replacement was registered, false otherwise.
bool registerFunctionSignatureRewrite(
Argument &Arg, ArrayRef<Type *> ReplacementTypes,
ArgumentReplacementInfo::CalleeRepairCBTy &&CalleeRepairCB,
ArgumentReplacementInfo::ACSRepairCBTy &&ACSRepairCB);
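// Illustrative sketch (assuming `A` is the Attributor, `Arg` the argument to
// split, and `Int32Ty` an IntegerType; all names are made up): replace one
// argument with two i32 pieces and provide both repair callbacks.
//
//   SmallVector<Type *, 2> NewTypes = {Int32Ty, Int32Ty};
//   if (A.isValidFunctionSignatureRewrite(Arg, NewTypes))
//     A.registerFunctionSignatureRewrite(
//         Arg, NewTypes,
//         [](const Attributor::ArgumentReplacementInfo &ARI, Function &NewFn,
//            Function::arg_iterator NewArgIt) {
//           // Rewire uses of the old argument to the new ones in NewFn.
//         },
//         [](const Attributor::ArgumentReplacementInfo &ARI,
//            AbstractCallSite ACS, SmallVectorImpl<Value *> &NewArgOps) {
//           // Append the two operands for the rewritten call site.
//         });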
/// Check \p Pred on all function call sites.
///
/// This method will evaluate \p Pred on call sites and return
/// true if \p Pred holds for every call site. However, this is only possible
/// if all call sites are known, hence the function has internal linkage.
- /// If true is returned, \p AllCallSitesKnown is set if all possible call
- /// sites of the function have been visited.
+ /// If true is returned, \p UsedAssumedInformation is set if assumed
+ /// information was used to skip or simplify potential call sites.
bool checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
const AbstractAttribute &QueryingAA,
- bool RequireAllCallSites, bool &AllCallSitesKnown);
+ bool RequireAllCallSites,
+ bool &UsedAssumedInformation);
/// Check \p Pred on all call sites of \p Fn.
///
/// This method will evaluate \p Pred on call sites and return
/// true if \p Pred holds for every call site. However, this is only possible
/// if all call sites are known, hence the function has internal linkage.
- /// If true is returned, \p AllCallSitesKnown is set if all possible call
- /// sites of the function have been visited.
+ /// If true is returned, \p UsedAssumedInformation is set if assumed
+ /// information was used to skip or simplify potential call sites.
bool checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
const Function &Fn, bool RequireAllCallSites,
const AbstractAttribute *QueryingAA,
- bool &AllCallSitesKnown);
+ bool &UsedAssumedInformation);
/// Check \p Pred on all values potentially returned by \p F.
///
/// This method will evaluate \p Pred on all values potentially returned by
/// the function associated with \p QueryingAA. The returned values are
/// matched with their respective return instructions. Returns true if \p Pred
/// holds on all of them.
bool checkForAllReturnedValuesAndReturnInsts(
function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred,
const AbstractAttribute &QueryingAA);
/// Check \p Pred on all values potentially returned by the function
/// associated with \p QueryingAA.
///
/// This is the context insensitive version of the method above.
bool checkForAllReturnedValues(function_ref<bool(Value &)> Pred,
const AbstractAttribute &QueryingAA);
/// Check \p Pred on all instructions with an opcode present in \p Opcodes.
///
/// This method will evaluate \p Pred on all instructions with an opcode
/// present in \p Opcode and return true if \p Pred holds on all of them.
bool checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
const AbstractAttribute &QueryingAA,
const ArrayRef<unsigned> &Opcodes,
bool &UsedAssumedInformation,
bool CheckBBLivenessOnly = false,
bool CheckPotentiallyDead = false);
/// Check \p Pred on all call-like instructions (=CallBase derived).
///
/// See checkForAllInstructions(...) for more information.
bool checkForAllCallLikeInstructions(function_ref<bool(Instruction &)> Pred,
const AbstractAttribute &QueryingAA,
bool &UsedAssumedInformation,
bool CheckBBLivenessOnly = false,
bool CheckPotentiallyDead = false) {
return checkForAllInstructions(
Pred, QueryingAA,
{(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
(unsigned)Instruction::Call},
UsedAssumedInformation, CheckBBLivenessOnly, CheckPotentiallyDead);
}
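// Editorial sketch (not part of the upstream header), again assuming an AA's
// update with an `Attributor &A` in scope: a typical use of
// checkForAllCallLikeInstructions; the `NoSyncCheck` lambda is hypothetical.
// ```
//   bool UsedAssumedInformation = false;
//   auto NoSyncCheck = [](Instruction &I) {
//     // Conservatively require the callee to already carry `nosync`.
//     return cast<CallBase>(I).hasFnAttr(Attribute::NoSync);
//   };
//   if (!A.checkForAllCallLikeInstructions(NoSyncCheck, *this,
//                                          UsedAssumedInformation))
//     return indicatePessimisticFixpoint();
// ```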
/// Check \p Pred on all Read/Write instructions.
///
/// This method will evaluate \p Pred on all instructions that read or write
/// to memory present in the information cache and return true if \p Pred
/// holds on all of them.
bool checkForAllReadWriteInstructions(function_ref<bool(Instruction &)> Pred,
AbstractAttribute &QueryingAA,
bool &UsedAssumedInformation);
/// Create a shallow wrapper for \p F such that \p F has internal linkage
/// afterwards. It also sets the original \p F 's name to anonymous.
///
/// A wrapper is a function with the same type (and attributes) as \p F
/// that will only call \p F and return the result, if any.
///
/// Assuming the declaration of \p F looks like:
/// rty F(aty0 arg0, ..., atyN argN);
///
/// The wrapper will then look as follows:
/// rty wrapper(aty0 arg0, ..., atyN argN) {
/// return F(arg0, ..., argN);
/// }
///
static void createShallowWrapper(Function &F);
/// Returns true if the function \p F can be internalized, i.e., it has a
/// compatible linkage.
static bool isInternalizable(Function &F);
/// Make another copy of the function \p F such that the copied version has
/// internal linkage afterwards and can be analysed. Then we replace all uses
/// of the original function with the copied one.
///
/// Only non-locally linked functions that have `linkonce_odr` or `weak_odr`
/// linkage can be internalized because these linkages guarantee that other
/// definitions with the same name have the same semantics as this one.
///
/// This will only be run if the `attributor-allow-deep-wrappers` option is
/// set, or if the function is called with \p Force set to true.
///
/// If the function \p F failed to be internalized the return value will be a
/// null pointer.
static Function *internalizeFunction(Function &F, bool Force = false);
/// Make copies of each function in the set \p FnSet such that the copied
/// version has internal linkage afterwards and can be analysed. Then we
/// replace all uses of the original function with the copied one. The map
/// \p FnMap contains a mapping of functions to their internalized versions.
///
/// Only non-locally linked functions that have `linkonce_odr` or `weak_odr`
/// linkage can be internalized because these linkages guarantee that other
/// definitions with the same name have the same semantics as this one.
///
/// This version will internalize all the functions in the set \p FnSet at
/// once and then replace the uses. This prevents internalized functions from
/// being called by external functions when there is an internalized version in
/// the module.
static bool internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet,
DenseMap<Function *, Function *> &FnMap);
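// Editorial sketch (not part of the upstream header): internalizing a set of
// functions before running the Attributor so their external linkage does not
// block IPO; the module `M` and the selection loop are hypothetical.
// ```
//   SmallPtrSet<Function *, 8> FnSet;
//   DenseMap<Function *, Function *> InternalizedMap;
//   for (Function &F : M)
//     if (Attributor::isInternalizable(F))
//       FnSet.insert(&F);
//   if (Attributor::internalizeFunctions(FnSet, InternalizedMap)) {
//     // The map now gives the analysable internal copy for each original.
//   }
// ```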
/// Return the data layout associated with the anchor scope.
const DataLayout &getDataLayout() const { return InfoCache.DL; }
/// The allocator used to allocate memory, e.g. for `AbstractAttribute`s.
BumpPtrAllocator &Allocator;
private:
/// This method will do fixpoint iteration until fixpoint or the
/// maximum iteration count is reached.
///
/// If the maximum iteration count is reached, this method will
/// indicate pessimistic fixpoint on attributes that transitively depend
/// on attributes that were scheduled for an update.
void runTillFixpoint();
/// Gets called after scheduling, manifests attributes to the LLVM IR.
ChangeStatus manifestAttributes();
/// Gets called after attributes have been manifested, cleans up the IR.
/// Deletes dead functions, blocks and instructions.
/// Rewrites function signatures and updates the call graph.
ChangeStatus cleanupIR();
/// Identify internal functions that are effectively dead, thus not reachable
/// from a live entry point. The functions are added to ToBeDeletedFunctions.
void identifyDeadInternalFunctions();
/// Run `::update` on \p AA and track the dependences queried while doing so.
/// Also adjust the state if we know further updates are not necessary.
ChangeStatus updateAA(AbstractAttribute &AA);
/// Remember the dependences on the top of the dependence stack such that they
/// may trigger further updates. (\see DependenceStack)
void rememberDependences();
/// Determine if CallBase context in \p IRP should be propagated.
bool shouldPropagateCallBaseContext(const IRPosition &IRP);
/// Apply all requested function signature rewrites
/// (\see registerFunctionSignatureRewrite) and return Changed if the module
/// was altered.
ChangeStatus
rewriteFunctionSignatures(SmallPtrSetImpl<Function *> &ModifiedFns);
/// Check if the Attribute \p AA should be seeded.
/// See getOrCreateAAFor.
bool shouldSeedAttribute(AbstractAttribute &AA);
/// A nested map to lookup abstract attributes based on the argument position
/// on the outer level, and the addresses of the static member (AAType::ID) on
/// the inner level.
///{
using AAMapKeyTy = std::pair<const char *, IRPosition>;
DenseMap<AAMapKeyTy, AbstractAttribute *> AAMap;
///}
/// Map to remember all requested signature changes (= argument replacements).
DenseMap<Function *, SmallVector<std::unique_ptr<ArgumentReplacementInfo>, 8>>
ArgumentReplacementMap;
/// The set of functions we are deriving attributes for.
SetVector<Function *> &Functions;
/// The information cache that holds pre-processed (LLVM-IR) information.
InformationCache &InfoCache;
/// Helper to update an underlying call graph.
CallGraphUpdater &CGUpdater;
/// Abstract Attribute dependency graph
AADepGraph DG;
/// Set of functions for which we modified the content such that it might
/// impact the call graph.
SmallPtrSet<Function *, 8> CGModifiedFunctions;
/// Information about a dependence. If FromAA is changed, ToAA needs to be
/// updated as well.
struct DepInfo {
const AbstractAttribute *FromAA;
const AbstractAttribute *ToAA;
DepClassTy DepClass;
};
/// The dependence stack is used to track dependences during an
/// `AbstractAttribute::update` call. As `AbstractAttribute::update` can be
/// recursive we might have multiple vectors of dependences in here. The stack
/// size should be adjusted according to the expected recursion depth and the
/// inner dependence vector size to the expected number of dependences per
/// abstract attribute. Since the inner vectors are actually allocated on the
/// stack we can be generous with their size.
using DependenceVector = SmallVector<DepInfo, 8>;
SmallVector<DependenceVector *, 16> DependenceStack;
/// If not null, a set limiting the attribute opportunities.
const DenseSet<const char *> *Allowed;
/// Whether to delete functions.
const bool DeleteFns;
/// Whether to rewrite signatures.
const bool RewriteSignatures;
/// Maximum number of fixpoint iterations.
Optional<unsigned> MaxFixpointIterations;
/// A set to remember the functions we already assume to be live and visited.
DenseSet<const Function *> VisitedFunctions;
/// Uses we replace with a new value after manifest is done. We will then
/// remove trivially dead instructions as well.
DenseMap<Use *, Value *> ToBeChangedUses;
/// Values we replace with a new value after manifest is done. We will then
/// remove trivially dead instructions as well.
DenseMap<Value *, std::pair<Value *, bool>> ToBeChangedValues;
/// Instructions we replace with `unreachable` insts after manifest is done.
SmallDenseSet<WeakVH, 16> ToBeChangedToUnreachableInsts;
/// Invoke instructions with at least a single dead successor block.
SmallVector<WeakVH, 16> InvokeWithDeadSuccessor;
/// A flag that indicates which stage of the process we are in. Initially, the
/// phase is SEEDING. Phase is changed in `Attributor::run()`
enum class AttributorPhase {
SEEDING,
UPDATE,
MANIFEST,
CLEANUP,
} Phase = AttributorPhase::SEEDING;
/// The current initialization chain length. Tracked to avoid stack overflows.
unsigned InitializationChainLength = 0;
/// Functions, blocks, and instructions we delete after manifest is done.
///
///{
SmallPtrSet<Function *, 8> ToBeDeletedFunctions;
SmallPtrSet<BasicBlock *, 8> ToBeDeletedBlocks;
SmallPtrSet<BasicBlock *, 8> ManifestAddedBlocks;
SmallDenseSet<WeakVH, 8> ToBeDeletedInsts;
///}
/// Callback to get an OptimizationRemarkEmitter from a Function *.
Optional<OptimizationRemarkGetter> OREGetter;
/// Container with all the query AAs that requested an update via
/// registerForUpdate.
SmallSetVector<AbstractAttribute *, 16> QueryAAsAwaitingUpdate;
/// The name of the pass to emit remarks for.
const char *PassName = "";
friend AADepGraph;
friend AttributorCallGraph;
};
/// An interface to query the internal state of an abstract attribute.
///
/// The abstract state is a minimal interface that allows the Attributor to
/// communicate with the abstract attributes about their internal state without
/// enforcing or exposing implementation details, e.g., the (existence of an)
/// underlying lattice.
///
/// It is sufficient to be able to query if a state is (1) valid or invalid, (2)
/// at a fixpoint, and to indicate to the state that (3) an optimistic fixpoint
/// was reached or (4) a pessimistic fixpoint was enforced.
///
/// All methods need to be implemented by the subclass. For the common use case,
/// a single boolean state or a bit-encoded state, the BooleanState and
/// {Inc,Dec,Bit}IntegerState classes are already provided. An abstract
/// attribute can inherit from them to get the abstract state interface and
/// additional methods to directly modify the state if needed. See the
/// class comments for help.
struct AbstractState {
virtual ~AbstractState() = default;
/// Return if this abstract state is in a valid state. If false, no
/// information provided should be used.
virtual bool isValidState() const = 0;
/// Return if this abstract state is fixed, thus does not need to be updated
/// if information changes as it cannot change itself.
virtual bool isAtFixpoint() const = 0;
/// Indicate that the abstract state should converge to the optimistic state.
///
/// This will usually make the optimistically assumed state the known to be
/// true state.
///
/// \returns ChangeStatus::UNCHANGED as the assumed value should not change.
virtual ChangeStatus indicateOptimisticFixpoint() = 0;
/// Indicate that the abstract state should converge to the pessimistic state.
///
/// This will usually revert the optimistically assumed state to the known to
/// be true state.
///
/// \returns ChangeStatus::CHANGED as the assumed value may change.
virtual ChangeStatus indicatePessimisticFixpoint() = 0;
};
/// Simple state with integers encoding.
///
/// The interface ensures that the assumed bits are always a subset of the known
/// bits. Users can only add known bits and, except through adding known bits,
/// they can only remove assumed bits. This should guarantee monotonicity and
/// thereby the existence of a fixpoint (if used correctly). The fixpoint is
/// reached when the assumed and known state/bits are equal. Users can
/// force/indicate a fixpoint. If an optimistic one is indicated, the known
/// state will catch up with the assumed one, for a pessimistic fixpoint it is
/// the other way around.
template <typename base_ty, base_ty BestState, base_ty WorstState>
struct IntegerStateBase : public AbstractState {
using base_t = base_ty;
IntegerStateBase() = default;
IntegerStateBase(base_t Assumed) : Assumed(Assumed) {}
/// Return the best possible representable state.
static constexpr base_t getBestState() { return BestState; }
static constexpr base_t getBestState(const IntegerStateBase &) {
return getBestState();
}
/// Return the worst possible representable state.
static constexpr base_t getWorstState() { return WorstState; }
static constexpr base_t getWorstState(const IntegerStateBase &) {
return getWorstState();
}
/// See AbstractState::isValidState()
/// NOTE: For now we simply pretend that the worst possible state is invalid.
bool isValidState() const override { return Assumed != getWorstState(); }
/// See AbstractState::isAtFixpoint()
bool isAtFixpoint() const override { return Assumed == Known; }
/// See AbstractState::indicateOptimisticFixpoint(...)
ChangeStatus indicateOptimisticFixpoint() override {
Known = Assumed;
return ChangeStatus::UNCHANGED;
}
/// See AbstractState::indicatePessimisticFixpoint(...)
ChangeStatus indicatePessimisticFixpoint() override {
Assumed = Known;
return ChangeStatus::CHANGED;
}
/// Return the known state encoding
base_t getKnown() const { return Known; }
/// Return the assumed state encoding.
base_t getAssumed() const { return Assumed; }
/// Equality for IntegerStateBase.
bool
operator==(const IntegerStateBase<base_t, BestState, WorstState> &R) const {
return this->getAssumed() == R.getAssumed() &&
this->getKnown() == R.getKnown();
}
/// Inequality for IntegerStateBase.
bool
operator!=(const IntegerStateBase<base_t, BestState, WorstState> &R) const {
return !(*this == R);
}
/// "Clamp" this state with \p R. The result is subtype dependent but it is
/// intended that only information assumed in both states will be assumed in
/// this one afterwards.
void operator^=(const IntegerStateBase<base_t, BestState, WorstState> &R) {
handleNewAssumedValue(R.getAssumed());
}
/// "Clamp" this state with \p R. The result is subtype dependent but it is
/// intended that information known in either state will be known in
/// this one afterwards.
void operator+=(const IntegerStateBase<base_t, BestState, WorstState> &R) {
handleNewKnownValue(R.getKnown());
}
void operator|=(const IntegerStateBase<base_t, BestState, WorstState> &R) {
joinOR(R.getAssumed(), R.getKnown());
}
void operator&=(const IntegerStateBase<base_t, BestState, WorstState> &R) {
joinAND(R.getAssumed(), R.getKnown());
}
protected:
/// Handle a new assumed value \p Value. Subtype dependent.
virtual void handleNewAssumedValue(base_t Value) = 0;
/// Handle a new known value \p Value. Subtype dependent.
virtual void handleNewKnownValue(base_t Value) = 0;
/// Join \p AssumedValue and \p KnownValue with OR semantics. Subtype dependent.
virtual void joinOR(base_t AssumedValue, base_t KnownValue) = 0;
/// Join \p AssumedValue and \p KnownValue with AND semantics. Subtype dependent.
virtual void joinAND(base_t AssumedValue, base_t KnownValue) = 0;
/// The known state encoding in an integer of type base_t.
base_t Known = getWorstState();
/// The assumed state encoding in an integer of type base_t.
base_t Assumed = getBestState();
};
/// Specialization of the integer state for a bit-wise encoding.
template <typename base_ty = uint32_t, base_ty BestState = ~base_ty(0),
base_ty WorstState = 0>
struct BitIntegerState
: public IntegerStateBase<base_ty, BestState, WorstState> {
using base_t = base_ty;
/// Return true if the bits set in \p BitsEncoding are "known bits".
bool isKnown(base_t BitsEncoding) const {
return (this->Known & BitsEncoding) == BitsEncoding;
}
/// Return true if the bits set in \p BitsEncoding are "assumed bits".
bool isAssumed(base_t BitsEncoding) const {
return (this->Assumed & BitsEncoding) == BitsEncoding;
}
/// Add the bits in \p BitsEncoding to the "known bits".
BitIntegerState &addKnownBits(base_t Bits) {
// Make sure we never miss any "known bits".
this->Assumed |= Bits;
this->Known |= Bits;
return *this;
}
/// Remove the bits in \p BitsEncoding from the "assumed bits" if not known.
BitIntegerState &removeAssumedBits(base_t BitsEncoding) {
return intersectAssumedBits(~BitsEncoding);
}
/// Remove the bits in \p BitsEncoding from the "known bits".
BitIntegerState &removeKnownBits(base_t BitsEncoding) {
this->Known = (this->Known & ~BitsEncoding);
return *this;
}
/// Keep only "assumed bits" also set in \p BitsEncoding but all known ones.
BitIntegerState &intersectAssumedBits(base_t BitsEncoding) {
// Make sure we never loose any "known bits".
this->Assumed = (this->Assumed & BitsEncoding) | this->Known;
return *this;
}
private:
void handleNewAssumedValue(base_t Value) override {
intersectAssumedBits(Value);
}
void handleNewKnownValue(base_t Value) override { addKnownBits(Value); }
void joinOR(base_t AssumedValue, base_t KnownValue) override {
this->Known |= KnownValue;
this->Assumed |= AssumedValue;
}
void joinAND(base_t AssumedValue, base_t KnownValue) override {
this->Known &= KnownValue;
this->Assumed &= AssumedValue;
}
};
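// Editorial sketch (not part of the upstream header): the known/assumed
// discipline of BitIntegerState; the bit names are hypothetical.
// ```
//   enum { BIT_A = 1 << 0, BIT_B = 1 << 1 };
//   BitIntegerState<uint8_t, /* BestState */ 3, /* WorstState */ 0> S;
//   assert(S.isAssumed(BIT_A | BIT_B) && !S.isKnown(BIT_A)); // optimistic start
//   S.addKnownBits(BIT_A);      // BIT_A becomes known (and stays assumed).
//   S.removeAssumedBits(BIT_B); // Drops BIT_B from the assumed bits only.
//   assert(S.isKnown(BIT_A) && !S.isAssumed(BIT_B));
// ```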
/// Specialization of the integer state for an increasing value, hence ~0u is
/// the best state and 0 the worst.
template <typename base_ty = uint32_t, base_ty BestState = ~base_ty(0),
base_ty WorstState = 0>
struct IncIntegerState
: public IntegerStateBase<base_ty, BestState, WorstState> {
using super = IntegerStateBase<base_ty, BestState, WorstState>;
using base_t = base_ty;
IncIntegerState() : super() {}
IncIntegerState(base_t Assumed) : super(Assumed) {}
/// Return the best possible representable state.
static constexpr base_t getBestState() { return BestState; }
static constexpr base_t
getBestState(const IncIntegerState<base_ty, BestState, WorstState> &) {
return getBestState();
}
/// Take minimum of assumed and \p Value.
IncIntegerState &takeAssumedMinimum(base_t Value) {
// Make sure we never lose "known value".
this->Assumed = std::max(std::min(this->Assumed, Value), this->Known);
return *this;
}
/// Take maximum of known and \p Value.
IncIntegerState &takeKnownMaximum(base_t Value) {
// Make sure we never lose "known value".
this->Assumed = std::max(Value, this->Assumed);
this->Known = std::max(Value, this->Known);
return *this;
}
private:
void handleNewAssumedValue(base_t Value) override {
takeAssumedMinimum(Value);
}
void handleNewKnownValue(base_t Value) override { takeKnownMaximum(Value); }
void joinOR(base_t AssumedValue, base_t KnownValue) override {
this->Known = std::max(this->Known, KnownValue);
this->Assumed = std::max(this->Assumed, AssumedValue);
}
void joinAND(base_t AssumedValue, base_t KnownValue) override {
this->Known = std::min(this->Known, KnownValue);
this->Assumed = std::min(this->Assumed, AssumedValue);
}
};
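// Editorial sketch (not part of the upstream header): IncIntegerState keeps the
// assumed value an upper bound that can only shrink towards the known value.
// ```
//   IncIntegerState<uint32_t> S; // Assumed = ~0u (best), Known = 0 (worst).
//   S.takeKnownMaximum(4);       // At least 4 is known.
//   S.takeAssumedMinimum(10);    // Assumed drops to 10.
//   S.takeAssumedMinimum(2);     // Clamped at the known value 4.
//   assert(S.getKnown() == 4 && S.getAssumed() == 4);
// ```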
/// Specialization of the integer state for a decreasing value, hence 0 is the
/// best state and ~0u the worst.
template <typename base_ty = uint32_t>
struct DecIntegerState : public IntegerStateBase<base_ty, 0, ~base_ty(0)> {
using base_t = base_ty;
/// Take maximum of assumed and \p Value.
DecIntegerState &takeAssumedMaximum(base_t Value) {
// Make sure we never lose "known value".
this->Assumed = std::min(std::max(this->Assumed, Value), this->Known);
return *this;
}
/// Take minimum of known and \p Value.
DecIntegerState &takeKnownMinimum(base_t Value) {
// Make sure we never lose "known value".
this->Assumed = std::min(Value, this->Assumed);
this->Known = std::min(Value, this->Known);
return *this;
}
private:
void handleNewAssumedValue(base_t Value) override {
takeAssumedMaximum(Value);
}
void handleNewKnownValue(base_t Value) override { takeKnownMinimum(Value); }
void joinOR(base_t AssumedValue, base_t KnownValue) override {
this->Assumed = std::min(this->Assumed, KnownValue);
this->Assumed = std::min(this->Assumed, AssumedValue);
}
void joinAND(base_t AssumedValue, base_t KnownValue) override {
this->Assumed = std::max(this->Assumed, KnownValue);
this->Assumed = std::max(this->Assumed, AssumedValue);
}
};
/// Simple wrapper for a single bit (boolean) state.
struct BooleanState : public IntegerStateBase<bool, true, false> {
using super = IntegerStateBase<bool, true, false>;
using base_t = IntegerStateBase::base_t;
BooleanState() = default;
BooleanState(base_t Assumed) : super(Assumed) {}
/// Set the assumed value to \p Value but never below the known one.
void setAssumed(bool Value) { Assumed &= (Known | Value); }
/// Set the known and assumed value to \p Value.
void setKnown(bool Value) {
Known |= Value;
Assumed |= Value;
}
/// Return true if the state is assumed to hold.
bool isAssumed() const { return getAssumed(); }
/// Return true if the state is known to hold.
bool isKnown() const { return getKnown(); }
private:
void handleNewAssumedValue(base_t Value) override {
if (!Value)
Assumed = Known;
}
void handleNewKnownValue(base_t Value) override {
if (Value)
Known = (Assumed = Value);
}
void joinOR(base_t AssumedValue, base_t KnownValue) override {
Known |= KnownValue;
Assumed |= AssumedValue;
}
void joinAND(base_t AssumedValue, base_t KnownValue) override {
Known &= KnownValue;
Assumed &= AssumedValue;
}
};
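// Editorial sketch (not part of the upstream header): BooleanState is optimistic
// by default and can only be lowered until it meets the known value.
// ```
//   BooleanState B;      // Assumed = true, Known = false.
//   B.setAssumed(false); // Give up the optimistic assumption.
//   assert(!B.isAssumed() && !B.isKnown());
//   B.setKnown(true);    // Raises both the known and the assumed value.
//   assert(B.isAssumed() && B.isKnown());
// ```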
/// State for an integer range.
struct IntegerRangeState : public AbstractState {
/// Bitwidth of the associated value.
uint32_t BitWidth;
/// State representing assumed range, initially set to empty.
ConstantRange Assumed;
/// State representing known range, initially set to [-inf, inf].
ConstantRange Known;
IntegerRangeState(uint32_t BitWidth)
: BitWidth(BitWidth), Assumed(ConstantRange::getEmpty(BitWidth)),
Known(ConstantRange::getFull(BitWidth)) {}
IntegerRangeState(const ConstantRange &CR)
: BitWidth(CR.getBitWidth()), Assumed(CR),
Known(getWorstState(CR.getBitWidth())) {}
/// Return the worst possible representable state.
static ConstantRange getWorstState(uint32_t BitWidth) {
return ConstantRange::getFull(BitWidth);
}
/// Return the best possible representable state.
static ConstantRange getBestState(uint32_t BitWidth) {
return ConstantRange::getEmpty(BitWidth);
}
static ConstantRange getBestState(const IntegerRangeState &IRS) {
return getBestState(IRS.getBitWidth());
}
/// Return associated values' bit width.
uint32_t getBitWidth() const { return BitWidth; }
/// See AbstractState::isValidState()
bool isValidState() const override {
return BitWidth > 0 && !Assumed.isFullSet();
}
/// See AbstractState::isAtFixpoint()
bool isAtFixpoint() const override { return Assumed == Known; }
/// See AbstractState::indicateOptimisticFixpoint(...)
ChangeStatus indicateOptimisticFixpoint() override {
Known = Assumed;
return ChangeStatus::CHANGED;
}
/// See AbstractState::indicatePessimisticFixpoint(...)
ChangeStatus indicatePessimisticFixpoint() override {
Assumed = Known;
return ChangeStatus::CHANGED;
}
/// Return the known state encoding
ConstantRange getKnown() const { return Known; }
/// Return the assumed state encoding.
ConstantRange getAssumed() const { return Assumed; }
/// Unite assumed range with the passed state.
void unionAssumed(const ConstantRange &R) {
// Don't lose a known range.
Assumed = Assumed.unionWith(R).intersectWith(Known);
}
/// See IntegerRangeState::unionAssumed(..).
void unionAssumed(const IntegerRangeState &R) {
unionAssumed(R.getAssumed());
}
/// Unite known range with the passed state.
void unionKnown(const ConstantRange &R) {
// Don't lose a known range.
Known = Known.unionWith(R);
Assumed = Assumed.unionWith(Known);
}
/// See IntegerRangeState::unionKnown(..).
void unionKnown(const IntegerRangeState &R) { unionKnown(R.getKnown()); }
/// Intersect known range with the passed state.
void intersectKnown(const ConstantRange &R) {
Assumed = Assumed.intersectWith(R);
Known = Known.intersectWith(R);
}
/// See IntegerRangeState::intersectKnown(..).
void intersectKnown(const IntegerRangeState &R) {
intersectKnown(R.getKnown());
}
/// Equality for IntegerRangeState.
bool operator==(const IntegerRangeState &R) const {
return getAssumed() == R.getAssumed() && getKnown() == R.getKnown();
}
/// "Clamp" this state with \p R. The result is subtype dependent but it is
/// intended that only information assumed in both states will be assumed in
/// this one afterwards.
IntegerRangeState operator^=(const IntegerRangeState &R) {
// NOTE: `^=` operator seems like `intersect` but in this case, we need to
// take `union`.
unionAssumed(R);
return *this;
}
IntegerRangeState operator&=(const IntegerRangeState &R) {
// NOTE: `&=` operator seems like `intersect` but in this case, we need to
// take `union`.
unionKnown(R);
unionAssumed(R);
return *this;
}
};
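// Editorial sketch (not part of the upstream header): IntegerRangeState starts
// from the empty (best) assumed range and is widened via unionAssumed; the
// concrete ranges are hypothetical.
// ```
//   IntegerRangeState RS(/* BitWidth */ 32);
//   RS.unionAssumed(ConstantRange(APInt(32, 0), APInt(32, 10))); // [0, 10)
//   RS.unionAssumed(ConstantRange(APInt(32, 5), APInt(32, 20))); // now [0, 20)
//   assert(RS.isValidState() && RS.getAssumed().contains(APInt(32, 15)));
// ```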
/// Simple state for a set.
///
/// This represents a state containing a set of values. The interface supports
/// modelling sets that contain all possible elements. The state's internal
/// value is modified using union or intersection operations.
template <typename BaseTy> struct SetState : public AbstractState {
/// A wrapper around a set that has semantics for handling unions and
/// intersections with a "universal" set that contains all elements.
struct SetContents {
/// Creates either a universal set (containing every element, with no concrete
/// elements stored) or an empty set, depending on \p Universal.
SetContents(bool Universal) : Universal(Universal) {}
/// Creates a non-universal set with concrete values.
SetContents(const DenseSet<BaseTy> &Assumptions)
: Universal(false), Set(Assumptions) {}
SetContents(bool Universal, const DenseSet<BaseTy> &Assumptions)
: Universal(Universal), Set(Assumptions) {}
const DenseSet<BaseTy> &getSet() const { return Set; }
bool isUniversal() const { return Universal; }
bool empty() const { return Set.empty() && !Universal; }
/// Finds A := A ^ B where A or B could be the "Universal" set which
/// contains every possible attribute. Returns true if changes were made.
bool getIntersection(const SetContents &RHS) {
bool IsUniversal = Universal;
unsigned Size = Set.size();
// A := A ^ U = A
if (RHS.isUniversal())
return false;
// A := U ^ B = B
if (Universal)
Set = RHS.getSet();
else
set_intersect(Set, RHS.getSet());
Universal &= RHS.isUniversal();
return IsUniversal != Universal || Size != Set.size();
}
/// Finds A := A u B where A or B could be the "Universal" set which
/// contains every possible attribute. Returns true if changes were made.
bool getUnion(const SetContents &RHS) {
bool IsUniversal = Universal;
unsigned Size = Set.size();
// A := A u U = U = U u B
if (!RHS.isUniversal() && !Universal)
set_union(Set, RHS.getSet());
Universal |= RHS.isUniversal();
return IsUniversal != Universal || Size != Set.size();
}
private:
/// Indicates if this set is "universal", containing every possible element.
bool Universal;
/// The set of currently active assumptions.
DenseSet<BaseTy> Set;
};
SetState() : Known(false), Assumed(true), IsAtFixedpoint(false) {}
/// Initializes the known state with an initial set and initializes the
/// assumed state as universal.
SetState(const DenseSet<BaseTy> &Known)
: Known(Known), Assumed(true), IsAtFixedpoint(false) {}
/// See AbstractState::isValidState()
bool isValidState() const override { return !Assumed.empty(); }
/// See AbstractState::isAtFixpoint()
bool isAtFixpoint() const override { return IsAtFixedpoint; }
/// See AbstractState::indicateOptimisticFixpoint(...)
ChangeStatus indicateOptimisticFixpoint() override {
IsAtFixedpoint = true;
Known = Assumed;
return ChangeStatus::UNCHANGED;
}
/// See AbstractState::indicatePessimisticFixpoint(...)
ChangeStatus indicatePessimisticFixpoint() override {
IsAtFixedpoint = true;
Assumed = Known;
return ChangeStatus::CHANGED;
}
/// Return the known state encoding.
const SetContents &getKnown() const { return Known; }
/// Return the assumed state encoding.
const SetContents &getAssumed() const { return Assumed; }
/// Returns if the set state contains the element.
bool setContains(const BaseTy &Elem) const {
return Assumed.getSet().contains(Elem) || Known.getSet().contains(Elem);
}
/// Performs the set intersection between this set and \p RHS. Returns true if
/// changes were made.
bool getIntersection(const SetContents &RHS) {
unsigned SizeBefore = Assumed.getSet().size();
// Get intersection and make sure that the known set is still a proper
// subset of the assumed set. A := K u (A ^ R).
Assumed.getIntersection(RHS);
Assumed.getUnion(Known);
return SizeBefore != Assumed.getSet().size();
}
/// Performs the set union between this set and \p RHS. Returns true if
/// changes were made.
bool getUnion(const SetContents &RHS) { return Assumed.getUnion(RHS); }
private:
/// The set of values known for this state.
SetContents Known;
/// The set of assumed values for this state.
SetContents Assumed;
bool IsAtFixedpoint;
};
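// Editorial sketch (not part of the upstream header): SetState starts with a
// universal assumed set that is narrowed by intersection; the string elements
// are hypothetical.
// ```
//   DenseSet<StringRef> InitiallyKnown({"assume-a"});
//   SetState<StringRef> SS(InitiallyKnown);
//   // Intersect the (universal) assumed set with a concrete candidate set.
//   SS.getIntersection(SetState<StringRef>::SetContents(
//       DenseSet<StringRef>({"assume-a", "assume-b"})));
//   assert(SS.setContains("assume-a"));
// ```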
/// Helper struct necessary as the modular build fails if the virtual method
/// IRAttribute::manifest is defined in the Attributor.cpp.
struct IRAttributeManifest {
static ChangeStatus manifestAttrs(Attributor &A, const IRPosition &IRP,
const ArrayRef<Attribute> &DeducedAttrs,
bool ForceReplace = false);
};
/// Helper to tie an abstract state implementation to an abstract attribute.
template <typename StateTy, typename BaseType, class... Ts>
struct StateWrapper : public BaseType, public StateTy {
/// Provide static access to the type of the state.
using StateType = StateTy;
StateWrapper(const IRPosition &IRP, Ts... Args)
: BaseType(IRP), StateTy(Args...) {}
/// See AbstractAttribute::getState(...).
StateType &getState() override { return *this; }
/// See AbstractAttribute::getState(...).
const StateType &getState() const override { return *this; }
};
/// Helper class that provides common functionality to manifest IR attributes.
template <Attribute::AttrKind AK, typename BaseType>
struct IRAttribute : public BaseType {
IRAttribute(const IRPosition &IRP) : BaseType(IRP) {}
/// See AbstractAttribute::initialize(...).
virtual void initialize(Attributor &A) override {
const IRPosition &IRP = this->getIRPosition();
if (isa<UndefValue>(IRP.getAssociatedValue()) ||
this->hasAttr(getAttrKind(), /* IgnoreSubsumingPositions */ false,
&A)) {
this->getState().indicateOptimisticFixpoint();
return;
}
bool IsFnInterface = IRP.isFnInterfaceKind();
const Function *FnScope = IRP.getAnchorScope();
// TODO: Not all attributes require an exact definition. Find a way to
// enable deduction for some but not all attributes in case the
// definition might be changed at runtime, see also
// http://lists.llvm.org/pipermail/llvm-dev/2018-February/121275.html.
// TODO: We could always determine abstract attributes and if sufficient
// information was found we could duplicate the functions that do not
// have an exact definition.
if (IsFnInterface && (!FnScope || !A.isFunctionIPOAmendable(*FnScope)))
this->getState().indicatePessimisticFixpoint();
}
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
if (isa<UndefValue>(this->getIRPosition().getAssociatedValue()))
return ChangeStatus::UNCHANGED;
SmallVector<Attribute, 4> DeducedAttrs;
getDeducedAttributes(this->getAnchorValue().getContext(), DeducedAttrs);
return IRAttributeManifest::manifestAttrs(A, this->getIRPosition(),
DeducedAttrs);
}
/// Return the kind that identifies the abstract attribute implementation.
Attribute::AttrKind getAttrKind() const { return AK; }
/// Return the deduced attributes in \p Attrs.
virtual void getDeducedAttributes(LLVMContext &Ctx,
SmallVectorImpl<Attribute> &Attrs) const {
Attrs.emplace_back(Attribute::get(Ctx, getAttrKind()));
}
};
/// Base struct for all "concrete attribute" deductions.
///
/// The abstract attribute is a minimal interface that allows the Attributor to
/// orchestrate the abstract/fixpoint analysis. The design allows to hide away
/// implementation choices made for the subclasses but also to structure their
/// implementation and simplify the use of other abstract attributes in-flight.
///
/// To allow easy creation of new attributes, most methods have default
/// implementations. The ones that do not are generally straightforward, except
/// `AbstractAttribute::updateImpl` which is the location of most reasoning
/// associated with the abstract attribute. The update is invoked by the
/// Attributor in case the situation used to justify the current optimistic
/// state might have changed. The Attributor determines this automatically
/// by monitoring the `Attributor::getAAFor` calls made by abstract attributes.
///
/// The `updateImpl` method should inspect the IR and other abstract attributes
/// in-flight to justify the best possible (=optimistic) state. The actual
/// implementation is, similar to the underlying abstract state encoding, not
/// exposed. In the most common case, the `updateImpl` will go through a list of
/// reasons why its optimistic state is valid given the current information. If
/// any combination of them holds and is sufficient to justify the current
/// optimistic state, the method shall return UNCHANGED. If not, the optimistic
/// state is adjusted to the situation and the method shall return CHANGED.
///
/// If the manifestation of the "concrete attribute" deduced by the subclass
/// differs from the "default" behavior, which is a (set of) LLVM-IR
/// attribute(s) for an argument, call site argument, function return value, or
/// function, the `AbstractAttribute::manifest` method should be overloaded.
///
/// NOTE: If the state obtained via getState() is INVALID, thus if
/// AbstractAttribute::getState().isValidState() returns false, no
/// information provided by the methods of this class should be used.
/// NOTE: The Attributor currently has certain limitations to what we can do.
/// As a general rule of thumb, "concrete" abstract attributes should *for
/// now* only perform "backward" information propagation. That means
/// optimistic information obtained through abstract attributes should
/// only be used at positions that precede the origin of the information
/// with regards to the program flow. More practically, information can
/// *now* be propagated from instructions to their enclosing function, but
/// *not* from call sites to the called function. The mechanisms to allow
/// both directions will be added in the future.
/// NOTE: The mechanics of adding a new "concrete" abstract attribute are
/// described in the file comment.
struct AbstractAttribute : public IRPosition, public AADepGraphNode {
using StateType = AbstractState;
AbstractAttribute(const IRPosition &IRP) : IRPosition(IRP) {}
/// Virtual destructor.
virtual ~AbstractAttribute() = default;
/// This function is used to identify if a \p DGN is of type
/// AbstractAttribute so that the dyn_cast and cast can use such information
/// to cast an AADepGraphNode to an AbstractAttribute.
///
/// We eagerly return true here because all AADepGraphNodes except for the
/// Synthesis Node are of type AbstractAttribute.
static bool classof(const AADepGraphNode *DGN) { return true; }
/// Initialize the state with the information in the Attributor \p A.
///
/// This function is called by the Attributor once all abstract attributes
/// have been identified. It can and shall be used for task like:
/// - identify existing knowledge in the IR and use it for the "known state"
/// - perform any work that is not going to change over time, e.g., determine
/// a subset of the IR, or attributes in-flight, that have to be looked at
/// in the `updateImpl` method.
virtual void initialize(Attributor &A) {}
/// A query AA is always scheduled as long as we do updates because it does
/// lazy computation that cannot be determined to be done from the outside.
/// However, while query AAs will not be fixed if they do not have outstanding
/// dependences, we will only schedule them like other AAs. If a query AA
/// received a new query, it needs to request an update via
/// `Attributor::requestUpdateForAA`.
virtual bool isQueryAA() const { return false; }
/// Return the internal abstract state for inspection.
virtual StateType &getState() = 0;
virtual const StateType &getState() const = 0;
/// Return an IR position, see struct IRPosition.
const IRPosition &getIRPosition() const { return *this; };
IRPosition &getIRPosition() { return *this; };
/// Helper functions, for debug purposes only.
///{
void print(raw_ostream &OS) const override;
virtual void printWithDeps(raw_ostream &OS) const;
void dump() const { print(dbgs()); }
/// This function should return the "summarized" assumed state as string.
virtual const std::string getAsStr() const = 0;
/// This function should return the name of the AbstractAttribute
virtual const std::string getName() const = 0;
/// This function should return the address of the ID of the AbstractAttribute
virtual const char *getIdAddr() const = 0;
///}
/// Allow the Attributor access to the protected methods.
friend struct Attributor;
protected:
/// Hook for the Attributor to trigger an update of the internal state.
///
/// If this attribute is already fixed, this method will return UNCHANGED,
/// otherwise it delegates to `AbstractAttribute::updateImpl`.
///
/// \Return CHANGED if the internal state changed, otherwise UNCHANGED.
ChangeStatus update(Attributor &A);
/// Hook for the Attributor to trigger the manifestation of the information
/// represented by the abstract attribute in the LLVM-IR.
///
/// \Return CHANGED if the IR was altered, otherwise UNCHANGED.
virtual ChangeStatus manifest(Attributor &A) {
return ChangeStatus::UNCHANGED;
}
/// Hook to enable custom statistic tracking, called after manifest that
/// resulted in a change if statistics are enabled.
///
/// We require subclasses to provide an implementation so we remember to
/// add statistics for them.
virtual void trackStatistics() const = 0;
/// The actual update/transfer function which has to be implemented by the
/// derived classes.
///
/// If it is called, the environment has changed and we have to determine if
/// the current information is still valid or adjust it otherwise.
///
/// \Return CHANGED if the internal state changed, otherwise UNCHANGED.
virtual ChangeStatus updateImpl(Attributor &A) = 0;
};
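// Editorial sketch (not part of the upstream header): the minimal boilerplate of
// a concrete abstract attribute built on StateWrapper; `AAMyExample` and its
// trivial update are hypothetical and only show which hooks must be overridden.
// ```
//   struct AAMyExample : public StateWrapper<BooleanState, AbstractAttribute> {
//     using Base = StateWrapper<BooleanState, AbstractAttribute>;
//     AAMyExample(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
//     const std::string getAsStr() const override {
//       return isAssumed() ? "my-example" : "no-my-example";
//     }
//     const std::string getName() const override { return "AAMyExample"; }
//     const char *getIdAddr() const override { return &ID; }
//     void trackStatistics() const override {}
//     ChangeStatus updateImpl(Attributor &A) override {
//       // A real deduction would inspect the IR and other AAs here.
//       return indicatePessimisticFixpoint();
//     }
//     static const char ID;
//   };
// ```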
/// Forward declarations of output streams for debug purposes.
///
///{
raw_ostream &operator<<(raw_ostream &OS, const AbstractAttribute &AA);
raw_ostream &operator<<(raw_ostream &OS, ChangeStatus S);
raw_ostream &operator<<(raw_ostream &OS, IRPosition::Kind);
raw_ostream &operator<<(raw_ostream &OS, const IRPosition &);
raw_ostream &operator<<(raw_ostream &OS, const AbstractState &State);
template <typename base_ty, base_ty BestState, base_ty WorstState>
raw_ostream &
operator<<(raw_ostream &OS,
const IntegerStateBase<base_ty, BestState, WorstState> &S) {
return OS << "(" << S.getKnown() << "-" << S.getAssumed() << ")"
<< static_cast<const AbstractState &>(S);
}
raw_ostream &operator<<(raw_ostream &OS, const IntegerRangeState &State);
///}
struct AttributorPass : public PassInfoMixin<AttributorPass> {
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
struct AttributorCGSCCPass : public PassInfoMixin<AttributorCGSCCPass> {
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
LazyCallGraph &CG, CGSCCUpdateResult &UR);
};
Pass *createAttributorLegacyPass();
Pass *createAttributorCGSCCLegacyPass();
/// Helper function to clamp a state \p S of type \p StateType with the
/// information in \p R and indicate/return if \p S did change (as-in update is
/// required to be run again).
template <typename StateType>
ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R) {
auto Assumed = S.getAssumed();
S ^= R;
return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED
: ChangeStatus::CHANGED;
}
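// Editorial sketch (not part of the upstream header): the effect of
// clampStateAndIndicateChange on a boolean state.
// ```
//   BooleanState S;                      // Optimistic: assumed = true.
//   BooleanState R(/* Assumed */ false); // The state we clamp against.
//   ChangeStatus CS = clampStateAndIndicateChange(S, R);
//   assert(CS == ChangeStatus::CHANGED && !S.isAssumed());
// ```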
/// ----------------------------------------------------------------------------
/// Abstract Attribute Classes
/// ----------------------------------------------------------------------------
/// An abstract attribute for the returned values of a function.
struct AAReturnedValues
: public IRAttribute<Attribute::Returned, AbstractAttribute> {
AAReturnedValues(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
/// Return an assumed unique return value if a single candidate is found. If
/// there cannot be one, return a nullptr. If it is not clear yet, return the
/// Optional::NoneType.
Optional<Value *> getAssumedUniqueReturnValue(Attributor &A) const;
/// Check \p Pred on all returned values.
///
/// This method will evaluate \p Pred on returned values and return
/// true if (1) all returned values are known, and (2) \p Pred returned true
/// for all returned values.
///
/// Note: Unlike the Attributor::checkForAllReturnedValuesAndReturnInsts
/// method, this one will not filter dead return instructions.
virtual bool checkForAllReturnedValuesAndReturnInsts(
function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred)
const = 0;
using iterator =
MapVector<Value *, SmallSetVector<ReturnInst *, 4>>::iterator;
using const_iterator =
MapVector<Value *, SmallSetVector<ReturnInst *, 4>>::const_iterator;
virtual llvm::iterator_range<iterator> returned_values() = 0;
virtual llvm::iterator_range<const_iterator> returned_values() const = 0;
virtual size_t getNumReturnValues() const = 0;
/// Create an abstract attribute view for the position \p IRP.
static AAReturnedValues &createForPosition(const IRPosition &IRP,
Attributor &A);
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAReturnedValues"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is
/// AAReturnedValues
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
};
struct AANoUnwind
: public IRAttribute<Attribute::NoUnwind,
StateWrapper<BooleanState, AbstractAttribute>> {
AANoUnwind(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
/// Returns true if nounwind is assumed.
bool isAssumedNoUnwind() const { return getAssumed(); }
/// Returns true if nounwind is known.
bool isKnownNoUnwind() const { return getKnown(); }
/// Create an abstract attribute view for the position \p IRP.
static AANoUnwind &createForPosition(const IRPosition &IRP, Attributor &A);
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AANoUnwind"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is AANoUnwind
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
};
struct AANoSync
: public IRAttribute<Attribute::NoSync,
StateWrapper<BooleanState, AbstractAttribute>> {
AANoSync(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
/// Returns true if "nosync" is assumed.
bool isAssumedNoSync() const { return getAssumed(); }
/// Returns true if "nosync" is known.
bool isKnownNoSync() const { return getKnown(); }
/// Helper function used to determine whether an instruction is non-relaxed
/// atomic, i.e., an atomic instruction that does not have unordered or
/// monotonic ordering.
static bool isNonRelaxedAtomic(const Instruction *I);
/// Helper function specific for intrinsics which are potentially volatile.
static bool isNoSyncIntrinsic(const Instruction *I);
/// Create an abstract attribute view for the position \p IRP.
static AANoSync &createForPosition(const IRPosition &IRP, Attributor &A);
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AANoSync"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is AANoSync
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
};
/// An abstract interface for all nonnull attributes.
struct AANonNull
: public IRAttribute<Attribute::NonNull,
StateWrapper<BooleanState, AbstractAttribute>> {
AANonNull(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
/// Return true if we assume that the underlying value is nonnull.
bool isAssumedNonNull() const { return getAssumed(); }
/// Return true if we know that underlying value is nonnull.
bool isKnownNonNull() const { return getKnown(); }
/// Create an abstract attribute view for the position \p IRP.
static AANonNull &createForPosition(const IRPosition &IRP, Attributor &A);
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AANonNull"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is AANonNull
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
};
/// An abstract attribute for norecurse.
struct AANoRecurse
: public IRAttribute<Attribute::NoRecurse,
StateWrapper<BooleanState, AbstractAttribute>> {
AANoRecurse(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
/// Return true if "norecurse" is assumed.
bool isAssumedNoRecurse() const { return getAssumed(); }
/// Return true if "norecurse" is known.
bool isKnownNoRecurse() const { return getKnown(); }
/// Create an abstract attribute view for the position \p IRP.
static AANoRecurse &createForPosition(const IRPosition &IRP, Attributor &A);
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AANoRecurse"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is AANoRecurse
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
};
/// An abstract attribute for willreturn.
struct AAWillReturn
: public IRAttribute<Attribute::WillReturn,
StateWrapper<BooleanState, AbstractAttribute>> {
AAWillReturn(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
/// Return true if "willreturn" is assumed.
bool isAssumedWillReturn() const { return getAssumed(); }
/// Return true if "willreturn" is known.
bool isKnownWillReturn() const { return getKnown(); }
/// Create an abstract attribute view for the position \p IRP.
static AAWillReturn &createForPosition(const IRPosition &IRP, Attributor &A);
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAWillReturn"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is AAWillReturn
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
};
/// An abstract attribute for undefined behavior.
struct AAUndefinedBehavior
: public StateWrapper<BooleanState, AbstractAttribute> {
using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAUndefinedBehavior(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
/// Return true if "undefined behavior" is assumed.
bool isAssumedToCauseUB() const { return getAssumed(); }
/// Return true if "undefined behavior" is assumed for a specific instruction.
virtual bool isAssumedToCauseUB(Instruction *I) const = 0;
/// Return true if "undefined behavior" is known.
bool isKnownToCauseUB() const { return getKnown(); }
/// Return true if "undefined behavior" is known for a specific instruction.
virtual bool isKnownToCauseUB(Instruction *I) const = 0;
/// Create an abstract attribute view for the position \p IRP.
static AAUndefinedBehavior &createForPosition(const IRPosition &IRP,
Attributor &A);
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAUndefinedBehavior"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is
/// AAUndefinedBehavior
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
};
/// An abstract interface to determine reachability of point A to B.
struct AAReachability : public StateWrapper<BooleanState, AbstractAttribute> {
using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAReachability(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
/// Returns true if the 'From' instruction is assumed to reach the 'To' instruction.
/// Users should provide two positions they are interested in, and the class
/// determines (and caches) reachability.
bool isAssumedReachable(Attributor &A, const Instruction &From,
const Instruction &To) const {
if (!getState().isValidState())
return true;
return A.getInfoCache().getPotentiallyReachable(From, To);
}
/// Returns true if the 'From' instruction is known to reach the 'To' instruction.
/// Users should provide two positions they are interested in, and the class
/// determines (and caches) reachability.
bool isKnownReachable(Attributor &A, const Instruction &From,
const Instruction &To) const {
if (!getState().isValidState())
return false;
return A.getInfoCache().getPotentiallyReachable(From, To);
}
/// Create an abstract attribute view for the position \p IRP.
static AAReachability &createForPosition(const IRPosition &IRP,
Attributor &A);
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAReachability"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is
/// AAReachability
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
};
/// An abstract interface for all noalias attributes.
struct AANoAlias
: public IRAttribute<Attribute::NoAlias,
StateWrapper<BooleanState, AbstractAttribute>> {
AANoAlias(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
/// Return true if we assume that the underlying value is noalias.
bool isAssumedNoAlias() const { return getAssumed(); }
/// Return true if we know that underlying value is noalias.
bool isKnownNoAlias() const { return getKnown(); }
/// Create an abstract attribute view for the position \p IRP.
static AANoAlias &createForPosition(const IRPosition &IRP, Attributor &A);
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AANoAlias"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is AANoAlias
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
};
/// An AbstractAttribute for nofree.
struct AANoFree
: public IRAttribute<Attribute::NoFree,
StateWrapper<BooleanState, AbstractAttribute>> {
AANoFree(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
/// Return true if "nofree" is assumed.
bool isAssumedNoFree() const { return getAssumed(); }
/// Return true if "nofree" is known.
bool isKnownNoFree() const { return getKnown(); }
/// Create an abstract attribute view for the position \p IRP.
static AANoFree &createForPosition(const IRPosition &IRP, Attributor &A);
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AANoFree"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is AANoFree
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
};
/// An AbstractAttribute for noreturn.
struct AANoReturn
: public IRAttribute<Attribute::NoReturn,
StateWrapper<BooleanState, AbstractAttribute>> {
AANoReturn(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
/// Return true if the underlying object is assumed to never return.
bool isAssumedNoReturn() const { return getAssumed(); }
/// Return true if the underlying object is known to never return.
bool isKnownNoReturn() const { return getKnown(); }
/// Create an abstract attribute view for the position \p IRP.
static AANoReturn &createForPosition(const IRPosition &IRP, Attributor &A);
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AANoReturn"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is AANoReturn
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
};
/// An abstract interface for liveness abstract attribute.
struct AAIsDead
: public StateWrapper<BitIntegerState<uint8_t, 3, 0>, AbstractAttribute> {
using Base = StateWrapper<BitIntegerState<uint8_t, 3, 0>, AbstractAttribute>;
AAIsDead(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
/// State encoding bits. A set bit in the state means the property holds.
enum {
HAS_NO_EFFECT = 1 << 0,
IS_REMOVABLE = 1 << 1,
IS_DEAD = HAS_NO_EFFECT | IS_REMOVABLE,
};
static_assert(IS_DEAD == getBestState(), "Unexpected BEST_STATE value");
protected:
/// The query functions are protected such that other attributes need to go
/// through the Attributor interfaces: `Attributor::isAssumedDead(...)`
/// Returns true if the underlying value is assumed dead.
virtual bool isAssumedDead() const = 0;
/// Returns true if the underlying value is known dead.
virtual bool isKnownDead() const = 0;
/// Returns true if \p BB is assumed dead.
virtual bool isAssumedDead(const BasicBlock *BB) const = 0;
/// Returns true if \p BB is known dead.
virtual bool isKnownDead(const BasicBlock *BB) const = 0;
/// Returns true if \p I is assumed dead.
virtual bool isAssumedDead(const Instruction *I) const = 0;
/// Returns true if \p I is known dead.
virtual bool isKnownDead(const Instruction *I) const = 0;
/// This method is used to check if at least one instruction in a collection
/// of instructions is live.
template <typename T> bool isLiveInstSet(T begin, T end) const {
for (const auto &I : llvm::make_range(begin, end)) {
assert(I->getFunction() == getIRPosition().getAssociatedFunction() &&
"Instruction must be in the same anchor scope function.");
if (!isAssumedDead(I))
return true;
}
return false;
}
public:
/// Create an abstract attribute view for the position \p IRP.
static AAIsDead &createForPosition(const IRPosition &IRP, Attributor &A);
/// Determine if \p F might catch asynchronous exceptions.
static bool mayCatchAsynchronousExceptions(const Function &F) {
return F.hasPersonalityFn() && !canSimplifyInvokeNoUnwind(&F);
}
/// Return if the edge from \p From BB to \p To BB is assumed dead.
/// This is specifically useful in AAReachability.
virtual bool isEdgeDead(const BasicBlock *From, const BasicBlock *To) const {
return false;
}
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAIsDead"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is AAIsDead
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
friend struct Attributor;
};
/// State for dereferenceable attribute
struct DerefState : AbstractState {
static DerefState getBestState() { return DerefState(); }
static DerefState getBestState(const DerefState &) { return getBestState(); }
/// Return the worst possible representable state.
static DerefState getWorstState() {
DerefState DS;
DS.indicatePessimisticFixpoint();
return DS;
}
static DerefState getWorstState(const DerefState &) {
return getWorstState();
}
/// State representing the dereferenceable bytes.
IncIntegerState<> DerefBytesState;
/// Map representing accessed memory offsets and sizes.
/// A key is the offset and a value is the size.
/// If there is a load/store instruction like
/// p[offset] = v;
/// then (offset, sizeof(v)) will be inserted into this map.
/// std::map is used because we want to iterate keys in ascending order.
std::map<int64_t, uint64_t> AccessedBytesMap;
/// Helper function to calculate dereferenceable bytes from current known
/// bytes and accessed bytes.
/// ```
/// int f(int *A){
///   *A = 0;
///   *(A+2) = 2;
///   *(A+1) = 1;
///   *(A+10) = 10;
/// }
/// ```
/// In that case, AccessedBytesMap is `{0:4, 4:4, 8:4, 40:4}`.
/// AccessedBytesMap is a std::map so it is iterated in ascending order on the
/// key (Offset). So KnownBytes will be updated like this:
///
/// |Access | KnownBytes
/// |(0, 4)| 0 -> 4
/// |(4, 4)| 4 -> 8
/// |(8, 4)| 8 -> 12
/// |(40, 4) | 12 (break)
void computeKnownDerefBytesFromAccessedMap() {
int64_t KnownBytes = DerefBytesState.getKnown();
for (auto &Access : AccessedBytesMap) {
if (KnownBytes < Access.first)
break;
KnownBytes = std::max(KnownBytes, Access.first + (int64_t)Access.second);
}
DerefBytesState.takeKnownMaximum(KnownBytes);
}
/// State representing whether the value is globally dereferenceable.
BooleanState GlobalState;
/// See AbstractState::isValidState()
bool isValidState() const override { return DerefBytesState.isValidState(); }
/// See AbstractState::isAtFixpoint()
bool isAtFixpoint() const override {
return !isValidState() ||
(DerefBytesState.isAtFixpoint() && GlobalState.isAtFixpoint());
}
/// See AbstractState::indicateOptimisticFixpoint(...)
ChangeStatus indicateOptimisticFixpoint() override {
DerefBytesState.indicateOptimisticFixpoint();
GlobalState.indicateOptimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
/// See AbstractState::indicatePessimisticFixpoint(...)
ChangeStatus indicatePessimisticFixpoint() override {
DerefBytesState.indicatePessimisticFixpoint();
GlobalState.indicatePessimisticFixpoint();
return ChangeStatus::CHANGED;
}
/// Update known dereferenceable bytes.
void takeKnownDerefBytesMaximum(uint64_t Bytes) {
DerefBytesState.takeKnownMaximum(Bytes);
// Known bytes might increase.
computeKnownDerefBytesFromAccessedMap();
}
/// Update assumed dereferenceable bytes.
void takeAssumedDerefBytesMinimum(uint64_t Bytes) {
DerefBytesState.takeAssumedMinimum(Bytes);
}
/// Add accessed bytes to the map.
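///
/// A small illustrative sequence (offsets and sizes are made up):
/// \code
///   DerefState DS;
///   DS.addAccessedBytes(/*Offset=*/0, /*Size=*/4);  // known bytes become 4
///   DS.addAccessedBytes(/*Offset=*/4, /*Size=*/4);  // contiguous, known bytes become 8
///   DS.addAccessedBytes(/*Offset=*/40, /*Size=*/4); // gap at 8, known bytes stay 8
/// \endcode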
void addAccessedBytes(int64_t Offset, uint64_t Size) {
uint64_t &AccessedBytes = AccessedBytesMap[Offset];
AccessedBytes = std::max(AccessedBytes, Size);
// Known bytes might increase.
computeKnownDerefBytesFromAccessedMap();
}
/// Equality for DerefState.
bool operator==(const DerefState &R) const {
return this->DerefBytesState == R.DerefBytesState &&
this->GlobalState == R.GlobalState;
}
/// Inequality for DerefState.
bool operator!=(const DerefState &R) const { return !(*this == R); }
/// See IntegerStateBase::operator^=
DerefState operator^=(const DerefState &R) {
DerefBytesState ^= R.DerefBytesState;
GlobalState ^= R.GlobalState;
return *this;
}
/// See IntegerStateBase::operator+=
DerefState operator+=(const DerefState &R) {
DerefBytesState += R.DerefBytesState;
GlobalState += R.GlobalState;
return *this;
}
/// See IntegerStateBase::operator&=
DerefState operator&=(const DerefState &R) {
DerefBytesState &= R.DerefBytesState;
GlobalState &= R.GlobalState;
return *this;
}
/// See IntegerStateBase::operator|=
DerefState operator|=(const DerefState &R) {
DerefBytesState |= R.DerefBytesState;
GlobalState |= R.GlobalState;
return *this;
}
protected:
const AANonNull *NonNullAA = nullptr;
};
/// An abstract interface for all dereferenceable attributes.
struct AADereferenceable
: public IRAttribute<Attribute::Dereferenceable,
StateWrapper<DerefState, AbstractAttribute>> {
AADereferenceable(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
/// Return true if we assume that the underlying value is nonnull.
bool isAssumedNonNull() const {
return NonNullAA && NonNullAA->isAssumedNonNull();
}
/// Return true if we know that the underlying value is nonnull.
bool isKnownNonNull() const {
return NonNullAA && NonNullAA->isKnownNonNull();
}
/// Return true if we assume that the underlying value is
/// dereferenceable(_or_null) globally.
bool isAssumedGlobal() const { return GlobalState.getAssumed(); }
/// Return true if we know that the underlying value is
/// dereferenceable(_or_null) globally.
bool isKnownGlobal() const { return GlobalState.getKnown(); }
/// Return assumed dereferenceable bytes.
uint32_t getAssumedDereferenceableBytes() const {
return DerefBytesState.getAssumed();
}
/// Return known dereferenceable bytes.
uint32_t getKnownDereferenceableBytes() const {
return DerefBytesState.getKnown();
}
/// Create an abstract attribute view for the position \p IRP.
static AADereferenceable &createForPosition(const IRPosition &IRP,
Attributor &A);
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AADereferenceable"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is
/// AADereferenceable
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
};
using AAAlignmentStateType =
IncIntegerState<uint64_t, Value::MaximumAlignment, 1>;
/// An abstract interface for all align attributes.
struct AAAlign : public IRAttribute<
Attribute::Alignment,
StateWrapper<AAAlignmentStateType, AbstractAttribute>> {
AAAlign(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
/// Return assumed alignment.
uint64_t getAssumedAlign() const { return getAssumed(); }
/// Return known alignment.
uint64_t getKnownAlign() const { return getKnown(); }
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAAlign"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is AAAlign
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Create an abstract attribute view for the position \p IRP.
static AAAlign &createForPosition(const IRPosition &IRP, Attributor &A);
/// Unique ID (due to the unique address)
static const char ID;
};
/// An abstract interface for all nocapture attributes.
struct AANoCapture
: public IRAttribute<
Attribute::NoCapture,
StateWrapper<BitIntegerState<uint16_t, 7, 0>, AbstractAttribute>> {
AANoCapture(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
/// State encoding bits. A set bit in the state means the property holds.
/// NO_CAPTURE is the best possible state, 0 the worst possible state.
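///
/// Two illustrative C functions (not part of this header) and the best state
/// the corresponding pointer argument can reach:
/// \code
///   static void sink(int *p) { (void)p; }          // p: NO_CAPTURE
///   static int *passthrough(int *p) { return p; }  // p: NO_CAPTURE_MAYBE_RETURNED
/// \endcode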
enum {
NOT_CAPTURED_IN_MEM = 1 << 0,
NOT_CAPTURED_IN_INT = 1 << 1,
NOT_CAPTURED_IN_RET = 1 << 2,
/// If we do not capture the value in memory or through integers we can only
/// communicate it back as a derived pointer.
NO_CAPTURE_MAYBE_RETURNED = NOT_CAPTURED_IN_MEM | NOT_CAPTURED_IN_INT,
/// If we do not capture the value in memory, through integers, or as a
/// derived pointer we know it is not captured.
NO_CAPTURE =
NOT_CAPTURED_IN_MEM | NOT_CAPTURED_IN_INT | NOT_CAPTURED_IN_RET,
};
/// Return true if we know that the underlying value is not captured in its
/// respective scope.
bool isKnownNoCapture() const { return isKnown(NO_CAPTURE); }
/// Return true if we assume that the underlying value is not captured in its
/// respective scope.
bool isAssumedNoCapture() const { return isAssumed(NO_CAPTURE); }
/// Return true if we know that the underlying value is not captured in its
/// respective scope but we allow it to escape through a "return".
bool isKnownNoCaptureMaybeReturned() const {
return isKnown(NO_CAPTURE_MAYBE_RETURNED);
}
/// Return true if we assume that the underlying value is not captured in its
/// respective scope but we allow it to escape through a "return".
bool isAssumedNoCaptureMaybeReturned() const {
return isAssumed(NO_CAPTURE_MAYBE_RETURNED);
}
/// Create an abstract attribute view for the position \p IRP.
static AANoCapture &createForPosition(const IRPosition &IRP, Attributor &A);
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AANoCapture"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is AANoCapture
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
};
struct ValueSimplifyStateType : public AbstractState {
ValueSimplifyStateType(Type *Ty) : Ty(Ty) {}
static ValueSimplifyStateType getBestState(Type *Ty) {
return ValueSimplifyStateType(Ty);
}
static ValueSimplifyStateType getBestState(const ValueSimplifyStateType &VS) {
return getBestState(VS.Ty);
}
/// Return the worst possible representable state.
static ValueSimplifyStateType getWorstState(Type *Ty) {
ValueSimplifyStateType DS(Ty);
DS.indicatePessimisticFixpoint();
return DS;
}
static ValueSimplifyStateType
getWorstState(const ValueSimplifyStateType &VS) {
return getWorstState(VS.Ty);
}
/// See AbstractState::isValidState(...)
bool isValidState() const override { return BS.isValidState(); }
/// See AbstractState::isAtFixpoint(...)
bool isAtFixpoint() const override { return BS.isAtFixpoint(); }
/// Return the assumed state encoding.
ValueSimplifyStateType getAssumed() { return *this; }
const ValueSimplifyStateType &getAssumed() const { return *this; }
/// See AbstractState::indicatePessimisticFixpoint(...)
ChangeStatus indicatePessimisticFixpoint() override {
return BS.indicatePessimisticFixpoint();
}
/// See AbstractState::indicateOptimisticFixpoint(...)
ChangeStatus indicateOptimisticFixpoint() override {
return BS.indicateOptimisticFixpoint();
}
/// "Clamp" this state with \p PVS.
ValueSimplifyStateType operator^=(const ValueSimplifyStateType &VS) {
BS ^= VS.BS;
unionAssumed(VS.SimplifiedAssociatedValue);
return *this;
}
bool operator==(const ValueSimplifyStateType &RHS) const {
if (isValidState() != RHS.isValidState())
return false;
if (!isValidState() && !RHS.isValidState())
return true;
return SimplifiedAssociatedValue == RHS.SimplifiedAssociatedValue;
}
protected:
/// The type of the original value.
Type *Ty;
/// Merge \p Other into the currently assumed simplified value
bool unionAssumed(Optional<Value *> Other);
/// Helper to track validity and fixpoint
BooleanState BS;
/// An assumed simplified value. Initially, it is set to Optional::None, which
/// means that the value is not clear under current assumption. If in the
/// pessimistic state, getAssumedSimplifiedValue doesn't return this value but
/// returns the original associated value.
Optional<Value *> SimplifiedAssociatedValue;
};
/// An abstract interface for value simplify abstract attribute.
struct AAValueSimplify
: public StateWrapper<ValueSimplifyStateType, AbstractAttribute, Type *> {
using Base = StateWrapper<ValueSimplifyStateType, AbstractAttribute, Type *>;
AAValueSimplify(const IRPosition &IRP, Attributor &A)
: Base(IRP, IRP.getAssociatedType()) {}
/// Create an abstract attribute view for the position \p IRP.
static AAValueSimplify &createForPosition(const IRPosition &IRP,
Attributor &A);
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAValueSimplify"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is
/// AAValueSimplify
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
private:
/// Return an assumed simplified value if a single candidate is found. If
/// there cannot be one, return the original value. If it is not clear yet,
/// return Optional::None.
///
/// Use `Attributor::getAssumedSimplified` for value simplification.
virtual Optional<Value *> getAssumedSimplifiedValue(Attributor &A) const = 0;
friend struct Attributor;
};
struct AAHeapToStack : public StateWrapper<BooleanState, AbstractAttribute> {
using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAHeapToStack(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
/// Returns true if HeapToStack conversion is assumed to be possible.
virtual bool isAssumedHeapToStack(const CallBase &CB) const = 0;
/// Returns true if HeapToStack conversion is assumed and \p CB is a call site
/// to a free operation that will be removed.
virtual bool isAssumedHeapToStackRemovedFree(CallBase &CB) const = 0;
/// Create an abstract attribute view for the position \p IRP.
static AAHeapToStack &createForPosition(const IRPosition &IRP, Attributor &A);
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAHeapToStack"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is AAHeapToStack
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
};
/// An abstract interface for privatizability.
///
/// A pointer is privatizable if it can be replaced by a new, private one.
/// Privatizing a pointer reduces its use count and the interaction between
/// unrelated code parts.
///
/// In order for a pointer to be privatizable its value cannot be observed
/// (=nocapture), it is (for now) not written (=readonly & noalias), we know
/// what values are necessary to make the private copy look like the original
/// one, and the values we need can be loaded (=dereferenceable).
struct AAPrivatizablePtr
: public StateWrapper<BooleanState, AbstractAttribute> {
using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAPrivatizablePtr(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
/// Returns true if pointer privatization is assumed to be possible.
bool isAssumedPrivatizablePtr() const { return getAssumed(); }
/// Returns true if pointer privatization is known to be possible.
bool isKnownPrivatizablePtr() const { return getKnown(); }
/// Return the type we can choose for a private copy of the underlying
/// value. None means it is not clear yet, nullptr means there is none.
virtual Optional<Type *> getPrivatizableType() const = 0;
/// Create an abstract attribute view for the position \p IRP.
static AAPrivatizablePtr &createForPosition(const IRPosition &IRP,
Attributor &A);
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAPrivatizablePtr"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is
/// AAPrivatizablePtr
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
};
/// An abstract interface for memory access kind related attributes
/// (readnone/readonly/writeonly).
struct AAMemoryBehavior
: public IRAttribute<
Attribute::ReadNone,
StateWrapper<BitIntegerState<uint8_t, 3>, AbstractAttribute>> {
AAMemoryBehavior(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
/// State encoding bits. A set bit in the state means the property holds.
/// BEST_STATE is the best possible state, 0 the worst possible state.
enum {
NO_READS = 1 << 0,
NO_WRITES = 1 << 1,
NO_ACCESSES = NO_READS | NO_WRITES,
BEST_STATE = NO_ACCESSES,
};
static_assert(BEST_STATE == getBestState(), "Unexpected BEST_STATE value");
/// Return true if we know that the underlying value is not read or accessed
/// in its respective scope.
bool isKnownReadNone() const { return isKnown(NO_ACCESSES); }
/// Return true if we assume that the underlying value is not read or accessed
/// in its respective scope.
bool isAssumedReadNone() const { return isAssumed(NO_ACCESSES); }
/// Return true if we know that the underlying value is not accessed
/// (=written) in its respective scope.
bool isKnownReadOnly() const { return isKnown(NO_WRITES); }
/// Return true if we assume that the underlying value is not accessed
/// (=written) in its respective scope.
bool isAssumedReadOnly() const { return isAssumed(NO_WRITES); }
/// Return true if we know that the underlying value is not read in its
/// respective scope.
bool isKnownWriteOnly() const { return isKnown(NO_READS); }
/// Return true if we assume that the underlying value is not read in its
/// respective scope.
bool isAssumedWriteOnly() const { return isAssumed(NO_READS); }
/// Create an abstract attribute view for the position \p IRP.
static AAMemoryBehavior &createForPosition(const IRPosition &IRP,
Attributor &A);
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAMemoryBehavior"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is
/// AAMemoryBehavior
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
};
/// An abstract interface for all memory location attributes
/// (readnone/argmemonly/inaccessiblememonly/inaccessibleorargmemonly).
struct AAMemoryLocation
: public IRAttribute<
Attribute::ReadNone,
StateWrapper<BitIntegerState<uint32_t, 511>, AbstractAttribute>> {
using MemoryLocationsKind = StateType::base_t;
AAMemoryLocation(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
/// Encoding of different locations that could be accessed by a memory
/// access.
enum {
ALL_LOCATIONS = 0,
NO_LOCAL_MEM = 1 << 0,
NO_CONST_MEM = 1 << 1,
NO_GLOBAL_INTERNAL_MEM = 1 << 2,
NO_GLOBAL_EXTERNAL_MEM = 1 << 3,
NO_GLOBAL_MEM = NO_GLOBAL_INTERNAL_MEM | NO_GLOBAL_EXTERNAL_MEM,
NO_ARGUMENT_MEM = 1 << 4,
NO_INACCESSIBLE_MEM = 1 << 5,
NO_MALLOCED_MEM = 1 << 6,
NO_UNKOWN_MEM = 1 << 7,
NO_LOCATIONS = NO_LOCAL_MEM | NO_CONST_MEM | NO_GLOBAL_INTERNAL_MEM |
NO_GLOBAL_EXTERNAL_MEM | NO_ARGUMENT_MEM |
NO_INACCESSIBLE_MEM | NO_MALLOCED_MEM | NO_UNKOWN_MEM,
// Helper bit to track if we gave up or not.
VALID_STATE = NO_LOCATIONS + 1,
BEST_STATE = NO_LOCATIONS | VALID_STATE,
};
static_assert(BEST_STATE == getBestState(), "Unexpected BEST_STATE value");
/// Return true if we know that the associated function has no observable
/// accesses.
bool isKnownReadNone() const { return isKnown(NO_LOCATIONS); }
/// Return true if we assume that the associated function has no observable
/// accesses.
bool isAssumedReadNone() const {
return isAssumed(NO_LOCATIONS) || isAssumedStackOnly();
}
/// Return true if we know that the associated function has at most
/// local/stack accesses.
bool isKnowStackOnly() const {
return isKnown(inverseLocation(NO_LOCAL_MEM, true, true));
}
/// Return true if we assume that the associated function has at most
/// local/stack accesses.
bool isAssumedStackOnly() const {
return isAssumed(inverseLocation(NO_LOCAL_MEM, true, true));
}
/// Return true if we know that the underlying value will only access
/// inaccessible memory (see Attribute::InaccessibleMemOnly).
bool isKnownInaccessibleMemOnly() const {
return isKnown(inverseLocation(NO_INACCESSIBLE_MEM, true, true));
}
/// Return true if we assume that the underlying value will only access
/// inaccessible memory (see Attribute::InaccessibleMemOnly).
bool isAssumedInaccessibleMemOnly() const {
return isAssumed(inverseLocation(NO_INACCESSIBLE_MEM, true, true));
}
/// Return true if we know that the underlying value will only access
/// argument pointees (see Attribute::ArgMemOnly).
bool isKnownArgMemOnly() const {
return isKnown(inverseLocation(NO_ARGUMENT_MEM, true, true));
}
/// Return true if we assume that the underlying value will only access
/// argument pointees (see Attribute::ArgMemOnly).
bool isAssumedArgMemOnly() const {
return isAssumed(inverseLocation(NO_ARGUMENT_MEM, true, true));
}
/// Return true if we know that the underlying value will only access
/// inaccessible memory or argument pointees (see
/// Attribute::InaccessibleOrArgMemOnly).
bool isKnownInaccessibleOrArgMemOnly() const {
return isKnown(
inverseLocation(NO_INACCESSIBLE_MEM | NO_ARGUMENT_MEM, true, true));
}
/// Return true if we assume that the underlying value will only access
/// inaccessible memory or argument pointees (see
/// Attribute::InaccessibleOrArgMemOnly).
bool isAssumedInaccessibleOrArgMemOnly() const {
return isAssumed(
inverseLocation(NO_INACCESSIBLE_MEM | NO_ARGUMENT_MEM, true, true));
}
/// Return true if the underlying value may access memory through argument
/// pointers of the associated function, if any.
bool mayAccessArgMem() const { return !isAssumed(NO_ARGUMENT_MEM); }
/// Return true if only the memory locations specified by \p MLK are assumed
/// to be accessed by the associated function.
bool isAssumedSpecifiedMemOnly(MemoryLocationsKind MLK) const {
return isAssumed(MLK);
}
/// Return the locations that are assumed to be not accessed by the associated
/// function, if any.
MemoryLocationsKind getAssumedNotAccessedLocation() const {
return getAssumed();
}
/// Return the inverse of location \p Loc, thus for NO_XXX the return
/// describes ONLY_XXX. The flags \p AndLocalMem and \p AndConstMem determine
/// if local (=stack) and constant memory are allowed as well. Most of the
/// time we do want them to be included, e.g., argmemonly allows accesses via
/// argument pointers or local or constant memory accesses.
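///
/// A usage sketch matching how isAssumedArgMemOnly() queries the state:
/// \code
///   // "argmemonly"-like: everything is excluded except argument, local, and
///   // constant memory.
///   MemoryLocationsKind ArgMemOnly =
///       inverseLocation(NO_ARGUMENT_MEM, /*AndLocalMem=*/true,
///                       /*AndConstMem=*/true);
/// \endcode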
static MemoryLocationsKind
inverseLocation(MemoryLocationsKind Loc, bool AndLocalMem, bool AndConstMem) {
return NO_LOCATIONS & ~(Loc | (AndLocalMem ? NO_LOCAL_MEM : 0) |
(AndConstMem ? NO_CONST_MEM : 0));
};
/// Return the locations encoded by \p MLK as a readable string.
static std::string getMemoryLocationsAsStr(MemoryLocationsKind MLK);
/// Simple enum to distinguish read/write/read-write accesses.
enum AccessKind {
NONE = 0,
READ = 1 << 0,
WRITE = 1 << 1,
READ_WRITE = READ | WRITE,
};
/// Check \p Pred on all accesses to the memory kinds specified by \p MLK.
///
/// This method will evaluate \p Pred on all accesses (access instruction +
/// underlying accessed memory pointer) and it will return true if \p Pred
/// holds every time.
virtual bool checkForAllAccessesToMemoryKind(
function_ref<bool(const Instruction *, const Value *, AccessKind,
MemoryLocationsKind)>
Pred,
MemoryLocationsKind MLK) const = 0;
/// Create an abstract attribute view for the position \p IRP.
static AAMemoryLocation &createForPosition(const IRPosition &IRP,
Attributor &A);
/// See AbstractState::getAsStr().
const std::string getAsStr() const override {
return getMemoryLocationsAsStr(getAssumedNotAccessedLocation());
}
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAMemoryLocation"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is
/// AAMemoryLocation
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
};
/// An abstract interface for range value analysis.
struct AAValueConstantRange
: public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
AAValueConstantRange(const IRPosition &IRP, Attributor &A)
: Base(IRP, IRP.getAssociatedType()->getIntegerBitWidth()) {}
/// See AbstractAttribute::getState(...).
IntegerRangeState &getState() override { return *this; }
const IntegerRangeState &getState() const override { return *this; }
/// Create an abstract attribute view for the position \p IRP.
static AAValueConstantRange &createForPosition(const IRPosition &IRP,
Attributor &A);
/// Return an assumed range for the associated value at a program point \p
/// CtxI. If \p CtxI is nullptr, simply return an assumed range.
virtual ConstantRange
getAssumedConstantRange(Attributor &A,
const Instruction *CtxI = nullptr) const = 0;
/// Return a known range for the associated value at a program point \p CtxI.
/// If \p CtxI is nullptr, simply return a known range.
virtual ConstantRange
getKnownConstantRange(Attributor &A,
const Instruction *CtxI = nullptr) const = 0;
/// Return an assumed constant for the associated value at a program point \p
/// CtxI.
Optional<ConstantInt *>
getAssumedConstantInt(Attributor &A,
const Instruction *CtxI = nullptr) const {
ConstantRange RangeV = getAssumedConstantRange(A, CtxI);
if (auto *C = RangeV.getSingleElement())
return cast<ConstantInt>(
ConstantInt::get(getAssociatedValue().getType(), *C));
if (RangeV.isEmptySet())
return llvm::None;
return nullptr;
}
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAValueConstantRange"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is
/// AAValueConstantRange
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
};
/// A class for a set state.
/// The assumed boolean state indicates whether the corresponding set is the
/// full set or not. If the assumed state is false, this is the worst state.
/// The worst state (invalid state) of a set of potential values is when the
/// set contains every possible value (i.e., we cannot in any way limit the
/// value that the target position can take). That never happens naturally; we
/// only force it. As for the conditions under which we force it, see
/// AAPotentialValues.
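///
/// A minimal growth sketch (using the APInt instantiation defined below as
/// PotentialConstantIntValuesState; the variable name is illustrative only):
/// \code
///   PotentialConstantIntValuesState S;   // best state: empty set
///   S.unionAssumed(APInt(32, 0));        // assumed set {0}
///   S.unionAssumed(APInt(32, 1));        // assumed set {0, 1}
///   // Once MaxPotentialValues distinct members have been inserted, the
///   // state collapses to the invalid (full-set) state.
/// \endcode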
template <typename MemberTy, typename KeyInfo = DenseMapInfo<MemberTy>>
struct PotentialValuesState : AbstractState {
using SetTy = DenseSet<MemberTy, KeyInfo>;
PotentialValuesState() : IsValidState(true), UndefIsContained(false) {}
PotentialValuesState(bool IsValid)
: IsValidState(IsValid), UndefIsContained(false) {}
/// See AbstractState::isValidState(...)
bool isValidState() const override { return IsValidState.isValidState(); }
/// See AbstractState::isAtFixpoint(...)
bool isAtFixpoint() const override { return IsValidState.isAtFixpoint(); }
/// See AbstractState::indicatePessimisticFixpoint(...)
ChangeStatus indicatePessimisticFixpoint() override {
return IsValidState.indicatePessimisticFixpoint();
}
/// See AbstractState::indicateOptimisticFixpoint(...)
ChangeStatus indicateOptimisticFixpoint() override {
return IsValidState.indicateOptimisticFixpoint();
}
/// Return the assumed state
PotentialValuesState &getAssumed() { return *this; }
const PotentialValuesState &getAssumed() const { return *this; }
/// Return this set. We should check whether this set is valid or not by
/// isValidState() before calling this function.
const SetTy &getAssumedSet() const {
assert(isValidState() && "This set shoud not be used when it is invalid!");
return Set;
}
/// Returns whether this state contains an undef value or not.
bool undefIsContained() const {
assert(isValidState() && "This flag shoud not be used when it is invalid!");
return UndefIsContained;
}
bool operator==(const PotentialValuesState &RHS) const {
if (isValidState() != RHS.isValidState())
return false;
if (!isValidState() && !RHS.isValidState())
return true;
if (undefIsContained() != RHS.undefIsContained())
return false;
return Set == RHS.getAssumedSet();
}
/// Maximum number of potential values to be tracked.
/// This is set by the -attributor-max-potential-values command line option.
static unsigned MaxPotentialValues;
/// Return empty set as the best state of potential values.
static PotentialValuesState getBestState() {
return PotentialValuesState(true);
}
static PotentialValuesState getBestState(PotentialValuesState &PVS) {
return getBestState();
}
/// Return full set as the worst state of potential values.
static PotentialValuesState getWorstState() {
return PotentialValuesState(false);
}
/// Union assumed set with the passed value.
void unionAssumed(const MemberTy &C) { insert(C); }
/// Union assumed set with assumed set of the passed state \p PVS.
void unionAssumed(const PotentialValuesState &PVS) { unionWith(PVS); }
/// Union assumed set with an undef value.
void unionAssumedWithUndef() { unionWithUndef(); }
/// "Clamp" this state with \p PVS.
PotentialValuesState operator^=(const PotentialValuesState &PVS) {
IsValidState ^= PVS.IsValidState;
unionAssumed(PVS);
return *this;
}
PotentialValuesState operator&=(const PotentialValuesState &PVS) {
IsValidState &= PVS.IsValidState;
unionAssumed(PVS);
return *this;
}
private:
/// Check the size of this set, and invalidate when the size is no
/// less than the \p MaxPotentialValues threshold.
void checkAndInvalidate() {
if (Set.size() >= MaxPotentialValues)
indicatePessimisticFixpoint();
else
reduceUndefValue();
}
/// If this state contains both undef and non-undef values, we can reduce
/// undef to the non-undef values.
void reduceUndefValue() { UndefIsContained = UndefIsContained & Set.empty(); }
/// Insert an element into this set.
void insert(const MemberTy &C) {
if (!isValidState())
return;
Set.insert(C);
checkAndInvalidate();
}
/// Take union with R.
void unionWith(const PotentialValuesState &R) {
/// If this is a full set, do nothing.
if (!isValidState())
return;
/// If R is full set, change L to a full set.
if (!R.isValidState()) {
indicatePessimisticFixpoint();
return;
}
for (const MemberTy &C : R.Set)
Set.insert(C);
UndefIsContained |= R.undefIsContained();
checkAndInvalidate();
}
/// Take union with an undef value.
void unionWithUndef() {
UndefIsContained = true;
reduceUndefValue();
}
/// Take intersection with R.
void intersectWith(const PotentialValuesState &R) {
/// If R is a full set, do nothing.
if (!R.isValidState())
return;
/// If this is a full set, change this to R.
if (!isValidState()) {
*this = R;
return;
}
SetTy IntersectSet;
for (const MemberTy &C : Set) {
if (R.Set.count(C))
IntersectSet.insert(C);
}
Set = IntersectSet;
UndefIsContained &= R.undefIsContained();
reduceUndefValue();
}
/// A helper state which indicates whether this state is valid or not.
BooleanState IsValidState;
/// Container for potential values
SetTy Set;
/// Flag for undef value
bool UndefIsContained;
};
using PotentialConstantIntValuesState = PotentialValuesState<APInt>;
raw_ostream &operator<<(raw_ostream &OS,
const PotentialConstantIntValuesState &R);
/// An abstract interface for potential values analysis.
///
/// This AA collects potential values for each IR position.
/// An assumed set of potential values is initialized with the empty set (the
/// best state) and it will grow monotonically as we find more potential values
/// for this position.
/// The set might be forced to the worst state, that is, to contain every
/// possible value for this position, in two cases:
/// 1. We surpassed the \p MaxPotentialValues threshold. This includes the
/// case that this position is affected (e.g. because of an operation) by a
/// Value that is in the worst state.
/// 2. We tried to initialize on a Value that we cannot handle (e.g. an
/// operator we do not currently handle).
///
/// TODO: Support values other than constant integers.
struct AAPotentialValues
: public StateWrapper<PotentialConstantIntValuesState, AbstractAttribute> {
using Base = StateWrapper<PotentialConstantIntValuesState, AbstractAttribute>;
AAPotentialValues(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
/// See AbstractAttribute::getState(...).
PotentialConstantIntValuesState &getState() override { return *this; }
const PotentialConstantIntValuesState &getState() const override {
return *this;
}
/// Create an abstract attribute view for the position \p IRP.
static AAPotentialValues &createForPosition(const IRPosition &IRP,
Attributor &A);
/// Return assumed constant for the associated value
Optional<ConstantInt *>
getAssumedConstantInt(Attributor &A,
const Instruction *CtxI = nullptr) const {
if (!isValidState())
return nullptr;
if (getAssumedSet().size() == 1)
return cast<ConstantInt>(ConstantInt::get(getAssociatedValue().getType(),
*(getAssumedSet().begin())));
if (getAssumedSet().size() == 0) {
if (undefIsContained())
return cast<ConstantInt>(
ConstantInt::get(getAssociatedValue().getType(), 0));
return llvm::None;
}
return nullptr;
}
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAPotentialValues"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is
/// AAPotentialValues
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
};
/// An abstract interface for all noundef attributes.
struct AANoUndef
: public IRAttribute<Attribute::NoUndef,
StateWrapper<BooleanState, AbstractAttribute>> {
AANoUndef(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
/// Return true if we assume that the underlying value is noundef.
bool isAssumedNoUndef() const { return getAssumed(); }
/// Return true if we know that underlying value is noundef.
bool isKnownNoUndef() const { return getKnown(); }
/// Create an abstract attribute view for the position \p IRP.
static AANoUndef &createForPosition(const IRPosition &IRP, Attributor &A);
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AANoUndef"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is AANoUndef
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
};
struct AACallGraphNode;
struct AACallEdges;
/// An iterator for call edges that creates AACallEdges attributes lazily.
/// This iterator becomes invalid if the underlying edge list changes, so it
/// should not outlive an iteration of the Attributor.
class AACallEdgeIterator
: public iterator_adaptor_base<AACallEdgeIterator,
SetVector<Function *>::iterator> {
AACallEdgeIterator(Attributor &A, SetVector<Function *>::iterator Begin)
: iterator_adaptor_base(Begin), A(A) {}
public:
AACallGraphNode *operator*() const;
private:
Attributor &A;
friend AACallEdges;
friend AttributorCallGraph;
};
struct AACallGraphNode {
AACallGraphNode(Attributor &A) : A(A) {}
virtual ~AACallGraphNode() = default;
virtual AACallEdgeIterator optimisticEdgesBegin() const = 0;
virtual AACallEdgeIterator optimisticEdgesEnd() const = 0;
/// Iterator range for exploring the call graph.
iterator_range<AACallEdgeIterator> optimisticEdgesRange() const {
return iterator_range<AACallEdgeIterator>(optimisticEdgesBegin(),
optimisticEdgesEnd());
}
protected:
/// Reference to Attributor needed for GraphTraits implementation.
Attributor &A;
};
/// An abstract state for querying live call edges.
/// This interface uses the Attributor's optimistic liveness
/// information to compute the edges that are alive.
struct AACallEdges : public StateWrapper<BooleanState, AbstractAttribute>,
AACallGraphNode {
using Base = StateWrapper<BooleanState, AbstractAttribute>;
AACallEdges(const IRPosition &IRP, Attributor &A)
: Base(IRP), AACallGraphNode(A) {}
/// Get the optimistic edges.
virtual const SetVector<Function *> &getOptimisticEdges() const = 0;
/// Is there any call with an unknown callee.
virtual bool hasUnknownCallee() const = 0;
/// Is there any call with an unknown callee, excluding any inline asm.
virtual bool hasNonAsmUnknownCallee() const = 0;
/// Iterator for exploring the call graph.
AACallEdgeIterator optimisticEdgesBegin() const override {
return AACallEdgeIterator(A, getOptimisticEdges().begin());
}
/// Iterator for exploring the call graph.
AACallEdgeIterator optimisticEdgesEnd() const override {
return AACallEdgeIterator(A, getOptimisticEdges().end());
}
/// Create an abstract attribute view for the position \p IRP.
static AACallEdges &createForPosition(const IRPosition &IRP, Attributor &A);
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AACallEdges"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is AACallEdges.
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
};
// Synthetic root node for the Attributor's internal call graph.
struct AttributorCallGraph : public AACallGraphNode {
AttributorCallGraph(Attributor &A) : AACallGraphNode(A) {}
virtual ~AttributorCallGraph() = default;
AACallEdgeIterator optimisticEdgesBegin() const override {
return AACallEdgeIterator(A, A.Functions.begin());
}
AACallEdgeIterator optimisticEdgesEnd() const override {
return AACallEdgeIterator(A, A.Functions.end());
}
/// Force populate the entire call graph.
void populateAll() const {
for (const AACallGraphNode *AA : optimisticEdgesRange()) {
// Nothing else to do here.
(void)AA;
}
}
void print();
};
template <> struct GraphTraits<AACallGraphNode *> {
using NodeRef = AACallGraphNode *;
using ChildIteratorType = AACallEdgeIterator;
static AACallEdgeIterator child_begin(AACallGraphNode *Node) {
return Node->optimisticEdgesBegin();
}
static AACallEdgeIterator child_end(AACallGraphNode *Node) {
return Node->optimisticEdgesEnd();
}
};
template <>
struct GraphTraits<AttributorCallGraph *>
: public GraphTraits<AACallGraphNode *> {
using nodes_iterator = AACallEdgeIterator;
static AACallGraphNode *getEntryNode(AttributorCallGraph *G) {
return static_cast<AACallGraphNode *>(G);
}
static AACallEdgeIterator nodes_begin(const AttributorCallGraph *G) {
return G->optimisticEdgesBegin();
}
static AACallEdgeIterator nodes_end(const AttributorCallGraph *G) {
return G->optimisticEdgesEnd();
}
};
template <>
struct DOTGraphTraits<AttributorCallGraph *> : public DefaultDOTGraphTraits {
DOTGraphTraits(bool Simple = false) : DefaultDOTGraphTraits(Simple) {}
std::string getNodeLabel(const AACallGraphNode *Node,
const AttributorCallGraph *Graph) {
const AACallEdges *AACE = static_cast<const AACallEdges *>(Node);
return AACE->getAssociatedFunction()->getName().str();
}
static bool isNodeHidden(const AACallGraphNode *Node,
const AttributorCallGraph *Graph) {
// Hide the synthetic root node.
return static_cast<const AACallGraphNode *>(Graph) == Node;
}
};
struct AAExecutionDomain
: public StateWrapper<BooleanState, AbstractAttribute> {
using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAExecutionDomain(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
/// Create an abstract attribute view for the position \p IRP.
static AAExecutionDomain &createForPosition(const IRPosition &IRP,
Attributor &A);
/// See AbstractAttribute::getName().
const std::string getName() const override { return "AAExecutionDomain"; }
/// See AbstractAttribute::getIdAddr().
const char *getIdAddr() const override { return &ID; }
/// Check if an instruction is executed only by the initial thread.
virtual bool isExecutedByInitialThreadOnly(const Instruction &) const = 0;
/// Check if a basic block is executed only by the initial thread.
virtual bool isExecutedByInitialThreadOnly(const BasicBlock &) const = 0;
/// This function should return true if the type of the \p AA is
/// AAExecutionDomain.
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
};
/// An abstract Attribute for computing reachability between functions.
struct AAFunctionReachability
: public StateWrapper<BooleanState, AbstractAttribute> {
using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAFunctionReachability(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
/// See AbstractAttribute::isQueryAA.
bool isQueryAA() const override { return true; }
/// Return true if the function represented by this position can reach \p Fn.
virtual bool canReach(Attributor &A, const Function &Fn) const = 0;
/// Can \p CB reach \p Fn.
virtual bool canReach(Attributor &A, CallBase &CB,
const Function &Fn) const = 0;
/// Can \p Inst reach \p Fn.
/// See also AA::isPotentiallyReachable.
virtual bool instructionCanReach(Attributor &A, const Instruction &Inst,
const Function &Fn,
bool UseBackwards = true) const = 0;
/// Create an abstract attribute view for the position \p IRP.
static AAFunctionReachability &createForPosition(const IRPosition &IRP,
Attributor &A);
/// See AbstractAttribute::getName()
const std::string getName() const override {
return "AAFunctionReachability";
}
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is AACallEdges.
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
private:
/// Can this function reach a call with an unknown callee.
virtual bool canReachUnknownCallee() const = 0;
};
/// An abstract interface for pointer access information (offsets, access
/// kinds, and written content).
struct AAPointerInfo : public AbstractAttribute {
AAPointerInfo(const IRPosition &IRP) : AbstractAttribute(IRP) {}
enum AccessKind {
AK_READ = 1 << 0,
AK_WRITE = 1 << 1,
AK_READ_WRITE = AK_READ | AK_WRITE,
};
/// An access description.
struct Access {
Access(Instruction *I, Optional<Value *> Content, AccessKind Kind, Type *Ty)
: LocalI(I), RemoteI(I), Content(Content), Kind(Kind), Ty(Ty) {}
Access(Instruction *LocalI, Instruction *RemoteI, Optional<Value *> Content,
AccessKind Kind, Type *Ty)
: LocalI(LocalI), RemoteI(RemoteI), Content(Content), Kind(Kind),
Ty(Ty) {}
Access(const Access &Other) = default;
Access(const Access &&Other)
: LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content),
Kind(Other.Kind), Ty(Other.Ty) {}
Access &operator=(const Access &Other) = default;
bool operator==(const Access &R) const {
return LocalI == R.LocalI && RemoteI == R.RemoteI &&
Content == R.Content && Kind == R.Kind;
}
bool operator!=(const Access &R) const { return !(*this == R); }
Access &operator&=(const Access &R) {
assert(RemoteI == R.RemoteI && "Expected same instruction!");
Content =
AA::combineOptionalValuesInAAValueLatice(Content, R.Content, Ty);
Kind = AccessKind(Kind | R.Kind);
return *this;
}
/// Return the access kind.
AccessKind getKind() const { return Kind; }
/// Return true if this is a read access.
bool isRead() const { return Kind & AK_READ; }
/// Return true if this is a write access.
bool isWrite() const { return Kind & AK_WRITE; }
/// Return the instruction that causes the access with respect to the local
/// scope of the associated attribute.
Instruction *getLocalInst() const { return LocalI; }
/// Return the actual instruction that causes the access.
Instruction *getRemoteInst() const { return RemoteI; }
/// Return true if the value written is not known yet.
bool isWrittenValueYetUndetermined() const { return !Content.hasValue(); }
/// Return true if the value written cannot be determined at all.
bool isWrittenValueUnknown() const {
return Content.hasValue() && !*Content;
}
/// Return the type associated with the access, if known.
Type *getType() const { return Ty; }
/// Return the value written, if any. As long as
/// isWrittenValueYetUndetermined() returns true this function shall not be
/// called.
Value *getWrittenValue() const { return *Content; }
/// Return the written value, which can be `llvm::None` if it is not yet
/// determined.
Optional<Value *> getContent() const { return Content; }
private:
/// The instruction responsible for the access with respect to the local
/// scope of the associated attribute.
Instruction *LocalI;
/// The instruction responsible for the access.
Instruction *RemoteI;
/// The value written, if any. `llvm::none` means "not known yet", `nullptr`
/// cannot be determined.
Optional<Value *> Content;
/// The access kind, e.g., READ, as bitset (could be more than one).
AccessKind Kind;
/// The type of the content, thus the type read/written, can be null if not
/// available.
Type *Ty;
};
/// Create an abstract attribute view for the position \p IRP.
static AAPointerInfo &createForPosition(const IRPosition &IRP, Attributor &A);
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAPointerInfo"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// Call \p CB on all accesses that might interfere with \p LI and return true
/// if all such accesses were known and the callback returned true for all of
/// them, false otherwise.
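///
/// A hypothetical caller (`PI` is an AAPointerInfo reference, `LI` a load,
/// and the predicate below is arbitrary):
/// \code
///   bool AllKnown = PI.forallInterferingAccesses(
///       LI, [](const AAPointerInfo::Access &Acc, bool IsExact) {
///         // Keep going as long as the access is a read or writes a value we
///         // can see.
///         return Acc.isRead() || !Acc.isWrittenValueUnknown();
///       });
/// \endcode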
virtual bool forallInterferingAccesses(
LoadInst &LI, function_ref<bool(const Access &, bool)> CB) const = 0;
virtual bool forallInterferingAccesses(
StoreInst &SI, function_ref<bool(const Access &, bool)> CB) const = 0;
/// Call \p CB on all write accesses that might interfere with \p LI and
/// return true if all such accesses were known and the callback returned true
/// for all of them, false otherwise. In contrast to forallInterferingAccesses
/// this function will perform reasoning to exclude write accesses that cannot
/// affect the load even if, on the surface, they look as if they would.
virtual bool forallInterferingWrites(
Attributor &A, const AbstractAttribute &QueryingAA, LoadInst &LI,
function_ref<bool(const Access &, bool)> CB) const = 0;
/// This function should return true if the type of the \p AA is AAPointerInfo
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
};
/// An abstract attribute for getting assumption information.
struct AAAssumptionInfo
: public StateWrapper<SetState<StringRef>, AbstractAttribute,
DenseSet<StringRef>> {
using Base =
StateWrapper<SetState<StringRef>, AbstractAttribute, DenseSet<StringRef>>;
AAAssumptionInfo(const IRPosition &IRP, Attributor &A,
const DenseSet<StringRef> &Known)
: Base(IRP, Known) {}
/// Returns true if the assumption set contains the assumption \p Assumption.
virtual bool hasAssumption(const StringRef Assumption) const = 0;
/// Create an abstract attribute view for the position \p IRP.
static AAAssumptionInfo &createForPosition(const IRPosition &IRP,
Attributor &A);
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAAssumptionInfo"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is
/// AAAssumptionInfo
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
};
raw_ostream &operator<<(raw_ostream &, const AAPointerInfo::Access &);
/// Run options, used by the pass manager.
enum AttributorRunOption {
NONE = 0,
MODULE = 1 << 0,
CGSCC = 1 << 1,
ALL = MODULE | CGSCC
};
} // end namespace llvm
#endif // LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H
diff --git a/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp
index 7cf69f613c66..f6b955162fa5 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1,3222 +1,3225 @@
//===-- ConstantFolding.cpp - Fold instructions into constants ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines routines for folding instructions into constants.
//
// Also, to supplement the basic IR ConstantExpr simplifications,
// this file defines some additional folding routines that can make use of
// DataLayout information. These functions cannot go in IR due to library
// dependency issues.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Config/config.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cerrno>
#include <cfenv>
#include <cmath>
#include <cstddef>
#include <cstdint>
using namespace llvm;
namespace {
//===----------------------------------------------------------------------===//
// Constant Folding internal helper functions
//===----------------------------------------------------------------------===//
static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy,
Constant *C, Type *SrcEltTy,
unsigned NumSrcElts,
const DataLayout &DL) {
// Now that we know that the input value is a vector of integers, just shift
// and insert them into our result.
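// For example (little endian, illustrative): bitcasting
//   <2 x i16> <i16 1, i16 2>
// to i32 shifts in element 1 first (value 2) and then element 0 (value 1),
// yielding 0x00020001, which matches the in-memory byte order.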
unsigned BitShift = DL.getTypeSizeInBits(SrcEltTy);
for (unsigned i = 0; i != NumSrcElts; ++i) {
Constant *Element;
if (DL.isLittleEndian())
Element = C->getAggregateElement(NumSrcElts - i - 1);
else
Element = C->getAggregateElement(i);
if (Element && isa<UndefValue>(Element)) {
Result <<= BitShift;
continue;
}
auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
if (!ElementCI)
return ConstantExpr::getBitCast(C, DestTy);
Result <<= BitShift;
Result |= ElementCI->getValue().zextOrSelf(Result.getBitWidth());
}
return nullptr;
}
/// Constant fold bitcast, symbolically evaluating it with DataLayout.
/// This always returns a non-null constant, but it may be a
/// ConstantExpr if unfoldable.
Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
assert(CastInst::castIsValid(Instruction::BitCast, C, DestTy) &&
"Invalid constantexpr bitcast!");
// Catch the obvious splat cases.
if (Constant *Res = ConstantFoldLoadFromUniformValue(C, DestTy))
return Res;
if (auto *VTy = dyn_cast<VectorType>(C->getType())) {
// Handle a vector->scalar integer/fp cast.
if (isa<IntegerType>(DestTy) || DestTy->isFloatingPointTy()) {
unsigned NumSrcElts = cast<FixedVectorType>(VTy)->getNumElements();
Type *SrcEltTy = VTy->getElementType();
// If the vector is a vector of floating point, convert it to a vector of int
// to simplify things.
if (SrcEltTy->isFloatingPointTy()) {
unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
auto *SrcIVTy = FixedVectorType::get(
IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
// Ask IR to do the conversion now that #elts line up.
C = ConstantExpr::getBitCast(C, SrcIVTy);
}
APInt Result(DL.getTypeSizeInBits(DestTy), 0);
if (Constant *CE = foldConstVectorToAPInt(Result, DestTy, C,
SrcEltTy, NumSrcElts, DL))
return CE;
if (isa<IntegerType>(DestTy))
return ConstantInt::get(DestTy, Result);
APFloat FP(DestTy->getFltSemantics(), Result);
return ConstantFP::get(DestTy->getContext(), FP);
}
}
// The code below only handles casts to vectors currently.
auto *DestVTy = dyn_cast<VectorType>(DestTy);
if (!DestVTy)
return ConstantExpr::getBitCast(C, DestTy);
// If this is a scalar -> vector cast, convert the input into a <1 x scalar>
// vector so the code below can handle it uniformly.
if (isa<ConstantFP>(C) || isa<ConstantInt>(C)) {
Constant *Ops = C; // don't take the address of C!
return FoldBitCast(ConstantVector::get(Ops), DestTy, DL);
}
// If this is a bitcast from constant vector -> vector, fold it.
if (!isa<ConstantDataVector>(C) && !isa<ConstantVector>(C))
return ConstantExpr::getBitCast(C, DestTy);
// If the element types match, IR can fold it.
unsigned NumDstElt = cast<FixedVectorType>(DestVTy)->getNumElements();
unsigned NumSrcElt = cast<FixedVectorType>(C->getType())->getNumElements();
if (NumDstElt == NumSrcElt)
return ConstantExpr::getBitCast(C, DestTy);
Type *SrcEltTy = cast<VectorType>(C->getType())->getElementType();
Type *DstEltTy = DestVTy->getElementType();
// Otherwise, we're changing the number of elements in a vector, which
// requires endianness information to do the right thing. For example,
// bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
// folds to (little endian):
// <4 x i32> <i32 0, i32 0, i32 1, i32 0>
// and to (big endian):
// <4 x i32> <i32 0, i32 0, i32 0, i32 1>
// First things first. We only want to think about integers here, so if
// we have something in FP form, recast it as integer.
if (DstEltTy->isFloatingPointTy()) {
// Fold to a vector of integers with the same size as our FP type.
unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits();
auto *DestIVTy = FixedVectorType::get(
IntegerType::get(C->getContext(), FPWidth), NumDstElt);
// Recursively handle this integer conversion, if possible.
C = FoldBitCast(C, DestIVTy, DL);
// Finally, IR can handle this now that #elts line up.
return ConstantExpr::getBitCast(C, DestTy);
}
// Okay, we know the destination is an integer; if the input is FP, convert
// it to integer first.
if (SrcEltTy->isFloatingPointTy()) {
unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
auto *SrcIVTy = FixedVectorType::get(
IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
// Ask IR to do the conversion now that #elts line up.
C = ConstantExpr::getBitCast(C, SrcIVTy);
// If IR wasn't able to fold it, bail out.
if (!isa<ConstantVector>(C) && // FIXME: Remove ConstantVector.
!isa<ConstantDataVector>(C))
return C;
}
// Now we know that the input and output vectors are both integer vectors
// of the same size, and that their #elements is not the same. Do the
// conversion here, which depends on whether the input or output has
// more elements.
bool isLittleEndian = DL.isLittleEndian();
SmallVector<Constant*, 32> Result;
if (NumDstElt < NumSrcElt) {
// Handle: bitcast (<4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>)
Constant *Zero = Constant::getNullValue(DstEltTy);
unsigned Ratio = NumSrcElt/NumDstElt;
unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits();
unsigned SrcElt = 0;
for (unsigned i = 0; i != NumDstElt; ++i) {
// Build each element of the result.
Constant *Elt = Zero;
unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
for (unsigned j = 0; j != Ratio; ++j) {
Constant *Src = C->getAggregateElement(SrcElt++);
if (Src && isa<UndefValue>(Src))
Src = Constant::getNullValue(
cast<VectorType>(C->getType())->getElementType());
else
Src = dyn_cast_or_null<ConstantInt>(Src);
if (!Src) // Reject constantexpr elements.
return ConstantExpr::getBitCast(C, DestTy);
// Zero extend the element to the right size.
Src = ConstantExpr::getZExt(Src, Elt->getType());
// Shift it to the right place, depending on endianness.
Src = ConstantExpr::getShl(Src,
ConstantInt::get(Src->getType(), ShiftAmt));
ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
// Mix it in.
Elt = ConstantExpr::getOr(Elt, Src);
}
Result.push_back(Elt);
}
return ConstantVector::get(Result);
}
// Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
unsigned Ratio = NumDstElt/NumSrcElt;
unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy);
// Loop over each source value, expanding into multiple results.
for (unsigned i = 0; i != NumSrcElt; ++i) {
auto *Element = C->getAggregateElement(i);
if (!Element) // Reject constantexpr elements.
return ConstantExpr::getBitCast(C, DestTy);
if (isa<UndefValue>(Element)) {
// Correctly propagate undef values.
Result.append(Ratio, UndefValue::get(DstEltTy));
continue;
}
auto *Src = dyn_cast<ConstantInt>(Element);
if (!Src)
return ConstantExpr::getBitCast(C, DestTy);
unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
for (unsigned j = 0; j != Ratio; ++j) {
// Shift the piece of the value into the right place, depending on
// endianness.
Constant *Elt = ConstantExpr::getLShr(Src,
ConstantInt::get(Src->getType(), ShiftAmt));
ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
// Truncate the element to an integer with the same pointer size and
// convert the element back to a pointer using an inttoptr.
if (DstEltTy->isPointerTy()) {
IntegerType *DstIntTy = Type::getIntNTy(C->getContext(), DstBitSize);
Constant *CE = ConstantExpr::getTrunc(Elt, DstIntTy);
Result.push_back(ConstantExpr::getIntToPtr(CE, DstEltTy));
continue;
}
// Truncate and remember this piece.
Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy));
}
}
return ConstantVector::get(Result);
}
} // end anonymous namespace
/// If this constant is a constant offset from a global, return the global and
/// the constant. Because of constantexprs, this function is recursive.
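///
/// For example (illustrative), for the constant expression
/// \code
///   getelementptr inbounds ([8 x i32], [8 x i32]* @g, i64 0, i64 4)
/// \endcode
/// this returns true with \p GV set to @g and \p Offset set to 16 (assuming
/// 32-bit array elements and a sufficiently wide index type).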
bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
APInt &Offset, const DataLayout &DL,
DSOLocalEquivalent **DSOEquiv) {
if (DSOEquiv)
*DSOEquiv = nullptr;
// Trivial case, constant is the global.
if ((GV = dyn_cast<GlobalValue>(C))) {
unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType());
Offset = APInt(BitWidth, 0);
return true;
}
if (auto *FoundDSOEquiv = dyn_cast<DSOLocalEquivalent>(C)) {
if (DSOEquiv)
*DSOEquiv = FoundDSOEquiv;
GV = FoundDSOEquiv->getGlobalValue();
unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType());
Offset = APInt(BitWidth, 0);
return true;
}
// Otherwise, if this isn't a constant expr, bail out.
auto *CE = dyn_cast<ConstantExpr>(C);
if (!CE) return false;
// Look through ptr->int and ptr->ptr casts.
if (CE->getOpcode() == Instruction::PtrToInt ||
CE->getOpcode() == Instruction::BitCast)
return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL,
DSOEquiv);
// i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
auto *GEP = dyn_cast<GEPOperator>(CE);
if (!GEP)
return false;
unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
APInt TmpOffset(BitWidth, 0);
// If the base isn't a global+constant, we aren't either.
if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, DL,
DSOEquiv))
return false;
// Otherwise, add any offset that our operands provide.
if (!GEP->accumulateConstantOffset(DL, TmpOffset))
return false;
Offset = TmpOffset;
return true;
}
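// Worked example for IsConstantOffsetFromGlobal (illustrative; @g is a
// hypothetical global of type [10 x i32]): for the constant expression
//   getelementptr inbounds ([10 x i32], [10 x i32]* @g, i64 0, i64 3)
// this returns true with GV = @g and Offset = 12 (three 4-byte elements),
// using the index-type width from the DataLayout.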
Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
const DataLayout &DL) {
do {
Type *SrcTy = C->getType();
if (SrcTy == DestTy)
return C;
TypeSize DestSize = DL.getTypeSizeInBits(DestTy);
TypeSize SrcSize = DL.getTypeSizeInBits(SrcTy);
if (!TypeSize::isKnownGE(SrcSize, DestSize))
return nullptr;
// Catch the obvious splat cases (since all-zeros can coerce non-integral
// pointers legally).
if (Constant *Res = ConstantFoldLoadFromUniformValue(C, DestTy))
return Res;
// If the type sizes are the same and a cast is legal, just directly
// cast the constant.
// But be careful not to coerce non-integral pointers illegally.
if (SrcSize == DestSize &&
DL.isNonIntegralPointerType(SrcTy->getScalarType()) ==
DL.isNonIntegralPointerType(DestTy->getScalarType())) {
Instruction::CastOps Cast = Instruction::BitCast;
// If we are going from a pointer to int or vice versa, we spell the cast
// differently.
if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
Cast = Instruction::IntToPtr;
else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
Cast = Instruction::PtrToInt;
if (CastInst::castIsValid(Cast, C, DestTy))
return ConstantExpr::getCast(Cast, C, DestTy);
}
// If this isn't an aggregate type, there is nothing we can do to drill down
// and find a bitcastable constant.
if (!SrcTy->isAggregateType() && !SrcTy->isVectorTy())
return nullptr;
// We're simulating a load through a pointer that was bitcast to point to
// a different type, so we can try to walk down through the initial
// elements of an aggregate to see if some part of the aggregate is
// castable to implement the "load" semantic model.
if (SrcTy->isStructTy()) {
// Struct types might have leading zero-length elements like [0 x i32],
// which are certainly not what we are looking for, so skip them.
unsigned Elem = 0;
Constant *ElemC;
do {
ElemC = C->getAggregateElement(Elem++);
} while (ElemC && DL.getTypeSizeInBits(ElemC->getType()).isZero());
C = ElemC;
} else {
// For non-byte-sized vector elements, the first element is not
// necessarily located at the vector base address.
if (auto *VT = dyn_cast<VectorType>(SrcTy))
if (!DL.typeSizeEqualsStoreSize(VT->getElementType()))
return nullptr;
C = C->getAggregateElement(0u);
}
} while (C);
return nullptr;
}
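// Worked example for ConstantFoldLoadThroughBitcast (illustrative): given a
// hypothetical constant of type { float, i32 } and DestTy = i32, the struct
// itself cannot be cast directly, so the loop descends into its first
// non-zero-sized element (the float); float and i32 have the same 32-bit
// size, so that element is returned bitcast to i32.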
namespace {
/// Recursive helper to read bits out of a global. C is the constant being copied
/// out of. ByteOffset is an offset into C. CurPtr is the pointer to copy
/// results into and BytesLeft is the number of bytes left in
/// the CurPtr buffer. DL is the DataLayout.
bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr,
unsigned BytesLeft, const DataLayout &DL) {
assert(ByteOffset <= DL.getTypeAllocSize(C->getType()) &&
"Out of range access");
// If this element is zero or undefined, we can just return since *CurPtr is
// zero initialized.
if (isa<ConstantAggregateZero>(C) || isa<UndefValue>(C))
return true;
if (auto *CI = dyn_cast<ConstantInt>(C)) {
if (CI->getBitWidth() > 64 ||
(CI->getBitWidth() & 7) != 0)
return false;
uint64_t Val = CI->getZExtValue();
unsigned IntBytes = unsigned(CI->getBitWidth()/8);
for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) {
int n = ByteOffset;
if (!DL.isLittleEndian())
n = IntBytes - n - 1;
CurPtr[i] = (unsigned char)(Val >> (n * 8));
++ByteOffset;
}
return true;
}
if (auto *CFP = dyn_cast<ConstantFP>(C)) {
if (CFP->getType()->isDoubleTy()) {
C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), DL);
return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
}
if (CFP->getType()->isFloatTy()){
C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), DL);
return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
}
if (CFP->getType()->isHalfTy()){
C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), DL);
return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL);
}
return false;
}
if (auto *CS = dyn_cast<ConstantStruct>(C)) {
const StructLayout *SL = DL.getStructLayout(CS->getType());
unsigned Index = SL->getElementContainingOffset(ByteOffset);
uint64_t CurEltOffset = SL->getElementOffset(Index);
ByteOffset -= CurEltOffset;
while (true) {
// If the element access is to the element itself and not to tail padding,
// read the bytes from the element.
uint64_t EltSize = DL.getTypeAllocSize(CS->getOperand(Index)->getType());
if (ByteOffset < EltSize &&
!ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr,
BytesLeft, DL))
return false;
++Index;
// Check to see if we read from the last struct element; if so, we're done.
if (Index == CS->getType()->getNumElements())
return true;
// If we read all of the bytes we needed from this element we're done.
uint64_t NextEltOffset = SL->getElementOffset(Index);
if (BytesLeft <= NextEltOffset - CurEltOffset - ByteOffset)
return true;
// Move to the next element of the struct.
CurPtr += NextEltOffset - CurEltOffset - ByteOffset;
BytesLeft -= NextEltOffset - CurEltOffset - ByteOffset;
ByteOffset = 0;
CurEltOffset = NextEltOffset;
}
// not reached.
}
if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
isa<ConstantDataSequential>(C)) {
uint64_t NumElts;
Type *EltTy;
if (auto *AT = dyn_cast<ArrayType>(C->getType())) {
NumElts = AT->getNumElements();
EltTy = AT->getElementType();
} else {
NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
EltTy = cast<FixedVectorType>(C->getType())->getElementType();
}
uint64_t EltSize = DL.getTypeAllocSize(EltTy);
uint64_t Index = ByteOffset / EltSize;
uint64_t Offset = ByteOffset - Index * EltSize;
for (; Index != NumElts; ++Index) {
if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr,
BytesLeft, DL))
return false;
uint64_t BytesWritten = EltSize - Offset;
assert(BytesWritten <= EltSize && "Not indexing into this element?");
if (BytesWritten >= BytesLeft)
return true;
Offset = 0;
BytesLeft -= BytesWritten;
CurPtr += BytesWritten;
}
return true;
}
if (auto *CE = dyn_cast<ConstantExpr>(C)) {
if (CE->getOpcode() == Instruction::IntToPtr &&
CE->getOperand(0)->getType() == DL.getIntPtrType(CE->getType())) {
return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
BytesLeft, DL);
}
}
// Otherwise, unknown initializer type.
return false;
}
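// Worked example for ReadDataFromGlobal (illustrative): reading the constant
// i32 0x11223344 at ByteOffset 0 with BytesLeft >= 4 fills CurPtr with the
// bytes 0x44 0x33 0x22 0x11 on a little-endian DataLayout; a big-endian
// layout produces 0x11 0x22 0x33 0x44 instead.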
Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy,
int64_t Offset, const DataLayout &DL) {
// Bail out early. We do not expect to load from a scalable global variable.
if (isa<ScalableVectorType>(LoadTy))
return nullptr;
auto *IntType = dyn_cast<IntegerType>(LoadTy);
// If this isn't an integer load we can't fold it directly.
if (!IntType) {
// If this is a non-integer load, we can try folding it as an int load and
// then bitcast the result. This can be useful for union cases. Note
// that address spaces don't matter here since we're not going to produce an
// actual new load.
if (!LoadTy->isFloatingPointTy() && !LoadTy->isPointerTy() &&
!LoadTy->isVectorTy())
return nullptr;
Type *MapTy = Type::getIntNTy(
C->getContext(), DL.getTypeSizeInBits(LoadTy).getFixedSize());
if (Constant *Res = FoldReinterpretLoadFromConst(C, MapTy, Offset, DL)) {
if (Res->isNullValue() && !LoadTy->isX86_MMXTy() &&
!LoadTy->isX86_AMXTy())
// Materializing a zero can be done trivially without a bitcast
return Constant::getNullValue(LoadTy);
Type *CastTy = LoadTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(LoadTy) : LoadTy;
Res = FoldBitCast(Res, CastTy, DL);
if (LoadTy->isPtrOrPtrVectorTy()) {
// For a vector of pointers, we first need to convert to a vector of integers, then do a vector inttoptr.
if (Res->isNullValue() && !LoadTy->isX86_MMXTy() &&
!LoadTy->isX86_AMXTy())
return Constant::getNullValue(LoadTy);
if (DL.isNonIntegralPointerType(LoadTy->getScalarType()))
// Be careful not to replace a load of an addrspace value with an inttoptr here
return nullptr;
Res = ConstantExpr::getCast(Instruction::IntToPtr, Res, LoadTy);
}
return Res;
}
return nullptr;
}
unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8;
if (BytesLoaded > 32 || BytesLoaded == 0)
return nullptr;
- int64_t InitializerSize = DL.getTypeAllocSize(C->getType()).getFixedSize();
-
// If we're not accessing anything in this constant, the result is undefined.
if (Offset <= -1 * static_cast<int64_t>(BytesLoaded))
return UndefValue::get(IntType);
+ // TODO: We should be able to support scalable types.
+ TypeSize InitializerSize = DL.getTypeAllocSize(C->getType());
+ if (InitializerSize.isScalable())
+ return nullptr;
+
// If we're not accessing anything in this constant, the result is undefined.
- if (Offset >= InitializerSize)
+ if (Offset >= (int64_t)InitializerSize.getFixedValue())
return UndefValue::get(IntType);
unsigned char RawBytes[32] = {0};
unsigned char *CurPtr = RawBytes;
unsigned BytesLeft = BytesLoaded;
// If we're loading off the beginning of the global, some bytes may be valid.
if (Offset < 0) {
CurPtr += -Offset;
BytesLeft += Offset;
Offset = 0;
}
if (!ReadDataFromGlobal(C, Offset, CurPtr, BytesLeft, DL))
return nullptr;
APInt ResultVal = APInt(IntType->getBitWidth(), 0);
if (DL.isLittleEndian()) {
ResultVal = RawBytes[BytesLoaded - 1];
for (unsigned i = 1; i != BytesLoaded; ++i) {
ResultVal <<= 8;
ResultVal |= RawBytes[BytesLoaded - 1 - i];
}
} else {
ResultVal = RawBytes[0];
for (unsigned i = 1; i != BytesLoaded; ++i) {
ResultVal <<= 8;
ResultVal |= RawBytes[i];
}
}
return ConstantInt::get(IntType->getContext(), ResultVal);
}
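// Worked example for FoldReinterpretLoadFromConst (illustrative): loading an
// i16 at Offset 1 from a constant i32 0x11223344 on a little-endian target
// reads the raw bytes 0x33 0x22 and reassembles them into i16 0x2233, while
// an Offset that misses the initializer entirely folds to undef.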
/// If this Offset points exactly to the start of an aggregate element, return
/// that element, otherwise return nullptr.
Constant *getConstantAtOffset(Constant *Base, APInt Offset,
const DataLayout &DL) {
if (Offset.isZero())
return Base;
if (!isa<ConstantAggregate>(Base) && !isa<ConstantDataSequential>(Base))
return nullptr;
Type *ElemTy = Base->getType();
SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(ElemTy, Offset);
if (!Offset.isZero() || !Indices[0].isZero())
return nullptr;
Constant *C = Base;
for (const APInt &Index : drop_begin(Indices)) {
if (Index.isNegative() || Index.getActiveBits() >= 32)
return nullptr;
C = C->getAggregateElement(Index.getZExtValue());
if (!C)
return nullptr;
}
return C;
}
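// Worked example for getConstantAtOffset (illustrative, assuming a typical
// 64-bit DataLayout): for a hypothetical constant of type { i32, [2 x i64] }
// and Offset = 8, getGEPIndicesForOffset yields the indices (0, 1, 0) with a
// zero remainder, so the first i64 of the nested array is returned; an offset
// that lands in padding or mid-element leaves a non-zero remainder and the
// function returns nullptr.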
} // end anonymous namespace
Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,
const APInt &Offset,
const DataLayout &DL) {
if (Constant *AtOffset = getConstantAtOffset(C, Offset, DL))
if (Constant *Result = ConstantFoldLoadThroughBitcast(AtOffset, Ty, DL))
return Result;
// Explicitly check for out-of-bounds access, so we return undef even if the
// constant is a uniform value.
TypeSize Size = DL.getTypeAllocSize(C->getType());
if (!Size.isScalable() && Offset.sge(Size.getFixedSize()))
return UndefValue::get(Ty);
// Try an offset-independent fold of a uniform value.
if (Constant *Result = ConstantFoldLoadFromUniformValue(C, Ty))
return Result;
// Try hard to fold loads from bitcasted strange and non-type-safe things.
if (Offset.getMinSignedBits() <= 64)
if (Constant *Result =
FoldReinterpretLoadFromConst(C, Ty, Offset.getSExtValue(), DL))
return Result;
return nullptr;
}
Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,
const DataLayout &DL) {
return ConstantFoldLoadFromConst(C, Ty, APInt(64, 0), DL);
}
Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
APInt Offset,
const DataLayout &DL) {
C = cast<Constant>(C->stripAndAccumulateConstantOffsets(
DL, Offset, /* AllowNonInbounds */ true));
if (auto *GV = dyn_cast<GlobalVariable>(C))
if (GV->isConstant() && GV->hasDefinitiveInitializer())
if (Constant *Result = ConstantFoldLoadFromConst(GV->getInitializer(), Ty,
Offset, DL))
return Result;
// If this load comes from anywhere in a uniform constant global, the value
// is always the same, regardless of the loaded offset.
if (auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(C))) {
if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
if (Constant *Res =
ConstantFoldLoadFromUniformValue(GV->getInitializer(), Ty))
return Res;
}
}
return nullptr;
}
Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
const DataLayout &DL) {
APInt Offset(DL.getIndexTypeSizeInBits(C->getType()), 0);
return ConstantFoldLoadFromConstPtr(C, Ty, Offset, DL);
}
Constant *llvm::ConstantFoldLoadFromUniformValue(Constant *C, Type *Ty) {
if (isa<PoisonValue>(C))
return PoisonValue::get(Ty);
if (isa<UndefValue>(C))
return UndefValue::get(Ty);
if (C->isNullValue() && !Ty->isX86_MMXTy() && !Ty->isX86_AMXTy())
return Constant::getNullValue(Ty);
if (C->isAllOnesValue() &&
(Ty->isIntOrIntVectorTy() || Ty->isFPOrFPVectorTy()))
return Constant::getAllOnesValue(Ty);
return nullptr;
}
namespace {
/// One of Op0/Op1 is a constant expression.
/// Attempt to symbolically evaluate the result of a binary operator merging
/// these together. If target data info is available, it is provided as DL,
/// otherwise DL is null.
Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1,
const DataLayout &DL) {
// SROA
// Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
// Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute
// bits.
if (Opc == Instruction::And) {
KnownBits Known0 = computeKnownBits(Op0, DL);
KnownBits Known1 = computeKnownBits(Op1, DL);
if ((Known1.One | Known0.Zero).isAllOnes()) {
// All the bits of Op0 that the 'and' could be masking are already zero.
return Op0;
}
if ((Known0.One | Known1.Zero).isAllOnes()) {
// All the bits of Op1 that the 'and' could be masking are already zero.
return Op1;
}
Known0 &= Known1;
if (Known0.isConstant())
return ConstantInt::get(Op0->getType(), Known0.getConstant());
}
// If the constant expr is something like &A[123] - &A[4].f, fold this into a
// constant. This happens frequently when iterating over a global array.
if (Opc == Instruction::Sub) {
GlobalValue *GV1, *GV2;
APInt Offs1, Offs2;
if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, DL))
if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, DL) && GV1 == GV2) {
unsigned OpSize = DL.getTypeSizeInBits(Op0->getType());
// (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow.
// PtrToInt may change the bitwidth, so we have to convert to the right size
// first.
return ConstantInt::get(Op0->getType(), Offs1.zextOrTrunc(OpSize) -
Offs2.zextOrTrunc(OpSize));
}
}
return nullptr;
}
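// Worked example for SymbolicallyEvaluateBinop (illustrative; @A is a
// hypothetical global of type [10 x i32]): subtracting ptrtoint(&A[1]) from
// ptrtoint(&A[3]) folds to 8, because both operands are constant offsets
// (12 and 4) from the same global, so only the offsets are subtracted.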
/// If array indices are not pointer-sized integers, explicitly cast them so
/// that they aren't implicitly casted by the getelementptr.
Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
Type *ResultTy, Optional<unsigned> InRangeIndex,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
Type *IntIdxTy = DL.getIndexType(ResultTy);
Type *IntIdxScalarTy = IntIdxTy->getScalarType();
bool Any = false;
SmallVector<Constant*, 32> NewIdxs;
for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
if ((i == 1 ||
!isa<StructType>(GetElementPtrInst::getIndexedType(
SrcElemTy, Ops.slice(1, i - 1)))) &&
Ops[i]->getType()->getScalarType() != IntIdxScalarTy) {
Any = true;
Type *NewType = Ops[i]->getType()->isVectorTy()
? IntIdxTy
: IntIdxScalarTy;
NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
true,
NewType,
true),
Ops[i], NewType));
} else
NewIdxs.push_back(Ops[i]);
}
if (!Any)
return nullptr;
Constant *C = ConstantExpr::getGetElementPtr(
SrcElemTy, Ops[0], NewIdxs, /*InBounds=*/false, InRangeIndex);
return ConstantFoldConstant(C, DL, TLI);
}
/// Strip the pointer casts, but preserve the address space information.
Constant *StripPtrCastKeepAS(Constant *Ptr) {
assert(Ptr->getType()->isPointerTy() && "Not a pointer type");
auto *OldPtrTy = cast<PointerType>(Ptr->getType());
Ptr = cast<Constant>(Ptr->stripPointerCasts());
auto *NewPtrTy = cast<PointerType>(Ptr->getType());
// Preserve the address space number of the pointer.
if (NewPtrTy->getAddressSpace() != OldPtrTy->getAddressSpace()) {
Ptr = ConstantExpr::getPointerCast(
Ptr, PointerType::getWithSamePointeeType(NewPtrTy,
OldPtrTy->getAddressSpace()));
}
return Ptr;
}
/// If we can symbolically evaluate the GEP constant expression, do so.
Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
ArrayRef<Constant *> Ops,
const DataLayout &DL,
const TargetLibraryInfo *TLI) {
const GEPOperator *InnermostGEP = GEP;
bool InBounds = GEP->isInBounds();
Type *SrcElemTy = GEP->getSourceElementType();
Type *ResElemTy = GEP->getResultElementType();
Type *ResTy = GEP->getType();
if (!SrcElemTy->isSized() || isa<ScalableVectorType>(SrcElemTy))
return nullptr;
if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy,
GEP->getInRangeIndex(), DL, TLI))
return C;
Constant *Ptr = Ops[0];
if (!Ptr->getType()->isPointerTy())
return nullptr;
Type *IntIdxTy = DL.getIndexType(Ptr->getType());
// If this is "gep i8* Ptr, (sub 0, V)", fold this as:
// "inttoptr (sub (ptrtoint Ptr), V)"
if (Ops.size() == 2 && ResElemTy->isIntegerTy(8)) {
auto *CE = dyn_cast<ConstantExpr>(Ops[1]);
assert((!CE || CE->getType() == IntIdxTy) &&
"CastGEPIndices didn't canonicalize index types!");
if (CE && CE->getOpcode() == Instruction::Sub &&
CE->getOperand(0)->isNullValue()) {
Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType());
Res = ConstantExpr::getSub(Res, CE->getOperand(1));
Res = ConstantExpr::getIntToPtr(Res, ResTy);
return ConstantFoldConstant(Res, DL, TLI);
}
}
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
if (!isa<ConstantInt>(Ops[i]))
return nullptr;
unsigned BitWidth = DL.getTypeSizeInBits(IntIdxTy);
APInt Offset =
APInt(BitWidth,
DL.getIndexedOffsetInType(
SrcElemTy,
makeArrayRef((Value * const *)Ops.data() + 1, Ops.size() - 1)));
Ptr = StripPtrCastKeepAS(Ptr);
// If this is a GEP of a GEP, fold it all into a single GEP.
while (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
InnermostGEP = GEP;
InBounds &= GEP->isInBounds();
SmallVector<Value *, 4> NestedOps(llvm::drop_begin(GEP->operands()));
// Do not try to incorporate the sub-GEP if some index is not a number.
bool AllConstantInt = true;
for (Value *NestedOp : NestedOps)
if (!isa<ConstantInt>(NestedOp)) {
AllConstantInt = false;
break;
}
if (!AllConstantInt)
break;
Ptr = cast<Constant>(GEP->getOperand(0));
SrcElemTy = GEP->getSourceElementType();
Offset += APInt(BitWidth, DL.getIndexedOffsetInType(SrcElemTy, NestedOps));
Ptr = StripPtrCastKeepAS(Ptr);
}
// If the base value for this address is a literal integer value, fold the
// getelementptr to the resulting integer value casted to the pointer type.
APInt BasePtr(BitWidth, 0);
if (auto *CE = dyn_cast<ConstantExpr>(Ptr)) {
if (CE->getOpcode() == Instruction::IntToPtr) {
if (auto *Base = dyn_cast<ConstantInt>(CE->getOperand(0)))
BasePtr = Base->getValue().zextOrTrunc(BitWidth);
}
}
auto *PTy = cast<PointerType>(Ptr->getType());
if ((Ptr->isNullValue() || BasePtr != 0) &&
!DL.isNonIntegralPointerType(PTy)) {
Constant *C = ConstantInt::get(Ptr->getContext(), Offset + BasePtr);
return ConstantExpr::getIntToPtr(C, ResTy);
}
// Otherwise form a regular getelementptr. Recompute the indices so that
// we eliminate over-indexing of the notional static type array bounds.
// This makes it easy to determine if the getelementptr is "inbounds".
// Also, this helps GlobalOpt do SROA on GlobalVariables.
// For GEPs of GlobalValues, use the value type even for opaque pointers.
// Otherwise use an i8 GEP.
if (auto *GV = dyn_cast<GlobalValue>(Ptr))
SrcElemTy = GV->getValueType();
else if (!PTy->isOpaque())
SrcElemTy = PTy->getNonOpaquePointerElementType();
else
SrcElemTy = Type::getInt8Ty(Ptr->getContext());
if (!SrcElemTy->isSized())
return nullptr;
Type *ElemTy = SrcElemTy;
SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(ElemTy, Offset);
if (Offset != 0)
return nullptr;
// Try to add additional zero indices to reach the desired result element
// type.
// TODO: Should we avoid extra zero indices if ResElemTy can't be reached and
// we'll have to insert a bitcast anyway?
while (ElemTy != ResElemTy) {
Type *NextTy = GetElementPtrInst::getTypeAtIndex(ElemTy, (uint64_t)0);
if (!NextTy)
break;
Indices.push_back(APInt::getZero(isa<StructType>(ElemTy) ? 32 : BitWidth));
ElemTy = NextTy;
}
SmallVector<Constant *, 32> NewIdxs;
for (const APInt &Index : Indices)
NewIdxs.push_back(ConstantInt::get(
Type::getIntNTy(Ptr->getContext(), Index.getBitWidth()), Index));
// Preserve the inrange index from the innermost GEP if possible. We must
// have calculated the same indices up to and including the inrange index.
Optional<unsigned> InRangeIndex;
if (Optional<unsigned> LastIRIndex = InnermostGEP->getInRangeIndex())
if (SrcElemTy == InnermostGEP->getSourceElementType() &&
NewIdxs.size() > *LastIRIndex) {
InRangeIndex = LastIRIndex;
for (unsigned I = 0; I <= *LastIRIndex; ++I)
if (NewIdxs[I] != InnermostGEP->getOperand(I + 1))
return nullptr;
}
// Create a GEP.
Constant *C = ConstantExpr::getGetElementPtr(SrcElemTy, Ptr, NewIdxs,
InBounds, InRangeIndex);
assert(
cast<PointerType>(C->getType())->isOpaqueOrPointeeTypeMatches(ElemTy) &&
"Computed GetElementPtr has unexpected type!");
// If we ended up indexing a member with a type that doesn't match
// the type of what the original indices indexed, add a cast.
if (C->getType() != ResTy)
C = FoldBitCast(C, ResTy, DL);
return C;
}
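// Worked example for SymbolicallyEvaluateGEP (illustrative): a GEP of a GEP of
// a hypothetical global, where every index is a ConstantInt, is collapsed into
// a single getelementptr over the global's value type; the indices are
// recomputed from the accumulated byte offset, and the inbounds flag is kept
// only if every GEP in the chain was inbounds.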
/// Attempt to constant fold an instruction with the
/// specified opcode and operands. If successful, the constant result is
/// returned, if not, null is returned. Note that this function can fail when
/// attempting to fold instructions like loads and stores, which have no
/// constant expression form.
Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
ArrayRef<Constant *> Ops,
const DataLayout &DL,
const TargetLibraryInfo *TLI) {
Type *DestTy = InstOrCE->getType();
if (Instruction::isUnaryOp(Opcode))
return ConstantFoldUnaryOpOperand(Opcode, Ops[0], DL);
if (Instruction::isBinaryOp(Opcode))
return ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL);
if (Instruction::isCast(Opcode))
return ConstantFoldCastOperand(Opcode, Ops[0], DestTy, DL);
if (auto *GEP = dyn_cast<GEPOperator>(InstOrCE)) {
if (Constant *C = SymbolicallyEvaluateGEP(GEP, Ops, DL, TLI))
return C;
return ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), Ops[0],
Ops.slice(1), GEP->isInBounds(),
GEP->getInRangeIndex());
}
if (auto *CE = dyn_cast<ConstantExpr>(InstOrCE))
return CE->getWithOperands(Ops);
switch (Opcode) {
default: return nullptr;
case Instruction::ICmp:
case Instruction::FCmp: llvm_unreachable("Invalid for compares");
case Instruction::Freeze:
return isGuaranteedNotToBeUndefOrPoison(Ops[0]) ? Ops[0] : nullptr;
case Instruction::Call:
if (auto *F = dyn_cast<Function>(Ops.back())) {
const auto *Call = cast<CallBase>(InstOrCE);
if (canConstantFoldCallTo(Call, F))
return ConstantFoldCall(Call, F, Ops.slice(0, Ops.size() - 1), TLI);
}
return nullptr;
case Instruction::Select:
return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
case Instruction::ExtractElement:
return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
case Instruction::ExtractValue:
return ConstantExpr::getExtractValue(
Ops[0], cast<ExtractValueInst>(InstOrCE)->getIndices());
case Instruction::InsertElement:
return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
case Instruction::ShuffleVector:
return ConstantExpr::getShuffleVector(
Ops[0], Ops[1], cast<ShuffleVectorInst>(InstOrCE)->getShuffleMask());
}
}
} // end anonymous namespace
//===----------------------------------------------------------------------===//
// Constant Folding public APIs
//===----------------------------------------------------------------------===//
namespace {
Constant *
ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL,
const TargetLibraryInfo *TLI,
SmallDenseMap<Constant *, Constant *> &FoldedOps) {
if (!isa<ConstantVector>(C) && !isa<ConstantExpr>(C))
return const_cast<Constant *>(C);
SmallVector<Constant *, 8> Ops;
for (const Use &OldU : C->operands()) {
Constant *OldC = cast<Constant>(&OldU);
Constant *NewC = OldC;
// Recursively fold the ConstantExpr's operands. If we have already folded
// a ConstantExpr, we don't have to process it again.
if (isa<ConstantVector>(OldC) || isa<ConstantExpr>(OldC)) {
auto It = FoldedOps.find(OldC);
if (It == FoldedOps.end()) {
NewC = ConstantFoldConstantImpl(OldC, DL, TLI, FoldedOps);
FoldedOps.insert({OldC, NewC});
} else {
NewC = It->second;
}
}
Ops.push_back(NewC);
}
if (auto *CE = dyn_cast<ConstantExpr>(C)) {
if (CE->isCompare())
return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
DL, TLI);
return ConstantFoldInstOperandsImpl(CE, CE->getOpcode(), Ops, DL, TLI);
}
assert(isa<ConstantVector>(C));
return ConstantVector::get(Ops);
}
} // end anonymous namespace
Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// Handle PHI nodes quickly here...
if (auto *PN = dyn_cast<PHINode>(I)) {
Constant *CommonValue = nullptr;
SmallDenseMap<Constant *, Constant *> FoldedOps;
for (Value *Incoming : PN->incoming_values()) {
// If the incoming value is undef then skip it. Note that while we could
// skip the value if it is equal to the phi node itself we choose not to
// because that would break the rule that constant folding only applies if
// all operands are constants.
if (isa<UndefValue>(Incoming))
continue;
// If the incoming value is not a constant, then give up.
auto *C = dyn_cast<Constant>(Incoming);
if (!C)
return nullptr;
// Fold the PHI's operands.
C = ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);
// If the incoming value is a different constant to
// the one we saw previously, then give up.
if (CommonValue && C != CommonValue)
return nullptr;
CommonValue = C;
}
// If we reach here, all incoming values are the same constant or undef.
return CommonValue ? CommonValue : UndefValue::get(PN->getType());
}
// Scan the operand list, checking to see if they are all constants, if so,
// hand off to ConstantFoldInstOperandsImpl.
if (!all_of(I->operands(), [](Use &U) { return isa<Constant>(U); }))
return nullptr;
SmallDenseMap<Constant *, Constant *> FoldedOps;
SmallVector<Constant *, 8> Ops;
for (const Use &OpU : I->operands()) {
auto *Op = cast<Constant>(&OpU);
// Fold the Instruction's operands.
Op = ConstantFoldConstantImpl(Op, DL, TLI, FoldedOps);
Ops.push_back(Op);
}
if (const auto *CI = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
DL, TLI);
if (const auto *LI = dyn_cast<LoadInst>(I)) {
if (LI->isVolatile())
return nullptr;
return ConstantFoldLoadFromConstPtr(Ops[0], LI->getType(), DL);
}
if (auto *IVI = dyn_cast<InsertValueInst>(I))
return ConstantExpr::getInsertValue(Ops[0], Ops[1], IVI->getIndices());
if (auto *EVI = dyn_cast<ExtractValueInst>(I))
return ConstantExpr::getExtractValue(Ops[0], EVI->getIndices());
return ConstantFoldInstOperands(I, Ops, DL, TLI);
}
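// Worked example for ConstantFoldInstruction (illustrative): a PHI whose
// incoming values are i32 7 and undef folds to i32 7, a PHI with incoming
// values i32 7 and i32 8 returns nullptr, and any instruction with a
// non-constant operand is likewise left unfolded.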
Constant *llvm::ConstantFoldConstant(const Constant *C, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
SmallDenseMap<Constant *, Constant *> FoldedOps;
return ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);
}
Constant *llvm::ConstantFoldInstOperands(Instruction *I,
ArrayRef<Constant *> Ops,
const DataLayout &DL,
const TargetLibraryInfo *TLI) {
return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI);
}
Constant *llvm::ConstantFoldCompareInstOperands(unsigned IntPredicate,
Constant *Ops0, Constant *Ops1,
const DataLayout &DL,
const TargetLibraryInfo *TLI) {
CmpInst::Predicate Predicate = (CmpInst::Predicate)IntPredicate;
// fold: icmp (inttoptr x), null -> icmp x, 0
// fold: icmp null, (inttoptr x) -> icmp 0, x
// fold: icmp (ptrtoint x), 0 -> icmp x, null
// fold: icmp 0, (ptrtoint x) -> icmp null, x
// fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
// fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y
//
// FIXME: The following comment is out of date and the DataLayout is here now.
// ConstantExpr::getCompare cannot do this, because it doesn't have DL
// around to know if bit truncation is happening.
if (auto *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
if (Ops1->isNullValue()) {
if (CE0->getOpcode() == Instruction::IntToPtr) {
Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
// Convert the integer value to the right size to ensure we get the
// proper extension or truncation.
Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
IntPtrTy, false);
Constant *Null = Constant::getNullValue(C->getType());
return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
}
// Only do this transformation if the int is intptr-sized, otherwise
// there is a truncation or extension that we aren't modeling.
if (CE0->getOpcode() == Instruction::PtrToInt) {
Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
if (CE0->getType() == IntPtrTy) {
Constant *C = CE0->getOperand(0);
Constant *Null = Constant::getNullValue(C->getType());
return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI);
}
}
}
if (auto *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
if (CE0->getOpcode() == CE1->getOpcode()) {
if (CE0->getOpcode() == Instruction::IntToPtr) {
Type *IntPtrTy = DL.getIntPtrType(CE0->getType());
// Convert the integer value to the right size to ensure we get the
// proper extension or truncation.
Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0),
IntPtrTy, false);
Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0),
IntPtrTy, false);
return ConstantFoldCompareInstOperands(Predicate, C0, C1, DL, TLI);
}
// Only do this transformation if the int is intptr-sized, otherwise
// there is a truncation or extension that we aren't modeling.
if (CE0->getOpcode() == Instruction::PtrToInt) {
Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType());
if (CE0->getType() == IntPtrTy &&
CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) {
return ConstantFoldCompareInstOperands(
Predicate, CE0->getOperand(0), CE1->getOperand(0), DL, TLI);
}
}
}
}
// icmp eq (or x, y), 0 -> (icmp eq x, 0) & (icmp eq y, 0)
// icmp ne (or x, y), 0 -> (icmp ne x, 0) | (icmp ne y, 0)
if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) &&
CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) {
Constant *LHS = ConstantFoldCompareInstOperands(
Predicate, CE0->getOperand(0), Ops1, DL, TLI);
Constant *RHS = ConstantFoldCompareInstOperands(
Predicate, CE0->getOperand(1), Ops1, DL, TLI);
unsigned OpC =
Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
return ConstantFoldBinaryOpOperands(OpC, LHS, RHS, DL);
}
// Convert pointer comparison (base+offset1) pred (base+offset2) into
// offset1 pred offset2, for the case where the offset is inbounds. This
// only works for equality and unsigned comparison, as inbounds permits
// crossing the sign boundary. However, the offset comparison itself is
// signed.
if (Ops0->getType()->isPointerTy() && !ICmpInst::isSigned(Predicate)) {
unsigned IndexWidth = DL.getIndexTypeSizeInBits(Ops0->getType());
APInt Offset0(IndexWidth, 0);
Value *Stripped0 =
Ops0->stripAndAccumulateInBoundsConstantOffsets(DL, Offset0);
APInt Offset1(IndexWidth, 0);
Value *Stripped1 =
Ops1->stripAndAccumulateInBoundsConstantOffsets(DL, Offset1);
if (Stripped0 == Stripped1)
return ConstantExpr::getCompare(
ICmpInst::getSignedPredicate(Predicate),
ConstantInt::get(CE0->getContext(), Offset0),
ConstantInt::get(CE0->getContext(), Offset1));
}
} else if (isa<ConstantExpr>(Ops1)) {
// If RHS is a constant expression, but the left side isn't, swap the
// operands and try again.
Predicate = ICmpInst::getSwappedPredicate(Predicate);
return ConstantFoldCompareInstOperands(Predicate, Ops1, Ops0, DL, TLI);
}
return ConstantExpr::getCompare(Predicate, Ops0, Ops1);
}
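// Worked example for ConstantFoldCompareInstOperands (illustrative): comparing
// inttoptr (i64 0 to i8*) against a null i8* with icmp eq strips the inttoptr,
// compares the pointer-sized integer against zero, and folds to i1 true;
// similarly, icmp eq (or x, y), 0 is split into the 'and' of the two
// sub-comparisons, as handled above.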
Constant *llvm::ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op,
const DataLayout &DL) {
assert(Instruction::isUnaryOp(Opcode));
return ConstantExpr::get(Opcode, Op);
}
Constant *llvm::ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS,
Constant *RHS,
const DataLayout &DL) {
assert(Instruction::isBinaryOp(Opcode));
if (isa<ConstantExpr>(LHS) || isa<ConstantExpr>(RHS))
if (Constant *C = SymbolicallyEvaluateBinop(Opcode, LHS, RHS, DL))
return C;
return ConstantExpr::get(Opcode, LHS, RHS);
}
Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C,
Type *DestTy, const DataLayout &DL) {
assert(Instruction::isCast(Opcode));
switch (Opcode) {
default:
llvm_unreachable("Missing case");
case Instruction::PtrToInt:
if (auto *CE = dyn_cast<ConstantExpr>(C)) {
Constant *FoldedValue = nullptr;
// If the input is an inttoptr, eliminate the pair. This requires knowing
// the width of a pointer, so it can't be done in ConstantExpr::getCast.
if (CE->getOpcode() == Instruction::IntToPtr) {
// zext/trunc the inttoptr to pointer size.
FoldedValue = ConstantExpr::getIntegerCast(
CE->getOperand(0), DL.getIntPtrType(CE->getType()),
/*IsSigned=*/false);
} else if (auto *GEP = dyn_cast<GEPOperator>(CE)) {
// If we have GEP, we can perform the following folds:
// (ptrtoint (gep null, x)) -> x
// (ptrtoint (gep (gep null, x), y) -> x + y, etc.
unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
APInt BaseOffset(BitWidth, 0);
auto *Base = cast<Constant>(GEP->stripAndAccumulateConstantOffsets(
DL, BaseOffset, /*AllowNonInbounds=*/true));
if (Base->isNullValue()) {
FoldedValue = ConstantInt::get(CE->getContext(), BaseOffset);
}
}
if (FoldedValue) {
// Do a zext or trunc to get to the ptrtoint dest size.
return ConstantExpr::getIntegerCast(FoldedValue, DestTy,
/*IsSigned=*/false);
}
}
return ConstantExpr::getCast(Opcode, C, DestTy);
case Instruction::IntToPtr:
// If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if
// the int size is >= the ptr size and the address spaces are the same.
// This requires knowing the width of a pointer, so it can't be done in
// ConstantExpr::getCast.
if (auto *CE = dyn_cast<ConstantExpr>(C)) {
if (CE->getOpcode() == Instruction::PtrToInt) {
Constant *SrcPtr = CE->getOperand(0);
unsigned SrcPtrSize = DL.getPointerTypeSizeInBits(SrcPtr->getType());
unsigned MidIntSize = CE->getType()->getScalarSizeInBits();
if (MidIntSize >= SrcPtrSize) {
unsigned SrcAS = SrcPtr->getType()->getPointerAddressSpace();
if (SrcAS == DestTy->getPointerAddressSpace())
return FoldBitCast(CE->getOperand(0), DestTy, DL);
}
}
}
return ConstantExpr::getCast(Opcode, C, DestTy);
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPTrunc:
case Instruction::FPExt:
case Instruction::UIToFP:
case Instruction::SIToFP:
case Instruction::FPToUI:
case Instruction::FPToSI:
case Instruction::AddrSpaceCast:
return ConstantExpr::getCast(Opcode, C, DestTy);
case Instruction::BitCast:
return FoldBitCast(C, DestTy, DL);
}
}
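// Worked example for ConstantFoldCastOperand (illustrative, assuming 64-bit
// pointers): folding
//   ptrtoint (i8* inttoptr (i32 5 to i8*) to i64)
// first zero-extends the i32 5 to the 64-bit pointer-sized integer and then
// zexts/truncs to the destination type, yielding i64 5 without ever
// materializing the pointer round-trip.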
//===----------------------------------------------------------------------===//
// Constant Folding for Calls
//
bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
if (Call->isNoBuiltin())
return false;
switch (F->getIntrinsicID()) {
// Operations that do not operate on floating-point numbers and do not depend on
// the FP environment can be folded even in strictfp functions.
case Intrinsic::bswap:
case Intrinsic::ctpop:
case Intrinsic::ctlz:
case Intrinsic::cttz:
case Intrinsic::fshl:
case Intrinsic::fshr:
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
case Intrinsic::masked_load:
case Intrinsic::get_active_lane_mask:
case Intrinsic::abs:
case Intrinsic::smax:
case Intrinsic::smin:
case Intrinsic::umax:
case Intrinsic::umin:
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow:
case Intrinsic::ssub_with_overflow:
case Intrinsic::usub_with_overflow:
case Intrinsic::smul_with_overflow:
case Intrinsic::umul_with_overflow:
case Intrinsic::sadd_sat:
case Intrinsic::uadd_sat:
case Intrinsic::ssub_sat:
case Intrinsic::usub_sat:
case Intrinsic::smul_fix:
case Intrinsic::smul_fix_sat:
case Intrinsic::bitreverse:
case Intrinsic::is_constant:
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_mul:
case Intrinsic::vector_reduce_and:
case Intrinsic::vector_reduce_or:
case Intrinsic::vector_reduce_xor:
case Intrinsic::vector_reduce_smin:
case Intrinsic::vector_reduce_smax:
case Intrinsic::vector_reduce_umin:
case Intrinsic::vector_reduce_umax:
// Target intrinsics
case Intrinsic::amdgcn_perm:
case Intrinsic::arm_mve_vctp8:
case Intrinsic::arm_mve_vctp16:
case Intrinsic::arm_mve_vctp32:
case Intrinsic::arm_mve_vctp64:
case Intrinsic::aarch64_sve_convert_from_svbool:
// WebAssembly float semantics are always known
case Intrinsic::wasm_trunc_signed:
case Intrinsic::wasm_trunc_unsigned:
return true;
// Floating point operations cannot be folded in strictfp functions in the
// general case. They can be folded if the FP environment is known to the compiler.
case Intrinsic::minnum:
case Intrinsic::maxnum:
case Intrinsic::minimum:
case Intrinsic::maximum:
case Intrinsic::log:
case Intrinsic::log2:
case Intrinsic::log10:
case Intrinsic::exp:
case Intrinsic::exp2:
case Intrinsic::sqrt:
case Intrinsic::sin:
case Intrinsic::cos:
case Intrinsic::pow:
case Intrinsic::powi:
case Intrinsic::fma:
case Intrinsic::fmuladd:
case Intrinsic::fptoui_sat:
case Intrinsic::fptosi_sat:
case Intrinsic::convert_from_fp16:
case Intrinsic::convert_to_fp16:
case Intrinsic::amdgcn_cos:
case Intrinsic::amdgcn_cubeid:
case Intrinsic::amdgcn_cubema:
case Intrinsic::amdgcn_cubesc:
case Intrinsic::amdgcn_cubetc:
case Intrinsic::amdgcn_fmul_legacy:
case Intrinsic::amdgcn_fma_legacy:
case Intrinsic::amdgcn_fract:
case Intrinsic::amdgcn_ldexp:
case Intrinsic::amdgcn_sin:
// The intrinsics below depend on rounding mode in MXCSR.
case Intrinsic::x86_sse_cvtss2si:
case Intrinsic::x86_sse_cvtss2si64:
case Intrinsic::x86_sse_cvttss2si:
case Intrinsic::x86_sse_cvttss2si64:
case Intrinsic::x86_sse2_cvtsd2si:
case Intrinsic::x86_sse2_cvtsd2si64:
case Intrinsic::x86_sse2_cvttsd2si:
case Intrinsic::x86_sse2_cvttsd2si64:
case Intrinsic::x86_avx512_vcvtss2si32:
case Intrinsic::x86_avx512_vcvtss2si64:
case Intrinsic::x86_avx512_cvttss2si:
case Intrinsic::x86_avx512_cvttss2si64:
case Intrinsic::x86_avx512_vcvtsd2si32:
case Intrinsic::x86_avx512_vcvtsd2si64:
case Intrinsic::x86_avx512_cvttsd2si:
case Intrinsic::x86_avx512_cvttsd2si64:
case Intrinsic::x86_avx512_vcvtss2usi32:
case Intrinsic::x86_avx512_vcvtss2usi64:
case Intrinsic::x86_avx512_cvttss2usi:
case Intrinsic::x86_avx512_cvttss2usi64:
case Intrinsic::x86_avx512_vcvtsd2usi32:
case Intrinsic::x86_avx512_vcvtsd2usi64:
case Intrinsic::x86_avx512_cvttsd2usi:
case Intrinsic::x86_avx512_cvttsd2usi64:
return !Call->isStrictFP();
// Sign operations are actually bitwise operations, they do not raise
// exceptions even for SNANs.
case Intrinsic::fabs:
case Intrinsic::copysign:
// Non-constrained variants of rounding operations imply the default FP
// environment, so they can be folded in any case.
case Intrinsic::ceil:
case Intrinsic::floor:
case Intrinsic::round:
case Intrinsic::roundeven:
case Intrinsic::trunc:
case Intrinsic::nearbyint:
case Intrinsic::rint:
// Constrained intrinsics can be folded if the FP environment is known
// to the compiler.
case Intrinsic::experimental_constrained_fma:
case Intrinsic::experimental_constrained_fmuladd:
case Intrinsic::experimental_constrained_fadd:
case Intrinsic::experimental_constrained_fsub:
case Intrinsic::experimental_constrained_fmul:
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
case Intrinsic::experimental_constrained_ceil:
case Intrinsic::experimental_constrained_floor:
case Intrinsic::experimental_constrained_round:
case Intrinsic::experimental_constrained_roundeven:
case Intrinsic::experimental_constrained_trunc:
case Intrinsic::experimental_constrained_nearbyint:
case Intrinsic::experimental_constrained_rint:
return true;
default:
return false;
case Intrinsic::not_intrinsic: break;
}
if (!F->hasName() || Call->isStrictFP())
return false;
// In these cases, checking the length is required: we don't want to return
// true for a name like "cos\0blah", which strcmp would consider equal to
// "cos" even though it has length 8.
StringRef Name = F->getName();
switch (Name[0]) {
default:
return false;
case 'a':
return Name == "acos" || Name == "acosf" ||
Name == "asin" || Name == "asinf" ||
Name == "atan" || Name == "atanf" ||
Name == "atan2" || Name == "atan2f";
case 'c':
return Name == "ceil" || Name == "ceilf" ||
Name == "cos" || Name == "cosf" ||
Name == "cosh" || Name == "coshf";
case 'e':
return Name == "exp" || Name == "expf" ||
Name == "exp2" || Name == "exp2f";
case 'f':
return Name == "fabs" || Name == "fabsf" ||
Name == "floor" || Name == "floorf" ||
Name == "fmod" || Name == "fmodf";
case 'l':
return Name == "log" || Name == "logf" ||
Name == "log2" || Name == "log2f" ||
Name == "log10" || Name == "log10f";
case 'n':
return Name == "nearbyint" || Name == "nearbyintf";
case 'p':
return Name == "pow" || Name == "powf";
case 'r':
return Name == "remainder" || Name == "remainderf" ||
Name == "rint" || Name == "rintf" ||
Name == "round" || Name == "roundf";
case 's':
return Name == "sin" || Name == "sinf" ||
Name == "sinh" || Name == "sinhf" ||
Name == "sqrt" || Name == "sqrtf";
case 't':
return Name == "tan" || Name == "tanf" ||
Name == "tanh" || Name == "tanhf" ||
Name == "trunc" || Name == "truncf";
case '_':
// Check for various function names that get used for the math functions
// when the header files are preprocessed with the macro
// __FINITE_MATH_ONLY__ enabled.
// The '12' here is the length of the shortest name that can match.
// We need to check the size before looking at Name[1] and Name[2]
// so we may as well check a limit that will eliminate mismatches.
if (Name.size() < 12 || Name[1] != '_')
return false;
switch (Name[2]) {
default:
return false;
case 'a':
return Name == "__acos_finite" || Name == "__acosf_finite" ||
Name == "__asin_finite" || Name == "__asinf_finite" ||
Name == "__atan2_finite" || Name == "__atan2f_finite";
case 'c':
return Name == "__cosh_finite" || Name == "__coshf_finite";
case 'e':
return Name == "__exp_finite" || Name == "__expf_finite" ||
Name == "__exp2_finite" || Name == "__exp2f_finite";
case 'l':
return Name == "__log_finite" || Name == "__logf_finite" ||
Name == "__log10_finite" || Name == "__log10f_finite";
case 'p':
return Name == "__pow_finite" || Name == "__powf_finite";
case 's':
return Name == "__sinh_finite" || Name == "__sinhf_finite";
}
}
}
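// Worked example for canConstantFoldCallTo (illustrative): llvm.ctpop.i32 is
// always foldable since it never touches the FP environment, llvm.sqrt.f64 is
// foldable only when the call is not marked strictfp, and a libm call such as
// "cos" is foldable only if the call is not nobuiltin, is not strictfp, and
// the name matches exactly.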
namespace {
Constant *GetConstantFoldFPValue(double V, Type *Ty) {
if (Ty->isHalfTy() || Ty->isFloatTy()) {
APFloat APF(V);
bool unused;
APF.convert(Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &unused);
return ConstantFP::get(Ty->getContext(), APF);
}
if (Ty->isDoubleTy())
return ConstantFP::get(Ty->getContext(), APFloat(V));
llvm_unreachable("Can only constant fold half/float/double");
}
/// Clear the floating-point exception state.
inline void llvm_fenv_clearexcept() {
#if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT
feclearexcept(FE_ALL_EXCEPT);
#endif
errno = 0;
}
/// Test if a floating-point exception was raised.
inline bool llvm_fenv_testexcept() {
int errno_val = errno;
if (errno_val == ERANGE || errno_val == EDOM)
return true;
#if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT && HAVE_DECL_FE_INEXACT
if (fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT))
return true;
#endif
return false;
}
Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V,
Type *Ty) {
llvm_fenv_clearexcept();
double Result = NativeFP(V.convertToDouble());
if (llvm_fenv_testexcept()) {
llvm_fenv_clearexcept();
return nullptr;
}
return GetConstantFoldFPValue(Result, Ty);
}
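// Worked example for ConstantFoldFP (illustrative): evaluating log(1.0) on the
// host leaves the FP environment clean and folds to a ConstantFP 0.0, whereas
// log(-1.0) sets errno to EDOM (and may raise an FP exception), so the check
// above fires and nullptr is returned, leaving the call unfolded.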
Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
const APFloat &V, const APFloat &W, Type *Ty) {
llvm_fenv_clearexcept();
double Result = NativeFP(V.convertToDouble(), W.convertToDouble());
if (llvm_fenv_testexcept()) {
llvm_fenv_clearexcept();
return nullptr;
}
return GetConstantFoldFPValue(Result, Ty);
}
Constant *constantFoldVectorReduce(Intrinsic::ID IID, Constant *Op) {
FixedVectorType *VT = dyn_cast<FixedVectorType>(Op->getType());
if (!VT)
return nullptr;
// This isn't strictly necessary, but handle the special/common case of zero:
// all integer reductions of a zero input produce zero.
if (isa<ConstantAggregateZero>(Op))
return ConstantInt::get(VT->getElementType(), 0);
// This is the same as the underlying binops - poison propagates.
if (isa<PoisonValue>(Op) || Op->containsPoisonElement())
return PoisonValue::get(VT->getElementType());
// TODO: Handle undef.
if (!isa<ConstantVector>(Op) && !isa<ConstantDataVector>(Op))
return nullptr;
auto *EltC = dyn_cast<ConstantInt>(Op->getAggregateElement(0U));
if (!EltC)
return nullptr;
APInt Acc = EltC->getValue();
for (unsigned I = 1, E = VT->getNumElements(); I != E; I++) {
if (!(EltC = dyn_cast<ConstantInt>(Op->getAggregateElement(I))))
return nullptr;
const APInt &X = EltC->getValue();
switch (IID) {
case Intrinsic::vector_reduce_add:
Acc = Acc + X;
break;
case Intrinsic::vector_reduce_mul:
Acc = Acc * X;
break;
case Intrinsic::vector_reduce_and:
Acc = Acc & X;
break;
case Intrinsic::vector_reduce_or:
Acc = Acc | X;
break;
case Intrinsic::vector_reduce_xor:
Acc = Acc ^ X;
break;
case Intrinsic::vector_reduce_smin:
Acc = APIntOps::smin(Acc, X);
break;
case Intrinsic::vector_reduce_smax:
Acc = APIntOps::smax(Acc, X);
break;
case Intrinsic::vector_reduce_umin:
Acc = APIntOps::umin(Acc, X);
break;
case Intrinsic::vector_reduce_umax:
Acc = APIntOps::umax(Acc, X);
break;
}
}
return ConstantInt::get(Op->getContext(), Acc);
}
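// Worked example for constantFoldVectorReduce (illustrative):
//   vector_reduce_add(<4 x i32> <i32 1, i32 2, i32 3, i32 4>) folds to i32 10
// and vector_reduce_umax on the same vector folds to i32 4, while a vector
// containing a poison element folds to poison.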
/// Attempt to fold an SSE floating point to integer conversion of a constant
/// floating point. If roundTowardZero is false, the default IEEE rounding is
/// used (toward nearest, ties to even). This matches the behavior of the
/// non-truncating SSE instructions in the default rounding mode. The desired
/// integer type Ty is used to select how many bits are available for the
/// result. Returns null if the conversion cannot be performed, otherwise
/// returns the Constant value resulting from the conversion.
Constant *ConstantFoldSSEConvertToInt(const APFloat &Val, bool roundTowardZero,
Type *Ty, bool IsSigned) {
// All of these conversion intrinsics form an integer of at most 64 bits.
unsigned ResultWidth = Ty->getIntegerBitWidth();
assert(ResultWidth <= 64 &&
"Can only constant fold conversions to 64 and 32 bit ints");
uint64_t UIntVal;
bool isExact = false;
APFloat::roundingMode mode = roundTowardZero? APFloat::rmTowardZero
: APFloat::rmNearestTiesToEven;
APFloat::opStatus status =
Val.convertToInteger(makeMutableArrayRef(UIntVal), ResultWidth,
IsSigned, mode, &isExact);
if (status != APFloat::opOK &&
(!roundTowardZero || status != APFloat::opInexact))
return nullptr;
return ConstantInt::get(Ty, UIntVal, IsSigned);
}
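// Worked example for ConstantFoldSSEConvertToInt (illustrative): converting
// 2.0 to i32 is exact and folds to 2 in either mode; converting 3.75 with
// roundTowardZero = true folds to 3 (the inexact truncation is accepted),
// while in the non-truncating mode the inexact status rejects the fold; a
// value that does not fit in the destination width is never folded.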
double getValueAsDouble(ConstantFP *Op) {
Type *Ty = Op->getType();
if (Ty->isBFloatTy() || Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
return Op->getValueAPF().convertToDouble();
bool unused;
APFloat APF = Op->getValueAPF();
APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &unused);
return APF.convertToDouble();
}
static bool getConstIntOrUndef(Value *Op, const APInt *&C) {
if (auto *CI = dyn_cast<ConstantInt>(Op)) {
C = &CI->getValue();
return true;
}
if (isa<UndefValue>(Op)) {
C = nullptr;
return true;
}
return false;
}
/// Checks if the given intrinsic call, which evaluates to constant, is allowed
/// to be folded.
///
/// \param CI Constrained intrinsic call.
/// \param St Exception flags raised during constant evaluation.
static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI,
APFloat::opStatus St) {
Optional<RoundingMode> ORM = CI->getRoundingMode();
Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
// If the operation does not change exception status flags, it is safe
// to fold.
if (St == APFloat::opStatus::opOK)
return true;
// If evaluation raised FP exception, the result can depend on rounding
// mode. If the latter is unknown, folding is not possible.
if (!ORM || *ORM == RoundingMode::Dynamic)
return false;
// If FP exceptions are ignored, fold the call, even if such exception is
// raised.
if (!EB || *EB != fp::ExceptionBehavior::ebStrict)
return true;
// Leave the calculation for runtime so that exception flags are correctly set
// in hardware.
return false;
}
/// Returns the rounding mode that should be used for constant evaluation.
static RoundingMode
getEvaluationRoundingMode(const ConstrainedFPIntrinsic *CI) {
Optional<RoundingMode> ORM = CI->getRoundingMode();
if (!ORM || *ORM == RoundingMode::Dynamic)
// Even if the rounding mode is unknown, try evaluating the operation.
// If it does not raise an inexact exception, rounding was not applied,
// so the result is exact and does not depend on the rounding mode. Whether
// other FP exceptions are raised does not depend on the rounding mode either.
return RoundingMode::NearestTiesToEven;
return *ORM;
}
static Constant *ConstantFoldScalarCall1(StringRef Name,
Intrinsic::ID IntrinsicID,
Type *Ty,
ArrayRef<Constant *> Operands,
const TargetLibraryInfo *TLI,
const CallBase *Call) {
assert(Operands.size() == 1 && "Wrong number of operands.");
if (IntrinsicID == Intrinsic::is_constant) {
// We know we have a "Constant" argument. But we want to only
// return true for manifest constants, not those that depend on
// constants with unknowable values, e.g. GlobalValue or BlockAddress.
if (Operands[0]->isManifestConstant())
return ConstantInt::getTrue(Ty->getContext());
return nullptr;
}
if (isa<UndefValue>(Operands[0])) {
// cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN.
// ctpop() is between 0 and bitwidth, pick 0 for undef.
// fptoui.sat and fptosi.sat can always fold to zero (for a zero input).
if (IntrinsicID == Intrinsic::cos ||
IntrinsicID == Intrinsic::ctpop ||
IntrinsicID == Intrinsic::fptoui_sat ||
IntrinsicID == Intrinsic::fptosi_sat)
return Constant::getNullValue(Ty);
if (IntrinsicID == Intrinsic::bswap ||
IntrinsicID == Intrinsic::bitreverse ||
IntrinsicID == Intrinsic::launder_invariant_group ||
IntrinsicID == Intrinsic::strip_invariant_group)
return Operands[0];
}
if (isa<ConstantPointerNull>(Operands[0])) {
// launder(null) == null == strip(null) iff in addrspace 0
if (IntrinsicID == Intrinsic::launder_invariant_group ||
IntrinsicID == Intrinsic::strip_invariant_group) {
// If instruction is not yet put in a basic block (e.g. when cloning
// a function during inlining), Call's caller may not be available.
// So check Call's BB first before querying Call->getCaller.
const Function *Caller =
Call->getParent() ? Call->getCaller() : nullptr;
if (Caller &&
!NullPointerIsDefined(
Caller, Operands[0]->getType()->getPointerAddressSpace())) {
return Operands[0];
}
return nullptr;
}
}
if (auto *Op = dyn_cast<ConstantFP>(Operands[0])) {
if (IntrinsicID == Intrinsic::convert_to_fp16) {
APFloat Val(Op->getValueAPF());
bool lost = false;
Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &lost);
return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt());
}
APFloat U = Op->getValueAPF();
if (IntrinsicID == Intrinsic::wasm_trunc_signed ||
IntrinsicID == Intrinsic::wasm_trunc_unsigned) {
bool Signed = IntrinsicID == Intrinsic::wasm_trunc_signed;
if (U.isNaN())
return nullptr;
unsigned Width = Ty->getIntegerBitWidth();
APSInt Int(Width, !Signed);
bool IsExact = false;
APFloat::opStatus Status =
U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact);
if (Status == APFloat::opOK || Status == APFloat::opInexact)
return ConstantInt::get(Ty, Int);
return nullptr;
}
if (IntrinsicID == Intrinsic::fptoui_sat ||
IntrinsicID == Intrinsic::fptosi_sat) {
// convertToInteger() already has the desired saturation semantics.
APSInt Int(Ty->getIntegerBitWidth(),
IntrinsicID == Intrinsic::fptoui_sat);
bool IsExact;
U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact);
return ConstantInt::get(Ty, Int);
}
if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
return nullptr;
// Use internal versions of these intrinsics.
if (IntrinsicID == Intrinsic::nearbyint || IntrinsicID == Intrinsic::rint) {
U.roundToIntegral(APFloat::rmNearestTiesToEven);
return ConstantFP::get(Ty->getContext(), U);
}
if (IntrinsicID == Intrinsic::round) {
U.roundToIntegral(APFloat::rmNearestTiesToAway);
return ConstantFP::get(Ty->getContext(), U);
}
if (IntrinsicID == Intrinsic::roundeven) {
U.roundToIntegral(APFloat::rmNearestTiesToEven);
return ConstantFP::get(Ty->getContext(), U);
}
if (IntrinsicID == Intrinsic::ceil) {
U.roundToIntegral(APFloat::rmTowardPositive);
return ConstantFP::get(Ty->getContext(), U);
}
if (IntrinsicID == Intrinsic::floor) {
U.roundToIntegral(APFloat::rmTowardNegative);
return ConstantFP::get(Ty->getContext(), U);
}
if (IntrinsicID == Intrinsic::trunc) {
U.roundToIntegral(APFloat::rmTowardZero);
return ConstantFP::get(Ty->getContext(), U);
}
if (IntrinsicID == Intrinsic::fabs) {
U.clearSign();
return ConstantFP::get(Ty->getContext(), U);
}
if (IntrinsicID == Intrinsic::amdgcn_fract) {
// The v_fract instruction behaves like the OpenCL spec, which defines
// fract(x) as fmin(x - floor(x), 0x1.fffffep-1f): "The min() operator is
// there to prevent fract(-small) from returning 1.0. It returns the
// largest positive floating-point number less than 1.0."
APFloat FloorU(U);
FloorU.roundToIntegral(APFloat::rmTowardNegative);
APFloat FractU(U - FloorU);
APFloat AlmostOne(U.getSemantics(), 1);
AlmostOne.next(/*nextDown*/ true);
return ConstantFP::get(Ty->getContext(), minimum(FractU, AlmostOne));
}
// Rounding operations (floor, trunc, ceil, round and nearbyint) do not
// raise FP exceptions, unless the argument is signaling NaN.
Optional<APFloat::roundingMode> RM;
switch (IntrinsicID) {
default:
break;
case Intrinsic::experimental_constrained_nearbyint:
case Intrinsic::experimental_constrained_rint: {
auto CI = cast<ConstrainedFPIntrinsic>(Call);
RM = CI->getRoundingMode();
if (!RM || RM.getValue() == RoundingMode::Dynamic)
return nullptr;
break;
}
case Intrinsic::experimental_constrained_round:
RM = APFloat::rmNearestTiesToAway;
break;
case Intrinsic::experimental_constrained_ceil:
RM = APFloat::rmTowardPositive;
break;
case Intrinsic::experimental_constrained_floor:
RM = APFloat::rmTowardNegative;
break;
case Intrinsic::experimental_constrained_trunc:
RM = APFloat::rmTowardZero;
break;
}
if (RM) {
auto CI = cast<ConstrainedFPIntrinsic>(Call);
if (U.isFinite()) {
APFloat::opStatus St = U.roundToIntegral(*RM);
if (IntrinsicID == Intrinsic::experimental_constrained_rint &&
St == APFloat::opInexact) {
Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
if (EB && *EB == fp::ebStrict)
return nullptr;
}
} else if (U.isSignaling()) {
Optional<fp::ExceptionBehavior> EB = CI->getExceptionBehavior();
if (EB && *EB != fp::ebIgnore)
return nullptr;
U = APFloat::getQNaN(U.getSemantics());
}
return ConstantFP::get(Ty->getContext(), U);
}
/// We only fold functions with finite arguments. Folding NaN and inf is
/// likely to be aborted with an exception anyway, and some host libms
/// have known errors raising exceptions.
if (!U.isFinite())
return nullptr;
/// Currently APFloat versions of these functions do not exist, so we use
/// the host native double versions. Float versions are not called
/// directly, but for all of these it holds that
/// (float)(f((double)arg)) == f(arg). Long double is not supported yet.
const APFloat &APF = Op->getValueAPF();
switch (IntrinsicID) {
default: break;
case Intrinsic::log:
return ConstantFoldFP(log, APF, Ty);
case Intrinsic::log2:
// TODO: What about hosts that lack a C99 library?
return ConstantFoldFP(Log2, APF, Ty);
case Intrinsic::log10:
// TODO: What about hosts that lack a C99 library?
return ConstantFoldFP(log10, APF, Ty);
case Intrinsic::exp:
return ConstantFoldFP(exp, APF, Ty);
case Intrinsic::exp2:
// Fold exp2(x) as pow(2, x), in case the host lacks a C99 library.
return ConstantFoldBinaryFP(pow, APFloat(2.0), APF, Ty);
case Intrinsic::sin:
return ConstantFoldFP(sin, APF, Ty);
case Intrinsic::cos:
return ConstantFoldFP(cos, APF, Ty);
case Intrinsic::sqrt:
return ConstantFoldFP(sqrt, APF, Ty);
case Intrinsic::amdgcn_cos:
case Intrinsic::amdgcn_sin: {
double V = getValueAsDouble(Op);
if (V < -256.0 || V > 256.0)
// The gfx8 and gfx9 architectures handle arguments outside the range
// [-256, 256] differently. This should be a rare case so bail out
// rather than trying to handle the difference.
return nullptr;
bool IsCos = IntrinsicID == Intrinsic::amdgcn_cos;
double V4 = V * 4.0;
if (V4 == floor(V4)) {
// Force exact results for quarter-integer inputs.
const double SinVals[4] = { 0.0, 1.0, 0.0, -1.0 };
V = SinVals[((int)V4 + (IsCos ? 1 : 0)) & 3];
} else {
if (IsCos)
V = cos(V * 2.0 * numbers::pi);
else
V = sin(V * 2.0 * numbers::pi);
}
return GetConstantFoldFPValue(V, Ty);
}
}
if (!TLI)
return nullptr;
LibFunc Func = NotLibFunc;
if (!TLI->getLibFunc(Name, Func))
return nullptr;
switch (Func) {
default:
break;
case LibFunc_acos:
case LibFunc_acosf:
case LibFunc_acos_finite:
case LibFunc_acosf_finite:
if (TLI->has(Func))
return ConstantFoldFP(acos, APF, Ty);
break;
case LibFunc_asin:
case LibFunc_asinf:
case LibFunc_asin_finite:
case LibFunc_asinf_finite:
if (TLI->has(Func))
return ConstantFoldFP(asin, APF, Ty);
break;
case LibFunc_atan:
case LibFunc_atanf:
if (TLI->has(Func))
return ConstantFoldFP(atan, APF, Ty);
break;
case LibFunc_ceil:
case LibFunc_ceilf:
if (TLI->has(Func)) {
U.roundToIntegral(APFloat::rmTowardPositive);
return ConstantFP::get(Ty->getContext(), U);
}
break;
case LibFunc_cos:
case LibFunc_cosf:
if (TLI->has(Func))
return ConstantFoldFP(cos, APF, Ty);
break;
case LibFunc_cosh:
case LibFunc_coshf:
case LibFunc_cosh_finite:
case LibFunc_coshf_finite:
if (TLI->has(Func))
return ConstantFoldFP(cosh, APF, Ty);
break;
case LibFunc_exp:
case LibFunc_expf:
case LibFunc_exp_finite:
case LibFunc_expf_finite:
if (TLI->has(Func))
return ConstantFoldFP(exp, APF, Ty);
break;
case LibFunc_exp2:
case LibFunc_exp2f:
case LibFunc_exp2_finite:
case LibFunc_exp2f_finite:
if (TLI->has(Func))
// Fold exp2(x) as pow(2, x), in case the host lacks a C99 library.
return ConstantFoldBinaryFP(pow, APFloat(2.0), APF, Ty);
break;
case LibFunc_fabs:
case LibFunc_fabsf:
if (TLI->has(Func)) {
U.clearSign();
return ConstantFP::get(Ty->getContext(), U);
}
break;
case LibFunc_floor:
case LibFunc_floorf:
if (TLI->has(Func)) {
U.roundToIntegral(APFloat::rmTowardNegative);
return ConstantFP::get(Ty->getContext(), U);
}
break;
case LibFunc_log:
case LibFunc_logf:
case LibFunc_log_finite:
case LibFunc_logf_finite:
if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))
return ConstantFoldFP(log, APF, Ty);
break;
case LibFunc_log2:
case LibFunc_log2f:
case LibFunc_log2_finite:
case LibFunc_log2f_finite:
if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))
// TODO: What about hosts that lack a C99 library?
return ConstantFoldFP(Log2, APF, Ty);
break;
case LibFunc_log10:
case LibFunc_log10f:
case LibFunc_log10_finite:
case LibFunc_log10f_finite:
if (!APF.isNegative() && !APF.isZero() && TLI->has(Func))
// TODO: What about hosts that lack a C99 library?
return ConstantFoldFP(log10, APF, Ty);
break;
case LibFunc_nearbyint:
case LibFunc_nearbyintf:
case LibFunc_rint:
case LibFunc_rintf:
if (TLI->has(Func)) {
U.roundToIntegral(APFloat::rmNearestTiesToEven);
return ConstantFP::get(Ty->getContext(), U);
}
break;
case LibFunc_round:
case LibFunc_roundf:
if (TLI->has(Func)) {
U.roundToIntegral(APFloat::rmNearestTiesToAway);
return ConstantFP::get(Ty->getContext(), U);
}
break;
case LibFunc_sin:
case LibFunc_sinf:
if (TLI->has(Func))
return ConstantFoldFP(sin, APF, Ty);
break;
case LibFunc_sinh:
case LibFunc_sinhf:
case LibFunc_sinh_finite:
case LibFunc_sinhf_finite:
if (TLI->has(Func))
return ConstantFoldFP(sinh, APF, Ty);
break;
case LibFunc_sqrt:
case LibFunc_sqrtf:
if (!APF.isNegative() && TLI->has(Func))
return ConstantFoldFP(sqrt, APF, Ty);
break;
case LibFunc_tan:
case LibFunc_tanf:
if (TLI->has(Func))
return ConstantFoldFP(tan, APF, Ty);
break;
case LibFunc_tanh:
case LibFunc_tanhf:
if (TLI->has(Func))
return ConstantFoldFP(tanh, APF, Ty);
break;
case LibFunc_trunc:
case LibFunc_truncf:
if (TLI->has(Func)) {
U.roundToIntegral(APFloat::rmTowardZero);
return ConstantFP::get(Ty->getContext(), U);
}
break;
}
return nullptr;
}
if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
switch (IntrinsicID) {
case Intrinsic::bswap:
return ConstantInt::get(Ty->getContext(), Op->getValue().byteSwap());
case Intrinsic::ctpop:
return ConstantInt::get(Ty, Op->getValue().countPopulation());
case Intrinsic::bitreverse:
return ConstantInt::get(Ty->getContext(), Op->getValue().reverseBits());
case Intrinsic::convert_from_fp16: {
APFloat Val(APFloat::IEEEhalf(), Op->getValue());
bool lost = false;
APFloat::opStatus status = Val.convert(
Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &lost);
// Conversion is always precise.
(void)status;
assert(status == APFloat::opOK && !lost &&
"Precision lost during fp16 constfolding");
return ConstantFP::get(Ty->getContext(), Val);
}
default:
return nullptr;
}
}
switch (IntrinsicID) {
default: break;
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_mul:
case Intrinsic::vector_reduce_and:
case Intrinsic::vector_reduce_or:
case Intrinsic::vector_reduce_xor:
case Intrinsic::vector_reduce_smin:
case Intrinsic::vector_reduce_smax:
case Intrinsic::vector_reduce_umin:
case Intrinsic::vector_reduce_umax:
if (Constant *C = constantFoldVectorReduce(IntrinsicID, Operands[0]))
return C;
break;
}
// Support ConstantVector in case we have an Undef in the top.
if (isa<ConstantVector>(Operands[0]) ||
isa<ConstantDataVector>(Operands[0])) {
auto *Op = cast<Constant>(Operands[0]);
switch (IntrinsicID) {
default: break;
case Intrinsic::x86_sse_cvtss2si:
case Intrinsic::x86_sse_cvtss2si64:
case Intrinsic::x86_sse2_cvtsd2si:
case Intrinsic::x86_sse2_cvtsd2si64:
if (ConstantFP *FPOp =
dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
/*roundTowardZero=*/false, Ty,
/*IsSigned*/true);
break;
case Intrinsic::x86_sse_cvttss2si:
case Intrinsic::x86_sse_cvttss2si64:
case Intrinsic::x86_sse2_cvttsd2si:
case Intrinsic::x86_sse2_cvttsd2si64:
if (ConstantFP *FPOp =
dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
/*roundTowardZero=*/true, Ty,
/*IsSigned*/true);
break;
}
}
return nullptr;
}
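// [Illustrative sketch; not part of the original file] The amdgcn_fract fold
// above follows the OpenCL rule fract(x) = fmin(x - floor(x), 0x1.fffffep-1f).
// A minimal host-double model of that rule, with a hypothetical name, assuming
// IEEE double arithmetic and the C math functions already used in this file:
static double fractReferenceSketch(double X) {
  double AlmostOne = nextafter(1.0, 0.0); // largest double strictly below 1.0
  return fmin(X - floor(X), AlmostOne);   // clamp so fract(-tiny) stays < 1.0
}
// For example, fractReferenceSketch(-0.25) yields 0.75, and for a tiny
// negative input the clamp keeps the result just below 1.0 instead of 1.0.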
static Constant *ConstantFoldScalarCall2(StringRef Name,
Intrinsic::ID IntrinsicID,
Type *Ty,
ArrayRef<Constant *> Operands,
const TargetLibraryInfo *TLI,
const CallBase *Call) {
assert(Operands.size() == 2 && "Wrong number of operands.");
if (Ty->isFloatingPointTy()) {
// TODO: We should have undef handling for all of the FP intrinsics that
// this function attempts to fold.
bool IsOp0Undef = isa<UndefValue>(Operands[0]);
bool IsOp1Undef = isa<UndefValue>(Operands[1]);
switch (IntrinsicID) {
case Intrinsic::maxnum:
case Intrinsic::minnum:
case Intrinsic::maximum:
case Intrinsic::minimum:
// If one argument is undef, return the other argument.
if (IsOp0Undef)
return Operands[1];
if (IsOp1Undef)
return Operands[0];
break;
}
}
if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
if (!Ty->isFloatingPointTy())
return nullptr;
const APFloat &Op1V = Op1->getValueAPF();
if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
if (Op2->getType() != Op1->getType())
return nullptr;
const APFloat &Op2V = Op2->getValueAPF();
if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
APFloat Res = Op1V;
APFloat::opStatus St;
switch (IntrinsicID) {
default:
return nullptr;
case Intrinsic::experimental_constrained_fadd:
St = Res.add(Op2V, RM);
break;
case Intrinsic::experimental_constrained_fsub:
St = Res.subtract(Op2V, RM);
break;
case Intrinsic::experimental_constrained_fmul:
St = Res.multiply(Op2V, RM);
break;
case Intrinsic::experimental_constrained_fdiv:
St = Res.divide(Op2V, RM);
break;
case Intrinsic::experimental_constrained_frem:
St = Res.mod(Op2V);
break;
}
if (mayFoldConstrained(const_cast<ConstrainedFPIntrinsic *>(ConstrIntr),
St))
return ConstantFP::get(Ty->getContext(), Res);
return nullptr;
}
switch (IntrinsicID) {
default:
break;
case Intrinsic::copysign:
return ConstantFP::get(Ty->getContext(), APFloat::copySign(Op1V, Op2V));
case Intrinsic::minnum:
return ConstantFP::get(Ty->getContext(), minnum(Op1V, Op2V));
case Intrinsic::maxnum:
return ConstantFP::get(Ty->getContext(), maxnum(Op1V, Op2V));
case Intrinsic::minimum:
return ConstantFP::get(Ty->getContext(), minimum(Op1V, Op2V));
case Intrinsic::maximum:
return ConstantFP::get(Ty->getContext(), maximum(Op1V, Op2V));
}
if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
return nullptr;
switch (IntrinsicID) {
default:
break;
case Intrinsic::pow:
return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
case Intrinsic::amdgcn_fmul_legacy:
// The legacy behaviour is that multiplying +/- 0.0 by anything, even
// NaN or infinity, gives +0.0.
if (Op1V.isZero() || Op2V.isZero())
return ConstantFP::getNullValue(Ty);
return ConstantFP::get(Ty->getContext(), Op1V * Op2V);
}
if (!TLI)
return nullptr;
LibFunc Func = NotLibFunc;
if (!TLI->getLibFunc(Name, Func))
return nullptr;
switch (Func) {
default:
break;
case LibFunc_pow:
case LibFunc_powf:
case LibFunc_pow_finite:
case LibFunc_powf_finite:
if (TLI->has(Func))
return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
break;
case LibFunc_fmod:
case LibFunc_fmodf:
if (TLI->has(Func)) {
APFloat V = Op1->getValueAPF();
if (APFloat::opStatus::opOK == V.mod(Op2->getValueAPF()))
return ConstantFP::get(Ty->getContext(), V);
}
break;
case LibFunc_remainder:
case LibFunc_remainderf:
if (TLI->has(Func)) {
APFloat V = Op1->getValueAPF();
if (APFloat::opStatus::opOK == V.remainder(Op2->getValueAPF()))
return ConstantFP::get(Ty->getContext(), V);
}
break;
case LibFunc_atan2:
case LibFunc_atan2f:
case LibFunc_atan2_finite:
case LibFunc_atan2f_finite:
if (TLI->has(Func))
return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
break;
}
} else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
return nullptr;
if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy())
return ConstantFP::get(
Ty->getContext(),
APFloat((float)std::pow((float)Op1V.convertToDouble(),
(int)Op2C->getZExtValue())));
if (IntrinsicID == Intrinsic::powi && Ty->isFloatTy())
return ConstantFP::get(
Ty->getContext(),
APFloat((float)std::pow((float)Op1V.convertToDouble(),
(int)Op2C->getZExtValue())));
if (IntrinsicID == Intrinsic::powi && Ty->isDoubleTy())
return ConstantFP::get(
Ty->getContext(),
APFloat((double)std::pow(Op1V.convertToDouble(),
(int)Op2C->getZExtValue())));
if (IntrinsicID == Intrinsic::amdgcn_ldexp) {
// FIXME: Should flush denorms depending on FP mode, but that's ignored
// everywhere else.
// scalbn is equivalent to ldexp with float radix 2
APFloat Result = scalbn(Op1->getValueAPF(), Op2C->getSExtValue(),
APFloat::rmNearestTiesToEven);
return ConstantFP::get(Ty->getContext(), Result);
}
}
return nullptr;
}
if (Operands[0]->getType()->isIntegerTy() &&
Operands[1]->getType()->isIntegerTy()) {
const APInt *C0, *C1;
if (!getConstIntOrUndef(Operands[0], C0) ||
!getConstIntOrUndef(Operands[1], C1))
return nullptr;
switch (IntrinsicID) {
default: break;
case Intrinsic::smax:
case Intrinsic::smin:
case Intrinsic::umax:
case Intrinsic::umin:
if (!C0 && !C1)
return UndefValue::get(Ty);
if (!C0 || !C1)
return MinMaxIntrinsic::getSaturationPoint(IntrinsicID, Ty);
return ConstantInt::get(
Ty, ICmpInst::compare(*C0, *C1,
MinMaxIntrinsic::getPredicate(IntrinsicID))
? *C0
: *C1);
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
// X - undef -> { 0, false }
// undef - X -> { 0, false }
if (!C0 || !C1)
return Constant::getNullValue(Ty);
LLVM_FALLTHROUGH;
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
// X + undef -> { -1, false }
// undef + x -> { -1, false }
if (!C0 || !C1) {
return ConstantStruct::get(
cast<StructType>(Ty),
{Constant::getAllOnesValue(Ty->getStructElementType(0)),
Constant::getNullValue(Ty->getStructElementType(1))});
}
LLVM_FALLTHROUGH;
case Intrinsic::smul_with_overflow:
case Intrinsic::umul_with_overflow: {
// undef * X -> { 0, false }
// X * undef -> { 0, false }
if (!C0 || !C1)
return Constant::getNullValue(Ty);
APInt Res;
bool Overflow;
switch (IntrinsicID) {
default: llvm_unreachable("Invalid case");
case Intrinsic::sadd_with_overflow:
Res = C0->sadd_ov(*C1, Overflow);
break;
case Intrinsic::uadd_with_overflow:
Res = C0->uadd_ov(*C1, Overflow);
break;
case Intrinsic::ssub_with_overflow:
Res = C0->ssub_ov(*C1, Overflow);
break;
case Intrinsic::usub_with_overflow:
Res = C0->usub_ov(*C1, Overflow);
break;
case Intrinsic::smul_with_overflow:
Res = C0->smul_ov(*C1, Overflow);
break;
case Intrinsic::umul_with_overflow:
Res = C0->umul_ov(*C1, Overflow);
break;
}
Constant *Ops[] = {
ConstantInt::get(Ty->getContext(), Res),
ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow)
};
return ConstantStruct::get(cast<StructType>(Ty), Ops);
}
case Intrinsic::uadd_sat:
case Intrinsic::sadd_sat:
if (!C0 && !C1)
return UndefValue::get(Ty);
if (!C0 || !C1)
return Constant::getAllOnesValue(Ty);
if (IntrinsicID == Intrinsic::uadd_sat)
return ConstantInt::get(Ty, C0->uadd_sat(*C1));
else
return ConstantInt::get(Ty, C0->sadd_sat(*C1));
case Intrinsic::usub_sat:
case Intrinsic::ssub_sat:
if (!C0 && !C1)
return UndefValue::get(Ty);
if (!C0 || !C1)
return Constant::getNullValue(Ty);
if (IntrinsicID == Intrinsic::usub_sat)
return ConstantInt::get(Ty, C0->usub_sat(*C1));
else
return ConstantInt::get(Ty, C0->ssub_sat(*C1));
case Intrinsic::cttz:
case Intrinsic::ctlz:
assert(C1 && "Must be constant int");
// cttz(0, 1) and ctlz(0, 1) are poison.
if (C1->isOne() && (!C0 || C0->isZero()))
return PoisonValue::get(Ty);
if (!C0)
return Constant::getNullValue(Ty);
if (IntrinsicID == Intrinsic::cttz)
return ConstantInt::get(Ty, C0->countTrailingZeros());
else
return ConstantInt::get(Ty, C0->countLeadingZeros());
case Intrinsic::abs:
assert(C1 && "Must be constant int");
assert((C1->isOne() || C1->isZero()) && "Must be 0 or 1");
// Undef or minimum val operand with poison min --> undef
if (C1->isOne() && (!C0 || C0->isMinSignedValue()))
return UndefValue::get(Ty);
// Undef operand with no poison min --> 0 (sign bit must be clear)
if (!C0)
return Constant::getNullValue(Ty);
return ConstantInt::get(Ty, C0->abs());
}
return nullptr;
}
// Support ConstantVector in case we have an Undef in the top.
if ((isa<ConstantVector>(Operands[0]) ||
isa<ConstantDataVector>(Operands[0])) &&
// Check for default rounding mode.
// FIXME: Support other rounding modes?
isa<ConstantInt>(Operands[1]) &&
cast<ConstantInt>(Operands[1])->getValue() == 4) {
auto *Op = cast<Constant>(Operands[0]);
switch (IntrinsicID) {
default: break;
case Intrinsic::x86_avx512_vcvtss2si32:
case Intrinsic::x86_avx512_vcvtss2si64:
case Intrinsic::x86_avx512_vcvtsd2si32:
case Intrinsic::x86_avx512_vcvtsd2si64:
if (ConstantFP *FPOp =
dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
/*roundTowardZero=*/false, Ty,
/*IsSigned*/true);
break;
case Intrinsic::x86_avx512_vcvtss2usi32:
case Intrinsic::x86_avx512_vcvtss2usi64:
case Intrinsic::x86_avx512_vcvtsd2usi32:
case Intrinsic::x86_avx512_vcvtsd2usi64:
if (ConstantFP *FPOp =
dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
/*roundTowardZero=*/false, Ty,
/*IsSigned*/false);
break;
case Intrinsic::x86_avx512_cvttss2si:
case Intrinsic::x86_avx512_cvttss2si64:
case Intrinsic::x86_avx512_cvttsd2si:
case Intrinsic::x86_avx512_cvttsd2si64:
if (ConstantFP *FPOp =
dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
/*roundTowardZero=*/true, Ty,
/*IsSigned*/true);
break;
case Intrinsic::x86_avx512_cvttss2usi:
case Intrinsic::x86_avx512_cvttss2usi64:
case Intrinsic::x86_avx512_cvttsd2usi:
case Intrinsic::x86_avx512_cvttsd2usi64:
if (ConstantFP *FPOp =
dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(),
/*roundTowardZero=*/true, Ty,
/*IsSigned*/false);
break;
}
}
return nullptr;
}
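// [Illustrative sketch; not part of the original file] The saturating cases
// above clamp instead of wrapping. A 32-bit model, with hypothetical names,
// of what C0->uadd_sat(*C1) and C0->usub_sat(*C1) compute:
static unsigned uaddSatSketch(unsigned A, unsigned B) {
  unsigned Sum = A + B;       // unsigned addition wraps on overflow
  return Sum < A ? ~0u : Sum; // if it wrapped, clamp to UINT_MAX
}
static unsigned usubSatSketch(unsigned A, unsigned B) {
  return A > B ? A - B : 0u;  // clamp at zero instead of wrapping
}
// E.g. uaddSatSketch(0xFFFFFFF0u, 0x20u) == 0xFFFFFFFFu and
// usubSatSketch(3u, 7u) == 0u, matching the folded intrinsic results.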
static APFloat ConstantFoldAMDGCNCubeIntrinsic(Intrinsic::ID IntrinsicID,
const APFloat &S0,
const APFloat &S1,
const APFloat &S2) {
unsigned ID;
const fltSemantics &Sem = S0.getSemantics();
APFloat MA(Sem), SC(Sem), TC(Sem);
if (abs(S2) >= abs(S0) && abs(S2) >= abs(S1)) {
if (S2.isNegative() && S2.isNonZero() && !S2.isNaN()) {
// S2 < 0
ID = 5;
SC = -S0;
} else {
ID = 4;
SC = S0;
}
MA = S2;
TC = -S1;
} else if (abs(S1) >= abs(S0)) {
if (S1.isNegative() && S1.isNonZero() && !S1.isNaN()) {
// S1 < 0
ID = 3;
TC = -S2;
} else {
ID = 2;
TC = S2;
}
MA = S1;
SC = S0;
} else {
if (S0.isNegative() && S0.isNonZero() && !S0.isNaN()) {
// S0 < 0
ID = 1;
SC = S2;
} else {
ID = 0;
SC = -S2;
}
MA = S0;
TC = -S1;
}
switch (IntrinsicID) {
default:
llvm_unreachable("unhandled amdgcn cube intrinsic");
case Intrinsic::amdgcn_cubeid:
return APFloat(Sem, ID);
case Intrinsic::amdgcn_cubema:
return MA + MA;
case Intrinsic::amdgcn_cubesc:
return SC;
case Intrinsic::amdgcn_cubetc:
return TC;
}
}
static Constant *ConstantFoldAMDGCNPermIntrinsic(ArrayRef<Constant *> Operands,
Type *Ty) {
const APInt *C0, *C1, *C2;
if (!getConstIntOrUndef(Operands[0], C0) ||
!getConstIntOrUndef(Operands[1], C1) ||
!getConstIntOrUndef(Operands[2], C2))
return nullptr;
if (!C2)
return UndefValue::get(Ty);
APInt Val(32, 0);
unsigned NumUndefBytes = 0;
for (unsigned I = 0; I < 32; I += 8) {
unsigned Sel = C2->extractBitsAsZExtValue(8, I);
unsigned B = 0;
if (Sel >= 13)
B = 0xff;
else if (Sel == 12)
B = 0x00;
else {
const APInt *Src = ((Sel & 10) == 10 || (Sel & 12) == 4) ? C0 : C1;
if (!Src)
++NumUndefBytes;
else if (Sel < 8)
B = Src->extractBitsAsZExtValue(8, (Sel & 3) * 8);
else
B = Src->extractBitsAsZExtValue(1, (Sel & 1) ? 31 : 15) * 0xff;
}
Val.insertBits(B, I, 8);
}
if (NumUndefBytes == 4)
return UndefValue::get(Ty);
return ConstantInt::get(Ty, Val);
}
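// [Illustrative sketch; not part of the original file] A simplified model of
// one selector byte of the amdgcn.perm fold above, covering only the plain
// byte-select encodings (it omits the bit-replication selectors 8-11):
static unsigned permByteSketch(unsigned Sel, unsigned Src0, unsigned Src1) {
  if (Sel >= 13)
    return 0xff;                          // constant 0xff byte
  if (Sel == 12)
    return 0x00;                          // constant 0x00 byte
  unsigned Src = (Sel & 4) ? Src0 : Src1; // 4-7 index Src0, 0-3 index Src1
  return (Src >> ((Sel & 3) * 8)) & 0xff; // pick the selected byte
}
// The full fold repeats this for each of the four selector bytes in C2 and
// also tracks undef sources, which this sketch leaves out.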
static Constant *ConstantFoldScalarCall3(StringRef Name,
Intrinsic::ID IntrinsicID,
Type *Ty,
ArrayRef<Constant *> Operands,
const TargetLibraryInfo *TLI,
const CallBase *Call) {
assert(Operands.size() == 3 && "Wrong number of operands.");
if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
if (const auto *Op3 = dyn_cast<ConstantFP>(Operands[2])) {
const APFloat &C1 = Op1->getValueAPF();
const APFloat &C2 = Op2->getValueAPF();
const APFloat &C3 = Op3->getValueAPF();
if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
APFloat Res = C1;
APFloat::opStatus St;
switch (IntrinsicID) {
default:
return nullptr;
case Intrinsic::experimental_constrained_fma:
case Intrinsic::experimental_constrained_fmuladd:
St = Res.fusedMultiplyAdd(C2, C3, RM);
break;
}
if (mayFoldConstrained(
const_cast<ConstrainedFPIntrinsic *>(ConstrIntr), St))
return ConstantFP::get(Ty->getContext(), Res);
return nullptr;
}
switch (IntrinsicID) {
default: break;
case Intrinsic::amdgcn_fma_legacy: {
// The legacy behaviour is that multiplying +/- 0.0 by anything, even
// NaN or infinity, gives +0.0.
if (C1.isZero() || C2.isZero()) {
// It's tempting to just return C3 here, but that would give the
// wrong result if C3 was -0.0.
return ConstantFP::get(Ty->getContext(), APFloat(0.0f) + C3);
}
LLVM_FALLTHROUGH;
}
case Intrinsic::fma:
case Intrinsic::fmuladd: {
APFloat V = C1;
V.fusedMultiplyAdd(C2, C3, APFloat::rmNearestTiesToEven);
return ConstantFP::get(Ty->getContext(), V);
}
case Intrinsic::amdgcn_cubeid:
case Intrinsic::amdgcn_cubema:
case Intrinsic::amdgcn_cubesc:
case Intrinsic::amdgcn_cubetc: {
APFloat V = ConstantFoldAMDGCNCubeIntrinsic(IntrinsicID, C1, C2, C3);
return ConstantFP::get(Ty->getContext(), V);
}
}
}
}
}
if (IntrinsicID == Intrinsic::smul_fix ||
IntrinsicID == Intrinsic::smul_fix_sat) {
// poison * C -> poison
// C * poison -> poison
if (isa<PoisonValue>(Operands[0]) || isa<PoisonValue>(Operands[1]))
return PoisonValue::get(Ty);
const APInt *C0, *C1;
if (!getConstIntOrUndef(Operands[0], C0) ||
!getConstIntOrUndef(Operands[1], C1))
return nullptr;
// undef * C -> 0
// C * undef -> 0
if (!C0 || !C1)
return Constant::getNullValue(Ty);
// This code performs rounding towards negative infinity in case the result
// cannot be represented exactly for the given scale. Targets that do care
// about rounding should use a target hook for specifying how rounding
// should be done, and provide their own folding to be consistent with
// rounding. This is the same approach as used by
// DAGTypeLegalizer::ExpandIntRes_MULFIX.
unsigned Scale = cast<ConstantInt>(Operands[2])->getZExtValue();
unsigned Width = C0->getBitWidth();
assert(Scale < Width && "Illegal scale.");
unsigned ExtendedWidth = Width * 2;
APInt Product = (C0->sextOrSelf(ExtendedWidth) *
C1->sextOrSelf(ExtendedWidth)).ashr(Scale);
if (IntrinsicID == Intrinsic::smul_fix_sat) {
APInt Max = APInt::getSignedMaxValue(Width).sextOrSelf(ExtendedWidth);
APInt Min = APInt::getSignedMinValue(Width).sextOrSelf(ExtendedWidth);
Product = APIntOps::smin(Product, Max);
Product = APIntOps::smax(Product, Min);
}
return ConstantInt::get(Ty->getContext(), Product.sextOrTrunc(Width));
}
if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) {
const APInt *C0, *C1, *C2;
if (!getConstIntOrUndef(Operands[0], C0) ||
!getConstIntOrUndef(Operands[1], C1) ||
!getConstIntOrUndef(Operands[2], C2))
return nullptr;
bool IsRight = IntrinsicID == Intrinsic::fshr;
if (!C2)
return Operands[IsRight ? 1 : 0];
if (!C0 && !C1)
return UndefValue::get(Ty);
// The shift amount is interpreted as modulo the bitwidth. If the shift
// amount is effectively 0, avoid UB due to oversized inverse shift below.
unsigned BitWidth = C2->getBitWidth();
unsigned ShAmt = C2->urem(BitWidth);
if (!ShAmt)
return Operands[IsRight ? 1 : 0];
// (C0 << ShlAmt) | (C1 >> LshrAmt)
unsigned LshrAmt = IsRight ? ShAmt : BitWidth - ShAmt;
unsigned ShlAmt = !IsRight ? ShAmt : BitWidth - ShAmt;
if (!C0)
return ConstantInt::get(Ty, C1->lshr(LshrAmt));
if (!C1)
return ConstantInt::get(Ty, C0->shl(ShlAmt));
return ConstantInt::get(Ty, C0->shl(ShlAmt) | C1->lshr(LshrAmt));
}
if (IntrinsicID == Intrinsic::amdgcn_perm)
return ConstantFoldAMDGCNPermIntrinsic(Operands, Ty);
return nullptr;
}
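// [Illustrative sketch; not part of the original file] The funnel-shift fold
// above combines the operands as (C0 << ShlAmt) | (C1 >> LshrAmt). A 32-bit
// model of llvm.fshl on constants, with a hypothetical name:
static unsigned fshlSketch(unsigned Hi, unsigned Lo, unsigned ShAmt) {
  const unsigned BitWidth = 32;
  ShAmt %= BitWidth;          // the shift amount is taken modulo the width
  if (ShAmt == 0)
    return Hi;                // avoid the undefined inverse shift by 32
  return (Hi << ShAmt) | (Lo >> (BitWidth - ShAmt));
}
// E.g. fshlSketch(0x12345678u, 0x9ABCDEF0u, 8) == 0x3456789Au; llvm.fshr is
// the same construction with the roles of the two shifts swapped.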
static Constant *ConstantFoldScalarCall(StringRef Name,
Intrinsic::ID IntrinsicID,
Type *Ty,
ArrayRef<Constant *> Operands,
const TargetLibraryInfo *TLI,
const CallBase *Call) {
if (Operands.size() == 1)
return ConstantFoldScalarCall1(Name, IntrinsicID, Ty, Operands, TLI, Call);
if (Operands.size() == 2)
return ConstantFoldScalarCall2(Name, IntrinsicID, Ty, Operands, TLI, Call);
if (Operands.size() == 3)
return ConstantFoldScalarCall3(Name, IntrinsicID, Ty, Operands, TLI, Call);
return nullptr;
}
static Constant *ConstantFoldFixedVectorCall(
StringRef Name, Intrinsic::ID IntrinsicID, FixedVectorType *FVTy,
ArrayRef<Constant *> Operands, const DataLayout &DL,
const TargetLibraryInfo *TLI, const CallBase *Call) {
SmallVector<Constant *, 4> Result(FVTy->getNumElements());
SmallVector<Constant *, 4> Lane(Operands.size());
Type *Ty = FVTy->getElementType();
switch (IntrinsicID) {
case Intrinsic::masked_load: {
auto *SrcPtr = Operands[0];
auto *Mask = Operands[2];
auto *Passthru = Operands[3];
Constant *VecData = ConstantFoldLoadFromConstPtr(SrcPtr, FVTy, DL);
SmallVector<Constant *, 32> NewElements;
for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
auto *MaskElt = Mask->getAggregateElement(I);
if (!MaskElt)
break;
auto *PassthruElt = Passthru->getAggregateElement(I);
auto *VecElt = VecData ? VecData->getAggregateElement(I) : nullptr;
if (isa<UndefValue>(MaskElt)) {
if (PassthruElt)
NewElements.push_back(PassthruElt);
else if (VecElt)
NewElements.push_back(VecElt);
else
return nullptr;
}
if (MaskElt->isNullValue()) {
if (!PassthruElt)
return nullptr;
NewElements.push_back(PassthruElt);
} else if (MaskElt->isOneValue()) {
if (!VecElt)
return nullptr;
NewElements.push_back(VecElt);
} else {
return nullptr;
}
}
if (NewElements.size() != FVTy->getNumElements())
return nullptr;
return ConstantVector::get(NewElements);
}
case Intrinsic::arm_mve_vctp8:
case Intrinsic::arm_mve_vctp16:
case Intrinsic::arm_mve_vctp32:
case Intrinsic::arm_mve_vctp64: {
if (auto *Op = dyn_cast<ConstantInt>(Operands[0])) {
unsigned Lanes = FVTy->getNumElements();
uint64_t Limit = Op->getZExtValue();
SmallVector<Constant *, 16> NCs;
for (unsigned i = 0; i < Lanes; i++) {
if (i < Limit)
NCs.push_back(ConstantInt::getTrue(Ty));
else
NCs.push_back(ConstantInt::getFalse(Ty));
}
return ConstantVector::get(NCs);
}
break;
}
case Intrinsic::get_active_lane_mask: {
auto *Op0 = dyn_cast<ConstantInt>(Operands[0]);
auto *Op1 = dyn_cast<ConstantInt>(Operands[1]);
if (Op0 && Op1) {
unsigned Lanes = FVTy->getNumElements();
uint64_t Base = Op0->getZExtValue();
uint64_t Limit = Op1->getZExtValue();
SmallVector<Constant *, 16> NCs;
for (unsigned i = 0; i < Lanes; i++) {
if (Base + i < Limit)
NCs.push_back(ConstantInt::getTrue(Ty));
else
NCs.push_back(ConstantInt::getFalse(Ty));
}
return ConstantVector::get(NCs);
}
break;
}
default:
break;
}
for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) {
// Gather a column of constants.
for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) {
// Some intrinsics use a scalar type for certain arguments.
if (hasVectorInstrinsicScalarOpd(IntrinsicID, J)) {
Lane[J] = Operands[J];
continue;
}
Constant *Agg = Operands[J]->getAggregateElement(I);
if (!Agg)
return nullptr;
Lane[J] = Agg;
}
// Use the regular scalar folding to simplify this column.
Constant *Folded =
ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI, Call);
if (!Folded)
return nullptr;
Result[I] = Folded;
}
return ConstantVector::get(Result);
}
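// [Illustrative sketch; not part of the original file] The
// get_active_lane_mask case above sets lane I to (Base + I < Limit). A
// four-lane model of that rule, with hypothetical names:
static void activeLaneMaskSketch(unsigned Base, unsigned Limit, bool Out[4]) {
  for (unsigned I = 0; I != 4; ++I)
    Out[I] = Base + I < Limit; // same per-lane predicate the folder applies
}
// For Base = 6 and Limit = 8 this produces {true, true, false, false}, i.e.
// only the first two lanes of the mask are active.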
static Constant *ConstantFoldScalableVectorCall(
StringRef Name, Intrinsic::ID IntrinsicID, ScalableVectorType *SVTy,
ArrayRef<Constant *> Operands, const DataLayout &DL,
const TargetLibraryInfo *TLI, const CallBase *Call) {
switch (IntrinsicID) {
case Intrinsic::aarch64_sve_convert_from_svbool: {
auto *Src = dyn_cast<Constant>(Operands[0]);
if (!Src || !Src->isNullValue())
break;
return ConstantInt::getFalse(SVTy);
}
default:
break;
}
return nullptr;
}
} // end anonymous namespace
Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
ArrayRef<Constant *> Operands,
const TargetLibraryInfo *TLI) {
if (Call->isNoBuiltin())
return nullptr;
if (!F->hasName())
return nullptr;
// If this is not an intrinsic and not recognized as a library call, bail out.
if (F->getIntrinsicID() == Intrinsic::not_intrinsic) {
if (!TLI)
return nullptr;
LibFunc LibF;
if (!TLI->getLibFunc(*F, LibF))
return nullptr;
}
StringRef Name = F->getName();
Type *Ty = F->getReturnType();
if (auto *FVTy = dyn_cast<FixedVectorType>(Ty))
return ConstantFoldFixedVectorCall(
Name, F->getIntrinsicID(), FVTy, Operands,
F->getParent()->getDataLayout(), TLI, Call);
if (auto *SVTy = dyn_cast<ScalableVectorType>(Ty))
return ConstantFoldScalableVectorCall(
Name, F->getIntrinsicID(), SVTy, Operands,
F->getParent()->getDataLayout(), TLI, Call);
// TODO: If this is a library function, we already discovered that above,
// so we should pass the LibFunc, not the name (and it might be better
// still to separate intrinsic handling from libcalls).
return ConstantFoldScalarCall(Name, F->getIntrinsicID(), Ty, Operands, TLI,
Call);
}
bool llvm::isMathLibCallNoop(const CallBase *Call,
const TargetLibraryInfo *TLI) {
// FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap
// (and to some extent ConstantFoldScalarCall).
if (Call->isNoBuiltin() || Call->isStrictFP())
return false;
Function *F = Call->getCalledFunction();
if (!F)
return false;
LibFunc Func;
if (!TLI || !TLI->getLibFunc(*F, Func))
return false;
if (Call->arg_size() == 1) {
if (ConstantFP *OpC = dyn_cast<ConstantFP>(Call->getArgOperand(0))) {
const APFloat &Op = OpC->getValueAPF();
switch (Func) {
case LibFunc_logl:
case LibFunc_log:
case LibFunc_logf:
case LibFunc_log2l:
case LibFunc_log2:
case LibFunc_log2f:
case LibFunc_log10l:
case LibFunc_log10:
case LibFunc_log10f:
return Op.isNaN() || (!Op.isZero() && !Op.isNegative());
case LibFunc_expl:
case LibFunc_exp:
case LibFunc_expf:
// FIXME: These boundaries are slightly conservative.
if (OpC->getType()->isDoubleTy())
return !(Op < APFloat(-745.0) || Op > APFloat(709.0));
if (OpC->getType()->isFloatTy())
return !(Op < APFloat(-103.0f) || Op > APFloat(88.0f));
break;
case LibFunc_exp2l:
case LibFunc_exp2:
case LibFunc_exp2f:
// FIXME: These boundaries are slightly conservative.
if (OpC->getType()->isDoubleTy())
return !(Op < APFloat(-1074.0) || Op > APFloat(1023.0));
if (OpC->getType()->isFloatTy())
return !(Op < APFloat(-149.0f) || Op > APFloat(127.0f));
break;
case LibFunc_sinl:
case LibFunc_sin:
case LibFunc_sinf:
case LibFunc_cosl:
case LibFunc_cos:
case LibFunc_cosf:
return !Op.isInfinity();
case LibFunc_tanl:
case LibFunc_tan:
case LibFunc_tanf: {
// FIXME: Stop using the host math library.
// FIXME: The computation isn't done in the right precision.
Type *Ty = OpC->getType();
if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy())
return ConstantFoldFP(tan, OpC->getValueAPF(), Ty) != nullptr;
break;
}
case LibFunc_asinl:
case LibFunc_asin:
case LibFunc_asinf:
case LibFunc_acosl:
case LibFunc_acos:
case LibFunc_acosf:
return !(Op < APFloat(Op.getSemantics(), "-1") ||
Op > APFloat(Op.getSemantics(), "1"));
case LibFunc_sinh:
case LibFunc_cosh:
case LibFunc_sinhf:
case LibFunc_coshf:
case LibFunc_sinhl:
case LibFunc_coshl:
// FIXME: These boundaries are slightly conservative.
if (OpC->getType()->isDoubleTy())
return !(Op < APFloat(-710.0) || Op > APFloat(710.0));
if (OpC->getType()->isFloatTy())
return !(Op < APFloat(-89.0f) || Op > APFloat(89.0f));
break;
case LibFunc_sqrtl:
case LibFunc_sqrt:
case LibFunc_sqrtf:
return Op.isNaN() || Op.isZero() || !Op.isNegative();
// FIXME: Add more functions: sqrt_finite, atanh, expm1, log1p,
// maybe others?
default:
break;
}
}
}
if (Call->arg_size() == 2) {
ConstantFP *Op0C = dyn_cast<ConstantFP>(Call->getArgOperand(0));
ConstantFP *Op1C = dyn_cast<ConstantFP>(Call->getArgOperand(1));
if (Op0C && Op1C) {
const APFloat &Op0 = Op0C->getValueAPF();
const APFloat &Op1 = Op1C->getValueAPF();
switch (Func) {
case LibFunc_powl:
case LibFunc_pow:
case LibFunc_powf: {
// FIXME: Stop using the host math library.
// FIXME: The computation isn't done in the right precision.
Type *Ty = Op0C->getType();
if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy()) {
if (Ty == Op1C->getType())
return ConstantFoldBinaryFP(pow, Op0, Op1, Ty) != nullptr;
}
break;
}
case LibFunc_fmodl:
case LibFunc_fmod:
case LibFunc_fmodf:
case LibFunc_remainderl:
case LibFunc_remainder:
case LibFunc_remainderf:
return Op0.isNaN() || Op1.isNaN() ||
(!Op0.isInfinity() && !Op1.isZero());
default:
break;
}
}
}
return false;
}
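// [Illustrative sketch; not part of the original file] The asin/acos case
// above relies on NaN comparing false against both bounds, so NaN inputs are
// also treated as no-ops. A plain-double model of that domain test:
static bool asinAcosIsNoopSketch(double X) {
  return !(X < -1.0 || X > 1.0); // true for [-1, 1] and for NaN
}
// asinAcosIsNoopSketch(0.5) and asinAcosIsNoopSketch(NAN) are true, while
// asinAcosIsNoopSketch(2.0) is false, since acos(2.0) is a domain error.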
void TargetFolder::anchor() {}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp b/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp
index 44b1d94ebdc8..74b0d6751023 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp
@@ -1,1443 +1,1447 @@
//===- llvm/Analysis/IVDescriptors.cpp - IndVar Descriptors -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file "describes" induction and recurrence variables.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include <set>
using namespace llvm;
using namespace llvm::PatternMatch;
#define DEBUG_TYPE "iv-descriptors"
bool RecurrenceDescriptor::areAllUsesIn(Instruction *I,
SmallPtrSetImpl<Instruction *> &Set) {
for (const Use &Use : I->operands())
if (!Set.count(dyn_cast<Instruction>(Use)))
return false;
return true;
}
bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) {
switch (Kind) {
default:
break;
case RecurKind::Add:
case RecurKind::Mul:
case RecurKind::Or:
case RecurKind::And:
case RecurKind::Xor:
case RecurKind::SMax:
case RecurKind::SMin:
case RecurKind::UMax:
case RecurKind::UMin:
case RecurKind::SelectICmp:
case RecurKind::SelectFCmp:
return true;
}
return false;
}
bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurKind Kind) {
return (Kind != RecurKind::None) && !isIntegerRecurrenceKind(Kind);
}
bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurKind Kind) {
switch (Kind) {
default:
break;
case RecurKind::Add:
case RecurKind::Mul:
case RecurKind::FAdd:
case RecurKind::FMul:
case RecurKind::FMulAdd:
return true;
}
return false;
}
/// Determines if Phi may have been type-promoted. If Phi has a single user
/// that ANDs the Phi with a type mask, return the user. RT is updated to
/// account for the narrower bit width represented by the mask, and the AND
/// instruction is added to CI.
static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,
SmallPtrSetImpl<Instruction *> &Visited,
SmallPtrSetImpl<Instruction *> &CI) {
if (!Phi->hasOneUse())
return Phi;
const APInt *M = nullptr;
Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());
// Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
// with a new integer type of the corresponding bit width.
if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) {
int32_t Bits = (*M + 1).exactLogBase2();
if (Bits > 0) {
RT = IntegerType::get(Phi->getContext(), Bits);
Visited.insert(Phi);
CI.insert(J);
return J;
}
}
return Phi;
}
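// [Illustrative sketch; not part of the original file] lookThroughAnd treats
// a mask of the form 2^x - 1 as evidence that the recurrence really lives in
// an x-bit type. A 32-bit model of the (*M + 1).exactLogBase2() computation,
// with a hypothetical name:
static int maskToNarrowedBitsSketch(unsigned Mask) {
  unsigned P = Mask + 1;          // 0xff -> 0x100, 0xffff -> 0x10000
  if (P == 0 || (P & (P - 1)) != 0)
    return -1;                    // not a 2^x - 1 mask; no narrowing
  int Bits = 0;
  while (P >>= 1)
    ++Bits;
  return Bits;                    // 0xff -> 8, 0xffff -> 16
}
// A PHI that is only used as "phi & 0xff" would therefore be treated as an
// i8 recurrence by the code above.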
/// Compute the minimal bit width needed to represent a reduction whose exit
/// instruction is given by Exit.
static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
DemandedBits *DB,
AssumptionCache *AC,
DominatorTree *DT) {
bool IsSigned = false;
const DataLayout &DL = Exit->getModule()->getDataLayout();
uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());
if (DB) {
// Use the demanded bits analysis to determine the bits that are live out
// of the exit instruction, rounding up to the nearest power of two. If the
// use of demanded bits results in a smaller bit width, we know the value
// must be positive (i.e., IsSigned = false), because if this were not the
// case, the sign bit would have been demanded.
auto Mask = DB->getDemandedBits(Exit);
MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros();
}
if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
// If demanded bits wasn't able to limit the bit width, we can try to use
// value tracking instead. This can be the case, for example, if the value
// may be negative.
auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT);
auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());
MaxBitWidth = NumTypeBits - NumSignBits;
KnownBits Bits = computeKnownBits(Exit, DL);
if (!Bits.isNonNegative()) {
// If the value is not known to be non-negative, we set IsSigned to true,
// meaning that we will use sext instructions instead of zext
// instructions to restore the original type.
IsSigned = true;
// Make sure at least one sign bit is included in the result, so it
// will get properly sign-extended.
++MaxBitWidth;
}
}
if (!isPowerOf2_64(MaxBitWidth))
MaxBitWidth = NextPowerOf2(MaxBitWidth);
return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
IsSigned);
}
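// [Illustrative sketch; not part of the original file] When the computed bit
// width is not already a power of two, computeRecurrenceType rounds it up so
// the narrowed reduction maps onto a standard integer type. A small model of
// that final rounding step, with a hypothetical name:
static unsigned roundUpToPow2Sketch(unsigned MaxBitWidth) {
  unsigned Width = 1;
  while (Width < MaxBitWidth)
    Width <<= 1;                  // 12 -> 16, 33 -> 64, 8 stays 8
  return Width;
}
// With the extra sign bit added for signed values, e.g. an 11-bit signed
// result becomes 12 bits and is then widened to an i16 recurrence type.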
/// Collect cast instructions that can be ignored in the vectorizer's cost
/// model, given a reduction exit value and the minimal type in which the
/// reduction can be represented. Also search casts to the recurrence type
/// to find the minimum width used by the recurrence.
static void collectCastInstrs(Loop *TheLoop, Instruction *Exit,
Type *RecurrenceType,
SmallPtrSetImpl<Instruction *> &Casts,
unsigned &MinWidthCastToRecurTy) {
SmallVector<Instruction *, 8> Worklist;
SmallPtrSet<Instruction *, 8> Visited;
Worklist.push_back(Exit);
MinWidthCastToRecurTy = -1U;
while (!Worklist.empty()) {
Instruction *Val = Worklist.pop_back_val();
Visited.insert(Val);
if (auto *Cast = dyn_cast<CastInst>(Val)) {
if (Cast->getSrcTy() == RecurrenceType) {
// If the source type of a cast instruction is equal to the recurrence
// type, it will be eliminated, and should be ignored in the vectorizer
// cost model.
Casts.insert(Cast);
continue;
}
if (Cast->getDestTy() == RecurrenceType) {
// The minimum width used by the recurrence is found by checking for
// casts on its operands. The minimum width is used by the vectorizer
// when finding the widest type for in-loop reductions without any
// loads/stores.
MinWidthCastToRecurTy = std::min<unsigned>(
MinWidthCastToRecurTy, Cast->getSrcTy()->getScalarSizeInBits());
continue;
}
}
// Add all operands to the work list if they are loop-varying values that
// we haven't yet visited.
for (Value *O : cast<User>(Val)->operands())
if (auto *I = dyn_cast<Instruction>(O))
if (TheLoop->contains(I) && !Visited.count(I))
Worklist.push_back(I);
}
}
// Check if a given Phi node can be recognized as an ordered reduction for
// vectorizing floating point operations without unsafe math.
static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst,
Instruction *Exit, PHINode *Phi) {
// Currently only FAdd and FMulAdd are supported.
if (Kind != RecurKind::FAdd && Kind != RecurKind::FMulAdd)
return false;
if (Kind == RecurKind::FAdd && Exit->getOpcode() != Instruction::FAdd)
return false;
if (Kind == RecurKind::FMulAdd &&
!RecurrenceDescriptor::isFMulAddIntrinsic(Exit))
return false;
// Ensure the exit instruction has only one user other than the reduction PHI
if (Exit != ExactFPMathInst || Exit->hasNUsesOrMore(3))
return false;
// The only pattern accepted is the one in which the reduction PHI
// is used as one of the operands of the exit instruction
auto *Op0 = Exit->getOperand(0);
auto *Op1 = Exit->getOperand(1);
if (Kind == RecurKind::FAdd && Op0 != Phi && Op1 != Phi)
return false;
if (Kind == RecurKind::FMulAdd && Exit->getOperand(2) != Phi)
return false;
LLVM_DEBUG(dbgs() << "LV: Found an ordered reduction: Phi: " << *Phi
<< ", ExitInst: " << *Exit << "\n");
return true;
}
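// [Illustrative sketch; not part of the original file] An ordered FAdd
// reduction, in source form, is a strict in-order accumulation with no
// fast-math reassociation allowed:
static double orderedFAddSketch(const double *Src, int N) {
  double Sum = 0.0;
  for (int I = 0; I < N; ++I)
    Sum += Src[I]; // must be accumulated strictly in loop order
  return Sum;
}
// checkOrderedReduction above gates vectorizing such loops: the fadd (or
// llvm.fmuladd) must be the one exact-FP instruction and must feed the
// reduction PHI directly, so the vectorizer can use an in-order reduction.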
bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
Loop *TheLoop, FastMathFlags FuncFMF,
RecurrenceDescriptor &RedDes,
DemandedBits *DB,
AssumptionCache *AC,
DominatorTree *DT) {
if (Phi->getNumIncomingValues() != 2)
return false;
// Reduction variables are only found in the loop header block.
if (Phi->getParent() != TheLoop->getHeader())
return false;
// Obtain the reduction start value from the value that comes from the loop
// preheader.
Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
// ExitInstruction is the single value which is used outside the loop.
// We only allow for a single reduction value to be used outside the loop.
// This includes users of the reduction and variables that form a cycle
// which ends in the phi node.
Instruction *ExitInstruction = nullptr;
// Indicates that we found a reduction operation in our scan.
bool FoundReduxOp = false;
// We start with the PHI node and scan for all of the users of this
// instruction. All users must be instructions that can be used as reduction
// variables (such as ADD). We must have a single out-of-block user. The cycle
// must include the original PHI.
bool FoundStartPHI = false;
// To recognize min/max patterns formed by an icmp/select sequence, we store
// the number of instructions we saw from the recognized min/max pattern,
// to make sure we only see exactly the two instructions.
unsigned NumCmpSelectPatternInst = 0;
InstDesc ReduxDesc(false, nullptr);
// Data used for determining if the recurrence has been type-promoted.
Type *RecurrenceType = Phi->getType();
SmallPtrSet<Instruction *, 4> CastInsts;
unsigned MinWidthCastToRecurrenceType;
Instruction *Start = Phi;
bool IsSigned = false;
SmallPtrSet<Instruction *, 8> VisitedInsts;
SmallVector<Instruction *, 8> Worklist;
// Return early if the recurrence kind does not match the type of Phi. If the
// recurrence kind is arithmetic, we attempt to look through AND operations
// resulting from the type promotion performed by InstCombine. Vector
// operations are not limited to the legal integer widths, so we may be able
// to evaluate the reduction in the narrower width.
if (RecurrenceType->isFloatingPointTy()) {
if (!isFloatingPointRecurrenceKind(Kind))
return false;
} else if (RecurrenceType->isIntegerTy()) {
if (!isIntegerRecurrenceKind(Kind))
return false;
if (!isMinMaxRecurrenceKind(Kind))
Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
} else {
// Pointer min/max may exist, but it is not supported as a reduction op.
return false;
}
Worklist.push_back(Start);
VisitedInsts.insert(Start);
// Start with all flags set because we will intersect this with the reduction
// flags from all the reduction operations.
FastMathFlags FMF = FastMathFlags::getFast();
// The first instruction in the use-def chain of the Phi node that requires
// exact floating point operations.
Instruction *ExactFPMathInst = nullptr;
// A value in the reduction can be used:
// - By the reduction:
// - Reduction operation:
// - One use of reduction value (safe).
// - Multiple use of reduction value (not safe).
// - PHI:
// - All uses of the PHI must be the reduction (safe).
// - Otherwise, not safe.
// - By instructions outside of the loop (safe).
// * One value may have several outside users, but all outside
// uses must be of the same value.
// - By an instruction that is not part of the reduction (not safe).
// This is either:
// * An instruction type other than PHI or the reduction operation.
// * A PHI in the header other than the initial PHI.
while (!Worklist.empty()) {
Instruction *Cur = Worklist.pop_back_val();
// No Users.
// If the instruction has no users then this is a broken chain and can't be
// a reduction variable.
if (Cur->use_empty())
return false;
bool IsAPhi = isa<PHINode>(Cur);
// A header PHI use other than the original PHI.
if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
return false;
// Reductions of instructions such as Div and Sub are only possible if the
// LHS is the reduction variable.
if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
!isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
!VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
return false;
// Any reduction instruction must be of one of the allowed kinds. We ignore
// the starting value (the Phi or an AND instruction if the Phi has been
// type-promoted).
if (Cur != Start) {
ReduxDesc =
isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF);
ExactFPMathInst = ExactFPMathInst == nullptr
? ReduxDesc.getExactFPMathInst()
: ExactFPMathInst;
if (!ReduxDesc.isRecurrence())
return false;
// FIXME: FMF is allowed on phi, but propagation is not handled correctly.
if (isa<FPMathOperator>(ReduxDesc.getPatternInst()) && !IsAPhi) {
FastMathFlags CurFMF = ReduxDesc.getPatternInst()->getFastMathFlags();
if (auto *Sel = dyn_cast<SelectInst>(ReduxDesc.getPatternInst())) {
// Accept FMF on either fcmp or select of a min/max idiom.
// TODO: This is a hack to work-around the fact that FMF may not be
// assigned/propagated correctly. If that problem is fixed or we
// standardize on fmin/fmax via intrinsics, this can be removed.
if (auto *FCmp = dyn_cast<FCmpInst>(Sel->getCondition()))
CurFMF |= FCmp->getFastMathFlags();
}
FMF &= CurFMF;
}
// Update this reduction kind if we matched a new instruction.
// TODO: Can we eliminate the need for a 2nd InstDesc by keeping 'Kind'
// state accurate while processing the worklist?
if (ReduxDesc.getRecKind() != RecurKind::None)
Kind = ReduxDesc.getRecKind();
}
bool IsASelect = isa<SelectInst>(Cur);
// A conditional reduction operation must only have 2 or fewer uses in
// VisitedInsts.
if (IsASelect && (Kind == RecurKind::FAdd || Kind == RecurKind::FMul) &&
hasMultipleUsesOf(Cur, VisitedInsts, 2))
return false;
// A reduction operation must only have one use of the reduction value.
if (!IsAPhi && !IsASelect && !isMinMaxRecurrenceKind(Kind) &&
!isSelectCmpRecurrenceKind(Kind) &&
hasMultipleUsesOf(Cur, VisitedInsts, 1))
return false;
// All inputs to a PHI node must be a reduction value.
if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
return false;
if ((isIntMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectICmp) &&
(isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
++NumCmpSelectPatternInst;
if ((isFPMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectFCmp) &&
(isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
++NumCmpSelectPatternInst;
// Check whether we found a reduction operator.
FoundReduxOp |= !IsAPhi && Cur != Start;
// Process users of the current instruction. Push non-PHI nodes after PHI nodes
// onto the stack. This way we are going to have seen all inputs to PHI
// nodes once we get to them.
SmallVector<Instruction *, 8> NonPHIs;
SmallVector<Instruction *, 8> PHIs;
for (User *U : Cur->users()) {
Instruction *UI = cast<Instruction>(U);
// If the user is a call to llvm.fmuladd then the instruction can only be
// the final operand.
if (isFMulAddIntrinsic(UI))
if (Cur == UI->getOperand(0) || Cur == UI->getOperand(1))
return false;
// Check if we found the exit user.
BasicBlock *Parent = UI->getParent();
if (!TheLoop->contains(Parent)) {
// If we already know this instruction is used externally, move on to
// the next user.
if (ExitInstruction == Cur)
continue;
// Exit if we find multiple values used outside or if the header phi
// node is being used. In this case the user uses the value of the
// previous iteration, in which case we would lose "VF-1" iterations of
// the reduction operation if we vectorize.
if (ExitInstruction != nullptr || Cur == Phi)
return false;
// The instruction used by an outside user must be the last instruction
// before we feed back to the reduction phi. Otherwise, we lose VF-1
// operations on the value.
if (!is_contained(Phi->operands(), Cur))
return false;
ExitInstruction = Cur;
continue;
}
// Process instructions only once (termination). Each reduction cycle
// value must only be used once, except by phi nodes and min/max
// reductions which are represented as a cmp followed by a select.
InstDesc IgnoredVal(false, nullptr);
if (VisitedInsts.insert(UI).second) {
if (isa<PHINode>(UI))
PHIs.push_back(UI);
else
NonPHIs.push_back(UI);
} else if (!isa<PHINode>(UI) &&
((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
!isa<SelectInst>(UI)) ||
(!isConditionalRdxPattern(Kind, UI).isRecurrence() &&
!isSelectCmpPattern(TheLoop, Phi, UI, IgnoredVal)
.isRecurrence() &&
!isMinMaxPattern(UI, Kind, IgnoredVal).isRecurrence())))
return false;
// Remember that we completed the cycle.
if (UI == Phi)
FoundStartPHI = true;
}
Worklist.append(PHIs.begin(), PHIs.end());
Worklist.append(NonPHIs.begin(), NonPHIs.end());
}
// This means we have seen one but not the other instruction of the
// pattern or more than just a select and cmp. Zero implies that we saw an
// llvm.min/max intrinsic, which is always OK.
if (isMinMaxRecurrenceKind(Kind) && NumCmpSelectPatternInst != 2 &&
NumCmpSelectPatternInst != 0)
return false;
if (isSelectCmpRecurrenceKind(Kind) && NumCmpSelectPatternInst != 1)
return false;
if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
return false;
const bool IsOrdered =
checkOrderedReduction(Kind, ExactFPMathInst, ExitInstruction, Phi);
if (Start != Phi) {
// If the starting value is not the same as the phi node, we speculatively
// looked through an 'and' instruction when evaluating a potential
// arithmetic reduction to determine if it may have been type-promoted.
//
// We now compute the minimal bit width that is required to represent the
// reduction. If this is the same width that was indicated by the 'and', we
// can represent the reduction in the smaller type. The 'and' instruction
// will be eliminated since it will essentially be a cast instruction that
// can be ignored in the cost model. If we compute a different type than we
// did when evaluating the 'and', the 'and' will not be eliminated, and we
// will end up with different kinds of operations in the recurrence
// expression (e.g., IntegerAND, IntegerADD). We give up if this is
// the case.
//
// The vectorizer relies on InstCombine to perform the actual
// type-shrinking. It does this by inserting instructions to truncate the
// exit value of the reduction to the width indicated by RecurrenceType and
// then extend this value back to the original width. If IsSigned is false,
// a 'zext' instruction will be generated; otherwise, a 'sext' will be
// used.
//
// TODO: We should not rely on InstCombine to rewrite the reduction in the
// smaller type. We should just generate a correctly typed expression
// to begin with.
Type *ComputedType;
std::tie(ComputedType, IsSigned) =
computeRecurrenceType(ExitInstruction, DB, AC, DT);
if (ComputedType != RecurrenceType)
return false;
}
// Collect cast instructions and the minimum width used by the recurrence.
// If the starting value is not the same as the phi node and the computed
// recurrence type is equal to the recurrence type, the recurrence expression
// will be represented in a narrower or wider type. If there are any cast
// instructions that will be unnecessary, collect them in CastsFromRecurTy.
// Note that the 'and' instruction was already included in this list.
//
// TODO: A better way to represent this may be to tag in some way all the
// instructions that are a part of the reduction. The vectorizer cost
// model could then apply the recurrence type to these instructions,
// without needing a white list of instructions to ignore.
// This may also be useful for the inloop reductions, if it can be
// kept simple enough.
collectCastInstrs(TheLoop, ExitInstruction, RecurrenceType, CastInsts,
MinWidthCastToRecurrenceType);
// We found a reduction var if we have reached the original phi node and we
// only have a single instruction with out-of-loop users.
// The ExitInstruction (the instruction which is allowed to have out-of-loop
// users) is saved as part of the RecurrenceDescriptor.
// is saved as part of the RecurrenceDescriptor.
// Save the description of this reduction variable.
RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, FMF, ExactFPMathInst,
RecurrenceType, IsSigned, IsOrdered, CastInsts,
MinWidthCastToRecurrenceType);
RedDes = RD;
return true;
}
// We are looking for loops that do something like this:
// int r = 0;
// for (int i = 0; i < n; i++) {
// if (src[i] > 3)
// r = 3;
// }
// where the reduction value (r) only has two states, in this example 0 or 3.
// The generated LLVM IR for this type of loop will be like this:
// for.body:
// %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ]
// ...
// %cmp = icmp sgt i32 %5, 3
// %spec.select = select i1 %cmp, i32 3, i32 %r
// ...
// In general we can support vectorization of loops where 'r' flips between
// any two non-constants, provided they are loop invariant. The only thing
// we actually care about at the end of the loop is whether or not any lane
// in the selected vector is different from the start value. The final
// across-vector reduction after the loop simply involves choosing the start
// value if nothing changed (0 in the example above) or the other selected
// value (3 in the example above).
RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isSelectCmpPattern(Loop *Loop, PHINode *OrigPhi,
Instruction *I, InstDesc &Prev) {
// We must handle the select(cmp(),x,y) as a single instruction. Advance to
// the select.
CmpInst::Predicate Pred;
if (match(I, m_OneUse(m_Cmp(Pred, m_Value(), m_Value())))) {
if (auto *Select = dyn_cast<SelectInst>(*I->user_begin()))
return InstDesc(Select, Prev.getRecKind());
}
// Only match a select with a single-use cmp condition.
if (!match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(),
m_Value())))
return InstDesc(false, I);
SelectInst *SI = cast<SelectInst>(I);
Value *NonPhi = nullptr;
if (OrigPhi == dyn_cast<PHINode>(SI->getTrueValue()))
NonPhi = SI->getFalseValue();
else if (OrigPhi == dyn_cast<PHINode>(SI->getFalseValue()))
NonPhi = SI->getTrueValue();
else
return InstDesc(false, I);
// We are looking for selects of the form:
// select(cmp(), phi, loop_invariant) or
// select(cmp(), loop_invariant, phi)
if (!Loop->isLoopInvariant(NonPhi))
return InstDesc(false, I);
return InstDesc(I, isa<ICmpInst>(I->getOperand(0)) ? RecurKind::SelectICmp
: RecurKind::SelectFCmp);
}
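// [Illustrative sketch; not part of the original file] The two-state
// reduction described in the comment above, written as plain source code,
// with hypothetical names:
static int selectCmpReductionSketch(const int *Src, int N, int Start,
                                    int Flag) {
  int R = Start;            // becomes the reduction PHI's start value
  for (int I = 0; I < N; ++I)
    if (Src[I] > 3)
      R = Flag;             // lowers to icmp + select feeding the PHI
  return R;                 // either Start (nothing matched) or Flag
}
// After vectorization, the final reduction only has to detect whether any
// lane differs from Start and, if so, pick the loop-invariant Flag value.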
RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind,
const InstDesc &Prev) {
assert((isa<CmpInst>(I) || isa<SelectInst>(I) || isa<CallInst>(I)) &&
"Expected a cmp or select or call instruction");
if (!isMinMaxRecurrenceKind(Kind))
return InstDesc(false, I);
// We must handle the select(cmp()) as a single instruction. Advance to the
// select.
CmpInst::Predicate Pred;
if (match(I, m_OneUse(m_Cmp(Pred, m_Value(), m_Value())))) {
if (auto *Select = dyn_cast<SelectInst>(*I->user_begin()))
return InstDesc(Select, Prev.getRecKind());
}
// Only match a select with a single-use cmp condition, or a min/max intrinsic.
if (!isa<IntrinsicInst>(I) &&
!match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(),
m_Value())))
return InstDesc(false, I);
// Look for a min/max pattern.
if (match(I, m_UMin(m_Value(), m_Value())))
return InstDesc(Kind == RecurKind::UMin, I);
if (match(I, m_UMax(m_Value(), m_Value())))
return InstDesc(Kind == RecurKind::UMax, I);
if (match(I, m_SMax(m_Value(), m_Value())))
return InstDesc(Kind == RecurKind::SMax, I);
if (match(I, m_SMin(m_Value(), m_Value())))
return InstDesc(Kind == RecurKind::SMin, I);
if (match(I, m_OrdFMin(m_Value(), m_Value())))
return InstDesc(Kind == RecurKind::FMin, I);
if (match(I, m_OrdFMax(m_Value(), m_Value())))
return InstDesc(Kind == RecurKind::FMax, I);
if (match(I, m_UnordFMin(m_Value(), m_Value())))
return InstDesc(Kind == RecurKind::FMin, I);
if (match(I, m_UnordFMax(m_Value(), m_Value())))
return InstDesc(Kind == RecurKind::FMax, I);
if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_Value())))
return InstDesc(Kind == RecurKind::FMin, I);
if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value())))
return InstDesc(Kind == RecurKind::FMax, I);
return InstDesc(false, I);
}
/// Returns true if the select instruction has users in the compare-and-add
/// reduction pattern below. The select instruction argument is the last one
/// in the sequence.
///
/// %sum.1 = phi ...
/// ...
/// %cmp = fcmp pred %0, %CFP
/// %add = fadd %0, %sum.1
/// %sum.2 = select %cmp, %add, %sum.1
RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isConditionalRdxPattern(RecurKind Kind, Instruction *I) {
SelectInst *SI = dyn_cast<SelectInst>(I);
if (!SI)
return InstDesc(false, I);
CmpInst *CI = dyn_cast<CmpInst>(SI->getCondition());
// Only handle single use cases for now.
if (!CI || !CI->hasOneUse())
return InstDesc(false, I);
Value *TrueVal = SI->getTrueValue();
Value *FalseVal = SI->getFalseValue();
// For now, handle this only when exactly one of the select's operands is
// a PHI node.
if ((isa<PHINode>(*TrueVal) && isa<PHINode>(*FalseVal)) ||
(!isa<PHINode>(*TrueVal) && !isa<PHINode>(*FalseVal)))
return InstDesc(false, I);
Instruction *I1 =
isa<PHINode>(*TrueVal) ? dyn_cast<Instruction>(FalseVal)
: dyn_cast<Instruction>(TrueVal);
if (!I1 || !I1->isBinaryOp())
return InstDesc(false, I);
Value *Op1, *Op2;
if ((m_FAdd(m_Value(Op1), m_Value(Op2)).match(I1) ||
m_FSub(m_Value(Op1), m_Value(Op2)).match(I1)) &&
I1->isFast())
return InstDesc(Kind == RecurKind::FAdd, SI);
if (m_FMul(m_Value(Op1), m_Value(Op2)).match(I1) && (I1->isFast()))
return InstDesc(Kind == RecurKind::FMul, SI);
return InstDesc(false, I);
}
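// [Illustrative sketch; not part of the original file] The compare-and-add
// pattern documented above typically comes from source like this, where the
// conditional fadd is expressed as a select feeding the reduction PHI:
static float conditionalFAddSketch(const float *Src, int N, float Threshold) {
  float Sum = 0.0f;
  for (int I = 0; I < N; ++I)
    if (Src[I] > Threshold) // fcmp + fadd + select under fast-math
      Sum += Src[I];
  return Sum;
}
// isConditionalRdxPattern accepts the select only when its condition is a
// single-use compare and exactly one select operand is the reduction PHI.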
RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isRecurrenceInstr(Loop *L, PHINode *OrigPhi,
Instruction *I, RecurKind Kind,
InstDesc &Prev, FastMathFlags FuncFMF) {
assert(Prev.getRecKind() == RecurKind::None || Prev.getRecKind() == Kind);
switch (I->getOpcode()) {
default:
return InstDesc(false, I);
case Instruction::PHI:
return InstDesc(I, Prev.getRecKind(), Prev.getExactFPMathInst());
case Instruction::Sub:
case Instruction::Add:
return InstDesc(Kind == RecurKind::Add, I);
case Instruction::Mul:
return InstDesc(Kind == RecurKind::Mul, I);
case Instruction::And:
return InstDesc(Kind == RecurKind::And, I);
case Instruction::Or:
return InstDesc(Kind == RecurKind::Or, I);
case Instruction::Xor:
return InstDesc(Kind == RecurKind::Xor, I);
case Instruction::FDiv:
case Instruction::FMul:
return InstDesc(Kind == RecurKind::FMul, I,
I->hasAllowReassoc() ? nullptr : I);
case Instruction::FSub:
case Instruction::FAdd:
return InstDesc(Kind == RecurKind::FAdd, I,
I->hasAllowReassoc() ? nullptr : I);
case Instruction::Select:
if (Kind == RecurKind::FAdd || Kind == RecurKind::FMul)
return isConditionalRdxPattern(Kind, I);
LLVM_FALLTHROUGH;
case Instruction::FCmp:
case Instruction::ICmp:
case Instruction::Call:
if (isSelectCmpRecurrenceKind(Kind))
return isSelectCmpPattern(L, OrigPhi, I, Prev);
if (isIntMinMaxRecurrenceKind(Kind) ||
(((FuncFMF.noNaNs() && FuncFMF.noSignedZeros()) ||
(isa<FPMathOperator>(I) && I->hasNoNaNs() &&
I->hasNoSignedZeros())) &&
isFPMinMaxRecurrenceKind(Kind)))
return isMinMaxPattern(I, Kind, Prev);
else if (isFMulAddIntrinsic(I))
return InstDesc(Kind == RecurKind::FMulAdd, I,
I->hasAllowReassoc() ? nullptr : I);
return InstDesc(false, I);
}
}
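// Illustrative (hypothetical) IR for the Instruction::Call path above when
// Kind is RecurKind::FMulAdd: a fast fmuladd call feeding the reduction PHI
// is accepted via isFMulAddIntrinsic(), with no exact-FP-math instruction
// recorded because reassociation is allowed.
//   %next = call fast float @llvm.fmuladd.f32(float %a, float %b, float %acc.phi)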
bool RecurrenceDescriptor::hasMultipleUsesOf(
Instruction *I, SmallPtrSetImpl<Instruction *> &Insts,
unsigned MaxNumUses) {
unsigned NumUses = 0;
for (const Use &U : I->operands()) {
if (Insts.count(dyn_cast<Instruction>(U)))
++NumUses;
if (NumUses > MaxNumUses)
return true;
}
return false;
}
bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
RecurrenceDescriptor &RedDes,
DemandedBits *DB, AssumptionCache *AC,
DominatorTree *DT) {
BasicBlock *Header = TheLoop->getHeader();
Function &F = *Header->getParent();
FastMathFlags FMF;
FMF.setNoNaNs(
F.getFnAttribute("no-nans-fp-math").getValueAsBool());
FMF.setNoSignedZeros(
F.getFnAttribute("no-signed-zeros-fp-math").getValueAsBool());
if (AddReductionVar(Phi, RecurKind::Add, TheLoop, FMF, RedDes, DB, AC, DT)) {
LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
return true;
}
if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, FMF, RedDes, DB, AC, DT)) {
LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
return true;
}
if (AddReductionVar(Phi, RecurKind::Or, TheLoop, FMF, RedDes, DB, AC, DT)) {
LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
return true;
}
if (AddReductionVar(Phi, RecurKind::And, TheLoop, FMF, RedDes, DB, AC, DT)) {
LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
return true;
}
if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, FMF, RedDes, DB, AC, DT)) {
LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
return true;
}
if (AddReductionVar(Phi, RecurKind::SMax, TheLoop, FMF, RedDes, DB, AC, DT)) {
LLVM_DEBUG(dbgs() << "Found a SMAX reduction PHI." << *Phi << "\n");
return true;
}
if (AddReductionVar(Phi, RecurKind::SMin, TheLoop, FMF, RedDes, DB, AC, DT)) {
LLVM_DEBUG(dbgs() << "Found a SMIN reduction PHI." << *Phi << "\n");
return true;
}
if (AddReductionVar(Phi, RecurKind::UMax, TheLoop, FMF, RedDes, DB, AC, DT)) {
LLVM_DEBUG(dbgs() << "Found a UMAX reduction PHI." << *Phi << "\n");
return true;
}
if (AddReductionVar(Phi, RecurKind::UMin, TheLoop, FMF, RedDes, DB, AC, DT)) {
LLVM_DEBUG(dbgs() << "Found a UMIN reduction PHI." << *Phi << "\n");
return true;
}
if (AddReductionVar(Phi, RecurKind::SelectICmp, TheLoop, FMF, RedDes, DB, AC,
DT)) {
LLVM_DEBUG(dbgs() << "Found an integer conditional select reduction PHI."
<< *Phi << "\n");
return true;
}
if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT)) {
LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
return true;
}
if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, FMF, RedDes, DB, AC, DT)) {
LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
return true;
}
if (AddReductionVar(Phi, RecurKind::FMax, TheLoop, FMF, RedDes, DB, AC, DT)) {
LLVM_DEBUG(dbgs() << "Found a float MAX reduction PHI." << *Phi << "\n");
return true;
}
if (AddReductionVar(Phi, RecurKind::FMin, TheLoop, FMF, RedDes, DB, AC, DT)) {
LLVM_DEBUG(dbgs() << "Found a float MIN reduction PHI." << *Phi << "\n");
return true;
}
if (AddReductionVar(Phi, RecurKind::SelectFCmp, TheLoop, FMF, RedDes, DB, AC,
DT)) {
LLVM_DEBUG(dbgs() << "Found a float conditional select reduction PHI."
<< " PHI." << *Phi << "\n");
return true;
}
if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, FMF, RedDes, DB, AC,
DT)) {
LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n");
return true;
}
// Not a reduction of known type.
return false;
}
bool RecurrenceDescriptor::isFirstOrderRecurrence(
PHINode *Phi, Loop *TheLoop,
MapVector<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {
// Ensure the phi node is in the loop header and has two incoming values.
if (Phi->getParent() != TheLoop->getHeader() ||
Phi->getNumIncomingValues() != 2)
return false;
// Ensure the loop has a preheader and a single latch block. The loop
// vectorizer will need the latch to set up the next iteration of the loop.
auto *Preheader = TheLoop->getLoopPreheader();
auto *Latch = TheLoop->getLoopLatch();
if (!Preheader || !Latch)
return false;
// Ensure the phi node's incoming blocks are the loop preheader and latch.
if (Phi->getBasicBlockIndex(Preheader) < 0 ||
Phi->getBasicBlockIndex(Latch) < 0)
return false;
// Get the previous value. The previous value comes from the latch edge while
// the initial value comes from the preheader edge.
auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
return false;
// Ensure every user of the phi node (recursively) is dominated by the
// previous value. The dominance requirement ensures the loop vectorizer will
// not need to vectorize the initial value prior to the first iteration of the
// loop.
// TODO: Consider extending this sinking to handle memory instructions.
// We optimistically assume we can sink all users after Previous. Keep a set
// of instructions to sink after Previous ordered by dominance in the common
// basic block. It will be applied to SinkAfter if all users can be sunk.
auto CompareByComesBefore = [](const Instruction *A, const Instruction *B) {
return A->comesBefore(B);
};
std::set<Instruction *, decltype(CompareByComesBefore)> InstrsToSink(
CompareByComesBefore);
BasicBlock *PhiBB = Phi->getParent();
SmallVector<Instruction *, 8> WorkList;
auto TryToPushSinkCandidate = [&](Instruction *SinkCandidate) {
// Already sunk SinkCandidate.
if (SinkCandidate->getParent() == PhiBB &&
InstrsToSink.find(SinkCandidate) != InstrsToSink.end())
return true;
// Cyclic dependence.
if (Previous == SinkCandidate)
return false;
if (DT->dominates(Previous,
SinkCandidate)) // We already are good w/o sinking.
return true;
if (SinkCandidate->getParent() != PhiBB ||
SinkCandidate->mayHaveSideEffects() ||
SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator())
return false;
// Do not try to sink an instruction multiple times (if multiple operands
// are first order recurrences).
// TODO: We can support this case, by sinking the instruction after the
// 'deepest' previous instruction.
if (SinkAfter.find(SinkCandidate) != SinkAfter.end())
return false;
// If we reach a PHI node that is not dominated by Previous, we reached a
// header PHI. No need for sinking.
if (isa<PHINode>(SinkCandidate))
return true;
// Tentatively sink SinkCandidate and check its users.
InstrsToSink.insert(SinkCandidate);
WorkList.push_back(SinkCandidate);
return true;
};
WorkList.push_back(Phi);
// Try to recursively sink instructions and their users after Previous.
while (!WorkList.empty()) {
Instruction *Current = WorkList.pop_back_val();
for (User *User : Current->users()) {
if (!TryToPushSinkCandidate(cast<Instruction>(User)))
return false;
}
}
// We can sink all users of Phi. Update the mapping.
for (Instruction *I : InstrsToSink) {
SinkAfter[I] = Previous;
Previous = I;
}
return true;
}
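// Illustrative (hypothetical) single-block loop in which the routine above
// recognizes %for.phi as a first-order recurrence: the latch incoming value
// %pre is a non-PHI instruction inside the loop, and the only user of the PHI
// is already dominated by %pre, so nothing needs to be sunk.
//   for.body:
//     %for.phi = phi i32 [ 0, %preheader ], [ %pre, %for.body ]
//     %pre     = load i32, i32* %ptr
//     %use     = add i32 %for.phi, %pre
//     ...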
/// This function returns the identity element (or neutral element) for
/// the operation K.
Value *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
FastMathFlags FMF) const {
switch (K) {
case RecurKind::Xor:
case RecurKind::Add:
case RecurKind::Or:
// Adding, Xoring, Oring zero to a number does not change it.
return ConstantInt::get(Tp, 0);
case RecurKind::Mul:
// Multiplying a number by 1 does not change it.
return ConstantInt::get(Tp, 1);
case RecurKind::And:
// AND-ing a number with an all-1 value does not change it.
return ConstantInt::get(Tp, -1, true);
case RecurKind::FMul:
// Multiplying a number by 1 does not change it.
return ConstantFP::get(Tp, 1.0L);
case RecurKind::FMulAdd:
case RecurKind::FAdd:
// Adding zero to a number does not change it.
// FIXME: Ideally we should not need to check FMF for FAdd and should always
// use -0.0. However, this will currently result in mixed vectors of 0.0/-0.0.
// Instead, we should ensure that 1) the FMF from FAdd are propagated to the PHI
// nodes where possible, and 2) PHIs with the nsz flag + -0.0 use 0.0. This would
// mean we can then remove the check for noSignedZeros() below (see D98963).
if (FMF.noSignedZeros())
return ConstantFP::get(Tp, 0.0L);
return ConstantFP::get(Tp, -0.0L);
case RecurKind::UMin:
return ConstantInt::get(Tp, -1);
case RecurKind::UMax:
return ConstantInt::get(Tp, 0);
case RecurKind::SMin:
return ConstantInt::get(Tp,
APInt::getSignedMaxValue(Tp->getIntegerBitWidth()));
case RecurKind::SMax:
return ConstantInt::get(Tp,
APInt::getSignedMinValue(Tp->getIntegerBitWidth()));
case RecurKind::FMin:
return ConstantFP::getInfinity(Tp, true);
case RecurKind::FMax:
return ConstantFP::getInfinity(Tp, false);
case RecurKind::SelectICmp:
case RecurKind::SelectFCmp:
return getRecurrenceStartValue();
break;
default:
llvm_unreachable("Unknown recurrence kind");
}
}
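// Worked examples (informal) of the integer identities above for an i32
// element type: Add/Or/Xor -> 0, Mul -> 1, And -> 0xFFFFFFFF,
// UMin -> 0xFFFFFFFF (the unsigned maximum), UMax -> 0, SMin -> INT32_MAX,
// SMax -> INT32_MIN.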
unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
switch (Kind) {
case RecurKind::Add:
return Instruction::Add;
case RecurKind::Mul:
return Instruction::Mul;
case RecurKind::Or:
return Instruction::Or;
case RecurKind::And:
return Instruction::And;
case RecurKind::Xor:
return Instruction::Xor;
case RecurKind::FMul:
return Instruction::FMul;
case RecurKind::FMulAdd:
case RecurKind::FAdd:
return Instruction::FAdd;
case RecurKind::SMax:
case RecurKind::SMin:
case RecurKind::UMax:
case RecurKind::UMin:
case RecurKind::SelectICmp:
return Instruction::ICmp;
case RecurKind::FMax:
case RecurKind::FMin:
case RecurKind::SelectFCmp:
return Instruction::FCmp;
default:
llvm_unreachable("Unknown recurrence operation");
}
}
SmallVector<Instruction *, 4>
RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const {
SmallVector<Instruction *, 4> ReductionOperations;
unsigned RedOp = getOpcode(Kind);
// Search down from the Phi to the LoopExitInstr, looking for instructions
// with a single user of the correct type for the reduction.
// Note that we check that the type of the operand is correct for each item in
// the chain, including the last (the loop exit value). This can come up from
// sub, which would otherwise be treated as an add reduction. MinMax reductions
// also need to check for an icmp/select pair, for which we use the
// getNextInstruction and isCorrectOpcode helpers to step over the right number
// of instructions and check the icmp/select pair.
// FIXME: We also do not attempt to look through Phis/Selects yet, which might
// be part of the reduction chain, or attempt to look through Ands to find a
// smaller bitwidth. Subs are also currently not allowed (they are usually
// treated as part of an add reduction) as they are expected to generally be
// more expensive than out-of-loop reductions, and need to be costed more
// carefully.
unsigned ExpectedUses = 1;
if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp)
ExpectedUses = 2;
auto getNextInstruction = [&](Instruction *Cur) {
if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {
// We are expecting an icmp/select pair, so advance to the next select
// instruction if we can. We already know that Cur has 2 uses.
if (isa<SelectInst>(*Cur->user_begin()))
return cast<Instruction>(*Cur->user_begin());
else
return cast<Instruction>(*std::next(Cur->user_begin()));
}
return cast<Instruction>(*Cur->user_begin());
};
auto isCorrectOpcode = [&](Instruction *Cur) {
if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {
Value *LHS, *RHS;
return SelectPatternResult::isMinOrMax(
matchSelectPattern(Cur, LHS, RHS).Flavor);
}
// Recognize a call to the llvm.fmuladd intrinsic.
if (isFMulAddIntrinsic(Cur))
return true;
return Cur->getOpcode() == RedOp;
};
// The loop exit instruction is checked first (as a quick test) but added
// last. We check that its opcode is correct (and do not allow it to be a Sub)
// and that it has the expected number of uses: one from the phi and one from
// an LCSSA value, no matter the type.
if (!isCorrectOpcode(LoopExitInstr) || !LoopExitInstr->hasNUses(2))
return {};
// Check that the Phi has one (or two for min/max) uses.
if (!Phi->hasNUses(ExpectedUses))
return {};
Instruction *Cur = getNextInstruction(Phi);
// Every other instruction in the chain should have the expected number of
// uses and the correct opcode.
while (Cur != LoopExitInstr) {
if (!isCorrectOpcode(Cur) || !Cur->hasNUses(ExpectedUses))
return {};
ReductionOperations.push_back(Cur);
Cur = getNextInstruction(Cur);
}
ReductionOperations.push_back(Cur);
return ReductionOperations;
}
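// Illustrative (hypothetical) smax chain that the walk above accepts, assuming
// %rdx.phi is the reduction PHI and %s2 is the loop exit instruction (the
// returned operations are the two selects): the PHI and %s1 each have exactly
// two uses (their icmp and select), and %s2 is used once by the PHI and once
// by an LCSSA phi outside the loop.
//   %c1 = icmp sgt i32 %rdx.phi, %a
//   %s1 = select i1 %c1, i32 %rdx.phi, i32 %a
//   %c2 = icmp sgt i32 %s1, %b
//   %s2 = select i1 %c2, i32 %s1, i32 %b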
InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
const SCEV *Step, BinaryOperator *BOp,
Type *ElementType,
SmallVectorImpl<Instruction *> *Casts)
: StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp),
ElementType(ElementType) {
assert(IK != IK_NoInduction && "Not an induction");
// Start value type should match the induction kind and the value
// itself should not be null.
assert(StartValue && "StartValue is null");
assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
"StartValue is not a pointer for pointer induction");
assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
"StartValue is not an integer for integer induction");
// Check the Step Value. It should be a non-zero integer value.
assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
"Step value is zero");
assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
"Step value should be constant for pointer induction");
assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
"StepValue is not an integer");
assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
"StepValue is not FP for FpInduction");
assert((IK != IK_FpInduction ||
(InductionBinOp &&
(InductionBinOp->getOpcode() == Instruction::FAdd ||
InductionBinOp->getOpcode() == Instruction::FSub))) &&
"Binary opcode should be specified for FP induction");
if (IK == IK_PtrInduction)
assert(ElementType && "Pointer induction must have element type");
else
assert(!ElementType && "Non-pointer induction cannot have element type");
if (Casts) {
for (auto &Inst : *Casts) {
RedundantCasts.push_back(Inst);
}
}
}
ConstantInt *InductionDescriptor::getConstIntStepValue() const {
if (isa<SCEVConstant>(Step))
return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
return nullptr;
}
bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
ScalarEvolution *SE,
InductionDescriptor &D) {
// Here we only handle FP induction variables.
assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");
if (TheLoop->getHeader() != Phi->getParent())
return false;
// The loop may have multiple entrances or multiple exits; we can analyze
// this phi if it has a unique entry value and a unique backedge value.
if (Phi->getNumIncomingValues() != 2)
return false;
Value *BEValue = nullptr, *StartValue = nullptr;
if (TheLoop->contains(Phi->getIncomingBlock(0))) {
BEValue = Phi->getIncomingValue(0);
StartValue = Phi->getIncomingValue(1);
} else {
assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
"Unexpected Phi node in the loop");
BEValue = Phi->getIncomingValue(1);
StartValue = Phi->getIncomingValue(0);
}
BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue);
if (!BOp)
return false;
Value *Addend = nullptr;
if (BOp->getOpcode() == Instruction::FAdd) {
if (BOp->getOperand(0) == Phi)
Addend = BOp->getOperand(1);
else if (BOp->getOperand(1) == Phi)
Addend = BOp->getOperand(0);
} else if (BOp->getOpcode() == Instruction::FSub)
if (BOp->getOperand(0) == Phi)
Addend = BOp->getOperand(1);
if (!Addend)
return false;
// The addend should be loop invariant
if (auto *I = dyn_cast<Instruction>(Addend))
if (TheLoop->contains(I))
return false;
// FP Step has unknown SCEV
const SCEV *Step = SE->getUnknown(Addend);
D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
return true;
}
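// Illustrative (hypothetical) FP induction recognized above, assuming %step is
// loop invariant: the backedge value is an fadd of the PHI and the invariant
// addend, so the step is recorded as an unknown SCEV for %step.
//   %fp.iv   = phi float [ 0.000000e+00, %preheader ], [ %fp.next, %loop ]
//   %fp.next = fadd float %fp.iv, %step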
/// This function is called when we suspect that the update-chain of a phi node
/// (whose symbolic SCEV expression is in \p PhiScev) contains redundant casts
/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime
/// predicate P under which the SCEV expression for the phi can be the
/// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the
/// cast instructions that are involved in the update-chain of this induction.
/// A caller that adds the required runtime predicate is then free to drop these
/// cast instructions, and compute the phi using \p AR (instead of some scev
/// expression with casts).
///
/// For example, without a predicate the scev expression can take the following
/// form:
/// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)
///
/// It corresponds to the following IR sequence:
/// %for.body:
/// %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
/// %casted_phi = "ExtTrunc i64 %x"
/// %add = add i64 %casted_phi, %step
///
/// where %x is given in \p PN,
/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate,
/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of
/// several forms, for example, such as:
/// ExtTrunc1: %casted_phi = and %x, 2^n-1
/// or:
/// ExtTrunc2: %t = shl %x, m
/// %casted_phi = ashr %t, m
///
/// If we are able to find such sequence, we return the instructions
/// we found, namely %casted_phi and the instructions on its use-def chain up
/// to the phi (not including the phi).
static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE,
const SCEVUnknown *PhiScev,
const SCEVAddRecExpr *AR,
SmallVectorImpl<Instruction *> &CastInsts) {
assert(CastInsts.empty() && "CastInsts is expected to be empty.");
auto *PN = cast<PHINode>(PhiScev->getValue());
assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
const Loop *L = AR->getLoop();
// Find any cast instructions that participate in the def-use chain of
// PhiScev in the loop.
// FORNOW/TODO: We currently expect the def-use chain to include only
// two-operand instructions, where one of the operands is an invariant.
// createAddRecFromPHIWithCasts() currently does not support anything more
// involved than that, so we keep the search simple. This can be
// extended/generalized as needed.
auto getDef = [&](const Value *Val) -> Value * {
const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);
if (!BinOp)
return nullptr;
Value *Op0 = BinOp->getOperand(0);
Value *Op1 = BinOp->getOperand(1);
Value *Def = nullptr;
if (L->isLoopInvariant(Op0))
Def = Op1;
else if (L->isLoopInvariant(Op1))
Def = Op0;
return Def;
};
// Look for the instruction that defines the induction via the
// loop backedge.
BasicBlock *Latch = L->getLoopLatch();
if (!Latch)
return false;
Value *Val = PN->getIncomingValueForBlock(Latch);
if (!Val)
return false;
// Follow the def-use chain until the induction phi is reached.
// If on the way we encounter a Value that has the same SCEV Expr as the
// phi node, we can consider the instructions we visit from that point
// as part of the cast-sequence that can be ignored.
bool InCastSequence = false;
auto *Inst = dyn_cast<Instruction>(Val);
while (Val != PN) {
// If we encountered a phi node other than PN, or if we left the loop,
// we bail out.
if (!Inst || !L->contains(Inst)) {
return false;
}
auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));
if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))
InCastSequence = true;
if (InCastSequence) {
// Only the last instruction in the cast sequence is expected to have
// uses outside the induction def-use chain.
if (!CastInsts.empty())
if (!Inst->hasOneUse())
return false;
CastInsts.push_back(Inst);
}
Val = getDef(Val);
if (!Val)
return false;
Inst = dyn_cast<Instruction>(Val);
}
return InCastSequence;
}
bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
PredicatedScalarEvolution &PSE,
InductionDescriptor &D, bool Assume) {
Type *PhiTy = Phi->getType();
// Handle integer and pointer induction variables.
// We now also handle FP inductions, but without trying to build a
// recurrence expression from the PHI node in place.
if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && !PhiTy->isFloatTy() &&
!PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
return false;
if (PhiTy->isFloatingPointTy())
return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);
const SCEV *PhiScev = PSE.getSCEV(Phi);
const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
// We need this expression to be an AddRecExpr.
if (Assume && !AR)
AR = PSE.getAsAddRec(Phi);
if (!AR) {
LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
return false;
}
// Record any Cast instructions that participate in the induction update
const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
// If we started from an UnknownSCEV, and managed to build an AddRecurrence
// only after enabling Assume with PSCEV, this means we may have encountered
// cast instructions that required adding a runtime check in order to
// guarantee the correctness of the AddRecurrence representation of the
// induction.
if (PhiScev != AR && SymbolicPhi) {
SmallVector<Instruction *, 2> Casts;
if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
}
return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
}
bool InductionDescriptor::isInductionPHI(
PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
InductionDescriptor &D, const SCEV *Expr,
SmallVectorImpl<Instruction *> *CastsToIgnore) {
Type *PhiTy = Phi->getType();
// We only handle integer and pointer induction variables.
if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
return false;
// Check that the PHI is consecutive.
const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
if (!AR) {
LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
return false;
}
if (AR->getLoop() != TheLoop) {
// FIXME: We should treat this as a uniform. Unfortunately, we
// don't currently know how to handle uniform PHIs.
LLVM_DEBUG(
dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
return false;
}
Value *StartValue =
Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
BasicBlock *Latch = AR->getLoop()->getLoopLatch();
if (!Latch)
return false;
const SCEV *Step = AR->getStepRecurrence(*SE);
// Calculate the pointer stride and check if it is consecutive.
// The stride may be a constant or a loop invariant integer value.
const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop))
return false;
if (PhiTy->isIntegerTy()) {
BinaryOperator *BOp =
dyn_cast<BinaryOperator>(Phi->getIncomingValueForBlock(Latch));
D = InductionDescriptor(StartValue, IK_IntInduction, Step, BOp,
/* ElementType */ nullptr, CastsToIgnore);
return true;
}
assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
// The step of a pointer induction should be a constant.
if (!ConstStep)
return false;
// Always use i8 element type for opaque pointer inductions.
PointerType *PtrTy = cast<PointerType>(PhiTy);
Type *ElementType = PtrTy->isOpaque()
? Type::getInt8Ty(PtrTy->getContext())
: PtrTy->getNonOpaquePointerElementType();
if (!ElementType->isSized())
return false;
ConstantInt *CV = ConstStep->getValue();
const DataLayout &DL = Phi->getModule()->getDataLayout();
- int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(ElementType));
- if (!Size)
+ TypeSize TySize = DL.getTypeAllocSize(ElementType);
+ // TODO: We could potentially support this for scalable vectors if we can
+ // prove at compile time that the constant step is always a multiple of
+ // the scalable type.
+ if (TySize.isZero() || TySize.isScalable())
return false;
+ int64_t Size = static_cast<int64_t>(TySize.getFixedSize());
int64_t CVSize = CV->getSExtValue();
if (CVSize % Size)
return false;
auto *StepValue =
SE->getConstant(CV->getType(), CVSize / Size, true /* signed */);
D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue,
/* BinOp */ nullptr, ElementType);
return true;
}
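// Illustrative (hypothetical) numbers for the pointer case above: for a
// non-opaque i32* induction whose SCEV step is a constant 8 bytes, the fixed
// element size is 4, so the descriptor records an element-count step of
// 8 / 4 = 2; a scalable element type bails out instead.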
diff --git a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
index 4775340b3438..60895d3ced1a 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -1,6483 +1,6478 @@
//===- InstructionSimplify.cpp - Fold instruction operands ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements routines for folding instructions into simpler forms
// that do not require creating new instructions. This does constant folding
// ("add i32 1, 1" -> "2") but can also handle non-constant operands, either
// returning a constant ("and i32 %x, 0" -> "0") or an already existing value
// ("and i32 %x, %x" -> "%x"). All operands are assumed to have already been
// simplified: This is usually true and assuming it simplifies the logic (if
// they have not been simplified then results are correct but maybe suboptimal).
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/OverflowInstAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/KnownBits.h"
#include <algorithm>
using namespace llvm;
using namespace llvm::PatternMatch;
#define DEBUG_TYPE "instsimplify"
enum { RecursionLimit = 3 };
STATISTIC(NumExpand, "Number of expansions");
STATISTIC(NumReassoc, "Number of reassociations");
static Value *SimplifyAndInst(Value *, Value *, const SimplifyQuery &, unsigned);
static Value *simplifyUnOp(unsigned, Value *, const SimplifyQuery &, unsigned);
static Value *simplifyFPUnOp(unsigned, Value *, const FastMathFlags &,
const SimplifyQuery &, unsigned);
static Value *SimplifyBinOp(unsigned, Value *, Value *, const SimplifyQuery &,
unsigned);
static Value *SimplifyBinOp(unsigned, Value *, Value *, const FastMathFlags &,
const SimplifyQuery &, unsigned);
static Value *SimplifyCmpInst(unsigned, Value *, Value *, const SimplifyQuery &,
unsigned);
static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const SimplifyQuery &Q, unsigned MaxRecurse);
static Value *SimplifyOrInst(Value *, Value *, const SimplifyQuery &, unsigned);
static Value *SimplifyXorInst(Value *, Value *, const SimplifyQuery &, unsigned);
static Value *SimplifyCastInst(unsigned, Value *, Type *,
const SimplifyQuery &, unsigned);
static Value *SimplifyGEPInst(Type *, Value *, ArrayRef<Value *>, bool,
const SimplifyQuery &, unsigned);
static Value *SimplifySelectInst(Value *, Value *, Value *,
const SimplifyQuery &, unsigned);
static Value *foldSelectWithBinaryOp(Value *Cond, Value *TrueVal,
Value *FalseVal) {
BinaryOperator::BinaryOps BinOpCode;
if (auto *BO = dyn_cast<BinaryOperator>(Cond))
BinOpCode = BO->getOpcode();
else
return nullptr;
CmpInst::Predicate ExpectedPred, Pred1, Pred2;
if (BinOpCode == BinaryOperator::Or) {
ExpectedPred = ICmpInst::ICMP_NE;
} else if (BinOpCode == BinaryOperator::And) {
ExpectedPred = ICmpInst::ICMP_EQ;
} else
return nullptr;
// %A = icmp eq %TV, %FV
// %B = icmp eq %X, %Y (and one of these is a select operand)
// %C = and %A, %B
// %D = select %C, %TV, %FV
// -->
// %FV
// %A = icmp ne %TV, %FV
// %B = icmp ne %X, %Y (and one of these is a select operand)
// %C = or %A, %B
// %D = select %C, %TV, %FV
// -->
// %TV
Value *X, *Y;
if (!match(Cond, m_c_BinOp(m_c_ICmp(Pred1, m_Specific(TrueVal),
m_Specific(FalseVal)),
m_ICmp(Pred2, m_Value(X), m_Value(Y)))) ||
Pred1 != Pred2 || Pred1 != ExpectedPred)
return nullptr;
if (X == TrueVal || X == FalseVal || Y == TrueVal || Y == FalseVal)
return BinOpCode == BinaryOperator::Or ? TrueVal : FalseVal;
return nullptr;
}
/// For a boolean type or a vector of boolean type, return false or a vector
/// with every element false.
static Constant *getFalse(Type *Ty) {
return ConstantInt::getFalse(Ty);
}
/// For a boolean type or a vector of boolean type, return true or a vector
/// with every element true.
static Constant *getTrue(Type *Ty) {
return ConstantInt::getTrue(Ty);
}
/// isSameCompare - Is V equivalent to the comparison "LHS Pred RHS"?
static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS,
Value *RHS) {
CmpInst *Cmp = dyn_cast<CmpInst>(V);
if (!Cmp)
return false;
CmpInst::Predicate CPred = Cmp->getPredicate();
Value *CLHS = Cmp->getOperand(0), *CRHS = Cmp->getOperand(1);
if (CPred == Pred && CLHS == LHS && CRHS == RHS)
return true;
return CPred == CmpInst::getSwappedPredicate(Pred) && CLHS == RHS &&
CRHS == LHS;
}
/// Simplify comparison with true or false branch of select:
/// %sel = select i1 %cond, i32 %tv, i32 %fv
/// %cmp = icmp sle i32 %sel, %rhs
/// Compose new comparison by substituting %sel with either %tv or %fv
/// and see if it simplifies.
static Value *simplifyCmpSelCase(CmpInst::Predicate Pred, Value *LHS,
Value *RHS, Value *Cond,
const SimplifyQuery &Q, unsigned MaxRecurse,
Constant *TrueOrFalse) {
Value *SimplifiedCmp = SimplifyCmpInst(Pred, LHS, RHS, Q, MaxRecurse);
if (SimplifiedCmp == Cond) {
// %cmp simplified to the select condition (%cond).
return TrueOrFalse;
} else if (!SimplifiedCmp && isSameCompare(Cond, Pred, LHS, RHS)) {
// It didn't simplify. However, if the composed comparison is equivalent
// to the select condition (%cond), then we can replace it.
return TrueOrFalse;
}
return SimplifiedCmp;
}
/// Simplify comparison with true branch of select
static Value *simplifyCmpSelTrueCase(CmpInst::Predicate Pred, Value *LHS,
Value *RHS, Value *Cond,
const SimplifyQuery &Q,
unsigned MaxRecurse) {
return simplifyCmpSelCase(Pred, LHS, RHS, Cond, Q, MaxRecurse,
getTrue(Cond->getType()));
}
/// Simplify comparison with false branch of select
static Value *simplifyCmpSelFalseCase(CmpInst::Predicate Pred, Value *LHS,
Value *RHS, Value *Cond,
const SimplifyQuery &Q,
unsigned MaxRecurse) {
return simplifyCmpSelCase(Pred, LHS, RHS, Cond, Q, MaxRecurse,
getFalse(Cond->getType()));
}
/// We know comparison with both branches of select can be simplified, but they
/// are not equal. This routine handles some logical simplifications.
static Value *handleOtherCmpSelSimplifications(Value *TCmp, Value *FCmp,
Value *Cond,
const SimplifyQuery &Q,
unsigned MaxRecurse) {
// If the false value simplified to false, then the result of the compare
// is equal to "Cond && TCmp". This also catches the case when the false
// value simplified to false and the true value to true, returning "Cond".
// Folding select to and/or isn't poison-safe in general; impliesPoison
// checks whether folding it does not convert a well-defined value into
// poison.
if (match(FCmp, m_Zero()) && impliesPoison(TCmp, Cond))
if (Value *V = SimplifyAndInst(Cond, TCmp, Q, MaxRecurse))
return V;
// If the true value simplified to true, then the result of the compare
// is equal to "Cond || FCmp".
if (match(TCmp, m_One()) && impliesPoison(FCmp, Cond))
if (Value *V = SimplifyOrInst(Cond, FCmp, Q, MaxRecurse))
return V;
// Finally, if the false value simplified to true and the true value to
// false, then the result of the compare is equal to "!Cond".
if (match(FCmp, m_One()) && match(TCmp, m_Zero()))
if (Value *V = SimplifyXorInst(
Cond, Constant::getAllOnesValue(Cond->getType()), Q, MaxRecurse))
return V;
return nullptr;
}
/// Does the given value dominate the specified phi node?
static bool valueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I)
// Arguments and constants dominate all instructions.
return true;
// If we are processing instructions (and/or basic blocks) that have not been
// fully added to a function, the parent nodes may still be null. Simply
// return the conservative answer in these cases.
if (!I->getParent() || !P->getParent() || !I->getFunction())
return false;
// If we have a DominatorTree then do a precise test.
if (DT)
return DT->dominates(I, P);
// Otherwise, if the instruction is in the entry block and is not an invoke,
// then it obviously dominates all phi nodes.
if (I->getParent()->isEntryBlock() && !isa<InvokeInst>(I) &&
!isa<CallBrInst>(I))
return true;
return false;
}
/// Try to simplify a binary operator of form "V op OtherOp" where V is
/// "(B0 opex B1)" by distributing 'op' across 'opex' as
/// "(B0 op OtherOp) opex (B1 op OtherOp)".
static Value *expandBinOp(Instruction::BinaryOps Opcode, Value *V,
Value *OtherOp, Instruction::BinaryOps OpcodeToExpand,
const SimplifyQuery &Q, unsigned MaxRecurse) {
auto *B = dyn_cast<BinaryOperator>(V);
if (!B || B->getOpcode() != OpcodeToExpand)
return nullptr;
Value *B0 = B->getOperand(0), *B1 = B->getOperand(1);
Value *L = SimplifyBinOp(Opcode, B0, OtherOp, Q.getWithoutUndef(),
MaxRecurse);
if (!L)
return nullptr;
Value *R = SimplifyBinOp(Opcode, B1, OtherOp, Q.getWithoutUndef(),
MaxRecurse);
if (!R)
return nullptr;
// Does the expanded pair of binops simplify to the existing binop?
if ((L == B0 && R == B1) ||
(Instruction::isCommutative(OpcodeToExpand) && L == B1 && R == B0)) {
++NumExpand;
return B;
}
// Otherwise, return "L op' R" if it simplifies.
Value *S = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse);
if (!S)
return nullptr;
++NumExpand;
return S;
}
/// Try to simplify binops of form "A op (B op' C)" or the commuted variant by
/// distributing op over op'.
static Value *expandCommutativeBinOp(Instruction::BinaryOps Opcode,
Value *L, Value *R,
Instruction::BinaryOps OpcodeToExpand,
const SimplifyQuery &Q,
unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
return nullptr;
if (Value *V = expandBinOp(Opcode, L, R, OpcodeToExpand, Q, MaxRecurse))
return V;
if (Value *V = expandBinOp(Opcode, R, L, OpcodeToExpand, Q, MaxRecurse))
return V;
return nullptr;
}
/// Generic simplifications for associative binary operations.
/// Returns the simpler value, or null if none was found.
static Value *SimplifyAssociativeBinOp(Instruction::BinaryOps Opcode,
Value *LHS, Value *RHS,
const SimplifyQuery &Q,
unsigned MaxRecurse) {
assert(Instruction::isAssociative(Opcode) && "Not an associative operation!");
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
return nullptr;
BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
// Transform: "(A op B) op C" ==> "A op (B op C)" if it simplifies completely.
if (Op0 && Op0->getOpcode() == Opcode) {
Value *A = Op0->getOperand(0);
Value *B = Op0->getOperand(1);
Value *C = RHS;
// Does "B op C" simplify?
if (Value *V = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) {
// It does! Return "A op V" if it simplifies or is already available.
// If V equals B then "A op V" is just the LHS.
if (V == B) return LHS;
// Otherwise return "A op V" if it simplifies.
if (Value *W = SimplifyBinOp(Opcode, A, V, Q, MaxRecurse)) {
++NumReassoc;
return W;
}
}
}
// Transform: "A op (B op C)" ==> "(A op B) op C" if it simplifies completely.
if (Op1 && Op1->getOpcode() == Opcode) {
Value *A = LHS;
Value *B = Op1->getOperand(0);
Value *C = Op1->getOperand(1);
// Does "A op B" simplify?
if (Value *V = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse)) {
// It does! Return "V op C" if it simplifies or is already available.
// If V equals B then "V op C" is just the RHS.
if (V == B) return RHS;
// Otherwise return "V op C" if it simplifies.
if (Value *W = SimplifyBinOp(Opcode, V, C, Q, MaxRecurse)) {
++NumReassoc;
return W;
}
}
}
// The remaining transforms require commutativity as well as associativity.
if (!Instruction::isCommutative(Opcode))
return nullptr;
// Transform: "(A op B) op C" ==> "(C op A) op B" if it simplifies completely.
if (Op0 && Op0->getOpcode() == Opcode) {
Value *A = Op0->getOperand(0);
Value *B = Op0->getOperand(1);
Value *C = RHS;
// Does "C op A" simplify?
if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) {
// It does! Return "V op B" if it simplifies or is already available.
// If V equals A then "V op B" is just the LHS.
if (V == A) return LHS;
// Otherwise return "V op B" if it simplifies.
if (Value *W = SimplifyBinOp(Opcode, V, B, Q, MaxRecurse)) {
++NumReassoc;
return W;
}
}
}
// Transform: "A op (B op C)" ==> "B op (C op A)" if it simplifies completely.
if (Op1 && Op1->getOpcode() == Opcode) {
Value *A = LHS;
Value *B = Op1->getOperand(0);
Value *C = Op1->getOperand(1);
// Does "C op A" simplify?
if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) {
// It does! Return "B op V" if it simplifies or is already available.
// If V equals C then "B op V" is just the RHS.
if (V == C) return RHS;
// Otherwise return "B op V" if it simplifies.
if (Value *W = SimplifyBinOp(Opcode, B, V, Q, MaxRecurse)) {
++NumReassoc;
return W;
}
}
}
return nullptr;
}
/// In the case of a binary operation with a select instruction as an operand,
/// try to simplify the binop by seeing whether evaluating it on both branches
/// of the select results in the same value. Returns the common value if so,
/// otherwise returns null.
static Value *ThreadBinOpOverSelect(Instruction::BinaryOps Opcode, Value *LHS,
Value *RHS, const SimplifyQuery &Q,
unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
return nullptr;
SelectInst *SI;
if (isa<SelectInst>(LHS)) {
SI = cast<SelectInst>(LHS);
} else {
assert(isa<SelectInst>(RHS) && "No select instruction operand!");
SI = cast<SelectInst>(RHS);
}
// Evaluate the BinOp on the true and false branches of the select.
Value *TV;
Value *FV;
if (SI == LHS) {
TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, Q, MaxRecurse);
FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, Q, MaxRecurse);
} else {
TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), Q, MaxRecurse);
FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), Q, MaxRecurse);
}
// If they simplified to the same value, then return the common value.
// If they both failed to simplify then return null.
if (TV == FV)
return TV;
// If one branch simplified to undef, return the other one.
if (TV && Q.isUndefValue(TV))
return FV;
if (FV && Q.isUndefValue(FV))
return TV;
// If applying the operation did not change the true and false select values,
// then the result of the binop is the select itself.
if (TV == SI->getTrueValue() && FV == SI->getFalseValue())
return SI;
// If one branch simplified and the other did not, and the simplified
// value is equal to the unsimplified one, return the simplified value.
// For example, select (cond, X, X & Z) & Z -> X & Z.
if ((FV && !TV) || (TV && !FV)) {
// Check that the simplified value has the form "X op Y" where "op" is the
// same as the original operation.
Instruction *Simplified = dyn_cast<Instruction>(FV ? FV : TV);
if (Simplified && Simplified->getOpcode() == unsigned(Opcode)) {
// The value that didn't simplify is "UnsimplifiedLHS op UnsimplifiedRHS".
// We already know that "op" is the same as for the simplified value. See
// if the operands match too. If so, return the simplified value.
Value *UnsimplifiedBranch = FV ? SI->getTrueValue() : SI->getFalseValue();
Value *UnsimplifiedLHS = SI == LHS ? UnsimplifiedBranch : LHS;
Value *UnsimplifiedRHS = SI == LHS ? RHS : UnsimplifiedBranch;
if (Simplified->getOperand(0) == UnsimplifiedLHS &&
Simplified->getOperand(1) == UnsimplifiedRHS)
return Simplified;
if (Simplified->isCommutative() &&
Simplified->getOperand(1) == UnsimplifiedLHS &&
Simplified->getOperand(0) == UnsimplifiedRHS)
return Simplified;
}
}
return nullptr;
}
/// In the case of a comparison with a select instruction, try to simplify the
/// comparison by seeing whether both branches of the select result in the same
/// value. Returns the common value if so, otherwise returns null.
/// For example, if we have:
/// %tmp = select i1 %cmp, i32 1, i32 2
/// %cmp1 = icmp sle i32 %tmp, 3
/// We can simplify %cmp1 to true, because both branches of the select are
/// less than 3. We compose a new comparison by substituting %tmp with both
/// branches of the select and see if it can be simplified.
static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
Value *RHS, const SimplifyQuery &Q,
unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
return nullptr;
// Make sure the select is on the LHS.
if (!isa<SelectInst>(LHS)) {
std::swap(LHS, RHS);
Pred = CmpInst::getSwappedPredicate(Pred);
}
assert(isa<SelectInst>(LHS) && "Not comparing with a select instruction!");
SelectInst *SI = cast<SelectInst>(LHS);
Value *Cond = SI->getCondition();
Value *TV = SI->getTrueValue();
Value *FV = SI->getFalseValue();
// Now that we have "cmp select(Cond, TV, FV), RHS", analyse it.
// Does "cmp TV, RHS" simplify?
Value *TCmp = simplifyCmpSelTrueCase(Pred, TV, RHS, Cond, Q, MaxRecurse);
if (!TCmp)
return nullptr;
// Does "cmp FV, RHS" simplify?
Value *FCmp = simplifyCmpSelFalseCase(Pred, FV, RHS, Cond, Q, MaxRecurse);
if (!FCmp)
return nullptr;
// If both sides simplified to the same value, then use it as the result of
// the original comparison.
if (TCmp == FCmp)
return TCmp;
// The remaining cases only make sense if the select condition has the same
// type as the result of the comparison, so bail out if this is not so.
if (Cond->getType()->isVectorTy() == RHS->getType()->isVectorTy())
return handleOtherCmpSelSimplifications(TCmp, FCmp, Cond, Q, MaxRecurse);
return nullptr;
}
/// In the case of a binary operation with an operand that is a PHI instruction,
/// try to simplify the binop by seeing whether evaluating it on the incoming
/// phi values yields the same result for every value. If so returns the common
/// value, otherwise returns null.
static Value *ThreadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS,
Value *RHS, const SimplifyQuery &Q,
unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
return nullptr;
PHINode *PI;
if (isa<PHINode>(LHS)) {
PI = cast<PHINode>(LHS);
// Bail out if RHS and the phi may be mutually interdependent due to a loop.
if (!valueDominatesPHI(RHS, PI, Q.DT))
return nullptr;
} else {
assert(isa<PHINode>(RHS) && "No PHI instruction operand!");
PI = cast<PHINode>(RHS);
// Bail out if LHS and the phi may be mutually interdependent due to a loop.
if (!valueDominatesPHI(LHS, PI, Q.DT))
return nullptr;
}
// Evaluate the BinOp on the incoming phi values.
Value *CommonValue = nullptr;
for (Value *Incoming : PI->incoming_values()) {
// If the incoming value is the phi node itself, it can safely be skipped.
if (Incoming == PI) continue;
Value *V = PI == LHS ?
SimplifyBinOp(Opcode, Incoming, RHS, Q, MaxRecurse) :
SimplifyBinOp(Opcode, LHS, Incoming, Q, MaxRecurse);
// If the operation failed to simplify, or simplified to a different value
// than before, then give up.
if (!V || (CommonValue && V != CommonValue))
return nullptr;
CommonValue = V;
}
return CommonValue;
}
/// In the case of a comparison with a PHI instruction, try to simplify the
/// comparison by seeing whether comparing with all of the incoming phi values
/// yields the same result every time. If so returns the common result,
/// otherwise returns null.
static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
const SimplifyQuery &Q, unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
return nullptr;
// Make sure the phi is on the LHS.
if (!isa<PHINode>(LHS)) {
std::swap(LHS, RHS);
Pred = CmpInst::getSwappedPredicate(Pred);
}
assert(isa<PHINode>(LHS) && "Not comparing with a phi instruction!");
PHINode *PI = cast<PHINode>(LHS);
// Bail out if RHS and the phi may be mutually interdependent due to a loop.
if (!valueDominatesPHI(RHS, PI, Q.DT))
return nullptr;
// Evaluate the comparison on the incoming phi values.
Value *CommonValue = nullptr;
for (unsigned u = 0, e = PI->getNumIncomingValues(); u < e; ++u) {
Value *Incoming = PI->getIncomingValue(u);
Instruction *InTI = PI->getIncomingBlock(u)->getTerminator();
// If the incoming value is the phi node itself, it can safely be skipped.
if (Incoming == PI) continue;
// Change the context instruction to the "edge" that flows into the phi.
// This is important because that is where incoming is actually "evaluated"
// even though it is used later somewhere else.
Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q.getWithInstruction(InTI),
MaxRecurse);
// If the operation failed to simplify, or simplified to a different value
// than before, then give up.
if (!V || (CommonValue && V != CommonValue))
return nullptr;
CommonValue = V;
}
return CommonValue;
}
static Constant *foldOrCommuteConstant(Instruction::BinaryOps Opcode,
Value *&Op0, Value *&Op1,
const SimplifyQuery &Q) {
if (auto *CLHS = dyn_cast<Constant>(Op0)) {
if (auto *CRHS = dyn_cast<Constant>(Op1))
return ConstantFoldBinaryOpOperands(Opcode, CLHS, CRHS, Q.DL);
// Canonicalize the constant to the RHS if this is a commutative operation.
if (Instruction::isCommutative(Opcode))
std::swap(Op0, Op1);
}
return nullptr;
}
/// Given operands for an Add, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW,
const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Instruction::Add, Op0, Op1, Q))
return C;
// X + poison -> poison
if (isa<PoisonValue>(Op1))
return Op1;
// X + undef -> undef
if (Q.isUndefValue(Op1))
return Op1;
// X + 0 -> X
if (match(Op1, m_Zero()))
return Op0;
// If the two operands are negations of each other, return 0.
if (isKnownNegation(Op0, Op1))
return Constant::getNullValue(Op0->getType());
// X + (Y - X) -> Y
// (Y - X) + X -> Y
// Eg: X + -X -> 0
Value *Y = nullptr;
if (match(Op1, m_Sub(m_Value(Y), m_Specific(Op0))) ||
match(Op0, m_Sub(m_Value(Y), m_Specific(Op1))))
return Y;
// X + ~X -> -1 since ~X = -X-1
Type *Ty = Op0->getType();
if (match(Op0, m_Not(m_Specific(Op1))) ||
match(Op1, m_Not(m_Specific(Op0))))
return Constant::getAllOnesValue(Ty);
// add nsw/nuw (xor Y, signmask), signmask --> Y
// The no-wrapping add guarantees that the top bit will be set by the add.
// Therefore, the xor must be clearing the already set sign bit of Y.
if ((IsNSW || IsNUW) && match(Op1, m_SignMask()) &&
match(Op0, m_Xor(m_Value(Y), m_SignMask())))
return Y;
// add nuw %x, -1 -> -1, because %x can only be 0.
if (IsNUW && match(Op1, m_AllOnes()))
return Op1; // Which is -1.
/// i1 add -> xor.
if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1))
if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1))
return V;
// Try some generic simplifications for associative operations.
if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, Q,
MaxRecurse))
return V;
// Threading Add over selects and phi nodes is pointless, so don't bother.
// Threading over the select in "A + select(cond, B, C)" means evaluating
// "A+B" and "A+C" and seeing if they are equal; but they are equal if and
// only if B and C are equal. If B and C are equal then (since we assume
// that operands have already been simplified) "select(cond, B, C)" should
// have been simplified to the common value of B and C already. Analysing
// "A+B" and "A+C" thus gains nothing, but costs compile time. Similarly
// for threading over phi nodes.
return nullptr;
}
Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW,
const SimplifyQuery &Query) {
return ::SimplifyAddInst(Op0, Op1, IsNSW, IsNUW, Query, RecursionLimit);
}
/// Compute the base pointer and cumulative constant offsets for V.
///
/// This strips all constant offsets off of V, leaving it the base pointer, and
/// accumulates the total constant offset applied in the returned constant. It
/// returns 0 if V is not a pointer, and returns the constant '0' if there are
/// no constant offsets applied.
///
/// This is very similar to GetPointerBaseWithConstantOffset except it doesn't
/// follow non-inbounds geps. This allows it to remain usable for icmp ult/etc.
/// folding.
static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V,
bool AllowNonInbounds = false) {
assert(V->getType()->isPtrOrPtrVectorTy());
APInt Offset = APInt::getZero(DL.getIndexTypeSizeInBits(V->getType()));
V = V->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds);
// As that strip may trace through `addrspacecast`, we need to sext or trunc
// the calculated offset.
Type *IntIdxTy = DL.getIndexType(V->getType())->getScalarType();
Offset = Offset.sextOrTrunc(IntIdxTy->getIntegerBitWidth());
Constant *OffsetIntPtr = ConstantInt::get(IntIdxTy, Offset);
if (VectorType *VecTy = dyn_cast<VectorType>(V->getType()))
return ConstantVector::getSplat(VecTy->getElementCount(), OffsetIntPtr);
return OffsetIntPtr;
}
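// Illustrative (hypothetical) effect of the helper above: for
//   %p = getelementptr inbounds i32, i32* %base, i64 3
// stripping %p rewrites V to %base and returns the accumulated byte offset 12
// as a constant of the index type (splatted for vectors of pointers).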
/// Compute the constant difference between two pointer values.
/// If the difference is not a constant, returns zero.
static Constant *computePointerDifference(const DataLayout &DL, Value *LHS,
Value *RHS) {
Constant *LHSOffset = stripAndComputeConstantOffsets(DL, LHS);
Constant *RHSOffset = stripAndComputeConstantOffsets(DL, RHS);
// If LHS and RHS are not related via constant offsets to the same base
// value, there is nothing we can do here.
if (LHS != RHS)
return nullptr;
// Otherwise, the difference of LHS - RHS can be computed as:
// LHS - RHS
// = (LHSOffset + Base) - (RHSOffset + Base)
// = LHSOffset - RHSOffset
return ConstantExpr::getSub(LHSOffset, RHSOffset);
}
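// For example (informal, hypothetical values): ptrtoint-based subtraction of
// two pointers that strip to the same base, such as (%base + 12) - (%base + 4),
// yields the constant byte difference 8 via the helper above.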
/// Given operands for a Sub, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Instruction::Sub, Op0, Op1, Q))
return C;
// X - poison -> poison
// poison - X -> poison
if (isa<PoisonValue>(Op0) || isa<PoisonValue>(Op1))
return PoisonValue::get(Op0->getType());
// X - undef -> undef
// undef - X -> undef
if (Q.isUndefValue(Op0) || Q.isUndefValue(Op1))
return UndefValue::get(Op0->getType());
// X - 0 -> X
if (match(Op1, m_Zero()))
return Op0;
// X - X -> 0
if (Op0 == Op1)
return Constant::getNullValue(Op0->getType());
// Is this a negation?
if (match(Op0, m_Zero())) {
// 0 - X -> 0 if the sub is NUW.
if (isNUW)
return Constant::getNullValue(Op0->getType());
KnownBits Known = computeKnownBits(Op1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
if (Known.Zero.isMaxSignedValue()) {
// Op1 is either 0 or the minimum signed value. If the sub is NSW, then
// Op1 must be 0 because negating the minimum signed value is undefined.
if (isNSW)
return Constant::getNullValue(Op0->getType());
// 0 - X -> X if X is 0 or the minimum signed value.
return Op1;
}
}
// (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies.
// For example, (X + Y) - Y -> X; (Y + X) - Y -> X
Value *X = nullptr, *Y = nullptr, *Z = Op1;
if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z
// See if "V === Y - Z" simplifies.
if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, Q, MaxRecurse-1))
// It does! Now see if "X + V" simplifies.
if (Value *W = SimplifyBinOp(Instruction::Add, X, V, Q, MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
}
// See if "V === X - Z" simplifies.
if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1))
// It does! Now see if "Y + V" simplifies.
if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, Q, MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
}
}
// X - (Y + Z) -> (X - Y) - Z or (X - Z) - Y if everything simplifies.
// For example, X - (X + 1) -> -1
X = Op0;
if (MaxRecurse && match(Op1, m_Add(m_Value(Y), m_Value(Z)))) { // X - (Y + Z)
// See if "V === X - Y" simplifies.
if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1))
// It does! Now see if "V - Z" simplifies.
if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, Q, MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
}
// See if "V === X - Z" simplifies.
if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1))
// It does! Now see if "V - Y" simplifies.
if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, Q, MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
}
}
// Z - (X - Y) -> (Z - X) + Y if everything simplifies.
// For example, X - (X - Y) -> Y.
Z = Op0;
if (MaxRecurse && match(Op1, m_Sub(m_Value(X), m_Value(Y)))) // Z - (X - Y)
// See if "V === Z - X" simplifies.
if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, Q, MaxRecurse-1))
// It does! Now see if "V + Y" simplifies.
if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, Q, MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
}
// trunc(X) - trunc(Y) -> trunc(X - Y) if everything simplifies.
if (MaxRecurse && match(Op0, m_Trunc(m_Value(X))) &&
match(Op1, m_Trunc(m_Value(Y))))
if (X->getType() == Y->getType())
// See if "V === X - Y" simplifies.
if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1))
// It does! Now see if "trunc V" simplifies.
if (Value *W = SimplifyCastInst(Instruction::Trunc, V, Op0->getType(),
Q, MaxRecurse - 1))
// It does, return the simplified "trunc V".
return W;
// Variations on GEP(base, I, ...) - GEP(base, i, ...) -> GEP(null, I-i, ...).
if (match(Op0, m_PtrToInt(m_Value(X))) &&
match(Op1, m_PtrToInt(m_Value(Y))))
if (Constant *Result = computePointerDifference(Q.DL, X, Y))
return ConstantExpr::getIntegerCast(Result, Op0->getType(), true);
// i1 sub -> xor.
if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1))
if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1))
return V;
// Threading Sub over selects and phi nodes is pointless, so don't bother.
// Threading over the select in "A - select(cond, B, C)" means evaluating
// "A-B" and "A-C" and seeing if they are equal; but they are equal if and
// only if B and C are equal. If B and C are equal then (since we assume
// that operands have already been simplified) "select(cond, B, C)" should
// have been simplified to the common value of B and C already. Analysing
// "A-B" and "A-C" thus gains nothing, but costs compile time. Similarly
// for threading over phi nodes.
return nullptr;
}
Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const SimplifyQuery &Q) {
return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit);
}
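// Illustrative sketch (hypothetical IR values %x, %y): for
//   %a = add i32 %x, %y
//   %r = sub i32 %a, %y        ; (X + Y) - Y
// the reassociation above first folds "sub %y, %y" to 0 and then "add %x, 0"
// to %x, so %r simplifies to %x when the recursion budget allows.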
/// Given operands for a Mul, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Instruction::Mul, Op0, Op1, Q))
return C;
// X * poison -> poison
if (isa<PoisonValue>(Op1))
return Op1;
// X * undef -> 0
// X * 0 -> 0
if (Q.isUndefValue(Op1) || match(Op1, m_Zero()))
return Constant::getNullValue(Op0->getType());
// X * 1 -> X
if (match(Op1, m_One()))
return Op0;
// (X / Y) * Y -> X if the division is exact.
Value *X = nullptr;
if (Q.IIQ.UseInstrInfo &&
(match(Op0,
m_Exact(m_IDiv(m_Value(X), m_Specific(Op1)))) || // (X / Y) * Y
match(Op1, m_Exact(m_IDiv(m_Value(X), m_Specific(Op0)))))) // Y * (X / Y)
return X;
// i1 mul -> and.
if (MaxRecurse && Op0->getType()->isIntOrIntVectorTy(1))
if (Value *V = SimplifyAndInst(Op0, Op1, Q, MaxRecurse-1))
return V;
// Try some generic simplifications for associative operations.
if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, Q,
MaxRecurse))
return V;
// Mul distributes over Add. Try some generic simplifications based on this.
if (Value *V = expandCommutativeBinOp(Instruction::Mul, Op0, Op1,
Instruction::Add, Q, MaxRecurse))
return V;
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, Q,
MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, Q,
MaxRecurse))
return V;
return nullptr;
}
Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
return ::SimplifyMulInst(Op0, Op1, Q, RecursionLimit);
}
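// Illustrative sketch (hypothetical IR, assuming instruction flags are used):
//   %d = udiv exact i32 %x, %y
//   %m = mul i32 %d, %y        ; (X / Y) * Y
// folds to %x, since an exact division guarantees no remainder was discarded.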
/// Check for common or similar folds of integer division or integer remainder.
/// This applies to all 4 opcodes (sdiv/udiv/srem/urem).
static Value *simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0,
Value *Op1, const SimplifyQuery &Q) {
bool IsDiv = (Opcode == Instruction::SDiv || Opcode == Instruction::UDiv);
bool IsSigned = (Opcode == Instruction::SDiv || Opcode == Instruction::SRem);
Type *Ty = Op0->getType();
// X / undef -> poison
// X % undef -> poison
if (Q.isUndefValue(Op1) || isa<PoisonValue>(Op1))
return PoisonValue::get(Ty);
// X / 0 -> poison
// X % 0 -> poison
// We don't need to preserve faults!
if (match(Op1, m_Zero()))
return PoisonValue::get(Ty);
// If any element of a constant divisor fixed width vector is zero or undef
// the behavior is undefined and we can fold the whole op to poison.
auto *Op1C = dyn_cast<Constant>(Op1);
auto *VTy = dyn_cast<FixedVectorType>(Ty);
if (Op1C && VTy) {
unsigned NumElts = VTy->getNumElements();
for (unsigned i = 0; i != NumElts; ++i) {
Constant *Elt = Op1C->getAggregateElement(i);
if (Elt && (Elt->isNullValue() || Q.isUndefValue(Elt)))
return PoisonValue::get(Ty);
}
}
// poison / X -> poison
// poison % X -> poison
if (isa<PoisonValue>(Op0))
return Op0;
// undef / X -> 0
// undef % X -> 0
if (Q.isUndefValue(Op0))
return Constant::getNullValue(Ty);
// 0 / X -> 0
// 0 % X -> 0
if (match(Op0, m_Zero()))
return Constant::getNullValue(Op0->getType());
// X / X -> 1
// X % X -> 0
if (Op0 == Op1)
return IsDiv ? ConstantInt::get(Ty, 1) : Constant::getNullValue(Ty);
// X / 1 -> X
// X % 1 -> 0
// If this is a boolean op (single-bit element type), we can't have
// division-by-zero or remainder-by-zero, so assume the divisor is 1.
// Similarly, if we're zero-extending a boolean divisor, then assume it's a 1.
Value *X;
if (match(Op1, m_One()) || Ty->isIntOrIntVectorTy(1) ||
(match(Op1, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)))
return IsDiv ? Op0 : Constant::getNullValue(Ty);
// If X * Y does not overflow, then:
// X * Y / Y -> X
// X * Y % Y -> 0
if (match(Op0, m_c_Mul(m_Value(X), m_Specific(Op1)))) {
auto *Mul = cast<OverflowingBinaryOperator>(Op0);
// The multiplication can't overflow if it is defined not to, or if
// X == A / Y for some A.
if ((IsSigned && Q.IIQ.hasNoSignedWrap(Mul)) ||
(!IsSigned && Q.IIQ.hasNoUnsignedWrap(Mul)) ||
(IsSigned && match(X, m_SDiv(m_Value(), m_Specific(Op1)))) ||
(!IsSigned && match(X, m_UDiv(m_Value(), m_Specific(Op1))))) {
return IsDiv ? X : Constant::getNullValue(Op0->getType());
}
}
return nullptr;
}
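// Illustrative sketch (hypothetical IR, assuming the nuw flag is visible):
//   %m = mul nuw i32 %x, %y
//   %q = udiv i32 %m, %y       ; X * Y / Y
// folds to %x via the "X * Y / Y -> X" case above, and the corresponding
// urem folds to 0, because nuw rules out the overflow that could change it.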
/// Given a predicate and two operands, return true if the comparison is true.
/// This is a helper for div/rem simplification where we return some other value
/// when we can prove a relationship between the operands.
static bool isICmpTrue(ICmpInst::Predicate Pred, Value *LHS, Value *RHS,
const SimplifyQuery &Q, unsigned MaxRecurse) {
Value *V = SimplifyICmpInst(Pred, LHS, RHS, Q, MaxRecurse);
Constant *C = dyn_cast_or_null<Constant>(V);
return (C && C->isAllOnesValue());
}
/// Return true if we can simplify X / Y to 0. Remainder can adapt that answer
/// to simplify X % Y to X.
static bool isDivZero(Value *X, Value *Y, const SimplifyQuery &Q,
unsigned MaxRecurse, bool IsSigned) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
return false;
if (IsSigned) {
// |X| / |Y| --> 0
//
// We require that 1 operand is a simple constant. That could be extended to
// 2 variables if we computed the sign bit for each.
//
// Make sure that a constant is not the minimum signed value because taking
// the abs() of that is undefined.
Type *Ty = X->getType();
const APInt *C;
if (match(X, m_APInt(C)) && !C->isMinSignedValue()) {
// Is the variable divisor magnitude always greater than the constant
// dividend magnitude?
// |Y| > |C| --> Y < -abs(C) or Y > abs(C)
Constant *PosDividendC = ConstantInt::get(Ty, C->abs());
Constant *NegDividendC = ConstantInt::get(Ty, -C->abs());
if (isICmpTrue(CmpInst::ICMP_SLT, Y, NegDividendC, Q, MaxRecurse) ||
isICmpTrue(CmpInst::ICMP_SGT, Y, PosDividendC, Q, MaxRecurse))
return true;
}
if (match(Y, m_APInt(C))) {
// Special-case: we can't take the abs() of a minimum signed value. If
// that's the divisor, then all we have to do is prove that the dividend
// is also not the minimum signed value.
if (C->isMinSignedValue())
return isICmpTrue(CmpInst::ICMP_NE, X, Y, Q, MaxRecurse);
// Is the variable dividend magnitude always less than the constant
// divisor magnitude?
// |X| < |C| --> X > -abs(C) and X < abs(C)
Constant *PosDivisorC = ConstantInt::get(Ty, C->abs());
Constant *NegDivisorC = ConstantInt::get(Ty, -C->abs());
if (isICmpTrue(CmpInst::ICMP_SGT, X, NegDivisorC, Q, MaxRecurse) &&
isICmpTrue(CmpInst::ICMP_SLT, X, PosDivisorC, Q, MaxRecurse))
return true;
}
return false;
}
// IsSigned == false.
// Is the unsigned dividend known to be less than a constant divisor?
// TODO: Convert this (and above) to range analysis
// ("computeConstantRangeIncludingKnownBits")?
const APInt *C;
if (match(Y, m_APInt(C)) &&
computeKnownBits(X, Q.DL, 0, Q.AC, Q.CxtI, Q.DT).getMaxValue().ult(*C))
return true;
// Try again for any divisor:
// Is the dividend unsigned less than the divisor?
return isICmpTrue(ICmpInst::ICMP_ULT, X, Y, Q, MaxRecurse);
}
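// Illustrative sketch (hypothetical): for
//   %q = sdiv i32 %x, 100
// where %x can be proven to lie strictly between -100 and 100, the
// |X| < |C| check above reports the quotient is 0; simplifyRem reuses the
// same answer to fold "srem i32 %x, 100" to %x.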
/// These are simplifications common to SDiv and UDiv.
static Value *simplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q))
return C;
if (Value *V = simplifyDivRem(Opcode, Op0, Op1, Q))
return V;
bool IsSigned = Opcode == Instruction::SDiv;
// (X rem Y) / Y -> 0
if ((IsSigned && match(Op0, m_SRem(m_Value(), m_Specific(Op1)))) ||
(!IsSigned && match(Op0, m_URem(m_Value(), m_Specific(Op1)))))
return Constant::getNullValue(Op0->getType());
// (X /u C1) /u C2 -> 0 if C1 * C2 overflow
ConstantInt *C1, *C2;
if (!IsSigned && match(Op0, m_UDiv(m_Value(), m_ConstantInt(C1))) &&
match(Op1, m_ConstantInt(C2))) {
bool Overflow;
(void)C1->getValue().umul_ov(C2->getValue(), Overflow);
if (Overflow)
return Constant::getNullValue(Op0->getType());
}
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
if (isDivZero(Op0, Op1, Q, MaxRecurse, IsSigned))
return Constant::getNullValue(Op0->getType());
return nullptr;
}
/// These are simplifications common to SRem and URem.
static Value *simplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q))
return C;
if (Value *V = simplifyDivRem(Opcode, Op0, Op1, Q))
return V;
// (X % Y) % Y -> X % Y
if ((Opcode == Instruction::SRem &&
match(Op0, m_SRem(m_Value(), m_Specific(Op1)))) ||
(Opcode == Instruction::URem &&
match(Op0, m_URem(m_Value(), m_Specific(Op1)))))
return Op0;
// (X << Y) % X -> 0
if (Q.IIQ.UseInstrInfo &&
((Opcode == Instruction::SRem &&
match(Op0, m_NSWShl(m_Specific(Op1), m_Value()))) ||
(Opcode == Instruction::URem &&
match(Op0, m_NUWShl(m_Specific(Op1), m_Value())))))
return Constant::getNullValue(Op0->getType());
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
// If X / Y == 0, then X % Y == X.
if (isDivZero(Op0, Op1, Q, MaxRecurse, Opcode == Instruction::SRem))
return Op0;
return nullptr;
}
/// Given operands for an SDiv, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
// If two operands are negated and no signed overflow, return -1.
if (isKnownNegation(Op0, Op1, /*NeedNSW=*/true))
return Constant::getAllOnesValue(Op0->getType());
return simplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse);
}
Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
return ::SimplifySDivInst(Op0, Op1, Q, RecursionLimit);
}
/// Given operands for a UDiv, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
return simplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse);
}
Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
return ::SimplifyUDivInst(Op0, Op1, Q, RecursionLimit);
}
/// Given operands for an SRem, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
// If the divisor is 0, the result is undefined, so assume the divisor is -1.
// srem Op0, (sext i1 X) --> srem Op0, -1 --> 0
Value *X;
if (match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))
return ConstantInt::getNullValue(Op0->getType());
// If the two operands are negated, return 0.
if (isKnownNegation(Op0, Op1))
return ConstantInt::getNullValue(Op0->getType());
return simplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse);
}
Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
return ::SimplifySRemInst(Op0, Op1, Q, RecursionLimit);
}
/// Given operands for a URem, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
return simplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse);
}
Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
return ::SimplifyURemInst(Op0, Op1, Q, RecursionLimit);
}
/// Returns true if a shift by \c Amount always yields poison.
static bool isPoisonShift(Value *Amount, const SimplifyQuery &Q) {
Constant *C = dyn_cast<Constant>(Amount);
if (!C)
return false;
// X shift by undef -> poison because it may shift by the bitwidth.
if (Q.isUndefValue(C))
return true;
// Shifting by the bitwidth or more is undefined.
if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
if (CI->getValue().uge(CI->getType()->getScalarSizeInBits()))
return true;
// If all lanes of a vector shift are undefined the whole shift is.
if (isa<ConstantVector>(C) || isa<ConstantDataVector>(C)) {
for (unsigned I = 0,
E = cast<FixedVectorType>(C->getType())->getNumElements();
I != E; ++I)
if (!isPoisonShift(C->getAggregateElement(I), Q))
return false;
return true;
}
return false;
}
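// Illustrative sketch (hypothetical IR): a constant amount equal to the bit
// width, e.g.
//   %s = shl i32 %x, 32
// is reported as a poison shift, and a <2 x i32> splat of 32 is handled the
// same way lane by lane.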
/// Given operands for an Shl, LShr or AShr, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0,
Value *Op1, bool IsNSW, const SimplifyQuery &Q,
unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Opcode, Op0, Op1, Q))
return C;
// poison shift by X -> poison
if (isa<PoisonValue>(Op0))
return Op0;
// 0 shift by X -> 0
if (match(Op0, m_Zero()))
return Constant::getNullValue(Op0->getType());
// X shift by 0 -> X
// Shift-by-sign-extended bool must be shift-by-0 because shift-by-all-ones
// would be poison.
Value *X;
if (match(Op1, m_Zero()) ||
(match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)))
return Op0;
// Fold undefined shifts.
if (isPoisonShift(Op1, Q))
return PoisonValue::get(Op0->getType());
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
// If any bits in the shift amount make that value greater than or equal to
// the number of bits in the type, the shift is undefined.
KnownBits KnownAmt = computeKnownBits(Op1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
if (KnownAmt.getMinValue().uge(KnownAmt.getBitWidth()))
return PoisonValue::get(Op0->getType());
// If all valid bits in the shift amount are known zero, the first operand is
// unchanged.
unsigned NumValidShiftBits = Log2_32_Ceil(KnownAmt.getBitWidth());
if (KnownAmt.countMinTrailingZeros() >= NumValidShiftBits)
return Op0;
// Check for nsw shl leading to a poison value.
if (IsNSW) {
assert(Opcode == Instruction::Shl && "Expected shl for nsw instruction");
KnownBits KnownVal = computeKnownBits(Op0, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
KnownBits KnownShl = KnownBits::shl(KnownVal, KnownAmt);
if (KnownVal.Zero.isSignBitSet())
KnownShl.Zero.setSignBit();
if (KnownVal.One.isSignBitSet())
KnownShl.One.setSignBit();
if (KnownShl.hasConflict())
return PoisonValue::get(Op0->getType());
}
return nullptr;
}
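// Illustrative sketch (hypothetical IR): only the low Log2_32_Ceil(BitWidth)
// bits of an amount can select a valid shift, so for
//   %amt = shl i32 %y, 5       ; low 5 bits known zero
//   %r   = lshr i32 %x, %amt
// the "all valid bits known zero" check above returns %x, since the only
// non-poison value %amt can take is 0.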
/// Given operands for an Shl, LShr or AShr, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0,
Value *Op1, bool isExact, const SimplifyQuery &Q,
unsigned MaxRecurse) {
if (Value *V =
SimplifyShift(Opcode, Op0, Op1, /*IsNSW*/ false, Q, MaxRecurse))
return V;
// X >> X -> 0
if (Op0 == Op1)
return Constant::getNullValue(Op0->getType());
// undef >> X -> 0
// undef >> X -> undef (if it's exact)
if (Q.isUndefValue(Op0))
return isExact ? Op0 : Constant::getNullValue(Op0->getType());
// The low bit cannot be shifted out of an exact shift if it is set.
if (isExact) {
KnownBits Op0Known = computeKnownBits(Op0, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
if (Op0Known.One[0])
return Op0;
}
return nullptr;
}
/// Given operands for an Shl, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Value *V =
SimplifyShift(Instruction::Shl, Op0, Op1, isNSW, Q, MaxRecurse))
return V;
// undef << X -> 0
// undef << X -> undef (if it's NSW/NUW)
if (Q.isUndefValue(Op0))
return isNSW || isNUW ? Op0 : Constant::getNullValue(Op0->getType());
// (X >> A) << A -> X
Value *X;
if (Q.IIQ.UseInstrInfo &&
match(Op0, m_Exact(m_Shr(m_Value(X), m_Specific(Op1)))))
return X;
// shl nuw i8 C, %x -> C iff C has sign bit set.
if (isNUW && match(Op0, m_Negative()))
return Op0;
// NOTE: could use computeKnownBits() / LazyValueInfo,
// but the cost-benefit analysis suggests it isn't worth it.
return nullptr;
}
Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const SimplifyQuery &Q) {
return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Q, RecursionLimit);
}
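// Illustrative sketch (hypothetical IR): the negative-constant nuw case above
// folds
//   %r = shl nuw i8 -128, %x
// to -128, because any nonzero %x would shift out the set sign bit and
// violate nuw, leaving the unshifted value as the only non-poison result.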
/// Given operands for an LShr, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Value *V = SimplifyRightShift(Instruction::LShr, Op0, Op1, isExact, Q,
MaxRecurse))
return V;
// (X << A) >> A -> X
Value *X;
if (match(Op0, m_NUWShl(m_Value(X), m_Specific(Op1))))
return X;
// ((X << A) | Y) >> A -> X if effective width of Y is not larger than A.
// We can return X as we do in the above case since OR alters no bits in X.
// SimplifyDemandedBits in InstCombine can do more general optimization for
// bit manipulation. This pattern aims to provide opportunities for other
// optimizers by supporting a simple but common case in InstSimplify.
Value *Y;
const APInt *ShRAmt, *ShLAmt;
if (match(Op1, m_APInt(ShRAmt)) &&
match(Op0, m_c_Or(m_NUWShl(m_Value(X), m_APInt(ShLAmt)), m_Value(Y))) &&
*ShRAmt == *ShLAmt) {
const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
const unsigned EffWidthY = YKnown.countMaxActiveBits();
if (ShRAmt->uge(EffWidthY))
return X;
}
return nullptr;
}
Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
const SimplifyQuery &Q) {
return ::SimplifyLShrInst(Op0, Op1, isExact, Q, RecursionLimit);
}
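// Illustrative sketch (hypothetical IR, assuming %lo fits in 16 bits):
//   %hi     = shl nuw i32 %x, 16
//   %packed = or i32 %hi, %lo
//   %r      = lshr i32 %packed, 16
// returns %x via the or-of-shifted-value case above, since the bits
// contributed by %lo are entirely shifted out.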
/// Given operands for an AShr, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Value *V = SimplifyRightShift(Instruction::AShr, Op0, Op1, isExact, Q,
MaxRecurse))
return V;
// -1 >>a X --> -1
// (-1 << X) >>a X --> -1
// Do not return Op0 because it may contain undef elements if it's a vector.
if (match(Op0, m_AllOnes()) ||
match(Op0, m_Shl(m_AllOnes(), m_Specific(Op1))))
return Constant::getAllOnesValue(Op0->getType());
// (X << A) >> A -> X
Value *X;
if (Q.IIQ.UseInstrInfo && match(Op0, m_NSWShl(m_Value(X), m_Specific(Op1))))
return X;
// Arithmetic shifting an all-sign-bit value is a no-op.
unsigned NumSignBits = ComputeNumSignBits(Op0, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
if (NumSignBits == Op0->getType()->getScalarSizeInBits())
return Op0;
return nullptr;
}
Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
const SimplifyQuery &Q) {
return ::SimplifyAShrInst(Op0, Op1, isExact, Q, RecursionLimit);
}
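// Illustrative sketch (hypothetical IR): if every bit of the value is a copy
// of its sign bit, e.g.
//   %x = ashr i32 %v, 31
//   %r = ashr i32 %x, %n
// then ComputeNumSignBits equals the bit width and %r simplifies to %x.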
/// Commuted variants are assumed to be handled by calling this function again
/// with the parameters swapped.
static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp,
ICmpInst *UnsignedICmp, bool IsAnd,
const SimplifyQuery &Q) {
Value *X, *Y;
ICmpInst::Predicate EqPred;
if (!match(ZeroICmp, m_ICmp(EqPred, m_Value(Y), m_Zero())) ||
!ICmpInst::isEquality(EqPred))
return nullptr;
ICmpInst::Predicate UnsignedPred;
Value *A, *B;
// Y = (A - B);
if (match(Y, m_Sub(m_Value(A), m_Value(B)))) {
if (match(UnsignedICmp,
m_c_ICmp(UnsignedPred, m_Specific(A), m_Specific(B))) &&
ICmpInst::isUnsigned(UnsignedPred)) {
// A >=/<= B || (A - B) != 0 <--> true
if ((UnsignedPred == ICmpInst::ICMP_UGE ||
UnsignedPred == ICmpInst::ICMP_ULE) &&
EqPred == ICmpInst::ICMP_NE && !IsAnd)
return ConstantInt::getTrue(UnsignedICmp->getType());
// A </> B && (A - B) == 0 <--> false
if ((UnsignedPred == ICmpInst::ICMP_ULT ||
UnsignedPred == ICmpInst::ICMP_UGT) &&
EqPred == ICmpInst::ICMP_EQ && IsAnd)
return ConstantInt::getFalse(UnsignedICmp->getType());
// A </> B && (A - B) != 0 <--> A </> B
// A </> B || (A - B) != 0 <--> (A - B) != 0
if (EqPred == ICmpInst::ICMP_NE && (UnsignedPred == ICmpInst::ICMP_ULT ||
UnsignedPred == ICmpInst::ICMP_UGT))
return IsAnd ? UnsignedICmp : ZeroICmp;
// A <=/>= B && (A - B) == 0 <--> (A - B) == 0
// A <=/>= B || (A - B) == 0 <--> A <=/>= B
if (EqPred == ICmpInst::ICMP_EQ && (UnsignedPred == ICmpInst::ICMP_ULE ||
UnsignedPred == ICmpInst::ICMP_UGE))
return IsAnd ? ZeroICmp : UnsignedICmp;
}
// Given Y = (A - B)
// Y >= A && Y != 0 --> Y >= A iff B != 0
// Y < A || Y == 0 --> Y < A iff B != 0
if (match(UnsignedICmp,
m_c_ICmp(UnsignedPred, m_Specific(Y), m_Specific(A)))) {
if (UnsignedPred == ICmpInst::ICMP_UGE && IsAnd &&
EqPred == ICmpInst::ICMP_NE &&
isKnownNonZero(B, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
return UnsignedICmp;
if (UnsignedPred == ICmpInst::ICMP_ULT && !IsAnd &&
EqPred == ICmpInst::ICMP_EQ &&
isKnownNonZero(B, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
return UnsignedICmp;
}
}
if (match(UnsignedICmp, m_ICmp(UnsignedPred, m_Value(X), m_Specific(Y))) &&
ICmpInst::isUnsigned(UnsignedPred))
;
else if (match(UnsignedICmp,
m_ICmp(UnsignedPred, m_Specific(Y), m_Value(X))) &&
ICmpInst::isUnsigned(UnsignedPred))
UnsignedPred = ICmpInst::getSwappedPredicate(UnsignedPred);
else
return nullptr;
// X > Y && Y == 0 --> Y == 0 iff X != 0
// X > Y || Y == 0 --> X > Y iff X != 0
if (UnsignedPred == ICmpInst::ICMP_UGT && EqPred == ICmpInst::ICMP_EQ &&
isKnownNonZero(X, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
return IsAnd ? ZeroICmp : UnsignedICmp;
// X <= Y && Y != 0 --> X <= Y iff X != 0
// X <= Y || Y != 0 --> Y != 0 iff X != 0
if (UnsignedPred == ICmpInst::ICMP_ULE && EqPred == ICmpInst::ICMP_NE &&
isKnownNonZero(X, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
return IsAnd ? UnsignedICmp : ZeroICmp;
// The transforms below here are expected to be handled more generally with
// simplifyAndOrOfICmpsWithLimitConst() or in InstCombine's
// foldAndOrOfICmpsWithConstEq(). If we are looking to trim optimizer overlap,
// these are candidates for removal.
// X < Y && Y != 0 --> X < Y
// X < Y || Y != 0 --> Y != 0
if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_NE)
return IsAnd ? UnsignedICmp : ZeroICmp;
// X >= Y && Y == 0 --> Y == 0
// X >= Y || Y == 0 --> X >= Y
if (UnsignedPred == ICmpInst::ICMP_UGE && EqPred == ICmpInst::ICMP_EQ)
return IsAnd ? ZeroICmp : UnsignedICmp;
// X < Y && Y == 0 --> false
if (UnsignedPred == ICmpInst::ICMP_ULT && EqPred == ICmpInst::ICMP_EQ &&
IsAnd)
return getFalse(UnsignedICmp->getType());
// X >= Y || Y != 0 --> true
if (UnsignedPred == ICmpInst::ICMP_UGE && EqPred == ICmpInst::ICMP_NE &&
!IsAnd)
return getTrue(UnsignedICmp->getType());
return nullptr;
}
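// Illustrative sketch (hypothetical IR): one instance of the folds above is
//   %d  = sub i32 %a, %b
//   %ne = icmp ne i32 %d, 0
//   %ge = icmp uge i32 %a, %b
//   %or = or i1 %ge, %ne
// which simplifies to true: either %a >= %b, or the subtraction is nonzero.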
/// Commuted variants are assumed to be handled by calling this function again
/// with the parameters swapped.
static Value *simplifyAndOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) {
ICmpInst::Predicate Pred0, Pred1;
Value *A, *B;
if (!match(Op0, m_ICmp(Pred0, m_Value(A), m_Value(B))) ||
!match(Op1, m_ICmp(Pred1, m_Specific(A), m_Specific(B))))
return nullptr;
// We have (icmp Pred0, A, B) & (icmp Pred1, A, B).
// If Op1 is always implied true by Op0, then Op0 is a subset of Op1, and we
// can eliminate Op1 from this 'and'.
if (ICmpInst::isImpliedTrueByMatchingCmp(Pred0, Pred1))
return Op0;
// Check for any combination of predicates that are guaranteed to be disjoint.
if ((Pred0 == ICmpInst::getInversePredicate(Pred1)) ||
(Pred0 == ICmpInst::ICMP_EQ && ICmpInst::isFalseWhenEqual(Pred1)) ||
(Pred0 == ICmpInst::ICMP_SLT && Pred1 == ICmpInst::ICMP_SGT) ||
(Pred0 == ICmpInst::ICMP_ULT && Pred1 == ICmpInst::ICMP_UGT))
return getFalse(Op0->getType());
return nullptr;
}
/// Commuted variants are assumed to be handled by calling this function again
/// with the parameters swapped.
static Value *simplifyOrOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) {
ICmpInst::Predicate Pred0, Pred1;
Value *A, *B;
if (!match(Op0, m_ICmp(Pred0, m_Value(A), m_Value(B))) ||
!match(Op1, m_ICmp(Pred1, m_Specific(A), m_Specific(B))))
return nullptr;
// We have (icmp Pred0, A, B) | (icmp Pred1, A, B).
// If Op1 is always implied true by Op0, then Op0 is a subset of Op1, and we
// can eliminate Op0 from this 'or'.
if (ICmpInst::isImpliedTrueByMatchingCmp(Pred0, Pred1))
return Op1;
// Check for any combination of predicates that cover the entire range of
// possibilities.
if ((Pred0 == ICmpInst::getInversePredicate(Pred1)) ||
(Pred0 == ICmpInst::ICMP_NE && ICmpInst::isTrueWhenEqual(Pred1)) ||
(Pred0 == ICmpInst::ICMP_SLE && Pred1 == ICmpInst::ICMP_SGE) ||
(Pred0 == ICmpInst::ICMP_ULE && Pred1 == ICmpInst::ICMP_UGE))
return getTrue(Op0->getType());
return nullptr;
}
/// Test if a pair of compares with a shared operand and 2 constants has an
/// empty set intersection, full set union, or if one compare is a superset of
/// the other.
static Value *simplifyAndOrOfICmpsWithConstants(ICmpInst *Cmp0, ICmpInst *Cmp1,
bool IsAnd) {
// Look for this pattern: {and/or} (icmp X, C0), (icmp X, C1)).
if (Cmp0->getOperand(0) != Cmp1->getOperand(0))
return nullptr;
const APInt *C0, *C1;
if (!match(Cmp0->getOperand(1), m_APInt(C0)) ||
!match(Cmp1->getOperand(1), m_APInt(C1)))
return nullptr;
auto Range0 = ConstantRange::makeExactICmpRegion(Cmp0->getPredicate(), *C0);
auto Range1 = ConstantRange::makeExactICmpRegion(Cmp1->getPredicate(), *C1);
// For and-of-compares, check if the intersection is empty:
// (icmp X, C0) && (icmp X, C1) --> empty set --> false
if (IsAnd && Range0.intersectWith(Range1).isEmptySet())
return getFalse(Cmp0->getType());
// For or-of-compares, check if the union is full:
// (icmp X, C0) || (icmp X, C1) --> full set --> true
if (!IsAnd && Range0.unionWith(Range1).isFullSet())
return getTrue(Cmp0->getType());
// Is one range a superset of the other?
// If this is and-of-compares, take the smaller set:
// (icmp sgt X, 4) && (icmp sgt X, 42) --> icmp sgt X, 42
// If this is or-of-compares, take the larger set:
// (icmp sgt X, 4) || (icmp sgt X, 42) --> icmp sgt X, 4
if (Range0.contains(Range1))
return IsAnd ? Cmp1 : Cmp0;
if (Range1.contains(Range0))
return IsAnd ? Cmp0 : Cmp1;
return nullptr;
}
static Value *simplifyAndOrOfICmpsWithZero(ICmpInst *Cmp0, ICmpInst *Cmp1,
bool IsAnd) {
ICmpInst::Predicate P0 = Cmp0->getPredicate(), P1 = Cmp1->getPredicate();
if (!match(Cmp0->getOperand(1), m_Zero()) ||
!match(Cmp1->getOperand(1), m_Zero()) || P0 != P1)
return nullptr;
if ((IsAnd && P0 != ICmpInst::ICMP_NE) || (!IsAnd && P1 != ICmpInst::ICMP_EQ))
return nullptr;
// We have either "(X == 0 || Y == 0)" or "(X != 0 && Y != 0)".
Value *X = Cmp0->getOperand(0);
Value *Y = Cmp1->getOperand(0);
// If one of the compares is a masked version of a (not) null check, then
// that compare implies the other, so we eliminate the other. Optionally, look
// through a pointer-to-int cast to match a null check of a pointer type.
// (X == 0) || (([ptrtoint] X & ?) == 0) --> ([ptrtoint] X & ?) == 0
// (X == 0) || ((? & [ptrtoint] X) == 0) --> (? & [ptrtoint] X) == 0
// (X != 0) && (([ptrtoint] X & ?) != 0) --> ([ptrtoint] X & ?) != 0
// (X != 0) && ((? & [ptrtoint] X) != 0) --> (? & [ptrtoint] X) != 0
if (match(Y, m_c_And(m_Specific(X), m_Value())) ||
match(Y, m_c_And(m_PtrToInt(m_Specific(X)), m_Value())))
return Cmp1;
// (([ptrtoint] Y & ?) == 0) || (Y == 0) --> ([ptrtoint] Y & ?) == 0
// ((? & [ptrtoint] Y) == 0) || (Y == 0) --> (? & [ptrtoint] Y) == 0
// (([ptrtoint] Y & ?) != 0) && (Y != 0) --> ([ptrtoint] Y & ?) != 0
// ((? & [ptrtoint] Y) != 0) && (Y != 0) --> (? & [ptrtoint] Y) != 0
if (match(X, m_c_And(m_Specific(Y), m_Value())) ||
match(X, m_c_And(m_PtrToInt(m_Specific(Y)), m_Value())))
return Cmp0;
return nullptr;
}
static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1,
const InstrInfoQuery &IIQ) {
// (icmp (add V, C0), C1) & (icmp V, C0)
ICmpInst::Predicate Pred0, Pred1;
const APInt *C0, *C1;
Value *V;
if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_APInt(C0)), m_APInt(C1))))
return nullptr;
if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Value())))
return nullptr;
auto *AddInst = cast<OverflowingBinaryOperator>(Op0->getOperand(0));
if (AddInst->getOperand(1) != Op1->getOperand(1))
return nullptr;
Type *ITy = Op0->getType();
bool isNSW = IIQ.hasNoSignedWrap(AddInst);
bool isNUW = IIQ.hasNoUnsignedWrap(AddInst);
const APInt Delta = *C1 - *C0;
if (C0->isStrictlyPositive()) {
if (Delta == 2) {
if (Pred0 == ICmpInst::ICMP_ULT && Pred1 == ICmpInst::ICMP_SGT)
return getFalse(ITy);
if (Pred0 == ICmpInst::ICMP_SLT && Pred1 == ICmpInst::ICMP_SGT && isNSW)
return getFalse(ITy);
}
if (Delta == 1) {
if (Pred0 == ICmpInst::ICMP_ULE && Pred1 == ICmpInst::ICMP_SGT)
return getFalse(ITy);
if (Pred0 == ICmpInst::ICMP_SLE && Pred1 == ICmpInst::ICMP_SGT && isNSW)
return getFalse(ITy);
}
}
if (C0->getBoolValue() && isNUW) {
if (Delta == 2)
if (Pred0 == ICmpInst::ICMP_ULT && Pred1 == ICmpInst::ICMP_UGT)
return getFalse(ITy);
if (Delta == 1)
if (Pred0 == ICmpInst::ICMP_ULE && Pred1 == ICmpInst::ICMP_UGT)
return getFalse(ITy);
}
return nullptr;
}
/// Try to eliminate compares with signed or unsigned min/max constants.
static Value *simplifyAndOrOfICmpsWithLimitConst(ICmpInst *Cmp0, ICmpInst *Cmp1,
bool IsAnd) {
// Canonicalize an equality compare as Cmp0.
if (Cmp1->isEquality())
std::swap(Cmp0, Cmp1);
if (!Cmp0->isEquality())
return nullptr;
// The non-equality compare must include a common operand (X). Canonicalize
// the common operand as operand 0 (the predicate is swapped if the common
// operand was operand 1).
ICmpInst::Predicate Pred0 = Cmp0->getPredicate();
Value *X = Cmp0->getOperand(0);
ICmpInst::Predicate Pred1;
bool HasNotOp = match(Cmp1, m_c_ICmp(Pred1, m_Not(m_Specific(X)), m_Value()));
if (!HasNotOp && !match(Cmp1, m_c_ICmp(Pred1, m_Specific(X), m_Value())))
return nullptr;
if (ICmpInst::isEquality(Pred1))
return nullptr;
// The equality compare must be against a constant. Flip bits if we matched
// a bitwise not. Convert a null pointer constant to an integer zero value.
APInt MinMaxC;
const APInt *C;
if (match(Cmp0->getOperand(1), m_APInt(C)))
MinMaxC = HasNotOp ? ~*C : *C;
else if (isa<ConstantPointerNull>(Cmp0->getOperand(1)))
MinMaxC = APInt::getZero(8);
else
return nullptr;
// DeMorganize if this is 'or': P0 || P1 --> !P0 && !P1.
if (!IsAnd) {
Pred0 = ICmpInst::getInversePredicate(Pred0);
Pred1 = ICmpInst::getInversePredicate(Pred1);
}
// Normalize to unsigned compare and unsigned min/max value.
// Example for 8-bit: -128 + 128 -> 0; 127 + 128 -> 255
if (ICmpInst::isSigned(Pred1)) {
Pred1 = ICmpInst::getUnsignedPredicate(Pred1);
MinMaxC += APInt::getSignedMinValue(MinMaxC.getBitWidth());
}
// (X != MAX) && (X < Y) --> X < Y
// (X == MAX) || (X >= Y) --> X >= Y
if (MinMaxC.isMaxValue())
if (Pred0 == ICmpInst::ICMP_NE && Pred1 == ICmpInst::ICMP_ULT)
return Cmp1;
// (X != MIN) && (X > Y) --> X > Y
// (X == MIN) || (X <= Y) --> X <= Y
if (MinMaxC.isMinValue())
if (Pred0 == ICmpInst::ICMP_NE && Pred1 == ICmpInst::ICMP_UGT)
return Cmp1;
return nullptr;
}
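// Illustrative sketch (hypothetical IR): after the signed predicate is
// normalized to unsigned (127 + 128 == 255 == UMAX for i8),
//   %ne  = icmp ne  i8 %x, 127
//   %lt  = icmp slt i8 %x, %y
//   %and = and i1 %ne, %lt
// hits the "(X != MAX) && (X < Y) --> X < Y" case and simplifies to %lt.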
static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1,
const SimplifyQuery &Q) {
if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true, Q))
return X;
if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/true, Q))
return X;
if (Value *X = simplifyAndOfICmpsWithSameOperands(Op0, Op1))
return X;
if (Value *X = simplifyAndOfICmpsWithSameOperands(Op1, Op0))
return X;
if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, true))
return X;
if (Value *X = simplifyAndOrOfICmpsWithLimitConst(Op0, Op1, true))
return X;
if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, true))
return X;
if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1, Q.IIQ))
return X;
if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0, Q.IIQ))
return X;
return nullptr;
}
static Value *simplifyOrOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1,
const InstrInfoQuery &IIQ) {
// (icmp (add V, C0), C1) | (icmp V, C0)
ICmpInst::Predicate Pred0, Pred1;
const APInt *C0, *C1;
Value *V;
if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_APInt(C0)), m_APInt(C1))))
return nullptr;
if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Value())))
return nullptr;
auto *AddInst = cast<BinaryOperator>(Op0->getOperand(0));
if (AddInst->getOperand(1) != Op1->getOperand(1))
return nullptr;
Type *ITy = Op0->getType();
bool isNSW = IIQ.hasNoSignedWrap(AddInst);
bool isNUW = IIQ.hasNoUnsignedWrap(AddInst);
const APInt Delta = *C1 - *C0;
if (C0->isStrictlyPositive()) {
if (Delta == 2) {
if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_SLE)
return getTrue(ITy);
if (Pred0 == ICmpInst::ICMP_SGE && Pred1 == ICmpInst::ICMP_SLE && isNSW)
return getTrue(ITy);
}
if (Delta == 1) {
if (Pred0 == ICmpInst::ICMP_UGT && Pred1 == ICmpInst::ICMP_SLE)
return getTrue(ITy);
if (Pred0 == ICmpInst::ICMP_SGT && Pred1 == ICmpInst::ICMP_SLE && isNSW)
return getTrue(ITy);
}
}
if (C0->getBoolValue() && isNUW) {
if (Delta == 2)
if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_ULE)
return getTrue(ITy);
if (Delta == 1)
if (Pred0 == ICmpInst::ICMP_UGT && Pred1 == ICmpInst::ICMP_ULE)
return getTrue(ITy);
}
return nullptr;
}
static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1,
const SimplifyQuery &Q) {
if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false, Q))
return X;
if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/false, Q))
return X;
if (Value *X = simplifyOrOfICmpsWithSameOperands(Op0, Op1))
return X;
if (Value *X = simplifyOrOfICmpsWithSameOperands(Op1, Op0))
return X;
if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, false))
return X;
if (Value *X = simplifyAndOrOfICmpsWithLimitConst(Op0, Op1, false))
return X;
if (Value *X = simplifyAndOrOfICmpsWithZero(Op0, Op1, false))
return X;
if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1, Q.IIQ))
return X;
if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0, Q.IIQ))
return X;
return nullptr;
}
static Value *simplifyAndOrOfFCmps(const TargetLibraryInfo *TLI,
FCmpInst *LHS, FCmpInst *RHS, bool IsAnd) {
Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);
if (LHS0->getType() != RHS0->getType())
return nullptr;
FCmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
if ((PredL == FCmpInst::FCMP_ORD && PredR == FCmpInst::FCMP_ORD && IsAnd) ||
(PredL == FCmpInst::FCMP_UNO && PredR == FCmpInst::FCMP_UNO && !IsAnd)) {
// (fcmp ord NNAN, X) & (fcmp ord X, Y) --> fcmp ord X, Y
// (fcmp ord NNAN, X) & (fcmp ord Y, X) --> fcmp ord Y, X
// (fcmp ord X, NNAN) & (fcmp ord X, Y) --> fcmp ord X, Y
// (fcmp ord X, NNAN) & (fcmp ord Y, X) --> fcmp ord Y, X
// (fcmp uno NNAN, X) | (fcmp uno X, Y) --> fcmp uno X, Y
// (fcmp uno NNAN, X) | (fcmp uno Y, X) --> fcmp uno Y, X
// (fcmp uno X, NNAN) | (fcmp uno X, Y) --> fcmp uno X, Y
// (fcmp uno X, NNAN) | (fcmp uno Y, X) --> fcmp uno Y, X
if ((isKnownNeverNaN(LHS0, TLI) && (LHS1 == RHS0 || LHS1 == RHS1)) ||
(isKnownNeverNaN(LHS1, TLI) && (LHS0 == RHS0 || LHS0 == RHS1)))
return RHS;
// (fcmp ord X, Y) & (fcmp ord NNAN, X) --> fcmp ord X, Y
// (fcmp ord Y, X) & (fcmp ord NNAN, X) --> fcmp ord Y, X
// (fcmp ord X, Y) & (fcmp ord X, NNAN) --> fcmp ord X, Y
// (fcmp ord Y, X) & (fcmp ord X, NNAN) --> fcmp ord Y, X
// (fcmp uno X, Y) | (fcmp uno NNAN, X) --> fcmp uno X, Y
// (fcmp uno Y, X) | (fcmp uno NNAN, X) --> fcmp uno Y, X
// (fcmp uno X, Y) | (fcmp uno X, NNAN) --> fcmp uno X, Y
// (fcmp uno Y, X) | (fcmp uno X, NNAN) --> fcmp uno Y, X
if ((isKnownNeverNaN(RHS0, TLI) && (RHS1 == LHS0 || RHS1 == LHS1)) ||
(isKnownNeverNaN(RHS1, TLI) && (RHS0 == LHS0 || RHS0 == LHS1)))
return LHS;
}
return nullptr;
}
static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q,
Value *Op0, Value *Op1, bool IsAnd) {
// Look through casts of the 'and' operands to find compares.
auto *Cast0 = dyn_cast<CastInst>(Op0);
auto *Cast1 = dyn_cast<CastInst>(Op1);
if (Cast0 && Cast1 && Cast0->getOpcode() == Cast1->getOpcode() &&
Cast0->getSrcTy() == Cast1->getSrcTy()) {
Op0 = Cast0->getOperand(0);
Op1 = Cast1->getOperand(0);
}
Value *V = nullptr;
auto *ICmp0 = dyn_cast<ICmpInst>(Op0);
auto *ICmp1 = dyn_cast<ICmpInst>(Op1);
if (ICmp0 && ICmp1)
V = IsAnd ? simplifyAndOfICmps(ICmp0, ICmp1, Q)
: simplifyOrOfICmps(ICmp0, ICmp1, Q);
auto *FCmp0 = dyn_cast<FCmpInst>(Op0);
auto *FCmp1 = dyn_cast<FCmpInst>(Op1);
if (FCmp0 && FCmp1)
V = simplifyAndOrOfFCmps(Q.TLI, FCmp0, FCmp1, IsAnd);
if (!V)
return nullptr;
if (!Cast0)
return V;
// If we looked through casts, we can only handle a constant simplification
// because we are not allowed to create a cast instruction here.
if (auto *C = dyn_cast<Constant>(V))
return ConstantExpr::getCast(Cast0->getOpcode(), C, Cast0->getType());
return nullptr;
}
/// Given a bitwise logic op, check if the operands are add/sub with a common
/// source value and inverted constant (identity: C - X -> ~(X + ~C)).
static Value *simplifyLogicOfAddSub(Value *Op0, Value *Op1,
Instruction::BinaryOps Opcode) {
assert(Op0->getType() == Op1->getType() && "Mismatched binop types");
assert(BinaryOperator::isBitwiseLogicOp(Opcode) && "Expected logic op");
Value *X;
Constant *C1, *C2;
if ((match(Op0, m_Add(m_Value(X), m_Constant(C1))) &&
match(Op1, m_Sub(m_Constant(C2), m_Specific(X)))) ||
(match(Op1, m_Add(m_Value(X), m_Constant(C1))) &&
match(Op0, m_Sub(m_Constant(C2), m_Specific(X))))) {
if (ConstantExpr::getNot(C1) == C2) {
// (X + C) & (~C - X) --> (X + C) & ~(X + C) --> 0
// (X + C) | (~C - X) --> (X + C) | ~(X + C) --> -1
// (X + C) ^ (~C - X) --> (X + C) ^ ~(X + C) --> -1
Type *Ty = Op0->getType();
return Opcode == Instruction::And ? ConstantInt::getNullValue(Ty)
: ConstantInt::getAllOnesValue(Ty);
}
}
return nullptr;
}
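// Illustrative sketch (hypothetical IR): with C2 == ~C1 the operands are
// bitwise complements, e.g.
//   %a = add i32 %x, 5
//   %s = sub i32 -6, %x        ; -6 == ~5, so %s == ~(%x + 5)
// so "and i32 %a, %s" folds to 0 and the or/xor forms fold to -1.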
/// Given operands for an And, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Instruction::And, Op0, Op1, Q))
return C;
// X & poison -> poison
if (isa<PoisonValue>(Op1))
return Op1;
// X & undef -> 0
if (Q.isUndefValue(Op1))
return Constant::getNullValue(Op0->getType());
// X & X = X
if (Op0 == Op1)
return Op0;
// X & 0 = 0
if (match(Op1, m_Zero()))
return Constant::getNullValue(Op0->getType());
// X & -1 = X
if (match(Op1, m_AllOnes()))
return Op0;
// A & ~A = ~A & A = 0
if (match(Op0, m_Not(m_Specific(Op1))) ||
match(Op1, m_Not(m_Specific(Op0))))
return Constant::getNullValue(Op0->getType());
// (A | ?) & A = A
if (match(Op0, m_c_Or(m_Specific(Op1), m_Value())))
return Op1;
// A & (A | ?) = A
if (match(Op1, m_c_Or(m_Specific(Op0), m_Value())))
return Op0;
// (X | Y) & (X | ~Y) --> X (commuted 8 ways)
Value *X, *Y;
if (match(Op0, m_c_Or(m_Value(X), m_Not(m_Value(Y)))) &&
match(Op1, m_c_Or(m_Deferred(X), m_Deferred(Y))))
return X;
if (match(Op1, m_c_Or(m_Value(X), m_Not(m_Value(Y)))) &&
match(Op0, m_c_Or(m_Deferred(X), m_Deferred(Y))))
return X;
if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::And))
return V;
// A mask that only clears known zeros of a shifted value is a no-op.
const APInt *Mask;
const APInt *ShAmt;
if (match(Op1, m_APInt(Mask))) {
// If all bits in the inverted and shifted mask are clear:
// and (shl X, ShAmt), Mask --> shl X, ShAmt
if (match(Op0, m_Shl(m_Value(X), m_APInt(ShAmt))) &&
(~(*Mask)).lshr(*ShAmt).isZero())
return Op0;
// If all bits in the inverted and shifted mask are clear:
// and (lshr X, ShAmt), Mask --> lshr X, ShAmt
if (match(Op0, m_LShr(m_Value(X), m_APInt(ShAmt))) &&
(~(*Mask)).shl(*ShAmt).isZero())
return Op0;
}
// If we have a multiplication overflow check that is being 'and'ed with a
// check that one of the multipliers is not zero, we can omit the 'and', and
// only keep the overflow check.
if (isCheckForZeroAndMulWithOverflow(Op0, Op1, true))
return Op1;
if (isCheckForZeroAndMulWithOverflow(Op1, Op0, true))
return Op0;
// A & (-A) = A if A is a power of two or zero.
if (match(Op0, m_Neg(m_Specific(Op1))) ||
match(Op1, m_Neg(m_Specific(Op0)))) {
if (isKnownToBeAPowerOfTwo(Op0, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI,
Q.DT))
return Op0;
if (isKnownToBeAPowerOfTwo(Op1, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI,
Q.DT))
return Op1;
}
// This is a similar pattern used for checking if a value is a power-of-2:
// (A - 1) & A --> 0 (if A is a power-of-2 or 0)
// A & (A - 1) --> 0 (if A is a power-of-2 or 0)
if (match(Op0, m_Add(m_Specific(Op1), m_AllOnes())) &&
isKnownToBeAPowerOfTwo(Op1, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT))
return Constant::getNullValue(Op1->getType());
if (match(Op1, m_Add(m_Specific(Op0), m_AllOnes())) &&
isKnownToBeAPowerOfTwo(Op0, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT))
return Constant::getNullValue(Op0->getType());
if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, true))
return V;
// Try some generic simplifications for associative operations.
if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, Q,
MaxRecurse))
return V;
// And distributes over Or. Try some generic simplifications based on this.
if (Value *V = expandCommutativeBinOp(Instruction::And, Op0, Op1,
Instruction::Or, Q, MaxRecurse))
return V;
// And distributes over Xor. Try some generic simplifications based on this.
if (Value *V = expandCommutativeBinOp(Instruction::And, Op0, Op1,
Instruction::Xor, Q, MaxRecurse))
return V;
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) {
if (Op0->getType()->isIntOrIntVectorTy(1)) {
// A & (A && B) -> A && B
if (match(Op1, m_Select(m_Specific(Op0), m_Value(), m_Zero())))
return Op1;
else if (match(Op0, m_Select(m_Specific(Op1), m_Value(), m_Zero())))
return Op0;
}
// If the operation is with the result of a select instruction, check
// whether operating on either branch of the select always yields the same
// value.
if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, Q,
MaxRecurse))
return V;
}
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, Q,
MaxRecurse))
return V;
// Assuming the effective width of Y is not larger than A, i.e. all bits
// from X and Y are disjoint in (X << A) | Y,
// if the mask of this AND op covers all bits of X or Y, while it covers
// no bits from the other, we can bypass this AND op. E.g.,
// ((X << A) | Y) & Mask -> Y,
// if Mask = ((1 << effective_width_of(Y)) - 1)
// ((X << A) | Y) & Mask -> X << A,
// if Mask = ((1 << effective_width_of(X)) - 1) << A
// SimplifyDemandedBits in InstCombine can optimize the general case.
// This pattern aims to help other passes for a common case.
Value *XShifted;
if (match(Op1, m_APInt(Mask)) &&
match(Op0, m_c_Or(m_CombineAnd(m_NUWShl(m_Value(X), m_APInt(ShAmt)),
m_Value(XShifted)),
m_Value(Y)))) {
const unsigned Width = Op0->getType()->getScalarSizeInBits();
const unsigned ShftCnt = ShAmt->getLimitedValue(Width);
const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
const unsigned EffWidthY = YKnown.countMaxActiveBits();
if (EffWidthY <= ShftCnt) {
const KnownBits XKnown = computeKnownBits(X, Q.DL, 0, Q.AC, Q.CxtI,
Q.DT);
const unsigned EffWidthX = XKnown.countMaxActiveBits();
const APInt EffBitsY = APInt::getLowBitsSet(Width, EffWidthY);
const APInt EffBitsX = APInt::getLowBitsSet(Width, EffWidthX) << ShftCnt;
// If the mask is extracting all bits from X or Y as is, we can skip
// this AND op.
if (EffBitsY.isSubsetOf(*Mask) && !EffBitsX.intersects(*Mask))
return Y;
if (EffBitsX.isSubsetOf(*Mask) && !EffBitsY.intersects(*Mask))
return XShifted;
}
}
// ((X | Y) ^ X ) & ((X | Y) ^ Y) --> 0
// ((X | Y) ^ Y ) & ((X | Y) ^ X) --> 0
BinaryOperator *Or;
if (match(Op0, m_c_Xor(m_Value(X),
m_CombineAnd(m_BinOp(Or),
m_c_Or(m_Deferred(X), m_Value(Y))))) &&
match(Op1, m_c_Xor(m_Specific(Or), m_Specific(Y))))
return Constant::getNullValue(Op0->getType());
return nullptr;
}
Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
return ::SimplifyAndInst(Op0, Op1, Q, RecursionLimit);
}
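// Illustrative sketch (hypothetical IR, assuming %p is known to be a power of
// two or zero): the classic power-of-two test
//   %pm1 = add i32 %p, -1
//   %r   = and i32 %pm1, %p    ; (P - 1) & P
// folds to 0 above, and the related "A & -A" case returns the power-of-two
// operand itself.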
static Value *simplifyOrLogic(Value *X, Value *Y) {
assert(X->getType() == Y->getType() && "Expected same type for 'or' ops");
Type *Ty = X->getType();
// X | ~X --> -1
if (match(Y, m_Not(m_Specific(X))))
return ConstantInt::getAllOnesValue(Ty);
// X | ~(X & ?) = -1
if (match(Y, m_Not(m_c_And(m_Specific(X), m_Value()))))
return ConstantInt::getAllOnesValue(Ty);
// X | (X & ?) --> X
if (match(Y, m_c_And(m_Specific(X), m_Value())))
return X;
Value *A, *B;
// (A ^ B) | (A | B) --> A | B
// (A ^ B) | (B | A) --> B | A
if (match(X, m_Xor(m_Value(A), m_Value(B))) &&
match(Y, m_c_Or(m_Specific(A), m_Specific(B))))
return Y;
// ~(A ^ B) | (A | B) --> -1
// ~(A ^ B) | (B | A) --> -1
if (match(X, m_Not(m_Xor(m_Value(A), m_Value(B)))) &&
match(Y, m_c_Or(m_Specific(A), m_Specific(B))))
return ConstantInt::getAllOnesValue(Ty);
// (A & ~B) | (A ^ B) --> A ^ B
// (~B & A) | (A ^ B) --> A ^ B
// (A & ~B) | (B ^ A) --> B ^ A
// (~B & A) | (B ^ A) --> B ^ A
if (match(X, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
match(Y, m_c_Xor(m_Specific(A), m_Specific(B))))
return Y;
// (~A ^ B) | (A & B) --> ~A ^ B
// (B ^ ~A) | (A & B) --> B ^ ~A
// (~A ^ B) | (B & A) --> ~A ^ B
// (B ^ ~A) | (B & A) --> B ^ ~A
if (match(X, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) &&
match(Y, m_c_And(m_Specific(A), m_Specific(B))))
return X;
// (~A | B) | (A ^ B) --> -1
// (~A | B) | (B ^ A) --> -1
// (B | ~A) | (A ^ B) --> -1
// (B | ~A) | (B ^ A) --> -1
if (match(X, m_c_Or(m_Not(m_Value(A)), m_Value(B))) &&
match(Y, m_c_Xor(m_Specific(A), m_Specific(B))))
return ConstantInt::getAllOnesValue(Ty);
// (~A & B) | ~(A | B) --> ~A
// (~A & B) | ~(B | A) --> ~A
// (B & ~A) | ~(A | B) --> ~A
// (B & ~A) | ~(B | A) --> ~A
Value *NotA;
if (match(X,
m_c_And(m_CombineAnd(m_Value(NotA), m_NotForbidUndef(m_Value(A))),
m_Value(B))) &&
match(Y, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
return NotA;
// ~(A ^ B) | (A & B) --> ~(A ^ B)
// ~(A ^ B) | (B & A) --> ~(A ^ B)
Value *NotAB;
if (match(X, m_CombineAnd(m_NotForbidUndef(m_Xor(m_Value(A), m_Value(B))),
m_Value(NotAB))) &&
match(Y, m_c_And(m_Specific(A), m_Specific(B))))
return NotAB;
// ~(A & B) | (A ^ B) --> ~(A & B)
// ~(A & B) | (B ^ A) --> ~(A & B)
if (match(X, m_CombineAnd(m_NotForbidUndef(m_And(m_Value(A), m_Value(B))),
m_Value(NotAB))) &&
match(Y, m_c_Xor(m_Specific(A), m_Specific(B))))
return NotAB;
return nullptr;
}
/// Given operands for an Or, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Instruction::Or, Op0, Op1, Q))
return C;
// X | poison -> poison
if (isa<PoisonValue>(Op1))
return Op1;
// X | undef -> -1
// X | -1 = -1
// Do not return Op1 because it may contain undef elements if it's a vector.
if (Q.isUndefValue(Op1) || match(Op1, m_AllOnes()))
return Constant::getAllOnesValue(Op0->getType());
// X | X = X
// X | 0 = X
if (Op0 == Op1 || match(Op1, m_Zero()))
return Op0;
if (Value *R = simplifyOrLogic(Op0, Op1))
return R;
if (Value *R = simplifyOrLogic(Op1, Op0))
return R;
if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::Or))
return V;
// Rotated -1 is still -1:
// (-1 << X) | (-1 >> (C - X)) --> -1
// (-1 >> X) | (-1 << (C - X)) --> -1
// ...with C <= bitwidth (and commuted variants).
Value *X, *Y;
if ((match(Op0, m_Shl(m_AllOnes(), m_Value(X))) &&
match(Op1, m_LShr(m_AllOnes(), m_Value(Y)))) ||
(match(Op1, m_Shl(m_AllOnes(), m_Value(X))) &&
match(Op0, m_LShr(m_AllOnes(), m_Value(Y))))) {
const APInt *C;
if ((match(X, m_Sub(m_APInt(C), m_Specific(Y))) ||
match(Y, m_Sub(m_APInt(C), m_Specific(X)))) &&
C->ule(X->getType()->getScalarSizeInBits())) {
return ConstantInt::getAllOnesValue(X->getType());
}
}
if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, false))
return V;
// If we have a multiplication overflow check that is being 'and'ed with a
// check that one of the multipliers is not zero, we can omit the 'and', and
// only keep the overflow check.
if (isCheckForZeroAndMulWithOverflow(Op0, Op1, false))
return Op1;
if (isCheckForZeroAndMulWithOverflow(Op1, Op0, false))
return Op0;
// Try some generic simplifications for associative operations.
if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q,
MaxRecurse))
return V;
// Or distributes over And. Try some generic simplifications based on this.
if (Value *V = expandCommutativeBinOp(Instruction::Or, Op0, Op1,
Instruction::And, Q, MaxRecurse))
return V;
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1)) {
if (Op0->getType()->isIntOrIntVectorTy(1)) {
// A | (A || B) -> A || B
if (match(Op1, m_Select(m_Specific(Op0), m_One(), m_Value())))
return Op1;
else if (match(Op0, m_Select(m_Specific(Op1), m_One(), m_Value())))
return Op0;
}
// If the operation is with the result of a select instruction, check
// whether operating on either branch of the select always yields the same
// value.
if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, Q,
MaxRecurse))
return V;
}
// (A & C1)|(B & C2)
Value *A, *B;
const APInt *C1, *C2;
if (match(Op0, m_And(m_Value(A), m_APInt(C1))) &&
match(Op1, m_And(m_Value(B), m_APInt(C2)))) {
if (*C1 == ~*C2) {
// (A & C1)|(B & C2)
// If we have: ((V + N) & C1) | (V & C2)
// .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
// replace with V+N.
Value *N;
if (C2->isMask() && // C2 == 0+1+
match(A, m_c_Add(m_Specific(B), m_Value(N)))) {
// Add commutes, try both ways.
if (MaskedValueIsZero(N, *C2, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return A;
}
// Or commutes, try both ways.
if (C1->isMask() &&
match(B, m_c_Add(m_Specific(A), m_Value(N)))) {
// Add commutes, try both ways.
if (MaskedValueIsZero(N, *C1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return B;
}
}
}
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse))
return V;
return nullptr;
}
Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
return ::SimplifyOrInst(Op0, Op1, Q, RecursionLimit);
}
/// Given operands for a Xor, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
unsigned MaxRecurse) {
if (Constant *C = foldOrCommuteConstant(Instruction::Xor, Op0, Op1, Q))
return C;
// X ^ poison -> poison
if (isa<PoisonValue>(Op1))
return Op1;
// A ^ undef -> undef
if (Q.isUndefValue(Op1))
return Op1;
// A ^ 0 = A
if (match(Op1, m_Zero()))
return Op0;
// A ^ A = 0
if (Op0 == Op1)
return Constant::getNullValue(Op0->getType());
// A ^ ~A = ~A ^ A = -1
if (match(Op0, m_Not(m_Specific(Op1))) ||
match(Op1, m_Not(m_Specific(Op0))))
return Constant::getAllOnesValue(Op0->getType());
auto foldAndOrNot = [](Value *X, Value *Y) -> Value * {
Value *A, *B;
// (~A & B) ^ (A | B) --> A -- There are 8 commuted variants.
if (match(X, m_c_And(m_Not(m_Value(A)), m_Value(B))) &&
match(Y, m_c_Or(m_Specific(A), m_Specific(B))))
return A;
// (~A | B) ^ (A & B) --> ~A -- There are 8 commuted variants.
// The 'not' op must contain a complete -1 operand (no undef elements for
// vector) for the transform to be safe.
Value *NotA;
if (match(X,
m_c_Or(m_CombineAnd(m_NotForbidUndef(m_Value(A)), m_Value(NotA)),
m_Value(B))) &&
match(Y, m_c_And(m_Specific(A), m_Specific(B))))
return NotA;
return nullptr;
};
if (Value *R = foldAndOrNot(Op0, Op1))
return R;
if (Value *R = foldAndOrNot(Op1, Op0))
return R;
if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::Xor))
return V;
// Try some generic simplifications for associative operations.
if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, Q,
MaxRecurse))
return V;
// Threading Xor over selects and phi nodes is pointless, so don't bother.
// Threading over the select in "A ^ select(cond, B, C)" means evaluating
// "A^B" and "A^C" and seeing if they are equal; but they are equal if and
// only if B and C are equal. If B and C are equal then (since we assume
// that operands have already been simplified) "select(cond, B, C)" should
// have been simplified to the common value of B and C already. Analysing
// "A^B" and "A^C" thus gains nothing, but costs compile time. Similarly
// for threading over phi nodes.
return nullptr;
}
Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
return ::SimplifyXorInst(Op0, Op1, Q, RecursionLimit);
}
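// Illustrative sketch (hypothetical IR): the foldAndOrNot lambda above turns
//   %na  = xor i32 %a, -1
//   %lhs = and i32 %na, %b     ; ~A & B
//   %rhs = or  i32 %a, %b      ; A | B
//   %r   = xor i32 %lhs, %rhs
// into %a, since for each bit the two sides differ exactly where A is set.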
static Type *GetCompareTy(Value *Op) {
return CmpInst::makeCmpResultType(Op->getType());
}
/// Rummage around inside V looking for something equivalent to the comparison
/// "LHS Pred RHS". Return such a value if found, otherwise return null.
/// Helper function for analyzing max/min idioms.
static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
Value *LHS, Value *RHS) {
SelectInst *SI = dyn_cast<SelectInst>(V);
if (!SI)
return nullptr;
CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
if (!Cmp)
return nullptr;
Value *CmpLHS = Cmp->getOperand(0), *CmpRHS = Cmp->getOperand(1);
if (Pred == Cmp->getPredicate() && LHS == CmpLHS && RHS == CmpRHS)
return Cmp;
if (Pred == CmpInst::getSwappedPredicate(Cmp->getPredicate()) &&
LHS == CmpRHS && RHS == CmpLHS)
return Cmp;
return nullptr;
}
// A significant optimization not implemented here is assuming that alloca
// addresses are not equal to incoming argument values. They don't *alias*,
// as we say, but that doesn't mean they aren't equal, so we take a
// conservative approach.
//
// This is inspired in part by C++11 5.10p1:
// "Two pointers of the same type compare equal if and only if they are both
// null, both point to the same function, or both represent the same
// address."
//
// This is pretty permissive.
//
// It's also partly due to C11 6.5.9p6:
// "Two pointers compare equal if and only if both are null pointers, both are
// pointers to the same object (including a pointer to an object and a
// subobject at its beginning) or function, both are pointers to one past the
// last element of the same array object, or one is a pointer to one past the
// end of one array object and the other is a pointer to the start of a
// different array object that happens to immediately follow the first array
// object in the address space."
//
// C11's version is more restrictive, however there's no reason why an argument
// couldn't be a one-past-the-end value for a stack object in the caller and be
// equal to the beginning of a stack object in the callee.
//
// If the C and C++ standards are ever made sufficiently restrictive in this
// area, it may be possible to update LLVM's semantics accordingly and reinstate
// this optimization.
static Constant *
computePointerICmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
const SimplifyQuery &Q) {
const DataLayout &DL = Q.DL;
const TargetLibraryInfo *TLI = Q.TLI;
const DominatorTree *DT = Q.DT;
const Instruction *CxtI = Q.CxtI;
const InstrInfoQuery &IIQ = Q.IIQ;
// First, skip past any trivial no-ops.
LHS = LHS->stripPointerCasts();
RHS = RHS->stripPointerCasts();
// A non-null pointer is not equal to a null pointer.
if (isa<ConstantPointerNull>(RHS) && ICmpInst::isEquality(Pred) &&
llvm::isKnownNonZero(LHS, DL, 0, nullptr, nullptr, nullptr,
IIQ.UseInstrInfo))
return ConstantInt::get(GetCompareTy(LHS),
!CmpInst::isTrueWhenEqual(Pred));
// We can only fold certain predicates on pointer comparisons.
switch (Pred) {
default:
return nullptr;
// Equality comparisons are easy to fold.
case CmpInst::ICMP_EQ:
case CmpInst::ICMP_NE:
break;
// We can only handle unsigned relational comparisons because 'inbounds' on
// a GEP only protects against unsigned wrapping.
case CmpInst::ICMP_UGT:
case CmpInst::ICMP_UGE:
case CmpInst::ICMP_ULT:
case CmpInst::ICMP_ULE:
// However, we have to switch them to their signed variants to handle
// negative indices from the base pointer.
Pred = ICmpInst::getSignedPredicate(Pred);
break;
}
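// For illustration (hypothetical IR), with a shared base %p:
//   %a = getelementptr inbounds i8, i8* %p, i64 -1
//   %b = getelementptr inbounds i8, i8* %p, i64 1
//   %c = icmp ult i8* %a, %b
// After the offsets are stripped below we compare -1 and 1; the signed
// compare gives the expected 'true', whereas an unsigned compare of the
// offsets would not.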
// Strip off any constant offsets so that we can reason about them.
// It's tempting to use getUnderlyingObject or even just stripInBoundsOffsets
// here and compare base addresses like AliasAnalysis does, however there are
// numerous hazards. AliasAnalysis and its utilities rely on special rules
// governing loads and stores which don't apply to icmps. Also, AliasAnalysis
// doesn't need to guarantee pointer inequality when it says NoAlias.
- Constant *LHSOffset = stripAndComputeConstantOffsets(DL, LHS);
- Constant *RHSOffset = stripAndComputeConstantOffsets(DL, RHS);
+
+ // Even if a non-inbounds GEP occurs along the path we can still optimize
+ // equality comparisons concerning the result.
+ bool AllowNonInbounds = ICmpInst::isEquality(Pred);
+ Constant *LHSOffset =
+ stripAndComputeConstantOffsets(DL, LHS, AllowNonInbounds);
+ Constant *RHSOffset =
+ stripAndComputeConstantOffsets(DL, RHS, AllowNonInbounds);
// If LHS and RHS are related via constant offsets to the same base
// value, we can replace it with an icmp which just compares the offsets.
if (LHS == RHS)
return ConstantExpr::getICmp(Pred, LHSOffset, RHSOffset);
// Various optimizations for (in)equality comparisons.
if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
// Different non-empty allocations that exist at the same time have
// different addresses (if the program can tell). Global variables always
// exist, so they always exist during the lifetime of each other and all
// allocas. Two different allocas usually have different addresses...
//
// However, if there's an @llvm.stackrestore dynamically in between two
// allocas, they may have the same address. It's tempting to reduce the
// scope of the problem by only looking at *static* allocas here. That would
// cover the majority of allocas while significantly reducing the likelihood
// of having an @llvm.stackrestore pop up in the middle. However, it's not
// actually impossible for an @llvm.stackrestore to pop up in the middle of
// an entry block. Also, if we have a block that's not attached to a
// function, we can't tell if it's "static" under the current definition.
// Theoretically, this problem could be fixed by creating a new kind of
// instruction specifically for static allocas. Such a new instruction
// could be required to be at the top of the entry block, thus preventing it
// from being subject to a @llvm.stackrestore. Instcombine could even
// convert regular allocas into these special allocas. It'd be nifty.
// However, until then, this problem remains open.
//
// So, we'll assume that two non-empty allocas have different addresses
// for now.
//
// With all that, if the offsets are within the bounds of their allocations
// (and not one-past-the-end! so we can't use inbounds!), and their
// allocations aren't the same, the pointers are not equal.
//
// Note that it's not necessary to check for LHS being a global variable
// address, due to canonicalization and constant folding.
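// For illustration (hypothetical IR):
//   @g = global i32 0
//   ...
//   %a = alloca i32
//   %c = icmp eq i32* %a, @g
// Both offsets are zero and the alloca and the global are distinct live
// objects, so the compare folds to false.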
if (isa<AllocaInst>(LHS) &&
(isa<AllocaInst>(RHS) || isa<GlobalVariable>(RHS))) {
ConstantInt *LHSOffsetCI = dyn_cast<ConstantInt>(LHSOffset);
ConstantInt *RHSOffsetCI = dyn_cast<ConstantInt>(RHSOffset);
uint64_t LHSSize, RHSSize;
ObjectSizeOpts Opts;
Opts.NullIsUnknownSize =
NullPointerIsDefined(cast<AllocaInst>(LHS)->getFunction());
if (LHSOffsetCI && RHSOffsetCI &&
getObjectSize(LHS, LHSSize, DL, TLI, Opts) &&
getObjectSize(RHS, RHSSize, DL, TLI, Opts)) {
const APInt &LHSOffsetValue = LHSOffsetCI->getValue();
const APInt &RHSOffsetValue = RHSOffsetCI->getValue();
if (!LHSOffsetValue.isNegative() &&
!RHSOffsetValue.isNegative() &&
LHSOffsetValue.ult(LHSSize) &&
RHSOffsetValue.ult(RHSSize)) {
return ConstantInt::get(GetCompareTy(LHS),
!CmpInst::isTrueWhenEqual(Pred));
}
}
// Repeat the above check but this time without depending on DataLayout
// or being able to compute a precise size.
if (!cast<PointerType>(LHS->getType())->isEmptyTy() &&
!cast<PointerType>(RHS->getType())->isEmptyTy() &&
LHSOffset->isNullValue() &&
RHSOffset->isNullValue())
return ConstantInt::get(GetCompareTy(LHS),
!CmpInst::isTrueWhenEqual(Pred));
}
- // Even if an non-inbounds GEP occurs along the path we can still optimize
- // equality comparisons concerning the result. We avoid walking the whole
- // chain again by starting where the last calls to
- // stripAndComputeConstantOffsets left off and accumulate the offsets.
- Constant *LHSNoBound = stripAndComputeConstantOffsets(DL, LHS, true);
- Constant *RHSNoBound = stripAndComputeConstantOffsets(DL, RHS, true);
- if (LHS == RHS)
- return ConstantExpr::getICmp(Pred,
- ConstantExpr::getAdd(LHSOffset, LHSNoBound),
- ConstantExpr::getAdd(RHSOffset, RHSNoBound));
-
// If one side of the equality comparison must come from a noalias call
// (meaning a system memory allocation function), and the other side must
// come from a pointer that cannot overlap with dynamically-allocated
// memory within the lifetime of the current function (allocas, byval
// arguments, globals), then determine the comparison result here.
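// For illustration (hypothetical IR):
//   @g = internal global i8 0
//   %m = call noalias i8* @malloc(i64 1)
//   %c = icmp eq i8* %m, @g
// The noalias call returns dynamically-allocated memory that cannot overlap
// the local-linkage global, so the equality folds to false.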
SmallVector<const Value *, 8> LHSUObjs, RHSUObjs;
getUnderlyingObjects(LHS, LHSUObjs);
getUnderlyingObjects(RHS, RHSUObjs);
// Is the set of underlying objects all noalias calls?
auto IsNAC = [](ArrayRef<const Value *> Objects) {
return all_of(Objects, isNoAliasCall);
};
// Is the set of underlying objects all things which must be disjoint from
// noalias calls? For allocas, we consider only static ones (dynamic
// allocas might be transformed into calls to malloc not simultaneously
// live with the compared-to allocation). For globals, we exclude symbols
// that might be resolved lazily to symbols in another dynamically-loaded
// library (and, thus, could be malloc'ed by the implementation).
auto IsAllocDisjoint = [](ArrayRef<const Value *> Objects) {
return all_of(Objects, [](const Value *V) {
if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
return AI->getParent() && AI->getFunction() && AI->isStaticAlloca();
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
return (GV->hasLocalLinkage() || GV->hasHiddenVisibility() ||
GV->hasProtectedVisibility() || GV->hasGlobalUnnamedAddr()) &&
!GV->isThreadLocal();
if (const Argument *A = dyn_cast<Argument>(V))
return A->hasByValAttr();
return false;
});
};
if ((IsNAC(LHSUObjs) && IsAllocDisjoint(RHSUObjs)) ||
(IsNAC(RHSUObjs) && IsAllocDisjoint(LHSUObjs)))
return ConstantInt::get(GetCompareTy(LHS),
!CmpInst::isTrueWhenEqual(Pred));
// Fold comparisons for a non-escaping pointer even if the allocation call
// cannot be elided. We cannot fold malloc comparison to null. Also, the
// dynamic allocation call could be either of the operands. Note that
// the other operand cannot be based on the alloc - if it were, then
// the cmp itself would be a capture.
Value *MI = nullptr;
if (isAllocLikeFn(LHS, TLI) &&
llvm::isKnownNonZero(RHS, DL, 0, nullptr, CxtI, DT))
MI = LHS;
else if (isAllocLikeFn(RHS, TLI) &&
llvm::isKnownNonZero(LHS, DL, 0, nullptr, CxtI, DT))
MI = RHS;
// FIXME: We should also fold the compare when the pointer escapes, but the
// compare dominates the pointer escape
if (MI && !PointerMayBeCaptured(MI, true, true))
return ConstantInt::get(GetCompareTy(LHS),
CmpInst::isFalseWhenEqual(Pred));
}
// Otherwise, fail.
return nullptr;
}
/// Fold an icmp when its operands have i1 scalar type.
static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS,
Value *RHS, const SimplifyQuery &Q) {
Type *ITy = GetCompareTy(LHS); // The return type.
Type *OpTy = LHS->getType(); // The operand type.
if (!OpTy->isIntOrIntVectorTy(1))
return nullptr;
// A boolean compared to true/false can be reduced in 14 out of the 20
// (10 predicates * 2 constants) possible combinations. The other
// 6 cases require a 'not' of the LHS.
auto ExtractNotLHS = [](Value *V) -> Value * {
Value *X;
if (match(V, m_Not(m_Value(X))))
return X;
return nullptr;
};
if (match(RHS, m_Zero())) {
switch (Pred) {
case CmpInst::ICMP_NE: // X != 0 -> X
case CmpInst::ICMP_UGT: // X >u 0 -> X
case CmpInst::ICMP_SLT: // X <s 0 -> X
return LHS;
case CmpInst::ICMP_EQ: // not(X) == 0 -> X != 0 -> X
case CmpInst::ICMP_ULE: // not(X) <=u 0 -> X >u 0 -> X
case CmpInst::ICMP_SGE: // not(X) >=s 0 -> X <s 0 -> X
if (Value *X = ExtractNotLHS(LHS))
return X;
break;
case CmpInst::ICMP_ULT: // X <u 0 -> false
case CmpInst::ICMP_SGT: // X >s 0 -> false
return getFalse(ITy);
case CmpInst::ICMP_UGE: // X >=u 0 -> true
case CmpInst::ICMP_SLE: // X <=s 0 -> true
return getTrue(ITy);
default: break;
}
} else if (match(RHS, m_One())) {
switch (Pred) {
case CmpInst::ICMP_EQ: // X == 1 -> X
case CmpInst::ICMP_UGE: // X >=u 1 -> X
case CmpInst::ICMP_SLE: // X <=s -1 -> X
return LHS;
case CmpInst::ICMP_NE: // not(X) != 1 -> X == 1 -> X
case CmpInst::ICMP_ULT: // not(X) <u 1 -> X >=u 1 -> X
case CmpInst::ICMP_SGT: // not(X) >s -1 -> X <=s -1 -> X
if (Value *X = ExtractNotLHS(LHS))
return X;
break;
case CmpInst::ICMP_UGT: // X >u 1 -> false
case CmpInst::ICMP_SLT: // X <s -1 -> false
return getFalse(ITy);
case CmpInst::ICMP_ULE: // X <=u 1 -> true
case CmpInst::ICMP_SGE: // X >=s -1 -> true
return getTrue(ITy);
default: break;
}
}
switch (Pred) {
default:
break;
case ICmpInst::ICMP_UGE:
if (isImpliedCondition(RHS, LHS, Q.DL).getValueOr(false))
return getTrue(ITy);
break;
case ICmpInst::ICMP_SGE:
/// For signed comparison, the values for an i1 are 0 and -1
/// respectively. This maps into a truth table of:
/// LHS | RHS | LHS >=s RHS | LHS implies RHS
/// 0 | 0 | 1 (0 >= 0) | 1
/// 0 | 1 | 1 (0 >= -1) | 1
/// 1 | 0 | 0 (-1 >= 0) | 0
/// 1 | 1 | 1 (-1 >= -1) | 1
if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false))
return getTrue(ITy);
break;
case ICmpInst::ICMP_ULE:
if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false))
return getTrue(ITy);
break;
}
return nullptr;
}
/// Try hard to fold icmp with zero RHS because this is a common case.
static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
Value *RHS, const SimplifyQuery &Q) {
if (!match(RHS, m_Zero()))
return nullptr;
Type *ITy = GetCompareTy(LHS); // The return type.
switch (Pred) {
default:
llvm_unreachable("Unknown ICmp predicate!");
case ICmpInst::ICMP_ULT:
return getFalse(ITy);
case ICmpInst::ICMP_UGE:
return getTrue(ITy);
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_ULE:
if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.IIQ.UseInstrInfo))
return getFalse(ITy);
break;
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_UGT:
if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT, Q.IIQ.UseInstrInfo))
return getTrue(ITy);
break;
case ICmpInst::ICMP_SLT: {
KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
if (LHSKnown.isNegative())
return getTrue(ITy);
if (LHSKnown.isNonNegative())
return getFalse(ITy);
break;
}
case ICmpInst::ICMP_SLE: {
KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
if (LHSKnown.isNegative())
return getTrue(ITy);
if (LHSKnown.isNonNegative() &&
isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return getFalse(ITy);
break;
}
case ICmpInst::ICMP_SGE: {
KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
if (LHSKnown.isNegative())
return getFalse(ITy);
if (LHSKnown.isNonNegative())
return getTrue(ITy);
break;
}
case ICmpInst::ICMP_SGT: {
KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
if (LHSKnown.isNegative())
return getFalse(ITy);
if (LHSKnown.isNonNegative() &&
isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return getTrue(ITy);
break;
}
}
return nullptr;
}
static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
Value *RHS, const InstrInfoQuery &IIQ) {
Type *ITy = GetCompareTy(RHS); // The return type.
Value *X;
// Sign-bit checks can be optimized to true/false after unsigned
// floating-point casts:
// icmp slt (bitcast (uitofp X)), 0 --> false
// icmp sgt (bitcast (uitofp X)), -1 --> true
if (match(LHS, m_BitCast(m_UIToFP(m_Value(X))))) {
if (Pred == ICmpInst::ICMP_SLT && match(RHS, m_Zero()))
return ConstantInt::getFalse(ITy);
if (Pred == ICmpInst::ICMP_SGT && match(RHS, m_AllOnes()))
return ConstantInt::getTrue(ITy);
}
const APInt *C;
if (!match(RHS, m_APIntAllowUndef(C)))
return nullptr;
// Rule out tautological comparisons (e.g., ult 0 or uge 0).
ConstantRange RHS_CR = ConstantRange::makeExactICmpRegion(Pred, *C);
if (RHS_CR.isEmptySet())
return ConstantInt::getFalse(ITy);
if (RHS_CR.isFullSet())
return ConstantInt::getTrue(ITy);
ConstantRange LHS_CR =
computeConstantRange(LHS, CmpInst::isSigned(Pred), IIQ.UseInstrInfo);
if (!LHS_CR.isFullSet()) {
if (RHS_CR.contains(LHS_CR))
return ConstantInt::getTrue(ITy);
if (RHS_CR.inverse().contains(LHS_CR))
return ConstantInt::getFalse(ITy);
}
// (mul nuw/nsw X, MulC) != C --> true (if C is not a multiple of MulC)
// (mul nuw/nsw X, MulC) == C --> false (if C is not a multiple of MulC)
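// For example, (mul nuw i8 %x, 6) == 7 folds to false: a multiply by 6 that
// does not overflow is always divisible by 6, and 7 urem 6 != 0.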
const APInt *MulC;
if (ICmpInst::isEquality(Pred) &&
((match(LHS, m_NUWMul(m_Value(), m_APIntAllowUndef(MulC))) &&
*MulC != 0 && C->urem(*MulC) != 0) ||
(match(LHS, m_NSWMul(m_Value(), m_APIntAllowUndef(MulC))) &&
*MulC != 0 && C->srem(*MulC) != 0)))
return ConstantInt::get(ITy, Pred == ICmpInst::ICMP_NE);
return nullptr;
}
static Value *simplifyICmpWithBinOpOnLHS(
CmpInst::Predicate Pred, BinaryOperator *LBO, Value *RHS,
const SimplifyQuery &Q, unsigned MaxRecurse) {
Type *ITy = GetCompareTy(RHS); // The return type.
Value *Y = nullptr;
// icmp pred (or X, Y), X
if (match(LBO, m_c_Or(m_Value(Y), m_Specific(RHS)))) {
if (Pred == ICmpInst::ICMP_ULT)
return getFalse(ITy);
if (Pred == ICmpInst::ICMP_UGE)
return getTrue(ITy);
if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) {
KnownBits RHSKnown = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
if (RHSKnown.isNonNegative() && YKnown.isNegative())
return Pred == ICmpInst::ICMP_SLT ? getTrue(ITy) : getFalse(ITy);
if (RHSKnown.isNegative() || YKnown.isNonNegative())
return Pred == ICmpInst::ICMP_SLT ? getFalse(ITy) : getTrue(ITy);
}
}
// icmp pred (and X, Y), X
if (match(LBO, m_c_And(m_Value(), m_Specific(RHS)))) {
if (Pred == ICmpInst::ICMP_UGT)
return getFalse(ITy);
if (Pred == ICmpInst::ICMP_ULE)
return getTrue(ITy);
}
// icmp pred (urem X, Y), Y
if (match(LBO, m_URem(m_Value(), m_Specific(RHS)))) {
switch (Pred) {
default:
break;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE: {
KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
if (!Known.isNonNegative())
break;
LLVM_FALLTHROUGH;
}
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
return getFalse(ITy);
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE: {
KnownBits Known = computeKnownBits(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
if (!Known.isNonNegative())
break;
LLVM_FALLTHROUGH;
}
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
return getTrue(ITy);
}
}
// icmp pred (urem X, Y), X
if (match(LBO, m_URem(m_Specific(RHS), m_Value()))) {
if (Pred == ICmpInst::ICMP_ULE)
return getTrue(ITy);
if (Pred == ICmpInst::ICMP_UGT)
return getFalse(ITy);
}
// x >>u y <=u x --> true.
// x >>u y >u x --> false.
// x udiv y <=u x --> true.
// x udiv y >u x --> false.
if (match(LBO, m_LShr(m_Specific(RHS), m_Value())) ||
match(LBO, m_UDiv(m_Specific(RHS), m_Value()))) {
// icmp pred (X op Y), X
if (Pred == ICmpInst::ICMP_UGT)
return getFalse(ITy);
if (Pred == ICmpInst::ICMP_ULE)
return getTrue(ITy);
}
// If x is nonzero:
// x >>u C <u x --> true for C != 0.
// x >>u C != x --> true for C != 0.
// x >>u C >=u x --> false for C != 0.
// x >>u C == x --> false for C != 0.
// x udiv C <u x --> true for C != 1.
// x udiv C != x --> true for C != 1.
// x udiv C >=u x --> false for C != 1.
// x udiv C == x --> false for C != 1.
// TODO: allow non-constant shift amount/divisor
const APInt *C;
if ((match(LBO, m_LShr(m_Specific(RHS), m_APInt(C))) && *C != 0) ||
(match(LBO, m_UDiv(m_Specific(RHS), m_APInt(C))) && *C != 1)) {
if (isKnownNonZero(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) {
switch (Pred) {
default:
break;
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_UGE:
return getFalse(ITy);
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_ULT:
return getTrue(ITy);
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_ULE:
// UGT/ULE are handled by the more general case just above
llvm_unreachable("Unexpected UGT/ULE, should have been handled");
}
}
}
// (x*C1)/C2 <= x for C1 <= C2.
// This holds even if the multiplication overflows: Assume that x != 0 and
// arithmetic is modulo M. For overflow to occur we must have C1 >= M/x and
// thus C2 >= M/x. It follows that (x*C1)/C2 <= (M-1)/C2 <= ((M-1)*x)/M < x.
//
// Additionally, either the multiplication or the division might be
// represented as a shift:
// (x*C1)>>C2 <= x for C1 < 2**C2.
// (x<<C1)/C2 <= x for 2**C1 < C2.
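// For illustration with 8-bit arithmetic (M = 256): x = 10, C1 = C2 = 30
// overflows the multiply (10 * 30 = 300 wraps to 44), yet 44 / 30 = 1 <= 10,
// as the argument above guarantees.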
const APInt *C1, *C2;
if ((match(LBO, m_UDiv(m_Mul(m_Specific(RHS), m_APInt(C1)), m_APInt(C2))) &&
C1->ule(*C2)) ||
(match(LBO, m_LShr(m_Mul(m_Specific(RHS), m_APInt(C1)), m_APInt(C2))) &&
C1->ule(APInt(C2->getBitWidth(), 1) << *C2)) ||
(match(LBO, m_UDiv(m_Shl(m_Specific(RHS), m_APInt(C1)), m_APInt(C2))) &&
(APInt(C1->getBitWidth(), 1) << *C1).ule(*C2))) {
if (Pred == ICmpInst::ICMP_UGT)
return getFalse(ITy);
if (Pred == ICmpInst::ICMP_ULE)
return getTrue(ITy);
}
return nullptr;
}
// If only one of the icmp's operands has NSW flags, try to prove that:
//
// icmp slt (x + C1), (x +nsw C2)
//
// is equivalent to:
//
// icmp slt C1, C2
//
// which is true if x + C2 has the NSW flags set and:
// *) C1 < C2 && C1 >= 0, or
// *) C2 < C1 && C1 <= 0.
//
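// For example, (x + 1) <s (x +nsw 2) can be simplified to 1 <s 2 (true):
// since x + 2 does not signed-overflow and 1 is in [0, 2], x + 1 cannot
// signed-overflow either and stays below x + 2.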
static bool trySimplifyICmpWithAdds(CmpInst::Predicate Pred, Value *LHS,
Value *RHS) {
// TODO: only support icmp slt for now.
if (Pred != CmpInst::ICMP_SLT)
return false;
// Canonicalize nsw add as RHS.
if (!match(RHS, m_NSWAdd(m_Value(), m_Value())))
std::swap(LHS, RHS);
if (!match(RHS, m_NSWAdd(m_Value(), m_Value())))
return false;
Value *X;
const APInt *C1, *C2;
if (!match(LHS, m_c_Add(m_Value(X), m_APInt(C1))) ||
!match(RHS, m_c_Add(m_Specific(X), m_APInt(C2))))
return false;
return (C1->slt(*C2) && C1->isNonNegative()) ||
(C2->slt(*C1) && C1->isNonPositive());
}
/// TODO: A large part of this logic is duplicated in InstCombine's
/// foldICmpBinOp(). We should be able to share that and avoid the code
/// duplication.
static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
Value *RHS, const SimplifyQuery &Q,
unsigned MaxRecurse) {
BinaryOperator *LBO = dyn_cast<BinaryOperator>(LHS);
BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS);
if (MaxRecurse && (LBO || RBO)) {
// Analyze the case when either LHS or RHS is an add instruction.
Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
// LHS = A + B (or A and B are null); RHS = C + D (or C and D are null).
bool NoLHSWrapProblem = false, NoRHSWrapProblem = false;
if (LBO && LBO->getOpcode() == Instruction::Add) {
A = LBO->getOperand(0);
B = LBO->getOperand(1);
NoLHSWrapProblem =
ICmpInst::isEquality(Pred) ||
(CmpInst::isUnsigned(Pred) &&
Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(LBO))) ||
(CmpInst::isSigned(Pred) &&
Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(LBO)));
}
if (RBO && RBO->getOpcode() == Instruction::Add) {
C = RBO->getOperand(0);
D = RBO->getOperand(1);
NoRHSWrapProblem =
ICmpInst::isEquality(Pred) ||
(CmpInst::isUnsigned(Pred) &&
Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(RBO))) ||
(CmpInst::isSigned(Pred) &&
Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(RBO)));
}
// icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
if ((A == RHS || B == RHS) && NoLHSWrapProblem)
if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A,
Constant::getNullValue(RHS->getType()), Q,
MaxRecurse - 1))
return V;
// icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
if ((C == LHS || D == LHS) && NoRHSWrapProblem)
if (Value *V =
SimplifyICmpInst(Pred, Constant::getNullValue(LHS->getType()),
C == LHS ? D : C, Q, MaxRecurse - 1))
return V;
// icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow.
bool CanSimplify = (NoLHSWrapProblem && NoRHSWrapProblem) ||
trySimplifyICmpWithAdds(Pred, LHS, RHS);
if (A && C && (A == C || A == D || B == C || B == D) && CanSimplify) {
// Determine Y and Z in the form icmp (X+Y), (X+Z).
Value *Y, *Z;
if (A == C) {
// C + B == C + D -> B == D
Y = B;
Z = D;
} else if (A == D) {
// D + B == C + D -> B == C
Y = B;
Z = C;
} else if (B == C) {
// A + C == C + D -> A == D
Y = A;
Z = D;
} else {
assert(B == D);
// A + D == C + D -> A == C
Y = A;
Z = C;
}
if (Value *V = SimplifyICmpInst(Pred, Y, Z, Q, MaxRecurse - 1))
return V;
}
}
if (LBO)
if (Value *V = simplifyICmpWithBinOpOnLHS(Pred, LBO, RHS, Q, MaxRecurse))
return V;
if (RBO)
if (Value *V = simplifyICmpWithBinOpOnLHS(
ICmpInst::getSwappedPredicate(Pred), RBO, LHS, Q, MaxRecurse))
return V;
// 0 - (zext X) pred C
if (!CmpInst::isUnsigned(Pred) && match(LHS, m_Neg(m_ZExt(m_Value())))) {
const APInt *C;
if (match(RHS, m_APInt(C))) {
if (C->isStrictlyPositive()) {
if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_NE)
return ConstantInt::getTrue(GetCompareTy(RHS));
if (Pred == ICmpInst::ICMP_SGE || Pred == ICmpInst::ICMP_EQ)
return ConstantInt::getFalse(GetCompareTy(RHS));
}
if (C->isNonNegative()) {
if (Pred == ICmpInst::ICMP_SLE)
return ConstantInt::getTrue(GetCompareTy(RHS));
if (Pred == ICmpInst::ICMP_SGT)
return ConstantInt::getFalse(GetCompareTy(RHS));
}
}
}
// If C2 is a power-of-2 and C is not:
// (C2 << X) == C --> false
// (C2 << X) != C --> true
const APInt *C;
if (match(LHS, m_Shl(m_Power2(), m_Value())) &&
match(RHS, m_APIntAllowUndef(C)) && !C->isPowerOf2()) {
// C2 << X can equal zero in some circumstances.
// This simplification might be unsafe if C is zero.
//
// We know it is safe if:
// - The shift is nsw. We can't shift out the one bit.
// - The shift is nuw. We can't shift out the one bit.
// - C2 is one.
// - C isn't zero.
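// For example, with i8 operands (2 << 7) wraps to 0, so (2 << X) == 0 could
// hold and the fold would be wrong; any of the conditions above rules that
// case out.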
if (Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(LBO)) ||
Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(LBO)) ||
match(LHS, m_Shl(m_One(), m_Value())) || !C->isZero()) {
if (Pred == ICmpInst::ICMP_EQ)
return ConstantInt::getFalse(GetCompareTy(RHS));
if (Pred == ICmpInst::ICMP_NE)
return ConstantInt::getTrue(GetCompareTy(RHS));
}
}
// TODO: This is overly constrained. LHS can be any power-of-2.
// (1 << X) >u 0x8000 --> false
// (1 << X) <=u 0x8000 --> true
if (match(LHS, m_Shl(m_One(), m_Value())) && match(RHS, m_SignMask())) {
if (Pred == ICmpInst::ICMP_UGT)
return ConstantInt::getFalse(GetCompareTy(RHS));
if (Pred == ICmpInst::ICMP_ULE)
return ConstantInt::getTrue(GetCompareTy(RHS));
}
if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() &&
LBO->getOperand(1) == RBO->getOperand(1)) {
switch (LBO->getOpcode()) {
default:
break;
case Instruction::UDiv:
case Instruction::LShr:
if (ICmpInst::isSigned(Pred) || !Q.IIQ.isExact(LBO) ||
!Q.IIQ.isExact(RBO))
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
RBO->getOperand(0), Q, MaxRecurse - 1))
return V;
break;
case Instruction::SDiv:
if (!ICmpInst::isEquality(Pred) || !Q.IIQ.isExact(LBO) ||
!Q.IIQ.isExact(RBO))
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
RBO->getOperand(0), Q, MaxRecurse - 1))
return V;
break;
case Instruction::AShr:
if (!Q.IIQ.isExact(LBO) || !Q.IIQ.isExact(RBO))
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
RBO->getOperand(0), Q, MaxRecurse - 1))
return V;
break;
case Instruction::Shl: {
bool NUW = Q.IIQ.hasNoUnsignedWrap(LBO) && Q.IIQ.hasNoUnsignedWrap(RBO);
bool NSW = Q.IIQ.hasNoSignedWrap(LBO) && Q.IIQ.hasNoSignedWrap(RBO);
if (!NUW && !NSW)
break;
if (!NSW && ICmpInst::isSigned(Pred))
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
RBO->getOperand(0), Q, MaxRecurse - 1))
return V;
break;
}
}
}
return nullptr;
}
/// Simplify integer comparisons where at least one operand of the compare
/// matches an integer min/max idiom.
static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS,
Value *RHS, const SimplifyQuery &Q,
unsigned MaxRecurse) {
Type *ITy = GetCompareTy(LHS); // The return type.
Value *A, *B;
CmpInst::Predicate P = CmpInst::BAD_ICMP_PREDICATE;
CmpInst::Predicate EqP; // Chosen so that "A == max/min(A,B)" iff "A EqP B".
// Signed variants on "max(a,b)>=a -> true".
if (match(LHS, m_SMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) {
if (A != RHS)
std::swap(A, B); // smax(A, B) pred A.
EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B".
// We analyze this as smax(A, B) pred A.
P = Pred;
} else if (match(RHS, m_SMax(m_Value(A), m_Value(B))) &&
(A == LHS || B == LHS)) {
if (A != LHS)
std::swap(A, B); // A pred smax(A, B).
EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B".
// We analyze this as smax(A, B) swapped-pred A.
P = CmpInst::getSwappedPredicate(Pred);
} else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) &&
(A == RHS || B == RHS)) {
if (A != RHS)
std::swap(A, B); // smin(A, B) pred A.
EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B".
// We analyze this as smax(-A, -B) swapped-pred -A.
// Note that we do not need to actually form -A or -B thanks to EqP.
P = CmpInst::getSwappedPredicate(Pred);
} else if (match(RHS, m_SMin(m_Value(A), m_Value(B))) &&
(A == LHS || B == LHS)) {
if (A != LHS)
std::swap(A, B); // A pred smin(A, B).
EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B".
// We analyze this as smax(-A, -B) pred -A.
// Note that we do not need to actually form -A or -B thanks to EqP.
P = Pred;
}
if (P != CmpInst::BAD_ICMP_PREDICATE) {
// Cases correspond to "max(A, B) p A".
switch (P) {
default:
break;
case CmpInst::ICMP_EQ:
case CmpInst::ICMP_SLE:
// Equivalent to "A EqP B". This may be the same as the condition tested
// in the max/min; if so, we can just return that.
if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B))
return V;
if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B))
return V;
// Otherwise, see if "A EqP B" simplifies.
if (MaxRecurse)
if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse - 1))
return V;
break;
case CmpInst::ICMP_NE:
case CmpInst::ICMP_SGT: {
CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP);
// Equivalent to "A InvEqP B". This may be the same as the condition
// tested in the max/min; if so, we can just return that.
if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B))
return V;
if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B))
return V;
// Otherwise, see if "A InvEqP B" simplifies.
if (MaxRecurse)
if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse - 1))
return V;
break;
}
case CmpInst::ICMP_SGE:
// Always true.
return getTrue(ITy);
case CmpInst::ICMP_SLT:
// Always false.
return getFalse(ITy);
}
}
// Unsigned variants on "max(a,b)>=a -> true".
P = CmpInst::BAD_ICMP_PREDICATE;
if (match(LHS, m_UMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) {
if (A != RHS)
std::swap(A, B); // umax(A, B) pred A.
EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B".
// We analyze this as umax(A, B) pred A.
P = Pred;
} else if (match(RHS, m_UMax(m_Value(A), m_Value(B))) &&
(A == LHS || B == LHS)) {
if (A != LHS)
std::swap(A, B); // A pred umax(A, B).
EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B".
// We analyze this as umax(A, B) swapped-pred A.
P = CmpInst::getSwappedPredicate(Pred);
} else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) &&
(A == RHS || B == RHS)) {
if (A != RHS)
std::swap(A, B); // umin(A, B) pred A.
EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B".
// We analyze this as umax(-A, -B) swapped-pred -A.
// Note that we do not need to actually form -A or -B thanks to EqP.
P = CmpInst::getSwappedPredicate(Pred);
} else if (match(RHS, m_UMin(m_Value(A), m_Value(B))) &&
(A == LHS || B == LHS)) {
if (A != LHS)
std::swap(A, B); // A pred umin(A, B).
EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B".
// We analyze this as umax(-A, -B) pred -A.
// Note that we do not need to actually form -A or -B thanks to EqP.
P = Pred;
}
if (P != CmpInst::BAD_ICMP_PREDICATE) {
// Cases correspond to "max(A, B) p A".
switch (P) {
default:
break;
case CmpInst::ICMP_EQ:
case CmpInst::ICMP_ULE:
// Equivalent to "A EqP B". This may be the same as the condition tested
// in the max/min; if so, we can just return that.
if (Value *V = ExtractEquivalentCondition(LHS, EqP, A, B))
return V;
if (Value *V = ExtractEquivalentCondition(RHS, EqP, A, B))
return V;
// Otherwise, see if "A EqP B" simplifies.
if (MaxRecurse)
if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse - 1))
return V;
break;
case CmpInst::ICMP_NE:
case CmpInst::ICMP_UGT: {
CmpInst::Predicate InvEqP = CmpInst::getInversePredicate(EqP);
// Equivalent to "A InvEqP B". This may be the same as the condition
// tested in the max/min; if so, we can just return that.
if (Value *V = ExtractEquivalentCondition(LHS, InvEqP, A, B))
return V;
if (Value *V = ExtractEquivalentCondition(RHS, InvEqP, A, B))
return V;
// Otherwise, see if "A InvEqP B" simplifies.
if (MaxRecurse)
if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse - 1))
return V;
break;
}
case CmpInst::ICMP_UGE:
return getTrue(ITy);
case CmpInst::ICMP_ULT:
return getFalse(ITy);
}
}
// Comparing 1 each of min/max with a common operand?
// Canonicalize min operand to RHS.
if (match(LHS, m_UMin(m_Value(), m_Value())) ||
match(LHS, m_SMin(m_Value(), m_Value()))) {
std::swap(LHS, RHS);
Pred = ICmpInst::getSwappedPredicate(Pred);
}
Value *C, *D;
if (match(LHS, m_SMax(m_Value(A), m_Value(B))) &&
match(RHS, m_SMin(m_Value(C), m_Value(D))) &&
(A == C || A == D || B == C || B == D)) {
// smax(A, B) >=s smin(A, D) --> true
if (Pred == CmpInst::ICMP_SGE)
return getTrue(ITy);
// smax(A, B) <s smin(A, D) --> false
if (Pred == CmpInst::ICMP_SLT)
return getFalse(ITy);
} else if (match(LHS, m_UMax(m_Value(A), m_Value(B))) &&
match(RHS, m_UMin(m_Value(C), m_Value(D))) &&
(A == C || A == D || B == C || B == D)) {
// umax(A, B) >=u umin(A, D) --> true
if (Pred == CmpInst::ICMP_UGE)
return getTrue(ITy);
// umax(A, B) <u umin(A, D) --> false
if (Pred == CmpInst::ICMP_ULT)
return getFalse(ITy);
}
return nullptr;
}
static Value *simplifyICmpWithDominatingAssume(CmpInst::Predicate Predicate,
Value *LHS, Value *RHS,
const SimplifyQuery &Q) {
// Gracefully handle instructions that have not been inserted yet.
if (!Q.AC || !Q.CxtI || !Q.CxtI->getParent())
return nullptr;
for (Value *AssumeBaseOp : {LHS, RHS}) {
for (auto &AssumeVH : Q.AC->assumptionsFor(AssumeBaseOp)) {
if (!AssumeVH)
continue;
CallInst *Assume = cast<CallInst>(AssumeVH);
if (Optional<bool> Imp =
isImpliedCondition(Assume->getArgOperand(0), Predicate, LHS, RHS,
Q.DL))
if (isValidAssumeForContext(Assume, Q.CxtI, Q.DT))
return ConstantInt::get(GetCompareTy(LHS), *Imp);
}
}
return nullptr;
}
/// Given operands for an ICmpInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const SimplifyQuery &Q, unsigned MaxRecurse) {
CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
if (Constant *CRHS = dyn_cast<Constant>(RHS))
return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.DL, Q.TLI);
// If we have a constant, make sure it is on the RHS.
std::swap(LHS, RHS);
Pred = CmpInst::getSwappedPredicate(Pred);
}
assert(!isa<UndefValue>(LHS) && "Unexpected icmp undef,%X");
Type *ITy = GetCompareTy(LHS); // The return type.
// icmp poison, X -> poison
if (isa<PoisonValue>(RHS))
return PoisonValue::get(ITy);
// For EQ and NE, we can always pick a value for the undef to make the
// predicate pass or fail, so we can return undef.
// Matches behavior in llvm::ConstantFoldCompareInstruction.
if (Q.isUndefValue(RHS) && ICmpInst::isEquality(Pred))
return UndefValue::get(ITy);
// icmp X, X -> true/false
// icmp X, undef -> true/false because undef could be X.
if (LHS == RHS || Q.isUndefValue(RHS))
return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
if (Value *V = simplifyICmpOfBools(Pred, LHS, RHS, Q))
return V;
// TODO: Sink/common this with other potentially expensive calls that use
// ValueTracking? See comment below for isKnownNonEqual().
if (Value *V = simplifyICmpWithZero(Pred, LHS, RHS, Q))
return V;
if (Value *V = simplifyICmpWithConstant(Pred, LHS, RHS, Q.IIQ))
return V;
// If both operands have range metadata, use the metadata
// to simplify the comparison.
if (isa<Instruction>(RHS) && isa<Instruction>(LHS)) {
auto RHS_Instr = cast<Instruction>(RHS);
auto LHS_Instr = cast<Instruction>(LHS);
if (Q.IIQ.getMetadata(RHS_Instr, LLVMContext::MD_range) &&
Q.IIQ.getMetadata(LHS_Instr, LLVMContext::MD_range)) {
auto RHS_CR = getConstantRangeFromMetadata(
*RHS_Instr->getMetadata(LLVMContext::MD_range));
auto LHS_CR = getConstantRangeFromMetadata(
*LHS_Instr->getMetadata(LLVMContext::MD_range));
if (LHS_CR.icmp(Pred, RHS_CR))
return ConstantInt::getTrue(RHS->getContext());
if (LHS_CR.icmp(CmpInst::getInversePredicate(Pred), RHS_CR))
return ConstantInt::getFalse(RHS->getContext());
}
}
// Compare of cast, for example (zext X) != 0 -> X != 0
if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) {
Instruction *LI = cast<CastInst>(LHS);
Value *SrcOp = LI->getOperand(0);
Type *SrcTy = SrcOp->getType();
Type *DstTy = LI->getType();
// Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
// if the integer type is the same size as the pointer type.
if (MaxRecurse && isa<PtrToIntInst>(LI) &&
Q.DL.getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) {
if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
// Transfer the cast to the constant.
if (Value *V = SimplifyICmpInst(Pred, SrcOp,
ConstantExpr::getIntToPtr(RHSC, SrcTy),
Q, MaxRecurse-1))
return V;
} else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) {
if (RI->getOperand(0)->getType() == SrcTy)
// Compare without the cast.
if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
Q, MaxRecurse-1))
return V;
}
}
if (isa<ZExtInst>(LHS)) {
// Turn icmp (zext X), (zext Y) into a compare of X and Y if they have the
// same type.
if (ZExtInst *RI = dyn_cast<ZExtInst>(RHS)) {
if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
// Compare X and Y. Note that signed predicates become unsigned.
if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
SrcOp, RI->getOperand(0), Q,
MaxRecurse-1))
return V;
}
// Fold (zext X) ule (sext X), (zext X) sge (sext X) to true.
else if (SExtInst *RI = dyn_cast<SExtInst>(RHS)) {
if (SrcOp == RI->getOperand(0)) {
if (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_SGE)
return ConstantInt::getTrue(ITy);
if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_SLT)
return ConstantInt::getFalse(ITy);
}
}
// Turn icmp (zext X), Cst into a compare of X and Cst if Cst is extended
// too. If not, then try to deduce the result of the comparison.
else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
// Compute the constant that would result if we truncated to SrcTy and then
// re-extended to DstTy.
Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
Constant *RExt = ConstantExpr::getCast(CastInst::ZExt, Trunc, DstTy);
// If the re-extended constant didn't change then this is effectively
// also a case of comparing two zero-extended values.
if (RExt == CI && MaxRecurse)
if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
SrcOp, Trunc, Q, MaxRecurse-1))
return V;
// Otherwise the upper bits of LHS are zero while RHS has a non-zero bit
// there. Use this to work out the result of the comparison.
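// For example, icmp ugt (zext i8 %x to i32), 300 folds to false: the
// zero-extended value is at most 255, which is unsigned-less-than 300.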
if (RExt != CI) {
switch (Pred) {
default: llvm_unreachable("Unknown ICmp predicate!");
// LHS <u RHS.
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
return ConstantInt::getFalse(CI->getContext());
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
return ConstantInt::getTrue(CI->getContext());
// LHS is non-negative. If RHS is negative then LHS >s RHS. If RHS
// is non-negative then LHS <s RHS.
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
return CI->getValue().isNegative() ?
ConstantInt::getTrue(CI->getContext()) :
ConstantInt::getFalse(CI->getContext());
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
return CI->getValue().isNegative() ?
ConstantInt::getFalse(CI->getContext()) :
ConstantInt::getTrue(CI->getContext());
}
}
}
}
if (isa<SExtInst>(LHS)) {
// Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the
// same type.
if (SExtInst *RI = dyn_cast<SExtInst>(RHS)) {
if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
// Compare X and Y. Note that the predicate does not change.
if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
Q, MaxRecurse-1))
return V;
}
// Fold (sext X) uge (zext X), (sext X) sle (zext X) to true.
else if (ZExtInst *RI = dyn_cast<ZExtInst>(RHS)) {
if (SrcOp == RI->getOperand(0)) {
if (Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_SLE)
return ConstantInt::getTrue(ITy);
if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_SGT)
return ConstantInt::getFalse(ITy);
}
}
// Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended
// too. If not, then try to deduce the result of the comparison.
else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
// Compute the constant that would result if we truncated to SrcTy and then
// re-extended to DstTy.
Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
Constant *RExt = ConstantExpr::getCast(CastInst::SExt, Trunc, DstTy);
// If the re-extended constant didn't change then this is effectively
// also a case of comparing two sign-extended values.
if (RExt == CI && MaxRecurse)
if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse-1))
return V;
// Otherwise the upper bits of LHS are all equal, while RHS has varying
// bits there. Use this to work out the result of the comparison.
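// For example, icmp slt (sext i8 %x to i32), 300 folds to true: the
// sign-extended value lies in [-128, 127], which is always signed-less-than
// 300.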
if (RExt != CI) {
switch (Pred) {
default: llvm_unreachable("Unknown ICmp predicate!");
case ICmpInst::ICMP_EQ:
return ConstantInt::getFalse(CI->getContext());
case ICmpInst::ICMP_NE:
return ConstantInt::getTrue(CI->getContext());
// If RHS is non-negative then LHS <s RHS. If RHS is negative then
// LHS >s RHS.
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
return CI->getValue().isNegative() ?
ConstantInt::getTrue(CI->getContext()) :
ConstantInt::getFalse(CI->getContext());
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
return CI->getValue().isNegative() ?
ConstantInt::getFalse(CI->getContext()) :
ConstantInt::getTrue(CI->getContext());
// If LHS is non-negative then LHS <u RHS. If LHS is negative then
// LHS >u RHS.
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
// Comparison is true iff the LHS <s 0.
if (MaxRecurse)
if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp,
Constant::getNullValue(SrcTy),
Q, MaxRecurse-1))
return V;
break;
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
// Comparison is true iff the LHS >=s 0.
if (MaxRecurse)
if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp,
Constant::getNullValue(SrcTy),
Q, MaxRecurse-1))
return V;
break;
}
}
}
}
}
// icmp eq|ne X, Y -> false|true if X != Y
// This is potentially expensive, and we have already called computeKnownBits
// for compares with 0 above, so only try this for a non-zero compare.
if (ICmpInst::isEquality(Pred) && !match(RHS, m_Zero()) &&
isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT, Q.IIQ.UseInstrInfo)) {
return Pred == ICmpInst::ICMP_NE ? getTrue(ITy) : getFalse(ITy);
}
if (Value *V = simplifyICmpWithBinOp(Pred, LHS, RHS, Q, MaxRecurse))
return V;
if (Value *V = simplifyICmpWithMinMax(Pred, LHS, RHS, Q, MaxRecurse))
return V;
if (Value *V = simplifyICmpWithDominatingAssume(Pred, LHS, RHS, Q))
return V;
// Simplify comparisons of related pointers using a powerful, recursive
// GEP-walk when we have target data available.
if (LHS->getType()->isPointerTy())
if (auto *C = computePointerICmp(Pred, LHS, RHS, Q))
return C;
if (auto *CLHS = dyn_cast<PtrToIntOperator>(LHS))
if (auto *CRHS = dyn_cast<PtrToIntOperator>(RHS))
if (Q.DL.getTypeSizeInBits(CLHS->getPointerOperandType()) ==
Q.DL.getTypeSizeInBits(CLHS->getType()) &&
Q.DL.getTypeSizeInBits(CRHS->getPointerOperandType()) ==
Q.DL.getTypeSizeInBits(CRHS->getType()))
if (auto *C = computePointerICmp(Pred, CLHS->getPointerOperand(),
CRHS->getPointerOperand(), Q))
return C;
// If the comparison is with the result of a select instruction, check whether
// comparing with either branch of the select always yields the same value.
if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse))
return V;
// If the comparison is with the result of a phi instruction, check whether
// doing the compare with each incoming phi value yields a common result.
if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse))
return V;
return nullptr;
}
Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const SimplifyQuery &Q) {
return ::SimplifyICmpInst(Predicate, LHS, RHS, Q, RecursionLimit);
}
/// Given operands for an FCmpInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
FastMathFlags FMF, const SimplifyQuery &Q,
unsigned MaxRecurse) {
CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!");
if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
if (Constant *CRHS = dyn_cast<Constant>(RHS))
return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.DL, Q.TLI);
// If we have a constant, make sure it is on the RHS.
std::swap(LHS, RHS);
Pred = CmpInst::getSwappedPredicate(Pred);
}
// Fold trivial predicates.
Type *RetTy = GetCompareTy(LHS);
if (Pred == FCmpInst::FCMP_FALSE)
return getFalse(RetTy);
if (Pred == FCmpInst::FCMP_TRUE)
return getTrue(RetTy);
// Fold (un)ordered comparison if we can determine there are no NaNs.
if (Pred == FCmpInst::FCMP_UNO || Pred == FCmpInst::FCMP_ORD)
if (FMF.noNaNs() ||
(isKnownNeverNaN(LHS, Q.TLI) && isKnownNeverNaN(RHS, Q.TLI)))
return ConstantInt::get(RetTy, Pred == FCmpInst::FCMP_ORD);
// NaN is unordered; NaN is not ordered.
assert((FCmpInst::isOrdered(Pred) || FCmpInst::isUnordered(Pred)) &&
"Comparison must be either ordered or unordered");
if (match(RHS, m_NaN()))
return ConstantInt::get(RetTy, CmpInst::isUnordered(Pred));
// fcmp pred x, poison and fcmp pred poison, x
// fold to poison
if (isa<PoisonValue>(LHS) || isa<PoisonValue>(RHS))
return PoisonValue::get(RetTy);
// fcmp pred x, undef and fcmp pred undef, x
// fold to true if unordered, false if ordered
if (Q.isUndefValue(LHS) || Q.isUndefValue(RHS)) {
// Choosing NaN for the undef will always make unordered comparison succeed
// and ordered comparison fail.
return ConstantInt::get(RetTy, CmpInst::isUnordered(Pred));
}
// fcmp x,x -> true/false. Not all compares are foldable.
if (LHS == RHS) {
if (CmpInst::isTrueWhenEqual(Pred))
return getTrue(RetTy);
if (CmpInst::isFalseWhenEqual(Pred))
return getFalse(RetTy);
}
// Handle fcmp with constant RHS.
// TODO: Use match with a specific FP value, so these work with vectors with
// undef lanes.
const APFloat *C;
if (match(RHS, m_APFloat(C))) {
// Check whether the constant is an infinity.
if (C->isInfinity()) {
if (C->isNegative()) {
switch (Pred) {
case FCmpInst::FCMP_OLT:
// No value is ordered and less than negative infinity.
return getFalse(RetTy);
case FCmpInst::FCMP_UGE:
// All values are either unordered with, or at least, negative infinity.
return getTrue(RetTy);
default:
break;
}
} else {
switch (Pred) {
case FCmpInst::FCMP_OGT:
// No value is ordered and greater than infinity.
return getFalse(RetTy);
case FCmpInst::FCMP_ULE:
// All values are either unordered with, or at most, infinity.
return getTrue(RetTy);
default:
break;
}
}
// LHS == Inf
if (Pred == FCmpInst::FCMP_OEQ && isKnownNeverInfinity(LHS, Q.TLI))
return getFalse(RetTy);
// LHS != Inf
if (Pred == FCmpInst::FCMP_UNE && isKnownNeverInfinity(LHS, Q.TLI))
return getTrue(RetTy);
// LHS == Inf || LHS == NaN
if (Pred == FCmpInst::FCMP_UEQ && isKnownNeverInfinity(LHS, Q.TLI) &&
isKnownNeverNaN(LHS, Q.TLI))
return getFalse(RetTy);
// LHS != Inf && LHS != NaN
if (Pred == FCmpInst::FCMP_ONE && isKnownNeverInfinity(LHS, Q.TLI) &&
isKnownNeverNaN(LHS, Q.TLI))
return getTrue(RetTy);
}
if (C->isNegative() && !C->isNegZero()) {
assert(!C->isNaN() && "Unexpected NaN constant!");
// TODO: We can catch more cases by using a range check rather than
// relying on CannotBeOrderedLessThanZero.
switch (Pred) {
case FCmpInst::FCMP_UGE:
case FCmpInst::FCMP_UGT:
case FCmpInst::FCMP_UNE:
// (X >= 0) implies (X > C) when (C < 0)
if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
return getTrue(RetTy);
break;
case FCmpInst::FCMP_OEQ:
case FCmpInst::FCMP_OLE:
case FCmpInst::FCMP_OLT:
// (X >= 0) implies !(X < C) when (C < 0)
if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
return getFalse(RetTy);
break;
default:
break;
}
}
// Check comparison of [minnum/maxnum with constant] with other constant.
const APFloat *C2;
if ((match(LHS, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_APFloat(C2))) &&
*C2 < *C) ||
(match(LHS, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_APFloat(C2))) &&
*C2 > *C)) {
bool IsMaxNum =
cast<IntrinsicInst>(LHS)->getIntrinsicID() == Intrinsic::maxnum;
// The ordered relationship and minnum/maxnum guarantee that we do not
// have NaN constants, so ordered/unordered preds are handled the same.
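// For example, fcmp oeq (minnum(%x, 1.0)), 2.0 folds to false: minnum never
// produces a value greater than 1.0 here (if %x is NaN it yields 1.0), so
// the result can never equal 2.0.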
switch (Pred) {
case FCmpInst::FCMP_OEQ: case FCmpInst::FCMP_UEQ:
// minnum(X, LesserC) == C --> false
// maxnum(X, GreaterC) == C --> false
return getFalse(RetTy);
case FCmpInst::FCMP_ONE: case FCmpInst::FCMP_UNE:
// minnum(X, LesserC) != C --> true
// maxnum(X, GreaterC) != C --> true
return getTrue(RetTy);
case FCmpInst::FCMP_OGE: case FCmpInst::FCMP_UGE:
case FCmpInst::FCMP_OGT: case FCmpInst::FCMP_UGT:
// minnum(X, LesserC) >= C --> false
// minnum(X, LesserC) > C --> false
// maxnum(X, GreaterC) >= C --> true
// maxnum(X, GreaterC) > C --> true
return ConstantInt::get(RetTy, IsMaxNum);
case FCmpInst::FCMP_OLE: case FCmpInst::FCMP_ULE:
case FCmpInst::FCMP_OLT: case FCmpInst::FCMP_ULT:
// minnum(X, LesserC) <= C --> true
// minnum(X, LesserC) < C --> true
// maxnum(X, GreaterC) <= C --> false
// maxnum(X, GreaterC) < C --> false
return ConstantInt::get(RetTy, !IsMaxNum);
default:
// TRUE/FALSE/ORD/UNO should be handled before this.
llvm_unreachable("Unexpected fcmp predicate");
}
}
}
if (match(RHS, m_AnyZeroFP())) {
switch (Pred) {
case FCmpInst::FCMP_OGE:
case FCmpInst::FCMP_ULT:
// Positive or zero X >= 0.0 --> true
// Positive or zero X < 0.0 --> false
if ((FMF.noNaNs() || isKnownNeverNaN(LHS, Q.TLI)) &&
CannotBeOrderedLessThanZero(LHS, Q.TLI))
return Pred == FCmpInst::FCMP_OGE ? getTrue(RetTy) : getFalse(RetTy);
break;
case FCmpInst::FCMP_UGE:
case FCmpInst::FCMP_OLT:
// Positive or zero or nan X >= 0.0 --> true
// Positive or zero or nan X < 0.0 --> false
if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
return Pred == FCmpInst::FCMP_UGE ? getTrue(RetTy) : getFalse(RetTy);
break;
default:
break;
}
}
// If the comparison is with the result of a select instruction, check whether
// comparing with either branch of the select always yields the same value.
if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse))
return V;
// If the comparison is with the result of a phi instruction, check whether
// doing the compare with each incoming phi value yields a common result.
if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse))
return V;
return nullptr;
}
Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
FastMathFlags FMF, const SimplifyQuery &Q) {
return ::SimplifyFCmpInst(Predicate, LHS, RHS, FMF, Q, RecursionLimit);
}
static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
const SimplifyQuery &Q,
bool AllowRefinement,
unsigned MaxRecurse) {
assert(!Op->getType()->isVectorTy() && "This is not safe for vectors");
// Trivial replacement.
if (V == Op)
return RepOp;
// We cannot replace a constant, and shouldn't even try.
if (isa<Constant>(Op))
return nullptr;
auto *I = dyn_cast<Instruction>(V);
if (!I || !is_contained(I->operands(), Op))
return nullptr;
// Replace Op with RepOp in instruction operands.
SmallVector<Value *, 8> NewOps(I->getNumOperands());
transform(I->operands(), NewOps.begin(),
[&](Value *V) { return V == Op ? RepOp : V; });
if (!AllowRefinement) {
// General InstSimplify functions may refine the result, e.g. by returning
// a constant for a potentially poison value. To avoid this, implement only
// a few non-refining but profitable transforms here.
if (auto *BO = dyn_cast<BinaryOperator>(I)) {
unsigned Opcode = BO->getOpcode();
// id op x -> x, x op id -> x
if (NewOps[0] == ConstantExpr::getBinOpIdentity(Opcode, I->getType()))
return NewOps[1];
if (NewOps[1] == ConstantExpr::getBinOpIdentity(Opcode, I->getType(),
/* RHS */ true))
return NewOps[0];
// x & x -> x, x | x -> x
if ((Opcode == Instruction::And || Opcode == Instruction::Or) &&
NewOps[0] == NewOps[1])
return NewOps[0];
}
if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
// getelementptr x, 0 -> x
if (NewOps.size() == 2 && match(NewOps[1], m_Zero()) &&
!GEP->isInBounds())
return NewOps[0];
}
} else if (MaxRecurse) {
// The simplification queries below may return the original value. Consider:
// %div = udiv i32 %arg, %arg2
// %mul = mul nsw i32 %div, %arg2
// %cmp = icmp eq i32 %mul, %arg
// %sel = select i1 %cmp, i32 %div, i32 undef
// Replacing %arg by %mul, %div becomes "udiv i32 %mul, %arg2", which
// simplifies back to %arg. This can only happen because %mul does not
// dominate %div. To ensure a consistent return value contract, we make sure
// that this case returns nullptr as well.
auto PreventSelfSimplify = [V](Value *Simplified) {
return Simplified != V ? Simplified : nullptr;
};
if (auto *B = dyn_cast<BinaryOperator>(I))
return PreventSelfSimplify(SimplifyBinOp(B->getOpcode(), NewOps[0],
NewOps[1], Q, MaxRecurse - 1));
if (CmpInst *C = dyn_cast<CmpInst>(I))
return PreventSelfSimplify(SimplifyCmpInst(C->getPredicate(), NewOps[0],
NewOps[1], Q, MaxRecurse - 1));
if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
return PreventSelfSimplify(SimplifyGEPInst(
GEP->getSourceElementType(), NewOps[0], makeArrayRef(NewOps).slice(1),
GEP->isInBounds(), Q, MaxRecurse - 1));
if (isa<SelectInst>(I))
return PreventSelfSimplify(
SimplifySelectInst(NewOps[0], NewOps[1], NewOps[2], Q,
MaxRecurse - 1));
// TODO: We could hand off more cases to instsimplify here.
}
// If all operands are constant after substituting Op for RepOp then we can
// constant fold the instruction.
SmallVector<Constant *, 8> ConstOps;
for (Value *NewOp : NewOps) {
if (Constant *ConstOp = dyn_cast<Constant>(NewOp))
ConstOps.push_back(ConstOp);
else
return nullptr;
}
// Consider:
// %cmp = icmp eq i32 %x, 2147483647
// %add = add nsw i32 %x, 1
// %sel = select i1 %cmp, i32 -2147483648, i32 %add
//
// We can't replace %sel with %add unless we strip away the flags (which
// will be done in InstCombine).
// TODO: This may be unsound, because it only catches some forms of
// refinement.
if (!AllowRefinement && canCreatePoison(cast<Operator>(I)))
return nullptr;
if (CmpInst *C = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0],
ConstOps[1], Q.DL, Q.TLI);
if (LoadInst *LI = dyn_cast<LoadInst>(I))
if (!LI->isVolatile())
return ConstantFoldLoadFromConstPtr(ConstOps[0], LI->getType(), Q.DL);
return ConstantFoldInstOperands(I, ConstOps, Q.DL, Q.TLI);
}
Value *llvm::simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
const SimplifyQuery &Q,
bool AllowRefinement) {
return ::simplifyWithOpReplaced(V, Op, RepOp, Q, AllowRefinement,
RecursionLimit);
}
/// Try to simplify a select instruction when its condition operand is an
/// integer comparison where one operand of the compare is a constant.
static Value *simplifySelectBitTest(Value *TrueVal, Value *FalseVal, Value *X,
const APInt *Y, bool TrueWhenUnset) {
const APInt *C;
// (X & Y) == 0 ? X & ~Y : X --> X
// (X & Y) != 0 ? X & ~Y : X --> X & ~Y
if (FalseVal == X && match(TrueVal, m_And(m_Specific(X), m_APInt(C))) &&
*Y == ~*C)
return TrueWhenUnset ? FalseVal : TrueVal;
// (X & Y) == 0 ? X : X & ~Y --> X & ~Y
// (X & Y) != 0 ? X : X & ~Y --> X
if (TrueVal == X && match(FalseVal, m_And(m_Specific(X), m_APInt(C))) &&
*Y == ~*C)
return TrueWhenUnset ? FalseVal : TrueVal;
if (Y->isPowerOf2()) {
// (X & Y) == 0 ? X | Y : X --> X | Y
// (X & Y) != 0 ? X | Y : X --> X
if (FalseVal == X && match(TrueVal, m_Or(m_Specific(X), m_APInt(C))) &&
*Y == *C)
return TrueWhenUnset ? TrueVal : FalseVal;
// (X & Y) == 0 ? X : X | Y --> X
// (X & Y) != 0 ? X : X | Y --> X | Y
if (TrueVal == X && match(FalseVal, m_Or(m_Specific(X), m_APInt(C))) &&
*Y == *C)
return TrueWhenUnset ? TrueVal : FalseVal;
}
return nullptr;
}
/// An alternative way to test if a bit is set or not uses sgt/slt instead of
/// eq/ne.
static Value *simplifySelectWithFakeICmpEq(Value *CmpLHS, Value *CmpRHS,
ICmpInst::Predicate Pred,
Value *TrueVal, Value *FalseVal) {
Value *X;
APInt Mask;
if (!decomposeBitTestICmp(CmpLHS, CmpRHS, Pred, X, Mask))
return nullptr;
return simplifySelectBitTest(TrueVal, FalseVal, X, &Mask,
Pred == ICmpInst::ICMP_EQ);
}
/// Try to simplify a select instruction when its condition operand is an
/// integer comparison.
static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
Value *FalseVal, const SimplifyQuery &Q,
unsigned MaxRecurse) {
ICmpInst::Predicate Pred;
Value *CmpLHS, *CmpRHS;
if (!match(CondVal, m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS))))
return nullptr;
// Canonicalize ne to eq predicate.
if (Pred == ICmpInst::ICMP_NE) {
Pred = ICmpInst::ICMP_EQ;
std::swap(TrueVal, FalseVal);
}
// Check for integer min/max with a limit constant:
// X > MIN_INT ? X : MIN_INT --> X
// X < MAX_INT ? X : MAX_INT --> X
if (TrueVal->getType()->isIntOrIntVectorTy()) {
Value *X, *Y;
SelectPatternFlavor SPF =
matchDecomposedSelectPattern(cast<ICmpInst>(CondVal), TrueVal, FalseVal,
X, Y).Flavor;
if (SelectPatternResult::isMinOrMax(SPF) && Pred == getMinMaxPred(SPF)) {
APInt LimitC = getMinMaxLimit(getInverseMinMaxFlavor(SPF),
X->getType()->getScalarSizeInBits());
if (match(Y, m_SpecificInt(LimitC)))
return X;
}
}
if (Pred == ICmpInst::ICMP_EQ && match(CmpRHS, m_Zero())) {
Value *X;
const APInt *Y;
if (match(CmpLHS, m_And(m_Value(X), m_APInt(Y))))
if (Value *V = simplifySelectBitTest(TrueVal, FalseVal, X, Y,
/*TrueWhenUnset=*/true))
return V;
// Test for a bogus zero-shift-guard-op around funnel-shift or rotate.
Value *ShAmt;
auto isFsh = m_CombineOr(m_FShl(m_Value(X), m_Value(), m_Value(ShAmt)),
m_FShr(m_Value(), m_Value(X), m_Value(ShAmt)));
// (ShAmt == 0) ? fshl(X, *, ShAmt) : X --> X
// (ShAmt == 0) ? fshr(*, X, ShAmt) : X --> X
if (match(TrueVal, isFsh) && FalseVal == X && CmpLHS == ShAmt)
return X;
// Test for a zero-shift-guard-op around rotates. These are used to
// avoid UB from oversized shifts in raw IR rotate patterns, but the
// intrinsics do not have that problem.
// We do not allow this transform for the general funnel shift case because
// that would not preserve the poison safety of the original code.
auto isRotate =
m_CombineOr(m_FShl(m_Value(X), m_Deferred(X), m_Value(ShAmt)),
m_FShr(m_Value(X), m_Deferred(X), m_Value(ShAmt)));
// (ShAmt == 0) ? X : fshl(X, X, ShAmt) --> fshl(X, X, ShAmt)
// (ShAmt == 0) ? X : fshr(X, X, ShAmt) --> fshr(X, X, ShAmt)
if (match(FalseVal, isRotate) && TrueVal == X && CmpLHS == ShAmt &&
Pred == ICmpInst::ICMP_EQ)
return FalseVal;
// X == 0 ? abs(X) : -abs(X) --> -abs(X)
// X == 0 ? -abs(X) : abs(X) --> abs(X)
if (match(TrueVal, m_Intrinsic<Intrinsic::abs>(m_Specific(CmpLHS))) &&
match(FalseVal, m_Neg(m_Intrinsic<Intrinsic::abs>(m_Specific(CmpLHS)))))
return FalseVal;
if (match(TrueVal,
m_Neg(m_Intrinsic<Intrinsic::abs>(m_Specific(CmpLHS)))) &&
match(FalseVal, m_Intrinsic<Intrinsic::abs>(m_Specific(CmpLHS))))
return FalseVal;
}
// Check for other compares that behave like bit test.
if (Value *V = simplifySelectWithFakeICmpEq(CmpLHS, CmpRHS, Pred,
TrueVal, FalseVal))
return V;
// If we have a scalar equality comparison, then we know the value in one of
// the arms of the select. See if substituting this value into the arm and
// simplifying the result yields the same value as the other arm.
// Note that the equivalence/replacement opportunity does not hold for vectors
// because each element of a vector select is chosen independently.
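// For example, select (icmp eq i32 %x, 0), i32 0, i32 %x: substituting 0 for
// %x in the false arm gives 0, which equals the true arm, so the select
// simplifies to the false arm %x.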
if (Pred == ICmpInst::ICMP_EQ && !CondVal->getType()->isVectorTy()) {
if (simplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q,
/* AllowRefinement */ false, MaxRecurse) ==
TrueVal ||
simplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q,
/* AllowRefinement */ false, MaxRecurse) ==
TrueVal)
return FalseVal;
if (simplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q,
/* AllowRefinement */ true, MaxRecurse) ==
FalseVal ||
simplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q,
/* AllowRefinement */ true, MaxRecurse) ==
FalseVal)
return FalseVal;
}
return nullptr;
}
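// Illustrative examples (sketch) of the icmp-guarded select folds above:
//   select (icmp eq i32 %x, 17), i32 17, i32 %x                   --> %x
//   select (icmp eq i8 %s, 0), (fshl i8 %v, i8 %w, i8 %s), i8 %v  --> %v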
/// Try to simplify a select instruction when its condition operand is a
/// floating-point comparison.
static Value *simplifySelectWithFCmp(Value *Cond, Value *T, Value *F,
const SimplifyQuery &Q) {
FCmpInst::Predicate Pred;
if (!match(Cond, m_FCmp(Pred, m_Specific(T), m_Specific(F))) &&
!match(Cond, m_FCmp(Pred, m_Specific(F), m_Specific(T))))
return nullptr;
// This transform is safe if we do not have (do not care about) -0.0 or if
// at least one operand is known to not be -0.0. Otherwise, the select can
// change the sign of a zero operand.
bool HasNoSignedZeros = Q.CxtI && isa<FPMathOperator>(Q.CxtI) &&
Q.CxtI->hasNoSignedZeros();
const APFloat *C;
if (HasNoSignedZeros || (match(T, m_APFloat(C)) && C->isNonZero()) ||
(match(F, m_APFloat(C)) && C->isNonZero())) {
// (T == F) ? T : F --> F
// (F == T) ? T : F --> F
if (Pred == FCmpInst::FCMP_OEQ)
return F;
// (T != F) ? T : F --> T
// (F != T) ? T : F --> T
if (Pred == FCmpInst::FCMP_UNE)
return T;
}
return nullptr;
}
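// For example (sketch), with 'nsz' on the enclosing select or a known
// non-zero FP constant operand:
//   select (fcmp oeq double %a, %b), double %a, double %b --> %b
//   select (fcmp une double %a, %b), double %a, double %b --> %a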
/// Given operands for a SelectInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
const SimplifyQuery &Q, unsigned MaxRecurse) {
if (auto *CondC = dyn_cast<Constant>(Cond)) {
if (auto *TrueC = dyn_cast<Constant>(TrueVal))
if (auto *FalseC = dyn_cast<Constant>(FalseVal))
return ConstantFoldSelectInstruction(CondC, TrueC, FalseC);
// select poison, X, Y -> poison
if (isa<PoisonValue>(CondC))
return PoisonValue::get(TrueVal->getType());
// select undef, X, Y -> X or Y
if (Q.isUndefValue(CondC))
return isa<Constant>(FalseVal) ? FalseVal : TrueVal;
// select true, X, Y --> X
// select false, X, Y --> Y
// For vectors, allow undef/poison elements in the condition to match the
// defined elements, so we can eliminate the select.
if (match(CondC, m_One()))
return TrueVal;
if (match(CondC, m_Zero()))
return FalseVal;
}
assert(Cond->getType()->isIntOrIntVectorTy(1) &&
"Select must have bool or bool vector condition");
assert(TrueVal->getType() == FalseVal->getType() &&
"Select must have same types for true/false ops");
if (Cond->getType() == TrueVal->getType()) {
// select i1 Cond, i1 true, i1 false --> i1 Cond
if (match(TrueVal, m_One()) && match(FalseVal, m_ZeroInt()))
return Cond;
// (X || Y) && (X || !Y) --> X (commuted 8 ways)
Value *X, *Y;
if (match(FalseVal, m_ZeroInt())) {
if (match(Cond, m_c_LogicalOr(m_Value(X), m_Not(m_Value(Y)))) &&
match(TrueVal, m_c_LogicalOr(m_Specific(X), m_Specific(Y))))
return X;
if (match(TrueVal, m_c_LogicalOr(m_Value(X), m_Not(m_Value(Y)))) &&
match(Cond, m_c_LogicalOr(m_Specific(X), m_Specific(Y))))
return X;
}
}
// select ?, X, X -> X
if (TrueVal == FalseVal)
return TrueVal;
// If the true or false value is poison, we can fold to the other value.
// If the true or false value is undef, we can fold to the other value as
// long as the other value isn't poison.
// select ?, poison, X -> X
// select ?, undef, X -> X
if (isa<PoisonValue>(TrueVal) ||
(Q.isUndefValue(TrueVal) &&
isGuaranteedNotToBePoison(FalseVal, Q.AC, Q.CxtI, Q.DT)))
return FalseVal;
// select ?, X, poison -> X
// select ?, X, undef -> X
if (isa<PoisonValue>(FalseVal) ||
(Q.isUndefValue(FalseVal) &&
isGuaranteedNotToBePoison(TrueVal, Q.AC, Q.CxtI, Q.DT)))
return TrueVal;
// Deal with partial undef vector constants: select ?, VecC, VecC' --> VecC''
Constant *TrueC, *FalseC;
if (isa<FixedVectorType>(TrueVal->getType()) &&
match(TrueVal, m_Constant(TrueC)) &&
match(FalseVal, m_Constant(FalseC))) {
unsigned NumElts =
cast<FixedVectorType>(TrueC->getType())->getNumElements();
SmallVector<Constant *, 16> NewC;
for (unsigned i = 0; i != NumElts; ++i) {
// Bail out on incomplete vector constants.
Constant *TEltC = TrueC->getAggregateElement(i);
Constant *FEltC = FalseC->getAggregateElement(i);
if (!TEltC || !FEltC)
break;
// If the elements match (undef or not), that value is the result. If only
// one element is undef, choose the defined element as the safe result.
if (TEltC == FEltC)
NewC.push_back(TEltC);
else if (isa<PoisonValue>(TEltC) ||
(Q.isUndefValue(TEltC) && isGuaranteedNotToBePoison(FEltC)))
NewC.push_back(FEltC);
else if (isa<PoisonValue>(FEltC) ||
(Q.isUndefValue(FEltC) && isGuaranteedNotToBePoison(TEltC)))
NewC.push_back(TEltC);
else
break;
}
if (NewC.size() == NumElts)
return ConstantVector::get(NewC);
}
if (Value *V =
simplifySelectWithICmpCond(Cond, TrueVal, FalseVal, Q, MaxRecurse))
return V;
if (Value *V = simplifySelectWithFCmp(Cond, TrueVal, FalseVal, Q))
return V;
if (Value *V = foldSelectWithBinaryOp(Cond, TrueVal, FalseVal))
return V;
Optional<bool> Imp = isImpliedByDomCondition(Cond, Q.CxtI, Q.DL);
if (Imp)
return *Imp ? TrueVal : FalseVal;
return nullptr;
}
Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
const SimplifyQuery &Q) {
return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Q, RecursionLimit);
}
/// Given operands for an GetElementPtrInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyGEPInst(Type *SrcTy, Value *Ptr,
ArrayRef<Value *> Indices, bool InBounds,
const SimplifyQuery &Q, unsigned) {
// The type of the GEP pointer operand.
unsigned AS =
cast<PointerType>(Ptr->getType()->getScalarType())->getAddressSpace();
// getelementptr P -> P.
if (Indices.empty())
return Ptr;
// Compute the (pointer) type returned by the GEP instruction.
Type *LastType = GetElementPtrInst::getIndexedType(SrcTy, Indices);
Type *GEPTy = PointerType::get(LastType, AS);
if (VectorType *VT = dyn_cast<VectorType>(Ptr->getType()))
GEPTy = VectorType::get(GEPTy, VT->getElementCount());
else {
for (Value *Op : Indices) {
// If one of the operands is a vector, the result type is a vector of
// pointers. All vector operands must have the same number of elements.
if (VectorType *VT = dyn_cast<VectorType>(Op->getType())) {
GEPTy = VectorType::get(GEPTy, VT->getElementCount());
break;
}
}
}
// getelementptr poison, idx -> poison
// getelementptr baseptr, poison -> poison
if (isa<PoisonValue>(Ptr) ||
any_of(Indices, [](const auto *V) { return isa<PoisonValue>(V); }))
return PoisonValue::get(GEPTy);
if (Q.isUndefValue(Ptr))
// If inbounds, we can choose an out-of-bounds pointer as a base pointer.
return InBounds ? PoisonValue::get(GEPTy) : UndefValue::get(GEPTy);
bool IsScalableVec =
isa<ScalableVectorType>(SrcTy) || any_of(Indices, [](const Value *V) {
return isa<ScalableVectorType>(V->getType());
});
if (Indices.size() == 1) {
// getelementptr P, 0 -> P.
if (match(Indices[0], m_Zero()) && Ptr->getType() == GEPTy)
return Ptr;
Type *Ty = SrcTy;
if (!IsScalableVec && Ty->isSized()) {
Value *P;
uint64_t C;
uint64_t TyAllocSize = Q.DL.getTypeAllocSize(Ty);
// getelementptr P, N -> P if P points to a type of zero size.
if (TyAllocSize == 0 && Ptr->getType() == GEPTy)
return Ptr;
// The following transforms are only safe if the ptrtoint cast
// doesn't truncate the pointers.
if (Indices[0]->getType()->getScalarSizeInBits() ==
Q.DL.getPointerSizeInBits(AS)) {
auto CanSimplify = [GEPTy, &P, Ptr]() -> bool {
return P->getType() == GEPTy &&
getUnderlyingObject(P) == getUnderlyingObject(Ptr);
};
// getelementptr V, (sub P, V) -> P if P points to a type of size 1.
if (TyAllocSize == 1 &&
match(Indices[0],
m_Sub(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Specific(Ptr)))) &&
CanSimplify())
return P;
// getelementptr V, (ashr (sub P, V), C) -> P if P points to a type of
// size 1 << C.
if (match(Indices[0], m_AShr(m_Sub(m_PtrToInt(m_Value(P)),
m_PtrToInt(m_Specific(Ptr))),
m_ConstantInt(C))) &&
TyAllocSize == 1ULL << C && CanSimplify())
return P;
// getelementptr V, (sdiv (sub P, V), C) -> P if P points to a type of
// size C.
if (match(Indices[0], m_SDiv(m_Sub(m_PtrToInt(m_Value(P)),
m_PtrToInt(m_Specific(Ptr))),
m_SpecificInt(TyAllocSize))) &&
CanSimplify())
return P;
}
}
}
if (!IsScalableVec && Q.DL.getTypeAllocSize(LastType) == 1 &&
all_of(Indices.drop_back(1),
[](Value *Idx) { return match(Idx, m_Zero()); })) {
unsigned IdxWidth =
Q.DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace());
if (Q.DL.getTypeSizeInBits(Indices.back()->getType()) == IdxWidth) {
APInt BasePtrOffset(IdxWidth, 0);
Value *StrippedBasePtr =
Ptr->stripAndAccumulateInBoundsConstantOffsets(Q.DL, BasePtrOffset);
// Avoid creating inttoptr of zero here: while LLVM's treatment of
// inttoptr is generally conservative, this particular case is folded to
// a null pointer, which will have incorrect provenance.
// gep (gep V, C), (sub 0, V) -> C
if (match(Indices.back(),
m_Sub(m_Zero(), m_PtrToInt(m_Specific(StrippedBasePtr)))) &&
!BasePtrOffset.isZero()) {
auto *CI = ConstantInt::get(GEPTy->getContext(), BasePtrOffset);
return ConstantExpr::getIntToPtr(CI, GEPTy);
}
// gep (gep V, C), (xor V, -1) -> C-1
if (match(Indices.back(),
m_Xor(m_PtrToInt(m_Specific(StrippedBasePtr)), m_AllOnes())) &&
!BasePtrOffset.isOne()) {
auto *CI = ConstantInt::get(GEPTy->getContext(), BasePtrOffset - 1);
return ConstantExpr::getIntToPtr(CI, GEPTy);
}
}
}
// Check to see if this is constant foldable.
if (!isa<Constant>(Ptr) ||
!all_of(Indices, [](Value *V) { return isa<Constant>(V); }))
return nullptr;
auto *CE = ConstantExpr::getGetElementPtr(SrcTy, cast<Constant>(Ptr), Indices,
InBounds);
return ConstantFoldConstant(CE, Q.DL);
}
Value *llvm::SimplifyGEPInst(Type *SrcTy, Value *Ptr, ArrayRef<Value *> Indices,
bool InBounds, const SimplifyQuery &Q) {
return ::SimplifyGEPInst(SrcTy, Ptr, Indices, InBounds, Q, RecursionLimit);
}
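// Illustrative GEP folds handled above (sketch, typed-pointer syntax):
//   getelementptr i8, i8* %p, i64 0 --> %p   (zero index, matching result type)
//   getelementptr i8, i8* %v, i64 (sub (ptrtoint i8* %p to i64),
//                                      (ptrtoint i8* %v to i64)) --> %p
//     when both pointers share the same underlying object and the ptrtoint
//     casts do not truncate.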
/// Given operands for an InsertValueInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
ArrayRef<unsigned> Idxs, const SimplifyQuery &Q,
unsigned) {
if (Constant *CAgg = dyn_cast<Constant>(Agg))
if (Constant *CVal = dyn_cast<Constant>(Val))
return ConstantFoldInsertValueInstruction(CAgg, CVal, Idxs);
// insertvalue x, undef, n -> x
if (Q.isUndefValue(Val))
return Agg;
// insertvalue x, (extractvalue y, n), n
if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(Val))
if (EV->getAggregateOperand()->getType() == Agg->getType() &&
EV->getIndices() == Idxs) {
// insertvalue undef, (extractvalue y, n), n -> y
if (Q.isUndefValue(Agg))
return EV->getAggregateOperand();
// insertvalue y, (extractvalue y, n), n -> y
if (Agg == EV->getAggregateOperand())
return Agg;
}
return nullptr;
}
Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
ArrayRef<unsigned> Idxs,
const SimplifyQuery &Q) {
return ::SimplifyInsertValueInst(Agg, Val, Idxs, Q, RecursionLimit);
}
Value *llvm::SimplifyInsertElementInst(Value *Vec, Value *Val, Value *Idx,
const SimplifyQuery &Q) {
// Try to constant fold.
auto *VecC = dyn_cast<Constant>(Vec);
auto *ValC = dyn_cast<Constant>(Val);
auto *IdxC = dyn_cast<Constant>(Idx);
if (VecC && ValC && IdxC)
return ConstantExpr::getInsertElement(VecC, ValC, IdxC);
// For fixed-length vector, fold into poison if index is out of bounds.
if (auto *CI = dyn_cast<ConstantInt>(Idx)) {
if (isa<FixedVectorType>(Vec->getType()) &&
CI->uge(cast<FixedVectorType>(Vec->getType())->getNumElements()))
return PoisonValue::get(Vec->getType());
}
// If index is undef, it might be out of bounds (see above case)
if (Q.isUndefValue(Idx))
return PoisonValue::get(Vec->getType());
// If the scalar is poison, or it is undef and there is no risk of
// propagating poison from the vector value, simplify to the vector value.
if (isa<PoisonValue>(Val) ||
(Q.isUndefValue(Val) && isGuaranteedNotToBePoison(Vec)))
return Vec;
// If we are extracting a value from a vector, then inserting it into the same
// place, that's the input vector:
// insertelt Vec, (extractelt Vec, Idx), Idx --> Vec
if (match(Val, m_ExtractElt(m_Specific(Vec), m_Specific(Idx))))
return Vec;
return nullptr;
}
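// For example (sketch):
//   insertelement <4 x i32> %v, (extractelement <4 x i32> %v, i32 %i), i32 %i --> %v
//   insertelement <4 x i32> %v, i32 %x, i32 7 --> poison   (out-of-bounds index)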
/// Given operands for an ExtractValueInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
const SimplifyQuery &, unsigned) {
if (auto *CAgg = dyn_cast<Constant>(Agg))
return ConstantFoldExtractValueInstruction(CAgg, Idxs);
// extractvalue x, (insertvalue y, elt, n), n -> elt
unsigned NumIdxs = Idxs.size();
for (auto *IVI = dyn_cast<InsertValueInst>(Agg); IVI != nullptr;
IVI = dyn_cast<InsertValueInst>(IVI->getAggregateOperand())) {
ArrayRef<unsigned> InsertValueIdxs = IVI->getIndices();
unsigned NumInsertValueIdxs = InsertValueIdxs.size();
unsigned NumCommonIdxs = std::min(NumInsertValueIdxs, NumIdxs);
if (InsertValueIdxs.slice(0, NumCommonIdxs) ==
Idxs.slice(0, NumCommonIdxs)) {
if (NumIdxs == NumInsertValueIdxs)
return IVI->getInsertedValueOperand();
break;
}
}
return nullptr;
}
Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
const SimplifyQuery &Q) {
return ::SimplifyExtractValueInst(Agg, Idxs, Q, RecursionLimit);
}
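// For example (sketch):
//   extractvalue (insertvalue {i32, i32} %agg, i32 %x, 1), 1 --> %x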
/// Given operands for an ExtractElementInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx,
const SimplifyQuery &Q, unsigned) {
auto *VecVTy = cast<VectorType>(Vec->getType());
if (auto *CVec = dyn_cast<Constant>(Vec)) {
if (auto *CIdx = dyn_cast<Constant>(Idx))
return ConstantExpr::getExtractElement(CVec, CIdx);
if (Q.isUndefValue(Vec))
return UndefValue::get(VecVTy->getElementType());
}
// An undef extract index can be arbitrarily chosen to be an out-of-range
// index value, which would result in the instruction being poison.
if (Q.isUndefValue(Idx))
return PoisonValue::get(VecVTy->getElementType());
// If extracting a specified index from the vector, see if we can recursively
// find a previously computed scalar that was inserted into the vector.
if (auto *IdxC = dyn_cast<ConstantInt>(Idx)) {
// For fixed-length vector, fold into undef if index is out of bounds.
unsigned MinNumElts = VecVTy->getElementCount().getKnownMinValue();
if (isa<FixedVectorType>(VecVTy) && IdxC->getValue().uge(MinNumElts))
return PoisonValue::get(VecVTy->getElementType());
// Handle case where an element is extracted from a splat.
if (IdxC->getValue().ult(MinNumElts))
if (auto *Splat = getSplatValue(Vec))
return Splat;
if (Value *Elt = findScalarElement(Vec, IdxC->getZExtValue()))
return Elt;
} else {
// The index is not relevant if our vector is a splat.
if (Value *Splat = getSplatValue(Vec))
return Splat;
}
return nullptr;
}
Value *llvm::SimplifyExtractElementInst(Value *Vec, Value *Idx,
const SimplifyQuery &Q) {
return ::SimplifyExtractElementInst(Vec, Idx, Q, RecursionLimit);
}
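// For example (sketch):
//   extractelement <4 x i32> %v, i32 9 --> poison   (constant index out of range)
//   extracting any lane of a splat of %s yields %s, even with a variable index.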
/// See if we can fold the given phi. If not, returns null.
static Value *SimplifyPHINode(PHINode *PN, ArrayRef<Value *> IncomingValues,
const SimplifyQuery &Q) {
// WARNING: no matter how worthwhile it may seem, we cannot perform PHI CSE
// here, because the PHI we may succeed simplifying to was not
// def-reachable from the original PHI!
// If all of the PHI's incoming values are the same then replace the PHI node
// with the common value.
Value *CommonValue = nullptr;
bool HasUndefInput = false;
for (Value *Incoming : IncomingValues) {
// If the incoming value is the phi node itself, it can safely be skipped.
if (Incoming == PN) continue;
if (Q.isUndefValue(Incoming)) {
// Remember that we saw an undef value, but otherwise ignore them.
HasUndefInput = true;
continue;
}
if (CommonValue && Incoming != CommonValue)
return nullptr; // Not the same, bail out.
CommonValue = Incoming;
}
// If CommonValue is null then all of the incoming values were either undef or
// equal to the phi node itself.
if (!CommonValue)
return UndefValue::get(PN->getType());
// If we have a PHI node like phi(X, undef, X), where X is defined by some
// instruction, we cannot return X as the result of the PHI node unless it
// dominates the PHI block.
if (HasUndefInput)
return valueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : nullptr;
return CommonValue;
}
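// For example (sketch): phi i32 [ %x, %bb0 ], [ undef, %bb1 ], [ %x, %bb2 ]
// simplifies to %x, provided the definition of %x dominates the phi's block.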
static Value *SimplifyCastInst(unsigned CastOpc, Value *Op,
Type *Ty, const SimplifyQuery &Q, unsigned MaxRecurse) {
if (auto *C = dyn_cast<Constant>(Op))
return ConstantFoldCastOperand(CastOpc, C, Ty, Q.DL);
if (auto *CI = dyn_cast<CastInst>(Op)) {
auto *Src = CI->getOperand(0);
Type *SrcTy = Src->getType();
Type *MidTy = CI->getType();
Type *DstTy = Ty;
if (Src->getType() == Ty) {
auto FirstOp = static_cast<Instruction::CastOps>(CI->getOpcode());
auto SecondOp = static_cast<Instruction::CastOps>(CastOpc);
Type *SrcIntPtrTy =
SrcTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(SrcTy) : nullptr;
Type *MidIntPtrTy =
MidTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(MidTy) : nullptr;
Type *DstIntPtrTy =
DstTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(DstTy) : nullptr;
if (CastInst::isEliminableCastPair(FirstOp, SecondOp, SrcTy, MidTy, DstTy,
SrcIntPtrTy, MidIntPtrTy,
DstIntPtrTy) == Instruction::BitCast)
return Src;
}
}
// bitcast x -> x
if (CastOpc == Instruction::BitCast)
if (Op->getType() == Ty)
return Op;
return nullptr;
}
Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
const SimplifyQuery &Q) {
return ::SimplifyCastInst(CastOpc, Op, Ty, Q, RecursionLimit);
}
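// For example (sketch): a round-trip such as
//   %i = ptrtoint i8* %p to i64 ; %q = inttoptr i64 %i to i8*
// lets %q fold to %p when isEliminableCastPair classifies the pair as a no-op
// bitcast for the current data layout.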
/// For the given destination element of a shuffle, peek through shuffles to
/// match a root vector source operand that contains that element in the same
/// vector lane (ie, the same mask index), so we can eliminate the shuffle(s).
static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1,
int MaskVal, Value *RootVec,
unsigned MaxRecurse) {
if (!MaxRecurse--)
return nullptr;
// Bail out if any mask value is undefined. That kind of shuffle may be
// simplified further based on demanded bits or other folds.
if (MaskVal == -1)
return nullptr;
// The mask value chooses which source operand we need to look at next.
int InVecNumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
int RootElt = MaskVal;
Value *SourceOp = Op0;
if (MaskVal >= InVecNumElts) {
RootElt = MaskVal - InVecNumElts;
SourceOp = Op1;
}
// If the source operand is a shuffle itself, look through it to find the
// matching root vector.
if (auto *SourceShuf = dyn_cast<ShuffleVectorInst>(SourceOp)) {
return foldIdentityShuffles(
DestElt, SourceShuf->getOperand(0), SourceShuf->getOperand(1),
SourceShuf->getMaskValue(RootElt), RootVec, MaxRecurse);
}
// TODO: Look through bitcasts? What if the bitcast changes the vector element
// size?
// The source operand is not a shuffle. Initialize the root vector value for
// this shuffle if that has not been done yet.
if (!RootVec)
RootVec = SourceOp;
// Give up as soon as a source operand does not match the existing root value.
if (RootVec != SourceOp)
return nullptr;
// The element must be coming from the same lane in the source vector
// (although it may have crossed lanes in intermediate shuffles).
if (RootElt != DestElt)
return nullptr;
return RootVec;
}
static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1,
ArrayRef<int> Mask, Type *RetTy,
const SimplifyQuery &Q,
unsigned MaxRecurse) {
if (all_of(Mask, [](int Elem) { return Elem == UndefMaskElem; }))
return UndefValue::get(RetTy);
auto *InVecTy = cast<VectorType>(Op0->getType());
unsigned MaskNumElts = Mask.size();
ElementCount InVecEltCount = InVecTy->getElementCount();
bool Scalable = InVecEltCount.isScalable();
SmallVector<int, 32> Indices;
Indices.assign(Mask.begin(), Mask.end());
// Canonicalization: If mask does not select elements from an input vector,
// replace that input vector with poison.
if (!Scalable) {
bool MaskSelects0 = false, MaskSelects1 = false;
unsigned InVecNumElts = InVecEltCount.getKnownMinValue();
for (unsigned i = 0; i != MaskNumElts; ++i) {
if (Indices[i] == -1)
continue;
if ((unsigned)Indices[i] < InVecNumElts)
MaskSelects0 = true;
else
MaskSelects1 = true;
}
if (!MaskSelects0)
Op0 = PoisonValue::get(InVecTy);
if (!MaskSelects1)
Op1 = PoisonValue::get(InVecTy);
}
auto *Op0Const = dyn_cast<Constant>(Op0);
auto *Op1Const = dyn_cast<Constant>(Op1);
// If all operands are constant, constant fold the shuffle. This
// transformation depends on the value of the mask which is not known at
// compile time for scalable vectors
if (Op0Const && Op1Const)
return ConstantExpr::getShuffleVector(Op0Const, Op1Const, Mask);
// Canonicalization: if only one input vector is constant, it shall be the
// second one. This transformation depends on the value of the mask which
// is not known at compile time for scalable vectors
if (!Scalable && Op0Const && !Op1Const) {
std::swap(Op0, Op1);
ShuffleVectorInst::commuteShuffleMask(Indices,
InVecEltCount.getKnownMinValue());
}
// A splat of an inserted scalar constant becomes a vector constant:
// shuf (inselt ?, C, IndexC), undef, <IndexC, IndexC...> --> <C, C...>
// NOTE: We may have commuted above, so analyze the updated Indices, not the
// original mask constant.
// NOTE: This transformation depends on the value of the mask which is not
// known at compile time for scalable vectors
Constant *C;
ConstantInt *IndexC;
if (!Scalable && match(Op0, m_InsertElt(m_Value(), m_Constant(C),
m_ConstantInt(IndexC)))) {
// Match a splat shuffle mask of the insert index allowing undef elements.
int InsertIndex = IndexC->getZExtValue();
if (all_of(Indices, [InsertIndex](int MaskElt) {
return MaskElt == InsertIndex || MaskElt == -1;
})) {
assert(isa<UndefValue>(Op1) && "Expected undef operand 1 for splat");
// Shuffle mask undefs become undefined constant result elements.
SmallVector<Constant *, 16> VecC(MaskNumElts, C);
for (unsigned i = 0; i != MaskNumElts; ++i)
if (Indices[i] == -1)
VecC[i] = UndefValue::get(C->getType());
return ConstantVector::get(VecC);
}
}
// A shuffle of a splat is always the splat itself. Legal if the shuffle's
// value type is the same as the input vectors' type.
if (auto *OpShuf = dyn_cast<ShuffleVectorInst>(Op0))
if (Q.isUndefValue(Op1) && RetTy == InVecTy &&
is_splat(OpShuf->getShuffleMask()))
return Op0;
// All remaining transformations depend on the value of the mask, which is
// not known at compile time for scalable vectors.
if (Scalable)
return nullptr;
// Don't fold a shuffle with undef mask elements. This may get folded in a
// better way using demanded bits or other analysis.
// TODO: Should we allow this?
if (is_contained(Indices, -1))
return nullptr;
// Check if every element of this shuffle can be mapped back to the
// corresponding element of a single root vector. If so, we don't need this
// shuffle. This handles simple identity shuffles as well as chains of
// shuffles that may widen/narrow and/or move elements across lanes and back.
Value *RootVec = nullptr;
for (unsigned i = 0; i != MaskNumElts; ++i) {
// Note that recursion is limited for each vector element, so if any element
// exceeds the limit, this will fail to simplify.
RootVec =
foldIdentityShuffles(i, Op0, Op1, Indices[i], RootVec, MaxRecurse);
// We can't replace a widening/narrowing shuffle with one of its operands.
if (!RootVec || RootVec->getType() != RetTy)
return nullptr;
}
return RootVec;
}
/// Given operands for a ShuffleVectorInst, fold the result or return null.
Value *llvm::SimplifyShuffleVectorInst(Value *Op0, Value *Op1,
ArrayRef<int> Mask, Type *RetTy,
const SimplifyQuery &Q) {
return ::SimplifyShuffleVectorInst(Op0, Op1, Mask, RetTy, Q, RecursionLimit);
}
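// For example (sketch), a chain of lane-preserving shuffles folds away:
//   %r = shufflevector <4 x i32> %v, <4 x i32> poison, <i32 3, i32 2, i32 1, i32 0>
//   %s = shufflevector <4 x i32> %r, <4 x i32> poison, <i32 3, i32 2, i32 1, i32 0>
// %s simplifies to %v because every result lane maps back to the same lane of
// the single root vector %v.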
static Constant *foldConstant(Instruction::UnaryOps Opcode,
Value *&Op, const SimplifyQuery &Q) {
if (auto *C = dyn_cast<Constant>(Op))
return ConstantFoldUnaryOpOperand(Opcode, C, Q.DL);
return nullptr;
}
/// Given the operand for an FNeg, see if we can fold the result. If not, this
/// returns null.
static Value *simplifyFNegInst(Value *Op, FastMathFlags FMF,
const SimplifyQuery &Q, unsigned MaxRecurse) {
if (Constant *C = foldConstant(Instruction::FNeg, Op, Q))
return C;
Value *X;
// fneg (fneg X) ==> X
if (match(Op, m_FNeg(m_Value(X))))
return X;
return nullptr;
}
Value *llvm::SimplifyFNegInst(Value *Op, FastMathFlags FMF,
const SimplifyQuery &Q) {
return ::simplifyFNegInst(Op, FMF, Q, RecursionLimit);
}
static Constant *propagateNaN(Constant *In) {
// If the input is a vector with undef elements, just return a default NaN.
if (!In->isNaN())
return ConstantFP::getNaN(In->getType());
// Propagate the existing NaN constant when possible.
// TODO: Should we quiet a signaling NaN?
return In;
}
/// Perform folds that are common to any floating-point operation. This implies
/// transforms based on poison/undef/NaN because the operation itself makes no
/// difference to the result.
static Constant *simplifyFPOp(ArrayRef<Value *> Ops, FastMathFlags FMF,
const SimplifyQuery &Q,
fp::ExceptionBehavior ExBehavior,
RoundingMode Rounding) {
// Poison is independent of anything else. It always propagates from an
// operand to a math result.
if (any_of(Ops, [](Value *V) { return match(V, m_Poison()); }))
return PoisonValue::get(Ops[0]->getType());
for (Value *V : Ops) {
bool IsNan = match(V, m_NaN());
bool IsInf = match(V, m_Inf());
bool IsUndef = Q.isUndefValue(V);
// If this operation has 'nnan' or 'ninf' and at least one disallowed operand
// (an undef operand can be chosen to be NaN/Inf), then the result of
// this operation is poison.
if (FMF.noNaNs() && (IsNan || IsUndef))
return PoisonValue::get(V->getType());
if (FMF.noInfs() && (IsInf || IsUndef))
return PoisonValue::get(V->getType());
if (isDefaultFPEnvironment(ExBehavior, Rounding)) {
if (IsUndef || IsNan)
return propagateNaN(cast<Constant>(V));
} else if (ExBehavior != fp::ebStrict) {
if (IsNan)
return propagateNaN(cast<Constant>(V));
}
}
return nullptr;
}
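// For example (sketch):
//   fadd nnan double %x, undef --> poison   (undef may be chosen to be a NaN)
//   fadd double %x, 0x7FF8000000000000 --> 0x7FF8000000000000   (NaN propagates
//   in the default FP environment)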
// TODO: Move this out to a header file:
static inline bool canIgnoreSNaN(fp::ExceptionBehavior EB, FastMathFlags FMF) {
return (EB == fp::ebIgnore || FMF.noNaNs());
}
/// Given operands for an FAdd, see if we can fold the result. If not, this
/// returns null.
static Value *
SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q, unsigned MaxRecurse,
fp::ExceptionBehavior ExBehavior = fp::ebIgnore,
RoundingMode Rounding = RoundingMode::NearestTiesToEven) {
if (isDefaultFPEnvironment(ExBehavior, Rounding))
if (Constant *C = foldOrCommuteConstant(Instruction::FAdd, Op0, Op1, Q))
return C;
if (Constant *C = simplifyFPOp({Op0, Op1}, FMF, Q, ExBehavior, Rounding))
return C;
// fadd X, -0 ==> X
// With strict/constrained FP, we have these possible edge cases that do
// not simplify to Op0:
// fadd SNaN, -0.0 --> QNaN
// fadd +0.0, -0.0 --> -0.0 (but only with round toward negative)
if (canIgnoreSNaN(ExBehavior, FMF) &&
(!canRoundingModeBe(Rounding, RoundingMode::TowardNegative) ||
FMF.noSignedZeros()))
if (match(Op1, m_NegZeroFP()))
return Op0;
// fadd X, 0 ==> X, when we know X is not -0
if (canIgnoreSNaN(ExBehavior, FMF))
if (match(Op1, m_PosZeroFP()) &&
(FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI)))
return Op0;
if (!isDefaultFPEnvironment(ExBehavior, Rounding))
return nullptr;
// With nnan: -X + X --> 0.0 (and commuted variant)
// We don't have to explicitly exclude infinities (ninf): INF + -INF == NaN.
// Negative zeros are allowed because we always end up with positive zero:
// X = -0.0: (-0.0 - (-0.0)) + (-0.0) == ( 0.0) + (-0.0) == 0.0
// X = -0.0: ( 0.0 - (-0.0)) + (-0.0) == ( 0.0) + (-0.0) == 0.0
// X = 0.0: (-0.0 - ( 0.0)) + ( 0.0) == (-0.0) + ( 0.0) == 0.0
// X = 0.0: ( 0.0 - ( 0.0)) + ( 0.0) == ( 0.0) + ( 0.0) == 0.0
if (FMF.noNaNs()) {
if (match(Op0, m_FSub(m_AnyZeroFP(), m_Specific(Op1))) ||
match(Op1, m_FSub(m_AnyZeroFP(), m_Specific(Op0))))
return ConstantFP::getNullValue(Op0->getType());
if (match(Op0, m_FNeg(m_Specific(Op1))) ||
match(Op1, m_FNeg(m_Specific(Op0))))
return ConstantFP::getNullValue(Op0->getType());
}
// (X - Y) + Y --> X
// Y + (X - Y) --> X
Value *X;
if (FMF.noSignedZeros() && FMF.allowReassoc() &&
(match(Op0, m_FSub(m_Value(X), m_Specific(Op1))) ||
match(Op1, m_FSub(m_Value(X), m_Specific(Op0)))))
return X;
return nullptr;
}
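// For example (sketch):
//   fadd double %x, -0.0           --> %x    (default FP environment)
//   fadd nnan double %x, (fneg %x) --> 0.0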
/// Given operands for an FSub, see if we can fold the result. If not, this
/// returns null.
static Value *
SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q, unsigned MaxRecurse,
fp::ExceptionBehavior ExBehavior = fp::ebIgnore,
RoundingMode Rounding = RoundingMode::NearestTiesToEven) {
if (isDefaultFPEnvironment(ExBehavior, Rounding))
if (Constant *C = foldOrCommuteConstant(Instruction::FSub, Op0, Op1, Q))
return C;
if (Constant *C = simplifyFPOp({Op0, Op1}, FMF, Q, ExBehavior, Rounding))
return C;
if (!isDefaultFPEnvironment(ExBehavior, Rounding))
return nullptr;
// fsub X, +0 ==> X
if (match(Op1, m_PosZeroFP()))
return Op0;
// fsub X, -0 ==> X, when we know X is not -0
if (match(Op1, m_NegZeroFP()) &&
(FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI)))
return Op0;
// fsub -0.0, (fsub -0.0, X) ==> X
// fsub -0.0, (fneg X) ==> X
Value *X;
if (match(Op0, m_NegZeroFP()) &&
match(Op1, m_FNeg(m_Value(X))))
return X;
// fsub 0.0, (fsub 0.0, X) ==> X if signed zeros are ignored.
// fsub 0.0, (fneg X) ==> X if signed zeros are ignored.
if (FMF.noSignedZeros() && match(Op0, m_AnyZeroFP()) &&
(match(Op1, m_FSub(m_AnyZeroFP(), m_Value(X))) ||
match(Op1, m_FNeg(m_Value(X)))))
return X;
// fsub nnan x, x ==> 0.0
if (FMF.noNaNs() && Op0 == Op1)
return Constant::getNullValue(Op0->getType());
// Y - (Y - X) --> X
// (X + Y) - Y --> X
if (FMF.noSignedZeros() && FMF.allowReassoc() &&
(match(Op1, m_FSub(m_Specific(Op0), m_Value(X))) ||
match(Op0, m_c_FAdd(m_Specific(Op1), m_Value(X)))))
return X;
return nullptr;
}
static Value *SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q, unsigned MaxRecurse,
fp::ExceptionBehavior ExBehavior,
RoundingMode Rounding) {
if (Constant *C = simplifyFPOp({Op0, Op1}, FMF, Q, ExBehavior, Rounding))
return C;
if (!isDefaultFPEnvironment(ExBehavior, Rounding))
return nullptr;
// fmul X, 1.0 ==> X
if (match(Op1, m_FPOne()))
return Op0;
// fmul 1.0, X ==> X
if (match(Op0, m_FPOne()))
return Op1;
// fmul nnan nsz X, 0 ==> 0
if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZeroFP()))
return ConstantFP::getNullValue(Op0->getType());
// fmul nnan nsz 0, X ==> 0
if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZeroFP()))
return ConstantFP::getNullValue(Op1->getType());
// sqrt(X) * sqrt(X) --> X, if we can:
// 1. Remove the intermediate rounding (reassociate).
// 2. Ignore non-zero negative numbers because sqrt would produce NaN.
// 3. Ignore -0.0 because sqrt(-0.0) == -0.0, but -0.0 * -0.0 == 0.0.
Value *X;
if (Op0 == Op1 && match(Op0, m_Intrinsic<Intrinsic::sqrt>(m_Value(X))) &&
FMF.allowReassoc() && FMF.noNaNs() && FMF.noSignedZeros())
return X;
return nullptr;
}
/// Given the operands for an FMul, see if we can fold the result
static Value *
SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q, unsigned MaxRecurse,
fp::ExceptionBehavior ExBehavior = fp::ebIgnore,
RoundingMode Rounding = RoundingMode::NearestTiesToEven) {
if (isDefaultFPEnvironment(ExBehavior, Rounding))
if (Constant *C = foldOrCommuteConstant(Instruction::FMul, Op0, Op1, Q))
return C;
// Now apply simplifications that do not require rounding.
return SimplifyFMAFMul(Op0, Op1, FMF, Q, MaxRecurse, ExBehavior, Rounding);
}
Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q,
fp::ExceptionBehavior ExBehavior,
RoundingMode Rounding) {
return ::SimplifyFAddInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
Rounding);
}
Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q,
fp::ExceptionBehavior ExBehavior,
RoundingMode Rounding) {
return ::SimplifyFSubInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
Rounding);
}
Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q,
fp::ExceptionBehavior ExBehavior,
RoundingMode Rounding) {
return ::SimplifyFMulInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
Rounding);
}
Value *llvm::SimplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q,
fp::ExceptionBehavior ExBehavior,
RoundingMode Rounding) {
return ::SimplifyFMAFMul(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
Rounding);
}
static Value *
SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q, unsigned,
fp::ExceptionBehavior ExBehavior = fp::ebIgnore,
RoundingMode Rounding = RoundingMode::NearestTiesToEven) {
if (isDefaultFPEnvironment(ExBehavior, Rounding))
if (Constant *C = foldOrCommuteConstant(Instruction::FDiv, Op0, Op1, Q))
return C;
if (Constant *C = simplifyFPOp({Op0, Op1}, FMF, Q, ExBehavior, Rounding))
return C;
if (!isDefaultFPEnvironment(ExBehavior, Rounding))
return nullptr;
// X / 1.0 -> X
if (match(Op1, m_FPOne()))
return Op0;
// 0 / X -> 0
// Requires that NaNs are off (X could be zero) and signed zeroes are
// ignored (X could be positive or negative, so the output sign is unknown).
if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZeroFP()))
return ConstantFP::getNullValue(Op0->getType());
if (FMF.noNaNs()) {
// X / X -> 1.0 is legal when NaNs are ignored.
// We can ignore infinities because INF/INF is NaN.
if (Op0 == Op1)
return ConstantFP::get(Op0->getType(), 1.0);
// (X * Y) / Y --> X if we can reassociate to the above form.
Value *X;
if (FMF.allowReassoc() && match(Op0, m_c_FMul(m_Value(X), m_Specific(Op1))))
return X;
// -X / X -> -1.0 and
// X / -X -> -1.0 are legal when NaNs are ignored.
// We can ignore signed zeros because +-0.0/+-0.0 is NaN and ignored.
if (match(Op0, m_FNegNSZ(m_Specific(Op1))) ||
match(Op1, m_FNegNSZ(m_Specific(Op0))))
return ConstantFP::get(Op0->getType(), -1.0);
}
return nullptr;
}
Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q,
fp::ExceptionBehavior ExBehavior,
RoundingMode Rounding) {
return ::SimplifyFDivInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
Rounding);
}
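// For example (sketch):
//   fdiv nnan double %x, %x      --> 1.0
//   fdiv nnan nsz double 0.0, %x --> 0.0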
static Value *
SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q, unsigned,
fp::ExceptionBehavior ExBehavior = fp::ebIgnore,
RoundingMode Rounding = RoundingMode::NearestTiesToEven) {
if (isDefaultFPEnvironment(ExBehavior, Rounding))
if (Constant *C = foldOrCommuteConstant(Instruction::FRem, Op0, Op1, Q))
return C;
if (Constant *C = simplifyFPOp({Op0, Op1}, FMF, Q, ExBehavior, Rounding))
return C;
if (!isDefaultFPEnvironment(ExBehavior, Rounding))
return nullptr;
// Unlike fdiv, the result of frem always matches the sign of the dividend.
// The constant match may include undef elements in a vector, so return a full
// zero constant as the result.
if (FMF.noNaNs()) {
// +0 % X -> 0
if (match(Op0, m_PosZeroFP()))
return ConstantFP::getNullValue(Op0->getType());
// -0 % X -> -0
if (match(Op0, m_NegZeroFP()))
return ConstantFP::getNegativeZero(Op0->getType());
}
return nullptr;
}
Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
const SimplifyQuery &Q,
fp::ExceptionBehavior ExBehavior,
RoundingMode Rounding) {
return ::SimplifyFRemInst(Op0, Op1, FMF, Q, RecursionLimit, ExBehavior,
Rounding);
}
//=== Helper functions for higher up the class hierarchy.
/// Given the operand for a UnaryOperator, see if we can fold the result.
/// If not, this returns null.
static Value *simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q,
unsigned MaxRecurse) {
switch (Opcode) {
case Instruction::FNeg:
return simplifyFNegInst(Op, FastMathFlags(), Q, MaxRecurse);
default:
llvm_unreachable("Unexpected opcode");
}
}
/// Given the operand for a UnaryOperator, see if we can fold the result.
/// If not, this returns null.
/// Try to use FastMathFlags when folding the result.
static Value *simplifyFPUnOp(unsigned Opcode, Value *Op,
const FastMathFlags &FMF,
const SimplifyQuery &Q, unsigned MaxRecurse) {
switch (Opcode) {
case Instruction::FNeg:
return simplifyFNegInst(Op, FMF, Q, MaxRecurse);
default:
return simplifyUnOp(Opcode, Op, Q, MaxRecurse);
}
}
Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q) {
return ::simplifyUnOp(Opcode, Op, Q, RecursionLimit);
}
Value *llvm::SimplifyUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
const SimplifyQuery &Q) {
return ::simplifyFPUnOp(Opcode, Op, FMF, Q, RecursionLimit);
}
/// Given operands for a BinaryOperator, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
const SimplifyQuery &Q, unsigned MaxRecurse) {
switch (Opcode) {
case Instruction::Add:
return SimplifyAddInst(LHS, RHS, false, false, Q, MaxRecurse);
case Instruction::Sub:
return SimplifySubInst(LHS, RHS, false, false, Q, MaxRecurse);
case Instruction::Mul:
return SimplifyMulInst(LHS, RHS, Q, MaxRecurse);
case Instruction::SDiv:
return SimplifySDivInst(LHS, RHS, Q, MaxRecurse);
case Instruction::UDiv:
return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse);
case Instruction::SRem:
return SimplifySRemInst(LHS, RHS, Q, MaxRecurse);
case Instruction::URem:
return SimplifyURemInst(LHS, RHS, Q, MaxRecurse);
case Instruction::Shl:
return SimplifyShlInst(LHS, RHS, false, false, Q, MaxRecurse);
case Instruction::LShr:
return SimplifyLShrInst(LHS, RHS, false, Q, MaxRecurse);
case Instruction::AShr:
return SimplifyAShrInst(LHS, RHS, false, Q, MaxRecurse);
case Instruction::And:
return SimplifyAndInst(LHS, RHS, Q, MaxRecurse);
case Instruction::Or:
return SimplifyOrInst(LHS, RHS, Q, MaxRecurse);
case Instruction::Xor:
return SimplifyXorInst(LHS, RHS, Q, MaxRecurse);
case Instruction::FAdd:
return SimplifyFAddInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
case Instruction::FSub:
return SimplifyFSubInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
case Instruction::FMul:
return SimplifyFMulInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
case Instruction::FDiv:
return SimplifyFDivInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
case Instruction::FRem:
return SimplifyFRemInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse);
default:
llvm_unreachable("Unexpected opcode");
}
}
/// Given operands for a BinaryOperator, see if we can fold the result.
/// If not, this returns null.
/// Try to use FastMathFlags when folding the result.
static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
const FastMathFlags &FMF, const SimplifyQuery &Q,
unsigned MaxRecurse) {
switch (Opcode) {
case Instruction::FAdd:
return SimplifyFAddInst(LHS, RHS, FMF, Q, MaxRecurse);
case Instruction::FSub:
return SimplifyFSubInst(LHS, RHS, FMF, Q, MaxRecurse);
case Instruction::FMul:
return SimplifyFMulInst(LHS, RHS, FMF, Q, MaxRecurse);
case Instruction::FDiv:
return SimplifyFDivInst(LHS, RHS, FMF, Q, MaxRecurse);
default:
return SimplifyBinOp(Opcode, LHS, RHS, Q, MaxRecurse);
}
}
Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
const SimplifyQuery &Q) {
return ::SimplifyBinOp(Opcode, LHS, RHS, Q, RecursionLimit);
}
Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
FastMathFlags FMF, const SimplifyQuery &Q) {
return ::SimplifyBinOp(Opcode, LHS, RHS, FMF, Q, RecursionLimit);
}
/// Given operands for a CmpInst, see if we can fold the result.
static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const SimplifyQuery &Q, unsigned MaxRecurse) {
if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate))
return SimplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse);
return SimplifyFCmpInst(Predicate, LHS, RHS, FastMathFlags(), Q, MaxRecurse);
}
Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const SimplifyQuery &Q) {
return ::SimplifyCmpInst(Predicate, LHS, RHS, Q, RecursionLimit);
}
static bool IsIdempotent(Intrinsic::ID ID) {
switch (ID) {
default: return false;
// Unary idempotent: f(f(x)) = f(x)
case Intrinsic::fabs:
case Intrinsic::floor:
case Intrinsic::ceil:
case Intrinsic::trunc:
case Intrinsic::rint:
case Intrinsic::nearbyint:
case Intrinsic::round:
case Intrinsic::roundeven:
case Intrinsic::canonicalize:
return true;
}
}
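// For example (sketch), idempotence lets simplifyUnaryIntrinsic below fold
// llvm.fabs(llvm.fabs(%x)) to the inner llvm.fabs(%x).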
static Value *SimplifyRelativeLoad(Constant *Ptr, Constant *Offset,
const DataLayout &DL) {
GlobalValue *PtrSym;
APInt PtrOffset;
if (!IsConstantOffsetFromGlobal(Ptr, PtrSym, PtrOffset, DL))
return nullptr;
Type *Int8PtrTy = Type::getInt8PtrTy(Ptr->getContext());
Type *Int32Ty = Type::getInt32Ty(Ptr->getContext());
Type *Int32PtrTy = Int32Ty->getPointerTo();
Type *Int64Ty = Type::getInt64Ty(Ptr->getContext());
auto *OffsetConstInt = dyn_cast<ConstantInt>(Offset);
if (!OffsetConstInt || OffsetConstInt->getType()->getBitWidth() > 64)
return nullptr;
uint64_t OffsetInt = OffsetConstInt->getSExtValue();
if (OffsetInt % 4 != 0)
return nullptr;
Constant *C = ConstantExpr::getGetElementPtr(
Int32Ty, ConstantExpr::getBitCast(Ptr, Int32PtrTy),
ConstantInt::get(Int64Ty, OffsetInt / 4));
Constant *Loaded = ConstantFoldLoadFromConstPtr(C, Int32Ty, DL);
if (!Loaded)
return nullptr;
auto *LoadedCE = dyn_cast<ConstantExpr>(Loaded);
if (!LoadedCE)
return nullptr;
if (LoadedCE->getOpcode() == Instruction::Trunc) {
LoadedCE = dyn_cast<ConstantExpr>(LoadedCE->getOperand(0));
if (!LoadedCE)
return nullptr;
}
if (LoadedCE->getOpcode() != Instruction::Sub)
return nullptr;
auto *LoadedLHS = dyn_cast<ConstantExpr>(LoadedCE->getOperand(0));
if (!LoadedLHS || LoadedLHS->getOpcode() != Instruction::PtrToInt)
return nullptr;
auto *LoadedLHSPtr = LoadedLHS->getOperand(0);
Constant *LoadedRHS = LoadedCE->getOperand(1);
GlobalValue *LoadedRHSSym;
APInt LoadedRHSOffset;
if (!IsConstantOffsetFromGlobal(LoadedRHS, LoadedRHSSym, LoadedRHSOffset,
DL) ||
PtrSym != LoadedRHSSym || PtrOffset != LoadedRHSOffset)
return nullptr;
return ConstantExpr::getBitCast(LoadedLHSPtr, Int8PtrTy);
}
static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
const SimplifyQuery &Q) {
// Idempotent functions return the same result when called repeatedly.
Intrinsic::ID IID = F->getIntrinsicID();
if (IsIdempotent(IID))
if (auto *II = dyn_cast<IntrinsicInst>(Op0))
if (II->getIntrinsicID() == IID)
return II;
Value *X;
switch (IID) {
case Intrinsic::fabs:
if (SignBitMustBeZero(Op0, Q.TLI)) return Op0;
break;
case Intrinsic::bswap:
// bswap(bswap(x)) -> x
if (match(Op0, m_BSwap(m_Value(X)))) return X;
break;
case Intrinsic::bitreverse:
// bitreverse(bitreverse(x)) -> x
if (match(Op0, m_BitReverse(m_Value(X)))) return X;
break;
case Intrinsic::ctpop: {
// If everything but the lowest bit is zero, that bit is the pop-count. Ex:
// ctpop(and X, 1) --> and X, 1
unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
if (MaskedValueIsZero(Op0, APInt::getHighBitsSet(BitWidth, BitWidth - 1),
Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
return Op0;
break;
}
case Intrinsic::exp:
// exp(log(x)) -> x
if (Q.CxtI->hasAllowReassoc() &&
match(Op0, m_Intrinsic<Intrinsic::log>(m_Value(X)))) return X;
break;
case Intrinsic::exp2:
// exp2(log2(x)) -> x
if (Q.CxtI->hasAllowReassoc() &&
match(Op0, m_Intrinsic<Intrinsic::log2>(m_Value(X)))) return X;
break;
case Intrinsic::log:
// log(exp(x)) -> x
if (Q.CxtI->hasAllowReassoc() &&
match(Op0, m_Intrinsic<Intrinsic::exp>(m_Value(X)))) return X;
break;
case Intrinsic::log2:
// log2(exp2(x)) -> x
if (Q.CxtI->hasAllowReassoc() &&
(match(Op0, m_Intrinsic<Intrinsic::exp2>(m_Value(X))) ||
match(Op0, m_Intrinsic<Intrinsic::pow>(m_SpecificFP(2.0),
m_Value(X))))) return X;
break;
case Intrinsic::log10:
// log10(pow(10.0, x)) -> x
if (Q.CxtI->hasAllowReassoc() &&
match(Op0, m_Intrinsic<Intrinsic::pow>(m_SpecificFP(10.0),
m_Value(X)))) return X;
break;
case Intrinsic::floor:
case Intrinsic::trunc:
case Intrinsic::ceil:
case Intrinsic::round:
case Intrinsic::roundeven:
case Intrinsic::nearbyint:
case Intrinsic::rint: {
// floor (sitofp x) -> sitofp x
// floor (uitofp x) -> uitofp x
//
// Converting from int always results in a finite integral number or
// infinity. For either of those inputs, these rounding functions always
// return the same value, so the rounding can be eliminated.
if (match(Op0, m_SIToFP(m_Value())) || match(Op0, m_UIToFP(m_Value())))
return Op0;
break;
}
case Intrinsic::experimental_vector_reverse:
// experimental.vector.reverse(experimental.vector.reverse(x)) -> x
if (match(Op0,
m_Intrinsic<Intrinsic::experimental_vector_reverse>(m_Value(X))))
return X;
// experimental.vector.reverse(splat(X)) -> splat(X)
if (isSplatValue(Op0))
return Op0;
break;
default:
break;
}
return nullptr;
}
/// Given a min/max intrinsic, see if it can be removed based on having an
/// operand that is another min/max intrinsic with shared operand(s). The caller
/// is expected to swap the operand arguments to handle commutation.
static Value *foldMinMaxSharedOp(Intrinsic::ID IID, Value *Op0, Value *Op1) {
Value *X, *Y;
if (!match(Op0, m_MaxOrMin(m_Value(X), m_Value(Y))))
return nullptr;
auto *MM0 = dyn_cast<IntrinsicInst>(Op0);
if (!MM0)
return nullptr;
Intrinsic::ID IID0 = MM0->getIntrinsicID();
if (Op1 == X || Op1 == Y ||
match(Op1, m_c_MaxOrMin(m_Specific(X), m_Specific(Y)))) {
// max (max X, Y), X --> max X, Y
if (IID0 == IID)
return MM0;
// max (min X, Y), X --> X
if (IID0 == getInverseMinMaxIntrinsic(IID))
return Op1;
}
return nullptr;
}
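// For example (sketch):
//   umax(umax(%x, %y), %x) --> umax(%x, %y)
//   umax(umin(%x, %y), %x) --> %x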
static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
const SimplifyQuery &Q) {
Intrinsic::ID IID = F->getIntrinsicID();
Type *ReturnType = F->getReturnType();
unsigned BitWidth = ReturnType->getScalarSizeInBits();
switch (IID) {
case Intrinsic::abs:
// abs(abs(x)) -> abs(x). We don't need to worry about the nsw arg here.
// It is always ok to pick the earlier abs. We'll just lose nsw if it's only
// on the outer abs.
if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(), m_Value())))
return Op0;
break;
case Intrinsic::cttz: {
Value *X;
if (match(Op0, m_Shl(m_One(), m_Value(X))))
return X;
break;
}
case Intrinsic::ctlz: {
Value *X;
if (match(Op0, m_LShr(m_Negative(), m_Value(X))))
return X;
if (match(Op0, m_AShr(m_Negative(), m_Value())))
return Constant::getNullValue(ReturnType);
break;
}
case Intrinsic::smax:
case Intrinsic::smin:
case Intrinsic::umax:
case Intrinsic::umin: {
// If the arguments are the same, this is a no-op.
if (Op0 == Op1)
return Op0;
// Canonicalize constant operand as Op1.
if (isa<Constant>(Op0))
std::swap(Op0, Op1);
// Assume undef is the limit value.
if (Q.isUndefValue(Op1))
return ConstantInt::get(
ReturnType, MinMaxIntrinsic::getSaturationPoint(IID, BitWidth));
const APInt *C;
if (match(Op1, m_APIntAllowUndef(C))) {
// Clamp to limit value. For example:
// umax(i8 %x, i8 255) --> 255
if (*C == MinMaxIntrinsic::getSaturationPoint(IID, BitWidth))
return ConstantInt::get(ReturnType, *C);
// If the constant op is the opposite of the limit value, the other must
// be larger/smaller or equal. For example:
// umin(i8 %x, i8 255) --> %x
if (*C == MinMaxIntrinsic::getSaturationPoint(
getInverseMinMaxIntrinsic(IID), BitWidth))
return Op0;
// Remove nested call if constant operands allow it. Example:
// max (max X, 7), 5 -> max X, 7
auto *MinMax0 = dyn_cast<IntrinsicInst>(Op0);
if (MinMax0 && MinMax0->getIntrinsicID() == IID) {
// TODO: loosen undef/splat restrictions for vector constants.
Value *M00 = MinMax0->getOperand(0), *M01 = MinMax0->getOperand(1);
const APInt *InnerC;
if ((match(M00, m_APInt(InnerC)) || match(M01, m_APInt(InnerC))) &&
ICmpInst::compare(*InnerC, *C,
ICmpInst::getNonStrictPredicate(
MinMaxIntrinsic::getPredicate(IID))))
return Op0;
}
}
if (Value *V = foldMinMaxSharedOp(IID, Op0, Op1))
return V;
if (Value *V = foldMinMaxSharedOp(IID, Op1, Op0))
return V;
ICmpInst::Predicate Pred =
ICmpInst::getNonStrictPredicate(MinMaxIntrinsic::getPredicate(IID));
if (isICmpTrue(Pred, Op0, Op1, Q.getWithoutUndef(), RecursionLimit))
return Op0;
if (isICmpTrue(Pred, Op1, Op0, Q.getWithoutUndef(), RecursionLimit))
return Op1;
if (Optional<bool> Imp =
isImpliedByDomCondition(Pred, Op0, Op1, Q.CxtI, Q.DL))
return *Imp ? Op0 : Op1;
if (Optional<bool> Imp =
isImpliedByDomCondition(Pred, Op1, Op0, Q.CxtI, Q.DL))
return *Imp ? Op1 : Op0;
break;
}
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
// X - X -> { 0, false }
// X - undef -> { 0, false }
// undef - X -> { 0, false }
if (Op0 == Op1 || Q.isUndefValue(Op0) || Q.isUndefValue(Op1))
return Constant::getNullValue(ReturnType);
break;
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
// X + undef -> { -1, false }
// undef + x -> { -1, false }
if (Q.isUndefValue(Op0) || Q.isUndefValue(Op1)) {
return ConstantStruct::get(
cast<StructType>(ReturnType),
{Constant::getAllOnesValue(ReturnType->getStructElementType(0)),
Constant::getNullValue(ReturnType->getStructElementType(1))});
}
break;
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow:
// 0 * X -> { 0, false }
// X * 0 -> { 0, false }
if (match(Op0, m_Zero()) || match(Op1, m_Zero()))
return Constant::getNullValue(ReturnType);
// undef * X -> { 0, false }
// X * undef -> { 0, false }
if (Q.isUndefValue(Op0) || Q.isUndefValue(Op1))
return Constant::getNullValue(ReturnType);
break;
case Intrinsic::uadd_sat:
// sat(MAX + X) -> MAX
// sat(X + MAX) -> MAX
if (match(Op0, m_AllOnes()) || match(Op1, m_AllOnes()))
return Constant::getAllOnesValue(ReturnType);
LLVM_FALLTHROUGH;
case Intrinsic::sadd_sat:
// sat(X + undef) -> -1
// sat(undef + X) -> -1
// For unsigned: Assume undef is MAX, thus we saturate to MAX (-1).
// For signed: Assume undef is ~X, in which case X + ~X = -1.
if (Q.isUndefValue(Op0) || Q.isUndefValue(Op1))
return Constant::getAllOnesValue(ReturnType);
// X + 0 -> X
if (match(Op1, m_Zero()))
return Op0;
// 0 + X -> X
if (match(Op0, m_Zero()))
return Op1;
break;
case Intrinsic::usub_sat:
// sat(0 - X) -> 0, sat(X - MAX) -> 0
if (match(Op0, m_Zero()) || match(Op1, m_AllOnes()))
return Constant::getNullValue(ReturnType);
LLVM_FALLTHROUGH;
case Intrinsic::ssub_sat:
// X - X -> 0, X - undef -> 0, undef - X -> 0
if (Op0 == Op1 || Q.isUndefValue(Op0) || Q.isUndefValue(Op1))
return Constant::getNullValue(ReturnType);
// X - 0 -> X
if (match(Op1, m_Zero()))
return Op0;
break;
case Intrinsic::load_relative:
if (auto *C0 = dyn_cast<Constant>(Op0))
if (auto *C1 = dyn_cast<Constant>(Op1))
return SimplifyRelativeLoad(C0, C1, Q.DL);
break;
case Intrinsic::powi:
if (auto *Power = dyn_cast<ConstantInt>(Op1)) {
// powi(x, 0) -> 1.0
if (Power->isZero())
return ConstantFP::get(Op0->getType(), 1.0);
// powi(x, 1) -> x
if (Power->isOne())
return Op0;
}
break;
case Intrinsic::copysign:
// copysign X, X --> X
if (Op0 == Op1)
return Op0;
// copysign -X, X --> X
// copysign X, -X --> -X
if (match(Op0, m_FNeg(m_Specific(Op1))) ||
match(Op1, m_FNeg(m_Specific(Op0))))
return Op1;
break;
case Intrinsic::maxnum:
case Intrinsic::minnum:
case Intrinsic::maximum:
case Intrinsic::minimum: {
// If the arguments are the same, this is a no-op.
if (Op0 == Op1) return Op0;
// Canonicalize constant operand as Op1.
if (isa<Constant>(Op0))
std::swap(Op0, Op1);
// If an argument is undef, return the other argument.
if (Q.isUndefValue(Op1))
return Op0;
bool PropagateNaN = IID == Intrinsic::minimum || IID == Intrinsic::maximum;
bool IsMin = IID == Intrinsic::minimum || IID == Intrinsic::minnum;
// minnum(X, nan) -> X
// maxnum(X, nan) -> X
// minimum(X, nan) -> nan
// maximum(X, nan) -> nan
if (match(Op1, m_NaN()))
return PropagateNaN ? propagateNaN(cast<Constant>(Op1)) : Op0;
// In the following folds, inf can be replaced with the largest finite
// float, if the ninf flag is set.
const APFloat *C;
if (match(Op1, m_APFloat(C)) &&
(C->isInfinity() || (Q.CxtI->hasNoInfs() && C->isLargest()))) {
// minnum(X, -inf) -> -inf
// maxnum(X, +inf) -> +inf
// minimum(X, -inf) -> -inf if nnan
// maximum(X, +inf) -> +inf if nnan
if (C->isNegative() == IsMin && (!PropagateNaN || Q.CxtI->hasNoNaNs()))
return ConstantFP::get(ReturnType, *C);
// minnum(X, +inf) -> X if nnan
// maxnum(X, -inf) -> X if nnan
// minimum(X, +inf) -> X
// maximum(X, -inf) -> X
if (C->isNegative() != IsMin && (PropagateNaN || Q.CxtI->hasNoNaNs()))
return Op0;
}
// Min/max of the same operation with common operand:
// m(m(X, Y)), X --> m(X, Y) (4 commuted variants)
if (auto *M0 = dyn_cast<IntrinsicInst>(Op0))
if (M0->getIntrinsicID() == IID &&
(M0->getOperand(0) == Op1 || M0->getOperand(1) == Op1))
return Op0;
if (auto *M1 = dyn_cast<IntrinsicInst>(Op1))
if (M1->getIntrinsicID() == IID &&
(M1->getOperand(0) == Op0 || M1->getOperand(1) == Op0))
return Op1;
break;
}
case Intrinsic::experimental_vector_extract: {
Type *ReturnType = F->getReturnType();
// (extract_vector (insert_vector _, X, 0), 0) -> X
unsigned IdxN = cast<ConstantInt>(Op1)->getZExtValue();
Value *X = nullptr;
if (match(Op0, m_Intrinsic<Intrinsic::experimental_vector_insert>(
m_Value(), m_Value(X), m_Zero())) &&
IdxN == 0 && X->getType() == ReturnType)
return X;
break;
}
default:
break;
}
return nullptr;
}
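// Illustrative examples (sketch) of the binary-intrinsic folds above:
//   llvm.umax.i8(i8 %x, i8 255)              --> i8 255   (clamp to saturation)
//   llvm.cttz.i32(shl (i32 1, %n), i1 false) --> %n
//   llvm.copysign.f64(double %x, double %x)  --> %x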
static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
unsigned NumOperands = Call->arg_size();
Function *F = cast<Function>(Call->getCalledFunction());
Intrinsic::ID IID = F->getIntrinsicID();
// Most of the intrinsics with no operands have some kind of side effect.
// Don't simplify.
if (!NumOperands) {
switch (IID) {
case Intrinsic::vscale: {
// Call may not be inserted into the IR yet at point of calling simplify.
if (!Call->getParent() || !Call->getParent()->getParent())
return nullptr;
auto Attr = Call->getFunction()->getFnAttribute(Attribute::VScaleRange);
if (!Attr.isValid())
return nullptr;
unsigned VScaleMin = Attr.getVScaleRangeMin();
Optional<unsigned> VScaleMax = Attr.getVScaleRangeMax();
if (VScaleMax && VScaleMin == VScaleMax)
return ConstantInt::get(F->getReturnType(), VScaleMin);
return nullptr;
}
default:
return nullptr;
}
}
if (NumOperands == 1)
return simplifyUnaryIntrinsic(F, Call->getArgOperand(0), Q);
if (NumOperands == 2)
return simplifyBinaryIntrinsic(F, Call->getArgOperand(0),
Call->getArgOperand(1), Q);
// Handle intrinsics with 3 or more arguments.
switch (IID) {
case Intrinsic::masked_load:
case Intrinsic::masked_gather: {
Value *MaskArg = Call->getArgOperand(2);
Value *PassthruArg = Call->getArgOperand(3);
// If the mask is all zeros or undef, the "passthru" argument is the result.
if (maskIsAllZeroOrUndef(MaskArg))
return PassthruArg;
return nullptr;
}
case Intrinsic::fshl:
case Intrinsic::fshr: {
Value *Op0 = Call->getArgOperand(0), *Op1 = Call->getArgOperand(1),
*ShAmtArg = Call->getArgOperand(2);
// If both operands are undef, the result is undef.
if (Q.isUndefValue(Op0) && Q.isUndefValue(Op1))
return UndefValue::get(F->getReturnType());
// If shift amount is undef, assume it is zero.
if (Q.isUndefValue(ShAmtArg))
return Call->getArgOperand(IID == Intrinsic::fshl ? 0 : 1);
const APInt *ShAmtC;
if (match(ShAmtArg, m_APInt(ShAmtC))) {
// If there's effectively no shift, return the 1st arg or 2nd arg.
APInt BitWidth = APInt(ShAmtC->getBitWidth(), ShAmtC->getBitWidth());
if (ShAmtC->urem(BitWidth).isZero())
return Call->getArgOperand(IID == Intrinsic::fshl ? 0 : 1);
}
// Rotating zero by anything is zero.
if (match(Op0, m_Zero()) && match(Op1, m_Zero()))
return ConstantInt::getNullValue(F->getReturnType());
// Rotating -1 by anything is -1.
if (match(Op0, m_AllOnes()) && match(Op1, m_AllOnes()))
return ConstantInt::getAllOnesValue(F->getReturnType());
return nullptr;
}
case Intrinsic::experimental_constrained_fma: {
Value *Op0 = Call->getArgOperand(0);
Value *Op1 = Call->getArgOperand(1);
Value *Op2 = Call->getArgOperand(2);
auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
if (Value *V = simplifyFPOp({Op0, Op1, Op2}, {}, Q,
FPI->getExceptionBehavior().getValue(),
FPI->getRoundingMode().getValue()))
return V;
return nullptr;
}
case Intrinsic::fma:
case Intrinsic::fmuladd: {
Value *Op0 = Call->getArgOperand(0);
Value *Op1 = Call->getArgOperand(1);
Value *Op2 = Call->getArgOperand(2);
if (Value *V = simplifyFPOp({Op0, Op1, Op2}, {}, Q, fp::ebIgnore,
RoundingMode::NearestTiesToEven))
return V;
return nullptr;
}
case Intrinsic::smul_fix:
case Intrinsic::smul_fix_sat: {
Value *Op0 = Call->getArgOperand(0);
Value *Op1 = Call->getArgOperand(1);
Value *Op2 = Call->getArgOperand(2);
Type *ReturnType = F->getReturnType();
// Canonicalize constant operand as Op1 (ConstantFolding handles the case
// when both Op0 and Op1 are constant so we do not care about that special
// case here).
if (isa<Constant>(Op0))
std::swap(Op0, Op1);
// X * 0 -> 0
if (match(Op1, m_Zero()))
return Constant::getNullValue(ReturnType);
// X * undef -> 0
if (Q.isUndefValue(Op1))
return Constant::getNullValue(ReturnType);
// X * (1 << Scale) -> X
APInt ScaledOne =
APInt::getOneBitSet(ReturnType->getScalarSizeInBits(),
cast<ConstantInt>(Op2)->getZExtValue());
if (ScaledOne.isNonNegative() && match(Op1, m_SpecificInt(ScaledOne)))
return Op0;
return nullptr;
}
case Intrinsic::experimental_vector_insert: {
Value *Vec = Call->getArgOperand(0);
Value *SubVec = Call->getArgOperand(1);
Value *Idx = Call->getArgOperand(2);
Type *ReturnType = F->getReturnType();
// (insert_vector Y, (extract_vector X, 0), 0) -> X
// where: Y is X, or Y is undef
unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
Value *X = nullptr;
if (match(SubVec, m_Intrinsic<Intrinsic::experimental_vector_extract>(
m_Value(X), m_Zero())) &&
(Q.isUndefValue(Vec) || Vec == X) && IdxN == 0 &&
X->getType() == ReturnType)
return X;
return nullptr;
}
case Intrinsic::experimental_constrained_fadd: {
auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
return SimplifyFAddInst(FPI->getArgOperand(0), FPI->getArgOperand(1),
FPI->getFastMathFlags(), Q,
FPI->getExceptionBehavior().getValue(),
FPI->getRoundingMode().getValue());
break;
}
case Intrinsic::experimental_constrained_fsub: {
auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
return SimplifyFSubInst(FPI->getArgOperand(0), FPI->getArgOperand(1),
FPI->getFastMathFlags(), Q,
FPI->getExceptionBehavior().getValue(),
FPI->getRoundingMode().getValue());
break;
}
case Intrinsic::experimental_constrained_fmul: {
auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
return SimplifyFMulInst(FPI->getArgOperand(0), FPI->getArgOperand(1),
FPI->getFastMathFlags(), Q,
FPI->getExceptionBehavior().getValue(),
FPI->getRoundingMode().getValue());
break;
}
case Intrinsic::experimental_constrained_fdiv: {
auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
return SimplifyFDivInst(FPI->getArgOperand(0), FPI->getArgOperand(1),
FPI->getFastMathFlags(), Q,
FPI->getExceptionBehavior().getValue(),
FPI->getRoundingMode().getValue());
break;
}
case Intrinsic::experimental_constrained_frem: {
auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
return SimplifyFRemInst(FPI->getArgOperand(0), FPI->getArgOperand(1),
FPI->getFastMathFlags(), Q,
FPI->getExceptionBehavior().getValue(),
FPI->getRoundingMode().getValue());
break;
}
default:
return nullptr;
}
}
static Value *tryConstantFoldCall(CallBase *Call, const SimplifyQuery &Q) {
auto *F = dyn_cast<Function>(Call->getCalledOperand());
if (!F || !canConstantFoldCallTo(Call, F))
return nullptr;
SmallVector<Constant *, 4> ConstantArgs;
unsigned NumArgs = Call->arg_size();
ConstantArgs.reserve(NumArgs);
for (auto &Arg : Call->args()) {
Constant *C = dyn_cast<Constant>(&Arg);
if (!C) {
if (isa<MetadataAsValue>(Arg.get()))
continue;
return nullptr;
}
ConstantArgs.push_back(C);
}
return ConstantFoldCall(Call, F, ConstantArgs, Q.TLI);
}
Value *llvm::SimplifyCall(CallBase *Call, const SimplifyQuery &Q) {
// musttail calls can only be simplified if they are also DCEd.
// As we can't guarantee this here, don't simplify them.
if (Call->isMustTailCall())
return nullptr;
// call undef -> poison
// call null -> poison
Value *Callee = Call->getCalledOperand();
if (isa<UndefValue>(Callee) || isa<ConstantPointerNull>(Callee))
return PoisonValue::get(Call->getType());
if (Value *V = tryConstantFoldCall(Call, Q))
return V;
auto *F = dyn_cast<Function>(Callee);
if (F && F->isIntrinsic())
if (Value *Ret = simplifyIntrinsic(Call, Q))
return Ret;
return nullptr;
}
/// Given operands for a Freeze, see if we can fold the result.
static Value *SimplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) {
// Use a utility function defined in ValueTracking.
if (llvm::isGuaranteedNotToBeUndefOrPoison(Op0, Q.AC, Q.CxtI, Q.DT))
return Op0;
// We have room for improvement.
return nullptr;
}
Value *llvm::SimplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) {
return ::SimplifyFreezeInst(Op0, Q);
}
static Value *SimplifyLoadInst(LoadInst *LI, Value *PtrOp,
const SimplifyQuery &Q) {
if (LI->isVolatile())
return nullptr;
APInt Offset(Q.DL.getIndexTypeSizeInBits(PtrOp->getType()), 0);
auto *PtrOpC = dyn_cast<Constant>(PtrOp);
// Try to convert operand into a constant by stripping offsets while looking
// through invariant.group intrinsics. Don't bother if the underlying object
// is not constant, as calculating GEP offsets is expensive.
if (!PtrOpC && isa<Constant>(getUnderlyingObject(PtrOp))) {
PtrOp = PtrOp->stripAndAccumulateConstantOffsets(
Q.DL, Offset, /* AllowNonInbounds */ true,
/* AllowInvariantGroup */ true);
// Index size may have changed due to address space casts.
Offset = Offset.sextOrTrunc(Q.DL.getIndexTypeSizeInBits(PtrOp->getType()));
PtrOpC = dyn_cast<Constant>(PtrOp);
}
if (PtrOpC)
return ConstantFoldLoadFromConstPtr(PtrOpC, LI->getType(), Offset, Q.DL);
return nullptr;
}
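// For example, a load through a GEP instruction into a constant global folds
// to the addressed element:
//   @g = constant [2 x i32] [i32 1, i32 2]
//   %p = getelementptr inbounds [2 x i32], [2 x i32]* @g, i64 0, i64 1
//   %v = load i32, i32* %p
// Stripping the offsets turns %p into @g plus a byte offset of 4, and
// ConstantFoldLoadFromConstPtr then folds %v to i32 2.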
/// See if we can compute a simplified version of this instruction.
/// If not, this returns null.
static Value *simplifyInstructionWithOperands(Instruction *I,
ArrayRef<Value *> NewOps,
const SimplifyQuery &SQ,
OptimizationRemarkEmitter *ORE) {
const SimplifyQuery Q = SQ.CxtI ? SQ : SQ.getWithInstruction(I);
Value *Result = nullptr;
switch (I->getOpcode()) {
default:
if (llvm::all_of(NewOps, [](Value *V) { return isa<Constant>(V); })) {
SmallVector<Constant *, 8> NewConstOps(NewOps.size());
transform(NewOps, NewConstOps.begin(),
[](Value *V) { return cast<Constant>(V); });
Result = ConstantFoldInstOperands(I, NewConstOps, Q.DL, Q.TLI);
}
break;
case Instruction::FNeg:
Result = SimplifyFNegInst(NewOps[0], I->getFastMathFlags(), Q);
break;
case Instruction::FAdd:
Result = SimplifyFAddInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
break;
case Instruction::Add:
Result = SimplifyAddInst(
NewOps[0], NewOps[1], Q.IIQ.hasNoSignedWrap(cast<BinaryOperator>(I)),
Q.IIQ.hasNoUnsignedWrap(cast<BinaryOperator>(I)), Q);
break;
case Instruction::FSub:
Result = SimplifyFSubInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
break;
case Instruction::Sub:
Result = SimplifySubInst(
NewOps[0], NewOps[1], Q.IIQ.hasNoSignedWrap(cast<BinaryOperator>(I)),
Q.IIQ.hasNoUnsignedWrap(cast<BinaryOperator>(I)), Q);
break;
case Instruction::FMul:
Result = SimplifyFMulInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
break;
case Instruction::Mul:
Result = SimplifyMulInst(NewOps[0], NewOps[1], Q);
break;
case Instruction::SDiv:
Result = SimplifySDivInst(NewOps[0], NewOps[1], Q);
break;
case Instruction::UDiv:
Result = SimplifyUDivInst(NewOps[0], NewOps[1], Q);
break;
case Instruction::FDiv:
Result = SimplifyFDivInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
break;
case Instruction::SRem:
Result = SimplifySRemInst(NewOps[0], NewOps[1], Q);
break;
case Instruction::URem:
Result = SimplifyURemInst(NewOps[0], NewOps[1], Q);
break;
case Instruction::FRem:
Result = SimplifyFRemInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
break;
case Instruction::Shl:
Result = SimplifyShlInst(
NewOps[0], NewOps[1], Q.IIQ.hasNoSignedWrap(cast<BinaryOperator>(I)),
Q.IIQ.hasNoUnsignedWrap(cast<BinaryOperator>(I)), Q);
break;
case Instruction::LShr:
Result = SimplifyLShrInst(NewOps[0], NewOps[1],
Q.IIQ.isExact(cast<BinaryOperator>(I)), Q);
break;
case Instruction::AShr:
Result = SimplifyAShrInst(NewOps[0], NewOps[1],
Q.IIQ.isExact(cast<BinaryOperator>(I)), Q);
break;
case Instruction::And:
Result = SimplifyAndInst(NewOps[0], NewOps[1], Q);
break;
case Instruction::Or:
Result = SimplifyOrInst(NewOps[0], NewOps[1], Q);
break;
case Instruction::Xor:
Result = SimplifyXorInst(NewOps[0], NewOps[1], Q);
break;
case Instruction::ICmp:
Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(), NewOps[0],
NewOps[1], Q);
break;
case Instruction::FCmp:
Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), NewOps[0],
NewOps[1], I->getFastMathFlags(), Q);
break;
case Instruction::Select:
Result = SimplifySelectInst(NewOps[0], NewOps[1], NewOps[2], Q);
break;
case Instruction::GetElementPtr: {
auto *GEPI = cast<GetElementPtrInst>(I);
Result =
SimplifyGEPInst(GEPI->getSourceElementType(), NewOps[0],
makeArrayRef(NewOps).slice(1), GEPI->isInBounds(), Q);
break;
}
case Instruction::InsertValue: {
InsertValueInst *IV = cast<InsertValueInst>(I);
Result = SimplifyInsertValueInst(NewOps[0], NewOps[1], IV->getIndices(), Q);
break;
}
case Instruction::InsertElement: {
Result = SimplifyInsertElementInst(NewOps[0], NewOps[1], NewOps[2], Q);
break;
}
case Instruction::ExtractValue: {
auto *EVI = cast<ExtractValueInst>(I);
Result = SimplifyExtractValueInst(NewOps[0], EVI->getIndices(), Q);
break;
}
case Instruction::ExtractElement: {
Result = SimplifyExtractElementInst(NewOps[0], NewOps[1], Q);
break;
}
case Instruction::ShuffleVector: {
auto *SVI = cast<ShuffleVectorInst>(I);
Result = SimplifyShuffleVectorInst(
NewOps[0], NewOps[1], SVI->getShuffleMask(), SVI->getType(), Q);
break;
}
case Instruction::PHI:
Result = SimplifyPHINode(cast<PHINode>(I), NewOps, Q);
break;
case Instruction::Call: {
// TODO: Use NewOps
Result = SimplifyCall(cast<CallInst>(I), Q);
break;
}
case Instruction::Freeze:
Result = llvm::SimplifyFreezeInst(NewOps[0], Q);
break;
#define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc:
#include "llvm/IR/Instruction.def"
#undef HANDLE_CAST_INST
Result = SimplifyCastInst(I->getOpcode(), NewOps[0], I->getType(), Q);
break;
case Instruction::Alloca:
// No simplifications for Alloca and it can't be constant folded.
Result = nullptr;
break;
case Instruction::Load:
Result = SimplifyLoadInst(cast<LoadInst>(I), NewOps[0], Q);
break;
}
/// If called on unreachable code, the above logic may report that the
/// instruction simplified to itself. Make life easier for users by
/// detecting that case here, returning a safe value instead.
return Result == I ? UndefValue::get(I->getType()) : Result;
}
Value *llvm::SimplifyInstructionWithOperands(Instruction *I,
ArrayRef<Value *> NewOps,
const SimplifyQuery &SQ,
OptimizationRemarkEmitter *ORE) {
assert(NewOps.size() == I->getNumOperands() &&
"Number of operands should match the instruction!");
return ::simplifyInstructionWithOperands(I, NewOps, SQ, ORE);
}
Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
OptimizationRemarkEmitter *ORE) {
SmallVector<Value *, 8> Ops(I->operands());
return ::simplifyInstructionWithOperands(I, Ops, SQ, ORE);
}
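// A minimal usage sketch (assuming SQ is a SimplifyQuery set up for the
// enclosing function): callers typically simplify an instruction and, when a
// simpler value is found, replace all uses of the original:
//
//   if (Value *V = SimplifyInstruction(&I, SQ, /*ORE=*/nullptr))
//     I.replaceAllUsesWith(V);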
/// Implementation of recursive simplification through an instruction's
/// uses.
///
/// This is the common implementation of the recursive simplification routines.
/// If we have a pre-simplified value in 'SimpleV', that is forcibly used to
/// replace the instruction 'I'. Otherwise, we simply add 'I' to the list of
/// instructions to process and attempt to simplify it using
/// InstructionSimplify. Recursively visited users which could not be
/// simplified themselves are added to the optional UnsimplifiedUsers set for
/// further processing by the caller.
///
/// This routine returns 'true' only when *it* simplifies something. The passed
/// in simplified value does not count toward this.
static bool replaceAndRecursivelySimplifyImpl(
Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
SmallSetVector<Instruction *, 8> *UnsimplifiedUsers = nullptr) {
bool Simplified = false;
SmallSetVector<Instruction *, 8> Worklist;
const DataLayout &DL = I->getModule()->getDataLayout();
// If we have an explicit value to collapse to, do that round of the
// simplification loop by hand initially.
if (SimpleV) {
for (User *U : I->users())
if (U != I)
Worklist.insert(cast<Instruction>(U));
// Replace the instruction with its simplified value.
I->replaceAllUsesWith(SimpleV);
// Gracefully handle edge cases where the instruction is not wired into any
// parent block.
if (I->getParent() && !I->isEHPad() && !I->isTerminator() &&
!I->mayHaveSideEffects())
I->eraseFromParent();
} else {
Worklist.insert(I);
}
// Note that we must test the size on each iteration, the worklist can grow.
for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
I = Worklist[Idx];
// See if this instruction simplifies.
SimpleV = SimplifyInstruction(I, {DL, TLI, DT, AC});
if (!SimpleV) {
if (UnsimplifiedUsers)
UnsimplifiedUsers->insert(I);
continue;
}
Simplified = true;
// Stash away all the uses of the old instruction so we can check them for
// recursive simplifications after a RAUW. This is cheaper than checking all
// uses of the replacement value on the recursive step in most cases.
for (User *U : I->users())
Worklist.insert(cast<Instruction>(U));
// Replace the instruction with its simplified value.
I->replaceAllUsesWith(SimpleV);
// Gracefully handle edge cases where the instruction is not wired into any
// parent block.
if (I->getParent() && !I->isEHPad() && !I->isTerminator() &&
!I->mayHaveSideEffects())
I->eraseFromParent();
}
return Simplified;
}
bool llvm::replaceAndRecursivelySimplify(
Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC,
SmallSetVector<Instruction *, 8> *UnsimplifiedUsers) {
assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!");
assert(SimpleV && "Must provide a simplified value.");
return replaceAndRecursivelySimplifyImpl(I, SimpleV, TLI, DT, AC,
UnsimplifiedUsers);
}
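// A minimal usage sketch (assuming TLI, DT and AC analyses are available):
// once a caller has proven that instruction I always computes the same value
// as SimpleV, it can collapse I and let I's users be revisited recursively:
//
//   bool Changed = replaceAndRecursivelySimplify(I, SimpleV, &TLI, &DT, &AC);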
namespace llvm {
const SimplifyQuery getBestSimplifyQuery(Pass &P, Function &F) {
auto *DTWP = P.getAnalysisIfAvailable<DominatorTreeWrapperPass>();
auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
auto *TLIWP = P.getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
auto *TLI = TLIWP ? &TLIWP->getTLI(F) : nullptr;
auto *ACWP = P.getAnalysisIfAvailable<AssumptionCacheTracker>();
auto *AC = ACWP ? &ACWP->getAssumptionCache(F) : nullptr;
return {F.getParent()->getDataLayout(), TLI, DT, AC};
}
const SimplifyQuery getBestSimplifyQuery(LoopStandardAnalysisResults &AR,
const DataLayout &DL) {
return {DL, &AR.TLI, &AR.DT, &AR.AC};
}
template <class T, class... TArgs>
const SimplifyQuery getBestSimplifyQuery(AnalysisManager<T, TArgs...> &AM,
Function &F) {
auto *DT = AM.template getCachedResult<DominatorTreeAnalysis>(F);
auto *TLI = AM.template getCachedResult<TargetLibraryAnalysis>(F);
auto *AC = AM.template getCachedResult<AssumptionAnalysis>(F);
return {F.getParent()->getDataLayout(), TLI, DT, AC};
}
template const SimplifyQuery getBestSimplifyQuery(AnalysisManager<Function> &,
Function &);
}
void InstSimplifyFolder::anchor() {}
diff --git a/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp
index 208f93aa1ac6..9e26f292b789 100644
--- a/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/contrib/llvm-project/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -1,988 +1,1003 @@
//===- MemoryBuiltins.cpp - Identify calls to memory builtins -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This family of functions identifies calls to builtin functions that allocate
// or free memory.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
using namespace llvm;
#define DEBUG_TYPE "memory-builtins"
enum AllocType : uint8_t {
OpNewLike = 1<<0, // allocates; never returns null
MallocLike = 1<<1, // allocates; may return null
AlignedAllocLike = 1<<2, // allocates with alignment; may return null
CallocLike = 1<<3, // allocates + bzero
ReallocLike = 1<<4, // reallocates
StrDupLike = 1<<5,
MallocOrOpNewLike = MallocLike | OpNewLike,
MallocOrCallocLike = MallocLike | OpNewLike | CallocLike | AlignedAllocLike,
AllocLike = MallocOrCallocLike | StrDupLike,
AnyAlloc = AllocLike | ReallocLike
};
struct AllocFnsTy {
AllocType AllocTy;
unsigned NumParams;
// First and Second size parameters (or -1 if unused)
int FstParam, SndParam;
// Alignment parameter for aligned_alloc and aligned new
int AlignParam;
};
// FIXME: certain users need more information. E.g., SimplifyLibCalls needs to
// know which functions are nounwind, noalias, nocapture parameters, etc.
static const std::pair<LibFunc, AllocFnsTy> AllocationFnData[] = {
{LibFunc_malloc, {MallocLike, 1, 0, -1, -1}},
{LibFunc_vec_malloc, {MallocLike, 1, 0, -1, -1}},
{LibFunc_valloc, {MallocLike, 1, 0, -1, -1}},
{LibFunc_Znwj, {OpNewLike, 1, 0, -1, -1}}, // new(unsigned int)
{LibFunc_ZnwjRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1}}, // new(unsigned int, nothrow)
{LibFunc_ZnwjSt11align_val_t, {OpNewLike, 2, 0, -1, 1}}, // new(unsigned int, align_val_t)
{LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1}}, // new(unsigned int, align_val_t, nothrow)
{LibFunc_Znwm, {OpNewLike, 1, 0, -1, -1}}, // new(unsigned long)
{LibFunc_ZnwmRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1}}, // new(unsigned long, nothrow)
{LibFunc_ZnwmSt11align_val_t, {OpNewLike, 2, 0, -1, 1}}, // new(unsigned long, align_val_t)
{LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1}}, // new(unsigned long, align_val_t, nothrow)
{LibFunc_Znaj, {OpNewLike, 1, 0, -1, -1}}, // new[](unsigned int)
{LibFunc_ZnajRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1}}, // new[](unsigned int, nothrow)
{LibFunc_ZnajSt11align_val_t, {OpNewLike, 2, 0, -1, 1}}, // new[](unsigned int, align_val_t)
{LibFunc_ZnajSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1}}, // new[](unsigned int, align_val_t, nothrow)
{LibFunc_Znam, {OpNewLike, 1, 0, -1, -1}}, // new[](unsigned long)
{LibFunc_ZnamRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1}}, // new[](unsigned long, nothrow)
{LibFunc_ZnamSt11align_val_t, {OpNewLike, 2, 0, -1, 1}}, // new[](unsigned long, align_val_t)
{LibFunc_ZnamSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1}}, // new[](unsigned long, align_val_t, nothrow)
{LibFunc_msvc_new_int, {OpNewLike, 1, 0, -1, -1}}, // new(unsigned int)
{LibFunc_msvc_new_int_nothrow, {MallocLike, 2, 0, -1, -1}}, // new(unsigned int, nothrow)
{LibFunc_msvc_new_longlong, {OpNewLike, 1, 0, -1, -1}}, // new(unsigned long long)
{LibFunc_msvc_new_longlong_nothrow, {MallocLike, 2, 0, -1, -1}}, // new(unsigned long long, nothrow)
{LibFunc_msvc_new_array_int, {OpNewLike, 1, 0, -1, -1}}, // new[](unsigned int)
{LibFunc_msvc_new_array_int_nothrow, {MallocLike, 2, 0, -1, -1}}, // new[](unsigned int, nothrow)
{LibFunc_msvc_new_array_longlong, {OpNewLike, 1, 0, -1, -1}}, // new[](unsigned long long)
{LibFunc_msvc_new_array_longlong_nothrow, {MallocLike, 2, 0, -1, -1}}, // new[](unsigned long long, nothrow)
{LibFunc_aligned_alloc, {AlignedAllocLike, 2, 1, -1, 0}},
{LibFunc_memalign, {AlignedAllocLike, 2, 1, -1, 0}},
{LibFunc_calloc, {CallocLike, 2, 0, 1, -1}},
{LibFunc_vec_calloc, {CallocLike, 2, 0, 1, -1}},
{LibFunc_realloc, {ReallocLike, 2, 1, -1, -1}},
{LibFunc_vec_realloc, {ReallocLike, 2, 1, -1, -1}},
{LibFunc_reallocf, {ReallocLike, 2, 1, -1, -1}},
{LibFunc_strdup, {StrDupLike, 1, -1, -1, -1}},
{LibFunc_strndup, {StrDupLike, 2, 1, -1, -1}},
{LibFunc___kmpc_alloc_shared, {MallocLike, 1, 0, -1, -1}},
// TODO: Handle "int posix_memalign(void **, size_t, size_t)"
};
static const Function *getCalledFunction(const Value *V,
bool &IsNoBuiltin) {
// Don't care about intrinsics in this case.
if (isa<IntrinsicInst>(V))
return nullptr;
const auto *CB = dyn_cast<CallBase>(V);
if (!CB)
return nullptr;
IsNoBuiltin = CB->isNoBuiltin();
if (const Function *Callee = CB->getCalledFunction())
return Callee;
return nullptr;
}
/// Returns the allocation data for the given value if it's a call to a known
/// allocation function.
static Optional<AllocFnsTy>
getAllocationDataForFunction(const Function *Callee, AllocType AllocTy,
const TargetLibraryInfo *TLI) {
// Make sure that the function is available.
LibFunc TLIFn;
if (!TLI || !TLI->getLibFunc(*Callee, TLIFn) || !TLI->has(TLIFn))
return None;
const auto *Iter = find_if(
AllocationFnData, [TLIFn](const std::pair<LibFunc, AllocFnsTy> &P) {
return P.first == TLIFn;
});
if (Iter == std::end(AllocationFnData))
return None;
const AllocFnsTy *FnData = &Iter->second;
if ((FnData->AllocTy & AllocTy) != FnData->AllocTy)
return None;
// Check function prototype.
int FstParam = FnData->FstParam;
int SndParam = FnData->SndParam;
FunctionType *FTy = Callee->getFunctionType();
if (FTy->getReturnType() == Type::getInt8PtrTy(FTy->getContext()) &&
FTy->getNumParams() == FnData->NumParams &&
(FstParam < 0 ||
(FTy->getParamType(FstParam)->isIntegerTy(32) ||
FTy->getParamType(FstParam)->isIntegerTy(64))) &&
(SndParam < 0 ||
FTy->getParamType(SndParam)->isIntegerTy(32) ||
FTy->getParamType(SndParam)->isIntegerTy(64)))
return *FnData;
return None;
}
static Optional<AllocFnsTy> getAllocationData(const Value *V, AllocType AllocTy,
const TargetLibraryInfo *TLI) {
bool IsNoBuiltinCall;
if (const Function *Callee = getCalledFunction(V, IsNoBuiltinCall))
if (!IsNoBuiltinCall)
return getAllocationDataForFunction(Callee, AllocTy, TLI);
return None;
}
static Optional<AllocFnsTy>
getAllocationData(const Value *V, AllocType AllocTy,
function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
bool IsNoBuiltinCall;
if (const Function *Callee = getCalledFunction(V, IsNoBuiltinCall))
if (!IsNoBuiltinCall)
return getAllocationDataForFunction(
Callee, AllocTy, &GetTLI(const_cast<Function &>(*Callee)));
return None;
}
static Optional<AllocFnsTy> getAllocationSize(const Value *V,
const TargetLibraryInfo *TLI) {
bool IsNoBuiltinCall;
const Function *Callee =
getCalledFunction(V, IsNoBuiltinCall);
if (!Callee)
return None;
// Prefer to use existing information over allocsize. This will give us an
// accurate AllocTy.
if (!IsNoBuiltinCall)
if (Optional<AllocFnsTy> Data =
getAllocationDataForFunction(Callee, AnyAlloc, TLI))
return Data;
Attribute Attr = Callee->getFnAttribute(Attribute::AllocSize);
if (Attr == Attribute())
return None;
std::pair<unsigned, Optional<unsigned>> Args = Attr.getAllocSizeArgs();
AllocFnsTy Result;
// Because allocsize only tells us how many bytes are allocated, we're not
// really allowed to assume anything, so we use MallocLike.
Result.AllocTy = MallocLike;
Result.NumParams = Callee->getNumOperands();
Result.FstParam = Args.first;
Result.SndParam = Args.second.getValueOr(-1);
// Allocsize has no way to specify an alignment argument
Result.AlignParam = -1;
return Result;
}
/// Tests if a value is a call or invoke to a library function that
/// allocates or reallocates memory (either malloc, calloc, realloc, or strdup
/// like).
bool llvm::isAllocationFn(const Value *V, const TargetLibraryInfo *TLI) {
return getAllocationData(V, AnyAlloc, TLI).hasValue();
}
bool llvm::isAllocationFn(
const Value *V, function_ref<const TargetLibraryInfo &(Function &)> GetTLI) {
return getAllocationData(V, AnyAlloc, GetTLI).hasValue();
}
/// Tests if a value is a call or invoke to a library function that
/// allocates uninitialized memory (such as malloc).
static bool isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) {
return getAllocationData(V, MallocOrOpNewLike, TLI).hasValue();
}
/// Tests if a value is a call or invoke to a library function that
/// allocates uninitialized memory with alignment (such as aligned_alloc).
static bool isAlignedAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI) {
return getAllocationData(V, AlignedAllocLike, TLI)
.hasValue();
}
/// Tests if a value is a call or invoke to a library function that
/// allocates zero-filled memory (such as calloc).
static bool isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) {
return getAllocationData(V, CallocLike, TLI).hasValue();
}
/// Tests if a value is a call or invoke to a library function that
/// allocates memory similar to malloc or calloc.
bool llvm::isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) {
return getAllocationData(V, MallocOrCallocLike, TLI).hasValue();
}
/// Tests if a value is a call or invoke to a library function that
/// allocates memory (either malloc, calloc, or strdup like).
bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI) {
return getAllocationData(V, AllocLike, TLI).hasValue();
}
/// Tests if a value is a call or invoke to a library function that
/// reallocates memory (e.g., realloc).
bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) {
return getAllocationData(V, ReallocLike, TLI).hasValue();
}
/// Tests if a function is a library function that reallocates memory
/// (e.g., realloc).
bool llvm::isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI) {
return getAllocationDataForFunction(F, ReallocLike, TLI).hasValue();
}
bool llvm::isAllocRemovable(const CallBase *CB, const TargetLibraryInfo *TLI) {
assert(isAllocationFn(CB, TLI));
// Note: Removability is highly dependent on the source language. For
// example, recent C++ requires direct calls to a global allocation function
// [basic.stc.dynamic.allocation] to be observable unless part of a new
// expression [expr.new paragraph 13].
// Historically we've treated the C family allocation routines as removable
return isAllocLikeFn(CB, TLI);
}
Value *llvm::getAllocAlignment(const CallBase *V,
const TargetLibraryInfo *TLI) {
assert(isAllocationFn(V, TLI));
const Optional<AllocFnsTy> FnData = getAllocationData(V, AnyAlloc, TLI);
if (!FnData.hasValue() || FnData->AlignParam < 0) {
return nullptr;
}
return V->getOperand(FnData->AlignParam);
}
/// When we're compiling N-bit code, and the user uses parameters that are
/// greater than N bits (e.g. uint64_t on a 32-bit build), we can run into
/// trouble with APInt size issues. This function handles resizing + overflow
/// checks for us. Check and zext or trunc \p I depending on IntTyBits and
/// I's value.
static bool CheckedZextOrTrunc(APInt &I, unsigned IntTyBits) {
// More bits than we can handle. Checking the bit width isn't necessary, but
// it's faster than checking active bits, and should give `false` in the
// vast majority of cases.
if (I.getBitWidth() > IntTyBits && I.getActiveBits() > IntTyBits)
return false;
if (I.getBitWidth() != IntTyBits)
I = I.zextOrTrunc(IntTyBits);
return true;
}
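// For example, with IntTyBits == 32:
//   APInt A(64, 100);                // fits in 32 bits
//   CheckedZextOrTrunc(A, 32);       // returns true; A is now 32 bits wide
//   APInt B(64, uint64_t(1) << 40);  // does not fit in 32 bits
//   CheckedZextOrTrunc(B, 32);       // returns false; B is left untouched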
Optional<APInt>
llvm::getAllocSize(const CallBase *CB,
const TargetLibraryInfo *TLI,
std::function<const Value*(const Value*)> Mapper) {
// Note: This handles both explicitly listed allocation functions and
// allocsize. The code structure could stand to be cleaned up a bit.
Optional<AllocFnsTy> FnData = getAllocationSize(CB, TLI);
if (!FnData)
return None;
// Get the index type for this address space, results and intermediate
// computations are performed at that width.
auto &DL = CB->getModule()->getDataLayout();
const unsigned IntTyBits = DL.getIndexTypeSizeInBits(CB->getType());
// Handle strdup-like functions separately.
if (FnData->AllocTy == StrDupLike) {
APInt Size(IntTyBits, GetStringLength(Mapper(CB->getArgOperand(0))));
if (!Size)
return None;
// Strndup limits strlen.
if (FnData->FstParam > 0) {
const ConstantInt *Arg =
dyn_cast<ConstantInt>(Mapper(CB->getArgOperand(FnData->FstParam)));
if (!Arg)
return None;
APInt MaxSize = Arg->getValue().zextOrSelf(IntTyBits);
if (Size.ugt(MaxSize))
Size = MaxSize + 1;
}
return Size;
}
const ConstantInt *Arg =
dyn_cast<ConstantInt>(Mapper(CB->getArgOperand(FnData->FstParam)));
if (!Arg)
return None;
APInt Size = Arg->getValue();
if (!CheckedZextOrTrunc(Size, IntTyBits))
return None;
// Size is determined by just 1 parameter.
if (FnData->SndParam < 0)
return Size;
Arg = dyn_cast<ConstantInt>(Mapper(CB->getArgOperand(FnData->SndParam)));
if (!Arg)
return None;
APInt NumElems = Arg->getValue();
if (!CheckedZextOrTrunc(NumElems, IntTyBits))
return None;
bool Overflow;
Size = Size.umul_ov(NumElems, Overflow);
if (Overflow)
return None;
return Size;
}
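// For example, for `%p = call i8* @calloc(i64 16, i64 8)` the calloc entry in
// AllocationFnData gives FstParam == 0 and SndParam == 1, so getAllocSize
// returns 16 * 8 == 128 (or None if the multiplication overflows the index
// type width).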
Constant *llvm::getInitialValueOfAllocation(const CallBase *Alloc,
const TargetLibraryInfo *TLI,
Type *Ty) {
assert(isAllocationFn(Alloc, TLI));
// malloc and aligned_alloc are uninitialized (undef)
if (isMallocLikeFn(Alloc, TLI) || isAlignedAllocLikeFn(Alloc, TLI))
return UndefValue::get(Ty);
// calloc zero initializes
if (isCallocLikeFn(Alloc, TLI))
return Constant::getNullValue(Ty);
return nullptr;
}
/// isLibFreeFunction - Returns true if the function is a builtin free()
bool llvm::isLibFreeFunction(const Function *F, const LibFunc TLIFn) {
unsigned ExpectedNumParams;
if (TLIFn == LibFunc_free ||
TLIFn == LibFunc_ZdlPv || // operator delete(void*)
TLIFn == LibFunc_ZdaPv || // operator delete[](void*)
TLIFn == LibFunc_msvc_delete_ptr32 || // operator delete(void*)
TLIFn == LibFunc_msvc_delete_ptr64 || // operator delete(void*)
TLIFn == LibFunc_msvc_delete_array_ptr32 || // operator delete[](void*)
TLIFn == LibFunc_msvc_delete_array_ptr64) // operator delete[](void*)
ExpectedNumParams = 1;
else if (TLIFn == LibFunc_ZdlPvj || // delete(void*, uint)
TLIFn == LibFunc_ZdlPvm || // delete(void*, ulong)
TLIFn == LibFunc_ZdlPvRKSt9nothrow_t || // delete(void*, nothrow)
TLIFn == LibFunc_ZdlPvSt11align_val_t || // delete(void*, align_val_t)
TLIFn == LibFunc_ZdaPvj || // delete[](void*, uint)
TLIFn == LibFunc_ZdaPvm || // delete[](void*, ulong)
TLIFn == LibFunc_ZdaPvRKSt9nothrow_t || // delete[](void*, nothrow)
TLIFn == LibFunc_ZdaPvSt11align_val_t || // delete[](void*, align_val_t)
TLIFn == LibFunc_msvc_delete_ptr32_int || // delete(void*, uint)
TLIFn == LibFunc_msvc_delete_ptr64_longlong || // delete(void*, ulonglong)
TLIFn == LibFunc_msvc_delete_ptr32_nothrow || // delete(void*, nothrow)
TLIFn == LibFunc_msvc_delete_ptr64_nothrow || // delete(void*, nothrow)
TLIFn == LibFunc_msvc_delete_array_ptr32_int || // delete[](void*, uint)
TLIFn == LibFunc_msvc_delete_array_ptr64_longlong || // delete[](void*, ulonglong)
TLIFn == LibFunc_msvc_delete_array_ptr32_nothrow || // delete[](void*, nothrow)
TLIFn == LibFunc_msvc_delete_array_ptr64_nothrow || // delete[](void*, nothrow)
TLIFn == LibFunc___kmpc_free_shared) // OpenMP Offloading RTL free
ExpectedNumParams = 2;
else if (TLIFn == LibFunc_ZdaPvSt11align_val_tRKSt9nothrow_t || // delete(void*, align_val_t, nothrow)
TLIFn == LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t || // delete[](void*, align_val_t, nothrow)
TLIFn == LibFunc_ZdlPvjSt11align_val_t || // delete(void*, unsigned long, align_val_t)
TLIFn == LibFunc_ZdlPvmSt11align_val_t || // delete(void*, unsigned long, align_val_t)
TLIFn == LibFunc_ZdaPvjSt11align_val_t || // delete[](void*, unsigned int, align_val_t)
TLIFn == LibFunc_ZdaPvmSt11align_val_t) // delete[](void*, unsigned long, align_val_t)
ExpectedNumParams = 3;
else
return false;
// Check free prototype.
// FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
// attribute will exist.
FunctionType *FTy = F->getFunctionType();
if (!FTy->getReturnType()->isVoidTy())
return false;
if (FTy->getNumParams() != ExpectedNumParams)
return false;
if (FTy->getParamType(0) != Type::getInt8PtrTy(F->getContext()))
return false;
return true;
}
/// isFreeCall - Returns non-null if the value is a call to the builtin free()
const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
bool IsNoBuiltinCall;
const Function *Callee = getCalledFunction(I, IsNoBuiltinCall);
if (Callee == nullptr || IsNoBuiltinCall)
return nullptr;
LibFunc TLIFn;
if (!TLI || !TLI->getLibFunc(*Callee, TLIFn) || !TLI->has(TLIFn))
return nullptr;
return isLibFreeFunction(Callee, TLIFn) ? dyn_cast<CallInst>(I) : nullptr;
}
//===----------------------------------------------------------------------===//
// Utility functions to compute size of objects.
//
static APInt getSizeWithOverflow(const SizeOffsetType &Data) {
if (Data.second.isNegative() || Data.first.ult(Data.second))
return APInt(Data.first.getBitWidth(), 0);
return Data.first - Data.second;
}
/// Compute the size of the object pointed to by Ptr. Returns true and the
/// object size in Size if successful, and false otherwise.
/// If RoundToAlign is true, then Size is rounded up to the alignment of
/// allocas, byval arguments, and global variables.
bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL,
const TargetLibraryInfo *TLI, ObjectSizeOpts Opts) {
ObjectSizeOffsetVisitor Visitor(DL, TLI, Ptr->getContext(), Opts);
SizeOffsetType Data = Visitor.compute(const_cast<Value*>(Ptr));
if (!Visitor.bothKnown(Data))
return false;
Size = getSizeWithOverflow(Data).getZExtValue();
return true;
}
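// A minimal usage sketch (assuming DL and TLI are available at the call site):
//
//   uint64_t Size;
//   ObjectSizeOpts Opts;
//   if (getObjectSize(Ptr, Size, DL, &TLI, Opts))
//     ...; // Size now holds the allocated size in bytes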
Value *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize,
const DataLayout &DL,
const TargetLibraryInfo *TLI,
bool MustSucceed) {
assert(ObjectSize->getIntrinsicID() == Intrinsic::objectsize &&
"ObjectSize must be a call to llvm.objectsize!");
bool MaxVal = cast<ConstantInt>(ObjectSize->getArgOperand(1))->isZero();
ObjectSizeOpts EvalOptions;
// Unless we have to fold this to something, try to be as accurate as
// possible.
if (MustSucceed)
EvalOptions.EvalMode =
MaxVal ? ObjectSizeOpts::Mode::Max : ObjectSizeOpts::Mode::Min;
else
EvalOptions.EvalMode = ObjectSizeOpts::Mode::Exact;
EvalOptions.NullIsUnknownSize =
cast<ConstantInt>(ObjectSize->getArgOperand(2))->isOne();
auto *ResultType = cast<IntegerType>(ObjectSize->getType());
bool StaticOnly = cast<ConstantInt>(ObjectSize->getArgOperand(3))->isZero();
if (StaticOnly) {
// FIXME: Does it make sense to just return a failure value if the size won't
// fit in the output and `!MustSucceed`?
uint64_t Size;
if (getObjectSize(ObjectSize->getArgOperand(0), Size, DL, TLI, EvalOptions) &&
isUIntN(ResultType->getBitWidth(), Size))
return ConstantInt::get(ResultType, Size);
} else {
LLVMContext &Ctx = ObjectSize->getFunction()->getContext();
ObjectSizeOffsetEvaluator Eval(DL, TLI, Ctx, EvalOptions);
SizeOffsetEvalType SizeOffsetPair =
Eval.compute(ObjectSize->getArgOperand(0));
if (SizeOffsetPair != ObjectSizeOffsetEvaluator::unknown()) {
IRBuilder<TargetFolder> Builder(Ctx, TargetFolder(DL));
Builder.SetInsertPoint(ObjectSize);
// If we're outside the end of the object, then we can always access
// exactly 0 bytes.
Value *ResultSize =
Builder.CreateSub(SizeOffsetPair.first, SizeOffsetPair.second);
Value *UseZero =
Builder.CreateICmpULT(SizeOffsetPair.first, SizeOffsetPair.second);
ResultSize = Builder.CreateZExtOrTrunc(ResultSize, ResultType);
Value *Ret = Builder.CreateSelect(
UseZero, ConstantInt::get(ResultType, 0), ResultSize);
// The non-constant size expression cannot evaluate to -1.
if (!isa<Constant>(SizeOffsetPair.first) ||
!isa<Constant>(SizeOffsetPair.second))
Builder.CreateAssumption(
Builder.CreateICmpNE(Ret, ConstantInt::get(ResultType, -1)));
return Ret;
}
}
if (!MustSucceed)
return nullptr;
return ConstantInt::get(ResultType, MaxVal ? -1ULL : 0);
}
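// For example, for
//   %sz = call i64 @llvm.objectsize.i64.p0i8(i8* %p, i1 false, i1 true, i1 false)
// the static path above folds %sz to a constant whenever the size of %p's
// object is known; otherwise, with MustSucceed set, the call is replaced by -1
// (the "min" argument is false here) or by 0 when that argument is true.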
STATISTIC(ObjectVisitorArgument,
"Number of arguments with unsolved size and offset");
STATISTIC(ObjectVisitorLoad,
"Number of load instructions with unsolved size and offset");
APInt ObjectSizeOffsetVisitor::align(APInt Size, MaybeAlign Alignment) {
if (Options.RoundToAlign && Alignment)
return APInt(IntTyBits, alignTo(Size.getZExtValue(), Alignment));
return Size;
}
ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout &DL,
const TargetLibraryInfo *TLI,
LLVMContext &Context,
ObjectSizeOpts Options)
: DL(DL), TLI(TLI), Options(Options) {
// Pointer size must be rechecked for each object visited since it could have
// a different address space.
}
SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
+ unsigned InitialIntTyBits = DL.getIndexTypeSizeInBits(V->getType());
+
+ // Stripping pointer casts can strip address space casts which can change the
+ // index type size. The invariant is that we use the value type to determine
+ // the index type size and if we stripped address space casts we have to
+ // readjust the APInt as we pass it upwards in order for the APInt to match
+ // the type the caller passed in.
+ APInt Offset(InitialIntTyBits, 0);
+ V = V->stripAndAccumulateConstantOffsets(
+ DL, Offset, /* AllowNonInbounds */ true, /* AllowInvariantGroup */ true);
+
+ // The index type size and zero value used below are computed for the
+ // stripped value, so they match the type that is passed to computeImpl.
IntTyBits = DL.getIndexTypeSizeInBits(V->getType());
Zero = APInt::getZero(IntTyBits);
- V = V->stripPointerCasts();
+ bool IndexTypeSizeChanged = InitialIntTyBits != IntTyBits;
+ if (!IndexTypeSizeChanged && Offset.isZero())
+ return computeImpl(V);
+
+ // We stripped an address space cast that changed the index type size or we
+ // accumulated some constant offset (or both). Readjust the bit width to match
+ // the argument index type size and apply the offset, as required.
+ SizeOffsetType SOT = computeImpl(V);
+ if (IndexTypeSizeChanged) {
+ if (knownSize(SOT) && !::CheckedZextOrTrunc(SOT.first, InitialIntTyBits))
+ SOT.first = APInt();
+ if (knownOffset(SOT) && !::CheckedZextOrTrunc(SOT.second, InitialIntTyBits))
+ SOT.second = APInt();
+ }
+ // If the computed offset is "unknown" we cannot add the stripped offset.
+ return {SOT.first,
+ SOT.second.getBitWidth() > 1 ? SOT.second + Offset : SOT.second};
+}
+
+SizeOffsetType ObjectSizeOffsetVisitor::computeImpl(Value *V) {
if (Instruction *I = dyn_cast<Instruction>(V)) {
// If we have already seen this instruction, bail out. Cycles can happen in
// unreachable code after constant propagation.
if (!SeenInsts.insert(I).second)
return unknown();
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
- return visitGEPOperator(*GEP);
return visit(*I);
}
if (Argument *A = dyn_cast<Argument>(V))
return visitArgument(*A);
if (ConstantPointerNull *P = dyn_cast<ConstantPointerNull>(V))
return visitConstantPointerNull(*P);
if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
return visitGlobalAlias(*GA);
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
return visitGlobalVariable(*GV);
if (UndefValue *UV = dyn_cast<UndefValue>(V))
return visitUndefValue(*UV);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- if (CE->getOpcode() == Instruction::IntToPtr)
- return unknown(); // clueless
- if (CE->getOpcode() == Instruction::GetElementPtr)
- return visitGEPOperator(cast<GEPOperator>(*CE));
- }
LLVM_DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: "
<< *V << '\n');
return unknown();
}
bool ObjectSizeOffsetVisitor::CheckedZextOrTrunc(APInt &I) {
return ::CheckedZextOrTrunc(I, IntTyBits);
}
SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) {
if (!I.getAllocatedType()->isSized())
return unknown();
if (isa<ScalableVectorType>(I.getAllocatedType()))
return unknown();
APInt Size(IntTyBits, DL.getTypeAllocSize(I.getAllocatedType()));
if (!I.isArrayAllocation())
return std::make_pair(align(Size, I.getAlign()), Zero);
Value *ArraySize = I.getArraySize();
if (const ConstantInt *C = dyn_cast<ConstantInt>(ArraySize)) {
APInt NumElems = C->getValue();
if (!CheckedZextOrTrunc(NumElems))
return unknown();
bool Overflow;
Size = Size.umul_ov(NumElems, Overflow);
return Overflow ? unknown()
: std::make_pair(align(Size, I.getAlign()), Zero);
}
return unknown();
}
SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) {
Type *MemoryTy = A.getPointeeInMemoryValueType();
// No interprocedural analysis is done at the moment.
if (!MemoryTy || !MemoryTy->isSized()) {
++ObjectVisitorArgument;
return unknown();
}
APInt Size(IntTyBits, DL.getTypeAllocSize(MemoryTy));
return std::make_pair(align(Size, A.getParamAlign()), Zero);
}
SizeOffsetType ObjectSizeOffsetVisitor::visitCallBase(CallBase &CB) {
auto Mapper = [](const Value *V) { return V; };
if (Optional<APInt> Size = getAllocSize(&CB, TLI, Mapper))
return std::make_pair(*Size, Zero);
return unknown();
}
SizeOffsetType
ObjectSizeOffsetVisitor::visitConstantPointerNull(ConstantPointerNull& CPN) {
// If null is unknown, there's nothing we can do. Additionally, non-zero
// address spaces can make use of null, so we don't presume to know anything
// about that.
//
// TODO: How should this work with address space casts? We currently just drop
// them on the floor, but it's unclear what we should do when a NULL from
// addrspace(1) gets casted to addrspace(0) (or vice-versa).
if (Options.NullIsUnknownSize || CPN.getType()->getAddressSpace())
return unknown();
return std::make_pair(Zero, Zero);
}
SizeOffsetType
ObjectSizeOffsetVisitor::visitExtractElementInst(ExtractElementInst&) {
return unknown();
}
SizeOffsetType
ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) {
// Easy cases were already folded by previous passes.
return unknown();
}
-SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) {
- SizeOffsetType PtrData = compute(GEP.getPointerOperand());
- APInt Offset(DL.getIndexTypeSizeInBits(GEP.getPointerOperand()->getType()), 0);
- if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(DL, Offset))
- return unknown();
-
- return std::make_pair(PtrData.first, PtrData.second + Offset);
-}
-
SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalAlias(GlobalAlias &GA) {
if (GA.isInterposable())
return unknown();
return compute(GA.getAliasee());
}
SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){
if (!GV.hasDefinitiveInitializer())
return unknown();
APInt Size(IntTyBits, DL.getTypeAllocSize(GV.getValueType()));
return std::make_pair(align(Size, GV.getAlign()), Zero);
}
SizeOffsetType ObjectSizeOffsetVisitor::visitIntToPtrInst(IntToPtrInst&) {
// clueless
return unknown();
}
SizeOffsetType ObjectSizeOffsetVisitor::visitLoadInst(LoadInst&) {
++ObjectVisitorLoad;
return unknown();
}
SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode&) {
// too complex to analyze statically.
return unknown();
}
SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) {
SizeOffsetType TrueSide = compute(I.getTrueValue());
SizeOffsetType FalseSide = compute(I.getFalseValue());
if (bothKnown(TrueSide) && bothKnown(FalseSide)) {
if (TrueSide == FalseSide) {
return TrueSide;
}
APInt TrueResult = getSizeWithOverflow(TrueSide);
APInt FalseResult = getSizeWithOverflow(FalseSide);
if (TrueResult == FalseResult) {
return TrueSide;
}
if (Options.EvalMode == ObjectSizeOpts::Mode::Min) {
if (TrueResult.slt(FalseResult))
return TrueSide;
return FalseSide;
}
if (Options.EvalMode == ObjectSizeOpts::Mode::Max) {
if (TrueResult.sgt(FalseResult))
return TrueSide;
return FalseSide;
}
}
return unknown();
}
SizeOffsetType ObjectSizeOffsetVisitor::visitUndefValue(UndefValue&) {
return std::make_pair(Zero, Zero);
}
SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) {
LLVM_DEBUG(dbgs() << "ObjectSizeOffsetVisitor unknown instruction:" << I
<< '\n');
return unknown();
}
ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(
const DataLayout &DL, const TargetLibraryInfo *TLI, LLVMContext &Context,
ObjectSizeOpts EvalOpts)
: DL(DL), TLI(TLI), Context(Context),
Builder(Context, TargetFolder(DL),
IRBuilderCallbackInserter(
[&](Instruction *I) { InsertedInstructions.insert(I); })),
EvalOpts(EvalOpts) {
// IntTy and Zero must be set for each compute() since the address space may
// be different for later objects.
}
SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute(Value *V) {
// XXX - Are vectors of pointers possible here?
IntTy = cast<IntegerType>(DL.getIndexType(V->getType()));
Zero = ConstantInt::get(IntTy, 0);
SizeOffsetEvalType Result = compute_(V);
if (!bothKnown(Result)) {
// Erase everything that was computed in this iteration from the cache, so
// that no dangling references are left behind. We could be a bit smarter if
// we kept a dependency graph. It's probably not worth the complexity.
for (const Value *SeenVal : SeenVals) {
CacheMapTy::iterator CacheIt = CacheMap.find(SeenVal);
// non-computable results can be safely cached
if (CacheIt != CacheMap.end() && anyKnown(CacheIt->second))
CacheMap.erase(CacheIt);
}
// Erase any instructions we inserted as part of the traversal.
for (Instruction *I : InsertedInstructions) {
I->replaceAllUsesWith(UndefValue::get(I->getType()));
I->eraseFromParent();
}
}
SeenVals.clear();
InsertedInstructions.clear();
return Result;
}
SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {
ObjectSizeOffsetVisitor Visitor(DL, TLI, Context, EvalOpts);
SizeOffsetType Const = Visitor.compute(V);
if (Visitor.bothKnown(Const))
return std::make_pair(ConstantInt::get(Context, Const.first),
ConstantInt::get(Context, Const.second));
V = V->stripPointerCasts();
// Check cache.
CacheMapTy::iterator CacheIt = CacheMap.find(V);
if (CacheIt != CacheMap.end())
return CacheIt->second;
// Always generate code immediately before the instruction being
// processed, so that the generated code dominates the same BBs.
BuilderTy::InsertPointGuard Guard(Builder);
if (Instruction *I = dyn_cast<Instruction>(V))
Builder.SetInsertPoint(I);
// Now compute the size and offset.
SizeOffsetEvalType Result;
// Record the pointers that were handled in this run, so that they can be
// cleaned later if something fails. We also use this set to break cycles that
// can occur in dead code.
if (!SeenVals.insert(V).second) {
Result = unknown();
} else if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
Result = visitGEPOperator(*GEP);
} else if (Instruction *I = dyn_cast<Instruction>(V)) {
Result = visit(*I);
} else if (isa<Argument>(V) ||
(isa<ConstantExpr>(V) &&
cast<ConstantExpr>(V)->getOpcode() == Instruction::IntToPtr) ||
isa<GlobalAlias>(V) ||
isa<GlobalVariable>(V)) {
// Ignore values where we cannot do more than ObjectSizeVisitor.
Result = unknown();
} else {
LLVM_DEBUG(
dbgs() << "ObjectSizeOffsetEvaluator::compute() unhandled value: " << *V
<< '\n');
Result = unknown();
}
// Don't reuse CacheIt since it may be invalid at this point.
CacheMap[V] = Result;
return Result;
}
SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) {
if (!I.getAllocatedType()->isSized())
return unknown();
// must be a VLA
assert(I.isArrayAllocation());
// If needed, adjust the alloca's operand size to match the pointer size.
// Subsequent math operations expect the types to match.
Value *ArraySize = Builder.CreateZExtOrTrunc(
I.getArraySize(), DL.getIntPtrType(I.getContext()));
assert(ArraySize->getType() == Zero->getType() &&
"Expected zero constant to have pointer type");
Value *Size = ConstantInt::get(ArraySize->getType(),
DL.getTypeAllocSize(I.getAllocatedType()));
Size = Builder.CreateMul(Size, ArraySize);
return std::make_pair(Size, Zero);
}
SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitCallBase(CallBase &CB) {
Optional<AllocFnsTy> FnData = getAllocationSize(&CB, TLI);
if (!FnData)
return unknown();
// Handle strdup-like functions separately.
if (FnData->AllocTy == StrDupLike) {
// TODO: implement evaluation of strdup/strndup
return unknown();
}
Value *FirstArg = CB.getArgOperand(FnData->FstParam);
FirstArg = Builder.CreateZExtOrTrunc(FirstArg, IntTy);
if (FnData->SndParam < 0)
return std::make_pair(FirstArg, Zero);
Value *SecondArg = CB.getArgOperand(FnData->SndParam);
SecondArg = Builder.CreateZExtOrTrunc(SecondArg, IntTy);
Value *Size = Builder.CreateMul(FirstArg, SecondArg);
return std::make_pair(Size, Zero);
}
SizeOffsetEvalType
ObjectSizeOffsetEvaluator::visitExtractElementInst(ExtractElementInst&) {
return unknown();
}
SizeOffsetEvalType
ObjectSizeOffsetEvaluator::visitExtractValueInst(ExtractValueInst&) {
return unknown();
}
SizeOffsetEvalType
ObjectSizeOffsetEvaluator::visitGEPOperator(GEPOperator &GEP) {
SizeOffsetEvalType PtrData = compute_(GEP.getPointerOperand());
if (!bothKnown(PtrData))
return unknown();
Value *Offset = EmitGEPOffset(&Builder, DL, &GEP, /*NoAssumptions=*/true);
Offset = Builder.CreateAdd(PtrData.second, Offset);
return std::make_pair(PtrData.first, Offset);
}
SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitIntToPtrInst(IntToPtrInst&) {
// clueless
return unknown();
}
SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitLoadInst(LoadInst&) {
return unknown();
}
SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitPHINode(PHINode &PHI) {
// Create 2 PHIs: one for size and another for offset.
PHINode *SizePHI = Builder.CreatePHI(IntTy, PHI.getNumIncomingValues());
PHINode *OffsetPHI = Builder.CreatePHI(IntTy, PHI.getNumIncomingValues());
// Insert right away in the cache to handle recursive PHIs.
CacheMap[&PHI] = std::make_pair(SizePHI, OffsetPHI);
// Compute offset/size for each PHI incoming pointer.
for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i) {
Builder.SetInsertPoint(&*PHI.getIncomingBlock(i)->getFirstInsertionPt());
SizeOffsetEvalType EdgeData = compute_(PHI.getIncomingValue(i));
if (!bothKnown(EdgeData)) {
OffsetPHI->replaceAllUsesWith(UndefValue::get(IntTy));
OffsetPHI->eraseFromParent();
InsertedInstructions.erase(OffsetPHI);
SizePHI->replaceAllUsesWith(UndefValue::get(IntTy));
SizePHI->eraseFromParent();
InsertedInstructions.erase(SizePHI);
return unknown();
}
SizePHI->addIncoming(EdgeData.first, PHI.getIncomingBlock(i));
OffsetPHI->addIncoming(EdgeData.second, PHI.getIncomingBlock(i));
}
Value *Size = SizePHI, *Offset = OffsetPHI;
if (Value *Tmp = SizePHI->hasConstantValue()) {
Size = Tmp;
SizePHI->replaceAllUsesWith(Size);
SizePHI->eraseFromParent();
InsertedInstructions.erase(SizePHI);
}
if (Value *Tmp = OffsetPHI->hasConstantValue()) {
Offset = Tmp;
OffsetPHI->replaceAllUsesWith(Offset);
OffsetPHI->eraseFromParent();
InsertedInstructions.erase(OffsetPHI);
}
return std::make_pair(Size, Offset);
}
SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitSelectInst(SelectInst &I) {
SizeOffsetEvalType TrueSide = compute_(I.getTrueValue());
SizeOffsetEvalType FalseSide = compute_(I.getFalseValue());
if (!bothKnown(TrueSide) || !bothKnown(FalseSide))
return unknown();
if (TrueSide == FalseSide)
return TrueSide;
Value *Size = Builder.CreateSelect(I.getCondition(), TrueSide.first,
FalseSide.first);
Value *Offset = Builder.CreateSelect(I.getCondition(), TrueSide.second,
FalseSide.second);
return std::make_pair(Size, Offset);
}
SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitInstruction(Instruction &I) {
LLVM_DEBUG(dbgs() << "ObjectSizeOffsetEvaluator unknown instruction:" << I
<< '\n');
return unknown();
}
diff --git a/contrib/llvm-project/llvm/lib/BinaryFormat/COFF.cpp b/contrib/llvm-project/llvm/lib/BinaryFormat/COFF.cpp
new file mode 100644
index 000000000000..8fbee0218b79
--- /dev/null
+++ b/contrib/llvm-project/llvm/lib/BinaryFormat/COFF.cpp
@@ -0,0 +1,57 @@
+//===- llvm/BinaryFormat/COFF.cpp - The COFF format -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
+
+// Maximum offsets for different string table entry encodings.
+enum : unsigned { Max7DecimalOffset = 9999999U };
+enum : uint64_t { MaxBase64Offset = 0xFFFFFFFFFULL }; // 64^6, including 0
+
+// Encode a string table entry offset in base 64, padded to 6 chars, and
+// prefixed with a double slash: '//AAAAAA', '//AAAAAB', ...
+// Buffer must be at least 8 bytes large. No terminating null appended.
+static void encodeBase64StringEntry(char *Buffer, uint64_t Value) {
+ assert(Value > Max7DecimalOffset && Value <= MaxBase64Offset &&
+ "Illegal section name encoding for value");
+
+ static const char Alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "abcdefghijklmnopqrstuvwxyz"
+ "0123456789+/";
+
+ Buffer[0] = '/';
+ Buffer[1] = '/';
+
+ char *Ptr = Buffer + 7;
+ for (unsigned i = 0; i < 6; ++i) {
+ unsigned Rem = Value % 64;
+ Value /= 64;
+ *(Ptr--) = Alphabet[Rem];
+ }
+}
+
+bool llvm::COFF::encodeSectionName(char *Out, uint64_t Offset) {
+ if (Offset <= Max7DecimalOffset) {
+ // Offsets of 7 digits or less are encoded in ASCII.
+ SmallVector<char, COFF::NameSize> Buffer;
+ Twine('/').concat(Twine(Offset)).toVector(Buffer);
+ assert(Buffer.size() <= COFF::NameSize && Buffer.size() >= 2);
+ std::memcpy(Out, Buffer.data(), Buffer.size());
+ return true;
+ }
+
+ if (Offset <= MaxBase64Offset) {
+ // Starting with 10,000,000, offsets are encoded as base64.
+ encodeBase64StringEntry(Out, Offset);
+ return true;
+ }
+
+ // The offset is too large to be encoded.
+ return false;
+}
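// For example, encodeSectionName(Out, 123456) produces the ASCII form
// "/123456", while encodeSectionName(Out, 10000000) switches to the base64
// form "//AAmJaA"; the function returns false only for offsets above
// 0xFFFFFFFFF.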
diff --git a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
index b901a2d2da23..249f02f36bae 100644
--- a/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
+++ b/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
@@ -1,426 +1,427 @@
//===------- EPCIndirectionUtils.cpp -- EPC based indirection APIs --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h"
#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
#include "llvm/Support/MathExtras.h"
#include <future>
using namespace llvm;
using namespace llvm::orc;
namespace llvm {
namespace orc {
class EPCIndirectionUtilsAccess {
public:
using IndirectStubInfo = EPCIndirectionUtils::IndirectStubInfo;
using IndirectStubInfoVector = EPCIndirectionUtils::IndirectStubInfoVector;
static Expected<IndirectStubInfoVector>
getIndirectStubs(EPCIndirectionUtils &EPCIU, unsigned NumStubs) {
return EPCIU.getIndirectStubs(NumStubs);
};
};
} // end namespace orc
} // end namespace llvm
namespace {
class EPCTrampolinePool : public TrampolinePool {
public:
EPCTrampolinePool(EPCIndirectionUtils &EPCIU);
Error deallocatePool();
protected:
Error grow() override;
using FinalizedAlloc = jitlink::JITLinkMemoryManager::FinalizedAlloc;
EPCIndirectionUtils &EPCIU;
unsigned TrampolineSize = 0;
unsigned TrampolinesPerPage = 0;
std::vector<FinalizedAlloc> TrampolineBlocks;
};
class EPCIndirectStubsManager : public IndirectStubsManager,
private EPCIndirectionUtilsAccess {
public:
EPCIndirectStubsManager(EPCIndirectionUtils &EPCIU) : EPCIU(EPCIU) {}
Error deallocateStubs();
Error createStub(StringRef StubName, JITTargetAddress StubAddr,
JITSymbolFlags StubFlags) override;
Error createStubs(const StubInitsMap &StubInits) override;
JITEvaluatedSymbol findStub(StringRef Name, bool ExportedStubsOnly) override;
JITEvaluatedSymbol findPointer(StringRef Name) override;
Error updatePointer(StringRef Name, JITTargetAddress NewAddr) override;
private:
using StubInfo = std::pair<IndirectStubInfo, JITSymbolFlags>;
std::mutex ISMMutex;
EPCIndirectionUtils &EPCIU;
StringMap<StubInfo> StubInfos;
};
EPCTrampolinePool::EPCTrampolinePool(EPCIndirectionUtils &EPCIU)
: EPCIU(EPCIU) {
auto &EPC = EPCIU.getExecutorProcessControl();
auto &ABI = EPCIU.getABISupport();
TrampolineSize = ABI.getTrampolineSize();
TrampolinesPerPage =
(EPC.getPageSize() - ABI.getPointerSize()) / TrampolineSize;
}
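// For example, with a 4096-byte executor page, an 8-byte target pointer size
// and a (hypothetical) 16-byte trampoline, this works out to
// (4096 - 8) / 16 == 255 trampolines per page.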
Error EPCTrampolinePool::deallocatePool() {
Error Err = Error::success();
std::promise<MSVCPError> DeallocResultP;
auto DeallocResultF = DeallocResultP.get_future();
EPCIU.getExecutorProcessControl().getMemMgr().deallocate(
std::move(TrampolineBlocks),
[&](Error Err) { DeallocResultP.set_value(std::move(Err)); });
return DeallocResultF.get();
}
Error EPCTrampolinePool::grow() {
using namespace jitlink;
assert(AvailableTrampolines.empty() &&
"Grow called with trampolines still available");
auto ResolverAddress = EPCIU.getResolverBlockAddress();
assert(ResolverAddress && "Resolver address can not be null");
auto &EPC = EPCIU.getExecutorProcessControl();
auto PageSize = EPC.getPageSize();
auto Alloc = SimpleSegmentAlloc::Create(
EPC.getMemMgr(), nullptr,
{{MemProt::Read | MemProt::Exec, {PageSize, Align(PageSize)}}});
if (!Alloc)
return Alloc.takeError();
unsigned NumTrampolines = TrampolinesPerPage;
auto SegInfo = Alloc->getSegInfo(MemProt::Read | MemProt::Exec);
EPCIU.getABISupport().writeTrampolines(SegInfo.WorkingMem.data(),
SegInfo.Addr.getValue(),
ResolverAddress, NumTrampolines);
for (unsigned I = 0; I < NumTrampolines; ++I)
AvailableTrampolines.push_back(SegInfo.Addr.getValue() +
(I * TrampolineSize));
auto FA = Alloc->finalize();
if (!FA)
return FA.takeError();
TrampolineBlocks.push_back(std::move(*FA));
return Error::success();
}
Error EPCIndirectStubsManager::createStub(StringRef StubName,
JITTargetAddress StubAddr,
JITSymbolFlags StubFlags) {
StubInitsMap SIM;
SIM[StubName] = std::make_pair(StubAddr, StubFlags);
return createStubs(SIM);
}
Error EPCIndirectStubsManager::createStubs(const StubInitsMap &StubInits) {
auto AvailableStubInfos = getIndirectStubs(EPCIU, StubInits.size());
if (!AvailableStubInfos)
return AvailableStubInfos.takeError();
{
std::lock_guard<std::mutex> Lock(ISMMutex);
unsigned ASIdx = 0;
for (auto &SI : StubInits) {
auto &A = (*AvailableStubInfos)[ASIdx++];
StubInfos[SI.first()] = std::make_pair(A, SI.second.second);
}
}
auto &MemAccess = EPCIU.getExecutorProcessControl().getMemoryAccess();
switch (EPCIU.getABISupport().getPointerSize()) {
case 4: {
unsigned ASIdx = 0;
std::vector<tpctypes::UInt32Write> PtrUpdates;
for (auto &SI : StubInits)
PtrUpdates.push_back(
{ExecutorAddr((*AvailableStubInfos)[ASIdx++].PointerAddress),
static_cast<uint32_t>(SI.second.first)});
return MemAccess.writeUInt32s(PtrUpdates);
}
case 8: {
unsigned ASIdx = 0;
std::vector<tpctypes::UInt64Write> PtrUpdates;
for (auto &SI : StubInits)
PtrUpdates.push_back(
{ExecutorAddr((*AvailableStubInfos)[ASIdx++].PointerAddress),
static_cast<uint64_t>(SI.second.first)});
return MemAccess.writeUInt64s(PtrUpdates);
}
default:
return make_error<StringError>("Unsupported pointer size",
inconvertibleErrorCode());
}
}
JITEvaluatedSymbol EPCIndirectStubsManager::findStub(StringRef Name,
bool ExportedStubsOnly) {
std::lock_guard<std::mutex> Lock(ISMMutex);
auto I = StubInfos.find(Name);
if (I == StubInfos.end())
return nullptr;
return {I->second.first.StubAddress, I->second.second};
}
JITEvaluatedSymbol EPCIndirectStubsManager::findPointer(StringRef Name) {
std::lock_guard<std::mutex> Lock(ISMMutex);
auto I = StubInfos.find(Name);
if (I == StubInfos.end())
return nullptr;
return {I->second.first.PointerAddress, I->second.second};
}
Error EPCIndirectStubsManager::updatePointer(StringRef Name,
JITTargetAddress NewAddr) {
JITTargetAddress PtrAddr = 0;
{
std::lock_guard<std::mutex> Lock(ISMMutex);
auto I = StubInfos.find(Name);
if (I == StubInfos.end())
return make_error<StringError>("Unknown stub name",
inconvertibleErrorCode());
PtrAddr = I->second.first.PointerAddress;
}
auto &MemAccess = EPCIU.getExecutorProcessControl().getMemoryAccess();
switch (EPCIU.getABISupport().getPointerSize()) {
case 4: {
tpctypes::UInt32Write PUpdate(ExecutorAddr(PtrAddr), NewAddr);
return MemAccess.writeUInt32s(PUpdate);
}
case 8: {
tpctypes::UInt64Write PUpdate(ExecutorAddr(PtrAddr), NewAddr);
return MemAccess.writeUInt64s(PUpdate);
}
default:
return make_error<StringError>("Unsupported pointer size",
inconvertibleErrorCode());
}
}
} // end anonymous namespace.
namespace llvm {
namespace orc {
EPCIndirectionUtils::ABISupport::~ABISupport() {}
Expected<std::unique_ptr<EPCIndirectionUtils>>
EPCIndirectionUtils::Create(ExecutorProcessControl &EPC) {
const auto &TT = EPC.getTargetTriple();
switch (TT.getArch()) {
default:
return make_error<StringError>(
std::string("No EPCIndirectionUtils available for ") + TT.str(),
inconvertibleErrorCode());
case Triple::aarch64:
case Triple::aarch64_32:
return CreateWithABI<OrcAArch64>(EPC);
case Triple::x86:
return CreateWithABI<OrcI386>(EPC);
case Triple::mips:
return CreateWithABI<OrcMips32Be>(EPC);
case Triple::mipsel:
return CreateWithABI<OrcMips32Le>(EPC);
case Triple::mips64:
case Triple::mips64el:
return CreateWithABI<OrcMips64>(EPC);
case Triple::x86_64:
if (TT.getOS() == Triple::OSType::Win32)
return CreateWithABI<OrcX86_64_Win32>(EPC);
else
return CreateWithABI<OrcX86_64_SysV>(EPC);
}
}
Error EPCIndirectionUtils::cleanup() {
auto &MemMgr = EPC.getMemMgr();
auto Err = MemMgr.deallocate(std::move(IndirectStubAllocs));
if (TP)
Err = joinErrors(std::move(Err),
static_cast<EPCTrampolinePool &>(*TP).deallocatePool());
if (ResolverBlock)
Err =
joinErrors(std::move(Err), MemMgr.deallocate(std::move(ResolverBlock)));
return Err;
}
Expected<JITTargetAddress>
EPCIndirectionUtils::writeResolverBlock(JITTargetAddress ReentryFnAddr,
JITTargetAddress ReentryCtxAddr) {
using namespace jitlink;
assert(ABI && "ABI can not be null");
auto ResolverSize = ABI->getResolverCodeSize();
auto Alloc =
SimpleSegmentAlloc::Create(EPC.getMemMgr(), nullptr,
{{MemProt::Read | MemProt::Exec,
{ResolverSize, Align(EPC.getPageSize())}}});
if (!Alloc)
return Alloc.takeError();
auto SegInfo = Alloc->getSegInfo(MemProt::Read | MemProt::Exec);
- ABI->writeResolverCode(SegInfo.WorkingMem.data(), SegInfo.Addr.getValue(),
+ ResolverBlockAddr = SegInfo.Addr.getValue();
+ ABI->writeResolverCode(SegInfo.WorkingMem.data(), ResolverBlockAddr,
ReentryFnAddr, ReentryCtxAddr);
auto FA = Alloc->finalize();
if (!FA)
return FA.takeError();
ResolverBlock = std::move(*FA);
- return SegInfo.Addr.getValue();
+ return ResolverBlockAddr;
}
std::unique_ptr<IndirectStubsManager>
EPCIndirectionUtils::createIndirectStubsManager() {
return std::make_unique<EPCIndirectStubsManager>(*this);
}
TrampolinePool &EPCIndirectionUtils::getTrampolinePool() {
if (!TP)
TP = std::make_unique<EPCTrampolinePool>(*this);
return *TP;
}
LazyCallThroughManager &EPCIndirectionUtils::createLazyCallThroughManager(
ExecutionSession &ES, JITTargetAddress ErrorHandlerAddr) {
assert(!LCTM &&
"createLazyCallThroughManager can not have been called before");
LCTM = std::make_unique<LazyCallThroughManager>(ES, ErrorHandlerAddr,
&getTrampolinePool());
return *LCTM;
}
EPCIndirectionUtils::EPCIndirectionUtils(ExecutorProcessControl &EPC,
std::unique_ptr<ABISupport> ABI)
: EPC(EPC), ABI(std::move(ABI)) {
assert(this->ABI && "ABI can not be null");
assert(EPC.getPageSize() > getABISupport().getStubSize() &&
"Stubs larger than one page are not supported");
}
Expected<EPCIndirectionUtils::IndirectStubInfoVector>
EPCIndirectionUtils::getIndirectStubs(unsigned NumStubs) {
using namespace jitlink;
std::lock_guard<std::mutex> Lock(EPCUIMutex);
// If there aren't enough stubs available then allocate some more.
if (NumStubs > AvailableIndirectStubs.size()) {
auto NumStubsToAllocate = NumStubs;
auto PageSize = EPC.getPageSize();
auto StubBytes = alignTo(NumStubsToAllocate * ABI->getStubSize(), PageSize);
NumStubsToAllocate = StubBytes / ABI->getStubSize();
auto PtrBytes =
alignTo(NumStubsToAllocate * ABI->getPointerSize(), PageSize);
auto StubProt = MemProt::Read | MemProt::Exec;
auto PtrProt = MemProt::Read | MemProt::Write;
auto Alloc = SimpleSegmentAlloc::Create(
EPC.getMemMgr(), nullptr,
{{StubProt, {static_cast<size_t>(StubBytes), Align(PageSize)}},
{PtrProt, {static_cast<size_t>(PtrBytes), Align(PageSize)}}});
if (!Alloc)
return Alloc.takeError();
auto StubSeg = Alloc->getSegInfo(StubProt);
auto PtrSeg = Alloc->getSegInfo(PtrProt);
ABI->writeIndirectStubsBlock(StubSeg.WorkingMem.data(),
StubSeg.Addr.getValue(),
PtrSeg.Addr.getValue(), NumStubsToAllocate);
auto FA = Alloc->finalize();
if (!FA)
return FA.takeError();
IndirectStubAllocs.push_back(std::move(*FA));
auto StubExecutorAddr = StubSeg.Addr;
auto PtrExecutorAddr = PtrSeg.Addr;
for (unsigned I = 0; I != NumStubsToAllocate; ++I) {
AvailableIndirectStubs.push_back(IndirectStubInfo(
StubExecutorAddr.getValue(), PtrExecutorAddr.getValue()));
StubExecutorAddr += ABI->getStubSize();
PtrExecutorAddr += ABI->getPointerSize();
}
}
assert(NumStubs <= AvailableIndirectStubs.size() &&
"Sufficient stubs should have been allocated above");
IndirectStubInfoVector Result;
while (NumStubs--) {
Result.push_back(AvailableIndirectStubs.back());
AvailableIndirectStubs.pop_back();
}
return std::move(Result);
}
static JITTargetAddress reentry(JITTargetAddress LCTMAddr,
JITTargetAddress TrampolineAddr) {
auto &LCTM = *jitTargetAddressToPointer<LazyCallThroughManager *>(LCTMAddr);
std::promise<JITTargetAddress> LandingAddrP;
auto LandingAddrF = LandingAddrP.get_future();
LCTM.resolveTrampolineLandingAddress(
TrampolineAddr,
[&](JITTargetAddress Addr) { LandingAddrP.set_value(Addr); });
return LandingAddrF.get();
}
Error setUpInProcessLCTMReentryViaEPCIU(EPCIndirectionUtils &EPCIU) {
auto &LCTM = EPCIU.getLazyCallThroughManager();
return EPCIU
.writeResolverBlock(pointerToJITTargetAddress(&reentry),
pointerToJITTargetAddress(&LCTM))
.takeError();
}
} // end namespace orc
} // end namespace llvm
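// Illustrative sketch (not from the patch): the order in which a client might
// wire the pieces above together for in-process lazy compilation.
// ErrorHandlerAddr is a placeholder supplied by the caller; a real setup would
// point it at an actual error-handling function in the executor.
static Expected<std::unique_ptr<EPCIndirectionUtils>>
exampleSetUpIndirection(ExecutionSession &ES, ExecutorProcessControl &EPC,
                        JITTargetAddress ErrorHandlerAddr) {
  auto EPCIU = EPCIndirectionUtils::Create(EPC);
  if (!EPCIU)
    return EPCIU.takeError();
  // The lazy call-through manager must exist before the resolver block is
  // wired up, because setUpInProcessLCTMReentryViaEPCIU fetches it.
  (*EPCIU)->createLazyCallThroughManager(ES, ErrorHandlerAddr);
  if (auto Err = setUpInProcessLCTMReentryViaEPCIU(**EPCIU))
    return std::move(Err);
  // Indirect stubs managers can now be handed out as needed.
  auto ISM = (*EPCIU)->createIndirectStubsManager();
  (void)ISM;
  return std::move(*EPCIU);
}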
diff --git a/contrib/llvm-project/llvm/lib/IR/Mangler.cpp b/contrib/llvm-project/llvm/lib/IR/Mangler.cpp
index 2399ea27ee9d..b8e3e40e4c1d 100644
--- a/contrib/llvm-project/llvm/lib/IR/Mangler.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Mangler.cpp
@@ -1,260 +1,260 @@
//===-- Mangler.cpp - Self-contained c/asm llvm name mangler --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Unified name mangler for assembly backends.
//
//===----------------------------------------------------------------------===//
#include "llvm/IR/Mangler.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
enum ManglerPrefixTy {
Default, ///< Emit default string before each symbol.
Private, ///< Emit "private" prefix before each symbol.
LinkerPrivate ///< Emit "linker private" prefix before each symbol.
};
}
static void getNameWithPrefixImpl(raw_ostream &OS, const Twine &GVName,
ManglerPrefixTy PrefixTy,
const DataLayout &DL, char Prefix) {
SmallString<256> TmpData;
StringRef Name = GVName.toStringRef(TmpData);
assert(!Name.empty() && "getNameWithPrefix requires non-empty name");
// No need to do anything special if the global has the special "do not
// mangle" flag in the name.
if (Name[0] == '\1') {
OS << Name.substr(1);
return;
}
if (DL.doNotMangleLeadingQuestionMark() && Name[0] == '?')
Prefix = '\0';
if (PrefixTy == Private)
OS << DL.getPrivateGlobalPrefix();
else if (PrefixTy == LinkerPrivate)
OS << DL.getLinkerPrivateGlobalPrefix();
if (Prefix != '\0')
OS << Prefix;
// If this is a simple string that doesn't need escaping, just append it.
OS << Name;
}
static void getNameWithPrefixImpl(raw_ostream &OS, const Twine &GVName,
const DataLayout &DL,
ManglerPrefixTy PrefixTy) {
char Prefix = DL.getGlobalPrefix();
return getNameWithPrefixImpl(OS, GVName, PrefixTy, DL, Prefix);
}
void Mangler::getNameWithPrefix(raw_ostream &OS, const Twine &GVName,
const DataLayout &DL) {
return getNameWithPrefixImpl(OS, GVName, DL, Default);
}
void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
const Twine &GVName, const DataLayout &DL) {
raw_svector_ostream OS(OutName);
char Prefix = DL.getGlobalPrefix();
return getNameWithPrefixImpl(OS, GVName, Default, DL, Prefix);
}
static bool hasByteCountSuffix(CallingConv::ID CC) {
switch (CC) {
case CallingConv::X86_FastCall:
case CallingConv::X86_StdCall:
case CallingConv::X86_VectorCall:
return true;
default:
return false;
}
}
/// Microsoft fastcall and stdcall functions require a suffix on their name
/// indicating the number of words of arguments they take.
static void addByteCountSuffix(raw_ostream &OS, const Function *F,
const DataLayout &DL) {
// Calculate arguments size total.
unsigned ArgWords = 0;
const unsigned PtrSize = DL.getPointerSize();
for (const Argument &A : F->args()) {
// For the purposes of the byte count suffix, structs returned by pointer
// do not count as function arguments.
if (A.hasStructRetAttr())
continue;
// 'Dereference' type in case of byval or inalloca parameter attribute.
uint64_t AllocSize = A.hasPassPointeeByValueCopyAttr() ?
A.getPassPointeeByValueCopySize(DL) :
DL.getTypeAllocSize(A.getType());
// Size should be aligned to pointer size.
ArgWords += alignTo(AllocSize, PtrSize);
}
OS << '@' << ArgWords;
}
void Mangler::getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV,
bool CannotUsePrivateLabel) const {
ManglerPrefixTy PrefixTy = Default;
if (GV->hasPrivateLinkage()) {
if (CannotUsePrivateLabel)
PrefixTy = LinkerPrivate;
else
PrefixTy = Private;
}
const DataLayout &DL = GV->getParent()->getDataLayout();
if (!GV->hasName()) {
// Get the ID for the global, assigning a new one if we haven't got one
// already.
unsigned &ID = AnonGlobalIDs[GV];
if (ID == 0)
ID = AnonGlobalIDs.size();
// Must mangle the global into a unique ID.
getNameWithPrefixImpl(OS, "__unnamed_" + Twine(ID), DL, PrefixTy);
return;
}
StringRef Name = GV->getName();
char Prefix = DL.getGlobalPrefix();
// Mangle functions with Microsoft calling conventions specially. Only do
// this mangling for x86_64 vectorcall and 32-bit x86.
- const Function *MSFunc = dyn_cast<Function>(GV);
+ const Function *MSFunc = dyn_cast_or_null<Function>(GV->getAliaseeObject());
// Don't add byte count suffixes when the name starts with '\01' or, on
// targets that don't mangle a leading question mark, with '?'.
if (Name.startswith("\01") ||
(DL.doNotMangleLeadingQuestionMark() && Name.startswith("?")))
MSFunc = nullptr;
CallingConv::ID CC =
MSFunc ? MSFunc->getCallingConv() : (unsigned)CallingConv::C;
if (!DL.hasMicrosoftFastStdCallMangling() &&
CC != CallingConv::X86_VectorCall)
MSFunc = nullptr;
if (MSFunc) {
if (CC == CallingConv::X86_FastCall)
Prefix = '@'; // fastcall functions have an @ prefix instead of _.
else if (CC == CallingConv::X86_VectorCall)
Prefix = '\0'; // vectorcall functions have no prefix.
}
getNameWithPrefixImpl(OS, Name, PrefixTy, DL, Prefix);
if (!MSFunc)
return;
// If we are supposed to add a microsoft-style suffix for stdcall, fastcall,
// or vectorcall, add it. These functions have a suffix of @N where N is the
// cumulative byte size of all of the parameters to the function in decimal.
if (CC == CallingConv::X86_VectorCall)
OS << '@'; // vectorcall functions use a double @ suffix.
FunctionType *FT = MSFunc->getFunctionType();
if (hasByteCountSuffix(CC) &&
// "Pure" variadic functions do not receive @0 suffix.
(!FT->isVarArg() || FT->getNumParams() == 0 ||
(FT->getNumParams() == 1 && MSFunc->hasStructRetAttr())))
addByteCountSuffix(OS, MSFunc, DL);
}
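// Illustrative sketch (not part of the patch): the decorations produced above
// for 32-bit x86, using a hypothetical helper. For `int __stdcall f(int, int)`
// the byte count is 8, giving "_f@8"; __fastcall swaps the leading '_' for '@'
// ("@f@8"), and __vectorcall drops the prefix and doubles the '@' ("f@@8").
static std::string exampleStdcallName(StringRef Name, unsigned ArgBytes) {
  // Simplified model of the prefix plus "@<bytes>" suffix; the real path goes
  // through getNameWithPrefixImpl and addByteCountSuffix.
  return ("_" + Name + "@" + Twine(ArgBytes)).str();
}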
void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
const GlobalValue *GV,
bool CannotUsePrivateLabel) const {
raw_svector_ostream OS(OutName);
getNameWithPrefix(OS, GV, CannotUsePrivateLabel);
}
// Check if the name needs quotes to be safe for the linker to interpret.
static bool canBeUnquotedInDirective(char C) {
return isAlnum(C) || C == '_' || C == '@';
}
static bool canBeUnquotedInDirective(StringRef Name) {
if (Name.empty())
return false;
// If any of the characters in the string is an unacceptable character, force
// quotes.
for (char C : Name) {
if (!canBeUnquotedInDirective(C))
return false;
}
return true;
}
void llvm::emitLinkerFlagsForGlobalCOFF(raw_ostream &OS, const GlobalValue *GV,
const Triple &TT, Mangler &Mangler) {
if (!GV->hasDLLExportStorageClass() || GV->isDeclaration())
return;
if (TT.isWindowsMSVCEnvironment())
OS << " /EXPORT:";
else
OS << " -export:";
bool NeedQuotes = GV->hasName() && !canBeUnquotedInDirective(GV->getName());
if (NeedQuotes)
OS << "\"";
if (TT.isWindowsGNUEnvironment() || TT.isWindowsCygwinEnvironment()) {
std::string Flag;
raw_string_ostream FlagOS(Flag);
Mangler.getNameWithPrefix(FlagOS, GV, false);
FlagOS.flush();
if (Flag[0] == GV->getParent()->getDataLayout().getGlobalPrefix())
OS << Flag.substr(1);
else
OS << Flag;
} else {
Mangler.getNameWithPrefix(OS, GV, false);
}
if (NeedQuotes)
OS << "\"";
if (!GV->getValueType()->isFunctionTy()) {
if (TT.isWindowsMSVCEnvironment())
OS << ",DATA";
else
OS << ",data";
}
}
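// Illustrative sketch (not part of the patch): the shape of the directive the
// function above appends, modelled with a hypothetical helper on plain
// strings. For a dllexport'ed variable whose mangled name is "_foo" this
// yields " /EXPORT:_foo,DATA" for MSVC environments and " -export:_foo,data"
// for GNU ones (the real code additionally strips the global prefix on GNU
// targets and quotes names that need it; both are omitted here).
static std::string exampleExportDirective(StringRef MangledName, bool IsMSVC,
                                          bool IsData) {
  std::string Directive = IsMSVC ? " /EXPORT:" : " -export:";
  Directive += MangledName.str();
  if (IsData)
    Directive += IsMSVC ? ",DATA" : ",data";
  return Directive;
}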
void llvm::emitLinkerFlagsForUsedCOFF(raw_ostream &OS, const GlobalValue *GV,
const Triple &T, Mangler &M) {
if (!T.isWindowsMSVCEnvironment())
return;
OS << " /INCLUDE:";
bool NeedQuotes = GV->hasName() && !canBeUnquotedInDirective(GV->getName());
if (NeedQuotes)
OS << "\"";
M.getNameWithPrefix(OS, GV, false);
if (NeedQuotes)
OS << "\"";
}
diff --git a/contrib/llvm-project/llvm/lib/MC/WinCOFFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/MC/WinCOFFObjectWriter.cpp
index 73c687331d30..aba2ad315535 100644
--- a/contrib/llvm-project/llvm/lib/MC/WinCOFFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -1,1221 +1,1184 @@
//===- llvm/MC/WinCOFFObjectWriter.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains an implementation of a Win32 COFF object file writer.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCFragment.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolCOFF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCWinCOFFObjectWriter.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/CRC.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <ctime>
#include <memory>
#include <string>
#include <vector>
using namespace llvm;
using llvm::support::endian::write32le;
#define DEBUG_TYPE "WinCOFFObjectWriter"
namespace {
constexpr int OffsetLabelIntervalBits = 20;
using name = SmallString<COFF::NameSize>;
enum AuxiliaryType {
ATWeakExternal,
ATFile,
ATSectionDefinition
};
struct AuxSymbol {
AuxiliaryType AuxType;
COFF::Auxiliary Aux;
};
class COFFSection;
class COFFSymbol {
public:
COFF::symbol Data = {};
using AuxiliarySymbols = SmallVector<AuxSymbol, 1>;
name Name;
int Index;
AuxiliarySymbols Aux;
COFFSymbol *Other = nullptr;
COFFSection *Section = nullptr;
int Relocations = 0;
const MCSymbol *MC = nullptr;
COFFSymbol(StringRef Name) : Name(Name) {}
void set_name_offset(uint32_t Offset);
int64_t getIndex() const { return Index; }
void setIndex(int Value) {
Index = Value;
if (MC)
MC->setIndex(static_cast<uint32_t>(Value));
}
};
// This class contains staging data for a COFF relocation entry.
struct COFFRelocation {
COFF::relocation Data;
COFFSymbol *Symb = nullptr;
COFFRelocation() = default;
static size_t size() { return COFF::RelocationSize; }
};
using relocations = std::vector<COFFRelocation>;
class COFFSection {
public:
COFF::section Header = {};
std::string Name;
int Number;
MCSectionCOFF const *MCSection = nullptr;
COFFSymbol *Symbol = nullptr;
relocations Relocations;
COFFSection(StringRef Name) : Name(std::string(Name)) {}
SmallVector<COFFSymbol *, 1> OffsetSymbols;
};
class WinCOFFObjectWriter : public MCObjectWriter {
public:
support::endian::Writer W;
using symbols = std::vector<std::unique_ptr<COFFSymbol>>;
using sections = std::vector<std::unique_ptr<COFFSection>>;
using symbol_map = DenseMap<MCSymbol const *, COFFSymbol *>;
using section_map = DenseMap<MCSection const *, COFFSection *>;
using symbol_list = DenseSet<COFFSymbol *>;
std::unique_ptr<MCWinCOFFObjectTargetWriter> TargetObjectWriter;
// Root level file contents.
COFF::header Header = {};
sections Sections;
symbols Symbols;
StringTableBuilder Strings{StringTableBuilder::WinCOFF};
// Maps used during object file creation.
section_map SectionMap;
symbol_map SymbolMap;
symbol_list WeakDefaults;
bool UseBigObj;
bool UseOffsetLabels = false;
bool EmitAddrsigSection = false;
MCSectionCOFF *AddrsigSection;
std::vector<const MCSymbol *> AddrsigSyms;
MCSectionCOFF *CGProfileSection = nullptr;
WinCOFFObjectWriter(std::unique_ptr<MCWinCOFFObjectTargetWriter> MOTW,
raw_pwrite_stream &OS);
void reset() override {
memset(&Header, 0, sizeof(Header));
Header.Machine = TargetObjectWriter->getMachine();
Sections.clear();
Symbols.clear();
Strings.clear();
SectionMap.clear();
SymbolMap.clear();
MCObjectWriter::reset();
}
COFFSymbol *createSymbol(StringRef Name);
COFFSymbol *GetOrCreateCOFFSymbol(const MCSymbol *Symbol);
COFFSection *createSection(StringRef Name);
void defineSection(MCSectionCOFF const &Sec, const MCAsmLayout &Layout);
COFFSymbol *getLinkedSymbol(const MCSymbol &Symbol);
void DefineSymbol(const MCSymbol &Symbol, MCAssembler &Assembler,
const MCAsmLayout &Layout);
void SetSymbolName(COFFSymbol &S);
void SetSectionName(COFFSection &S);
bool IsPhysicalSection(COFFSection *S);
// Entity writing methods.
void WriteFileHeader(const COFF::header &Header);
void WriteSymbol(const COFFSymbol &S);
void WriteAuxiliarySymbols(const COFFSymbol::AuxiliarySymbols &S);
void writeSectionHeaders();
void WriteRelocation(const COFF::relocation &R);
uint32_t writeSectionContents(MCAssembler &Asm, const MCAsmLayout &Layout,
const MCSection &MCSec);
void writeSection(MCAssembler &Asm, const MCAsmLayout &Layout,
const COFFSection &Sec, const MCSection &MCSec);
// MCObjectWriter interface implementation.
void executePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) override;
bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
const MCSymbol &SymA,
const MCFragment &FB, bool InSet,
bool IsPCRel) const override;
void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, uint64_t &FixedValue) override;
void createFileSymbols(MCAssembler &Asm);
void setWeakDefaultNames();
void assignSectionNumbers();
void assignFileOffsets(MCAssembler &Asm, const MCAsmLayout &Layout);
void emitAddrsigSection() override { EmitAddrsigSection = true; }
void addAddrsigSymbol(const MCSymbol *Sym) override {
AddrsigSyms.push_back(Sym);
}
uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
};
} // end anonymous namespace
//------------------------------------------------------------------------------
// Symbol class implementation
// In the case that the name does not fit within 8 bytes, the offset
// into the string table is stored in the last 4 bytes instead, leaving
// the first 4 bytes as 0.
void COFFSymbol::set_name_offset(uint32_t Offset) {
write32le(Data.Name + 0, 0);
write32le(Data.Name + 4, Offset);
}
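// Illustrative sketch (not part of the patch): how a reader interprets the
// 8-byte name field written above. A zero in the first four bytes marks the
// long-name form, with the string table offset in the last four bytes.
static bool exampleIsLongNameForm(const COFF::symbol &Sym, uint32_t &Offset) {
  using llvm::support::endian::read32le;
  if (read32le(Sym.Name + 0) != 0)
    return false; // Short name, stored inline in the field itself.
  Offset = read32le(Sym.Name + 4);
  return true;
}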
//------------------------------------------------------------------------------
// WinCOFFObjectWriter class implementation
WinCOFFObjectWriter::WinCOFFObjectWriter(
std::unique_ptr<MCWinCOFFObjectTargetWriter> MOTW, raw_pwrite_stream &OS)
: W(OS, support::little), TargetObjectWriter(std::move(MOTW)) {
Header.Machine = TargetObjectWriter->getMachine();
// Some relocations on ARM64 (the 21 bit ADRP relocations) have a slightly
// limited range for the immediate offset (+/- 1 MB); create extra offset
// label symbols at regular intervals to allow referencing a
// non-temporary symbol that is close enough.
UseOffsetLabels = Header.Machine == COFF::IMAGE_FILE_MACHINE_ARM64;
}
COFFSymbol *WinCOFFObjectWriter::createSymbol(StringRef Name) {
Symbols.push_back(std::make_unique<COFFSymbol>(Name));
return Symbols.back().get();
}
COFFSymbol *WinCOFFObjectWriter::GetOrCreateCOFFSymbol(const MCSymbol *Symbol) {
COFFSymbol *&Ret = SymbolMap[Symbol];
if (!Ret)
Ret = createSymbol(Symbol->getName());
return Ret;
}
COFFSection *WinCOFFObjectWriter::createSection(StringRef Name) {
Sections.emplace_back(std::make_unique<COFFSection>(Name));
return Sections.back().get();
}
static uint32_t getAlignment(const MCSectionCOFF &Sec) {
switch (Sec.getAlignment()) {
case 1:
return COFF::IMAGE_SCN_ALIGN_1BYTES;
case 2:
return COFF::IMAGE_SCN_ALIGN_2BYTES;
case 4:
return COFF::IMAGE_SCN_ALIGN_4BYTES;
case 8:
return COFF::IMAGE_SCN_ALIGN_8BYTES;
case 16:
return COFF::IMAGE_SCN_ALIGN_16BYTES;
case 32:
return COFF::IMAGE_SCN_ALIGN_32BYTES;
case 64:
return COFF::IMAGE_SCN_ALIGN_64BYTES;
case 128:
return COFF::IMAGE_SCN_ALIGN_128BYTES;
case 256:
return COFF::IMAGE_SCN_ALIGN_256BYTES;
case 512:
return COFF::IMAGE_SCN_ALIGN_512BYTES;
case 1024:
return COFF::IMAGE_SCN_ALIGN_1024BYTES;
case 2048:
return COFF::IMAGE_SCN_ALIGN_2048BYTES;
case 4096:
return COFF::IMAGE_SCN_ALIGN_4096BYTES;
case 8192:
return COFF::IMAGE_SCN_ALIGN_8192BYTES;
}
llvm_unreachable("unsupported section alignment");
}
/// This function takes a section data object from the assembler
/// and creates the associated COFF section staging object.
void WinCOFFObjectWriter::defineSection(const MCSectionCOFF &MCSec,
const MCAsmLayout &Layout) {
COFFSection *Section = createSection(MCSec.getName());
COFFSymbol *Symbol = createSymbol(MCSec.getName());
Section->Symbol = Symbol;
Symbol->Section = Section;
Symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_STATIC;
// Create a COMDAT symbol if needed.
if (MCSec.getSelection() != COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
if (const MCSymbol *S = MCSec.getCOMDATSymbol()) {
COFFSymbol *COMDATSymbol = GetOrCreateCOFFSymbol(S);
if (COMDATSymbol->Section)
report_fatal_error("two sections have the same comdat");
COMDATSymbol->Section = Section;
}
}
// In this case the auxiliary symbol is a Section Definition.
Symbol->Aux.resize(1);
Symbol->Aux[0] = {};
Symbol->Aux[0].AuxType = ATSectionDefinition;
Symbol->Aux[0].Aux.SectionDefinition.Selection = MCSec.getSelection();
// Set section alignment.
Section->Header.Characteristics = MCSec.getCharacteristics();
Section->Header.Characteristics |= getAlignment(MCSec);
// Bind internal COFF section to MC section.
Section->MCSection = &MCSec;
SectionMap[&MCSec] = Section;
if (UseOffsetLabels && !MCSec.getFragmentList().empty()) {
const uint32_t Interval = 1 << OffsetLabelIntervalBits;
uint32_t N = 1;
for (uint32_t Off = Interval, E = Layout.getSectionAddressSize(&MCSec);
Off < E; Off += Interval) {
auto Name = ("$L" + MCSec.getName() + "_" + Twine(N++)).str();
COFFSymbol *Label = createSymbol(Name);
Label->Section = Section;
Label->Data.StorageClass = COFF::IMAGE_SYM_CLASS_LABEL;
Label->Data.Value = Off;
Section->OffsetSymbols.push_back(Label);
}
}
}
static uint64_t getSymbolValue(const MCSymbol &Symbol,
const MCAsmLayout &Layout) {
if (Symbol.isCommon() && Symbol.isExternal())
return Symbol.getCommonSize();
uint64_t Res;
if (!Layout.getSymbolOffset(Symbol, Res))
return 0;
return Res;
}
COFFSymbol *WinCOFFObjectWriter::getLinkedSymbol(const MCSymbol &Symbol) {
if (!Symbol.isVariable())
return nullptr;
const MCSymbolRefExpr *SymRef =
dyn_cast<MCSymbolRefExpr>(Symbol.getVariableValue());
if (!SymRef)
return nullptr;
const MCSymbol &Aliasee = SymRef->getSymbol();
if (Aliasee.isUndefined() || Aliasee.isExternal())
return GetOrCreateCOFFSymbol(&Aliasee);
else
return nullptr;
}
/// This function takes a symbol data object from the assembler
/// and creates the associated COFF symbol staging object.
void WinCOFFObjectWriter::DefineSymbol(const MCSymbol &MCSym,
MCAssembler &Assembler,
const MCAsmLayout &Layout) {
COFFSymbol *Sym = GetOrCreateCOFFSymbol(&MCSym);
const MCSymbol *Base = Layout.getBaseSymbol(MCSym);
COFFSection *Sec = nullptr;
if (Base && Base->getFragment()) {
Sec = SectionMap[Base->getFragment()->getParent()];
if (Sym->Section && Sym->Section != Sec)
report_fatal_error("conflicting sections for symbol");
}
COFFSymbol *Local = nullptr;
if (cast<MCSymbolCOFF>(MCSym).isWeakExternal()) {
Sym->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
Sym->Section = nullptr;
COFFSymbol *WeakDefault = getLinkedSymbol(MCSym);
if (!WeakDefault) {
std::string WeakName = (".weak." + MCSym.getName() + ".default").str();
WeakDefault = createSymbol(WeakName);
if (!Sec)
WeakDefault->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE;
else
WeakDefault->Section = Sec;
WeakDefaults.insert(WeakDefault);
Local = WeakDefault;
}
Sym->Other = WeakDefault;
// Setup the Weak External auxiliary symbol.
Sym->Aux.resize(1);
memset(&Sym->Aux[0], 0, sizeof(Sym->Aux[0]));
Sym->Aux[0].AuxType = ATWeakExternal;
Sym->Aux[0].Aux.WeakExternal.TagIndex = 0;
Sym->Aux[0].Aux.WeakExternal.Characteristics =
COFF::IMAGE_WEAK_EXTERN_SEARCH_ALIAS;
} else {
if (!Base)
Sym->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE;
else
Sym->Section = Sec;
Local = Sym;
}
if (Local) {
Local->Data.Value = getSymbolValue(MCSym, Layout);
const MCSymbolCOFF &SymbolCOFF = cast<MCSymbolCOFF>(MCSym);
Local->Data.Type = SymbolCOFF.getType();
Local->Data.StorageClass = SymbolCOFF.getClass();
// If no storage class was specified in the streamer, define it here.
if (Local->Data.StorageClass == COFF::IMAGE_SYM_CLASS_NULL) {
bool IsExternal = MCSym.isExternal() ||
(!MCSym.getFragment() && !MCSym.isVariable());
Local->Data.StorageClass = IsExternal ? COFF::IMAGE_SYM_CLASS_EXTERNAL
: COFF::IMAGE_SYM_CLASS_STATIC;
}
}
Sym->MC = &MCSym;
}
-// Maximum offsets for different string table entry encodings.
-enum : unsigned { Max7DecimalOffset = 9999999U };
-enum : uint64_t { MaxBase64Offset = 0xFFFFFFFFFULL }; // 64^6, including 0
-
-// Encode a string table entry offset in base 64, padded to 6 chars, and
-// prefixed with a double slash: '//AAAAAA', '//AAAAAB', ...
-// Buffer must be at least 8 bytes large. No terminating null appended.
-static void encodeBase64StringEntry(char *Buffer, uint64_t Value) {
- assert(Value > Max7DecimalOffset && Value <= MaxBase64Offset &&
- "Illegal section name encoding for value");
-
- static const char Alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "abcdefghijklmnopqrstuvwxyz"
- "0123456789+/";
-
- Buffer[0] = '/';
- Buffer[1] = '/';
-
- char *Ptr = Buffer + 7;
- for (unsigned i = 0; i < 6; ++i) {
- unsigned Rem = Value % 64;
- Value /= 64;
- *(Ptr--) = Alphabet[Rem];
- }
-}
-
void WinCOFFObjectWriter::SetSectionName(COFFSection &S) {
if (S.Name.size() <= COFF::NameSize) {
std::memcpy(S.Header.Name, S.Name.c_str(), S.Name.size());
return;
}
uint64_t StringTableEntry = Strings.getOffset(S.Name);
- if (StringTableEntry <= Max7DecimalOffset) {
- SmallVector<char, COFF::NameSize> Buffer;
- Twine('/').concat(Twine(StringTableEntry)).toVector(Buffer);
- assert(Buffer.size() <= COFF::NameSize && Buffer.size() >= 2);
- std::memcpy(S.Header.Name, Buffer.data(), Buffer.size());
- return;
- }
- if (StringTableEntry <= MaxBase64Offset) {
- // Starting with 10,000,000, offsets are encoded as base64.
- encodeBase64StringEntry(S.Header.Name, StringTableEntry);
- return;
- }
- report_fatal_error("COFF string table is greater than 64 GB.");
+ if (!COFF::encodeSectionName(S.Header.Name, StringTableEntry))
+ report_fatal_error("COFF string table is greater than 64 GB.");
}
void WinCOFFObjectWriter::SetSymbolName(COFFSymbol &S) {
if (S.Name.size() > COFF::NameSize)
S.set_name_offset(Strings.getOffset(S.Name));
else
std::memcpy(S.Data.Name, S.Name.c_str(), S.Name.size());
}
bool WinCOFFObjectWriter::IsPhysicalSection(COFFSection *S) {
return (S->Header.Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) ==
0;
}
//------------------------------------------------------------------------------
// entity writing methods
void WinCOFFObjectWriter::WriteFileHeader(const COFF::header &Header) {
if (UseBigObj) {
W.write<uint16_t>(COFF::IMAGE_FILE_MACHINE_UNKNOWN);
W.write<uint16_t>(0xFFFF);
W.write<uint16_t>(COFF::BigObjHeader::MinBigObjectVersion);
W.write<uint16_t>(Header.Machine);
W.write<uint32_t>(Header.TimeDateStamp);
W.OS.write(COFF::BigObjMagic, sizeof(COFF::BigObjMagic));
W.write<uint32_t>(0);
W.write<uint32_t>(0);
W.write<uint32_t>(0);
W.write<uint32_t>(0);
W.write<uint32_t>(Header.NumberOfSections);
W.write<uint32_t>(Header.PointerToSymbolTable);
W.write<uint32_t>(Header.NumberOfSymbols);
} else {
W.write<uint16_t>(Header.Machine);
W.write<uint16_t>(static_cast<int16_t>(Header.NumberOfSections));
W.write<uint32_t>(Header.TimeDateStamp);
W.write<uint32_t>(Header.PointerToSymbolTable);
W.write<uint32_t>(Header.NumberOfSymbols);
W.write<uint16_t>(Header.SizeOfOptionalHeader);
W.write<uint16_t>(Header.Characteristics);
}
}
void WinCOFFObjectWriter::WriteSymbol(const COFFSymbol &S) {
W.OS.write(S.Data.Name, COFF::NameSize);
W.write<uint32_t>(S.Data.Value);
if (UseBigObj)
W.write<uint32_t>(S.Data.SectionNumber);
else
W.write<uint16_t>(static_cast<int16_t>(S.Data.SectionNumber));
W.write<uint16_t>(S.Data.Type);
W.OS << char(S.Data.StorageClass);
W.OS << char(S.Data.NumberOfAuxSymbols);
WriteAuxiliarySymbols(S.Aux);
}
void WinCOFFObjectWriter::WriteAuxiliarySymbols(
const COFFSymbol::AuxiliarySymbols &S) {
for (const AuxSymbol &i : S) {
switch (i.AuxType) {
case ATWeakExternal:
W.write<uint32_t>(i.Aux.WeakExternal.TagIndex);
W.write<uint32_t>(i.Aux.WeakExternal.Characteristics);
W.OS.write_zeros(sizeof(i.Aux.WeakExternal.unused));
if (UseBigObj)
W.OS.write_zeros(COFF::Symbol32Size - COFF::Symbol16Size);
break;
case ATFile:
W.OS.write(reinterpret_cast<const char *>(&i.Aux),
UseBigObj ? COFF::Symbol32Size : COFF::Symbol16Size);
break;
case ATSectionDefinition:
W.write<uint32_t>(i.Aux.SectionDefinition.Length);
W.write<uint16_t>(i.Aux.SectionDefinition.NumberOfRelocations);
W.write<uint16_t>(i.Aux.SectionDefinition.NumberOfLinenumbers);
W.write<uint32_t>(i.Aux.SectionDefinition.CheckSum);
W.write<uint16_t>(static_cast<int16_t>(i.Aux.SectionDefinition.Number));
W.OS << char(i.Aux.SectionDefinition.Selection);
W.OS.write_zeros(sizeof(i.Aux.SectionDefinition.unused));
W.write<uint16_t>(static_cast<int16_t>(i.Aux.SectionDefinition.Number >> 16));
if (UseBigObj)
W.OS.write_zeros(COFF::Symbol32Size - COFF::Symbol16Size);
break;
}
}
}
// Write the section header.
void WinCOFFObjectWriter::writeSectionHeaders() {
// Section numbers must be monotonically increasing in the section
// header, but our Sections array is not sorted by section number,
// so make a copy of Sections and sort it.
std::vector<COFFSection *> Arr;
for (auto &Section : Sections)
Arr.push_back(Section.get());
llvm::sort(Arr, [](const COFFSection *A, const COFFSection *B) {
return A->Number < B->Number;
});
for (auto &Section : Arr) {
if (Section->Number == -1)
continue;
COFF::section &S = Section->Header;
if (Section->Relocations.size() >= 0xffff)
S.Characteristics |= COFF::IMAGE_SCN_LNK_NRELOC_OVFL;
W.OS.write(S.Name, COFF::NameSize);
W.write<uint32_t>(S.VirtualSize);
W.write<uint32_t>(S.VirtualAddress);
W.write<uint32_t>(S.SizeOfRawData);
W.write<uint32_t>(S.PointerToRawData);
W.write<uint32_t>(S.PointerToRelocations);
W.write<uint32_t>(S.PointerToLineNumbers);
W.write<uint16_t>(S.NumberOfRelocations);
W.write<uint16_t>(S.NumberOfLineNumbers);
W.write<uint32_t>(S.Characteristics);
}
}
void WinCOFFObjectWriter::WriteRelocation(const COFF::relocation &R) {
W.write<uint32_t>(R.VirtualAddress);
W.write<uint32_t>(R.SymbolTableIndex);
W.write<uint16_t>(R.Type);
}
// Write MCSec's contents. What this function does is essentially
// "Asm.writeSectionData(&MCSec, Layout)", but it's a bit complicated
// because it needs to compute a CRC.
uint32_t WinCOFFObjectWriter::writeSectionContents(MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCSection &MCSec) {
// Save the contents of the section to a temporary buffer, we need this
// to CRC the data before we dump it into the object file.
SmallVector<char, 128> Buf;
raw_svector_ostream VecOS(Buf);
Asm.writeSectionData(VecOS, &MCSec, Layout);
// Write the section contents to the object file.
W.OS << Buf;
// Calculate our CRC with an initial value of '0'; this is not how
// JamCRC is specified, but it aligns with the expected output.
JamCRC JC(/*Init=*/0);
JC.update(makeArrayRef(reinterpret_cast<uint8_t*>(Buf.data()), Buf.size()));
return JC.getCRC();
}
void WinCOFFObjectWriter::writeSection(MCAssembler &Asm,
const MCAsmLayout &Layout,
const COFFSection &Sec,
const MCSection &MCSec) {
if (Sec.Number == -1)
return;
// Write the section contents.
if (Sec.Header.PointerToRawData != 0) {
assert(W.OS.tell() == Sec.Header.PointerToRawData &&
"Section::PointerToRawData is insane!");
uint32_t CRC = writeSectionContents(Asm, Layout, MCSec);
// Update the section definition auxiliary symbol to record the CRC.
COFFSection *Sec = SectionMap[&MCSec];
COFFSymbol::AuxiliarySymbols &AuxSyms = Sec->Symbol->Aux;
assert(AuxSyms.size() == 1 && AuxSyms[0].AuxType == ATSectionDefinition);
AuxSymbol &SecDef = AuxSyms[0];
SecDef.Aux.SectionDefinition.CheckSum = CRC;
}
// Write relocations for this section.
if (Sec.Relocations.empty()) {
assert(Sec.Header.PointerToRelocations == 0 &&
"Section::PointerToRelocations is insane!");
return;
}
assert(W.OS.tell() == Sec.Header.PointerToRelocations &&
"Section::PointerToRelocations is insane!");
if (Sec.Relocations.size() >= 0xffff) {
// In case of overflow, write the actual relocation count, including the
// synthetic reloc itself (+ 1), as the first relocation entry.
COFF::relocation R;
R.VirtualAddress = Sec.Relocations.size() + 1;
R.SymbolTableIndex = 0;
R.Type = 0;
WriteRelocation(R);
}
for (const auto &Relocation : Sec.Relocations)
WriteRelocation(Relocation.Data);
}
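// Illustrative sketch (not part of the patch): how a consumer recovers the
// real relocation count when the overflow convention above is used. The first
// relocation's VirtualAddress holds the total number of entries on disk,
// including that synthetic entry itself.
static uint32_t exampleActualRelocCount(uint16_t NumberOfRelocations,
                                        uint32_t FirstRelocVirtualAddress) {
  if (NumberOfRelocations != 0xffff)
    return NumberOfRelocations; // No overflow; the header count is exact.
  return FirstRelocVirtualAddress - 1; // Drop the synthetic count entry.
}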
////////////////////////////////////////////////////////////////////////////////
// MCObjectWriter interface implementations
void WinCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) {
if (EmitAddrsigSection) {
AddrsigSection = Asm.getContext().getCOFFSection(
".llvm_addrsig", COFF::IMAGE_SCN_LNK_REMOVE,
SectionKind::getMetadata());
Asm.registerSection(*AddrsigSection);
}
if (!Asm.CGProfile.empty()) {
CGProfileSection = Asm.getContext().getCOFFSection(
".llvm.call-graph-profile", COFF::IMAGE_SCN_LNK_REMOVE,
SectionKind::getMetadata());
Asm.registerSection(*CGProfileSection);
}
// "Define" each section & symbol. This creates section & symbol
// entries in the staging area.
for (const auto &Section : Asm)
defineSection(static_cast<const MCSectionCOFF &>(Section), Layout);
for (const MCSymbol &Symbol : Asm.symbols())
if (!Symbol.isTemporary())
DefineSymbol(Symbol, Asm, Layout);
}
bool WinCOFFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB,
bool InSet, bool IsPCRel) const {
// Don't drop relocations between functions, even if they are in the same text
// section. Multiple Visual C++ linker features depend on having the
// relocations present. The /INCREMENTAL flag will cause these relocations to
// point to thunks, and the /GUARD:CF flag assumes that it can use relocations
// to approximate the set of all address taken functions. LLD's implementation
// of /GUARD:CF also relies on the existence of these relocations.
uint16_t Type = cast<MCSymbolCOFF>(SymA).getType();
if ((Type >> COFF::SCT_COMPLEX_TYPE_SHIFT) == COFF::IMAGE_SYM_DTYPE_FUNCTION)
return false;
return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, SymA, FB,
InSet, IsPCRel);
}
void WinCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) {
assert(Target.getSymA() && "Relocation must reference a symbol!");
const MCSymbol &A = Target.getSymA()->getSymbol();
if (!A.isRegistered()) {
Asm.getContext().reportError(Fixup.getLoc(),
Twine("symbol '") + A.getName() +
"' can not be undefined");
return;
}
if (A.isTemporary() && A.isUndefined()) {
Asm.getContext().reportError(Fixup.getLoc(),
Twine("assembler label '") + A.getName() +
"' can not be undefined");
return;
}
MCSection *MCSec = Fragment->getParent();
// Mark this symbol as requiring an entry in the symbol table.
assert(SectionMap.find(MCSec) != SectionMap.end() &&
"Section must already have been defined in executePostLayoutBinding!");
COFFSection *Sec = SectionMap[MCSec];
const MCSymbolRefExpr *SymB = Target.getSymB();
if (SymB) {
const MCSymbol *B = &SymB->getSymbol();
if (!B->getFragment()) {
Asm.getContext().reportError(
Fixup.getLoc(),
Twine("symbol '") + B->getName() +
"' can not be undefined in a subtraction expression");
return;
}
// Offset of the symbol in the section
int64_t OffsetOfB = Layout.getSymbolOffset(*B);
// Offset of the relocation in the section
int64_t OffsetOfRelocation =
Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
FixedValue = (OffsetOfRelocation - OffsetOfB) + Target.getConstant();
} else {
FixedValue = Target.getConstant();
}
COFFRelocation Reloc;
Reloc.Data.SymbolTableIndex = 0;
Reloc.Data.VirtualAddress = Layout.getFragmentOffset(Fragment);
// Turn relocations for temporary symbols into section relocations.
if (A.isTemporary()) {
MCSection *TargetSection = &A.getSection();
assert(
SectionMap.find(TargetSection) != SectionMap.end() &&
"Section must already have been defined in executePostLayoutBinding!");
COFFSection *Section = SectionMap[TargetSection];
Reloc.Symb = Section->Symbol;
FixedValue += Layout.getSymbolOffset(A);
// Technically, we should do the final adjustments of FixedValue (below)
// before picking an offset symbol, otherwise we might choose one which
// is slightly too far away. The relocations where it really matters
// (arm64 adrp relocations) don't get any offset though.
if (UseOffsetLabels && !Section->OffsetSymbols.empty()) {
uint64_t LabelIndex = FixedValue >> OffsetLabelIntervalBits;
if (LabelIndex > 0) {
if (LabelIndex <= Section->OffsetSymbols.size())
Reloc.Symb = Section->OffsetSymbols[LabelIndex - 1];
else
Reloc.Symb = Section->OffsetSymbols.back();
FixedValue -= Reloc.Symb->Data.Value;
}
}
} else {
assert(
SymbolMap.find(&A) != SymbolMap.end() &&
"Symbol must already have been defined in executePostLayoutBinding!");
Reloc.Symb = SymbolMap[&A];
}
++Reloc.Symb->Relocations;
Reloc.Data.VirtualAddress += Fixup.getOffset();
Reloc.Data.Type = TargetObjectWriter->getRelocType(
Asm.getContext(), Target, Fixup, SymB, Asm.getBackend());
// The *_REL32 relocations are relative to the end of the relocation,
// not to the start.
if ((Header.Machine == COFF::IMAGE_FILE_MACHINE_AMD64 &&
Reloc.Data.Type == COFF::IMAGE_REL_AMD64_REL32) ||
(Header.Machine == COFF::IMAGE_FILE_MACHINE_I386 &&
Reloc.Data.Type == COFF::IMAGE_REL_I386_REL32) ||
(Header.Machine == COFF::IMAGE_FILE_MACHINE_ARMNT &&
Reloc.Data.Type == COFF::IMAGE_REL_ARM_REL32) ||
(Header.Machine == COFF::IMAGE_FILE_MACHINE_ARM64 &&
Reloc.Data.Type == COFF::IMAGE_REL_ARM64_REL32))
FixedValue += 4;
if (Header.Machine == COFF::IMAGE_FILE_MACHINE_ARMNT) {
switch (Reloc.Data.Type) {
case COFF::IMAGE_REL_ARM_ABSOLUTE:
case COFF::IMAGE_REL_ARM_ADDR32:
case COFF::IMAGE_REL_ARM_ADDR32NB:
case COFF::IMAGE_REL_ARM_TOKEN:
case COFF::IMAGE_REL_ARM_SECTION:
case COFF::IMAGE_REL_ARM_SECREL:
break;
case COFF::IMAGE_REL_ARM_BRANCH11:
case COFF::IMAGE_REL_ARM_BLX11:
// IMAGE_REL_ARM_BRANCH11 and IMAGE_REL_ARM_BLX11 are only used for
// pre-ARMv7, which implicitly rules them out of ARMNT (they would be valid
// for Windows CE).
case COFF::IMAGE_REL_ARM_BRANCH24:
case COFF::IMAGE_REL_ARM_BLX24:
case COFF::IMAGE_REL_ARM_MOV32A:
// IMAGE_REL_ARM_BRANCH24, IMAGE_REL_ARM_BLX24, IMAGE_REL_ARM_MOV32A are
// only used for ARM mode code, which is documented as being unsupported
// by Windows on ARM. Empirical evidence indicates that masm is able to
// generate the relocations; however, the rest of the MSVC toolchain is
// unable to handle them.
llvm_unreachable("unsupported relocation");
break;
case COFF::IMAGE_REL_ARM_MOV32T:
break;
case COFF::IMAGE_REL_ARM_BRANCH20T:
case COFF::IMAGE_REL_ARM_BRANCH24T:
case COFF::IMAGE_REL_ARM_BLX23T:
// IMAGE_REL_ARM_BRANCH20T, IMAGE_REL_ARM_BRANCH24T, IMAGE_REL_ARM_BLX23T all
// perform a 4 byte adjustment to the relocation. Relative branches are
// offset by 4 on ARM; however, because there are no RELA relocations, all
// branches are offset by 4.
FixedValue = FixedValue + 4;
break;
}
}
// The fixed value never makes sense for section indices, ignore it.
if (Fixup.getKind() == FK_SecRel_2)
FixedValue = 0;
if (TargetObjectWriter->recordRelocation(Fixup))
Sec->Relocations.push_back(Reloc);
}
static std::time_t getTime() {
std::time_t Now = time(nullptr);
if (Now < 0 || !isUInt<32>(Now))
return UINT32_MAX;
return Now;
}
// Create .file symbols.
void WinCOFFObjectWriter::createFileSymbols(MCAssembler &Asm) {
for (const std::pair<std::string, size_t> &It : Asm.getFileNames()) {
// round up to calculate the number of auxiliary symbols required
const std::string &Name = It.first;
unsigned SymbolSize = UseBigObj ? COFF::Symbol32Size : COFF::Symbol16Size;
unsigned Count = (Name.size() + SymbolSize - 1) / SymbolSize;
COFFSymbol *File = createSymbol(".file");
File->Data.SectionNumber = COFF::IMAGE_SYM_DEBUG;
File->Data.StorageClass = COFF::IMAGE_SYM_CLASS_FILE;
File->Aux.resize(Count);
unsigned Offset = 0;
unsigned Length = Name.size();
for (auto &Aux : File->Aux) {
Aux.AuxType = ATFile;
if (Length > SymbolSize) {
memcpy(&Aux.Aux, Name.c_str() + Offset, SymbolSize);
Length = Length - SymbolSize;
} else {
memcpy(&Aux.Aux, Name.c_str() + Offset, Length);
memset((char *)&Aux.Aux + Length, 0, SymbolSize - Length);
break;
}
Offset += SymbolSize;
}
}
}
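// Illustrative sketch (not part of the patch): the aux-record math used above.
// A 45-character file name with the 18-byte non-bigobj symbol size needs
// (45 + 17) / 18 = 3 auxiliary records, the last of which is zero-padded.
static unsigned exampleNumFileAuxRecords(size_t NameLen, bool UseBigObj) {
  unsigned SymbolSize = UseBigObj ? COFF::Symbol32Size : COFF::Symbol16Size;
  return static_cast<unsigned>((NameLen + SymbolSize - 1) / SymbolSize);
}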
void WinCOFFObjectWriter::setWeakDefaultNames() {
if (WeakDefaults.empty())
return;
// If multiple object files use a weak symbol (either with a regular
// defined default, or an absolute zero symbol as default), the defaults
// cause duplicate definitions unless their names are made unique. Look
// for a defined external symbol that isn't comdat; that should be unique
// unless there are other duplicate definitions. If none is found,
// allow picking a comdat symbol, as that's still better than nothing.
COFFSymbol *Unique = nullptr;
for (bool AllowComdat : {false, true}) {
for (auto &Sym : Symbols) {
// Don't include the names of the defaults themselves
if (WeakDefaults.count(Sym.get()))
continue;
// Only consider external symbols
if (Sym->Data.StorageClass != COFF::IMAGE_SYM_CLASS_EXTERNAL)
continue;
// Only consider symbols defined in a section or that are absolute
if (!Sym->Section && Sym->Data.SectionNumber != COFF::IMAGE_SYM_ABSOLUTE)
continue;
if (!AllowComdat && Sym->Section &&
Sym->Section->Header.Characteristics & COFF::IMAGE_SCN_LNK_COMDAT)
continue;
Unique = Sym.get();
break;
}
if (Unique)
break;
}
// If we didn't find any unique symbol to use for the names, just skip this.
if (!Unique)
return;
for (auto *Sym : WeakDefaults) {
Sym->Name.append(".");
Sym->Name.append(Unique->Name);
}
}
static bool isAssociative(const COFFSection &Section) {
return Section.Symbol->Aux[0].Aux.SectionDefinition.Selection ==
COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE;
}
void WinCOFFObjectWriter::assignSectionNumbers() {
size_t I = 1;
auto Assign = [&](COFFSection &Section) {
Section.Number = I;
Section.Symbol->Data.SectionNumber = I;
Section.Symbol->Aux[0].Aux.SectionDefinition.Number = I;
++I;
};
// Although it is not explicitly required by the Microsoft COFF spec,
// we should avoid emitting forward associative section references,
// because MSVC link.exe as of 2017 cannot handle that.
for (const std::unique_ptr<COFFSection> &Section : Sections)
if (!isAssociative(*Section))
Assign(*Section);
for (const std::unique_ptr<COFFSection> &Section : Sections)
if (isAssociative(*Section))
Assign(*Section);
}
// Assign file offsets to COFF object file structures.
void WinCOFFObjectWriter::assignFileOffsets(MCAssembler &Asm,
const MCAsmLayout &Layout) {
unsigned Offset = W.OS.tell();
Offset += UseBigObj ? COFF::Header32Size : COFF::Header16Size;
Offset += COFF::SectionSize * Header.NumberOfSections;
for (const auto &Section : Asm) {
COFFSection *Sec = SectionMap[&Section];
if (Sec->Number == -1)
continue;
Sec->Header.SizeOfRawData = Layout.getSectionAddressSize(&Section);
if (IsPhysicalSection(Sec)) {
Sec->Header.PointerToRawData = Offset;
Offset += Sec->Header.SizeOfRawData;
}
if (!Sec->Relocations.empty()) {
bool RelocationsOverflow = Sec->Relocations.size() >= 0xffff;
if (RelocationsOverflow) {
// Signal overflow by setting NumberOfRelocations to max value. Actual
// size is found in reloc #0. Microsoft tools understand this.
Sec->Header.NumberOfRelocations = 0xffff;
} else {
Sec->Header.NumberOfRelocations = Sec->Relocations.size();
}
Sec->Header.PointerToRelocations = Offset;
if (RelocationsOverflow) {
// Reloc #0 will contain actual count, so make room for it.
Offset += COFF::RelocationSize;
}
Offset += COFF::RelocationSize * Sec->Relocations.size();
for (auto &Relocation : Sec->Relocations) {
assert(Relocation.Symb->getIndex() != -1);
Relocation.Data.SymbolTableIndex = Relocation.Symb->getIndex();
}
}
assert(Sec->Symbol->Aux.size() == 1 &&
"Section's symbol must have one aux!");
AuxSymbol &Aux = Sec->Symbol->Aux[0];
assert(Aux.AuxType == ATSectionDefinition &&
"Section's symbol's aux symbol must be a Section Definition!");
Aux.Aux.SectionDefinition.Length = Sec->Header.SizeOfRawData;
Aux.Aux.SectionDefinition.NumberOfRelocations =
Sec->Header.NumberOfRelocations;
Aux.Aux.SectionDefinition.NumberOfLinenumbers =
Sec->Header.NumberOfLineNumbers;
}
Header.PointerToSymbolTable = Offset;
}
uint64_t WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
const MCAsmLayout &Layout) {
uint64_t StartOffset = W.OS.tell();
if (Sections.size() > INT32_MAX)
report_fatal_error(
"PE COFF object files can't have more than 2147483647 sections");
UseBigObj = Sections.size() > COFF::MaxNumberOfSections16;
Header.NumberOfSections = Sections.size();
Header.NumberOfSymbols = 0;
setWeakDefaultNames();
assignSectionNumbers();
createFileSymbols(Asm);
for (auto &Symbol : Symbols) {
// Update section number & offset for symbols that have them.
if (Symbol->Section)
Symbol->Data.SectionNumber = Symbol->Section->Number;
Symbol->setIndex(Header.NumberOfSymbols++);
// Update auxiliary symbol info.
Symbol->Data.NumberOfAuxSymbols = Symbol->Aux.size();
Header.NumberOfSymbols += Symbol->Data.NumberOfAuxSymbols;
}
// Build string table.
for (const auto &S : Sections)
if (S->Name.size() > COFF::NameSize)
Strings.add(S->Name);
for (const auto &S : Symbols)
if (S->Name.size() > COFF::NameSize)
Strings.add(S->Name);
Strings.finalize();
// Set names.
for (const auto &S : Sections)
SetSectionName(*S);
for (auto &S : Symbols)
SetSymbolName(*S);
// Fixup weak external references.
for (auto &Symbol : Symbols) {
if (Symbol->Other) {
assert(Symbol->getIndex() != -1);
assert(Symbol->Aux.size() == 1 && "Symbol must contain one aux symbol!");
assert(Symbol->Aux[0].AuxType == ATWeakExternal &&
"Symbol's aux symbol must be a Weak External!");
Symbol->Aux[0].Aux.WeakExternal.TagIndex = Symbol->Other->getIndex();
}
}
// Fixup associative COMDAT sections.
for (auto &Section : Sections) {
if (Section->Symbol->Aux[0].Aux.SectionDefinition.Selection !=
COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE)
continue;
const MCSectionCOFF &MCSec = *Section->MCSection;
const MCSymbol *AssocMCSym = MCSec.getCOMDATSymbol();
assert(AssocMCSym);
// It's an error to try to associate with an undefined symbol or a symbol
// without a section.
if (!AssocMCSym->isInSection()) {
Asm.getContext().reportError(
SMLoc(), Twine("cannot make section ") + MCSec.getName() +
Twine(" associative with sectionless symbol ") +
AssocMCSym->getName());
continue;
}
const auto *AssocMCSec = cast<MCSectionCOFF>(&AssocMCSym->getSection());
assert(SectionMap.count(AssocMCSec));
COFFSection *AssocSec = SectionMap[AssocMCSec];
// Skip this section if the associated section is unused.
if (AssocSec->Number == -1)
continue;
Section->Symbol->Aux[0].Aux.SectionDefinition.Number = AssocSec->Number;
}
// Create the contents of the .llvm_addrsig section.
if (EmitAddrsigSection) {
auto Frag = new MCDataFragment(AddrsigSection);
Frag->setLayoutOrder(0);
raw_svector_ostream OS(Frag->getContents());
for (const MCSymbol *S : AddrsigSyms) {
if (!S->isTemporary()) {
encodeULEB128(S->getIndex(), OS);
continue;
}
MCSection *TargetSection = &S->getSection();
assert(SectionMap.find(TargetSection) != SectionMap.end() &&
"Section must already have been defined in "
"executePostLayoutBinding!");
encodeULEB128(SectionMap[TargetSection]->Symbol->getIndex(), OS);
}
}
// Create the contents of the .llvm.call-graph-profile section.
if (CGProfileSection) {
auto *Frag = new MCDataFragment(CGProfileSection);
Frag->setLayoutOrder(0);
raw_svector_ostream OS(Frag->getContents());
for (const MCAssembler::CGProfileEntry &CGPE : Asm.CGProfile) {
uint32_t FromIndex = CGPE.From->getSymbol().getIndex();
uint32_t ToIndex = CGPE.To->getSymbol().getIndex();
support::endian::write(OS, FromIndex, W.Endian);
support::endian::write(OS, ToIndex, W.Endian);
support::endian::write(OS, CGPE.Count, W.Endian);
}
}
assignFileOffsets(Asm, Layout);
// MS LINK expects to be able to use this timestamp to implement their
// /INCREMENTAL feature.
if (Asm.isIncrementalLinkerCompatible()) {
Header.TimeDateStamp = getTime();
} else {
// Have deterministic output if /INCREMENTAL isn't needed. Also matches GNU.
Header.TimeDateStamp = 0;
}
// Write it all to disk...
WriteFileHeader(Header);
writeSectionHeaders();
// Write section contents.
sections::iterator I = Sections.begin();
sections::iterator IE = Sections.end();
MCAssembler::iterator J = Asm.begin();
MCAssembler::iterator JE = Asm.end();
for (; I != IE && J != JE; ++I, ++J)
writeSection(Asm, Layout, **I, *J);
assert(W.OS.tell() == Header.PointerToSymbolTable &&
"Header::PointerToSymbolTable is insane!");
// Write a symbol table.
for (auto &Symbol : Symbols)
if (Symbol->getIndex() != -1)
WriteSymbol(*Symbol);
// Write a string table, which completes the entire COFF file.
Strings.write(W.OS);
return W.OS.tell() - StartOffset;
}
MCWinCOFFObjectTargetWriter::MCWinCOFFObjectTargetWriter(unsigned Machine_)
: Machine(Machine_) {}
// Pin the vtable to this file.
void MCWinCOFFObjectTargetWriter::anchor() {}
//------------------------------------------------------------------------------
// WinCOFFObjectWriter factory function
std::unique_ptr<MCObjectWriter> llvm::createWinCOFFObjectWriter(
std::unique_ptr<MCWinCOFFObjectTargetWriter> MOTW, raw_pwrite_stream &OS) {
return std::make_unique<WinCOFFObjectWriter>(std::move(MOTW), OS);
}
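// Illustrative sketch (not from the patch): the two long-section-name forms
// that COFF::encodeSectionName (now used by SetSectionName above) produces.
// Offsets up to 9999999 use a "/<decimal offset>" name; larger offsets, up to
// 64^6 - 1, use the "//" base64 form shown in the code removed by this change.
static bool exampleEncodeDecimalSectionName(char Name[COFF::NameSize],
                                            uint64_t Offset) {
  if (Offset > 9999999u)
    return false; // Would need the "//AAAAAA"-style base64 form instead.
  SmallString<COFF::NameSize> Buf;
  Twine('/').concat(Twine(Offset)).toVector(Buf);
  assert(Buf.size() <= COFF::NameSize && "decimal form always fits in 8 bytes");
  std::memset(Name, 0, COFF::NameSize);
  std::memcpy(Name, Buf.data(), Buf.size());
  return true;
}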
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c539c8617d99..ac5e51e47ddf 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1,20022 +1,20029 @@
//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64TargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "AArch64ISelLowering.h"
#include "AArch64CallingConvention.h"
#include "AArch64ExpandImm.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64PerfectShuffle.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <bitset>
#include <cassert>
#include <cctype>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <tuple>
#include <utility>
#include <vector>
using namespace llvm;
using namespace llvm::PatternMatch;
#define DEBUG_TYPE "aarch64-lower"
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumShiftInserts, "Number of vector shift inserts");
STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
// FIXME: The necessary dtprel relocations don't seem to be supported
// well in the GNU bfd and gold linkers at the moment. Therefore, by
// default, for now, fall back to GeneralDynamic code generation.
cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
"aarch64-elf-ldtls-generation", cl::Hidden,
cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
cl::init(false));
static cl::opt<bool>
EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
cl::desc("Enable AArch64 logical imm instruction "
"optimization"),
cl::init(true));
// Temporary option added for the purpose of testing functionality added
// to DAGCombiner.cpp in D92230. It is expected that this can be removed
// in future when both implementations will be based off MGATHER rather
// than the GLD1 nodes added for the SVE gather load intrinsics.
static cl::opt<bool>
EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
cl::desc("Combine extends of AArch64 masked "
"gather intrinsics"),
cl::init(true));
/// Value type used for condition codes.
static const MVT MVT_CC = MVT::i32;
static inline EVT getPackedSVEVectorVT(EVT VT) {
switch (VT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("unexpected element type for vector");
case MVT::i8:
return MVT::nxv16i8;
case MVT::i16:
return MVT::nxv8i16;
case MVT::i32:
return MVT::nxv4i32;
case MVT::i64:
return MVT::nxv2i64;
case MVT::f16:
return MVT::nxv8f16;
case MVT::f32:
return MVT::nxv4f32;
case MVT::f64:
return MVT::nxv2f64;
case MVT::bf16:
return MVT::nxv8bf16;
}
}
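// Each case returns the scalable type whose known-minimum size fills one
// 128-bit SVE granule: 16 x i8, 8 x 16-bit, 4 x 32-bit or 2 x 64-bit elements.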
// NOTE: Currently there's only a need to return integer vector types. If this
// changes then just add an extra "type" parameter.
static inline EVT getPackedSVEVectorVT(ElementCount EC) {
switch (EC.getKnownMinValue()) {
default:
llvm_unreachable("unexpected element count for vector");
case 16:
return MVT::nxv16i8;
case 8:
return MVT::nxv8i16;
case 4:
return MVT::nxv4i32;
case 2:
return MVT::nxv2i64;
}
}
static inline EVT getPromotedVTForPredicate(EVT VT) {
assert(VT.isScalableVector() && (VT.getVectorElementType() == MVT::i1) &&
"Expected scalable predicate vector type!");
switch (VT.getVectorMinNumElements()) {
default:
llvm_unreachable("unexpected element count for vector");
case 2:
return MVT::nxv2i64;
case 4:
return MVT::nxv4i32;
case 8:
return MVT::nxv8i16;
case 16:
return MVT::nxv16i8;
}
}
/// Returns true if VT's elements occupy the lowest bit positions of its
/// associated register class without any intervening space.
///
/// For example, nxv2f16, nxv4f16 and nxv8f16 are legal types that belong to the
/// same register class, but only nxv8f16 can be treated as a packed vector.
static inline bool isPackedVectorType(EVT VT, SelectionDAG &DAG) {
assert(VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Expected legal vector type!");
return VT.isFixedLengthVector() ||
VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock;
}
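// Worked example: nxv8f16 has a known-min size of 8 x 16 = 128 bits and is
// packed, whereas nxv2f16 covers only 2 x 16 = 32 bits of the 128-bit granule
// and is therefore unpacked.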
// Returns true for ####_MERGE_PASSTHRU opcodes, whose operands have a leading
// predicate and end with a passthru value matching the result type.
static bool isMergePassthruOpcode(unsigned Opc) {
switch (Opc) {
default:
return false;
case AArch64ISD::BITREVERSE_MERGE_PASSTHRU:
case AArch64ISD::BSWAP_MERGE_PASSTHRU:
case AArch64ISD::REVH_MERGE_PASSTHRU:
case AArch64ISD::REVW_MERGE_PASSTHRU:
case AArch64ISD::CTLZ_MERGE_PASSTHRU:
case AArch64ISD::CTPOP_MERGE_PASSTHRU:
case AArch64ISD::DUP_MERGE_PASSTHRU:
case AArch64ISD::ABS_MERGE_PASSTHRU:
case AArch64ISD::NEG_MERGE_PASSTHRU:
case AArch64ISD::FNEG_MERGE_PASSTHRU:
case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
case AArch64ISD::FCEIL_MERGE_PASSTHRU:
case AArch64ISD::FFLOOR_MERGE_PASSTHRU:
case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU:
case AArch64ISD::FRINT_MERGE_PASSTHRU:
case AArch64ISD::FROUND_MERGE_PASSTHRU:
case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
case AArch64ISD::FP_ROUND_MERGE_PASSTHRU:
case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU:
case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU:
case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU:
case AArch64ISD::FCVTZU_MERGE_PASSTHRU:
case AArch64ISD::FCVTZS_MERGE_PASSTHRU:
case AArch64ISD::FSQRT_MERGE_PASSTHRU:
case AArch64ISD::FRECPX_MERGE_PASSTHRU:
case AArch64ISD::FABS_MERGE_PASSTHRU:
return true;
}
}
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
// AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
// we have to make something up. Arbitrarily, choose ZeroOrOne.
setBooleanContents(ZeroOrOneBooleanContent);
// When comparing vectors the result sets the different elements in the
// vector to all-one or all-zero.
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
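// Scalar compares set NZCV and are materialised with CSINC/CSET, producing
// 0 or 1; vector compares (e.g. CMEQ) set each lane to all-ones or all-zeros,
// hence the two different boolean contents.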
// Set up the register classes.
addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
if (Subtarget->hasLS64()) {
addRegisterClass(MVT::i64x8, &AArch64::GPR64x8ClassRegClass);
setOperationAction(ISD::LOAD, MVT::i64x8, Custom);
setOperationAction(ISD::STORE, MVT::i64x8, Custom);
}
if (Subtarget->hasFPARMv8()) {
addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
addRegisterClass(MVT::bf16, &AArch64::FPR16RegClass);
addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
}
if (Subtarget->hasNEON()) {
addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
// Someone set us up the NEON.
addDRTypeForNEON(MVT::v2f32);
addDRTypeForNEON(MVT::v8i8);
addDRTypeForNEON(MVT::v4i16);
addDRTypeForNEON(MVT::v2i32);
addDRTypeForNEON(MVT::v1i64);
addDRTypeForNEON(MVT::v1f64);
addDRTypeForNEON(MVT::v4f16);
if (Subtarget->hasBF16())
addDRTypeForNEON(MVT::v4bf16);
addQRTypeForNEON(MVT::v4f32);
addQRTypeForNEON(MVT::v2f64);
addQRTypeForNEON(MVT::v16i8);
addQRTypeForNEON(MVT::v8i16);
addQRTypeForNEON(MVT::v4i32);
addQRTypeForNEON(MVT::v2i64);
addQRTypeForNEON(MVT::v8f16);
if (Subtarget->hasBF16())
addQRTypeForNEON(MVT::v8bf16);
}
if (Subtarget->hasSVE()) {
// Add legal sve predicate types
addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);
// Add legal sve data types
addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
if (Subtarget->hasBF16()) {
addRegisterClass(MVT::nxv2bf16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4bf16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass);
}
if (Subtarget->useSVEForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addRegisterClass(VT, &AArch64::ZPRRegClass);
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addRegisterClass(VT, &AArch64::ZPRRegClass);
}
for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) {
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
}
for (auto VT :
{ MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
for (auto VT :
{ MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, MVT::nxv4f32,
MVT::nxv2f64 }) {
setCondCodeAction(ISD::SETO, VT, Expand);
setCondCodeAction(ISD::SETOLT, VT, Expand);
setCondCodeAction(ISD::SETLT, VT, Expand);
setCondCodeAction(ISD::SETOLE, VT, Expand);
setCondCodeAction(ISD::SETLE, VT, Expand);
setCondCodeAction(ISD::SETULT, VT, Expand);
setCondCodeAction(ISD::SETULE, VT, Expand);
setCondCodeAction(ISD::SETUGE, VT, Expand);
setCondCodeAction(ISD::SETUGT, VT, Expand);
setCondCodeAction(ISD::SETUEQ, VT, Expand);
setCondCodeAction(ISD::SETUNE, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FPOWI, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
}
}
// Compute derived properties from the register classes
computeRegisterProperties(Subtarget->getRegisterInfo());
// Provide all sorts of operation actions
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
setOperationAction(ISD::SETCC, MVT::i32, Custom);
setOperationAction(ISD::SETCC, MVT::i64, Custom);
setOperationAction(ISD::SETCC, MVT::f16, Custom);
setOperationAction(ISD::SETCC, MVT::f32, Custom);
setOperationAction(ISD::SETCC, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::BR_CC, MVT::i64, Custom);
setOperationAction(ISD::BR_CC, MVT::f16, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Custom);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::f16, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
setOperationAction(ISD::BR_JT, MVT::Other, Custom);
setOperationAction(ISD::JumpTable, MVT::i64, Custom);
setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
setOperationAction(ISD::FREM, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f80, Expand);
setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
// Custom lowering hooks are needed for XOR
// to fold it into CSINC/CSINV.
setOperationAction(ISD::XOR, MVT::i32, Custom);
setOperationAction(ISD::XOR, MVT::i64, Custom);
// Virtually no operation on f128 is legal, but LLVM can't expand them when
// there's a valid register class, so we need custom operations in most cases.
setOperationAction(ISD::FABS, MVT::f128, Expand);
setOperationAction(ISD::FADD, MVT::f128, LibCall);
setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
setOperationAction(ISD::FCOS, MVT::f128, Expand);
setOperationAction(ISD::FDIV, MVT::f128, LibCall);
setOperationAction(ISD::FMA, MVT::f128, Expand);
setOperationAction(ISD::FMUL, MVT::f128, LibCall);
setOperationAction(ISD::FNEG, MVT::f128, Expand);
setOperationAction(ISD::FPOW, MVT::f128, Expand);
setOperationAction(ISD::FREM, MVT::f128, Expand);
setOperationAction(ISD::FRINT, MVT::f128, Expand);
setOperationAction(ISD::FSIN, MVT::f128, Expand);
setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
setOperationAction(ISD::FSQRT, MVT::f128, Expand);
setOperationAction(ISD::FSUB, MVT::f128, LibCall);
setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
setOperationAction(ISD::SETCC, MVT::f128, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom);
setOperationAction(ISD::BR_CC, MVT::f128, Custom);
setOperationAction(ISD::SELECT, MVT::f128, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
// Lowering for many of the conversions is actually specified by the non-f128
// type. The LowerXXX function will be trivial when f128 isn't involved.
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
// Variable arguments.
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::VAARG, MVT::Other, Custom);
setOperationAction(ISD::VACOPY, MVT::Other, Custom);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
// Variable-sized objects.
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
if (Subtarget->isTargetWindows())
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
else
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
// Constant pool entries
setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
// BlockAddress
setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
// Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
setOperationAction(ISD::ADDC, MVT::i32, Custom);
setOperationAction(ISD::ADDE, MVT::i32, Custom);
setOperationAction(ISD::SUBC, MVT::i32, Custom);
setOperationAction(ISD::SUBE, MVT::i32, Custom);
setOperationAction(ISD::ADDC, MVT::i64, Custom);
setOperationAction(ISD::ADDE, MVT::i64, Custom);
setOperationAction(ISD::SUBC, MVT::i64, Custom);
setOperationAction(ISD::SUBE, MVT::i64, Custom);
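// For example, an i128 add is split into an ADDS for the low 64 bits
// followed by an ADCS/ADC that consumes the carry flag for the high 64 bits.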
// AArch64 lacks both left-rotate and popcount instructions.
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i64, Expand);
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
}
// AArch64 doesn't have i32 MULH{S|U}.
setOperationAction(ISD::MULHU, MVT::i32, Expand);
setOperationAction(ISD::MULHS, MVT::i32, Expand);
// AArch64 doesn't have {U|S}MUL_LOHI.
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i64, Custom);
setOperationAction(ISD::CTPOP, MVT::i128, Custom);
setOperationAction(ISD::ABS, MVT::i32, Custom);
setOperationAction(ISD::ABS, MVT::i64, Custom);
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
}
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::SREM, MVT::i64, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i64, Expand);
// Custom lower Add/Sub/Mul with overflow.
setOperationAction(ISD::SADDO, MVT::i32, Custom);
setOperationAction(ISD::SADDO, MVT::i64, Custom);
setOperationAction(ISD::UADDO, MVT::i32, Custom);
setOperationAction(ISD::UADDO, MVT::i64, Custom);
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
setOperationAction(ISD::SSUBO, MVT::i64, Custom);
setOperationAction(ISD::USUBO, MVT::i32, Custom);
setOperationAction(ISD::USUBO, MVT::i64, Custom);
setOperationAction(ISD::SMULO, MVT::i32, Custom);
setOperationAction(ISD::SMULO, MVT::i64, Custom);
setOperationAction(ISD::UMULO, MVT::i32, Custom);
setOperationAction(ISD::UMULO, MVT::i64, Custom);
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
if (Subtarget->hasFullFP16())
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
else
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
setOperationAction(ISD::FREM, MVT::f16, Promote);
setOperationAction(ISD::FREM, MVT::v4f16, Expand);
setOperationAction(ISD::FREM, MVT::v8f16, Expand);
setOperationAction(ISD::FPOW, MVT::f16, Promote);
setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
setOperationAction(ISD::FPOWI, MVT::f16, Promote);
setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
setOperationAction(ISD::FCOS, MVT::f16, Promote);
setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
setOperationAction(ISD::FSIN, MVT::f16, Promote);
setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
setOperationAction(ISD::FEXP, MVT::f16, Promote);
setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
setOperationAction(ISD::FEXP2, MVT::f16, Promote);
setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
setOperationAction(ISD::FLOG, MVT::f16, Promote);
setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
setOperationAction(ISD::FLOG10, MVT::f16, Promote);
setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
if (!Subtarget->hasFullFP16()) {
setOperationAction(ISD::SELECT, MVT::f16, Promote);
setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
setOperationAction(ISD::SETCC, MVT::f16, Promote);
setOperationAction(ISD::BR_CC, MVT::f16, Promote);
setOperationAction(ISD::FADD, MVT::f16, Promote);
setOperationAction(ISD::FSUB, MVT::f16, Promote);
setOperationAction(ISD::FMUL, MVT::f16, Promote);
setOperationAction(ISD::FDIV, MVT::f16, Promote);
setOperationAction(ISD::FMA, MVT::f16, Promote);
setOperationAction(ISD::FNEG, MVT::f16, Promote);
setOperationAction(ISD::FABS, MVT::f16, Promote);
setOperationAction(ISD::FCEIL, MVT::f16, Promote);
setOperationAction(ISD::FSQRT, MVT::f16, Promote);
setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
setOperationAction(ISD::FRINT, MVT::f16, Promote);
setOperationAction(ISD::FROUND, MVT::f16, Promote);
setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
// Promote v4f16 to v4f32 when that is known to be safe.
setOperationAction(ISD::FADD, MVT::v4f16, Promote);
setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
setOperationAction(ISD::FABS, MVT::v4f16, Expand);
setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Expand);
setOperationAction(ISD::FMA, MVT::v4f16, Expand);
setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
setOperationAction(ISD::FABS, MVT::v8f16, Expand);
setOperationAction(ISD::FADD, MVT::v8f16, Expand);
setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
setOperationAction(ISD::FMA, MVT::v8f16, Expand);
setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Expand);
setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
}
// AArch64 has implementations of a lot of rounding-like FP operations.
for (MVT Ty : {MVT::f32, MVT::f64}) {
setOperationAction(ISD::FFLOOR, Ty, Legal);
setOperationAction(ISD::FNEARBYINT, Ty, Legal);
setOperationAction(ISD::FCEIL, Ty, Legal);
setOperationAction(ISD::FRINT, Ty, Legal);
setOperationAction(ISD::FTRUNC, Ty, Legal);
setOperationAction(ISD::FROUND, Ty, Legal);
setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
setOperationAction(ISD::FMINNUM, Ty, Legal);
setOperationAction(ISD::FMAXNUM, Ty, Legal);
setOperationAction(ISD::FMINIMUM, Ty, Legal);
setOperationAction(ISD::FMAXIMUM, Ty, Legal);
setOperationAction(ISD::LROUND, Ty, Legal);
setOperationAction(ISD::LLROUND, Ty, Legal);
setOperationAction(ISD::LRINT, Ty, Legal);
setOperationAction(ISD::LLRINT, Ty, Legal);
}
if (Subtarget->hasFullFP16()) {
setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
setOperationAction(ISD::FCEIL, MVT::f16, Legal);
setOperationAction(ISD::FRINT, MVT::f16, Legal);
setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
setOperationAction(ISD::FROUND, MVT::f16, Legal);
setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
}
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
// Generate outline atomics library calls only if LSE was not specified for
// the subtarget.
if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, LibCall);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, LibCall);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i64, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, LibCall);
#define LCALLNAMES(A, B, N) \
setLibcallName(A##N##_RELAX, #B #N "_relax"); \
setLibcallName(A##N##_ACQ, #B #N "_acq"); \
setLibcallName(A##N##_REL, #B #N "_rel"); \
setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
#define LCALLNAME4(A, B) \
LCALLNAMES(A, B, 1) \
LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
#define LCALLNAME5(A, B) \
LCALLNAMES(A, B, 1) \
LCALLNAMES(A, B, 2) \
LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
#undef LCALLNAMES
#undef LCALLNAME4
#undef LCALLNAME5
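// For instance, LCALLNAMES(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas, 4)
// expands to calls such as
//   setLibcallName(RTLIB::OUTLINE_ATOMIC_CAS4_ACQ, "__aarch64_cas4_acq");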
}
// 128-bit loads and stores can be done without expanding
setOperationAction(ISD::LOAD, MVT::i128, Custom);
setOperationAction(ISD::STORE, MVT::i128, Custom);
// Aligned 128-bit loads and stores are single-copy atomic according to the
// v8.4a spec.
if (Subtarget->hasLSE2()) {
setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
}
// 256-bit non-temporal stores can be lowered to STNP. Do this as part of the
// custom lowering, as there are no un-paired non-temporal stores and
// legalization will break up 256-bit inputs.
setOperationAction(ISD::STORE, MVT::v32i8, Custom);
setOperationAction(ISD::STORE, MVT::v16i16, Custom);
setOperationAction(ISD::STORE, MVT::v16f16, Custom);
setOperationAction(ISD::STORE, MVT::v8i32, Custom);
setOperationAction(ISD::STORE, MVT::v8f32, Custom);
setOperationAction(ISD::STORE, MVT::v4f64, Custom);
setOperationAction(ISD::STORE, MVT::v4i64, Custom);
// Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
// This requires the Performance Monitors extension.
if (Subtarget->hasPerfMon())
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
// Issue __sincos_stret if available.
setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
} else {
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
}
if (Subtarget->getTargetTriple().isOSMSVCRT()) {
// MSVCRT doesn't have powi; fall back to pow
setLibcallName(RTLIB::POWI_F32, nullptr);
setLibcallName(RTLIB::POWI_F64, nullptr);
}
// Make floating-point constants legal for the large code model, so they don't
// become loads from the constant pool.
if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
}
// AArch64 does not have floating-point extending loads, i1 sign-extending
// load, floating-point truncating stores, or v2i32->v2i16 truncating store.
for (MVT VT : MVT::fp_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
}
for (MVT VT : MVT::integer_valuetypes())
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f128, MVT::f80, Expand);
setTruncStoreAction(MVT::f128, MVT::f64, Expand);
setTruncStoreAction(MVT::f128, MVT::f32, Expand);
setTruncStoreAction(MVT::f128, MVT::f16, Expand);
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
setOperationAction(ISD::BITCAST, MVT::f16, Custom);
setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
// Indexed loads and stores are supported.
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
setIndexedLoadAction(im, MVT::i8, Legal);
setIndexedLoadAction(im, MVT::i16, Legal);
setIndexedLoadAction(im, MVT::i32, Legal);
setIndexedLoadAction(im, MVT::i64, Legal);
setIndexedLoadAction(im, MVT::f64, Legal);
setIndexedLoadAction(im, MVT::f32, Legal);
setIndexedLoadAction(im, MVT::f16, Legal);
setIndexedLoadAction(im, MVT::bf16, Legal);
setIndexedStoreAction(im, MVT::i8, Legal);
setIndexedStoreAction(im, MVT::i16, Legal);
setIndexedStoreAction(im, MVT::i32, Legal);
setIndexedStoreAction(im, MVT::i64, Legal);
setIndexedStoreAction(im, MVT::f64, Legal);
setIndexedStoreAction(im, MVT::f32, Legal);
setIndexedStoreAction(im, MVT::f16, Legal);
setIndexedStoreAction(im, MVT::bf16, Legal);
}
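// Pre-indexed forms such as "ldr x0, [x1, #8]!" and post-indexed forms such
// as "ldr x0, [x1], #8" update the base register as part of the access.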
// Trap.
setOperationAction(ISD::TRAP, MVT::Other, Legal);
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
// We combine OR nodes for bitfield operations.
setTargetDAGCombine(ISD::OR);
// Try to create BICs for vector ANDs.
setTargetDAGCombine(ISD::AND);
// Vector add and sub nodes may conceal a high-half opportunity.
// Also, try to fold ADD into CSINC/CSINV.
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::ABS);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::XOR);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::FP_TO_UINT);
setTargetDAGCombine(ISD::FP_TO_SINT_SAT);
setTargetDAGCombine(ISD::FP_TO_UINT_SAT);
setTargetDAGCombine(ISD::FDIV);
// Try and combine setcc with csel
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::VECTOR_SPLICE);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::CONCAT_VECTORS);
setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
setTargetDAGCombine(ISD::STORE);
if (Subtarget->supportsAddressTopByteIgnored())
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::VSELECT);
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::VECREDUCE_ADD);
setTargetDAGCombine(ISD::STEP_VECTOR);
setTargetDAGCombine(ISD::FP_EXTEND);
setTargetDAGCombine(ISD::GlobalAddress);
// In case of strict alignment, avoid an excessive number of byte wide stores.
MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemset =
Subtarget->requiresStrictAlign() ? MaxStoresPerMemsetOptSize : 32;
MaxGluedStoresPerMemcpy = 4;
MaxStoresPerMemcpyOptSize = 4;
MaxStoresPerMemcpy =
Subtarget->requiresStrictAlign() ? MaxStoresPerMemcpyOptSize : 16;
MaxStoresPerMemmoveOptSize = 4;
MaxStoresPerMemmove = 4;
MaxLoadsPerMemcmpOptSize = 4;
MaxLoadsPerMemcmp =
Subtarget->requiresStrictAlign() ? MaxLoadsPerMemcmpOptSize : 8;
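// These limits cap how many discrete loads/stores the backend emits when
// expanding memset/memcpy/memmove/memcmp inline; larger operations fall back
// to the corresponding library call.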
setStackPointerRegisterToSaveRestore(AArch64::SP);
setSchedulingPreference(Sched::Hybrid);
EnableExtLdPromotion = true;
// Set required alignment.
setMinFunctionAlignment(Align(4));
// Set preferred alignments.
setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment()));
setMaxBytesForAlignment(STI.getMaxBytesForLoopAlignment());
setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment()));
// Only change the limit for entries in a jump table if specified by
// the subtarget, but not at the command line.
unsigned MaxJT = STI.getMaximumJumpTableSize();
if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)
setMaximumJumpTableSize(MaxJT);
setHasExtractBitsInsn(true);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
if (Subtarget->hasNEON()) {
// FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
// silliness like this:
setOperationAction(ISD::FABS, MVT::v1f64, Expand);
setOperationAction(ISD::FADD, MVT::v1f64, Expand);
setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
setOperationAction(ISD::FMA, MVT::v1f64, Expand);
setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
setOperationAction(ISD::FREM, MVT::v1f64, Expand);
setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
setOperationAction(ISD::FROUNDEVEN, MVT::v1f64, Expand);
setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v1i64, Expand);
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v1i64, Expand);
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
// AArch64 doesn't have direct vector -> f32 conversion instructions for
// elements smaller than i32, so promote the input to i32 first.
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
// Similarly, there is no direct i32 -> f64 vector conversion instruction.
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
// Likewise, there is no direct i32 -> f16 vector conversion; set it to Custom
// so the conversion happens in two steps: v4i32 -> v4f32 -> v4f16.
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
if (Subtarget->hasFullFP16()) {
setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
} else {
// When AArch64 doesn't have full fp16 support, promote the input
// to i32 first.
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i8, MVT::v16i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
}
setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
setOperationAction(ISD::BITREVERSE, MVT::v8i8, Legal);
setOperationAction(ISD::BITREVERSE, MVT::v16i8, Legal);
setOperationAction(ISD::BITREVERSE, MVT::v2i32, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v4i32, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v2i64, Custom);
for (auto VT : {MVT::v1i64, MVT::v2i64}) {
setOperationAction(ISD::UMAX, VT, Custom);
setOperationAction(ISD::SMAX, VT, Custom);
setOperationAction(ISD::UMIN, VT, Custom);
setOperationAction(ISD::SMIN, VT, Custom);
}
// AArch64 doesn't have MUL.2d:
setOperationAction(ISD::MUL, MVT::v2i64, Expand);
// Custom handling for some quad-vector types to detect MULL.
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
// Saturates
for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
}
for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
MVT::v4i32}) {
setOperationAction(ISD::ABDS, VT, Legal);
setOperationAction(ISD::ABDU, VT, Legal);
}
// Vector reductions
for (MVT VT : { MVT::v4f16, MVT::v2f32,
MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_FADD, VT, Legal);
}
}
for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
}
setOperationAction(ISD::VECREDUCE_ADD, MVT::v2i64, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
// Likewise, narrowing and extending vector loads/stores aren't handled
// directly.
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
setOperationAction(ISD::MULHS, VT, Legal);
setOperationAction(ISD::MULHU, VT, Legal);
} else {
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
}
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
}
// AArch64 has implementations of a lot of rounding-like FP operations.
for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
setOperationAction(ISD::FFLOOR, Ty, Legal);
setOperationAction(ISD::FNEARBYINT, Ty, Legal);
setOperationAction(ISD::FCEIL, Ty, Legal);
setOperationAction(ISD::FRINT, Ty, Legal);
setOperationAction(ISD::FTRUNC, Ty, Legal);
setOperationAction(ISD::FROUND, Ty, Legal);
setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
}
if (Subtarget->hasFullFP16()) {
for (MVT Ty : {MVT::v4f16, MVT::v8f16}) {
setOperationAction(ISD::FFLOOR, Ty, Legal);
setOperationAction(ISD::FNEARBYINT, Ty, Legal);
setOperationAction(ISD::FCEIL, Ty, Legal);
setOperationAction(ISD::FRINT, Ty, Legal);
setOperationAction(ISD::FTRUNC, Ty, Legal);
setOperationAction(ISD::FROUND, Ty, Legal);
setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
}
}
if (Subtarget->hasSVE())
setOperationAction(ISD::VSCALE, MVT::i32, Custom);
setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
}
if (Subtarget->hasSVE()) {
for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
setOperationAction(ISD::BITREVERSE, VT, Custom);
setOperationAction(ISD::BSWAP, VT, Custom);
setOperationAction(ISD::CTLZ, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::MULHS, VT, Custom);
setOperationAction(ISD::MULHU, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SDIV, VT, Custom);
setOperationAction(ISD::UDIV, VT, Custom);
setOperationAction(ISD::SMIN, VT, Custom);
setOperationAction(ISD::UMIN, VT, Custom);
setOperationAction(ISD::SMAX, VT, Custom);
setOperationAction(ISD::UMAX, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::ABS, VT, Custom);
setOperationAction(ISD::ABDS, VT, Custom);
setOperationAction(ISD::ABDU, VT, Custom);
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
}
// Illegal unpacked integer vector types.
for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
}
// Legalize unpacked bitcasts to REINTERPRET_CAST.
for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32, MVT::nxv2bf16,
MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32})
setOperationAction(ISD::BITCAST, VT, Custom);
for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
// There are no legal MVT::nxv16f## based types.
if (VT != MVT::nxv16i1) {
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
}
}
// NEON doesn't support masked loads/stores/gathers/scatters, but SVE does
for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
}
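// With SVE available, these fixed-length masked ops are custom-lowered to
// their scalable-vector counterparts, using a predicate derived from the mask.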
for (MVT VT : MVT::fp_scalable_vector_valuetypes()) {
for (MVT InnerVT : MVT::fp_scalable_vector_valuetypes()) {
// Avoid marking truncating FP stores as legal to prevent the
// DAGCombiner from creating unsupported truncating stores.
setTruncStoreAction(VT, InnerVT, Expand);
// SVE does not have floating-point extending loads.
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
}
// SVE supports truncating stores of 64 and 128-bit vectors
setTruncStoreAction(MVT::v2i64, MVT::v2i8, Custom);
setTruncStoreAction(MVT::v2i64, MVT::v2i16, Custom);
setTruncStoreAction(MVT::v2i64, MVT::v2i32, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
MVT::nxv4f32, MVT::nxv2f64}) {
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::FADD, VT, Custom);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
setOperationAction(ISD::FDIV, VT, Custom);
setOperationAction(ISD::FMA, VT, Custom);
setOperationAction(ISD::FMAXIMUM, VT, Custom);
setOperationAction(ISD::FMAXNUM, VT, Custom);
setOperationAction(ISD::FMINIMUM, VT, Custom);
setOperationAction(ISD::FMINNUM, VT, Custom);
setOperationAction(ISD::FMUL, VT, Custom);
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FSUB, VT, Custom);
setOperationAction(ISD::FCEIL, VT, Custom);
setOperationAction(ISD::FFLOOR, VT, Custom);
setOperationAction(ISD::FNEARBYINT, VT, Custom);
setOperationAction(ISD::FRINT, VT, Custom);
setOperationAction(ISD::FROUND, VT, Custom);
setOperationAction(ISD::FROUNDEVEN, VT, Custom);
setOperationAction(ISD::FTRUNC, VT, Custom);
setOperationAction(ISD::FSQRT, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FP_EXTEND, VT, Custom);
setOperationAction(ISD::FP_ROUND, VT, Custom);
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
}
for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
}
setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
// NOTE: Currently this has to happen after computeRegisterProperties rather
// than the preferred option of combining it with the addRegisterClass call.
if (Subtarget->useSVEForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addTypeForFixedLengthSVE(VT);
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addTypeForFixedLengthSVE(VT);
// 64-bit results can come from an input wider than a NEON register.
for (auto VT : {MVT::v8i8, MVT::v4i16})
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
// 128-bit results imply an input wider than a NEON register.
for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
setOperationAction(ISD::TRUNCATE, VT, Custom);
for (auto VT : {MVT::v8f16, MVT::v4f32})
setOperationAction(ISD::FP_ROUND, VT, Custom);
// These operations are not supported on NEON but SVE can do them.
setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
setOperationAction(ISD::CTLZ, MVT::v1i64, Custom);
setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
setOperationAction(ISD::MUL, MVT::v1i64, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
setOperationAction(ISD::MULHS, MVT::v1i64, Custom);
setOperationAction(ISD::MULHS, MVT::v2i64, Custom);
setOperationAction(ISD::MULHU, MVT::v1i64, Custom);
setOperationAction(ISD::MULHU, MVT::v2i64, Custom);
setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
setOperationAction(ISD::SDIV, MVT::v16i8, Custom);
setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
setOperationAction(ISD::SDIV, MVT::v2i32, Custom);
setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
setOperationAction(ISD::SDIV, MVT::v1i64, Custom);
setOperationAction(ISD::SDIV, MVT::v2i64, Custom);
setOperationAction(ISD::SMAX, MVT::v1i64, Custom);
setOperationAction(ISD::SMAX, MVT::v2i64, Custom);
setOperationAction(ISD::SMIN, MVT::v1i64, Custom);
setOperationAction(ISD::SMIN, MVT::v2i64, Custom);
setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
setOperationAction(ISD::UDIV, MVT::v16i8, Custom);
setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
setOperationAction(ISD::UDIV, MVT::v8i16, Custom);
setOperationAction(ISD::UDIV, MVT::v2i32, Custom);
setOperationAction(ISD::UDIV, MVT::v4i32, Custom);
setOperationAction(ISD::UDIV, MVT::v1i64, Custom);
setOperationAction(ISD::UDIV, MVT::v2i64, Custom);
setOperationAction(ISD::UMAX, MVT::v1i64, Custom);
setOperationAction(ISD::UMAX, MVT::v2i64, Custom);
setOperationAction(ISD::UMIN, MVT::v1i64, Custom);
setOperationAction(ISD::UMIN, MVT::v2i64, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom);
// Int operations with no NEON support.
for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
setOperationAction(ISD::BITREVERSE, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
}
// FP operations with no NEON support.
for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32,
MVT::v1f64, MVT::v2f64})
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
// Use SVE for vectors with more than 2 elements.
for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
}
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv2i1, MVT::nxv2i64);
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv4i1, MVT::nxv4i32);
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv8i1, MVT::nxv8i16);
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8);
}
if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
// Only required for llvm.aarch64.mops.memset.tag
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
}
PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
}
void AArch64TargetLowering::addTypeForNEON(MVT VT) {
assert(VT.isVector() && "VT should be a vector type");
if (VT.isFloatingPoint()) {
MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
}
// Mark vector float intrinsics as expand.
if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
}
// But we do support custom-lowering for FCOPYSIGN.
if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
((VT == MVT::v4f16 || VT == MVT::v8f16) && Subtarget->hasFullFP16()))
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::OR, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
setOperationAction(ISD::SELECT, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
for (MVT InnerVT : MVT::all_valuetypes())
setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
// CNT supports only B element sizes; widen via UADDLP for larger elements.
if (VT != MVT::v8i8 && VT != MVT::v16i8)
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
if (!VT.isFloatingPoint())
setOperationAction(ISD::ABS, VT, Legal);
// [SU][MIN|MAX] are available for all NEON types apart from i64.
if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
setOperationAction(Opcode, VT, Legal);
// F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
if (VT.isFloatingPoint() &&
VT.getVectorElementType() != MVT::bf16 &&
(VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
for (unsigned Opcode :
{ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
setOperationAction(Opcode, VT, Legal);
if (Subtarget->isLittleEndian()) {
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
setIndexedLoadAction(im, VT, Legal);
setIndexedStoreAction(im, VT, Legal);
}
}
}
bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
EVT OpVT) const {
// Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo).
if (!Subtarget->hasSVE())
return true;
// We can only support legal predicate result types.
if (ResVT != MVT::nxv2i1 && ResVT != MVT::nxv4i1 && ResVT != MVT::nxv8i1 &&
ResVT != MVT::nxv16i1)
return true;
// The whilelo instruction only works with i32 or i64 scalar inputs.
if (OpVT != MVT::i32 && OpVT != MVT::i64)
return true;
return false;
}
void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
// By default everything must be expanded.
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
setOperationAction(Op, VT, Expand);
// We use EXTRACT_SUBVECTOR to "cast" a scalable vector to a fixed length one.
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
if (VT.isFloatingPoint()) {
setCondCodeAction(ISD::SETO, VT, Expand);
setCondCodeAction(ISD::SETOLT, VT, Expand);
setCondCodeAction(ISD::SETLT, VT, Expand);
setCondCodeAction(ISD::SETOLE, VT, Expand);
setCondCodeAction(ISD::SETLE, VT, Expand);
setCondCodeAction(ISD::SETULT, VT, Expand);
setCondCodeAction(ISD::SETULE, VT, Expand);
setCondCodeAction(ISD::SETUGE, VT, Expand);
setCondCodeAction(ISD::SETUGT, VT, Expand);
setCondCodeAction(ISD::SETUEQ, VT, Expand);
setCondCodeAction(ISD::SETUNE, VT, Expand);
}
// Mark integer truncating stores/extending loads as having custom lowering
if (VT.isInteger()) {
MVT InnerVT = VT.changeVectorElementType(MVT::i8);
while (InnerVT != VT) {
setTruncStoreAction(VT, InnerVT, Custom);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Custom);
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Custom);
InnerVT = InnerVT.changeVectorElementType(
MVT::getIntegerVT(2 * InnerVT.getScalarSizeInBits()));
}
}
// Mark floating-point truncating stores/extending loads as having custom
// lowering
if (VT.isFloatingPoint()) {
MVT InnerVT = VT.changeVectorElementType(MVT::f16);
while (InnerVT != VT) {
setTruncStoreAction(VT, InnerVT, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Custom);
InnerVT = InnerVT.changeVectorElementType(
MVT::getFloatingPointVT(2 * InnerVT.getScalarSizeInBits()));
}
}
// Lower fixed length vector operations to scalable equivalents.
setOperationAction(ISD::ABS, VT, Custom);
setOperationAction(ISD::ADD, VT, Custom);
setOperationAction(ISD::AND, VT, Custom);
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
setOperationAction(ISD::BITCAST, VT, Custom);
setOperationAction(ISD::BITREVERSE, VT, Custom);
setOperationAction(ISD::BSWAP, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::CTLZ, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FADD, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::FCEIL, VT, Custom);
setOperationAction(ISD::FDIV, VT, Custom);
setOperationAction(ISD::FFLOOR, VT, Custom);
setOperationAction(ISD::FMA, VT, Custom);
setOperationAction(ISD::FMAXIMUM, VT, Custom);
setOperationAction(ISD::FMAXNUM, VT, Custom);
setOperationAction(ISD::FMINIMUM, VT, Custom);
setOperationAction(ISD::FMINNUM, VT, Custom);
setOperationAction(ISD::FMUL, VT, Custom);
setOperationAction(ISD::FNEARBYINT, VT, Custom);
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FP_EXTEND, VT, Custom);
setOperationAction(ISD::FP_ROUND, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::FRINT, VT, Custom);
setOperationAction(ISD::FROUND, VT, Custom);
setOperationAction(ISD::FROUNDEVEN, VT, Custom);
setOperationAction(ISD::FSQRT, VT, Custom);
setOperationAction(ISD::FSUB, VT, Custom);
setOperationAction(ISD::FTRUNC, VT, Custom);
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::MULHS, VT, Custom);
setOperationAction(ISD::MULHU, VT, Custom);
setOperationAction(ISD::OR, VT, Custom);
setOperationAction(ISD::SDIV, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::SMAX, VT, Custom);
setOperationAction(ISD::SMIN, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
setOperationAction(ISD::SUB, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::UDIV, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::UMAX, VT, Custom);
setOperationAction(ISD::UMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::XOR, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
}
void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
addRegisterClass(VT, &AArch64::FPR64RegClass);
addTypeForNEON(VT);
}
void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
addRegisterClass(VT, &AArch64::FPR128RegClass);
addTypeForNEON(VT);
}
EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &,
LLVMContext &C, EVT VT) const {
if (!VT.isVector())
return MVT::i32;
if (VT.isScalableVector())
return EVT::getVectorVT(C, MVT::i1, VT.getVectorElementCount());
return VT.changeVectorElementTypeToInteger();
}
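// Try to replace an immediate that is not a valid logical immediate with one
// that agrees with it on all demanded bits and is either a valid bitmask
// immediate for NewOpc or all-zeros/all-ones (which generic DAG combining can
// then fold away).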
static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
const APInt &Demanded,
TargetLowering::TargetLoweringOpt &TLO,
unsigned NewOpc) {
uint64_t OldImm = Imm, NewImm, Enc;
uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
// Return if the immediate is already all zeros, all ones, a bimm32 or a
// bimm64.
if (Imm == 0 || Imm == Mask ||
AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
return false;
unsigned EltSize = Size;
uint64_t DemandedBits = Demanded.getZExtValue();
// Clear bits that are not demanded.
Imm &= DemandedBits;
while (true) {
// The goal here is to set the non-demanded bits in a way that minimizes
// the number of switching between 0 and 1. In order to achieve this goal,
// we set the non-demanded bits to the value of the preceding demanded bits.
// For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
// non-demanded bit), we copy bit0 (1) to the least significant 'x',
// bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
// The final result is 0b11000011.
uint64_t NonDemandedBits = ~DemandedBits;
uint64_t InvertedImm = ~Imm & DemandedBits;
uint64_t RotatedImm =
((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
NonDemandedBits;
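// Adding NonDemandedBits propagates each RotatedImm marker through its run of
// non-demanded bits (Carry re-injects a carry that wrapped past the top bit),
// so Ones ends up set in exactly those non-demanded positions whose nearest
// lower demanded bit is 1.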
uint64_t Sum = RotatedImm + NonDemandedBits;
bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
uint64_t Ones = (Sum + Carry) & NonDemandedBits;
NewImm = (Imm | Ones) & Mask;
// If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
// or all-ones or all-zeros, in which case we can stop searching. Otherwise,
// we halve the element size and continue the search.
if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
break;
// We cannot shrink the element size any further if it is 2-bits.
if (EltSize == 2)
return false;
EltSize /= 2;
Mask >>= EltSize;
uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
// Return if there is a mismatch in any of the demanded bits of Imm and Hi.
if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
return false;
// Merge the upper and lower halves of Imm and DemandedBits.
Imm |= Hi;
DemandedBits |= DemandedBitsHi;
}
++NumOptimizedImms;
// Replicate the element across the register width.
while (EltSize < Size) {
NewImm |= NewImm << EltSize;
EltSize *= 2;
}
(void)OldImm;
assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
"demanded bits should never be altered");
assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
// Create the new constant immediate node.
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue New;
// If the new constant immediate is all-zeros or all-ones, let the target
// independent DAG combine optimize this node.
if (NewImm == 0 || NewImm == OrigMask) {
New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
TLO.DAG.getConstant(NewImm, DL, VT));
// Otherwise, create a machine node so that target independent DAG combine
// doesn't undo this optimization.
} else {
Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
New = SDValue(
TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
}
return TLO.CombineTo(Op, New);
}
bool AArch64TargetLowering::targetShrinkDemandedConstant(
SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
TargetLoweringOpt &TLO) const {
// Delay this optimization to as late as possible.
if (!TLO.LegalOps)
return false;
if (!EnableOptimizeLogicalImm)
return false;
EVT VT = Op.getValueType();
if (VT.isVector())
return false;
unsigned Size = VT.getSizeInBits();
assert((Size == 32 || Size == 64) &&
"i32 or i64 is expected after legalization.");
// Exit early if we demand all bits.
if (DemandedBits.countPopulation() == Size)
return false;
unsigned NewOpc;
switch (Op.getOpcode()) {
default:
return false;
case ISD::AND:
NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
break;
case ISD::OR:
NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
break;
case ISD::XOR:
NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
break;
}
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
if (!C)
return false;
uint64_t Imm = C->getZExtValue();
return optimizeLogicalImm(Op, Size, Imm, DemandedBits, TLO, NewOpc);
}
/// computeKnownBitsForTargetNode - Determine which of the bits specified in
/// Mask are known to be either zero or one and return them Known.
void AArch64TargetLowering::computeKnownBitsForTargetNode(
const SDValue Op, KnownBits &Known,
const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
switch (Op.getOpcode()) {
default:
break;
case AArch64ISD::CSEL: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
Known = KnownBits::commonBits(Known, Known2);
break;
}
case AArch64ISD::BICi: {
// Compute the bit cleared value.
uint64_t Mask =
~(Op->getConstantOperandVal(1) << Op->getConstantOperandVal(2));
Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
Known &= KnownBits::makeConstant(APInt(Known.getBitWidth(), Mask));
break;
}
case AArch64ISD::VLSHR: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
Known = KnownBits::lshr(Known, Known2);
break;
}
case AArch64ISD::VASHR: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
Known = KnownBits::ashr(Known, Known2);
break;
}
case AArch64ISD::LOADgot:
case AArch64ISD::ADDlow: {
if (!Subtarget->isTargetILP32())
break;
// In ILP32 mode all valid pointers are in the low 4GB of the address-space.
Known.Zero = APInt::getHighBitsSet(64, 32);
break;
}
case AArch64ISD::ASSERT_ZEXT_BOOL: {
Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
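// The operand is asserted to be a boolean (0 or 1), so bits 1-7 are known
// to be zero.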
Known.Zero |= APInt(Known.getBitWidth(), 0xFE);
break;
}
case ISD::INTRINSIC_W_CHAIN: {
ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
switch (IntID) {
default: return;
case Intrinsic::aarch64_ldaxr:
case Intrinsic::aarch64_ldxr: {
unsigned BitWidth = Known.getBitWidth();
EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
unsigned MemBits = VT.getScalarSizeInBits();
Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
return;
}
}
break;
}
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_VOID: {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
switch (IntNo) {
default:
break;
case Intrinsic::aarch64_neon_umaxv:
case Intrinsic::aarch64_neon_uminv: {
// Figure out the datatype of the vector operand. The UMINV instruction
// will zero extend the result, so we can mark as known zero all the
// bits larger than the element datatype. 32-bit or larger doesn't need
// this as those are legal types and will be handled by isel directly.
MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
unsigned BitWidth = Known.getBitWidth();
if (VT == MVT::v8i8 || VT == MVT::v16i8) {
assert(BitWidth >= 8 && "Unexpected width!");
APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
Known.Zero |= Mask;
} else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
assert(BitWidth >= 16 && "Unexpected width!");
APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
Known.Zero |= Mask;
}
break;
}
}
}
}
}
MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
EVT) const {
return MVT::i64;
}
bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *Fast) const {
if (Subtarget->requiresStrictAlign())
return false;
if (Fast) {
// Some CPUs are fine with unaligned stores except for 128-bit ones.
*Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
// See comments in performSTORECombine() for more details about
// these conditions.
// Code that uses clang vector extensions can mark that it
// wants unaligned accesses to be treated as fast by
// underspecifying alignment to be 1 or 2.
Alignment <= 2 ||
// Disregard v2i64. Memcpy lowering produces those and splitting
// them regresses performance on micro-benchmarks and olden/bh.
VT == MVT::v2i64;
}
return true;
}
// Same as above but handling LLTs instead.
bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *Fast) const {
if (Subtarget->requiresStrictAlign())
return false;
if (Fast) {
// Some CPUs are fine with unaligned stores except for 128-bit ones.
*Fast = !Subtarget->isMisaligned128StoreSlow() ||
Ty.getSizeInBytes() != 16 ||
// See comments in performSTORECombine() for more details about
// these conditions.
// Code that uses clang vector extensions can mark that it
// wants unaligned accesses to be treated as fast by
// underspecifying alignment to be 1 or 2.
Alignment <= 2 ||
// Disregard v2i64. Memcpy lowering produces those and splitting
// them regresses performance on micro-benchmarks and olden/bh.
Ty == LLT::fixed_vector(2, 64);
}
return true;
}
FastISel *
AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const {
return AArch64::createFastISel(funcInfo, libInfo);
}
const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
#define MAKE_CASE(V) \
case V: \
return #V;
switch ((AArch64ISD::NodeType)Opcode) {
case AArch64ISD::FIRST_NUMBER:
break;
MAKE_CASE(AArch64ISD::CALL)
MAKE_CASE(AArch64ISD::ADRP)
MAKE_CASE(AArch64ISD::ADR)
MAKE_CASE(AArch64ISD::ADDlow)
MAKE_CASE(AArch64ISD::LOADgot)
MAKE_CASE(AArch64ISD::RET_FLAG)
MAKE_CASE(AArch64ISD::BRCOND)
MAKE_CASE(AArch64ISD::CSEL)
MAKE_CASE(AArch64ISD::CSINV)
MAKE_CASE(AArch64ISD::CSNEG)
MAKE_CASE(AArch64ISD::CSINC)
MAKE_CASE(AArch64ISD::THREAD_POINTER)
MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
MAKE_CASE(AArch64ISD::ABDS_PRED)
MAKE_CASE(AArch64ISD::ABDU_PRED)
MAKE_CASE(AArch64ISD::ADD_PRED)
MAKE_CASE(AArch64ISD::MUL_PRED)
MAKE_CASE(AArch64ISD::MULHS_PRED)
MAKE_CASE(AArch64ISD::MULHU_PRED)
MAKE_CASE(AArch64ISD::SDIV_PRED)
MAKE_CASE(AArch64ISD::SHL_PRED)
MAKE_CASE(AArch64ISD::SMAX_PRED)
MAKE_CASE(AArch64ISD::SMIN_PRED)
MAKE_CASE(AArch64ISD::SRA_PRED)
MAKE_CASE(AArch64ISD::SRL_PRED)
MAKE_CASE(AArch64ISD::SUB_PRED)
MAKE_CASE(AArch64ISD::UDIV_PRED)
MAKE_CASE(AArch64ISD::UMAX_PRED)
MAKE_CASE(AArch64ISD::UMIN_PRED)
MAKE_CASE(AArch64ISD::SRAD_MERGE_OP1)
MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FP_ROUND_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FRECPX_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FABS_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::ABS_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::NEG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
MAKE_CASE(AArch64ISD::ADC)
MAKE_CASE(AArch64ISD::SBC)
MAKE_CASE(AArch64ISD::ADDS)
MAKE_CASE(AArch64ISD::SUBS)
MAKE_CASE(AArch64ISD::ADCS)
MAKE_CASE(AArch64ISD::SBCS)
MAKE_CASE(AArch64ISD::ANDS)
MAKE_CASE(AArch64ISD::CCMP)
MAKE_CASE(AArch64ISD::CCMN)
MAKE_CASE(AArch64ISD::FCCMP)
MAKE_CASE(AArch64ISD::FCMP)
MAKE_CASE(AArch64ISD::STRICT_FCMP)
MAKE_CASE(AArch64ISD::STRICT_FCMPE)
MAKE_CASE(AArch64ISD::DUP)
MAKE_CASE(AArch64ISD::DUPLANE8)
MAKE_CASE(AArch64ISD::DUPLANE16)
MAKE_CASE(AArch64ISD::DUPLANE32)
MAKE_CASE(AArch64ISD::DUPLANE64)
MAKE_CASE(AArch64ISD::MOVI)
MAKE_CASE(AArch64ISD::MOVIshift)
MAKE_CASE(AArch64ISD::MOVIedit)
MAKE_CASE(AArch64ISD::MOVImsl)
MAKE_CASE(AArch64ISD::FMOV)
MAKE_CASE(AArch64ISD::MVNIshift)
MAKE_CASE(AArch64ISD::MVNImsl)
MAKE_CASE(AArch64ISD::BICi)
MAKE_CASE(AArch64ISD::ORRi)
MAKE_CASE(AArch64ISD::BSP)
MAKE_CASE(AArch64ISD::EXTR)
MAKE_CASE(AArch64ISD::ZIP1)
MAKE_CASE(AArch64ISD::ZIP2)
MAKE_CASE(AArch64ISD::UZP1)
MAKE_CASE(AArch64ISD::UZP2)
MAKE_CASE(AArch64ISD::TRN1)
MAKE_CASE(AArch64ISD::TRN2)
MAKE_CASE(AArch64ISD::REV16)
MAKE_CASE(AArch64ISD::REV32)
MAKE_CASE(AArch64ISD::REV64)
MAKE_CASE(AArch64ISD::EXT)
MAKE_CASE(AArch64ISD::SPLICE)
MAKE_CASE(AArch64ISD::VSHL)
MAKE_CASE(AArch64ISD::VLSHR)
MAKE_CASE(AArch64ISD::VASHR)
MAKE_CASE(AArch64ISD::VSLI)
MAKE_CASE(AArch64ISD::VSRI)
MAKE_CASE(AArch64ISD::CMEQ)
MAKE_CASE(AArch64ISD::CMGE)
MAKE_CASE(AArch64ISD::CMGT)
MAKE_CASE(AArch64ISD::CMHI)
MAKE_CASE(AArch64ISD::CMHS)
MAKE_CASE(AArch64ISD::FCMEQ)
MAKE_CASE(AArch64ISD::FCMGE)
MAKE_CASE(AArch64ISD::FCMGT)
MAKE_CASE(AArch64ISD::CMEQz)
MAKE_CASE(AArch64ISD::CMGEz)
MAKE_CASE(AArch64ISD::CMGTz)
MAKE_CASE(AArch64ISD::CMLEz)
MAKE_CASE(AArch64ISD::CMLTz)
MAKE_CASE(AArch64ISD::FCMEQz)
MAKE_CASE(AArch64ISD::FCMGEz)
MAKE_CASE(AArch64ISD::FCMGTz)
MAKE_CASE(AArch64ISD::FCMLEz)
MAKE_CASE(AArch64ISD::FCMLTz)
MAKE_CASE(AArch64ISD::SADDV)
MAKE_CASE(AArch64ISD::UADDV)
MAKE_CASE(AArch64ISD::SRHADD)
MAKE_CASE(AArch64ISD::URHADD)
MAKE_CASE(AArch64ISD::SHADD)
MAKE_CASE(AArch64ISD::UHADD)
MAKE_CASE(AArch64ISD::SDOT)
MAKE_CASE(AArch64ISD::UDOT)
MAKE_CASE(AArch64ISD::SMINV)
MAKE_CASE(AArch64ISD::UMINV)
MAKE_CASE(AArch64ISD::SMAXV)
MAKE_CASE(AArch64ISD::UMAXV)
MAKE_CASE(AArch64ISD::SADDV_PRED)
MAKE_CASE(AArch64ISD::UADDV_PRED)
MAKE_CASE(AArch64ISD::SMAXV_PRED)
MAKE_CASE(AArch64ISD::UMAXV_PRED)
MAKE_CASE(AArch64ISD::SMINV_PRED)
MAKE_CASE(AArch64ISD::UMINV_PRED)
MAKE_CASE(AArch64ISD::ORV_PRED)
MAKE_CASE(AArch64ISD::EORV_PRED)
MAKE_CASE(AArch64ISD::ANDV_PRED)
MAKE_CASE(AArch64ISD::CLASTA_N)
MAKE_CASE(AArch64ISD::CLASTB_N)
MAKE_CASE(AArch64ISD::LASTA)
MAKE_CASE(AArch64ISD::LASTB)
MAKE_CASE(AArch64ISD::REINTERPRET_CAST)
MAKE_CASE(AArch64ISD::LS64_BUILD)
MAKE_CASE(AArch64ISD::LS64_EXTRACT)
MAKE_CASE(AArch64ISD::TBL)
MAKE_CASE(AArch64ISD::FADD_PRED)
MAKE_CASE(AArch64ISD::FADDA_PRED)
MAKE_CASE(AArch64ISD::FADDV_PRED)
MAKE_CASE(AArch64ISD::FDIV_PRED)
MAKE_CASE(AArch64ISD::FMA_PRED)
MAKE_CASE(AArch64ISD::FMAX_PRED)
MAKE_CASE(AArch64ISD::FMAXV_PRED)
MAKE_CASE(AArch64ISD::FMAXNM_PRED)
MAKE_CASE(AArch64ISD::FMAXNMV_PRED)
MAKE_CASE(AArch64ISD::FMIN_PRED)
MAKE_CASE(AArch64ISD::FMINV_PRED)
MAKE_CASE(AArch64ISD::FMINNM_PRED)
MAKE_CASE(AArch64ISD::FMINNMV_PRED)
MAKE_CASE(AArch64ISD::FMUL_PRED)
MAKE_CASE(AArch64ISD::FSUB_PRED)
MAKE_CASE(AArch64ISD::BIC)
MAKE_CASE(AArch64ISD::BIT)
MAKE_CASE(AArch64ISD::CBZ)
MAKE_CASE(AArch64ISD::CBNZ)
MAKE_CASE(AArch64ISD::TBZ)
MAKE_CASE(AArch64ISD::TBNZ)
MAKE_CASE(AArch64ISD::TC_RETURN)
MAKE_CASE(AArch64ISD::PREFETCH)
MAKE_CASE(AArch64ISD::SITOF)
MAKE_CASE(AArch64ISD::UITOF)
MAKE_CASE(AArch64ISD::NVCAST)
MAKE_CASE(AArch64ISD::MRS)
MAKE_CASE(AArch64ISD::SQSHL_I)
MAKE_CASE(AArch64ISD::UQSHL_I)
MAKE_CASE(AArch64ISD::SRSHR_I)
MAKE_CASE(AArch64ISD::URSHR_I)
MAKE_CASE(AArch64ISD::SQSHLU_I)
MAKE_CASE(AArch64ISD::WrapperLarge)
MAKE_CASE(AArch64ISD::LD2post)
MAKE_CASE(AArch64ISD::LD3post)
MAKE_CASE(AArch64ISD::LD4post)
MAKE_CASE(AArch64ISD::ST2post)
MAKE_CASE(AArch64ISD::ST3post)
MAKE_CASE(AArch64ISD::ST4post)
MAKE_CASE(AArch64ISD::LD1x2post)
MAKE_CASE(AArch64ISD::LD1x3post)
MAKE_CASE(AArch64ISD::LD1x4post)
MAKE_CASE(AArch64ISD::ST1x2post)
MAKE_CASE(AArch64ISD::ST1x3post)
MAKE_CASE(AArch64ISD::ST1x4post)
MAKE_CASE(AArch64ISD::LD1DUPpost)
MAKE_CASE(AArch64ISD::LD2DUPpost)
MAKE_CASE(AArch64ISD::LD3DUPpost)
MAKE_CASE(AArch64ISD::LD4DUPpost)
MAKE_CASE(AArch64ISD::LD1LANEpost)
MAKE_CASE(AArch64ISD::LD2LANEpost)
MAKE_CASE(AArch64ISD::LD3LANEpost)
MAKE_CASE(AArch64ISD::LD4LANEpost)
MAKE_CASE(AArch64ISD::ST2LANEpost)
MAKE_CASE(AArch64ISD::ST3LANEpost)
MAKE_CASE(AArch64ISD::ST4LANEpost)
MAKE_CASE(AArch64ISD::SMULL)
MAKE_CASE(AArch64ISD::UMULL)
MAKE_CASE(AArch64ISD::FRECPE)
MAKE_CASE(AArch64ISD::FRECPS)
MAKE_CASE(AArch64ISD::FRSQRTE)
MAKE_CASE(AArch64ISD::FRSQRTS)
MAKE_CASE(AArch64ISD::STG)
MAKE_CASE(AArch64ISD::STZG)
MAKE_CASE(AArch64ISD::ST2G)
MAKE_CASE(AArch64ISD::STZ2G)
MAKE_CASE(AArch64ISD::SUNPKHI)
MAKE_CASE(AArch64ISD::SUNPKLO)
MAKE_CASE(AArch64ISD::UUNPKHI)
MAKE_CASE(AArch64ISD::UUNPKLO)
MAKE_CASE(AArch64ISD::INSR)
MAKE_CASE(AArch64ISD::PTEST)
MAKE_CASE(AArch64ISD::PTRUE)
MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LDNF1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LDFF1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LDFF1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LD1RQ_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LD1RO_MERGE_ZERO)
MAKE_CASE(AArch64ISD::SVE_LD2_MERGE_ZERO)
MAKE_CASE(AArch64ISD::SVE_LD3_MERGE_ZERO)
MAKE_CASE(AArch64ISD::SVE_LD4_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_SXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_UXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_IMM_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_SXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_UXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_IMM_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_SXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_UXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_IMM_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_IMM_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDNT1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDNT1_INDEX_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDNT1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::ST1_PRED)
MAKE_CASE(AArch64ISD::SST1_PRED)
MAKE_CASE(AArch64ISD::SST1_SCALED_PRED)
MAKE_CASE(AArch64ISD::SST1_SXTW_PRED)
MAKE_CASE(AArch64ISD::SST1_UXTW_PRED)
MAKE_CASE(AArch64ISD::SST1_SXTW_SCALED_PRED)
MAKE_CASE(AArch64ISD::SST1_UXTW_SCALED_PRED)
MAKE_CASE(AArch64ISD::SST1_IMM_PRED)
MAKE_CASE(AArch64ISD::SSTNT1_PRED)
MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED)
MAKE_CASE(AArch64ISD::LDP)
MAKE_CASE(AArch64ISD::STP)
MAKE_CASE(AArch64ISD::STNP)
MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::REVH_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::REVW_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::INDEX_VECTOR)
MAKE_CASE(AArch64ISD::UADDLP)
MAKE_CASE(AArch64ISD::CALL_RVMARKER)
MAKE_CASE(AArch64ISD::ASSERT_ZEXT_BOOL)
MAKE_CASE(AArch64ISD::MOPS_MEMSET)
MAKE_CASE(AArch64ISD::MOPS_MEMSET_TAGGING)
MAKE_CASE(AArch64ISD::MOPS_MEMCOPY)
MAKE_CASE(AArch64ISD::MOPS_MEMMOVE)
}
#undef MAKE_CASE
return nullptr;
}
MachineBasicBlock *
AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
MachineBasicBlock *MBB) const {
// We materialise the F128CSEL pseudo-instruction as some control flow and a
// phi node:
// OrigBB:
// [... previous instrs leading to comparison ...]
// b.ne TrueBB
// b EndBB
// TrueBB:
// ; Fallthrough
// EndBB:
// Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
DebugLoc DL = MI.getDebugLoc();
MachineFunction::iterator It = ++MBB->getIterator();
Register DestReg = MI.getOperand(0).getReg();
Register IfTrueReg = MI.getOperand(1).getReg();
Register IfFalseReg = MI.getOperand(2).getReg();
unsigned CondCode = MI.getOperand(3).getImm();
bool NZCVKilled = MI.getOperand(4).isKill();
MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
MF->insert(It, TrueBB);
MF->insert(It, EndBB);
// Transfer the rest of the current basic block to EndBB.
EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
MBB->end());
EndBB->transferSuccessorsAndUpdatePHIs(MBB);
BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
MBB->addSuccessor(TrueBB);
MBB->addSuccessor(EndBB);
// TrueBB falls through to the end.
TrueBB->addSuccessor(EndBB);
if (!NZCVKilled) {
TrueBB->addLiveIn(AArch64::NZCV);
EndBB->addLiveIn(AArch64::NZCV);
}
BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
.addReg(IfTrueReg)
.addMBB(TrueBB)
.addReg(IfFalseReg)
.addMBB(MBB);
MI.eraseFromParent();
return EndBB;
}
MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
MachineInstr &MI, MachineBasicBlock *BB) const {
assert(!isAsynchronousEHPersonality(classifyEHPersonality(
BB->getParent()->getFunction().getPersonalityFn())) &&
"SEH does not use catchret!");
return BB;
}
MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *BB) const {
switch (MI.getOpcode()) {
default:
#ifndef NDEBUG
MI.dump();
#endif
llvm_unreachable("Unexpected instruction for custom inserter!");
case AArch64::F128CSEL:
return EmitF128CSEL(MI, BB);
case TargetOpcode::STATEPOINT:
// STATEPOINT is a pseudo instruction which has no implicit defs/uses,
// while the bl call instruction (to which the statepoint is eventually
// lowered) has an implicit def. Add this implicit dead def here as a
// workaround.
MI.addOperand(*MI.getMF(), MachineOperand::CreateReg(AArch64::LR, true,
true, false, true));
LLVM_FALLTHROUGH;
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, BB);
case AArch64::CATCHRET:
return EmitLoweredCatchRet(MI, BB);
}
}
//===----------------------------------------------------------------------===//
// AArch64 Lowering private implementation.
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//
// Forward declarations of SVE fixed length lowering helpers
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT);
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
static SDValue convertFixedMaskToScalableVector(SDValue Mask,
SelectionDAG &DAG);
static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL,
EVT VT);
/// isZerosVector - Check whether SDNode N is a zero-filled vector.
static bool isZerosVector(const SDNode *N) {
// Look through a bit convert.
while (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
if (ISD::isConstantSplatVectorAllZeros(N))
return true;
if (N->getOpcode() != AArch64ISD::DUP)
return false;
auto Opnd0 = N->getOperand(0);
auto *CINT = dyn_cast<ConstantSDNode>(Opnd0);
auto *CFP = dyn_cast<ConstantFPSDNode>(Opnd0);
return (CINT && CINT->isZero()) || (CFP && CFP->isZero());
}
/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
/// CC
static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
switch (CC) {
default:
llvm_unreachable("Unknown condition code!");
case ISD::SETNE:
return AArch64CC::NE;
case ISD::SETEQ:
return AArch64CC::EQ;
case ISD::SETGT:
return AArch64CC::GT;
case ISD::SETGE:
return AArch64CC::GE;
case ISD::SETLT:
return AArch64CC::LT;
case ISD::SETLE:
return AArch64CC::LE;
case ISD::SETUGT:
return AArch64CC::HI;
case ISD::SETUGE:
return AArch64CC::HS;
case ISD::SETULT:
return AArch64CC::LO;
case ISD::SETULE:
return AArch64CC::LS;
}
}
/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
static void changeFPCCToAArch64CC(ISD::CondCode CC,
AArch64CC::CondCode &CondCode,
AArch64CC::CondCode &CondCode2) {
CondCode2 = AArch64CC::AL;
switch (CC) {
default:
llvm_unreachable("Unknown FP condition!");
case ISD::SETEQ:
case ISD::SETOEQ:
CondCode = AArch64CC::EQ;
break;
case ISD::SETGT:
case ISD::SETOGT:
CondCode = AArch64CC::GT;
break;
case ISD::SETGE:
case ISD::SETOGE:
CondCode = AArch64CC::GE;
break;
case ISD::SETOLT:
CondCode = AArch64CC::MI;
break;
case ISD::SETOLE:
CondCode = AArch64CC::LS;
break;
case ISD::SETONE:
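// ONE = OLT || OGT, so two condition codes (MI and GT) are required.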
CondCode = AArch64CC::MI;
CondCode2 = AArch64CC::GT;
break;
case ISD::SETO:
CondCode = AArch64CC::VC;
break;
case ISD::SETUO:
CondCode = AArch64CC::VS;
break;
case ISD::SETUEQ:
CondCode = AArch64CC::EQ;
CondCode2 = AArch64CC::VS;
break;
case ISD::SETUGT:
CondCode = AArch64CC::HI;
break;
case ISD::SETUGE:
CondCode = AArch64CC::PL;
break;
case ISD::SETLT:
case ISD::SETULT:
CondCode = AArch64CC::LT;
break;
case ISD::SETLE:
case ISD::SETULE:
CondCode = AArch64CC::LE;
break;
case ISD::SETNE:
case ISD::SETUNE:
CondCode = AArch64CC::NE;
break;
}
}
/// Convert a DAG fp condition code to an AArch64 CC.
/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
/// should be AND'ed instead of OR'ed.
static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
AArch64CC::CondCode &CondCode,
AArch64CC::CondCode &CondCode2) {
CondCode2 = AArch64CC::AL;
switch (CC) {
default:
changeFPCCToAArch64CC(CC, CondCode, CondCode2);
assert(CondCode2 == AArch64CC::AL);
break;
case ISD::SETONE:
// (a one b)
// == ((a olt b) || (a ogt b))
// == ((a ord b) && (a une b))
CondCode = AArch64CC::VC;
CondCode2 = AArch64CC::NE;
break;
case ISD::SETUEQ:
// (a ueq b)
// == ((a uno b) || (a oeq b))
// == ((a ule b) && (a uge b))
CondCode = AArch64CC::PL;
CondCode2 = AArch64CC::LE;
break;
}
}
/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
/// CC usable with the vector instructions. Fewer operations are available
/// without a real NZCV register, so we have to use less efficient combinations
/// to get the same effect.
static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
AArch64CC::CondCode &CondCode,
AArch64CC::CondCode &CondCode2,
bool &Invert) {
Invert = false;
switch (CC) {
default:
// Mostly the scalar mappings work fine.
changeFPCCToAArch64CC(CC, CondCode, CondCode2);
break;
case ISD::SETUO:
Invert = true;
LLVM_FALLTHROUGH;
case ISD::SETO:
CondCode = AArch64CC::MI;
CondCode2 = AArch64CC::GE;
break;
case ISD::SETUEQ:
case ISD::SETULT:
case ISD::SETULE:
case ISD::SETUGT:
case ISD::SETUGE:
// All of the compare-mask comparisons are ordered, but we can switch
// between the two by a double inversion. E.g. ULE == !OGT.
Invert = true;
changeFPCCToAArch64CC(getSetCCInverse(CC, /* FP inverse */ MVT::f32),
CondCode, CondCode2);
break;
}
}
static bool isLegalArithImmed(uint64_t C) {
// Matches AArch64DAGToDAGISel::SelectArithImmed().
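// I.e. a 12-bit unsigned immediate, optionally shifted left by 12 bits.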
bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
LLVM_DEBUG(dbgs() << "Is imm " << C
<< " legal: " << (IsLegal ? "yes\n" : "no\n"));
return IsLegal;
}
// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
// the grounds that "op1 - (-op2) == op1 + op2"? Not always, because the C and
// V flags can be set differently by this operation. It comes down to whether
// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are equal
// then everything is fine. If not then the optimization is wrong. Thus general
// comparisons are only valid if op2 != 0.
//
// So, finally, the only LLVM-native comparisons that don't mention C and V
// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
// the absence of information about op2.
static bool isCMN(SDValue Op, ISD::CondCode CC) {
return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
(CC == ISD::SETEQ || CC == ISD::SETNE);
}
static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
SelectionDAG &DAG, SDValue Chain,
bool IsSignaling) {
EVT VT = LHS.getValueType();
assert(VT != MVT::f128);
assert(VT != MVT::f16 && "Lowering of strict fp16 not yet implemented");
unsigned Opcode =
IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS});
}
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG) {
EVT VT = LHS.getValueType();
const bool FullFP16 =
static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
if (VT.isFloatingPoint()) {
assert(VT != MVT::f128);
if (VT == MVT::f16 && !FullFP16) {
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
VT = MVT::f32;
}
return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
}
// The CMP instruction is just an alias for SUBS, and representing it as
// SUBS means that it's possible to get CSE with subtract operations.
// A later phase can perform the optimization of setting the destination
// register to WZR/XZR if it ends up being unused.
unsigned Opcode = AArch64ISD::SUBS;
if (isCMN(RHS, CC)) {
// Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
Opcode = AArch64ISD::ADDS;
RHS = RHS.getOperand(1);
} else if (isCMN(LHS, CC)) {
// As we are looking for EQ/NE compares, the operands can be commuted; can
// we combine a (CMP (sub 0, op1), op2) into a CMN instruction?
Opcode = AArch64ISD::ADDS;
LHS = LHS.getOperand(1);
} else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
if (LHS.getOpcode() == ISD::AND) {
// Similarly, (CMP (and X, Y), 0) can be implemented with a TST
// (a.k.a. ANDS) except that the flags are only guaranteed to work for one
// of the signed comparisons.
const SDValue ANDSNode = DAG.getNode(AArch64ISD::ANDS, dl,
DAG.getVTList(VT, MVT_CC),
LHS.getOperand(0),
LHS.getOperand(1));
// Replace all users of (and X, Y) with newly generated (ands X, Y)
DAG.ReplaceAllUsesWith(LHS, ANDSNode);
return ANDSNode.getValue(1);
} else if (LHS.getOpcode() == AArch64ISD::ANDS) {
// Use result of ANDS
return LHS.getValue(1);
}
}
return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
.getValue(1);
}
/// \defgroup AArch64CCMP CMP;CCMP matching
///
/// These functions deal with the formation of CMP;CCMP;... sequences.
/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
/// a comparison. They set the NZCV flags to a predefined value if their
/// predicate is false. This makes it possible to express arbitrary
/// conjunctions, for example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
/// expressed as:
/// cmp A
/// ccmp B, inv(CB), CA
/// check for CB flags
///
/// This naturally lets us implement chains of AND operations with SETCC
/// operands. And we can even implement some other situations by transforming
/// them:
/// - We can implement (NEG SETCC) i.e. negating a single comparison by
/// negating the flags used in a CCMP/FCCMP operations.
/// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
/// by negating the flags we test for afterwards, i.e.
/// NEG (CMP CCMP CCMP ...) can be implemented.
/// - Note that we can only ever negate all previously processed results.
/// What we cannot implement by flipping the flags to test is a negation
/// of two sub-trees (because the negation affects all sub-trees emitted so
/// far, so the 2nd sub-tree we emit would also affect the first).
/// With those tools we can implement some OR operations:
/// - (OR (SETCC A) (SETCC B)) can be implemented via:
/// NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))
/// - After transforming OR to NEG/AND combinations we may be able to use NEG
/// elimination rules from earlier to implement the whole thing as a
/// CCMP/FCCMP chain.
///
/// As a complete example:
/// or (or (setCA (cmp A)) (setCB (cmp B)))
/// (and (setCC (cmp C)) (setCD (cmp D)))
/// can be reassociated to:
/// or (and (setCC (cmp C)) (setCD (cmp D)))
/// (or (setCA (cmp A)) (setCB (cmp B)))
/// can be transformed to:
/// not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
/// (and (not (setCA (cmp A))) (not (setCB (cmp B))))))
/// which can be implemented as:
/// cmp C
/// ccmp D, inv(CD), CC
/// ccmp A, CA, inv(CD)
/// ccmp B, CB, inv(CA)
/// check for CB flags
///
/// A counterexample is "or (and A B) (and C D)", which translates to
/// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))); we
/// can implement only one of the inner (not) operations, not both!
/// @{
/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
ISD::CondCode CC, SDValue CCOp,
AArch64CC::CondCode Predicate,
AArch64CC::CondCode OutCC,
const SDLoc &DL, SelectionDAG &DAG) {
unsigned Opcode = 0;
const bool FullFP16 =
static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
if (LHS.getValueType().isFloatingPoint()) {
assert(LHS.getValueType() != MVT::f128);
if (LHS.getValueType() == MVT::f16 && !FullFP16) {
LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
}
Opcode = AArch64ISD::FCCMP;
} else if (RHS.getOpcode() == ISD::SUB) {
SDValue SubOp0 = RHS.getOperand(0);
if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
// See emitComparison() on why we can only do this for SETEQ and SETNE.
Opcode = AArch64ISD::CCMN;
RHS = RHS.getOperand(1);
}
}
if (Opcode == 0)
Opcode = AArch64ISD::CCMP;
SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
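// If Predicate is false the CCMP sets the flags to NZCV directly, so pick a
// value that satisfies the inverted OutCC and thereby makes the subsequent
// OutCC test fail.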
AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
}
/// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
/// expressed as a conjunction. See \ref AArch64CCMP.
/// \param CanNegate Set to true if we can negate the whole sub-tree just by
/// changing the conditions on the SETCC tests.
/// (this means we can call emitConjunctionRec() with
/// Negate==true on this sub-tree)
/// \param MustBeFirst Set to true if this subtree needs to be negated and we
/// cannot do the negation naturally. We are required to
/// emit the subtree first in this case.
/// \param WillNegate Is true if we are called when the result of this
/// subexpression must be negated. This happens when the
/// outer expression is an OR. We can use this fact to know
/// that we have a double negation (or (or ...) ...) that
/// can be implemented for free.
static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
bool &MustBeFirst, bool WillNegate,
unsigned Depth = 0) {
if (!Val.hasOneUse())
return false;
unsigned Opcode = Val->getOpcode();
if (Opcode == ISD::SETCC) {
if (Val->getOperand(0).getValueType() == MVT::f128)
return false;
CanNegate = true;
MustBeFirst = false;
return true;
}
// Protect against exponential runtime and stack overflow.
if (Depth > 6)
return false;
if (Opcode == ISD::AND || Opcode == ISD::OR) {
bool IsOR = Opcode == ISD::OR;
SDValue O0 = Val->getOperand(0);
SDValue O1 = Val->getOperand(1);
bool CanNegateL;
bool MustBeFirstL;
if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
return false;
bool CanNegateR;
bool MustBeFirstR;
if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
return false;
if (MustBeFirstL && MustBeFirstR)
return false;
if (IsOR) {
// For an OR expression we need to be able to naturally negate at least
// one side or we cannot do the transformation at all.
if (!CanNegateL && !CanNegateR)
return false;
// If the result of the OR will be negated and we can naturally negate
// the leaves, then this sub-tree as a whole negates naturally.
CanNegate = WillNegate && CanNegateL && CanNegateR;
// If we cannot naturally negate the whole sub-tree, then this must be
// emitted first.
MustBeFirst = !CanNegate;
} else {
assert(Opcode == ISD::AND && "Must be OR or AND");
// We cannot naturally negate an AND operation.
CanNegate = false;
MustBeFirst = MustBeFirstL || MustBeFirstR;
}
return true;
}
return false;
}
/// Emit a conjunction or disjunction tree with the CMP/FCMP followed by a
/// chain of CCMP/FCCMP ops. See @ref AArch64CCMP.
/// Tries to transform the given i1 producing node @p Val into a series of
/// compare and conditional compare operations. @returns an NZCV flags
/// producing node and sets @p OutCC to the flags that should be tested, or
/// returns SDValue() if the transformation was not possible.
/// \p Negate is true if we want this sub-tree to be negated just by changing
/// SETCC conditions.
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
AArch64CC::CondCode Predicate) {
// We're at a tree leaf, produce a conditional comparison operation.
unsigned Opcode = Val->getOpcode();
if (Opcode == ISD::SETCC) {
SDValue LHS = Val->getOperand(0);
SDValue RHS = Val->getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
bool isInteger = LHS.getValueType().isInteger();
if (Negate)
CC = getSetCCInverse(CC, LHS.getValueType());
SDLoc DL(Val);
// Determine OutCC and handle FP special case.
if (isInteger) {
OutCC = changeIntCCToAArch64CC(CC);
} else {
assert(LHS.getValueType().isFloatingPoint());
AArch64CC::CondCode ExtraCC;
changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
// Some floating point conditions can't be tested with a single condition
// code. Construct an additional comparison in this case.
if (ExtraCC != AArch64CC::AL) {
SDValue ExtraCmp;
if (!CCOp.getNode())
ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
else
ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
ExtraCC, DL, DAG);
CCOp = ExtraCmp;
Predicate = ExtraCC;
}
}
// Produce a normal comparison if we are first in the chain
if (!CCOp)
return emitComparison(LHS, RHS, CC, DL, DAG);
// Otherwise produce a ccmp.
return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
DAG);
}
assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
bool IsOR = Opcode == ISD::OR;
SDValue LHS = Val->getOperand(0);
bool CanNegateL;
bool MustBeFirstL;
bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
assert(ValidL && "Valid conjunction/disjunction tree");
(void)ValidL;
SDValue RHS = Val->getOperand(1);
bool CanNegateR;
bool MustBeFirstR;
bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
assert(ValidR && "Valid conjunction/disjunction tree");
(void)ValidR;
// Swap sub-tree that must come first to the right side.
if (MustBeFirstL) {
assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
std::swap(LHS, RHS);
std::swap(CanNegateL, CanNegateR);
std::swap(MustBeFirstL, MustBeFirstR);
}
bool NegateR;
bool NegateAfterR;
bool NegateL;
bool NegateAfterAll;
if (Opcode == ISD::OR) {
// Swap the sub-tree that we can negate naturally to the left.
if (!CanNegateL) {
assert(CanNegateR && "at least one side must be negatable");
assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
assert(!Negate);
std::swap(LHS, RHS);
NegateR = false;
NegateAfterR = true;
} else {
// Negate the left sub-tree if possible, otherwise negate the result.
NegateR = CanNegateR;
NegateAfterR = !CanNegateR;
}
NegateL = true;
NegateAfterAll = !Negate;
} else {
assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
assert(!Negate && "Valid conjunction/disjunction tree");
NegateL = false;
NegateR = false;
NegateAfterR = false;
NegateAfterAll = false;
}
// Emit sub-trees.
AArch64CC::CondCode RHSCC;
SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate);
if (NegateAfterR)
RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC);
if (NegateAfterAll)
OutCC = AArch64CC::getInvertedCondCode(OutCC);
return CmpL;
}
/// Emit an expression as a conjunction (a series of CCMP/FCCMP ops).
/// In some cases this is even possible with OR operations in the expression.
/// See \ref AArch64CCMP.
/// \see emitConjunctionRec().
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
AArch64CC::CondCode &OutCC) {
bool DummyCanNegate;
bool DummyMustBeFirst;
if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
return SDValue();
return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
}
/// @}
/// Returns how profitable it is to fold a comparison's operand's shift and/or
/// extension operations.
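/// Returns 2 when both an extension and a small (<= 4) shift would fold,
/// 1 when a single extension or shift would fold, and 0 otherwise.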
static unsigned getCmpOperandFoldingProfit(SDValue Op) {
auto isSupportedExtend = [&](SDValue V) {
if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
return true;
if (V.getOpcode() == ISD::AND)
if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
uint64_t Mask = MaskCst->getZExtValue();
return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
}
return false;
};
if (!Op.hasOneUse())
return 0;
if (isSupportedExtend(Op))
return 1;
unsigned Opc = Op.getOpcode();
if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
uint64_t Shift = ShiftCst->getZExtValue();
if (isSupportedExtend(Op.getOperand(0)))
return (Shift <= 4) ? 2 : 1;
EVT VT = Op.getValueType();
if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
return 1;
}
return 0;
}
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &AArch64cc, SelectionDAG &DAG,
const SDLoc &dl) {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
EVT VT = RHS.getValueType();
uint64_t C = RHSC->getZExtValue();
if (!isLegalArithImmed(C)) {
// Constant does not fit, try adjusting it by one?
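// Each case below replaces the constant with an adjacent value and the
// condition with the equivalent predicate, guarding against wrap-around.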
switch (CC) {
default:
break;
case ISD::SETLT:
case ISD::SETGE:
if ((VT == MVT::i32 && C != 0x80000000 &&
isLegalArithImmed((uint32_t)(C - 1))) ||
(VT == MVT::i64 && C != 0x80000000ULL &&
isLegalArithImmed(C - 1ULL))) {
CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
RHS = DAG.getConstant(C, dl, VT);
}
break;
case ISD::SETULT:
case ISD::SETUGE:
if ((VT == MVT::i32 && C != 0 &&
isLegalArithImmed((uint32_t)(C - 1))) ||
(VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
RHS = DAG.getConstant(C, dl, VT);
}
break;
case ISD::SETLE:
case ISD::SETGT:
if ((VT == MVT::i32 && C != INT32_MAX &&
isLegalArithImmed((uint32_t)(C + 1))) ||
(VT == MVT::i64 && C != INT64_MAX &&
isLegalArithImmed(C + 1ULL))) {
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
RHS = DAG.getConstant(C, dl, VT);
}
break;
case ISD::SETULE:
case ISD::SETUGT:
if ((VT == MVT::i32 && C != UINT32_MAX &&
isLegalArithImmed((uint32_t)(C + 1))) ||
(VT == MVT::i64 && C != UINT64_MAX &&
isLegalArithImmed(C + 1ULL))) {
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
RHS = DAG.getConstant(C, dl, VT);
}
break;
}
}
}
// Comparisons are canonicalized so that the RHS operand is simpler than the
// LHS one, the extreme case being when RHS is an immediate. However, AArch64
// can fold some shift+extend operations on the RHS operand, so swap the
// operands if that can be done.
//
// For example:
// lsl w13, w11, #1
// cmp w13, w12
// can be turned into:
// cmp w12, w11, lsl #1
if (!isa<ConstantSDNode>(RHS) ||
!isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
std::swap(LHS, RHS);
CC = ISD::getSetCCSwappedOperands(CC);
}
}
SDValue Cmp;
AArch64CC::CondCode AArch64CC;
if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
// The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
// For the i8 operand, the largest immediate is 255, so this can be easily
// encoded in the compare instruction. For the i16 operand, however, the
// largest immediate cannot be encoded in the compare.
// Therefore, use a sign extending load and cmn to avoid materializing the
// -1 constant. For example,
// movz w1, #65535
// ldrh w0, [x0, #0]
// cmp w0, w1
// >
// ldrsh w0, [x0, #0]
// cmn w0, #1
// Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
// if and only if (sext LHS) == (sext RHS). The checks are in place to
// ensure both the LHS and RHS are truly zero extended and to make sure the
// transformation is profitable.
if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
LHS.getNode()->hasNUsesOfValue(1, 0)) {
int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
SDValue SExt =
DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
DAG.getValueType(MVT::i16));
Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
RHS.getValueType()),
CC, dl, DAG);
AArch64CC = changeIntCCToAArch64CC(CC);
}
}
if (!Cmp && (RHSC->isZero() || RHSC->isOne())) {
if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
if ((CC == ISD::SETNE) ^ RHSC->isZero())
AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
}
}
}
if (!Cmp) {
Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
AArch64CC = changeIntCCToAArch64CC(CC);
}
AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
return Cmp;
}
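// Lower an overflow-checking arithmetic node ([SU]ADDO, [SU]SUBO, [SU]MULO)
// to the corresponding AArch64 flag-setting operation; returns the result
// value and an NZCV-producing overflow check, with CC set to the condition
// that signals overflow.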
static std::pair<SDValue, SDValue>
getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
"Unsupported value type");
SDValue Value, Overflow;
SDLoc DL(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
unsigned Opc = 0;
switch (Op.getOpcode()) {
default:
llvm_unreachable("Unknown overflow instruction!");
case ISD::SADDO:
Opc = AArch64ISD::ADDS;
CC = AArch64CC::VS;
break;
case ISD::UADDO:
Opc = AArch64ISD::ADDS;
CC = AArch64CC::HS;
break;
case ISD::SSUBO:
Opc = AArch64ISD::SUBS;
CC = AArch64CC::VS;
break;
case ISD::USUBO:
Opc = AArch64ISD::SUBS;
CC = AArch64CC::LO;
break;
// Multiply needs a little extra work.
case ISD::SMULO:
case ISD::UMULO: {
CC = AArch64CC::NE;
bool IsSigned = Op.getOpcode() == ISD::SMULO;
if (Op.getValueType() == MVT::i32) {
// Extend to 64-bits, then perform a 64-bit multiply.
unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
// Check that the result fits into a 32-bit integer.
SDVTList VTs = DAG.getVTList(MVT::i64, MVT_CC);
if (IsSigned) {
// cmp xreg, wreg, sxtw
SDValue SExtMul = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Value);
Overflow =
DAG.getNode(AArch64ISD::SUBS, DL, VTs, Mul, SExtMul).getValue(1);
} else {
// tst xreg, #0xffffffff00000000
SDValue UpperBits = DAG.getConstant(0xFFFFFFFF00000000, DL, MVT::i64);
Overflow =
DAG.getNode(AArch64ISD::ANDS, DL, VTs, Mul, UpperBits).getValue(1);
}
break;
}
assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
// For the 64-bit multiply, detect overflow from the high half of the product.
Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
if (IsSigned) {
SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
DAG.getConstant(63, DL, MVT::i64));
// It is important that LowerBits is last, otherwise the arithmetic
// shift will not be folded into the compare (SUBS).
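// Illustrative note (not part of the original comments): a signed 64x64-bit
// multiply overflows exactly when the high 64 bits (MULHS) differ from the
// sign-replication (SRA by 63) of the low 64 bits, which is what the SUBS
// below compares under the NE condition.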
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
.getValue(1);
} else {
SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
Overflow =
DAG.getNode(AArch64ISD::SUBS, DL, VTs,
DAG.getConstant(0, DL, MVT::i64),
UpperBits).getValue(1);
}
break;
}
} // switch (...)
if (Opc) {
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
// Emit the AArch64 operation with overflow check.
Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
Overflow = Value.getValue(1);
}
return std::make_pair(Value, Overflow);
}
SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerToScalableOp(Op, DAG);
SDValue Sel = Op.getOperand(0);
SDValue Other = Op.getOperand(1);
SDLoc dl(Sel);
// If the operand is an overflow checking operation, invert the condition
// code and kill the Not operation. I.e., transform:
// (xor (overflow_op_bool, 1))
// -->
// (csel 1, 0, invert(cc), overflow_op_bool)
// ... which later gets transformed to just a cset instruction with an
// inverted condition code, rather than a cset + eor sequence.
if (isOneConstant(Other) && ISD::isOverflowIntrOpRes(Sel)) {
// Only lower legal XALUO ops.
if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
return SDValue();
SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
AArch64CC::CondCode CC;
SDValue Value, Overflow;
std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
CCVal, Overflow);
}
// If neither operand is a SELECT_CC, give up.
if (Sel.getOpcode() != ISD::SELECT_CC)
std::swap(Sel, Other);
if (Sel.getOpcode() != ISD::SELECT_CC)
return Op;
// The folding we want to perform is:
// (xor x, (select_cc a, b, cc, 0, -1) )
// -->
// (csel x, (xor x, -1), cc ...)
//
// The latter will get matched to a CSINV instruction.
ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
SDValue LHS = Sel.getOperand(0);
SDValue RHS = Sel.getOperand(1);
SDValue TVal = Sel.getOperand(2);
SDValue FVal = Sel.getOperand(3);
// FIXME: This could be generalized to non-integer comparisons.
if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
return Op;
ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
// The values aren't constants, this isn't the pattern we're looking for.
if (!CFVal || !CTVal)
return Op;
// We can commute the SELECT_CC by inverting the condition. This
// might be needed to make this fit into a CSINV pattern.
if (CTVal->isAllOnes() && CFVal->isZero()) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
// If the constants line up, perform the transform!
if (CTVal->isZero() && CFVal->isAllOnes()) {
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
FVal = Other;
TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
DAG.getConstant(-1ULL, dl, Other.getValueType()));
return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
CCVal, Cmp);
}
return Op;
}
static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
unsigned Opc;
bool ExtraOp = false;
switch (Op.getOpcode()) {
default:
llvm_unreachable("Invalid code");
case ISD::ADDC:
Opc = AArch64ISD::ADDS;
break;
case ISD::SUBC:
Opc = AArch64ISD::SUBS;
break;
case ISD::ADDE:
Opc = AArch64ISD::ADCS;
ExtraOp = true;
break;
case ISD::SUBE:
Opc = AArch64ISD::SBCS;
ExtraOp = true;
break;
}
if (!ExtraOp)
return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
Op.getOperand(2));
}
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
return SDValue();
SDLoc dl(Op);
AArch64CC::CondCode CC;
// The actual operation that sets the overflow or carry flag.
SDValue Value, Overflow;
std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
// We use 0 and 1 as false and true values.
SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
// We use an inverted condition, because the conditional select is inverted
// too. This will allow it to be selected to a single instruction:
// CSINC Wd, WZR, WZR, invert(cond).
SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
CCVal, Overflow);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
}
// Prefetch operands are:
// 1: Address to prefetch
// 2: bool isWrite
// 3: int locality (0 = no locality ... 3 = extreme locality)
// 4: bool isDataCache
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
bool IsStream = !Locality;
// When the locality number is set
if (Locality) {
// The front-end should have filtered out the out-of-range values
assert(Locality <= 3 && "Prefetch locality out-of-range");
// The locality degree is the opposite of the target cache level: the PRFM
// encoding starts at 0 for L1, so flip the value around.
Locality = 3 - Locality;
}
// Build the mask value encoding the expected behavior.
unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
(!IsData << 3) | // IsDataCache bit
(Locality << 1) | // Cache level bits
(unsigned)IsStream; // Stream bit
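// Illustrative example (not part of the original comments): a read prefetch
// of the data cache with locality 3 has IsWrite = 0, IsData = 1 and an
// inverted Locality of 0, so PrfOp is 0b00000, the PLDL1KEEP variant of PRFM.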
return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
}
SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isScalableVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);
if (useSVEForFixedLengthVectorVT(VT))
return LowerFixedLengthFPExtendToSVE(Op, DAG);
assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
return SDValue();
}
SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
SelectionDAG &DAG) const {
if (Op.getValueType().isScalableVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
bool IsStrict = Op->isStrictFPOpcode();
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
EVT SrcVT = SrcVal.getValueType();
if (useSVEForFixedLengthVectorVT(SrcVT))
return LowerFixedLengthFPRoundToSVE(Op, DAG);
if (SrcVT != MVT::f128) {
// Expand cases where the input is a vector bigger than NEON.
if (useSVEForFixedLengthVectorVT(SrcVT))
return SDValue();
// It's legal except when f128 is involved
return Op;
}
return SDValue();
}
SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
SelectionDAG &DAG) const {
// Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
// Any additional optimization in this function should be recorded
// in the cost tables.
EVT InVT = Op.getOperand(0).getValueType();
EVT VT = Op.getValueType();
if (VT.isScalableVector()) {
unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
? AArch64ISD::FCVTZU_MERGE_PASSTHRU
: AArch64ISD::FCVTZS_MERGE_PASSTHRU;
return LowerToPredicatedOp(Op, DAG, Opcode);
}
if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
return LowerFixedLengthFPToIntToSVE(Op, DAG);
unsigned NumElts = InVT.getVectorNumElements();
// f16 conversions are promoted to f32 when full fp16 is not supported.
if (InVT.getVectorElementType() == MVT::f16 &&
!Subtarget->hasFullFP16()) {
MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
SDLoc dl(Op);
return DAG.getNode(
Op.getOpcode(), dl, Op.getValueType(),
DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
}
uint64_t VTSize = VT.getFixedSizeInBits();
uint64_t InVTSize = InVT.getFixedSizeInBits();
if (VTSize < InVTSize) {
SDLoc dl(Op);
SDValue Cv =
DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
Op.getOperand(0));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
}
if (VTSize > InVTSize) {
SDLoc dl(Op);
MVT ExtVT =
MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
VT.getVectorNumElements());
SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
}
// Type changing conversions are illegal.
return Op;
}
SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
if (SrcVal.getValueType().isVector())
return LowerVectorFP_TO_INT(Op, DAG);
// f16 conversions are promoted to f32 when full fp16 is not supported.
if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
SDLoc dl(Op);
return DAG.getNode(
Op.getOpcode(), dl, Op.getValueType(),
DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
}
if (SrcVal.getValueType() != MVT::f128) {
// It's legal except when f128 is involved
return Op;
}
return SDValue();
}
SDValue
AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
SelectionDAG &DAG) const {
// AArch64 FP-to-int conversions saturate to the destination element size, so
// we can lower common saturating conversions to simple instructions.
SDValue SrcVal = Op.getOperand(0);
EVT SrcVT = SrcVal.getValueType();
EVT DstVT = Op.getValueType();
EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
uint64_t SrcElementWidth = SrcVT.getScalarSizeInBits();
uint64_t DstElementWidth = DstVT.getScalarSizeInBits();
uint64_t SatWidth = SatVT.getScalarSizeInBits();
assert(SatWidth <= DstElementWidth &&
"Saturation width cannot exceed result width");
// TODO: Consider lowering to SVE operations, as in LowerVectorFP_TO_INT.
// Currently, the `llvm.fpto[su]i.sat.*` intrinsics don't accept scalable
// types, so this is hard to reach.
if (DstVT.isScalableVector())
return SDValue();
EVT SrcElementVT = SrcVT.getVectorElementType();
// In the absence of FP16 support, promote f16 to f32 and saturate the result.
if (SrcElementVT == MVT::f16 &&
(!Subtarget->hasFullFP16() || DstElementWidth > 16)) {
MVT F32VT = MVT::getVectorVT(MVT::f32, SrcVT.getVectorNumElements());
SrcVal = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), F32VT, SrcVal);
SrcVT = F32VT;
SrcElementVT = MVT::f32;
SrcElementWidth = 32;
} else if (SrcElementVT != MVT::f64 && SrcElementVT != MVT::f32 &&
SrcElementVT != MVT::f16)
return SDValue();
SDLoc DL(Op);
// Cases that we can emit directly.
if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth)
return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
DAG.getValueType(DstVT.getScalarType()));
// Otherwise we emit a cvt that saturates to a higher BW, and saturate the
// result. This is only valid if the legal cvt is larger than the saturate
// width. For double, as we don't have MIN/MAX, it can be simpler to scalarize
// (at least until sqxtn is selected).
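// Illustrative example (not part of the original comments): a v4f32 -> v4i16
// fptosi.sat is converted to v4i32 first (saturating at the i32 bounds) and
// then clamped to [-32768, 32767] with SMIN/SMAX before truncating to v4i16.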
if (SrcElementWidth < SatWidth || SrcElementVT == MVT::f64)
return SDValue();
EVT IntVT = SrcVT.changeVectorElementTypeToInteger();
SDValue NativeCvt = DAG.getNode(Op.getOpcode(), DL, IntVT, SrcVal,
DAG.getValueType(IntVT.getScalarType()));
SDValue Sat;
if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) {
SDValue MinC = DAG.getConstant(
APInt::getSignedMaxValue(SatWidth).sextOrSelf(SrcElementWidth), DL,
IntVT);
SDValue Min = DAG.getNode(ISD::SMIN, DL, IntVT, NativeCvt, MinC);
SDValue MaxC = DAG.getConstant(
APInt::getSignedMinValue(SatWidth).sextOrSelf(SrcElementWidth), DL,
IntVT);
Sat = DAG.getNode(ISD::SMAX, DL, IntVT, Min, MaxC);
} else {
SDValue MinC = DAG.getConstant(
APInt::getAllOnesValue(SatWidth).zextOrSelf(SrcElementWidth), DL,
IntVT);
Sat = DAG.getNode(ISD::UMIN, DL, IntVT, NativeCvt, MinC);
}
return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Sat);
}
SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
SelectionDAG &DAG) const {
// AArch64 FP-to-int conversions saturate to the destination register size, so
// we can lower common saturating conversions to simple instructions.
SDValue SrcVal = Op.getOperand(0);
EVT SrcVT = SrcVal.getValueType();
if (SrcVT.isVector())
return LowerVectorFP_TO_INT_SAT(Op, DAG);
EVT DstVT = Op.getValueType();
EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
uint64_t SatWidth = SatVT.getScalarSizeInBits();
uint64_t DstWidth = DstVT.getScalarSizeInBits();
assert(SatWidth <= DstWidth && "Saturation width cannot exceed result width");
// In the absence of FP16 support, promote f16 to f32 and saturate the result.
if (SrcVT == MVT::f16 && !Subtarget->hasFullFP16()) {
SrcVal = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, SrcVal);
SrcVT = MVT::f32;
} else if (SrcVT != MVT::f64 && SrcVT != MVT::f32 && SrcVT != MVT::f16)
return SDValue();
SDLoc DL(Op);
// Cases that we can emit directly.
if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 ||
(SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
DstVT == SatVT && (DstVT == MVT::i64 || DstVT == MVT::i32))
return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
DAG.getValueType(DstVT));
// Otherwise we emit a cvt that saturates to a higher BW, and saturate the
// result. This is only valid if the legal cvt is larger than the saturate
// width.
if (DstWidth < SatWidth)
return SDValue();
SDValue NativeCvt =
DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal, DAG.getValueType(DstVT));
SDValue Sat;
if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) {
SDValue MinC = DAG.getConstant(
APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth), DL, DstVT);
SDValue Min = DAG.getNode(ISD::SMIN, DL, DstVT, NativeCvt, MinC);
SDValue MaxC = DAG.getConstant(
APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth), DL, DstVT);
Sat = DAG.getNode(ISD::SMAX, DL, DstVT, Min, MaxC);
} else {
SDValue MinC = DAG.getConstant(
APInt::getAllOnesValue(SatWidth).zextOrSelf(DstWidth), DL, DstVT);
Sat = DAG.getNode(ISD::UMIN, DL, DstVT, NativeCvt, MinC);
}
return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Sat);
}
SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
// Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
// Any additional optimization in this function should be recorded
// in the cost tables.
EVT VT = Op.getValueType();
SDLoc dl(Op);
SDValue In = Op.getOperand(0);
EVT InVT = In.getValueType();
unsigned Opc = Op.getOpcode();
bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
if (VT.isScalableVector()) {
if (InVT.getVectorElementType() == MVT::i1) {
// We can't convert directly from an SVE predicate; extend it to an integer
// vector first.
unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
EVT CastVT = getPromotedVTForPredicate(InVT);
In = DAG.getNode(CastOpc, dl, CastVT, In);
return DAG.getNode(Opc, dl, VT, In);
}
unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
: AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
return LowerToPredicatedOp(Op, DAG, Opcode);
}
if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
return LowerFixedLengthIntToFPToSVE(Op, DAG);
uint64_t VTSize = VT.getFixedSizeInBits();
uint64_t InVTSize = InVT.getFixedSizeInBits();
if (VTSize < InVTSize) {
MVT CastVT =
MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
InVT.getVectorNumElements());
In = DAG.getNode(Opc, dl, CastVT, In);
return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
}
if (VTSize > InVTSize) {
unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
EVT CastVT = VT.changeVectorElementTypeToInteger();
In = DAG.getNode(CastOpc, dl, CastVT, In);
return DAG.getNode(Opc, dl, VT, In);
}
return Op;
}
SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
if (Op.getValueType().isVector())
return LowerVectorINT_TO_FP(Op, DAG);
bool IsStrict = Op->isStrictFPOpcode();
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
// f16 conversions are promoted to f32 when full fp16 is not supported.
if (Op.getValueType() == MVT::f16 &&
!Subtarget->hasFullFP16()) {
assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
SDLoc dl(Op);
return DAG.getNode(
ISD::FP_ROUND, dl, MVT::f16,
DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal),
DAG.getIntPtrConstant(0, dl));
}
// i128 conversions are libcalls.
if (SrcVal.getValueType() == MVT::i128)
return SDValue();
// Other conversions are legal, unless it's to the completely software-based
// fp128.
if (Op.getValueType() != MVT::f128)
return Op;
return SDValue();
}
SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
SelectionDAG &DAG) const {
// For iOS, we want to call an alternative entry point: __sincos_stret,
// which returns the values in two S / D registers.
SDLoc dl(Op);
SDValue Arg = Op.getOperand(0);
EVT ArgVT = Arg.getValueType();
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
ArgListTy Args;
ArgListEntry Entry;
Entry.Node = Arg;
Entry.Ty = ArgTy;
Entry.IsSExt = false;
Entry.IsZExt = false;
Args.push_back(Entry);
RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
: RTLIB::SINCOS_STRET_F32;
const char *LibcallName = getLibcallName(LC);
SDValue Callee =
DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
StructType *RetTy = StructType::get(ArgTy, ArgTy);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(DAG.getEntryNode())
.setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.first;
}
static MVT getSVEContainerType(EVT ContentTy);
SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op,
SelectionDAG &DAG) const {
EVT OpVT = Op.getValueType();
EVT ArgVT = Op.getOperand(0).getValueType();
if (useSVEForFixedLengthVectorVT(OpVT))
return LowerFixedLengthBitcastToSVE(Op, DAG);
if (OpVT.isScalableVector()) {
if (isTypeLegal(OpVT) && !isTypeLegal(ArgVT)) {
assert(OpVT.isFloatingPoint() && !ArgVT.isFloatingPoint() &&
"Expected int->fp bitcast!");
SDValue ExtResult =
DAG.getNode(ISD::ANY_EXTEND, SDLoc(Op), getSVEContainerType(ArgVT),
Op.getOperand(0));
return getSVESafeBitCast(OpVT, ExtResult, DAG);
}
return getSVESafeBitCast(OpVT, Op.getOperand(0), DAG);
}
if (OpVT != MVT::f16 && OpVT != MVT::bf16)
return SDValue();
// Bitcasts between f16 and bf16 are legal.
if (ArgVT == MVT::f16 || ArgVT == MVT::bf16)
return Op;
assert(ArgVT == MVT::i16);
SDLoc DL(Op);
Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
return SDValue(
DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, OpVT, Op,
DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
0);
}
static EVT getExtensionTo64Bits(const EVT &OrigVT) {
if (OrigVT.getSizeInBits() >= 64)
return OrigVT;
assert(OrigVT.isSimple() && "Expecting a simple value type");
MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
switch (OrigSimpleTy) {
default: llvm_unreachable("Unexpected Vector Type");
case MVT::v2i8:
case MVT::v2i16:
return MVT::v2i32;
case MVT::v4i8:
return MVT::v4i16;
}
}
static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
const EVT &OrigTy,
const EVT &ExtTy,
unsigned ExtOpcode) {
// The vector originally had a size of OrigTy. It was then extended to ExtTy.
// We expect the ExtTy to be 128 bits total. If the OrigTy is less than
// 64 bits we need to insert a new extension so that it will be 64 bits.
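// Illustrative example (not part of the original comments): a v4i8 operand
// that was extended to v4i32 is re-extended only to v4i16, the 64-bit operand
// width that the S/UMULL inputs require.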
assert(ExtTy.is128BitVector() && "Unexpected extension size");
if (OrigTy.getSizeInBits() >= 64)
return N;
// Must extend size to at least 64 bits to be used as an operand for VMULL.
EVT NewVT = getExtensionTo64Bits(OrigTy);
return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
}
static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
bool isSigned) {
EVT VT = N->getValueType(0);
if (N->getOpcode() != ISD::BUILD_VECTOR)
return false;
for (const SDValue &Elt : N->op_values()) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
unsigned EltSize = VT.getScalarSizeInBits();
unsigned HalfSize = EltSize / 2;
if (isSigned) {
if (!isIntN(HalfSize, C->getSExtValue()))
return false;
} else {
if (!isUIntN(HalfSize, C->getZExtValue()))
return false;
}
continue;
}
return false;
}
return true;
}
static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
if (N->getOpcode() == ISD::SIGN_EXTEND ||
N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
N->getOperand(0)->getValueType(0),
N->getValueType(0),
N->getOpcode());
assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
EVT VT = N->getValueType(0);
SDLoc dl(N);
unsigned EltSize = VT.getScalarSizeInBits() / 2;
unsigned NumElts = VT.getVectorNumElements();
MVT TruncVT = MVT::getIntegerVT(EltSize);
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i != NumElts; ++i) {
ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
const APInt &CInt = C->getAPIntValue();
// Element types smaller than 32 bits are not legal, so use i32 elements.
// The values are implicitly truncated so sext vs. zext doesn't matter.
Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
}
return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
}
static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
return N->getOpcode() == ISD::SIGN_EXTEND ||
N->getOpcode() == ISD::ANY_EXTEND ||
isExtendedBUILD_VECTOR(N, DAG, true);
}
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
return N->getOpcode() == ISD::ZERO_EXTEND ||
N->getOpcode() == ISD::ANY_EXTEND ||
isExtendedBUILD_VECTOR(N, DAG, false);
}
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
unsigned Opcode = N->getOpcode();
if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
SDNode *N0 = N->getOperand(0).getNode();
SDNode *N1 = N->getOperand(1).getNode();
return N0->hasOneUse() && N1->hasOneUse() &&
isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
}
return false;
}
static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
unsigned Opcode = N->getOpcode();
if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
SDNode *N0 = N->getOperand(0).getNode();
SDNode *N1 = N->getOperand(1).getNode();
return N0->hasOneUse() && N1->hasOneUse() &&
isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
}
return false;
}
SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SelectionDAG &DAG) const {
// The rounding mode is in bits 23:22 of the FPCR.
// The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
// The formula we use to implement this is (((FPCR + (1 << 22)) >> 22) & 3)
// so that the shift + and get folded into a bitfield extract.
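// Worked example (illustrative, not part of the original comments): if
// FPCR[23:22] is 0b11 (round towards zero), adding 1 << 22 carries out of the
// two-bit field and the shift-and-mask yields 0, the FLT_ROUNDS value for
// towards-zero rounding.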
SDLoc dl(Op);
SDValue Chain = Op.getOperand(0);
SDValue FPCR_64 = DAG.getNode(
ISD::INTRINSIC_W_CHAIN, dl, {MVT::i64, MVT::Other},
{Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
Chain = FPCR_64.getValue(1);
SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
DAG.getConstant(1U << 22, dl, MVT::i32));
SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
DAG.getConstant(22, dl, MVT::i32));
SDValue AND = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
DAG.getConstant(3, dl, MVT::i32));
return DAG.getMergeValues({AND, Chain}, dl);
}
SDValue AArch64TargetLowering::LowerSET_ROUNDING(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Chain = Op->getOperand(0);
SDValue RMValue = Op->getOperand(1);
// The rounding mode is in bits 23:22 of the FPCR.
// The llvm.set.rounding argument value to the rounding mode in FPCR mapping
// is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
// (((arg - 1) & 3) << 22).
//
// The argument of llvm.set.rounding must be within the segment [0, 3], so
// NearestTiesToAway (4) is not handled here. It is the responsibility of the
// code that generated the llvm.set.rounding call to ensure this condition.
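// Worked example (illustrative, not part of the original comments):
// llvm.set.rounding(0) requests towards-zero rounding; ((0 - 1) & 3) == 3, so
// FPCR[23:22] is set to 0b11, the RZ rounding mode.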
// Calculate new value of FPCR[23:22].
RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
DAG.getConstant(1, DL, MVT::i32));
RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
DAG.getConstant(0x3, DL, MVT::i32));
RMValue =
DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
DAG.getConstant(AArch64::RoundingBitsPos, DL, MVT::i32));
RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, RMValue);
// Get current value of FPCR.
SDValue Ops[] = {
Chain, DAG.getTargetConstant(Intrinsic::aarch64_get_fpcr, DL, MVT::i64)};
SDValue FPCR =
DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i64, MVT::Other}, Ops);
Chain = FPCR.getValue(1);
FPCR = FPCR.getValue(0);
// Put the new rounding mode into FPCR[23:22].
const int RMMask = ~(AArch64::Rounding::rmMask << AArch64::RoundingBitsPos);
FPCR = DAG.getNode(ISD::AND, DL, MVT::i64, FPCR,
DAG.getConstant(RMMask, DL, MVT::i64));
FPCR = DAG.getNode(ISD::OR, DL, MVT::i64, FPCR, RMValue);
SDValue Ops2[] = {
Chain, DAG.getTargetConstant(Intrinsic::aarch64_set_fpcr, DL, MVT::i64),
FPCR};
return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
}
SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
// If SVE is available then i64 vector multiplications can also be made legal.
bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64;
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED, OverrideNEON);
// Multiplications are only custom-lowered for 128-bit vectors so that
// VMULL can be detected. Otherwise v2i64 multiplications are not legal.
assert(VT.is128BitVector() && VT.isInteger() &&
"unexpected type for custom-lowering ISD::MUL");
SDNode *N0 = Op.getOperand(0).getNode();
SDNode *N1 = Op.getOperand(1).getNode();
unsigned NewOpc = 0;
bool isMLA = false;
bool isN0SExt = isSignExtended(N0, DAG);
bool isN1SExt = isSignExtended(N1, DAG);
if (isN0SExt && isN1SExt)
NewOpc = AArch64ISD::SMULL;
else {
bool isN0ZExt = isZeroExtended(N0, DAG);
bool isN1ZExt = isZeroExtended(N1, DAG);
if (isN0ZExt && isN1ZExt)
NewOpc = AArch64ISD::UMULL;
else if (isN1SExt || isN1ZExt) {
// Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
// into (s/zext A * s/zext C) + (s/zext B * s/zext C)
if (isN1SExt && isAddSubSExt(N0, DAG)) {
NewOpc = AArch64ISD::SMULL;
isMLA = true;
} else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
NewOpc = AArch64ISD::UMULL;
isMLA = true;
} else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
std::swap(N0, N1);
NewOpc = AArch64ISD::UMULL;
isMLA = true;
}
}
if (!NewOpc) {
if (VT == MVT::v2i64)
// Fall through to expand this. It is not legal.
return SDValue();
else
// Other vector multiplications are legal.
return Op;
}
}
// Legalize to a S/UMULL instruction
SDLoc DL(Op);
SDValue Op0;
SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
if (!isMLA) {
Op0 = skipExtensionForVectorMULL(N0, DAG);
assert(Op0.getValueType().is64BitVector() &&
Op1.getValueType().is64BitVector() &&
"unexpected types for extended operands to VMULL");
return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
}
// Optimize (zext A + zext B) * C to (S/UMULL A, C) + (S/UMULL B, C) during
// isel lowering to take advantage of no-stall back-to-back s/umul + s/umla.
// This is true for CPUs with accumulate forwarding such as Cortex-A53/A57.
SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
EVT Op1VT = Op1.getValueType();
return DAG.getNode(N0->getOpcode(), DL, VT,
DAG.getNode(NewOpc, DL, VT,
DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
DAG.getNode(NewOpc, DL, VT,
DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
}
static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
int Pattern) {
return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
DAG.getTargetConstant(Pattern, DL, MVT::i32));
}
static SDValue lowerConvertToSVBool(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
EVT OutVT = Op.getValueType();
SDValue InOp = Op.getOperand(1);
EVT InVT = InOp.getValueType();
// Return the operand if the cast isn't changing type,
// i.e. <n x 16 x i1> -> <n x 16 x i1>
if (InVT == OutVT)
return InOp;
SDValue Reinterpret =
DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, OutVT, InOp);
// If the argument converted to an svbool is a ptrue or a comparison, the
// lanes introduced by the widening are zero by construction.
switch (InOp.getOpcode()) {
case AArch64ISD::SETCC_MERGE_ZERO:
return Reinterpret;
case ISD::INTRINSIC_WO_CHAIN:
if (InOp.getConstantOperandVal(0) == Intrinsic::aarch64_sve_ptrue)
return Reinterpret;
}
// Otherwise, zero the newly introduced lanes.
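// Illustrative note (an assumption, not part of the original comments): a
// ptrue of the narrower predicate type only sets the lowest bit of each
// element-sized group, so reinterpreting it as an svbool and ANDing it in
// clears exactly the lanes that the widening introduced.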
SDValue Mask = getPTrue(DAG, DL, InVT, AArch64SVEPredPattern::all);
SDValue MaskReinterpret =
DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, OutVT, Mask);
return DAG.getNode(ISD::AND, DL, OutVT, Reinterpret, MaskReinterpret);
}
SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = Op.getConstantOperandVal(1);
switch (IntNo) {
default:
return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::aarch64_mops_memset_tag: {
auto Node = cast<MemIntrinsicSDNode>(Op.getNode());
SDLoc DL(Op);
SDValue Chain = Node->getChain();
SDValue Dst = Op.getOperand(2);
SDValue Val = Op.getOperand(3);
Val = DAG.getAnyExtOrTrunc(Val, DL, MVT::i64);
SDValue Size = Op.getOperand(4);
auto Alignment = Node->getMemOperand()->getAlign();
bool IsVol = Node->isVolatile();
auto DstPtrInfo = Node->getPointerInfo();
const auto &SDI =
static_cast<const AArch64SelectionDAGInfo &>(DAG.getSelectionDAGInfo());
SDValue MS =
SDI.EmitMOPS(AArch64ISD::MOPS_MEMSET_TAGGING, DAG, DL, Chain, Dst, Val,
Size, Alignment, IsVol, DstPtrInfo, MachinePointerInfo{});
// MOPS_MEMSET_TAGGING has 3 results (DstWb, SizeWb, Chain) whereas the
// intrinsic has 2. So hide SizeWb using MERGE_VALUES. Otherwise
// LowerOperationWrapper will complain that the number of results has
// changed.
return DAG.getMergeValues({MS.getValue(0), MS.getValue(2)}, DL);
}
}
}
SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDLoc dl(Op);
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::thread_pointer: {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
}
case Intrinsic::aarch64_neon_abs: {
EVT Ty = Op.getValueType();
if (Ty == MVT::i64) {
SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
Op.getOperand(1));
Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
} else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
} else {
report_fatal_error("Unexpected type for AArch64 NEON intrinic");
}
}
case Intrinsic::aarch64_neon_smax:
return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_neon_umax:
return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_neon_smin:
return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_neon_umin:
return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_sunpkhi:
return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_sunpklo:
return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_uunpkhi:
return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_uunpklo:
return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_clasta_n:
return DAG.getNode(AArch64ISD::CLASTA_N, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::aarch64_sve_clastb_n:
return DAG.getNode(AArch64ISD::CLASTB_N, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::aarch64_sve_lasta:
return DAG.getNode(AArch64ISD::LASTA, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_lastb:
return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_rev:
return DAG.getNode(ISD::VECTOR_REVERSE, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_tbl:
return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_trn1:
return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_trn2:
return DAG.getNode(AArch64ISD::TRN2, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_uzp1:
return DAG.getNode(AArch64ISD::UZP1, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_uzp2:
return DAG.getNode(AArch64ISD::UZP2, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_zip1:
return DAG.getNode(AArch64ISD::ZIP1, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_zip2:
return DAG.getNode(AArch64ISD::ZIP2, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_splice:
return DAG.getNode(AArch64ISD::SPLICE, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::aarch64_sve_ptrue:
return getPTrue(DAG, dl, Op.getValueType(),
cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
case Intrinsic::aarch64_sve_clz:
return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_cnt: {
SDValue Data = Op.getOperand(3);
// CTPOP only supports integer operands.
if (Data.getValueType().isFloatingPoint())
Data = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Data);
return DAG.getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Data, Op.getOperand(1));
}
case Intrinsic::aarch64_sve_dupq_lane:
return LowerDUPQLane(Op, DAG);
case Intrinsic::aarch64_sve_convert_from_svbool:
return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_convert_to_svbool:
return lowerConvertToSVBool(Op, DAG);
case Intrinsic::aarch64_sve_fneg:
return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frintp:
return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frintm:
return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frinti:
return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frintx:
return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frinta:
return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frintn:
return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frintz:
return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_ucvtf:
return DAG.getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU, dl,
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(1));
case Intrinsic::aarch64_sve_scvtf:
return DAG.getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU, dl,
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(1));
case Intrinsic::aarch64_sve_fcvtzu:
return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl,
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(1));
case Intrinsic::aarch64_sve_fcvtzs:
return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl,
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(1));
case Intrinsic::aarch64_sve_fsqrt:
return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frecpx:
return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frecpe_x:
return DAG.getNode(AArch64ISD::FRECPE, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_frecps_x:
return DAG.getNode(AArch64ISD::FRECPS, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_frsqrte_x:
return DAG.getNode(AArch64ISD::FRSQRTE, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_frsqrts_x:
return DAG.getNode(AArch64ISD::FRSQRTS, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_fabs:
return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_abs:
return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_neg:
return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_insr: {
SDValue Scalar = Op.getOperand(2);
EVT ScalarTy = Scalar.getValueType();
if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
Op.getOperand(1), Scalar);
}
case Intrinsic::aarch64_sve_rbit:
return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, dl,
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(1));
case Intrinsic::aarch64_sve_revb:
return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_revh:
return DAG.getNode(AArch64ISD::REVH_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_revw:
return DAG.getNode(AArch64ISD::REVW_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_sxtb:
return DAG.getNode(
AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
Op.getOperand(1));
case Intrinsic::aarch64_sve_sxth:
return DAG.getNode(
AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
Op.getOperand(1));
case Intrinsic::aarch64_sve_sxtw:
return DAG.getNode(
AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
Op.getOperand(1));
case Intrinsic::aarch64_sve_uxtb:
return DAG.getNode(
AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
Op.getOperand(1));
case Intrinsic::aarch64_sve_uxth:
return DAG.getNode(
AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
Op.getOperand(1));
case Intrinsic::aarch64_sve_uxtw:
return DAG.getNode(
AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
Op.getOperand(1));
case Intrinsic::localaddress: {
const auto &MF = DAG.getMachineFunction();
const auto *RegInfo = Subtarget->getRegisterInfo();
unsigned Reg = RegInfo->getLocalAddressRegister(MF);
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
Op.getSimpleValueType());
}
case Intrinsic::eh_recoverfp: {
// FIXME: This needs to be implemented to correctly handle highly aligned
// stack objects. For now we simply return the incoming FP. Refer D53541
// for more details.
SDValue FnOp = Op.getOperand(1);
SDValue IncomingFPOp = Op.getOperand(2);
GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
if (!Fn)
report_fatal_error(
"llvm.eh.recoverfp must take a function as the first argument");
return IncomingFPOp;
}
case Intrinsic::aarch64_neon_vsri:
case Intrinsic::aarch64_neon_vsli: {
EVT Ty = Op.getValueType();
if (!Ty.isVector())
report_fatal_error("Unexpected type for aarch64_neon_vsli");
assert(Op.getConstantOperandVal(3) <= Ty.getScalarSizeInBits());
bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri;
unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
Op.getOperand(3));
}
case Intrinsic::aarch64_neon_srhadd:
case Intrinsic::aarch64_neon_urhadd:
case Intrinsic::aarch64_neon_shadd:
case Intrinsic::aarch64_neon_uhadd: {
bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
IntNo == Intrinsic::aarch64_neon_shadd);
bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
IntNo == Intrinsic::aarch64_neon_urhadd);
unsigned Opcode =
IsSignedAdd ? (IsRoundingAdd ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
: (IsRoundingAdd ? AArch64ISD::URHADD : AArch64ISD::UHADD);
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
}
case Intrinsic::aarch64_neon_sabd:
case Intrinsic::aarch64_neon_uabd: {
unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uabd ? ISD::ABDU
: ISD::ABDS;
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
}
case Intrinsic::aarch64_neon_uaddlp: {
unsigned Opcode = AArch64ISD::UADDLP;
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1));
}
case Intrinsic::aarch64_neon_sdot:
case Intrinsic::aarch64_neon_udot:
case Intrinsic::aarch64_sve_sdot:
case Intrinsic::aarch64_sve_udot: {
unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
IntNo == Intrinsic::aarch64_sve_udot)
? AArch64ISD::UDOT
: AArch64ISD::SDOT;
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
}
case Intrinsic::get_active_lane_mask: {
SDValue ID =
DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, dl, MVT::i64);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(), ID,
Op.getOperand(1), Op.getOperand(2));
}
}
}
bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const {
if (VT.getVectorElementType() == MVT::i8 ||
VT.getVectorElementType() == MVT::i16) {
EltTy = MVT::i32;
return true;
}
return false;
}
bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
if (VT.getVectorElementType() == MVT::i32 &&
VT.getVectorElementCount().getKnownMinValue() >= 4 &&
!VT.isFixedLengthVector())
return true;
return false;
}
bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
return ExtVal.getValueType().isScalableVector() ||
useSVEForFixedLengthVectorVT(ExtVal.getValueType(),
/*OverrideNEON=*/true);
}
unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
{std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
AArch64ISD::GLD1_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
AArch64ISD::GLD1_UXTW_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
AArch64ISD::GLD1_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
AArch64ISD::GLD1_SXTW_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
AArch64ISD::GLD1_SCALED_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
AArch64ISD::GLD1_SCALED_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO},
};
auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
return AddrModes.find(Key)->second;
}
unsigned getScatterVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
{std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
AArch64ISD::SST1_PRED},
{std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
AArch64ISD::SST1_UXTW_PRED},
{std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
AArch64ISD::SST1_PRED},
{std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
AArch64ISD::SST1_SXTW_PRED},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
AArch64ISD::SST1_SCALED_PRED},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
AArch64ISD::SST1_UXTW_SCALED_PRED},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
AArch64ISD::SST1_SCALED_PRED},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
AArch64ISD::SST1_SXTW_SCALED_PRED},
};
auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
return AddrModes.find(Key)->second;
}
unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
switch (Opcode) {
default:
llvm_unreachable("unimplemented opcode");
return Opcode;
case AArch64ISD::GLD1_MERGE_ZERO:
return AArch64ISD::GLD1S_MERGE_ZERO;
case AArch64ISD::GLD1_IMM_MERGE_ZERO:
return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
return AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
return AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
}
}
bool getGatherScatterIndexIsExtended(SDValue Index) {
unsigned Opcode = Index.getOpcode();
if (Opcode == ISD::SIGN_EXTEND_INREG)
return true;
if (Opcode == ISD::AND) {
SDValue Splat = Index.getOperand(1);
if (Splat.getOpcode() != ISD::SPLAT_VECTOR)
return false;
ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Splat.getOperand(0));
if (!Mask || Mask->getZExtValue() != 0xFFFFFFFF)
return false;
return true;
}
return false;
}
// If the base pointer of a masked gather or scatter is null, we
// may be able to swap BasePtr & Index and use the vector + register
// or vector + immediate addressing mode, e.g.
// VECTOR + REGISTER:
// getelementptr nullptr, <vscale x N x T> (splat(%offset)) + %indices)
// -> getelementptr %offset, <vscale x N x T> %indices
// VECTOR + IMMEDIATE:
// getelementptr nullptr, <vscale x N x T> (splat(#x)) + %indices)
// -> getelementptr #x, <vscale x N x T> %indices
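// Illustrative example (hypothetical values, not part of the original
// comment): for a gather with a null base and index add(%indices, splat(16))
// over 32-bit memory elements, 16 is a multiple of the 4-byte element size
// and 16 / 4 <= 31, so the immediate form is chosen with BasePtr = %indices
// and Index = 16.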
void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT,
unsigned &Opcode, bool IsGather,
SelectionDAG &DAG) {
if (!isNullConstant(BasePtr))
return;
// FIXME: This will not match for fixed vector type codegen as the nodes in
// question will have fixed<->scalable conversions around them. This should be
// moved to a DAG combine or complex pattern so that it executes after all of
// the fixed vector inserts and extracts have been removed. This deficiency
// will result in a sub-optimal addressing mode being used, i.e. an ADD not
// being folded into the scatter/gather.
ConstantSDNode *Offset = nullptr;
if (Index.getOpcode() == ISD::ADD)
if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) {
if (isa<ConstantSDNode>(SplatVal))
Offset = cast<ConstantSDNode>(SplatVal);
else {
BasePtr = SplatVal;
Index = Index->getOperand(0);
return;
}
}
unsigned NewOp =
IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED;
if (!Offset) {
std::swap(BasePtr, Index);
Opcode = NewOp;
return;
}
uint64_t OffsetVal = Offset->getZExtValue();
unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8;
auto ConstOffset = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64);
if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) {
// Index is out of range for the immediate addressing mode
BasePtr = ConstOffset;
Index = Index->getOperand(0);
return;
}
// Immediate is in range
Opcode = NewOp;
BasePtr = Index->getOperand(0);
Index = ConstOffset;
}
SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(Op);
assert(MGT && "Can only custom lower gather load nodes");
bool IsFixedLength = MGT->getMemoryVT().isFixedLengthVector();
SDValue Index = MGT->getIndex();
SDValue Chain = MGT->getChain();
SDValue PassThru = MGT->getPassThru();
SDValue Mask = MGT->getMask();
SDValue BasePtr = MGT->getBasePtr();
ISD::LoadExtType ExtTy = MGT->getExtensionType();
ISD::MemIndexType IndexType = MGT->getIndexType();
bool IsScaled =
IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
bool IsSigned =
IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
bool IdxNeedsExtend =
getGatherScatterIndexIsExtended(Index) ||
Index.getSimpleValueType().getVectorElementType() == MVT::i32;
bool ResNeedsSignExtend = ExtTy == ISD::EXTLOAD || ExtTy == ISD::SEXTLOAD;
EVT VT = PassThru.getSimpleValueType();
EVT IndexVT = Index.getSimpleValueType();
EVT MemVT = MGT->getMemoryVT();
SDValue InputVT = DAG.getValueType(MemVT);
if (VT.getVectorElementType() == MVT::bf16 &&
!static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
return SDValue();
if (IsFixedLength) {
assert(Subtarget->useSVEForFixedLengthVectors() &&
"Cannot lower when not using SVE for fixed vectors");
if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
} else {
MemVT = getContainerForFixedLengthVector(DAG, MemVT);
IndexVT = MemVT.changeTypeToInteger();
}
InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
Mask = DAG.getNode(
ISD::SIGN_EXTEND, DL,
VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
}
if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
PassThru = SDValue();
if (VT.isFloatingPoint() && !IsFixedLength) {
// Handle FP data by using an integer gather and casting the result.
if (PassThru) {
EVT PassThruVT = getPackedSVEVectorVT(VT.getVectorElementCount());
PassThru = getSVESafeBitCast(PassThruVT, PassThru, DAG);
}
InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
}
SDVTList VTs = DAG.getVTList(IndexVT, MVT::Other);
if (getGatherScatterIndexIsExtended(Index))
Index = Index.getOperand(0);
unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend);
selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
/*isGather=*/true, DAG);
if (ResNeedsSignExtend)
Opcode = getSignExtendedGatherOpcode(Opcode);
if (IsFixedLength) {
if (Index.getSimpleValueType().isFixedLengthVector())
Index = convertToScalableVector(DAG, IndexVT, Index);
if (BasePtr.getSimpleValueType().isFixedLengthVector())
BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr);
Mask = convertFixedMaskToScalableVector(Mask, DAG);
}
SDValue Ops[] = {Chain, Mask, BasePtr, Index, InputVT};
SDValue Result = DAG.getNode(Opcode, DL, VTs, Ops);
Chain = Result.getValue(1);
if (IsFixedLength) {
Result = convertFromScalableVector(
DAG, VT.changeVectorElementType(IndexVT.getVectorElementType()),
Result);
Result = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Result);
Result = DAG.getNode(ISD::BITCAST, DL, VT, Result);
if (PassThru)
Result = DAG.getSelect(DL, VT, MGT->getMask(), Result, PassThru);
} else {
if (PassThru)
Result = DAG.getSelect(DL, IndexVT, Mask, Result, PassThru);
if (VT.isFloatingPoint())
Result = getSVESafeBitCast(VT, Result, DAG);
}
return DAG.getMergeValues({Result, Chain}, DL);
}
SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(Op);
assert(MSC && "Can only custom lower scatter store nodes");
bool IsFixedLength = MSC->getMemoryVT().isFixedLengthVector();
SDValue Index = MSC->getIndex();
SDValue Chain = MSC->getChain();
SDValue StoreVal = MSC->getValue();
SDValue Mask = MSC->getMask();
SDValue BasePtr = MSC->getBasePtr();
ISD::MemIndexType IndexType = MSC->getIndexType();
bool IsScaled =
IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
bool IsSigned =
IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
bool NeedsExtend =
getGatherScatterIndexIsExtended(Index) ||
Index.getSimpleValueType().getVectorElementType() == MVT::i32;
EVT VT = StoreVal.getSimpleValueType();
EVT IndexVT = Index.getSimpleValueType();
SDVTList VTs = DAG.getVTList(MVT::Other);
EVT MemVT = MSC->getMemoryVT();
SDValue InputVT = DAG.getValueType(MemVT);
if (VT.getVectorElementType() == MVT::bf16 &&
!static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
return SDValue();
if (IsFixedLength) {
assert(Subtarget->useSVEForFixedLengthVectors() &&
"Cannot lower when not using SVE for fixed vectors");
if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
} else {
MemVT = getContainerForFixedLengthVector(DAG, MemVT);
IndexVT = MemVT.changeTypeToInteger();
}
InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
StoreVal =
DAG.getNode(ISD::BITCAST, DL, VT.changeTypeToInteger(), StoreVal);
StoreVal = DAG.getNode(
ISD::ANY_EXTEND, DL,
VT.changeVectorElementType(IndexVT.getVectorElementType()), StoreVal);
StoreVal = convertToScalableVector(DAG, IndexVT, StoreVal);
Mask = DAG.getNode(
ISD::SIGN_EXTEND, DL,
VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
} else if (VT.isFloatingPoint()) {
// Handle FP data by casting the data so an integer scatter can be used.
EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount());
StoreVal = getSVESafeBitCast(StoreValVT, StoreVal, DAG);
InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
}
if (getGatherScatterIndexIsExtended(Index))
Index = Index.getOperand(0);
unsigned Opcode = getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend);
selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
/*isGather=*/false, DAG);
if (IsFixedLength) {
if (Index.getSimpleValueType().isFixedLengthVector())
Index = convertToScalableVector(DAG, IndexVT, Index);
if (BasePtr.getSimpleValueType().isFixedLengthVector())
BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr);
Mask = convertFixedMaskToScalableVector(Mask, DAG);
}
SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT};
return DAG.getNode(Opcode, DL, VTs, Ops);
}
SDValue AArch64TargetLowering::LowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
MaskedLoadSDNode *LoadNode = cast<MaskedLoadSDNode>(Op);
assert(LoadNode && "Expected custom lowering of a masked load node");
EVT VT = Op->getValueType(0);
if (useSVEForFixedLengthVectorVT(VT, true))
return LowerFixedLengthVectorMLoadToSVE(Op, DAG);
SDValue PassThru = LoadNode->getPassThru();
SDValue Mask = LoadNode->getMask();
if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
return Op;
SDValue Load = DAG.getMaskedLoad(
VT, DL, LoadNode->getChain(), LoadNode->getBasePtr(),
LoadNode->getOffset(), Mask, DAG.getUNDEF(VT), LoadNode->getMemoryVT(),
LoadNode->getMemOperand(), LoadNode->getAddressingMode(),
LoadNode->getExtensionType());
SDValue Result = DAG.getSelect(DL, VT, Mask, Load, PassThru);
return DAG.getMergeValues({Result, Load.getValue(1)}, DL);
}
// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
EVT VT, EVT MemVT,
SelectionDAG &DAG) {
assert(VT.isVector() && "VT should be a vector type");
assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
SDValue Value = ST->getValue();
// First extend the promoted v4i16 to v8i16, truncate it to v8i8, and extract
// the word lane that represents the v4i8 subvector. This turns the store
// into:
//
// xtn v0.8b, v0.8h
// str s0, [x0]
SDValue Undef = DAG.getUNDEF(MVT::i16);
SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
{Undef, Undef, Undef, Undef});
SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
Value, UndefVec);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
Trunc, DAG.getConstant(0, DL, MVT::i64));
return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
ST->getBasePtr(), ST->getMemOperand());
}
// Custom lowering for any store, vector or scalar, normal or truncating.
// Currently we only custom lower truncating stores from v4i16 to v4i8 and
// volatile stores of i128.
SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc Dl(Op);
StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
assert (StoreNode && "Can only custom lower store nodes");
SDValue Value = StoreNode->getValue();
EVT VT = Value.getValueType();
EVT MemVT = StoreNode->getMemoryVT();
if (VT.isVector()) {
if (useSVEForFixedLengthVectorVT(VT, true))
return LowerFixedLengthVectorStoreToSVE(Op, DAG);
unsigned AS = StoreNode->getAddressSpace();
Align Alignment = StoreNode->getAlign();
if (Alignment < MemVT.getStoreSize() &&
!allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
StoreNode->getMemOperand()->getFlags(),
nullptr)) {
return scalarizeVectorStore(StoreNode, DAG);
}
if (StoreNode->isTruncatingStore() && VT == MVT::v4i16 &&
MemVT == MVT::v4i8) {
return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
}
// 256 bit non-temporal stores can be lowered to STNP. Do this as part of
// the custom lowering, as there are no un-paired non-temporal stores and
// legalization will break up 256 bit inputs.
ElementCount EC = MemVT.getVectorElementCount();
if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
EC.isKnownEven() &&
((MemVT.getScalarSizeInBits() == 8u ||
MemVT.getScalarSizeInBits() == 16u ||
MemVT.getScalarSizeInBits() == 32u ||
MemVT.getScalarSizeInBits() == 64u))) {
SDValue Lo =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
SDValue Hi =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
StoreNode->getValue(),
DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64));
SDValue Result = DAG.getMemIntrinsicNode(
AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other),
{StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
StoreNode->getMemoryVT(), StoreNode->getMemOperand());
return Result;
}
} else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
return LowerStore128(Op, DAG);
} else if (MemVT == MVT::i64x8) {
SDValue Value = StoreNode->getValue();
assert(Value->getValueType(0) == MVT::i64x8);
SDValue Chain = StoreNode->getChain();
SDValue Base = StoreNode->getBasePtr();
EVT PtrVT = Base.getValueType();
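// MVT::i64x8 is the 512-bit type introduced for the Armv8.7 LS64 extension
// (LD64B/ST64B); a plain store of it is split into eight consecutive i64
// stores, extracting each lane with LS64_EXTRACT.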
for (unsigned i = 0; i < 8; i++) {
SDValue Part = DAG.getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64,
Value, DAG.getConstant(i, Dl, MVT::i32));
SDValue Ptr = DAG.getNode(ISD::ADD, Dl, PtrVT, Base,
DAG.getConstant(i * 8, Dl, PtrVT));
Chain = DAG.getStore(Chain, Dl, Part, Ptr, StoreNode->getPointerInfo(),
StoreNode->getOriginalAlign());
}
return Chain;
}
return SDValue();
}
/// Lower atomic or volatile 128-bit stores to a single STP instruction.
SDValue AArch64TargetLowering::LowerStore128(SDValue Op,
SelectionDAG &DAG) const {
MemSDNode *StoreNode = cast<MemSDNode>(Op);
assert(StoreNode->getMemoryVT() == MVT::i128);
assert(StoreNode->isVolatile() || StoreNode->isAtomic());
assert(!StoreNode->isAtomic() ||
StoreNode->getMergedOrdering() == AtomicOrdering::Unordered ||
StoreNode->getMergedOrdering() == AtomicOrdering::Monotonic);
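// Splitting the value into Lo/Hi and emitting a single STP keeps the access
// as one instruction; with LSE2 an aligned STP is also single-copy atomic,
// which is why unordered/monotonic 128-bit atomic stores can be lowered here
// as well.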
SDValue Value = StoreNode->getOpcode() == ISD::STORE
? StoreNode->getOperand(1)
: StoreNode->getOperand(2);
SDLoc DL(Op);
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, Value,
DAG.getConstant(0, DL, MVT::i64));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, Value,
DAG.getConstant(1, DL, MVT::i64));
SDValue Result = DAG.getMemIntrinsicNode(
AArch64ISD::STP, DL, DAG.getVTList(MVT::Other),
{StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
StoreNode->getMemoryVT(), StoreNode->getMemOperand());
return Result;
}
SDValue AArch64TargetLowering::LowerLOAD(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
assert(LoadNode && "Expected custom lowering of a load node");
if (LoadNode->getMemoryVT() == MVT::i64x8) {
SmallVector<SDValue, 8> Ops;
SDValue Base = LoadNode->getBasePtr();
SDValue Chain = LoadNode->getChain();
EVT PtrVT = Base.getValueType();
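// Mirror of the i64x8 store lowering above: load eight consecutive i64
// values and recombine them with LS64_BUILD.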
for (unsigned i = 0; i < 8; i++) {
SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Base,
DAG.getConstant(i * 8, DL, PtrVT));
SDValue Part = DAG.getLoad(MVT::i64, DL, Chain, Ptr,
LoadNode->getPointerInfo(),
LoadNode->getOriginalAlign());
Ops.push_back(Part);
Chain = SDValue(Part.getNode(), 1);
}
SDValue Loaded = DAG.getNode(AArch64ISD::LS64_BUILD, DL, MVT::i64x8, Ops);
return DAG.getMergeValues({Loaded, Chain}, DL);
}
// Custom lowering for extending v4i8 vector loads.
EVT VT = Op->getValueType(0);
assert((VT == MVT::v4i16 || VT == MVT::v4i32) && "Expected v4i16 or v4i32");
if (LoadNode->getMemoryVT() != MVT::v4i8)
return SDValue();
unsigned ExtType;
if (LoadNode->getExtensionType() == ISD::SEXTLOAD)
ExtType = ISD::SIGN_EXTEND;
else if (LoadNode->getExtensionType() == ISD::ZEXTLOAD ||
LoadNode->getExtensionType() == ISD::EXTLOAD)
ExtType = ISD::ZERO_EXTEND;
else
return SDValue();
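// Load the four bytes as a single f32, insert it into lane 0 of a v2f32 so
// the data lives in a 64-bit vector register, bitcast to v8i8 and extend,
// then keep only the low v4i16 half (extending again to v4i32 if needed).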
SDValue Load = DAG.getLoad(MVT::f32, DL, LoadNode->getChain(),
LoadNode->getBasePtr(), MachinePointerInfo());
SDValue Chain = Load.getValue(1);
SDValue Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f32, Load);
SDValue BC = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Vec);
SDValue Ext = DAG.getNode(ExtType, DL, MVT::v8i16, BC);
Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Ext,
DAG.getConstant(0, DL, MVT::i64));
if (VT == MVT::v4i32)
Ext = DAG.getNode(ExtType, DL, MVT::v4i32, Ext);
return DAG.getMergeValues({Ext, Chain}, DL);
}
// Generate SUBS and CSEL for integer abs.
SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
if (VT.isVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
SDLoc DL(Op);
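// i.e. abs(x) = (x >= 0) ? x : 0 - x, selected on the PL (non-negative)
// condition produced by the SUBS.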
SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
Op.getOperand(0));
// Generate SUBS & CSEL.
SDValue Cmp =
DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
Op.getOperand(0), DAG.getConstant(0, DL, VT));
return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
Cmp.getValue(1));
}
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Custom lowering: ");
LLVM_DEBUG(Op.dump());
switch (Op.getOpcode()) {
default:
llvm_unreachable("unimplemented operand");
return SDValue();
case ISD::BITCAST:
return LowerBITCAST(Op, DAG);
case ISD::GlobalAddress:
return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress:
return LowerGlobalTLSAddress(Op, DAG);
case ISD::SETCC:
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS:
return LowerSETCC(Op, DAG);
case ISD::BR_CC:
return LowerBR_CC(Op, DAG);
case ISD::SELECT:
return LowerSELECT(Op, DAG);
case ISD::SELECT_CC:
return LowerSELECT_CC(Op, DAG);
case ISD::JumpTable:
return LowerJumpTable(Op, DAG);
case ISD::BR_JT:
return LowerBR_JT(Op, DAG);
case ISD::ConstantPool:
return LowerConstantPool(Op, DAG);
case ISD::BlockAddress:
return LowerBlockAddress(Op, DAG);
case ISD::VASTART:
return LowerVASTART(Op, DAG);
case ISD::VACOPY:
return LowerVACOPY(Op, DAG);
case ISD::VAARG:
return LowerVAARG(Op, DAG);
case ISD::ADDC:
case ISD::ADDE:
case ISD::SUBC:
case ISD::SUBE:
return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
case ISD::USUBO:
case ISD::SMULO:
case ISD::UMULO:
return LowerXALUO(Op, DAG);
case ISD::FADD:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
case ISD::FSUB:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED);
case ISD::FMUL:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);
case ISD::FMA:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
case ISD::FDIV:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
case ISD::FNEG:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
case ISD::FCEIL:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
case ISD::FFLOOR:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
case ISD::FNEARBYINT:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
case ISD::FRINT:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
case ISD::FROUND:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
case ISD::FROUNDEVEN:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
case ISD::FTRUNC:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
case ISD::FSQRT:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
case ISD::FABS:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
case ISD::FP_ROUND:
case ISD::STRICT_FP_ROUND:
return LowerFP_ROUND(Op, DAG);
case ISD::FP_EXTEND:
return LowerFP_EXTEND(Op, DAG);
case ISD::FRAMEADDR:
return LowerFRAMEADDR(Op, DAG);
case ISD::SPONENTRY:
return LowerSPONENTRY(Op, DAG);
case ISD::RETURNADDR:
return LowerRETURNADDR(Op, DAG);
case ISD::ADDROFRETURNADDR:
return LowerADDROFRETURNADDR(Op, DAG);
case ISD::CONCAT_VECTORS:
return LowerCONCAT_VECTORS(Op, DAG);
case ISD::INSERT_VECTOR_ELT:
return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT:
return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::BUILD_VECTOR:
return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE:
return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::SPLAT_VECTOR:
return LowerSPLAT_VECTOR(Op, DAG);
case ISD::EXTRACT_SUBVECTOR:
return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::INSERT_SUBVECTOR:
return LowerINSERT_SUBVECTOR(Op, DAG);
case ISD::SDIV:
case ISD::UDIV:
return LowerDIV(Op, DAG);
case ISD::SMIN:
case ISD::UMIN:
case ISD::SMAX:
case ISD::UMAX:
return LowerMinMax(Op, DAG);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL:
return LowerVectorSRA_SRL_SHL(Op, DAG);
case ISD::SHL_PARTS:
case ISD::SRL_PARTS:
case ISD::SRA_PARTS:
return LowerShiftParts(Op, DAG);
case ISD::CTPOP:
return LowerCTPOP(Op, DAG);
case ISD::FCOPYSIGN:
return LowerFCOPYSIGN(Op, DAG);
case ISD::OR:
return LowerVectorOR(Op, DAG);
case ISD::XOR:
return LowerXOR(Op, DAG);
case ISD::PREFETCH:
return LowerPREFETCH(Op, DAG);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::STRICT_SINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
return LowerINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
return LowerFP_TO_INT(Op, DAG);
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
return LowerFP_TO_INT_SAT(Op, DAG);
case ISD::FSINCOS:
return LowerFSINCOS(Op, DAG);
case ISD::FLT_ROUNDS_:
return LowerFLT_ROUNDS_(Op, DAG);
case ISD::SET_ROUNDING:
return LowerSET_ROUNDING(Op, DAG);
case ISD::MUL:
return LowerMUL(Op, DAG);
case ISD::MULHS:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHS_PRED,
/*OverrideNEON=*/true);
case ISD::MULHU:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED,
/*OverrideNEON=*/true);
case ISD::INTRINSIC_W_CHAIN:
return LowerINTRINSIC_W_CHAIN(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN:
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::ATOMIC_STORE:
if (cast<MemSDNode>(Op)->getMemoryVT() == MVT::i128) {
assert(Subtarget->hasLSE2());
return LowerStore128(Op, DAG);
}
return SDValue();
case ISD::STORE:
return LowerSTORE(Op, DAG);
case ISD::MSTORE:
return LowerFixedLengthVectorMStoreToSVE(Op, DAG);
case ISD::MGATHER:
return LowerMGATHER(Op, DAG);
case ISD::MSCATTER:
return LowerMSCATTER(Op, DAG);
case ISD::VECREDUCE_SEQ_FADD:
return LowerVECREDUCE_SEQ_FADD(Op, DAG);
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
return LowerVECREDUCE(Op, DAG);
case ISD::ATOMIC_LOAD_SUB:
return LowerATOMIC_LOAD_SUB(Op, DAG);
case ISD::ATOMIC_LOAD_AND:
return LowerATOMIC_LOAD_AND(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::VSCALE:
return LowerVSCALE(Op, DAG);
case ISD::ANY_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
case ISD::SIGN_EXTEND_INREG: {
// Only custom lower when ExtraVT has a legal byte-based element type.
EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
EVT ExtraEltVT = ExtraVT.getVectorElementType();
if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
(ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
return SDValue();
return LowerToPredicatedOp(Op, DAG,
AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
}
case ISD::TRUNCATE:
return LowerTRUNCATE(Op, DAG);
case ISD::MLOAD:
return LowerMLOAD(Op, DAG);
case ISD::LOAD:
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerFixedLengthVectorLoadToSVE(Op, DAG);
return LowerLOAD(Op, DAG);
case ISD::ADD:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED);
case ISD::AND:
return LowerToScalableOp(Op, DAG);
case ISD::SUB:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SUB_PRED);
case ISD::FMAXIMUM:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAX_PRED);
case ISD::FMAXNUM:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAXNM_PRED);
case ISD::FMINIMUM:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMIN_PRED);
case ISD::FMINNUM:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMINNM_PRED);
case ISD::VSELECT:
return LowerFixedLengthVectorSelectToSVE(Op, DAG);
case ISD::ABS:
return LowerABS(Op, DAG);
case ISD::ABDS:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED);
case ISD::ABDU:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED);
case ISD::BITREVERSE:
return LowerBitreverse(Op, DAG);
case ISD::BSWAP:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
case ISD::CTLZ:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU,
/*OverrideNEON=*/true);
case ISD::CTTZ:
return LowerCTTZ(Op, DAG);
case ISD::VECTOR_SPLICE:
return LowerVECTOR_SPLICE(Op, DAG);
}
}
bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const {
return !Subtarget->useSVEForFixedLengthVectors();
}
bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
EVT VT, bool OverrideNEON) const {
if (!Subtarget->useSVEForFixedLengthVectors())
return false;
if (!VT.isFixedLengthVector())
return false;
// Don't use SVE for vectors we cannot scalarize if required.
switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
// Fixed length predicates should be promoted to i8.
// NOTE: This is consistent with how NEON (and thus 64/128bit vectors) work.
case MVT::i1:
default:
return false;
case MVT::i8:
case MVT::i16:
case MVT::i32:
case MVT::i64:
case MVT::f16:
case MVT::f32:
case MVT::f64:
break;
}
// All SVE implementations support NEON sized vectors.
if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector()))
return true;
// Ensure NEON MVTs only belong to a single register class.
if (VT.getFixedSizeInBits() <= 128)
return false;
// Don't use SVE for types that don't fit.
if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
return false;
// TODO: Perhaps an artificial restriction, but worth having whilst getting
// the base fixed length SVE support in place.
if (!VT.isPow2VectorType())
return false;
return true;
}
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
/// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
bool IsVarArg) const {
switch (CC) {
default:
report_fatal_error("Unsupported calling convention.");
case CallingConv::WebKit_JS:
return CC_AArch64_WebKit_JS;
case CallingConv::GHC:
return CC_AArch64_GHC;
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::PreserveMost:
case CallingConv::CXX_FAST_TLS:
case CallingConv::Swift:
case CallingConv::SwiftTail:
case CallingConv::Tail:
if (Subtarget->isTargetWindows() && IsVarArg)
return CC_AArch64_Win64_VarArg;
if (!Subtarget->isTargetDarwin())
return CC_AArch64_AAPCS;
if (!IsVarArg)
return CC_AArch64_DarwinPCS;
return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
: CC_AArch64_DarwinPCS_VarArg;
case CallingConv::Win64:
return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
case CallingConv::CFGuard_Check:
return CC_AArch64_Win64_CFGuard_Check;
case CallingConv::AArch64_VectorCall:
case CallingConv::AArch64_SVE_VectorCall:
return CC_AArch64_AAPCS;
}
}
CCAssignFn *
AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
: RetCC_AArch64_AAPCS;
}
SDValue AArch64TargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
DenseMap<unsigned, SDValue> CopiedRegs;
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
// At this point, Ins[].VT may already be promoted to i32. To correctly
// handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
// i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
// Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
// we use a special version of AnalyzeFormalArguments to pass in ValVT and
// LocVT.
unsigned NumArgs = Ins.size();
Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
unsigned CurArgIdx = 0;
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ValVT = Ins[i].VT;
if (Ins[i].isOrigArg()) {
std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
CurArgIdx = Ins[i].getOrigArgIndex();
// Get type of the original argument.
EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
/*AllowUnknown*/ true);
MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
// If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
ValVT = MVT::i8;
else if (ActualMVT == MVT::i16)
ValVT = MVT::i16;
}
bool UseVarArgCC = false;
if (IsWin64)
UseVarArgCC = isVarArg;
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
bool Res =
AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
assert(!Res && "Call operand has unhandled type");
(void)Res;
}
SmallVector<SDValue, 16> ArgValues;
unsigned ExtraArgLocs = 0;
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
if (Ins[i].Flags.isByVal()) {
// Byval is used for HFAs in the PCS, but the system should work in a
// non-compliant manner for larger structs.
EVT PtrVT = getPointerTy(DAG.getDataLayout());
int Size = Ins[i].Flags.getByValSize();
unsigned NumRegs = (Size + 7) / 8;
// FIXME: This works on big-endian for composite byvals, which are the common
// case. It should also work for fundamental types.
unsigned FrameIdx =
MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
InVals.push_back(FrameIdxN);
continue;
}
if (Ins[i].Flags.isSwiftAsync())
MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
SDValue ArgValue;
if (VA.isRegLoc()) {
// Arguments stored in registers.
EVT RegVT = VA.getLocVT();
const TargetRegisterClass *RC;
if (RegVT == MVT::i32)
RC = &AArch64::GPR32RegClass;
else if (RegVT == MVT::i64)
RC = &AArch64::GPR64RegClass;
else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
RC = &AArch64::FPR16RegClass;
else if (RegVT == MVT::f32)
RC = &AArch64::FPR32RegClass;
else if (RegVT == MVT::f64 || RegVT.is64BitVector())
RC = &AArch64::FPR64RegClass;
else if (RegVT == MVT::f128 || RegVT.is128BitVector())
RC = &AArch64::FPR128RegClass;
else if (RegVT.isScalableVector() &&
RegVT.getVectorElementType() == MVT::i1)
RC = &AArch64::PPRRegClass;
else if (RegVT.isScalableVector())
RC = &AArch64::ZPRRegClass;
else
llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
// Transform the arguments in physical registers into virtual ones.
Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
// If this is an 8, 16 or 32-bit value, it is really passed promoted
// to 64 bits. Insert an assert[sz]ext to capture this, then
// truncate to the right size.
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
break;
case CCValAssign::Indirect:
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
break;
case CCValAssign::BCvt:
ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
break;
case CCValAssign::AExt:
case CCValAssign::SExt:
case CCValAssign::ZExt:
break;
case CCValAssign::AExtUpper:
ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue,
DAG.getConstant(32, DL, RegVT));
ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT());
break;
}
} else { // VA.isRegLoc()
assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
unsigned ArgOffset = VA.getLocMemOffset();
unsigned ArgSize = (VA.getLocInfo() == CCValAssign::Indirect
? VA.getLocVT().getSizeInBits()
: VA.getValVT().getSizeInBits()) / 8;
uint32_t BEAlign = 0;
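// On big-endian, an argument smaller than 8 bytes is placed in the
// high-addressed part of its 8-byte slot, so bias the load offset by the
// remaining padding.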
if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
!Ins[i].Flags.isInConsecutiveRegs())
BEAlign = 8 - ArgSize;
int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
// For NON_EXTLOAD, the generic code in getLoad asserts that ValVT == MemVT.
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
MVT MemVT = VA.getValVT();
switch (VA.getLocInfo()) {
default:
break;
case CCValAssign::Trunc:
case CCValAssign::BCvt:
MemVT = VA.getLocVT();
break;
case CCValAssign::Indirect:
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
MemVT = VA.getLocVT();
break;
case CCValAssign::SExt:
ExtType = ISD::SEXTLOAD;
break;
case CCValAssign::ZExt:
ExtType = ISD::ZEXTLOAD;
break;
case CCValAssign::AExt:
ExtType = ISD::EXTLOAD;
break;
}
ArgValue =
DAG.getExtLoad(ExtType, DL, VA.getLocVT(), Chain, FIN,
MachinePointerInfo::getFixedStack(MF, FI), MemVT);
}
if (VA.getLocInfo() == CCValAssign::Indirect) {
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize();
unsigned NumParts = 1;
if (Ins[i].Flags.isInConsecutiveRegs()) {
assert(!Ins[i].Flags.isInConsecutiveRegsLast());
while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
++NumParts;
}
MVT PartLoad = VA.getValVT();
SDValue Ptr = ArgValue;
// Ensure we generate all loads for each tuple part, whilst updating the
// pointer after each load correctly using vscale.
while (NumParts > 0) {
ArgValue = DAG.getLoad(PartLoad, DL, Chain, Ptr, MachinePointerInfo());
InVals.push_back(ArgValue);
NumParts--;
if (NumParts > 0) {
SDValue BytesIncrement = DAG.getVScale(
DL, Ptr.getValueType(),
APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
BytesIncrement, Flags);
ExtraArgLocs++;
i++;
}
}
} else {
if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
ArgValue, DAG.getValueType(MVT::i32));
// i1 arguments are zero-extended to i8 by the caller. Emit a
// hint to reflect this.
if (Ins[i].isOrigArg()) {
Argument *OrigArg = MF.getFunction().getArg(Ins[i].getOrigArgIndex());
if (OrigArg->getType()->isIntegerTy(1)) {
if (!Ins[i].Flags.isZExt()) {
ArgValue = DAG.getNode(AArch64ISD::ASSERT_ZEXT_BOOL, DL,
ArgValue.getValueType(), ArgValue);
}
}
}
InVals.push_back(ArgValue);
}
}
assert((ArgLocs.size() + ExtraArgLocs) == Ins.size());
// varargs
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
if (isVarArg) {
if (!Subtarget->isTargetDarwin() || IsWin64) {
// The AAPCS variadic function ABI is identical to the non-variadic
// one. As a result there may be more arguments in registers and we should
// save them for future reference.
// Win64 variadic functions also pass arguments in registers, but all float
// arguments are passed in integer registers.
saveVarArgRegisters(CCInfo, DAG, DL, Chain);
}
// This will point to the next argument passed via stack.
unsigned StackOffset = CCInfo.getNextStackOffset();
// We currently pass all varargs at 8-byte alignment, or 4 bytes for ILP32.
StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8);
FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
if (MFI.hasMustTailInVarArgFunc()) {
SmallVector<MVT, 2> RegParmTypes;
RegParmTypes.push_back(MVT::i64);
RegParmTypes.push_back(MVT::f128);
// Compute the set of forwarded registers. The rest are scratch.
SmallVectorImpl<ForwardedRegister> &Forwards =
FuncInfo->getForwardedMustTailRegParms();
CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
CC_AArch64_AAPCS);
// Conservatively forward X8, since it might be used for aggregate return.
if (!CCInfo.isAllocated(AArch64::X8)) {
Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
}
}
}
// On Windows, InReg pointers must be returned, so record the pointer in a
// virtual register at the start of the function so it can be returned in the
// epilogue.
if (IsWin64) {
for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
if (Ins[I].Flags.isInReg()) {
assert(!FuncInfo->getSRetReturnReg());
MVT PtrTy = getPointerTy(DAG.getDataLayout());
Register Reg =
MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
FuncInfo->setSRetReturnReg(Reg);
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]);
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
break;
}
}
}
unsigned StackArgSize = CCInfo.getNextStackOffset();
bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
// This is a non-standard ABI so by fiat I say we're allowed to make full
// use of the stack area to be popped, which must be aligned to 16 bytes in
// any case:
StackArgSize = alignTo(StackArgSize, 16);
// If we're expected to restore the stack (e.g. fastcc) then we'll be adding
// a multiple of 16.
FuncInfo->setArgumentStackToRestore(StackArgSize);
// This realignment carries over to the available bytes below. Our own
// callers will guarantee the space is free by giving an aligned value to
// CALLSEQ_START.
}
// Even if we're not expected to free up the space, it's useful to know how
// much is there while considering tail calls (because we can reuse it).
FuncInfo->setBytesInStackArgArea(StackArgSize);
if (Subtarget->hasCustomCallingConv())
Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
return Chain;
}
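/// Spill any unallocated (unnamed) argument registers to a save area on the
/// stack so that va_start/va_arg can walk them. On Win64 the GPR save area is
/// a fixed object placed immediately below the incoming stack arguments.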
void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
SelectionDAG &DAG,
const SDLoc &DL,
SDValue &Chain) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
auto PtrVT = getPointerTy(DAG.getDataLayout());
bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
SmallVector<SDValue, 8> MemOps;
static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
AArch64::X3, AArch64::X4, AArch64::X5,
AArch64::X6, AArch64::X7 };
static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
int GPRIdx = 0;
if (GPRSaveSize != 0) {
if (IsWin64) {
GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
if (GPRSaveSize & 15)
// The extra size here, if triggered, will always be 8.
MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
} else
GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);
SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
Register VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
SDValue Store =
DAG.getStore(Val.getValue(1), DL, Val, FIN,
IsWin64 ? MachinePointerInfo::getFixedStack(
MF, GPRIdx, (i - FirstVariadicGPR) * 8)
: MachinePointerInfo::getStack(MF, i * 8));
MemOps.push_back(Store);
FIN =
DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
}
}
FuncInfo->setVarArgsGPRIndex(GPRIdx);
FuncInfo->setVarArgsGPRSize(GPRSaveSize);
if (Subtarget->hasFPARMv8() && !IsWin64) {
static const MCPhysReg FPRArgRegs[] = {
AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
int FPRIdx = 0;
if (FPRSaveSize != 0) {
FPRIdx = MFI.CreateStackObject(FPRSaveSize, Align(16), false);
SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
Register VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
MachinePointerInfo::getStack(MF, i * 16));
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
DAG.getConstant(16, DL, PtrVT));
}
}
FuncInfo->setVarArgsFPRIndex(FPRIdx);
FuncInfo->setVarArgsFPRSize(FPRSaveSize);
}
if (!MemOps.empty()) {
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
}
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue AArch64TargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
SDValue ThisVal) const {
CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
DenseMap<unsigned, SDValue> CopiedRegs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign VA = RVLocs[i];
// Pass 'this' value directly from the argument to return value, to avoid
// reg unit interference
if (i == 0 && isThisReturn) {
assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
"unexpected return calling convention register assignment");
InVals.push_back(ThisVal);
continue;
}
// Avoid copying a physreg twice since RegAllocFast is incompetent and only
// allows one use of a physreg per block.
SDValue Val = CopiedRegs.lookup(VA.getLocReg());
if (!Val) {
Val =
DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
Chain = Val.getValue(1);
InFlag = Val.getValue(2);
CopiedRegs[VA.getLocReg()] = Val;
}
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
break;
case CCValAssign::BCvt:
Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
break;
case CCValAssign::AExtUpper:
Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val,
DAG.getConstant(32, DL, VA.getLocVT()));
LLVM_FALLTHROUGH;
case CCValAssign::AExt:
LLVM_FALLTHROUGH;
case CCValAssign::ZExt:
Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT());
break;
}
InVals.push_back(Val);
}
return Chain;
}
/// Return true if the calling convention is one that we can guarantee TCO for.
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
}
/// Return true if we might ever do TCO for calls with this calling convention.
static bool mayTailCallThisCC(CallingConv::ID CC) {
switch (CC) {
case CallingConv::C:
case CallingConv::AArch64_SVE_VectorCall:
case CallingConv::PreserveMost:
case CallingConv::Swift:
case CallingConv::SwiftTail:
case CallingConv::Tail:
case CallingConv::Fast:
return true;
default:
return false;
}
}
bool AArch64TargetLowering::isEligibleForTailCallOptimization(
SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
if (!mayTailCallThisCC(CalleeCC))
return false;
MachineFunction &MF = DAG.getMachineFunction();
const Function &CallerF = MF.getFunction();
CallingConv::ID CallerCC = CallerF.getCallingConv();
// Functions using the C or Fast calling convention that have an SVE signature
// preserve more registers and should assume the SVE_VectorCall CC.
// The check for matching callee-saved regs will determine whether it is
// eligible for TCO.
if ((CallerCC == CallingConv::C || CallerCC == CallingConv::Fast) &&
AArch64RegisterInfo::hasSVEArgsOrReturn(&MF))
CallerCC = CallingConv::AArch64_SVE_VectorCall;
bool CCMatch = CallerCC == CalleeCC;
// When using the Windows calling convention on a non-windows OS, we want
// to back up and restore X18 in such functions; we can't do a tail call
// from those functions.
if (CallerCC == CallingConv::Win64 && !Subtarget->isTargetWindows() &&
CalleeCC != CallingConv::Win64)
return false;
// Byval parameters hand the function a pointer directly into the stack area
// we want to reuse during a tail call. Working around this *is* possible (see
// X86) but less efficient and uglier in LowerCall.
for (Function::const_arg_iterator i = CallerF.arg_begin(),
e = CallerF.arg_end();
i != e; ++i) {
if (i->hasByValAttr())
return false;
// On Windows, "inreg" attributes signify non-aggregate indirect returns.
// In this case, it is necessary to save/restore X0 in the callee. Tail
// call opt interferes with this. So we disable tail call opt when the
// caller has an argument with "inreg" attribute.
// FIXME: Check whether the callee also has an "inreg" argument.
if (i->hasInRegAttr())
return false;
}
if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
return CCMatch;
// Externally-defined functions with weak linkage should not be
// tail-called on AArch64 when the OS does not support dynamic
// pre-emption of symbols, as the AAELF spec requires normal calls
// to undefined weak functions to be replaced with a NOP or jump to the
// next instruction. The behaviour of branch instructions in this
// situation (as used for tail calls) is implementation-defined, so we
// cannot rely on the linker replacing the tail call with a return.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
const Triple &TT = getTargetMachine().getTargetTriple();
if (GV->hasExternalWeakLinkage() &&
(!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
return false;
}
// Now we search for cases where we can use a tail call without changing the
// ABI. Sibcall is used in some places (particularly gcc) to refer to this
// concept.
// I want anyone implementing a new calling convention to think long and hard
// about this assert.
assert((!isVarArg || CalleeCC == CallingConv::C) &&
"Unexpected variadic calling convention");
LLVMContext &C = *DAG.getContext();
if (isVarArg && !Outs.empty()) {
// At least two cases here: if caller is fastcc then we can't have any
// memory arguments (we'd be expected to clean up the stack afterwards). If
// caller is C then we could potentially use its argument area.
// FIXME: for now we take the most conservative of these in both cases:
// disallow all variadic memory operands.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
for (const CCValAssign &ArgLoc : ArgLocs)
if (!ArgLoc.isRegLoc())
return false;
}
// Check that the call results are passed in the same way.
if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
CCAssignFnForCall(CalleeCC, isVarArg),
CCAssignFnForCall(CallerCC, isVarArg)))
return false;
// The callee has to preserve all registers the caller needs to preserve.
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
if (!CCMatch) {
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
if (Subtarget->hasCustomCallingConv()) {
TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
}
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
return false;
}
// Nothing more to check if the callee is taking no arguments
if (Outs.empty())
return true;
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
// If any of the arguments is passed indirectly, it must be SVE, so the
// 'getBytesInStackArgArea' is not sufficient to determine whether we need to
// allocate space on the stack. That is why we check for this explicitly here:
// if any argument is passed indirectly, the call cannot be a tail call.
if (llvm::any_of(ArgLocs, [](CCValAssign &A) {
assert((A.getLocInfo() != CCValAssign::Indirect ||
A.getValVT().isScalableVector()) &&
"Expected value to be scalable");
return A.getLocInfo() == CCValAssign::Indirect;
}))
return false;
// If the stack arguments for this call do not fit into our own save area then
// the call cannot be made tail.
if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
return false;
const MachineRegisterInfo &MRI = MF.getRegInfo();
if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
return false;
return true;
}
SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
SelectionDAG &DAG,
MachineFrameInfo &MFI,
int ClobberedFI) const {
SmallVector<SDValue, 8> ArgChains;
int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
// Include the original chain at the beginning of the list. When this is
// used by target LowerCall hooks, this helps legalize find the
// CALLSEQ_BEGIN node.
ArgChains.push_back(Chain);
// Add a chain value for each stack argument load that overlaps the frame
// object being clobbered.
for (SDNode *U : DAG.getEntryNode().getNode()->uses())
if (LoadSDNode *L = dyn_cast<LoadSDNode>(U))
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
if (FI->getIndex() < 0) {
int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
int64_t InLastByte = InFirstByte;
InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
(FirstByte <= InFirstByte && InFirstByte <= LastByte))
ArgChains.push_back(SDValue(L, 1));
}
// Build a tokenfactor for all the chains.
return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
}
bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
bool TailCallOpt) const {
return (CallCC == CallingConv::Fast && TailCallOpt) ||
CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail;
}
// Check if the value is zero-extended from i1 to i8
static bool checkZExtBool(SDValue Arg, const SelectionDAG &DAG) {
unsigned SizeInBits = Arg.getValueType().getSizeInBits();
if (SizeInBits < 8)
return false;
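// Only the low byte matters: the AAPCS requires i1 arguments to be
// zero-extended to 8 bits by the caller, so it suffices to know that bits
// [7:1] are zero (bit 0 may be either 0 or 1).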
APInt LowBits(SizeInBits, 0xFF);
APInt RequiredZero(SizeInBits, 0xFE);
KnownBits Bits = DAG.computeKnownBits(Arg, LowBits, 4);
bool ZExtBool = (Bits.Zero & RequiredZero) == RequiredZero;
return ZExtBool;
}
/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
/// and add input and output parameter nodes.
SDValue
AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
SDLoc &DL = CLI.DL;
SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &IsTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
MachineFunction::CallSiteInfo CSInfo;
bool IsThisReturn = false;
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
bool IsSibCall = false;
bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CallConv);
// Check callee args/returns for SVE registers and set calling convention
// accordingly.
if (CallConv == CallingConv::C || CallConv == CallingConv::Fast) {
bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
return Out.VT.isScalableVector();
});
bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
return In.VT.isScalableVector();
});
if (CalleeInSVE || CalleeOutSVE)
CallConv = CallingConv::AArch64_SVE_VectorCall;
}
if (IsTailCall) {
// Check if it's really possible to do a tail call.
IsTailCall = isEligibleForTailCallOptimization(
Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
// A sibling call is one where we're under the usual C ABI and not planning
// to change that but can still do a tail call:
if (!TailCallOpt && IsTailCall && CallConv != CallingConv::Tail &&
CallConv != CallingConv::SwiftTail)
IsSibCall = true;
if (IsTailCall)
++NumTailCalls;
}
if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
if (IsVarArg) {
// Handle fixed and variable vector arguments differently.
// Variable vector arguments always go into memory.
unsigned NumArgs = Outs.size();
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ArgVT = Outs[i].VT;
if (!Outs[i].IsFixed && ArgVT.isScalableVector())
report_fatal_error("Passing SVE types to variadic functions is "
"currently not supported");
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
bool UseVarArgCC = !Outs[i].IsFixed;
// On Windows, the fixed arguments in a vararg call are passed in GPRs
// too, so use the vararg CC to force them to integer registers.
if (IsCalleeWin64)
UseVarArgCC = true;
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
assert(!Res && "Call operand has unhandled type");
(void)Res;
}
} else {
// At this point, Outs[].VT may already be promoted to i32. To correctly
// handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
// i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
// Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
// we use a special version of AnalyzeCallOperands to pass in ValVT and
// LocVT.
unsigned NumArgs = Outs.size();
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ValVT = Outs[i].VT;
// Get type of the original argument.
EVT ActualVT = getValueType(DAG.getDataLayout(),
CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
/*AllowUnknown*/ true);
MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
// If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
ValVT = MVT::i8;
else if (ActualMVT == MVT::i16)
ValVT = MVT::i16;
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
assert(!Res && "Call operand has unhandled type");
(void)Res;
}
}
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
if (IsSibCall) {
// Since we're not changing the ABI to make this a tail call, the memory
// operands are already available in the caller's incoming argument space.
NumBytes = 0;
}
// FPDiff is the byte offset of the call's argument area from the callee's.
// Stores to callee stack arguments will be placed in FixedStackSlots offset
// by this amount for a tail call. In a sibling call it must be 0 because the
// caller will deallocate the entire stack and the callee still expects its
// arguments to begin at SP+0. Completely unused for non-tail calls.
int FPDiff = 0;
if (IsTailCall && !IsSibCall) {
unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
// Since callee will pop argument stack as a tail call, we must keep the
// popped size 16-byte aligned.
NumBytes = alignTo(NumBytes, 16);
// FPDiff will be negative if this tail call requires more space than we
// would automatically have in our incoming argument space. Positive if we
// can actually shrink the stack.
FPDiff = NumReusableBytes - NumBytes;
// Update the required reserved area if this is the tail call requiring the
// most argument stack space.
if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
FuncInfo->setTailCallReservedStack(-FPDiff);
// The stack pointer must be 16-byte aligned at all times it's used for a
// memory operation, which in practice means at *all* times and in
// particular across call boundaries. Therefore our own arguments started at
// a 16-byte aligned SP and the delta applied for the tail call should
// satisfy the same constraint.
assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
}
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
if (!IsSibCall)
Chain = DAG.getCALLSEQ_START(Chain, IsTailCall ? 0 : NumBytes, 0, DL);
SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
getPointerTy(DAG.getDataLayout()));
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallSet<unsigned, 8> RegsUsed;
SmallVector<SDValue, 8> MemOpChains;
auto PtrVT = getPointerTy(DAG.getDataLayout());
if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
for (const auto &F : Forwards) {
SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
RegsToPass.emplace_back(F.PReg, Val);
}
}
// Walk the register/memloc assignments, inserting copies/loads.
unsigned ExtraArgLocs = 0;
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
// Promote the value if needed.
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::AExt:
if (Outs[i].ArgVT == MVT::i1) {
// AAPCS requires i1 to be zero-extended to 8-bits by the caller.
//
// Check if we actually have to do this, because the value may
// already be zero-extended.
//
// We cannot just emit a (zext i8 (trunc (assert-zext i8)))
// and rely on DAGCombiner to fold this, because the following
// (anyext i32) is combined with (zext i8) in DAG.getNode:
//
// (ext (zext x)) -> (zext x)
//
// This will give us (zext i32), which we cannot remove, so
// try to check this beforehand.
if (!checkZExtBool(Arg, DAG)) {
Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
}
}
Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::AExtUpper:
assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
DAG.getConstant(32, DL, VA.getLocVT()));
break;
case CCValAssign::BCvt:
Arg = DAG.getBitcast(VA.getLocVT(), Arg);
break;
case CCValAssign::Trunc:
Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
break;
case CCValAssign::FPExt:
Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::Indirect:
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
uint64_t StoreSize = VA.getValVT().getStoreSize().getKnownMinSize();
uint64_t PartSize = StoreSize;
unsigned NumParts = 1;
if (Outs[i].Flags.isInConsecutiveRegs()) {
assert(!Outs[i].Flags.isInConsecutiveRegsLast());
while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
++NumParts;
StoreSize *= NumParts;
}
MachineFrameInfo &MFI = MF.getFrameInfo();
Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext());
Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty);
int FI = MFI.CreateStackObject(StoreSize, Alignment, false);
MFI.setStackID(FI, TargetStackID::ScalableVector);
MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
SDValue Ptr = DAG.getFrameIndex(
FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
SDValue SpillSlot = Ptr;
// Ensure we generate all stores for each tuple part, whilst updating the
// pointer after each store correctly using vscale.
while (NumParts) {
Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI);
NumParts--;
if (NumParts > 0) {
SDValue BytesIncrement = DAG.getVScale(
DL, Ptr.getValueType(),
APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
MPI = MachinePointerInfo(MPI.getAddrSpace());
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
BytesIncrement, Flags);
ExtraArgLocs++;
i++;
}
}
Arg = SpillSlot;
break;
}
if (VA.isRegLoc()) {
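// An i64 first argument marked 'returned' (typically 'this') can be
// forwarded straight through to the return value, so callers may keep using
// the value already in X0 without an extra copy.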
if (i == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
Outs[0].VT == MVT::i64) {
assert(VA.getLocVT() == MVT::i64 &&
"unexpected calling convention register assignment");
assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
"unexpected use of 'returned'");
IsThisReturn = true;
}
if (RegsUsed.count(VA.getLocReg())) {
// If this register has already been used then we're trying to pack
// parts of an [N x i32] into an X-register. The extension type will
// take care of putting the two halves in the right place but we have to
// combine them.
SDValue &Bits =
llvm::find_if(RegsToPass,
[=](const std::pair<unsigned, SDValue> &Elt) {
return Elt.first == VA.getLocReg();
})
->second;
Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
// Call site info is used for function's parameter entry value
// tracking. For now we track only simple cases when parameter
// is transferred through whole register.
llvm::erase_if(CSInfo, [&VA](MachineFunction::ArgRegPair ArgReg) {
return ArgReg.Reg == VA.getLocReg();
});
} else {
RegsToPass.emplace_back(VA.getLocReg(), Arg);
RegsUsed.insert(VA.getLocReg());
const TargetOptions &Options = DAG.getTarget().Options;
if (Options.EmitCallSiteInfo)
CSInfo.emplace_back(VA.getLocReg(), i);
}
} else {
assert(VA.isMemLoc());
SDValue DstAddr;
MachinePointerInfo DstInfo;
// FIXME: This works on big-endian for composite byvals, which are the
// common case. It should also work for fundamental types.
uint32_t BEAlign = 0;
unsigned OpSize;
if (VA.getLocInfo() == CCValAssign::Indirect ||
VA.getLocInfo() == CCValAssign::Trunc)
OpSize = VA.getLocVT().getFixedSizeInBits();
else
OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
: VA.getValVT().getSizeInBits();
OpSize = (OpSize + 7) / 8;
if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
!Flags.isInConsecutiveRegs()) {
if (OpSize < 8)
BEAlign = 8 - OpSize;
}
unsigned LocMemOffset = VA.getLocMemOffset();
int32_t Offset = LocMemOffset + BEAlign;
SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
if (IsTailCall) {
Offset = Offset + FPDiff;
int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
DstAddr = DAG.getFrameIndex(FI, PtrVT);
DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
// Make sure any stack arguments overlapping with where we're storing
// are loaded before this eventual operation. Otherwise they'll be
// clobbered.
Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
} else {
SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
DstInfo = MachinePointerInfo::getStack(MF, LocMemOffset);
}
if (Outs[i].Flags.isByVal()) {
SDValue SizeNode =
DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
SDValue Cpy = DAG.getMemcpy(
Chain, DL, DstAddr, Arg, SizeNode,
Outs[i].Flags.getNonZeroByValAlign(),
/*isVol = */ false, /*AlwaysInline = */ false,
/*isTailCall = */ false, DstInfo, MachinePointerInfo());
MemOpChains.push_back(Cpy);
} else {
// Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
// promoted to a legal register type i32, we should truncate Arg back to
// i1/i8/i16.
if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
VA.getValVT() == MVT::i16)
Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
MemOpChains.push_back(Store);
}
}
}
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
for (auto &RegToPass : RegsToPass) {
Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
RegToPass.second, InFlag);
InFlag = Chain.getValue(1);
}
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
auto GV = G->getGlobal();
unsigned OpFlags =
Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine());
if (OpFlags & AArch64II::MO_GOT) {
Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
} else {
const GlobalValue *GV = G->getGlobal();
Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
}
} else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
Subtarget->isTargetMachO()) {
const char *Sym = S->getSymbol();
Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
} else {
const char *Sym = S->getSymbol();
Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
}
}
// We don't usually want to end the call-sequence here because we would tidy
// the frame up *after* the call, however in the ABI-changing tail-call case
// we've carefully laid out the parameters so that when sp is reset they'll be
// in the correct location.
if (IsTailCall && !IsSibCall) {
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
InFlag = Chain.getValue(1);
}
std::vector<SDValue> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
if (IsTailCall) {
// Each tail call may have to adjust the stack by a different amount, so
// this information must travel along with the operation for eventual
// consumption by emitEpilogue.
Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
}
// Add argument registers to the end of the list so that they are known live
// into the call.
for (auto &RegToPass : RegsToPass)
Ops.push_back(DAG.getRegister(RegToPass.first,
RegToPass.second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
const uint32_t *Mask;
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
if (IsThisReturn) {
// For 'this' returns, use the X0-preserving mask if applicable
Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
if (!Mask) {
IsThisReturn = false;
Mask = TRI->getCallPreservedMask(MF, CallConv);
}
} else
Mask = TRI->getCallPreservedMask(MF, CallConv);
if (Subtarget->hasCustomCallingConv())
TRI->UpdateCustomCallPreservedMask(MF, &Mask);
if (TRI->isAnyArgRegReserved(MF))
TRI->emitReservedArgRegCallError(MF);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
if (InFlag.getNode())
Ops.push_back(InFlag);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  // If we're doing a tail call, use a TC_RETURN here rather than an
// actual call instruction.
if (IsTailCall) {
MF.getFrameInfo().setHasTailCall();
SDValue Ret = DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
return Ret;
}
unsigned CallOpc = AArch64ISD::CALL;
// Calls with operand bundle "clang.arc.attachedcall" are special. They should
// be expanded to the call, directly followed by a special marker sequence and
// a call to an ObjC library function. Use CALL_RVMARKER to do that.
if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
assert(!IsTailCall &&
"tail calls cannot be marked with clang.arc.attachedcall");
CallOpc = AArch64ISD::CALL_RVMARKER;
// Add a target global address for the retainRV/claimRV runtime function
// just before the call target.
Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
auto GA = DAG.getTargetGlobalAddress(ARCFn, DL, PtrVT);
Ops.insert(Ops.begin() + 1, GA);
}
// Returns a chain and a flag for retval copy to use.
Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
InFlag = Chain.getValue(1);
DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
uint64_t CalleePopBytes =
DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
DAG.getIntPtrConstant(CalleePopBytes, DL, true),
InFlag, DL);
if (!Ins.empty())
InFlag = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
// return.
return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
InVals, IsThisReturn,
IsThisReturn ? OutVals[0] : SDValue());
}
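// Return true if every value in Outs can be assigned a location by the return
// calling convention selected for CallConv.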
bool AArch64TargetLowering::CanLowerReturn(
CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC);
}
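// Lower an outgoing return: assign the return values to registers using the
// return calling convention, OR together values that land in the same
// location register, copy a saved sret pointer back into X0 where the Windows
// ABI requires it, append any callee-saved registers returned via copy, and
// finish with an AArch64ISD::RET_FLAG node.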
SDValue
AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &DL, SelectionDAG &DAG) const {
auto &MF = DAG.getMachineFunction();
auto *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC);
// Copy the result values into the output registers.
SDValue Flag;
SmallVector<std::pair<unsigned, SDValue>, 4> RetVals;
SmallSet<unsigned, 4> RegsUsed;
for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
++i, ++realRVLocIdx) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
SDValue Arg = OutVals[realRVLocIdx];
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
if (Outs[i].ArgVT == MVT::i1) {
// AAPCS requires i1 to be zero-extended to i8 by the producer of the
// value. This is strictly redundant on Darwin (which uses "zeroext
// i1"), but will be optimised out before ISel.
Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
}
break;
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::AExt:
case CCValAssign::ZExt:
Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
break;
case CCValAssign::AExtUpper:
assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
DAG.getConstant(32, DL, VA.getLocVT()));
break;
}
if (RegsUsed.count(VA.getLocReg())) {
SDValue &Bits =
llvm::find_if(RetVals, [=](const std::pair<unsigned, SDValue> &Elt) {
return Elt.first == VA.getLocReg();
})->second;
Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
} else {
RetVals.emplace_back(VA.getLocReg(), Arg);
RegsUsed.insert(VA.getLocReg());
}
}
SmallVector<SDValue, 4> RetOps(1, Chain);
for (auto &RetVal : RetVals) {
Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(
DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
}
// Windows AArch64 ABIs require that for returning structs by value we copy
// the sret argument into X0 for the return.
// We saved the argument into a virtual register in the entry block,
// so now we copy the value out and into X0.
if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
SDValue Val = DAG.getCopyFromReg(RetOps[0], DL, SRetReg,
getPointerTy(MF.getDataLayout()));
unsigned RetValReg = AArch64::X0;
Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(
DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
}
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(&MF);
if (I) {
for (; *I; ++I) {
if (AArch64::GPR64RegClass.contains(*I))
RetOps.push_back(DAG.getRegister(*I, MVT::i64));
else if (AArch64::FPR64RegClass.contains(*I))
RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
}
}
RetOps[0] = Chain; // Update chain.
// Add the flag if we have it.
if (Flag.getNode())
RetOps.push_back(Flag);
return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
}
//===----------------------------------------------------------------------===//
// Other Lowering Code
//===----------------------------------------------------------------------===//
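// The getTargetNode overloads below convert the various address-carrying
// SDNodes (global addresses, jump tables, constant pools, block addresses)
// into their Target* counterparts, so the address-materialization helpers
// that follow (getGOT, getAddrLarge, getAddr, getAddrTiny) can be written
// generically over the node type.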
SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
SelectionDAG &DAG,
unsigned Flag) const {
return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
N->getOffset(), Flag);
}
SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
SelectionDAG &DAG,
unsigned Flag) const {
return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
}
SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
SelectionDAG &DAG,
unsigned Flag) const {
return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
N->getOffset(), Flag);
}
SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
SelectionDAG &DAG,
unsigned Flag) const {
return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
}
// (loadGOT sym)
template <class NodeTy>
SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG,
unsigned Flags) const {
LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags);
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into two nodes instead of using a wrapper node.
return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
}
// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
template <class NodeTy>
SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG,
unsigned Flags) const {
LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
const unsigned char MO_NC = AArch64II::MO_NC;
return DAG.getNode(
AArch64ISD::WrapperLarge, DL, Ty,
getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
}
// (addlow (adrp %hi(sym)) %lo(sym))
template <class NodeTy>
SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
unsigned Flags) const {
LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
SDValue Lo = getTargetNode(N, Ty, DAG,
AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
}
// (adr sym)
template <class NodeTy>
SDValue AArch64TargetLowering::getAddrTiny(NodeTy *N, SelectionDAG &DAG,
unsigned Flags) const {
LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
}
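// Materialize the address of a global. The reference classification and code
// model select the strategy: a GOT load (LOADgot), the large-code-model
// MOVZ/MOVK wrapper, the tiny-code-model ADR, or the usual ADRP + ADD pair.
// DLL-imported and COFF-stub references then need one extra load through the
// import/stub slot.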
SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GN->getGlobal();
unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
if (OpFlags != AArch64II::MO_NO_FLAG)
assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
"unexpected offset in global node");
// This also catches the large code model case for Darwin, and tiny code
// model with got relocations.
if ((OpFlags & AArch64II::MO_GOT) != 0) {
return getGOT(GN, DAG, OpFlags);
}
SDValue Result;
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
Result = getAddrLarge(GN, DAG, OpFlags);
} else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
Result = getAddrTiny(GN, DAG, OpFlags);
} else {
Result = getAddr(GN, DAG, OpFlags);
}
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(GN);
if (OpFlags & (AArch64II::MO_DLLIMPORT | AArch64II::MO_COFFSTUB))
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
}
/// Convert a TLS address reference into the correct sequence of loads
/// and calls to compute the variable's address (for Darwin, currently) and
/// return an SDValue containing the final node.
/// Darwin only has one TLS scheme which must be capable of dealing with the
/// fully general situation, in the worst case. This means:
/// + "extern __thread" declaration.
/// + Defined in a possibly unknown dynamic library.
///
/// The general system is that each __thread variable has a [3 x i64] descriptor
/// which contains information used by the runtime to calculate the address. The
/// only part of this the compiler needs to know about is the first xword, which
/// contains a function pointer that must be called with the address of the
/// entire descriptor in "x0".
///
/// Since this descriptor may be in a different unit, in general even the
/// descriptor must be accessed via an indirect load. The "ideal" code sequence
/// is:
/// adrp x0, _var@TLVPPAGE
/// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
/// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
/// ; the function pointer
/// blr x1 ; Uses descriptor address in x0
/// ; Address of _var is now in x0.
///
/// If the address of _var's descriptor *is* known to the linker, then it can
/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
/// a slight efficiency gain.
SDValue
AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetDarwin() &&
"This function expects a Darwin target");
SDLoc DL(Op);
MVT PtrVT = getPointerTy(DAG.getDataLayout());
MVT PtrMemVT = getPointerMemTy(DAG.getDataLayout());
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
SDValue TLVPAddr =
DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
// The first entry in the descriptor is a function pointer that we must call
// to obtain the address of the variable.
SDValue Chain = DAG.getEntryNode();
SDValue FuncTLVGet = DAG.getLoad(
PtrMemVT, DL, Chain, DescAddr,
MachinePointerInfo::getGOT(DAG.getMachineFunction()),
Align(PtrMemVT.getSizeInBits() / 8),
MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
Chain = FuncTLVGet.getValue(1);
// Extend loaded pointer if necessary (i.e. if ILP32) to DAG pointer.
FuncTLVGet = DAG.getZExtOrTrunc(FuncTLVGet, DL, PtrVT);
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setAdjustsStack(true);
// TLS calls preserve all registers except those that absolutely must be
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
// silly).
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *Mask = TRI->getTLSCallPreservedMask();
if (Subtarget->hasCustomCallingConv())
TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
// Finally, we can make the call. This is just a degenerate version of a
// normal AArch64 call node: x0 takes the address of the descriptor, and
// returns the address of the variable in this thread.
Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
Chain =
DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
DAG.getRegisterMask(Mask), Chain.getValue(1));
return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
}
/// Convert a thread-local variable reference into a sequence of instructions to
/// compute the variable's address for the local exec TLS model of ELF targets.
/// The sequence depends on the maximum TLS area size.
SDValue AArch64TargetLowering::LowerELFTLSLocalExec(const GlobalValue *GV,
SDValue ThreadBase,
const SDLoc &DL,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue TPOff, Addr;
switch (DAG.getTarget().Options.TLSSize) {
default:
llvm_unreachable("Unexpected TLS size");
case 12: {
// mrs x0, TPIDR_EL0
// add x0, x0, :tprel_lo12:a
SDValue Var = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGEOFF);
return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
Var,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
}
case 24: {
// mrs x0, TPIDR_EL0
// add x0, x0, :tprel_hi12:a
// add x0, x0, :tprel_lo12_nc:a
SDValue HiVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
Addr = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
HiVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, Addr,
LoVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
}
case 32: {
// mrs x1, TPIDR_EL0
// movz x0, #:tprel_g1:a
// movk x0, #:tprel_g0_nc:a
// add x0, x1, x0
SDValue HiVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
DAG.getTargetConstant(16, DL, MVT::i32)),
0);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
}
case 48: {
// mrs x1, TPIDR_EL0
// movz x0, #:tprel_g2:a
// movk x0, #:tprel_g1_nc:a
// movk x0, #:tprel_g0_nc:a
// add x0, x1, x0
SDValue HiVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G2);
SDValue MiVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_G1 | AArch64II::MO_NC);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
DAG.getTargetConstant(32, DL, MVT::i32)),
0);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, MiVar,
DAG.getTargetConstant(16, DL, MVT::i32)),
0);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
}
}
}
/// When accessing thread-local variables under either the general-dynamic or
/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
/// is a function pointer to carry out the resolution.
///
/// The sequence is:
/// adrp x0, :tlsdesc:var
/// ldr x1, [x0, #:tlsdesc_lo12:var]
/// add x0, x0, #:tlsdesc_lo12:var
/// .tlsdesccall var
/// blr x1
/// (TPIDR_EL0 offset now in x0)
///
/// The above sequence must be produced unscheduled, to enable the linker to
/// optimize/relax this sequence.
/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
/// above sequence, and expanded really late in the compilation flow, to ensure
/// the sequence is produced as per above.
SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr,
const SDLoc &DL,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Chain = DAG.getEntryNode();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain =
DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr});
SDValue Glue = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
}
SDValue
AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetELF() && "This function expects an ELF target");
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
if (Model == TLSModel::LocalDynamic)
Model = TLSModel::GeneralDynamic;
}
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
Model != TLSModel::LocalExec)
report_fatal_error("ELF TLS only supported in small memory model or "
"in local exec TLS model");
// Different choices can be made for the maximum size of the TLS area for a
// module. For the small address model, the default TLS size is 16MiB and the
// maximum TLS size is 4GiB.
// FIXME: add tiny and large code model support for TLS access models other
// than local exec. We currently generate the same code as small for tiny,
// which may be larger than needed.
SDValue TPOff;
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
const GlobalValue *GV = GA->getGlobal();
SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
if (Model == TLSModel::LocalExec) {
return LowerELFTLSLocalExec(GV, ThreadBase, DL, DAG);
} else if (Model == TLSModel::InitialExec) {
TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
} else if (Model == TLSModel::LocalDynamic) {
// Local-dynamic accesses proceed in two phases. A general-dynamic TLS
// descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
// the beginning of the module's TLS region, followed by a DTPREL offset
// calculation.
// These accesses will need deduplicating if there's more than one.
AArch64FunctionInfo *MFI =
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
MFI->incNumLocalDynamicTLSAccesses();
// The call needs a relocation too for linker relaxation. It doesn't make
// sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
// the address.
SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
AArch64II::MO_TLS);
// Now we can calculate the offset from TPIDR_EL0 to this module's
// thread-local area.
TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
// Now use :dtprel_whatever: operations to calculate this variable's offset
// in its thread-storage area.
SDValue HiVar = DAG.getTargetGlobalAddress(
GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, MVT::i64, 0,
AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
} else if (Model == TLSModel::GeneralDynamic) {
// The call needs a relocation too for linker relaxation. It doesn't make
// sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
// the address.
SDValue SymAddr =
DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
// Finally we can make a call to calculate the offset from tpidr_el0.
TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
} else
llvm_unreachable("Unsupported ELF TLS access model");
return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
}
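// Windows TLS access, built below: load the TLS array pointer from the TEB
// (x18 + 0x58), load _tls_index, index the array by _tls_index * 8 to reach
// this module's TLS block, then add the variable's offset from the start of
// the .tls section.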
SDValue
AArch64TargetLowering::LowerWindowsGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
SDValue Chain = DAG.getEntryNode();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
SDValue TEB = DAG.getRegister(AArch64::X18, MVT::i64);
// Load the ThreadLocalStoragePointer from the TEB
// A pointer to the TLS array is located at offset 0x58 from the TEB.
SDValue TLSArray =
DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x58, DL));
TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
Chain = TLSArray.getValue(1);
// Load the TLS index from the C runtime;
// This does the same as getAddr(), but without having a GlobalAddressSDNode.
// This also does the same as LOADgot, but using a generic i32 load,
// while LOADgot only loads i64.
SDValue TLSIndexHi =
DAG.getTargetExternalSymbol("_tls_index", PtrVT, AArch64II::MO_PAGE);
SDValue TLSIndexLo = DAG.getTargetExternalSymbol(
"_tls_index", PtrVT, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, TLSIndexHi);
SDValue TLSIndex =
DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, TLSIndexLo);
TLSIndex = DAG.getLoad(MVT::i32, DL, Chain, TLSIndex, MachinePointerInfo());
Chain = TLSIndex.getValue(1);
  // The pointer to the thread's TLS data area for this module is stored at
  // offset TLSIndex * 8 into the TLS array.
TLSIndex = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TLSIndex);
SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
DAG.getConstant(3, DL, PtrVT));
SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
MachinePointerInfo());
Chain = TLS.getValue(1);
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GA->getGlobal();
SDValue TGAHi = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue TGALo = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
// Add the offset from the start of the .tls section (section base).
SDValue Addr =
SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TLS, TGAHi,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
Addr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, Addr, TGALo);
return Addr;
}
SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
if (DAG.getTarget().useEmulatedTLS())
return LowerToTLSEmulatedModel(GA, DAG);
if (Subtarget->isTargetDarwin())
return LowerDarwinGlobalTLSAddress(Op, DAG);
if (Subtarget->isTargetELF())
return LowerELFGlobalTLSAddress(Op, DAG);
if (Subtarget->isTargetWindows())
return LowerWindowsGlobalTLSAddress(Op, DAG);
llvm_unreachable("Unexpected platform trying to use TLS");
}
// Looks through \param Val to determine the bit that can be used to
// check the sign of the value. It returns the unextended value and
// the sign bit position.
std::pair<SDValue, uint64_t> lookThroughSignExtension(SDValue Val) {
if (Val.getOpcode() == ISD::SIGN_EXTEND_INREG)
return {Val.getOperand(0),
cast<VTSDNode>(Val.getOperand(1))->getVT().getFixedSizeInBits() -
1};
if (Val.getOpcode() == ISD::SIGN_EXTEND)
return {Val.getOperand(0),
Val.getOperand(0)->getValueType(0).getFixedSizeInBits() - 1};
return {Val, Val.getValueSizeInBits() - 1};
}
SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue LHS = Op.getOperand(2);
SDValue RHS = Op.getOperand(3);
SDValue Dest = Op.getOperand(4);
SDLoc dl(Op);
MachineFunction &MF = DAG.getMachineFunction();
// Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
// will not be produced, as they are conditional branch instructions that do
// not set flags.
bool ProduceNonFlagSettingCondBr =
!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
// Handle f128 first, since lowering it will result in comparing the return
// value of a libcall against zero, which is just what the rest of LowerBR_CC
// is expecting to deal with.
if (LHS.getValueType() == MVT::f128) {
softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (!RHS.getNode()) {
RHS = DAG.getConstant(0, dl, LHS.getValueType());
CC = ISD::SETNE;
}
}
// Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
// instruction.
if (ISD::isOverflowIntrOpRes(LHS) && isOneConstant(RHS) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
// Only lower legal XALUO ops.
if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
return SDValue();
// The actual operation with overflow check.
AArch64CC::CondCode OFCC;
SDValue Value, Overflow;
std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);
if (CC == ISD::SETNE)
OFCC = getInvertedCondCode(OFCC);
SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
Overflow);
}
if (LHS.getValueType().isInteger()) {
assert((LHS.getValueType() == RHS.getValueType()) &&
(LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
// If the RHS of the comparison is zero, we can potentially fold this
// to a specialized branch.
const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
if (CC == ISD::SETEQ) {
// See if we can use a TBZ to fold in an AND as well.
// TBZ has a smaller branch displacement than CBZ. If the offset is
// out of bounds, a late MI-layer pass rewrites branches.
// 403.gcc is an example that hits this case.
if (LHS.getOpcode() == ISD::AND &&
isa<ConstantSDNode>(LHS.getOperand(1)) &&
isPowerOf2_64(LHS.getConstantOperandVal(1))) {
SDValue Test = LHS.getOperand(0);
uint64_t Mask = LHS.getConstantOperandVal(1);
return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
Dest);
}
return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
} else if (CC == ISD::SETNE) {
// See if we can use a TBZ to fold in an AND as well.
// TBZ has a smaller branch displacement than CBZ. If the offset is
// out of bounds, a late MI-layer pass rewrites branches.
// 403.gcc is an example that hits this case.
if (LHS.getOpcode() == ISD::AND &&
isa<ConstantSDNode>(LHS.getOperand(1)) &&
isPowerOf2_64(LHS.getConstantOperandVal(1))) {
SDValue Test = LHS.getOperand(0);
uint64_t Mask = LHS.getConstantOperandVal(1);
return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
Dest);
}
return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
} else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
// Don't combine AND since emitComparison converts the AND to an ANDS
// (a.k.a. TST) and the test in the test bit and branch instruction
// becomes redundant. This would also increase register pressure.
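        // For example (illustrative, i32): "br_cc setlt x, 0, dest" becomes
        // "tbnz w0, #31, dest" -- only the sign bit needs testing.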
uint64_t SignBitPos;
std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
}
}
if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
LHS.getOpcode() != ISD::AND && ProduceNonFlagSettingCondBr) {
// Don't combine AND since emitComparison converts the AND to an ANDS
// (a.k.a. TST) and the test in the test bit and branch instruction
// becomes redundant. This would also increase register pressure.
uint64_t SignBitPos;
std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
}
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
Cmp);
}
assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::bf16 ||
LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
// clean. Some of them require two branches to implement.
SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
AArch64CC::CondCode CC1, CC2;
changeFPCCToAArch64CC(CC, CC1, CC2);
SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
SDValue BR1 =
DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
if (CC2 != AArch64CC::AL) {
SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
Cmp);
}
return BR1;
}
SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue In1 = Op.getOperand(0);
SDValue In2 = Op.getOperand(1);
EVT SrcVT = In2.getValueType();
if (VT.isScalableVector()) {
if (VT != SrcVT)
return SDValue();
// copysign(x,y) -> (y & SIGN_MASK) | (x & ~SIGN_MASK)
//
// A possible alternative sequence involves using FNEG_MERGE_PASSTHRU;
// maybe useful for copysign operations with mismatched VTs.
//
// IntVT here is chosen so it's a legal type with the same element width
// as the input.
EVT IntVT =
getPackedSVEVectorVT(VT.getVectorElementType().changeTypeToInteger());
unsigned NumBits = VT.getScalarSizeInBits();
SDValue SignMask = DAG.getConstant(APInt::getSignMask(NumBits), DL, IntVT);
SDValue InvSignMask = DAG.getNOT(DL, SignMask, IntVT);
SDValue Sign = DAG.getNode(ISD::AND, DL, IntVT, SignMask,
getSVESafeBitCast(IntVT, In2, DAG));
SDValue Magnitude = DAG.getNode(ISD::AND, DL, IntVT, InvSignMask,
getSVESafeBitCast(IntVT, In1, DAG));
SDValue IntResult = DAG.getNode(ISD::OR, DL, IntVT, Sign, Magnitude);
return getSVESafeBitCast(VT, IntResult, DAG);
}
if (!Subtarget->hasNEON())
return SDValue();
if (SrcVT.bitsLT(VT))
In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
else if (SrcVT.bitsGT(VT))
In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
EVT VecVT;
uint64_t EltMask;
SDValue VecVal1, VecVal2;
  auto setVecVal = [&](int Idx) {
if (!VT.isVector()) {
VecVal1 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
DAG.getUNDEF(VecVT), In1);
VecVal2 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
DAG.getUNDEF(VecVT), In2);
} else {
VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
}
};
if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
EltMask = 0x80000000ULL;
setVecVal(AArch64::ssub);
} else if (VT == MVT::f64 || VT == MVT::v2f64) {
VecVT = MVT::v2i64;
// We want to materialize a mask with the high bit set, but the AdvSIMD
// immediate moves cannot materialize that in a single instruction for
// 64-bit elements. Instead, materialize zero and then negate it.
EltMask = 0;
setVecVal(AArch64::dsub);
} else if (VT == MVT::f16 || VT == MVT::v4f16 || VT == MVT::v8f16) {
VecVT = (VT == MVT::v4f16 ? MVT::v4i16 : MVT::v8i16);
EltMask = 0x8000ULL;
setVecVal(AArch64::hsub);
} else {
llvm_unreachable("Invalid type for copysign!");
}
SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);
// If we couldn't materialize the mask above, then the mask vector will be
// the zero vector, and we need to negate it here.
if (VT == MVT::f64 || VT == MVT::v2f64) {
BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec);
BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec);
BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec);
}
SDValue Sel =
DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);
if (VT == MVT::f16)
return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, Sel);
if (VT == MVT::f32)
return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
else if (VT == MVT::f64)
return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel);
else
return DAG.getNode(ISD::BITCAST, DL, VT, Sel);
}
SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
if (DAG.getMachineFunction().getFunction().hasFnAttribute(
Attribute::NoImplicitFloat))
return SDValue();
if (!Subtarget->hasNEON())
return SDValue();
// While there is no integer popcount instruction, it can
// be more efficiently lowered to the following sequence that uses
// AdvSIMD registers/instructions as long as the copies to/from
// the AdvSIMD registers are cheap.
// FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
// CNT V0.8B, V0.8B // 8xbyte pop-counts
// ADDV B0, V0.8B // sum 8xbyte pop-counts
// UMOV X0, V0.B[0] // copy byte result back to integer reg
SDValue Val = Op.getOperand(0);
SDLoc DL(Op);
EVT VT = Op.getValueType();
if (VT == MVT::i32 || VT == MVT::i64) {
if (VT == MVT::i32)
Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
SDValue UaddLV = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
if (VT == MVT::i64)
UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
return UaddLV;
} else if (VT == MVT::i128) {
Val = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Val);
SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v16i8, Val);
SDValue UaddLV = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, UaddLV);
}
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
"Unexpected type for custom ctpop lowering");
EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
Val = DAG.getBitcast(VT8Bit, Val);
Val = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Val);
// Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
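  // For example, for a v4i32 result: CTPOP on v16i8, then UADDLP v16i8->v8i16,
  // then UADDLP v8i16->v4i32.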
unsigned EltSize = 8;
unsigned NumElts = VT.is64BitVector() ? 8 : 16;
while (EltSize != VT.getScalarSizeInBits()) {
EltSize *= 2;
NumElts /= 2;
MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
Val = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, WidenVT,
DAG.getConstant(Intrinsic::aarch64_neon_uaddlp, DL, MVT::i32), Val);
}
return Val;
}
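// Lower CTTZ for SVE(-sized) vectors using the identity
// cttz(x) == ctlz(bitreverse(x)).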
SDValue AArch64TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isScalableVector() ||
useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true));
SDLoc DL(Op);
SDValue RBIT = DAG.getNode(ISD::BITREVERSE, DL, VT, Op.getOperand(0));
return DAG.getNode(ISD::CTLZ, DL, VT, RBIT);
}
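// Lower SMAX/SMIN/UMAX/UMIN: scalable and SVE-sized fixed-length vectors map
// onto the corresponding predicated node; everything else becomes a SETCC with
// the matching condition code followed by a select.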
SDValue AArch64TargetLowering::LowerMinMax(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
unsigned Opcode = Op.getOpcode();
ISD::CondCode CC;
switch (Opcode) {
default:
llvm_unreachable("Wrong instruction");
case ISD::SMAX:
CC = ISD::SETGT;
break;
case ISD::SMIN:
CC = ISD::SETLT;
break;
case ISD::UMAX:
CC = ISD::SETUGT;
break;
case ISD::UMIN:
CC = ISD::SETULT;
break;
}
if (VT.isScalableVector() ||
useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true)) {
switch (Opcode) {
default:
llvm_unreachable("Wrong instruction");
case ISD::SMAX:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
/*OverrideNEON=*/true);
case ISD::SMIN:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
/*OverrideNEON=*/true);
case ISD::UMAX:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
/*OverrideNEON=*/true);
case ISD::UMIN:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
/*OverrideNEON=*/true);
}
}
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC);
return DAG.getSelect(DL, VT, Cond, Op0, Op1);
}
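// Vector BITREVERSE for NEON: reverse the bytes of each element (REV32/REV64
// on the i8-typed vector), then bit-reverse every byte; together the two steps
// reverse the bits within each element. SVE-sized types use the predicated
// node instead.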
SDValue AArch64TargetLowering::LowerBitreverse(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isScalableVector() ||
useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU,
true);
SDLoc DL(Op);
SDValue REVB;
MVT VST;
switch (VT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("Invalid type for bitreverse!");
case MVT::v2i32: {
VST = MVT::v8i8;
REVB = DAG.getNode(AArch64ISD::REV32, DL, VST, Op.getOperand(0));
break;
}
case MVT::v4i32: {
VST = MVT::v16i8;
REVB = DAG.getNode(AArch64ISD::REV32, DL, VST, Op.getOperand(0));
break;
}
case MVT::v1i64: {
VST = MVT::v8i8;
REVB = DAG.getNode(AArch64ISD::REV64, DL, VST, Op.getOperand(0));
break;
}
case MVT::v2i64: {
VST = MVT::v16i8;
REVB = DAG.getNode(AArch64ISD::REV64, DL, VST, Op.getOperand(0));
break;
}
}
return DAG.getNode(AArch64ISD::NVCAST, DL, VT,
DAG.getNode(ISD::BITREVERSE, DL, VST, REVB));
}
SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
if (Op.getValueType().isVector())
return LowerVSETCC(Op, DAG);
bool IsStrict = Op->isStrictFPOpcode();
bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
unsigned OpNo = IsStrict ? 1 : 0;
SDValue Chain;
if (IsStrict)
Chain = Op.getOperand(0);
SDValue LHS = Op.getOperand(OpNo + 0);
SDValue RHS = Op.getOperand(OpNo + 1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(OpNo + 2))->get();
SDLoc dl(Op);
// We chose ZeroOrOneBooleanContents, so use zero and one.
EVT VT = Op.getValueType();
SDValue TVal = DAG.getConstant(1, dl, VT);
SDValue FVal = DAG.getConstant(0, dl, VT);
// Handle f128 first, since one possible outcome is a normal integer
// comparison which gets picked up by the next if statement.
if (LHS.getValueType() == MVT::f128) {
softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS, Chain,
IsSignaling);
// If softenSetCCOperands returned a scalar, use it.
if (!RHS.getNode()) {
assert(LHS.getValueType() == Op.getValueType() &&
"Unexpected setcc expansion!");
return IsStrict ? DAG.getMergeValues({LHS, Chain}, dl) : LHS;
}
}
if (LHS.getValueType().isInteger()) {
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(
LHS, RHS, ISD::getSetCCInverse(CC, LHS.getValueType()), CCVal, DAG, dl);
// Note that we inverted the condition above, so we reverse the order of
// the true and false operands here. This will allow the setcc to be
// matched to a single CSINC instruction.
SDValue Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
}
// Now we know we're dealing with FP values.
assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
  // We will need an FCMP + CSEL sequence here; go ahead and do the
  // comparison.
SDValue Cmp;
if (IsStrict)
Cmp = emitStrictFPComparison(LHS, RHS, dl, DAG, Chain, IsSignaling);
else
Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
AArch64CC::CondCode CC1, CC2;
changeFPCCToAArch64CC(CC, CC1, CC2);
SDValue Res;
if (CC2 == AArch64CC::AL) {
changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, LHS.getValueType()), CC1,
CC2);
SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
// Note that we inverted the condition above, so we reverse the order of
// the true and false operands here. This will allow the setcc to be
// matched to a single CSINC instruction.
Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
} else {
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
// totally clean. Some of them require two CSELs to implement. As is in
// this case, we emit the first CSEL and then emit a second using the output
// of the first as the RHS. We're effectively OR'ing the two CC's together.
// FIXME: It would be nice if we could match the two CSELs to two CSINCs.
SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
SDValue CS1 =
DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
}
return IsStrict ? DAG.getMergeValues({Res, Cmp.getValue(1)}, dl) : Res;
}
SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
SDValue RHS, SDValue TVal,
SDValue FVal, const SDLoc &dl,
SelectionDAG &DAG) const {
// Handle f128 first, because it will result in a comparison of some RTLIB
// call result against zero.
if (LHS.getValueType() == MVT::f128) {
softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (!RHS.getNode()) {
RHS = DAG.getConstant(0, dl, LHS.getValueType());
CC = ISD::SETNE;
}
}
  // Also handle f16, for which we need to do an f32 comparison.
if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
}
// Next, handle integers.
if (LHS.getValueType().isInteger()) {
assert((LHS.getValueType() == RHS.getValueType()) &&
(LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
// Check for sign pattern (SELECT_CC setgt, iN lhs, -1, 1, -1) and transform
    // into (OR (ASR lhs, N-1), 1), which requires fewer instructions for the
// supported types.
if (CC == ISD::SETGT && RHSC && RHSC->isAllOnes() && CTVal && CFVal &&
CTVal->isOne() && CFVal->isAllOnes() &&
LHS.getValueType() == TVal.getValueType()) {
EVT VT = LHS.getValueType();
SDValue Shift =
DAG.getNode(ISD::SRA, dl, VT, LHS,
DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
return DAG.getNode(ISD::OR, dl, VT, Shift, DAG.getConstant(1, dl, VT));
}
unsigned Opcode = AArch64ISD::CSEL;
// If both the TVal and the FVal are constants, see if we can swap them in
    // order to form a CSINV or CSINC out of them.
if (CTVal && CFVal && CTVal->isAllOnes() && CFVal->isZero()) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
} else if (CTVal && CFVal && CTVal->isOne() && CFVal->isZero()) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
} else if (TVal.getOpcode() == ISD::XOR) {
// If TVal is a NOT we want to swap TVal and FVal so that we can match
// with a CSINV rather than a CSEL.
if (isAllOnesConstant(TVal.getOperand(1))) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
} else if (TVal.getOpcode() == ISD::SUB) {
// If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
// that we can match with a CSNEG rather than a CSEL.
if (isNullConstant(TVal.getOperand(0))) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
} else if (CTVal && CFVal) {
const int64_t TrueVal = CTVal->getSExtValue();
const int64_t FalseVal = CFVal->getSExtValue();
bool Swap = false;
// If both TVal and FVal are constants, see if FVal is the
// inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
// instead of a CSEL in that case.
if (TrueVal == ~FalseVal) {
Opcode = AArch64ISD::CSINV;
} else if (FalseVal > std::numeric_limits<int64_t>::min() &&
TrueVal == -FalseVal) {
Opcode = AArch64ISD::CSNEG;
} else if (TVal.getValueType() == MVT::i32) {
// If our operands are only 32-bit wide, make sure we use 32-bit
// arithmetic for the check whether we can use CSINC. This ensures that
// the addition in the check will wrap around properly in case there is
// an overflow (which would not be the case if we do the check with
// 64-bit arithmetic).
const uint32_t TrueVal32 = CTVal->getZExtValue();
const uint32_t FalseVal32 = CFVal->getZExtValue();
if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
Opcode = AArch64ISD::CSINC;
if (TrueVal32 > FalseVal32) {
Swap = true;
}
}
// 64-bit check whether we can use CSINC.
} else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
Opcode = AArch64ISD::CSINC;
if (TrueVal > FalseVal) {
Swap = true;
}
}
// Swap TVal and FVal if necessary.
if (Swap) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
if (Opcode != AArch64ISD::CSEL) {
// Drop FVal since we can get its value by simply inverting/negating
// TVal.
FVal = TVal;
}
}
// Avoid materializing a constant when possible by reusing a known value in
// a register. However, don't perform this optimization if the known value
// is one, zero or negative one in the case of a CSEL. We can always
// materialize these values using CSINC, CSEL and CSINV with wzr/xzr as the
// FVal, respectively.
ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
!RHSVal->isZero() && !RHSVal->isAllOnes()) {
AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
// Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to
// "a != C ? x : a" to avoid materializing C.
if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
TVal = LHS;
else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
FVal = LHS;
} else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
      assert(CTVal && CFVal && "Expected constant operands for CSNEG.");
// Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
// avoid materializing C.
AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
Opcode = AArch64ISD::CSINV;
TVal = LHS;
FVal = DAG.getConstant(0, dl, FVal.getValueType());
}
}
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
EVT VT = TVal.getValueType();
return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
}
// Now we know we're dealing with FP values.
assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
assert(LHS.getValueType() == RHS.getValueType());
EVT VT = TVal.getValueType();
SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
// clean. Some of them require two CSELs to implement.
AArch64CC::CondCode CC1, CC2;
changeFPCCToAArch64CC(CC, CC1, CC2);
if (DAG.getTarget().Options.UnsafeFPMath) {
// Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
// "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
if (RHSVal && RHSVal->isZero()) {
ConstantFPSDNode *CFVal = dyn_cast<ConstantFPSDNode>(FVal);
ConstantFPSDNode *CTVal = dyn_cast<ConstantFPSDNode>(TVal);
if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) &&
CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
TVal = LHS;
else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) &&
CFVal && CFVal->isZero() &&
FVal.getValueType() == LHS.getValueType())
FVal = LHS;
}
}
// Emit first, and possibly only, CSEL.
SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
// If we need a second CSEL, emit it, using the output of the first as the
// RHS. We're effectively OR'ing the two CC's together.
if (CC2 != AArch64CC::AL) {
SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
}
// Otherwise, return the output of the first CSEL.
return CS1;
}
SDValue AArch64TargetLowering::LowerVECTOR_SPLICE(SDValue Op,
SelectionDAG &DAG) const {
EVT Ty = Op.getValueType();
auto Idx = Op.getConstantOperandAPInt(2);
int64_t IdxVal = Idx.getSExtValue();
assert(Ty.isScalableVector() &&
"Only expect scalable vectors for custom lowering of VECTOR_SPLICE");
// We can use the splice instruction for certain index values where we are
// able to efficiently generate the correct predicate. The index will be
// inverted and used directly as the input to the ptrue instruction, i.e.
// -1 -> vl1, -2 -> vl2, etc. The predicate will then be reversed to get the
// splice predicate. However, we can only do this if we can guarantee that
// there are enough elements in the vector, hence we check the index <= min
// number of elements.
Optional<unsigned> PredPattern;
if (Ty.isScalableVector() && IdxVal < 0 &&
(PredPattern = getSVEPredPatternFromNumElements(std::abs(IdxVal))) !=
None) {
SDLoc DL(Op);
// Create a predicate where all but the last -IdxVal elements are false.
EVT PredVT = Ty.changeVectorElementType(MVT::i1);
SDValue Pred = getPTrue(DAG, DL, PredVT, *PredPattern);
Pred = DAG.getNode(ISD::VECTOR_REVERSE, DL, PredVT, Pred);
// Now splice the two inputs together using the predicate.
return DAG.getNode(AArch64ISD::SPLICE, DL, Ty, Pred, Op.getOperand(0),
Op.getOperand(1));
}
// This will select to an EXT instruction, which has a maximum immediate
// value of 255, hence 2048-bits is the maximum value we can lower.
if (IdxVal >= 0 &&
IdxVal < int64_t(2048 / Ty.getVectorElementType().getSizeInBits()))
return Op;
return SDValue();
}
SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
SelectionDAG &DAG) const {
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue TVal = Op.getOperand(2);
SDValue FVal = Op.getOperand(3);
SDLoc DL(Op);
return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
}
SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
SelectionDAG &DAG) const {
SDValue CCVal = Op->getOperand(0);
SDValue TVal = Op->getOperand(1);
SDValue FVal = Op->getOperand(2);
SDLoc DL(Op);
EVT Ty = Op.getValueType();
if (Ty.isScalableVector()) {
SDValue TruncCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, CCVal);
MVT PredVT = MVT::getVectorVT(MVT::i1, Ty.getVectorElementCount());
SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, TruncCC);
return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
}
if (useSVEForFixedLengthVectorVT(Ty)) {
// FIXME: Ideally this would be the same as above using i1 types, however
// for the moment we can't deal with fixed i1 vector types properly, so
// instead extend the predicate to a result type sized integer vector.
MVT SplatValVT = MVT::getIntegerVT(Ty.getScalarSizeInBits());
MVT PredVT = MVT::getVectorVT(SplatValVT, Ty.getVectorElementCount());
SDValue SplatVal = DAG.getSExtOrTrunc(CCVal, DL, SplatValVT);
SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, SplatVal);
return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
}
// Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
// instruction.
if (ISD::isOverflowIntrOpRes(CCVal)) {
// Only lower legal XALUO ops.
if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
return SDValue();
AArch64CC::CondCode OFCC;
SDValue Value, Overflow;
std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
CCVal, Overflow);
}
// Lower it the same way as we would lower a SELECT_CC node.
ISD::CondCode CC;
SDValue LHS, RHS;
if (CCVal.getOpcode() == ISD::SETCC) {
LHS = CCVal.getOperand(0);
RHS = CCVal.getOperand(1);
CC = cast<CondCodeSDNode>(CCVal.getOperand(2))->get();
} else {
LHS = CCVal;
RHS = DAG.getConstant(0, DL, CCVal.getValueType());
CC = ISD::SETNE;
}
return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
}
SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
SelectionDAG &DAG) const {
  // Jump table entries are emitted as PC-relative offsets, so no additional
  // tweaking is necessary here; just get the address of the jump table.
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
!Subtarget->isTargetMachO()) {
return getAddrLarge(JT, DAG);
} else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
return getAddrTiny(JT, DAG);
}
return getAddr(JT, DAG);
}
SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
SelectionDAG &DAG) const {
  // Jump table entries are emitted as PC-relative offsets, so no additional
  // tweaking is necessary here; just get the address of the jump table.
SDLoc DL(Op);
SDValue JT = Op.getOperand(1);
SDValue Entry = Op.getOperand(2);
int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
AFI->setJumpTableEntryInfo(JTI, 4, nullptr);
SDNode *Dest =
DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
return DAG.getNode(ISD::BRIND, DL, MVT::Other, Op.getOperand(0),
SDValue(Dest, 0));
}
SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
// Use the GOT for the large code model on iOS.
if (Subtarget->isTargetMachO()) {
return getGOT(CP, DAG);
}
return getAddrLarge(CP, DAG);
} else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
return getAddrTiny(CP, DAG);
} else {
return getAddr(CP, DAG);
}
}
SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
!Subtarget->isTargetMachO()) {
return getAddrLarge(BA, DAG);
} else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
return getAddrTiny(BA, DAG);
}
return getAddr(BA, DAG);
}
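// On Darwin, va_list is a single pointer to the first stack-based variadic
// argument, so va_start just stores the address of the varargs stack area.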
SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
SelectionDAG &DAG) const {
AArch64FunctionInfo *FuncInfo =
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
SDLoc DL(Op);
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
getPointerTy(DAG.getDataLayout()));
FR = DAG.getZExtOrTrunc(FR, DL, getPointerMemTy(DAG.getDataLayout()));
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
MachinePointerInfo(SV));
}
SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
SelectionDAG &DAG) const {
AArch64FunctionInfo *FuncInfo =
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
SDLoc DL(Op);
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
? FuncInfo->getVarArgsGPRIndex()
: FuncInfo->getVarArgsStackIndex(),
getPointerTy(DAG.getDataLayout()));
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
MachinePointerInfo(SV));
}
SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
SelectionDAG &DAG) const {
// The layout of the va_list struct is specified in the AArch64 Procedure Call
// Standard, section B.3.
MachineFunction &MF = DAG.getMachineFunction();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
SDValue VAList = Op.getOperand(1);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
SmallVector<SDValue, 4> MemOps;
// void *__stack at offset 0
unsigned Offset = 0;
SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
Stack = DAG.getZExtOrTrunc(Stack, DL, PtrMemVT);
MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
MachinePointerInfo(SV), Align(PtrSize)));
// void *__gr_top at offset 8 (4 on ILP32)
Offset += PtrSize;
int GPRSize = FuncInfo->getVarArgsGPRSize();
if (GPRSize > 0) {
SDValue GRTop, GRTopAddr;
GRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Offset, DL, PtrVT));
GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
DAG.getConstant(GPRSize, DL, PtrVT));
GRTop = DAG.getZExtOrTrunc(GRTop, DL, PtrMemVT);
MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
MachinePointerInfo(SV, Offset),
Align(PtrSize)));
}
// void *__vr_top at offset 16 (8 on ILP32)
Offset += PtrSize;
int FPRSize = FuncInfo->getVarArgsFPRSize();
if (FPRSize > 0) {
SDValue VRTop, VRTopAddr;
VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Offset, DL, PtrVT));
VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
DAG.getConstant(FPRSize, DL, PtrVT));
VRTop = DAG.getZExtOrTrunc(VRTop, DL, PtrMemVT);
MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
MachinePointerInfo(SV, Offset),
Align(PtrSize)));
}
// int __gr_offs at offset 24 (12 on ILP32)
Offset += PtrSize;
SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Offset, DL, PtrVT));
MemOps.push_back(
DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32),
GROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
// int __vr_offs at offset 28 (16 on ILP32)
Offset += 4;
SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Offset, DL, PtrVT));
MemOps.push_back(
DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32),
VROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
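// For reference only (not part of this change), the va_list object populated
// above corresponds to the AAPCS64 (section B.3) layout sketched below; the
// offsets shown assume LP64 and the pointer offsets halve under ILP32:
//
//   typedef struct {
//     void *__stack;   // next stacked argument,            offset 0
//     void *__gr_top;  // end of the GP register save area, offset 8
//     void *__vr_top;  // end of the FP/SIMD save area,     offset 16
//     int   __gr_offs; //                                   offset 24
//     int   __vr_offs; //                                   offset 28
//   } va_list;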
SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()))
return LowerWin64_VASTART(Op, DAG);
else if (Subtarget->isTargetDarwin())
return LowerDarwin_VASTART(Op, DAG);
else
return LowerAAPCS_VASTART(Op, DAG);
}
SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
SelectionDAG &DAG) const {
// AAPCS has three pointers and two ints (= 32 bytes); Darwin has a single
// pointer.
SDLoc DL(Op);
unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
unsigned VaListSize =
(Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
? PtrSize
: Subtarget->isTargetILP32() ? 20 : 32;
const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), Op.getOperand(2),
DAG.getConstant(VaListSize, DL, MVT::i32),
Align(PtrSize), false, false, false,
MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
}
SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->isTargetDarwin() &&
"automatic va_arg instruction only works on Darwin");
const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
SDValue Addr = Op.getOperand(1);
MaybeAlign Align(Op.getConstantOperandVal(3));
unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
auto PtrVT = getPointerTy(DAG.getDataLayout());
auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
SDValue VAList =
DAG.getLoad(PtrMemVT, DL, Chain, Addr, MachinePointerInfo(V));
Chain = VAList.getValue(1);
VAList = DAG.getZExtOrTrunc(VAList, DL, PtrVT);
if (VT.isScalableVector())
report_fatal_error("Passing SVE types to variadic functions is "
"currently not supported");
if (Align && *Align > MinSlotSize) {
VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Align->value() - 1, DL, PtrVT));
VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
DAG.getConstant(-(int64_t)Align->value(), DL, PtrVT));
}
Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
unsigned ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
// Scalar integer and FP values smaller than 64 bits are implicitly extended
// up to 64 bits. At the very least, we have to increase the striding of the
// vaargs list to match this, and for FP values we need to introduce
// FP_ROUND nodes as well.
if (VT.isInteger() && !VT.isVector())
ArgSize = std::max(ArgSize, MinSlotSize);
bool NeedFPTrunc = false;
if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
ArgSize = 8;
NeedFPTrunc = true;
}
// Increment the pointer, VAList, to the next vaarg
SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(ArgSize, DL, PtrVT));
VANext = DAG.getZExtOrTrunc(VANext, DL, PtrMemVT);
// Store the incremented VAList to the legalized pointer
SDValue APStore =
DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
// Load the actual argument out of the pointer VAList
if (NeedFPTrunc) {
// Load the value as an f64.
SDValue WideFP =
DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
// Round the value down to an f32.
SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
DAG.getIntPtrConstant(1, DL));
SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
// Merge the rounded value with the chain output of the load.
return DAG.getMergeValues(Ops, DL);
}
return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
}
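// Illustrative sketch (not part of this change): on Darwin a float fetched
// with va_arg occupies a full 8-byte slot, so the lowering above roughly
// amounts to:
//
//   p      = *ap;              // load the current va_list pointer
//   *ap    = p + 8;            // bump it by the (extended) slot size
//   wide   = *(double *)p;     // load the promoted f64 value
//   result = (float)wide;      // FP_ROUND back down to f32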
SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc DL(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDValue FrameAddr =
DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, MVT::i64);
while (Depth--)
FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
MachinePointerInfo());
if (Subtarget->isTargetILP32())
FrameAddr = DAG.getNode(ISD::AssertZext, DL, MVT::i64, FrameAddr,
DAG.getValueType(VT));
return FrameAddr;
}
SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op,
SelectionDAG &DAG) const {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
EVT VT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
int FI = MFI.CreateFixedObject(4, 0, false);
return DAG.getFrameIndex(FI, VT);
}
#define GET_REGISTER_MATCHER
#include "AArch64GenAsmMatcher.inc"
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
Register AArch64TargetLowering::
getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const {
Register Reg = MatchRegisterName(RegName);
if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
const MCRegisterInfo *MRI = Subtarget->getRegisterInfo();
unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
if (!Subtarget->isXRegisterReserved(DwarfRegNum))
Reg = 0;
}
if (Reg)
return Reg;
report_fatal_error(Twine("Invalid register name \""
+ StringRef(RegName) + "\"."));
}
SDValue AArch64TargetLowering::LowerADDROFRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue FrameAddr =
DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
return DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset);
}
SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setReturnAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc DL(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDValue ReturnAddress;
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
ReturnAddress = DAG.getLoad(
VT, DL, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), MachinePointerInfo());
} else {
// Return LR, which contains the return address. Mark it an implicit
// live-in.
Register Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
ReturnAddress = DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
}
// The XPACLRI instruction assembles to a hint-space instruction before
// Armv8.3-A, so it can safely be used on any pre-Armv8.3-A architecture.
// On Armv8.3-A and onwards, XPACI is available, so use that instead.
SDNode *St;
if (Subtarget->hasPAuth()) {
St = DAG.getMachineNode(AArch64::XPACI, DL, VT, ReturnAddress);
} else {
// XPACLRI operates on LR therefore we must move the operand accordingly.
SDValue Chain =
DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::LR, ReturnAddress);
St = DAG.getMachineNode(AArch64::XPACLRI, DL, VT, Chain);
}
return SDValue(St, 0);
}
/// LowerShiftParts - Lower SHL_PARTS/SRA_PARTS/SRL_PARTS, which return two
/// i32 values and take a 2 x i32 value to shift plus a shift amount.
SDValue AArch64TargetLowering::LowerShiftParts(SDValue Op,
SelectionDAG &DAG) const {
SDValue Lo, Hi;
expandShiftParts(Op.getNode(), Lo, Hi, DAG);
return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
}
bool AArch64TargetLowering::isOffsetFoldingLegal(
const GlobalAddressSDNode *GA) const {
// Offsets are folded in the DAG combine rather than here so that we can
// intelligently choose an offset based on the uses.
return false;
}
bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool OptForSize) const {
bool IsLegal = false;
// We can materialize #0.0 as fmov $Rd, XZR for the 64-bit and 32-bit cases,
// and for the 16-bit case when the target has full fp16 support.
// FIXME: We should be able to handle f128 as well with a clever lowering.
const APInt ImmInt = Imm.bitcastToAPInt();
if (VT == MVT::f64)
IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
else if (VT == MVT::f32)
IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
else if (VT == MVT::f16 && Subtarget->hasFullFP16())
IsLegal = AArch64_AM::getFP16Imm(ImmInt) != -1 || Imm.isPosZero();
// TODO: fmov h0, w0 is also legal, however we don't have an isel pattern to
// generate that fmov.
// If we cannot materialize the value in fmov's immediate field, check if it
// can be encoded as the immediate operand of a logical instruction.
// The immediate value will be created with either MOVZ, MOVN, or ORR.
if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
// The cost is actually exactly the same for mov+fmov vs. adrp+ldr;
// however the mov+fmov sequence is always better because of the reduced
// cache pressure. The timings are still the same if you consider
// movw+movk+fmov vs. adrp+ldr (it's one instruction longer, but the
// movw+movk is fused). So we limit it to at most 2 instructions.
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
AArch64_IMM::expandMOVImm(ImmInt.getZExtValue(), VT.getSizeInBits(),
Insn);
unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2));
IsLegal = Insn.size() <= Limit;
}
LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT.getEVTString()
<< " imm value: "; Imm.dump(););
return IsLegal;
}
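// Rough examples of the decisions above (illustrative only): a value such as
// 1.0 fits the 8-bit fmov encoding and is materialized directly,
//   fmov d0, #1.00000000
// while 2^-40 is not fmov-encodable but expands to a short integer sequence,
//   mov  x8, #0x3d70000000000000   // single MOVZ, expanded by expandMOVImm
//   fmov d0, x8
// and anything needing more instructions than the limit computed above is
// left to a constant-pool load (adrp + ldr).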
//===----------------------------------------------------------------------===//
// AArch64 Optimization Hooks
//===----------------------------------------------------------------------===//
static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
SDValue Operand, SelectionDAG &DAG,
int &ExtraSteps) {
EVT VT = Operand.getValueType();
if ((ST->hasNEON() &&
(VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
VT == MVT::f32 || VT == MVT::v1f32 || VT == MVT::v2f32 ||
VT == MVT::v4f32)) ||
(ST->hasSVE() &&
(VT == MVT::nxv8f16 || VT == MVT::nxv4f32 || VT == MVT::nxv2f64))) {
if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
// For the reciprocal estimates, convergence is quadratic, so the number
// of digits is doubled after each iteration. In ARMv8, the accuracy of
// the initial estimate is 2^-8. Thus the number of extra steps to refine
// the result for float (23 mantissa bits) is 2 and for double (52
// mantissa bits) is 3.
ExtraSteps = VT.getScalarType() == MVT::f64 ? 3 : 2;
return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
}
return SDValue();
}
SDValue
AArch64TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
const DenormalMode &Mode) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
}
SDValue
AArch64TargetLowering::getSqrtResultForDenormInput(SDValue Op,
SelectionDAG &DAG) const {
return Op;
}
SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
SelectionDAG &DAG, int Enabled,
int &ExtraSteps,
bool &UseOneConst,
bool Reciprocal) const {
if (Enabled == ReciprocalEstimate::Enabled ||
(Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand,
DAG, ExtraSteps)) {
SDLoc DL(Operand);
EVT VT = Operand.getValueType();
SDNodeFlags Flags;
Flags.setAllowReassociation(true);
// Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
// AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
for (int i = ExtraSteps; i > 0; --i) {
SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate,
Flags);
Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
}
if (!Reciprocal)
Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, Flags);
ExtraSteps = 0;
return Estimate;
}
return SDValue();
}
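// Sketch of the refinement loop above for a scalar f32 input d (illustrative
// only). FRSQRTE gives an initial estimate e ~= 1/sqrt(d), and each FRSQRTS
// computes (3 - a*b)/2, so one Newton step is:
//
//   frsqrte s1, s0          // e0
//   fmul    s2, s1, s1      // e0 * e0
//   frsqrts s2, s0, s2      // (3 - d * e0^2) / 2
//   fmul    s1, s1, s2      // e1 = e0 * (3 - d * e0^2) / 2
//
// Two such steps are emitted for f32 and three for f64, matching ExtraSteps.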
SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
SelectionDAG &DAG, int Enabled,
int &ExtraSteps) const {
if (Enabled == ReciprocalEstimate::Enabled)
if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRECPE, Operand,
DAG, ExtraSteps)) {
SDLoc DL(Operand);
EVT VT = Operand.getValueType();
SDNodeFlags Flags;
Flags.setAllowReassociation(true);
// Newton reciprocal iteration: E * (2 - X * E)
// AArch64 reciprocal iteration instruction: (2 - M * N)
for (int i = ExtraSteps; i > 0; --i) {
SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand,
Estimate, Flags);
Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
}
ExtraSteps = 0;
return Estimate;
}
return SDValue();
}
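// Likewise for the reciprocal (illustrative only): FRECPE gives e ~= 1/d and
// FRECPS computes (2 - a*b), so one refinement step is:
//
//   frecpe s1, s0           // e0
//   frecps s2, s0, s1       // 2 - d * e0
//   fmul   s1, s1, s2       // e1 = e0 * (2 - d * e0)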
//===----------------------------------------------------------------------===//
// AArch64 Inline Assembly Support
//===----------------------------------------------------------------------===//
// Table of Constraints
// TODO: This is the current set of constraints supported by ARM for the
// compiler, not all of them may make sense.
//
// r - A general register
// w - An FP/SIMD register of some size in the range v0-v31
// x - An FP/SIMD register of some size in the range v0-v15
// I - Constant that can be used with an ADD instruction
// J - Constant that can be used with a SUB instruction
// K - Constant that can be used with a 32-bit logical instruction
// L - Constant that can be used with a 64-bit logical instruction
// M - Constant that can be used as a 32-bit MOV immediate
// N - Constant that can be used as a 64-bit MOV immediate
// Q - A memory reference with base register and no offset
// S - A symbolic address
// Y - Floating point constant zero
// Z - Integer constant zero
//
// Note that general register operands will be output using their 64-bit x
// register name, whatever the size of the variable, unless the asm operand
// is prefixed by the %w modifier. Floating-point and SIMD register operands
// will be output with the v prefix unless prefixed by the %b, %h, %s, %d or
// %q modifier.
const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
// At this point, we have to lower this constraint to something else, so we
// lower it to an "r" or "w". However, by doing this we will force the result
// to be in register, while the X constraint is much more permissive.
//
// Although we are correct (we are free to emit anything, without
// constraints), we might break use cases that would expect us to be more
// efficient and emit something else.
if (!Subtarget->hasFPARMv8())
return "r";
if (ConstraintVT.isFloatingPoint())
return "w";
if (ConstraintVT.isVector() &&
(ConstraintVT.getSizeInBits() == 64 ||
ConstraintVT.getSizeInBits() == 128))
return "w";
return "r";
}
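// As an illustration only (not part of this change), the single-letter
// constraints documented in the table above are what front ends pass down
// from GNU-style inline assembly, e.g.:
//
//   int add_imm(int a) {
//     int r;
//     __asm__("add %w0, %w1, %2" : "=r"(r) : "r"(a), "I"(42));
//     return r;
//   }
//
// Here "r" selects a general-purpose register and "I" accepts an ADD-style
// immediate in the range 0-4095, optionally shifted left by 12.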
enum PredicateConstraint {
Upl,
Upa,
Invalid
};
static PredicateConstraint parsePredicateConstraint(StringRef Constraint) {
PredicateConstraint P = PredicateConstraint::Invalid;
if (Constraint == "Upa")
P = PredicateConstraint::Upa;
if (Constraint == "Upl")
P = PredicateConstraint::Upl;
return P;
}
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
AArch64TargetLowering::ConstraintType
AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default:
break;
case 'x':
case 'w':
case 'y':
return C_RegisterClass;
// An address with a single base register. Due to the way we
// currently handle addresses it is the same as 'r'.
case 'Q':
return C_Memory;
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
case 'Y':
case 'Z':
return C_Immediate;
case 'z':
case 'S': // A symbolic address
return C_Other;
}
} else if (parsePredicateConstraint(Constraint) !=
PredicateConstraint::Invalid)
return C_RegisterClass;
return TargetLowering::getConstraintType(Constraint);
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
AArch64TargetLowering::getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const {
ConstraintWeight weight = CW_Invalid;
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
if (!CallOperandVal)
return CW_Default;
Type *type = CallOperandVal->getType();
// Look at the constraint type.
switch (*constraint) {
default:
weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
break;
case 'x':
case 'w':
case 'y':
if (type->isFloatingPointTy() || type->isVectorTy())
weight = CW_Register;
break;
case 'z':
weight = CW_Constant;
break;
case 'U':
if (parsePredicateConstraint(constraint) != PredicateConstraint::Invalid)
weight = CW_Register;
break;
}
return weight;
}
std::pair<unsigned, const TargetRegisterClass *>
AArch64TargetLowering::getRegForInlineAsmConstraint(
const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
if (VT.isScalableVector())
return std::make_pair(0U, nullptr);
if (Subtarget->hasLS64() && VT.getSizeInBits() == 512)
return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass);
if (VT.getFixedSizeInBits() == 64)
return std::make_pair(0U, &AArch64::GPR64commonRegClass);
return std::make_pair(0U, &AArch64::GPR32commonRegClass);
case 'w': {
if (!Subtarget->hasFPARMv8())
break;
if (VT.isScalableVector()) {
if (VT.getVectorElementType() != MVT::i1)
return std::make_pair(0U, &AArch64::ZPRRegClass);
return std::make_pair(0U, nullptr);
}
uint64_t VTSize = VT.getFixedSizeInBits();
if (VTSize == 16)
return std::make_pair(0U, &AArch64::FPR16RegClass);
if (VTSize == 32)
return std::make_pair(0U, &AArch64::FPR32RegClass);
if (VTSize == 64)
return std::make_pair(0U, &AArch64::FPR64RegClass);
if (VTSize == 128)
return std::make_pair(0U, &AArch64::FPR128RegClass);
break;
}
// The instructions that this constraint is designed for can
// only take 128-bit registers so just use that regclass.
case 'x':
if (!Subtarget->hasFPARMv8())
break;
if (VT.isScalableVector())
return std::make_pair(0U, &AArch64::ZPR_4bRegClass);
if (VT.getSizeInBits() == 128)
return std::make_pair(0U, &AArch64::FPR128_loRegClass);
break;
case 'y':
if (!Subtarget->hasFPARMv8())
break;
if (VT.isScalableVector())
return std::make_pair(0U, &AArch64::ZPR_3bRegClass);
break;
}
} else {
PredicateConstraint PC = parsePredicateConstraint(Constraint);
if (PC != PredicateConstraint::Invalid) {
if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
return std::make_pair(0U, nullptr);
bool restricted = (PC == PredicateConstraint::Upl);
return restricted ? std::make_pair(0U, &AArch64::PPR_3bRegClass)
: std::make_pair(0U, &AArch64::PPRRegClass);
}
}
if (StringRef("{cc}").equals_insensitive(Constraint))
return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
// Use the default implementation in TargetLowering to convert the register
// constraint into a member of a register class.
std::pair<unsigned, const TargetRegisterClass *> Res;
Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
// Not found as a standard register?
if (!Res.second) {
unsigned Size = Constraint.size();
if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
int RegNo;
bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
if (!Failed && RegNo >= 0 && RegNo <= 31) {
// v0 - v31 are aliases of q0 - q31 or d0 - d31 depending on size.
// By default we'll emit v0-v31 for this unless there's a modifier where
// we'll emit the correct register as well.
if (VT != MVT::Other && VT.getSizeInBits() == 64) {
Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
Res.second = &AArch64::FPR64RegClass;
} else {
Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
Res.second = &AArch64::FPR128RegClass;
}
}
}
}
if (Res.second && !Subtarget->hasFPARMv8() &&
!AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
!AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
return std::make_pair(0U, nullptr);
return Res;
}
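// For illustration (assumed IR syntax, not taken from this patch): the
// explicit "{v<N>}" handling above corresponds to IR-level inline asm such as
//
//   call <4 x float> asm "fadd $0.4s, $1.4s, $2.4s", "={v0},{v1},{v2}"(...)
//
// where v0-v31 are resolved to FPR64 or FPR128 depending on the operand type.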
EVT AArch64TargetLowering::getAsmOperandValueType(const DataLayout &DL,
llvm::Type *Ty,
bool AllowUnknown) const {
if (Subtarget->hasLS64() && Ty->isIntegerTy(512))
return EVT(MVT::i64x8);
return TargetLowering::getAsmOperandValueType(DL, Ty, AllowUnknown);
}
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void AArch64TargetLowering::LowerAsmOperandForConstraint(
SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
SDValue Result;
// Currently only support length 1 constraints.
if (Constraint.length() != 1)
return;
char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
default:
break;
// This set of constraints deal with valid constants for various instructions.
// Validate and return a target constant for them if we can.
case 'z': {
// 'z' maps to xzr or wzr so it needs an input of 0.
if (!isNullConstant(Op))
return;
if (Op.getValueType() == MVT::i64)
Result = DAG.getRegister(AArch64::XZR, MVT::i64);
else
Result = DAG.getRegister(AArch64::WZR, MVT::i32);
break;
}
case 'S': {
// An absolute symbolic address or label reference.
if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
GA->getValueType(0));
} else if (const BlockAddressSDNode *BA =
dyn_cast<BlockAddressSDNode>(Op)) {
Result =
DAG.getTargetBlockAddress(BA->getBlockAddress(), BA->getValueType(0));
} else
return;
break;
}
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
if (!C)
return;
// Grab the value and do some validation.
uint64_t CVal = C->getZExtValue();
switch (ConstraintLetter) {
// The I constraint applies only to simple ADD or SUB immediate operands:
// i.e. 0 to 4095 with optional shift by 12
// The J constraint applies only to ADD or SUB immediates that would be
// valid when negated, i.e. if [an add pattern] were to be output as a SUB
// instruction [or vice versa], in other words -1 to -4095 with optional
// left shift by 12.
case 'I':
if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
break;
return;
case 'J': {
uint64_t NVal = -C->getSExtValue();
if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
CVal = C->getSExtValue();
break;
}
return;
}
// The K and L constraints apply *only* to logical immediates, including
// what used to be the MOVI alias for ORR (though the MOVI alias has now
// been removed and MOV should be used). So these constraints have to
// distinguish between bit patterns that are valid 32-bit or 64-bit
// "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but
// not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice
// versa.
case 'K':
if (AArch64_AM::isLogicalImmediate(CVal, 32))
break;
return;
case 'L':
if (AArch64_AM::isLogicalImmediate(CVal, 64))
break;
return;
// The M and N constraints are a superset of K and L respectively, for use
// with the MOV (immediate) alias. As well as the logical immediates they
// also match 32 or 64-bit immediates that can be loaded either using a
// *single* MOVZ or MOVN, such as 32-bit 0x12340000, 0x00001234, 0xffffedca
// (M) or 64-bit 0x1234000000000000 (N) etc.
// As a note some of this code is liberally stolen from the asm parser.
case 'M': {
if (!isUInt<32>(CVal))
return;
if (AArch64_AM::isLogicalImmediate(CVal, 32))
break;
if ((CVal & 0xFFFF) == CVal)
break;
if ((CVal & 0xFFFF0000ULL) == CVal)
break;
uint64_t NCVal = ~(uint32_t)CVal;
if ((NCVal & 0xFFFFULL) == NCVal)
break;
if ((NCVal & 0xFFFF0000ULL) == NCVal)
break;
return;
}
case 'N': {
if (AArch64_AM::isLogicalImmediate(CVal, 64))
break;
if ((CVal & 0xFFFFULL) == CVal)
break;
if ((CVal & 0xFFFF0000ULL) == CVal)
break;
if ((CVal & 0xFFFF00000000ULL) == CVal)
break;
if ((CVal & 0xFFFF000000000000ULL) == CVal)
break;
uint64_t NCVal = ~CVal;
if ((NCVal & 0xFFFFULL) == NCVal)
break;
if ((NCVal & 0xFFFF0000ULL) == NCVal)
break;
if ((NCVal & 0xFFFF00000000ULL) == NCVal)
break;
if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
break;
return;
}
default:
return;
}
// All assembler immediates are 64-bit integers.
Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64);
break;
}
if (Result.getNode()) {
Ops.push_back(Result);
return;
}
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
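// A few concrete values for the immediate constraints validated above
// (illustrative only):
//   'I': 0..4095, optionally shifted left by 12 (e.g. 4095 or 0xfff000)
//   'K': 0xaaaaaaaa         (valid 32-bit logical immediate)
//   'L': 0xaaaaaaaaaaaaaaaa (valid 64-bit logical immediate)
//   'M': 0x12340000         (single 32-bit MOVZ)
//   'N': 0x1234000000000000 (single 64-bit MOVZ)
// e.g. __asm__("and %w0, %w1, %2" : "=r"(r) : "r"(a), "K"(0xaaaaaaaa));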
//===----------------------------------------------------------------------===//
// AArch64 Advanced SIMD Support
//===----------------------------------------------------------------------===//
/// WidenVector - Given a value in the V64 register class, produce the
/// equivalent value in the V128 register class.
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
EVT VT = V64Reg.getValueType();
unsigned NarrowSize = VT.getVectorNumElements();
MVT EltTy = VT.getVectorElementType().getSimpleVT();
MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
SDLoc DL(V64Reg);
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
V64Reg, DAG.getConstant(0, DL, MVT::i64));
}
/// getExtFactor - Determine the adjustment factor for the position when
/// generating an "extract from vector registers" instruction.
static unsigned getExtFactor(SDValue &V) {
EVT EltType = V.getValueType().getVectorElementType();
return EltType.getSizeInBits() / 8;
}
/// NarrowVector - Given a value in the V128 register class, produce the
/// equivalent value in the V64 register class.
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
EVT VT = V128Reg.getValueType();
unsigned WideSize = VT.getVectorNumElements();
MVT EltTy = VT.getVectorElementType().getSimpleVT();
MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
SDLoc DL(V128Reg);
return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg);
}
// Gather data to see if the operation can be modelled as a
// shuffle in combination with VEXTs.
SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
LLVM_DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
SDLoc dl(Op);
EVT VT = Op.getValueType();
assert(!VT.isScalableVector() &&
"Scalable vectors cannot be used with ISD::BUILD_VECTOR");
unsigned NumElts = VT.getVectorNumElements();
struct ShuffleSourceInfo {
SDValue Vec;
unsigned MinElt;
unsigned MaxElt;
// We may insert some combination of BITCASTs and VEXT nodes to force Vec to
// be compatible with the shuffle we intend to construct. As a result
// ShuffleVec will be some sliding window into the original Vec.
SDValue ShuffleVec;
// Code should guarantee that element i in Vec starts at element "WindowBase
// + i * WindowScale in ShuffleVec".
int WindowBase;
int WindowScale;
ShuffleSourceInfo(SDValue Vec)
: Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
};
// First gather all vectors used as an immediate source for this BUILD_VECTOR
// node.
SmallVector<ShuffleSourceInfo, 2> Sources;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.isUndef())
continue;
else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
- !isa<ConstantSDNode>(V.getOperand(1))) {
+ !isa<ConstantSDNode>(V.getOperand(1)) ||
+ V.getOperand(0).getValueType().isScalableVector()) {
LLVM_DEBUG(
dbgs() << "Reshuffle failed: "
"a shuffle can only come from building a vector from "
- "various elements of other vectors, provided their "
- "indices are constant\n");
+ "various elements of other fixed-width vectors, provided "
+ "their indices are constant\n");
return SDValue();
}
// Add this element source to the list if it's not already there.
SDValue SourceVec = V.getOperand(0);
auto Source = find(Sources, SourceVec);
if (Source == Sources.end())
Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
// Update the minimum and maximum lane number seen.
unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
Source->MinElt = std::min(Source->MinElt, EltNo);
Source->MaxElt = std::max(Source->MaxElt, EltNo);
}
if (Sources.size() > 2) {
LLVM_DEBUG(
dbgs() << "Reshuffle failed: currently only do something sane when at "
"most two source vectors are involved\n");
return SDValue();
}
// Find out the smallest element size among result and two sources, and use
// it as element size to build the shuffle_vector.
EVT SmallestEltTy = VT.getVectorElementType();
for (auto &Source : Sources) {
EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
if (SrcEltTy.bitsLT(SmallestEltTy)) {
SmallestEltTy = SrcEltTy;
}
}
unsigned ResMultiplier =
VT.getScalarSizeInBits() / SmallestEltTy.getFixedSizeInBits();
uint64_t VTSize = VT.getFixedSizeInBits();
NumElts = VTSize / SmallestEltTy.getFixedSizeInBits();
EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
// If the source vector is too wide or too narrow, we may nevertheless be able
// to construct a compatible shuffle either by concatenating it with UNDEF or
// extracting a suitable range of elements.
for (auto &Src : Sources) {
EVT SrcVT = Src.ShuffleVec.getValueType();
- uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
- if (SrcVTSize == VTSize)
+ TypeSize SrcVTSize = SrcVT.getSizeInBits();
+ if (SrcVTSize == TypeSize::Fixed(VTSize))
continue;
// This stage of the search produces a source with the same element type as
// the original, but with a total width matching the BUILD_VECTOR output.
EVT EltVT = SrcVT.getVectorElementType();
unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
- if (SrcVTSize < VTSize) {
+ if (SrcVTSize.getFixedValue() < VTSize) {
assert(2 * SrcVTSize == VTSize);
// We can pad out the smaller vector for free, so if it's part of a
// shuffle...
Src.ShuffleVec =
DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
DAG.getUNDEF(Src.ShuffleVec.getValueType()));
continue;
}
- if (SrcVTSize != 2 * VTSize) {
+ if (SrcVTSize.getFixedValue() != 2 * VTSize) {
LLVM_DEBUG(
dbgs() << "Reshuffle failed: result vector too small to extract\n");
return SDValue();
}
if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
LLVM_DEBUG(
dbgs() << "Reshuffle failed: span too large for a VEXT to cope\n");
return SDValue();
}
if (Src.MinElt >= NumSrcElts) {
// The extraction can just take the second half
Src.ShuffleVec =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(NumSrcElts, dl, MVT::i64));
Src.WindowBase = -NumSrcElts;
} else if (Src.MaxElt < NumSrcElts) {
// The extraction can just take the first half
Src.ShuffleVec =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(0, dl, MVT::i64));
} else {
// An actual VEXT is needed
SDValue VEXTSrc1 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(0, dl, MVT::i64));
SDValue VEXTSrc2 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(NumSrcElts, dl, MVT::i64));
unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
if (!SrcVT.is64BitVector()) {
LLVM_DEBUG(
dbgs() << "Reshuffle failed: don't know how to lower AArch64ISD::EXT "
"for SVE vectors.");
return SDValue();
}
Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
VEXTSrc2,
DAG.getConstant(Imm, dl, MVT::i32));
Src.WindowBase = -Src.MinElt;
}
}
// Another possible incompatibility occurs from the vector element types. We
// can fix this by bitcasting the source vectors to the same type we intend
// for the shuffle.
for (auto &Src : Sources) {
EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
if (SrcEltTy == SmallestEltTy)
continue;
assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
Src.WindowScale =
SrcEltTy.getFixedSizeInBits() / SmallestEltTy.getFixedSizeInBits();
Src.WindowBase *= Src.WindowScale;
}
// Final check before we try to actually produce a shuffle.
LLVM_DEBUG(for (auto Src
: Sources)
assert(Src.ShuffleVec.getValueType() == ShuffleVT););
// The stars all align, our next step is to produce the mask for the shuffle.
SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
SDValue Entry = Op.getOperand(i);
if (Entry.isUndef())
continue;
auto Src = find(Sources, Entry.getOperand(0));
int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
// EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
// trunc. So only std::min(SrcBits, DestBits) actually get defined in this
// segment.
EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
VT.getScalarSizeInBits());
int LanesDefined = BitsDefined / BitsPerShuffleLane;
// This source is expected to fill ResMultiplier lanes of the final shuffle,
// starting at the appropriate offset.
int *LaneMask = &Mask[i * ResMultiplier];
int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
ExtractBase += NumElts * (Src - Sources.begin());
for (int j = 0; j < LanesDefined; ++j)
LaneMask[j] = ExtractBase + j;
}
// Final check before we try to produce nonsense...
if (!isShuffleMaskLegal(Mask, ShuffleVT)) {
LLVM_DEBUG(dbgs() << "Reshuffle failed: illegal shuffle mask\n");
return SDValue();
}
SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
for (unsigned i = 0; i < Sources.size(); ++i)
ShuffleOps[i] = Sources[i].ShuffleVec;
SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
ShuffleOps[1], Mask);
SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
LLVM_DEBUG(dbgs() << "Reshuffle, creating node: "; Shuffle.dump();
dbgs() << "Reshuffle, creating node: "; V.dump(););
return V;
}
// check if an EXT instruction can handle the shuffle mask when the
// vector sources of the shuffle are the same.
static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
unsigned NumElts = VT.getVectorNumElements();
// Assume that the first shuffle index is not UNDEF. Fail if it is.
if (M[0] < 0)
return false;
Imm = M[0];
// If this is a VEXT shuffle, the immediate value is the index of the first
// element. The other shuffle indices must be the successive elements after
// the first one.
unsigned ExpectedElt = Imm;
for (unsigned i = 1; i < NumElts; ++i) {
// Increment the expected index. If it wraps around, just follow it
// back to index zero and keep going.
++ExpectedElt;
if (ExpectedElt == NumElts)
ExpectedElt = 0;
if (M[i] < 0)
continue; // ignore UNDEF indices
if (ExpectedElt != static_cast<unsigned>(M[i]))
return false;
}
return true;
}
/// Check if a vector shuffle corresponds to a DUP instructions with a larger
/// element width than the vector lane type. If that is the case the function
/// returns true and writes the value of the DUP instruction lane operand into
/// DupLaneOp
static bool isWideDUPMask(ArrayRef<int> M, EVT VT, unsigned BlockSize,
unsigned &DupLaneOp) {
assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
"Only possible block sizes for wide DUP are: 16, 32, 64");
if (BlockSize <= VT.getScalarSizeInBits())
return false;
if (BlockSize % VT.getScalarSizeInBits() != 0)
return false;
if (VT.getSizeInBits() % BlockSize != 0)
return false;
size_t SingleVecNumElements = VT.getVectorNumElements();
size_t NumEltsPerBlock = BlockSize / VT.getScalarSizeInBits();
size_t NumBlocks = VT.getSizeInBits() / BlockSize;
// We are looking for masks like
// [0, 1, 0, 1] or [2, 3, 2, 3] or [4, 5, 6, 7, 4, 5, 6, 7] where any element
// might be replaced by 'undefined'. BlockIndices will eventually contain
// lane indices of the duplicated block (i.e. [0, 1], [2, 3] and [4, 5, 6, 7]
// for the above examples)
SmallVector<int, 8> BlockElts(NumEltsPerBlock, -1);
for (size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++)
for (size_t I = 0; I < NumEltsPerBlock; I++) {
int Elt = M[BlockIndex * NumEltsPerBlock + I];
if (Elt < 0)
continue;
// For now we don't support shuffles that use the second operand
if ((unsigned)Elt >= SingleVecNumElements)
return false;
if (BlockElts[I] < 0)
BlockElts[I] = Elt;
else if (BlockElts[I] != Elt)
return false;
}
// We found a candidate block (possibly with some undefs). It must be a
// sequence of consecutive integers starting with a value divisible by
// NumEltsPerBlock with some values possibly replaced by undef-s.
// Find first non-undef element
auto FirstRealEltIter = find_if(BlockElts, [](int Elt) { return Elt >= 0; });
assert(FirstRealEltIter != BlockElts.end() &&
"Shuffle with all-undefs must have been caught by previous cases, "
"e.g. isSplat()");
if (FirstRealEltIter == BlockElts.end()) {
DupLaneOp = 0;
return true;
}
// Index of FirstRealElt in BlockElts
size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin();
if ((unsigned)*FirstRealEltIter < FirstRealIndex)
return false;
// BlockElts[0] must have the following value if it isn't undef:
size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
// Check the first element
if (Elt0 % NumEltsPerBlock != 0)
return false;
// Check that the sequence indeed consists of consecutive integers (modulo
// undefs)
for (size_t I = 0; I < NumEltsPerBlock; I++)
if (BlockElts[I] >= 0 && (unsigned)BlockElts[I] != Elt0 + I)
return false;
DupLaneOp = Elt0 / NumEltsPerBlock;
return true;
}
// check if an EXT instruction can handle the shuffle mask when the
// vector sources of the shuffle are different.
static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
unsigned &Imm) {
// Look for the first non-undef element.
const int *FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
// Benefit from APInt to handle overflow when calculating the expected element.
unsigned NumElts = VT.getVectorNumElements();
unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
// The following shuffle indices must be the successive elements after the
// first real element.
const int *FirstWrongElt = std::find_if(FirstRealElt + 1, M.end(),
[&](int Elt) {return Elt != ExpectedElt++ && Elt != -1;});
if (FirstWrongElt != M.end())
return false;
// The index of an EXT is the first element if it is not UNDEF.
// Watch out for the beginning UNDEFs. The EXT index should be the expected
// value of the first element. E.g.
// <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
// <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
// ExpectedElt is the last mask index plus 1.
Imm = ExpectedElt.getZExtValue();
// There are two different cases that require reversing the input vectors.
// For example, for vector <4 x i32> we have the following cases,
// Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
// Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
// For both cases, we finally use mask <5, 6, 7, 0>, which requires
// to reverse two input vectors.
if (Imm < NumElts)
ReverseEXT = true;
else
Imm -= NumElts;
return true;
}
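// Worked example for the check above (illustrative only): for <4 x i32>, the
// mask <1, 2, 3, 4> gives Imm = 1 with ReverseEXT = false, i.e.
//   ext v0.16b, v0.16b, v1.16b, #4
// while <-1, -1, 7, 0> is treated as <5, 6, 7, 0>, which needs the two inputs
// swapped (ReverseEXT = true) and an element index of 1.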
/// isREVMask - Check if a vector shuffle corresponds to a REV
/// instruction with the specified blocksize. (The order of the elements
/// within each block of the vector is reversed.)
static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
"Only possible block sizes for REV are: 16, 32, 64");
unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
unsigned BlockElts = M[0] + 1;
// If the first shuffle index is UNDEF, be optimistic.
if (M[0] < 0)
BlockElts = BlockSize / EltSz;
if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
return false;
for (unsigned i = 0; i < NumElts; ++i) {
if (M[i] < 0)
continue; // ignore UNDEF indices
if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
return false;
}
return true;
}
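// Example masks accepted above (illustrative only), for <8 x i8>:
//   BlockSize 16: <1, 0, 3, 2, 5, 4, 7, 6>  -> REV16
//   BlockSize 32: <3, 2, 1, 0, 7, 6, 5, 4>  -> REV32
//   BlockSize 64: <7, 6, 5, 4, 3, 2, 1, 0>  -> REV64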
static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts % 2 != 0)
return false;
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
(M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
return false;
Idx += 1;
}
return true;
}
static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i != NumElts; ++i) {
if (M[i] < 0)
continue; // ignore UNDEF indices
if ((unsigned)M[i] != 2 * i + WhichResult)
return false;
}
return true;
}
static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts % 2 != 0)
return false;
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
(M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult))
return false;
}
return true;
}
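// Example masks accepted by the three predicates above (illustrative only),
// for <4 x i32> inputs a and b:
//   isZIPMask, WhichResult 0: <0, 4, 1, 5>  (zip1 v0.4s, a, b)
//   isUZPMask, WhichResult 0: <0, 2, 4, 6>  (uzp1 v0.4s, a, b)
//   isTRNMask, WhichResult 0: <0, 4, 2, 6>  (trn1 v0.4s, a, b)
// WhichResult 1 selects the zip2/uzp2/trn2 forms.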
/// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts % 2 != 0)
return false;
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
(M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx))
return false;
Idx += 1;
}
return true;
}
/// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>,
static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned Half = VT.getVectorNumElements() / 2;
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned j = 0; j != 2; ++j) {
unsigned Idx = WhichResult;
for (unsigned i = 0; i != Half; ++i) {
int MIdx = M[i + j * Half];
if (MIdx >= 0 && (unsigned)MIdx != Idx)
return false;
Idx += 2;
}
}
return true;
}
/// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts % 2 != 0)
return false;
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
(M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult))
return false;
}
return true;
}
static bool isINSMask(ArrayRef<int> M, int NumInputElements,
bool &DstIsLeft, int &Anomaly) {
if (M.size() != static_cast<size_t>(NumInputElements))
return false;
int NumLHSMatch = 0, NumRHSMatch = 0;
int LastLHSMismatch = -1, LastRHSMismatch = -1;
for (int i = 0; i < NumInputElements; ++i) {
if (M[i] == -1) {
++NumLHSMatch;
++NumRHSMatch;
continue;
}
if (M[i] == i)
++NumLHSMatch;
else
LastLHSMismatch = i;
if (M[i] == i + NumInputElements)
++NumRHSMatch;
else
LastRHSMismatch = i;
}
if (NumLHSMatch == NumInputElements - 1) {
DstIsLeft = true;
Anomaly = LastLHSMismatch;
return true;
} else if (NumRHSMatch == NumInputElements - 1) {
DstIsLeft = false;
Anomaly = LastRHSMismatch;
return true;
}
return false;
}
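// Example for the INS pattern above (illustrative only): with 4 input
// elements, the mask <0, 1, 6, 3> matches the LHS everywhere except lane 2,
// so DstIsLeft = true and Anomaly = 2, i.e. the shuffle can be lowered to a
// single "ins v0.s[2], v1.s[2]"-style element insert.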
static bool isConcatMask(ArrayRef<int> Mask, EVT VT, bool SplitLHS) {
if (VT.getSizeInBits() != 128)
return false;
unsigned NumElts = VT.getVectorNumElements();
for (int I = 0, E = NumElts / 2; I != E; I++) {
if (Mask[I] != I)
return false;
}
int Offset = NumElts / 2;
for (int I = NumElts / 2, E = NumElts; I != E; I++) {
if (Mask[I] != I + SplitLHS * Offset)
return false;
}
return true;
}
static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue V0 = Op.getOperand(0);
SDValue V1 = Op.getOperand(1);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() ||
VT.getVectorElementType() != V1.getValueType().getVectorElementType())
return SDValue();
bool SplitV0 = V0.getValueSizeInBits() == 128;
if (!isConcatMask(Mask, VT, SplitV0))
return SDValue();
EVT CastVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
if (SplitV0) {
V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
DAG.getConstant(0, DL, MVT::i64));
}
if (V1.getValueSizeInBits() == 128) {
V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
DAG.getConstant(0, DL, MVT::i64));
}
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
}
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
SDValue RHS, SelectionDAG &DAG,
const SDLoc &dl) {
unsigned OpNum = (PFEntry >> 26) & 0x0F;
unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
enum {
OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
OP_VREV,
OP_VDUP0,
OP_VDUP1,
OP_VDUP2,
OP_VDUP3,
OP_VEXT1,
OP_VEXT2,
OP_VEXT3,
OP_VUZPL, // VUZP, left result
OP_VUZPR, // VUZP, right result
OP_VZIPL, // VZIP, left result
OP_VZIPR, // VZIP, right result
OP_VTRNL, // VTRN, left result
OP_VTRNR // VTRN, right result
};
if (OpNum == OP_COPY) {
if (LHSID == (1 * 9 + 2) * 9 + 3)
return LHS;
assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
return RHS;
}
SDValue OpLHS, OpRHS;
OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
EVT VT = OpLHS.getValueType();
switch (OpNum) {
default:
llvm_unreachable("Unknown shuffle opcode!");
case OP_VREV:
// VREV divides the vector in half and swaps within the half.
if (VT.getVectorElementType() == MVT::i32 ||
VT.getVectorElementType() == MVT::f32)
return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS);
// vrev <4 x i16> -> REV32
if (VT.getVectorElementType() == MVT::i16 ||
VT.getVectorElementType() == MVT::f16 ||
VT.getVectorElementType() == MVT::bf16)
return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS);
// vrev <4 x i8> -> REV16
assert(VT.getVectorElementType() == MVT::i8);
return DAG.getNode(AArch64ISD::REV16, dl, VT, OpLHS);
case OP_VDUP0:
case OP_VDUP1:
case OP_VDUP2:
case OP_VDUP3: {
EVT EltTy = VT.getVectorElementType();
unsigned Opcode;
if (EltTy == MVT::i8)
Opcode = AArch64ISD::DUPLANE8;
else if (EltTy == MVT::i16 || EltTy == MVT::f16 || EltTy == MVT::bf16)
Opcode = AArch64ISD::DUPLANE16;
else if (EltTy == MVT::i32 || EltTy == MVT::f32)
Opcode = AArch64ISD::DUPLANE32;
else if (EltTy == MVT::i64 || EltTy == MVT::f64)
Opcode = AArch64ISD::DUPLANE64;
else
llvm_unreachable("Invalid vector element type?");
if (VT.getSizeInBits() == 64)
OpLHS = WidenVector(OpLHS, DAG);
SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64);
return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
}
case OP_VEXT1:
case OP_VEXT2:
case OP_VEXT3: {
unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
DAG.getConstant(Imm, dl, MVT::i32));
}
case OP_VUZPL:
return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
case OP_VUZPR:
return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
case OP_VZIPL:
return DAG.getNode(AArch64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
case OP_VZIPR:
return DAG.getNode(AArch64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
case OP_VTRNL:
return DAG.getNode(AArch64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
case OP_VTRNR:
return DAG.getNode(AArch64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
}
}
static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
SelectionDAG &DAG) {
// Check to see if we can use the TBL instruction.
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
SDLoc DL(Op);
EVT EltVT = Op.getValueType().getVectorElementType();
unsigned BytesPerElt = EltVT.getSizeInBits() / 8;
SmallVector<SDValue, 8> TBLMask;
for (int Val : ShuffleMask) {
for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
unsigned Offset = Byte + Val * BytesPerElt;
TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32));
}
}
MVT IndexVT = MVT::v8i8;
unsigned IndexLen = 8;
if (Op.getValueSizeInBits() == 128) {
IndexVT = MVT::v16i8;
IndexLen = 16;
}
SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1);
SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2);
SDValue Shuffle;
if (V2.getNode()->isUndef()) {
if (IndexLen == 8)
V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
Shuffle = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
DAG.getBuildVector(IndexVT, DL,
makeArrayRef(TBLMask.data(), IndexLen)));
} else {
if (IndexLen == 8) {
V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst);
Shuffle = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
DAG.getBuildVector(IndexVT, DL,
makeArrayRef(TBLMask.data(), IndexLen)));
} else {
// FIXME: We cannot, for the moment, emit a TBL2 instruction because we
// cannot currently represent the register constraints on the input
// table registers.
// Shuffle = DAG.getNode(AArch64ISD::TBL2, DL, IndexVT, V1Cst, V2Cst,
// DAG.getBuildVector(IndexVT, DL, &TBLMask[0],
// IndexLen));
Shuffle = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
DAG.getConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i32), V1Cst,
V2Cst, DAG.getBuildVector(IndexVT, DL,
makeArrayRef(TBLMask.data(), IndexLen)));
}
}
return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle);
}
static unsigned getDUPLANEOp(EVT EltType) {
if (EltType == MVT::i8)
return AArch64ISD::DUPLANE8;
if (EltType == MVT::i16 || EltType == MVT::f16 || EltType == MVT::bf16)
return AArch64ISD::DUPLANE16;
if (EltType == MVT::i32 || EltType == MVT::f32)
return AArch64ISD::DUPLANE32;
if (EltType == MVT::i64 || EltType == MVT::f64)
return AArch64ISD::DUPLANE64;
llvm_unreachable("Invalid vector element type?");
}
static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT,
unsigned Opcode, SelectionDAG &DAG) {
// Try to eliminate a bitcasted extract subvector before a DUPLANE.
auto getScaledOffsetDup = [](SDValue BitCast, int &LaneC, MVT &CastVT) {
// Match: dup (bitcast (extract_subv X, C)), LaneC
if (BitCast.getOpcode() != ISD::BITCAST ||
BitCast.getOperand(0).getOpcode() != ISD::EXTRACT_SUBVECTOR)
return false;
// The extract index must align in the destination type. That may not
// happen if the bitcast is from narrow to wide type.
SDValue Extract = BitCast.getOperand(0);
unsigned ExtIdx = Extract.getConstantOperandVal(1);
unsigned SrcEltBitWidth = Extract.getScalarValueSizeInBits();
unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
unsigned CastedEltBitWidth = BitCast.getScalarValueSizeInBits();
if (ExtIdxInBits % CastedEltBitWidth != 0)
return false;
+ // Can't handle cases where vector size is not 128-bit
+ if (!Extract.getOperand(0).getValueType().is128BitVector())
+ return false;
+
// Update the lane value by offsetting with the scaled extract index.
LaneC += ExtIdxInBits / CastedEltBitWidth;
// Determine the casted vector type of the wide vector input.
// dup (bitcast (extract_subv X, C)), LaneC --> dup (bitcast X), LaneC'
// Examples:
// dup (bitcast (extract_subv v2f64 X, 1) to v2f32), 1 --> dup v4f32 X, 3
// dup (bitcast (extract_subv v16i8 X, 8) to v4i16), 1 --> dup v8i16 X, 5
unsigned SrcVecNumElts =
Extract.getOperand(0).getValueSizeInBits() / CastedEltBitWidth;
CastVT = MVT::getVectorVT(BitCast.getSimpleValueType().getScalarType(),
SrcVecNumElts);
return true;
};
MVT CastVT;
if (getScaledOffsetDup(V, Lane, CastVT)) {
V = DAG.getBitcast(CastVT, V.getOperand(0).getOperand(0));
} else if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
V.getOperand(0).getValueType().is128BitVector()) {
// The lane is incremented by the index of the extract.
// Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3
Lane += V.getConstantOperandVal(1);
V = V.getOperand(0);
} else if (V.getOpcode() == ISD::CONCAT_VECTORS) {
// The lane is decremented if we are splatting from the 2nd operand.
// Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1
unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2;
Lane -= Idx * VT.getVectorNumElements() / 2;
V = WidenVector(V.getOperand(Idx), DAG);
} else if (VT.getSizeInBits() == 64) {
// Widen the operand to 128-bit register with undef.
V = WidenVector(V, DAG);
}
return DAG.getNode(Opcode, dl, VT, V, DAG.getConstant(Lane, dl, MVT::i64));
}
// Return true if we can derive a new shuffle mask from the given mask array
// by checking that every pair of adjacent mask values is contiguous and
// starts from an even number.
static bool isWideTypeMask(ArrayRef<int> M, EVT VT,
SmallVectorImpl<int> &NewMask) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts % 2 != 0)
return false;
NewMask.clear();
for (unsigned i = 0; i < NumElts; i += 2) {
int M0 = M[i];
int M1 = M[i + 1];
// If both elements are undef, new mask is undef too.
if (M0 == -1 && M1 == -1) {
NewMask.push_back(-1);
continue;
}
if (M0 == -1 && M1 != -1 && (M1 % 2) == 1) {
NewMask.push_back(M1 / 2);
continue;
}
if (M0 != -1 && (M0 % 2) == 0 && ((M0 + 1) == M1 || M1 == -1)) {
NewMask.push_back(M0 / 2);
continue;
}
NewMask.clear();
return false;
}
assert(NewMask.size() == NumElts / 2 && "Incorrect size for mask!");
return true;
}
// Try to widen element type to get a new mask value for a better permutation
// sequence, so that we can use NEON shuffle instructions, such as zip1/2,
// UZP1/2, TRN1/2, REV, INS, etc.
// For example:
// shufflevector <4 x i32> %a, <4 x i32> %b,
// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
// is equivalent to:
// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
// Finally, we can get:
// mov v0.d[0], v1.d[1]
static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
EVT VT = Op.getValueType();
EVT ScalarVT = VT.getVectorElementType();
unsigned ElementSize = ScalarVT.getFixedSizeInBits();
SDValue V0 = Op.getOperand(0);
SDValue V1 = Op.getOperand(1);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
// When combining adjacent elements, e.g. two i16's -> i32 or two i32's -> i64,
// we need to make sure the wider element type is legal. Thus, ElementSize
// must not be larger than 32 bits, and the i1 type is also excluded.
if (ElementSize > 32 || ElementSize == 1)
return SDValue();
SmallVector<int, 8> NewMask;
if (isWideTypeMask(Mask, VT, NewMask)) {
MVT NewEltVT = VT.isFloatingPoint()
? MVT::getFloatingPointVT(ElementSize * 2)
: MVT::getIntegerVT(ElementSize * 2);
MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
V0 = DAG.getBitcast(NewVT, V0);
V1 = DAG.getBitcast(NewVT, V1);
return DAG.getBitcast(VT,
DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
}
}
return SDValue();
}
SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
EVT VT = Op.getValueType();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
if (useSVEForFixedLengthVectorVT(VT))
return LowerFixedLengthVECTOR_SHUFFLEToSVE(Op, DAG);
// Convert shuffles that are directly supported on NEON to target-specific
// DAG nodes, instead of keeping them as shuffles and matching them again
// during code selection. This is more efficient and avoids the possibility
// of inconsistencies between legalization and selection.
ArrayRef<int> ShuffleMask = SVN->getMask();
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
assert(V1.getValueType() == VT && "Unexpected VECTOR_SHUFFLE type!");
assert(ShuffleMask.size() == VT.getVectorNumElements() &&
"Unexpected VECTOR_SHUFFLE mask size!");
if (SVN->isSplat()) {
int Lane = SVN->getSplatIndex();
// If this is an undef splat, generate it via "just" vdup, if possible.
if (Lane == -1)
Lane = 0;
if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
return DAG.getNode(AArch64ISD::DUP, dl, V1.getValueType(),
V1.getOperand(0));
// Test if V1 is a BUILD_VECTOR and the lane being referenced is a non-
// constant. If so, we can just reference the lane's definition directly.
if (V1.getOpcode() == ISD::BUILD_VECTOR &&
!isa<ConstantSDNode>(V1.getOperand(Lane)))
return DAG.getNode(AArch64ISD::DUP, dl, VT, V1.getOperand(Lane));
// Otherwise, duplicate from the lane of the input vector.
unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType());
return constructDup(V1, Lane, dl, VT, Opcode, DAG);
}
// Check if the mask matches a DUP for a wider element
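// For example, a v8i16 mask <4,5,4,5,4,5,4,5> splats 32-bit lane 2, so it can
// be lowered as a DUPLANE32 of the input reinterpreted as v4i32.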
for (unsigned LaneSize : {64U, 32U, 16U}) {
unsigned Lane = 0;
if (isWideDUPMask(ShuffleMask, VT, LaneSize, Lane)) {
unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64
: LaneSize == 32 ? AArch64ISD::DUPLANE32
: AArch64ISD::DUPLANE16;
// Cast V1 to an integer vector with required lane size
MVT NewEltTy = MVT::getIntegerVT(LaneSize);
unsigned NewEltCount = VT.getSizeInBits() / LaneSize;
MVT NewVecTy = MVT::getVectorVT(NewEltTy, NewEltCount);
V1 = DAG.getBitcast(NewVecTy, V1);
// Construct the DUP instruction
V1 = constructDup(V1, Lane, dl, NewVecTy, Opcode, DAG);
// Cast back to the original type
return DAG.getBitcast(VT, V1);
}
}
if (isREVMask(ShuffleMask, VT, 64))
return DAG.getNode(AArch64ISD::REV64, dl, V1.getValueType(), V1, V2);
if (isREVMask(ShuffleMask, VT, 32))
return DAG.getNode(AArch64ISD::REV32, dl, V1.getValueType(), V1, V2);
if (isREVMask(ShuffleMask, VT, 16))
return DAG.getNode(AArch64ISD::REV16, dl, V1.getValueType(), V1, V2);
if (((VT.getVectorNumElements() == 8 && VT.getScalarSizeInBits() == 16) ||
(VT.getVectorNumElements() == 16 && VT.getScalarSizeInBits() == 8)) &&
ShuffleVectorInst::isReverseMask(ShuffleMask)) {
SDValue Rev = DAG.getNode(AArch64ISD::REV64, dl, VT, V1);
return DAG.getNode(AArch64ISD::EXT, dl, VT, Rev, Rev,
DAG.getConstant(8, dl, MVT::i32));
}
bool ReverseEXT = false;
unsigned Imm;
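// Note: isEXTMask reports the rotation in elements; getExtFactor scales it to
// the byte offset expected by the EXT immediate.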
if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
if (ReverseEXT)
std::swap(V1, V2);
Imm *= getExtFactor(V1);
return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2,
DAG.getConstant(Imm, dl, MVT::i32));
} else if (V2->isUndef() && isSingletonEXTMask(ShuffleMask, VT, Imm)) {
Imm *= getExtFactor(V1);
return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1,
DAG.getConstant(Imm, dl, MVT::i32));
}
unsigned WhichResult;
if (isZIPMask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
}
if (isUZPMask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
}
if (isTRNMask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
}
if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
}
if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
}
if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
}
if (SDValue Concat = tryFormConcatFromShuffle(Op, DAG))
return Concat;
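// isINSMask matches a shuffle that leaves one input unchanged except for a
// single anomalous lane, which can then be materialized with a single
// INSERT_VECTOR_ELT (INS) from the other source.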
bool DstIsLeft;
int Anomaly;
int NumInputElements = V1.getValueType().getVectorNumElements();
if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
SDValue DstVec = DstIsLeft ? V1 : V2;
SDValue DstLaneV = DAG.getConstant(Anomaly, dl, MVT::i64);
SDValue SrcVec = V1;
int SrcLane = ShuffleMask[Anomaly];
if (SrcLane >= NumInputElements) {
SrcVec = V2;
SrcLane -= VT.getVectorNumElements();
}
SDValue SrcLaneV = DAG.getConstant(SrcLane, dl, MVT::i64);
EVT ScalarVT = VT.getVectorElementType();
if (ScalarVT.getFixedSizeInBits() < 32 && ScalarVT.isInteger())
ScalarVT = MVT::i32;
return DAG.getNode(
ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, SrcVec, SrcLaneV),
DstLaneV);
}
if (SDValue NewSD = tryWidenMaskForShuffle(Op, DAG))
return NewSD;
// If the shuffle is not directly supported and it has 4 elements, use
// the PerfectShuffle-generated table to synthesize it from other shuffles.
unsigned NumElts = VT.getVectorNumElements();
if (NumElts == 4) {
unsigned PFIndexes[4];
for (unsigned i = 0; i != 4; ++i) {
if (ShuffleMask[i] < 0)
PFIndexes[i] = 8;
else
PFIndexes[i] = ShuffleMask[i];
}
// Compute the index in the perfect shuffle table.
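// Each mask value (0-7, with 8 standing for undef) acts as a base-9 digit, so
// the four indices combine into a unique table index.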
unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
PFIndexes[2] * 9 + PFIndexes[3];
unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
unsigned Cost = (PFEntry >> 30);
if (Cost <= 4)
return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
}
return GenerateTBL(Op, ShuffleMask, DAG);
}
SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
EVT VT = Op.getValueType();
EVT ElemVT = VT.getScalarType();
SDValue SplatVal = Op.getOperand(0);
if (useSVEForFixedLengthVectorVT(VT))
return LowerToScalableOp(Op, DAG);
// Extend input splat value where needed to fit into a GPR (32b or 64b only)
// FPRs don't have this restriction.
switch (ElemVT.getSimpleVT().SimpleTy) {
case MVT::i1: {
// The only legal i1 vectors are SVE vectors, so we can use SVE-specific
// lowering code.
if (auto *ConstVal = dyn_cast<ConstantSDNode>(SplatVal)) {
// We can handle the zero case during isel.
if (ConstVal->isZero())
return Op;
if (ConstVal->isOne())
return getPTrue(DAG, dl, VT, AArch64SVEPredPattern::all);
}
// The general case of i1. There isn't any natural way to do this,
// so we use some trickery with whilelo.
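// After sign-extending the boolean to 0 or all-ones in an i64, whilelo(0, 0)
// produces an all-false predicate and whilelo(0, ~0ULL) an all-true one.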
SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
SplatVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i64, SplatVal,
DAG.getValueType(MVT::i1));
SDValue ID = DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, dl,
MVT::i64);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, ID,
DAG.getConstant(0, dl, MVT::i64), SplatVal);
}
case MVT::i8:
case MVT::i16:
case MVT::i32:
SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i32);
break;
case MVT::i64:
SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
break;
case MVT::f16:
case MVT::bf16:
case MVT::f32:
case MVT::f64:
// Fine as is
break;
default:
report_fatal_error("Unsupported SPLAT_VECTOR input operand type");
}
return DAG.getNode(AArch64ISD::DUP, dl, VT, SplatVal);
}
SDValue AArch64TargetLowering::LowerDUPQLane(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
if (!isTypeLegal(VT) || !VT.isScalableVector())
return SDValue();
// Current lowering only supports the SVE-ACLE types.
if (VT.getSizeInBits().getKnownMinSize() != AArch64::SVEBitsPerBlock)
return SDValue();
// The DUPQ operation is independent of element type, so normalise to i64s.
SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::nxv2i64, Op.getOperand(1));
SDValue Idx128 = Op.getOperand(2);
// DUPQ can be used when idx is in range.
auto *CIdx = dyn_cast<ConstantSDNode>(Idx128);
if (CIdx && (CIdx->getZExtValue() <= 3)) {
SDValue CI = DAG.getTargetConstant(CIdx->getZExtValue(), DL, MVT::i64);
SDNode *DUPQ =
DAG.getMachineNode(AArch64::DUP_ZZI_Q, DL, MVT::nxv2i64, V, CI);
return DAG.getNode(ISD::BITCAST, DL, VT, SDValue(DUPQ, 0));
}
// The ACLE says this must produce the same result as:
// svtbl(data, svadd_x(svptrue_b64(),
// svand_x(svptrue_b64(), svindex_u64(0, 1), 1),
// index * 2))
SDValue One = DAG.getConstant(1, DL, MVT::i64);
SDValue SplatOne = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, One);
// create the vector 0,1,0,1,...
SDValue SV = DAG.getStepVector(DL, MVT::nxv2i64);
SV = DAG.getNode(ISD::AND, DL, MVT::nxv2i64, SV, SplatOne);
// create the vector idx64,idx64+1,idx64,idx64+1,...
SDValue Idx64 = DAG.getNode(ISD::ADD, DL, MVT::i64, Idx128, Idx128);
SDValue SplatIdx64 = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Idx64);
SDValue ShuffleMask = DAG.getNode(ISD::ADD, DL, MVT::nxv2i64, SV, SplatIdx64);
// create the vector Val[idx64],Val[idx64+1],Val[idx64],Val[idx64+1],...
SDValue TBL = DAG.getNode(AArch64ISD::TBL, DL, MVT::nxv2i64, V, ShuffleMask);
return DAG.getNode(ISD::BITCAST, DL, VT, TBL);
}
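// Expand a BUILD_VECTOR's constant splat into full-vector-width bit masks:
// CnstBits accumulates the splat value repeated across the vector and
// UndefBits the corresponding undef-bit information.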
static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
APInt &UndefBits) {
EVT VT = BVN->getValueType(0);
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
unsigned NumSplats = VT.getSizeInBits() / SplatBitSize;
for (unsigned i = 0; i < NumSplats; ++i) {
CnstBits <<= SplatBitSize;
UndefBits <<= SplatBitSize;
CnstBits |= SplatBits.zextOrTrunc(VT.getSizeInBits());
UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.getSizeInBits());
}
return true;
}
return false;
}
// Try 64-bit splatted SIMD immediate.
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v2i64 : MVT::f64;
if (AArch64_AM::isAdvSIMDModImmType10(Value)) {
Value = AArch64_AM::encodeAdvSIMDModImmType10(Value);
SDLoc dl(Op);
SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Try 32-bit splatted SIMD immediate.
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits,
const SDValue *LHS = nullptr) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
bool isAdvSIMDModImm = false;
uint64_t Shift;
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType1(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType1(Value);
Shift = 0;
}
else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType2(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType2(Value);
Shift = 8;
}
else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType3(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType3(Value);
Shift = 16;
}
else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType4(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType4(Value);
Shift = 24;
}
if (isAdvSIMDModImm) {
SDLoc dl(Op);
SDValue Mov;
if (LHS)
Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
else
Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Try 16-bit splatted SIMD immediate.
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits,
const SDValue *LHS = nullptr) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
bool isAdvSIMDModImm = false;
uint64_t Shift;
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType5(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType5(Value);
Shift = 0;
}
else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType6(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType6(Value);
Shift = 8;
}
if (isAdvSIMDModImm) {
SDLoc dl(Op);
SDValue Mov;
if (LHS)
Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
else
Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Try 32-bit splatted SIMD immediate with shifted ones.
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op,
SelectionDAG &DAG, const APInt &Bits) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
bool isAdvSIMDModImm = false;
uint64_t Shift;
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType7(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType7(Value);
Shift = 264;
}
else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType8(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType8(Value);
Shift = 272;
}
if (isAdvSIMDModImm) {
SDLoc dl(Op);
SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Try 8-bit splatted SIMD immediate.
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
if (AArch64_AM::isAdvSIMDModImmType9(Value)) {
Value = AArch64_AM::encodeAdvSIMDModImmType9(Value);
SDLoc dl(Op);
SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Try FP splatted SIMD immediate.
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
bool isWide = (VT.getSizeInBits() == 128);
MVT MovTy;
bool isAdvSIMDModImm = false;
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType11(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType11(Value);
MovTy = isWide ? MVT::v4f32 : MVT::v2f32;
}
else if (isWide &&
(isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType12(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType12(Value);
MovTy = MVT::v2f64;
}
if (isAdvSIMDModImm) {
SDLoc dl(Op);
SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Specialized code to quickly find if PotentialBVec is a BuildVector that
// consists of only the same constant int value, returned in reference arg
// ConstVal
static bool isAllConstantBuildVector(const SDValue &PotentialBVec,
uint64_t &ConstVal) {
BuildVectorSDNode *Bvec = dyn_cast<BuildVectorSDNode>(PotentialBVec);
if (!Bvec)
return false;
ConstantSDNode *FirstElt = dyn_cast<ConstantSDNode>(Bvec->getOperand(0));
if (!FirstElt)
return false;
EVT VT = Bvec->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
for (unsigned i = 1; i < NumElts; ++i)
if (dyn_cast<ConstantSDNode>(Bvec->getOperand(i)) != FirstElt)
return false;
ConstVal = FirstElt->getZExtValue();
return true;
}
static unsigned getIntrinsicID(const SDNode *N) {
unsigned Opcode = N->getOpcode();
switch (Opcode) {
default:
return Intrinsic::not_intrinsic;
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
if (IID < Intrinsic::num_intrinsics)
return IID;
return Intrinsic::not_intrinsic;
}
}
}
// Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
// to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
// BUILD_VECTOR with constant element C1, C2 is a constant, and:
// - for the SLI case: C1 == ~(Ones(ElemSizeInBits) << C2)
// - for the SRI case: C1 == ~(Ones(ElemSizeInBits) >> C2)
// The (or (lsl Y, C2), (and X, BvecC1)) case is also handled.
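// For example, with 32-bit elements and C2 == 8, the SLI case requires
// C1 == 0x000000FF (only the low 8 bits set) and the SRI case requires
// C1 == 0xFF000000 (only the high 8 bits set).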
static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
if (!VT.isVector())
return SDValue();
SDLoc DL(N);
SDValue And;
SDValue Shift;
SDValue FirstOp = N->getOperand(0);
unsigned FirstOpc = FirstOp.getOpcode();
SDValue SecondOp = N->getOperand(1);
unsigned SecondOpc = SecondOp.getOpcode();
// Is one of the operands an AND or a BICi? The AND may have been optimised to
// a BICi in order to use an immediate instead of a register.
// Is the other operand a shl or lshr? This will have been turned into:
// AArch64ISD::VSHL vector, #shift or AArch64ISD::VLSHR vector, #shift.
if ((FirstOpc == ISD::AND || FirstOpc == AArch64ISD::BICi) &&
(SecondOpc == AArch64ISD::VSHL || SecondOpc == AArch64ISD::VLSHR)) {
And = FirstOp;
Shift = SecondOp;
} else if ((SecondOpc == ISD::AND || SecondOpc == AArch64ISD::BICi) &&
(FirstOpc == AArch64ISD::VSHL || FirstOpc == AArch64ISD::VLSHR)) {
And = SecondOp;
Shift = FirstOp;
} else
return SDValue();
bool IsAnd = And.getOpcode() == ISD::AND;
bool IsShiftRight = Shift.getOpcode() == AArch64ISD::VLSHR;
// Is the shift amount constant?
ConstantSDNode *C2node = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
if (!C2node)
return SDValue();
uint64_t C1;
if (IsAnd) {
// Is the and mask vector all constant?
if (!isAllConstantBuildVector(And.getOperand(1), C1))
return SDValue();
} else {
// Reconstruct the corresponding AND immediate from the two BICi immediates.
ConstantSDNode *C1nodeImm = dyn_cast<ConstantSDNode>(And.getOperand(1));
ConstantSDNode *C1nodeShift = dyn_cast<ConstantSDNode>(And.getOperand(2));
assert(C1nodeImm && C1nodeShift);
C1 = ~(C1nodeImm->getZExtValue() << C1nodeShift->getZExtValue());
}
// Is C1 == ~(Ones(ElemSizeInBits) << C2) or
// C1 == ~(Ones(ElemSizeInBits) >> C2), taking into account
// how much one can shift elements of a particular size?
uint64_t C2 = C2node->getZExtValue();
unsigned ElemSizeInBits = VT.getScalarSizeInBits();
if (C2 > ElemSizeInBits)
return SDValue();
APInt C1AsAPInt(ElemSizeInBits, C1);
APInt RequiredC1 = IsShiftRight ? APInt::getHighBitsSet(ElemSizeInBits, C2)
: APInt::getLowBitsSet(ElemSizeInBits, C2);
if (C1AsAPInt != RequiredC1)
return SDValue();
SDValue X = And.getOperand(0);
SDValue Y = Shift.getOperand(0);
unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
SDValue ResultSLI = DAG.getNode(Inst, DL, VT, X, Y, Shift.getOperand(1));
LLVM_DEBUG(dbgs() << "aarch64-lower: transformed: \n");
LLVM_DEBUG(N->dump(&DAG));
LLVM_DEBUG(dbgs() << "into: \n");
LLVM_DEBUG(ResultSLI->dump(&DAG));
++NumShiftInserts;
return ResultSLI;
}
SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
SelectionDAG &DAG) const {
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerToScalableOp(Op, DAG);
// Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
return Res;
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
BuildVectorSDNode *BVN =
dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
if (!BVN) {
// OR commutes, so try swapping the operands.
LHS = Op.getOperand(1);
BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(0).getNode());
}
if (!BVN)
return Op;
APInt DefBits(VT.getSizeInBits(), 0);
APInt UndefBits(VT.getSizeInBits(), 0);
if (resolveBuildVector(BVN, DefBits, UndefBits)) {
SDValue NewOp;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
DefBits, &LHS)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
DefBits, &LHS)))
return NewOp;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
UndefBits, &LHS)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
UndefBits, &LHS)))
return NewOp;
}
// We can always fall back to a non-immediate OR.
return Op;
}
// Normalize the operands of BUILD_VECTOR. The value of constant operands will
// be truncated to fit element width.
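// For example, a v8i8 operand holding the constant 0x1FF is truncated to 0xFF
// and rebuilt as an i32 constant operand.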
static SDValue NormalizeBuildVector(SDValue Op,
SelectionDAG &DAG) {
assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
SDLoc dl(Op);
EVT VT = Op.getValueType();
EVT EltTy = VT.getVectorElementType();
if (EltTy.isFloatingPoint() || EltTy.getSizeInBits() > 16)
return Op;
SmallVector<SDValue, 16> Ops;
for (SDValue Lane : Op->ops()) {
// For integer vectors, type legalization would have promoted the
// operands already. Otherwise, if Op is a floating-point splat
// (with operands cast to integers), then the only possibilities
// are constants and UNDEFs.
if (auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
APInt LowBits(EltTy.getSizeInBits(),
CstLane->getZExtValue());
Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
} else if (Lane.getNode()->isUndef()) {
Lane = DAG.getUNDEF(MVT::i32);
} else {
assert(Lane.getValueType() == MVT::i32 &&
"Unexpected BUILD_VECTOR operand type");
}
Ops.push_back(Lane);
}
return DAG.getBuildVector(VT, dl, Ops);
}
static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
APInt DefBits(VT.getSizeInBits(), 0);
APInt UndefBits(VT.getSizeInBits(), 0);
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
if (resolveBuildVector(BVN, DefBits, UndefBits)) {
SDValue NewOp;
if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
return NewOp;
DefBits = ~DefBits;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
return NewOp;
DefBits = UndefBits;
if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
return NewOp;
DefBits = ~UndefBits;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
return NewOp;
}
return SDValue();
}
SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
// Try to build a simple constant vector.
Op = NormalizeBuildVector(Op, DAG);
if (VT.isInteger()) {
// Certain vector constants, used to express things like logical NOT and
// arithmetic NEG, are passed through unmodified. This allows special
// patterns for these operations to match, which will lower these constants
// to whatever is proven necessary.
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
if (BVN->isConstant())
if (ConstantSDNode *Const = BVN->getConstantSplatNode()) {
unsigned BitSize = VT.getVectorElementType().getSizeInBits();
APInt Val(BitSize,
Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
if (Val.isZero() || Val.isAllOnes())
return Op;
}
}
if (SDValue V = ConstantBuildVector(Op, DAG))
return V;
// Scan through the operands to find some interesting properties we can
// exploit:
// 1) If only one value is used, we can use a DUP, or
// 2) if only the low element is not undef, we can just insert that, or
// 3) if only one constant value is used (w/ some non-constant lanes),
// we can splat the constant value into the whole vector then fill
// in the non-constant lanes.
// 4) FIXME: If different constant values are used, but we can intelligently
// select the values we'll be overwriting for the non-constant
// lanes such that we can directly materialize the vector
// some other way (MOVI, e.g.), we can be sneaky.
// 5) if all operands are EXTRACT_VECTOR_ELT, check for VUZP.
SDLoc dl(Op);
unsigned NumElts = VT.getVectorNumElements();
bool isOnlyLowElement = true;
bool usesOnlyOneValue = true;
bool usesOnlyOneConstantValue = true;
bool isConstant = true;
bool AllLanesExtractElt = true;
unsigned NumConstantLanes = 0;
unsigned NumDifferentLanes = 0;
unsigned NumUndefLanes = 0;
SDValue Value;
SDValue ConstantValue;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
AllLanesExtractElt = false;
if (V.isUndef()) {
++NumUndefLanes;
continue;
}
if (i > 0)
isOnlyLowElement = false;
if (!isIntOrFPConstant(V))
isConstant = false;
if (isIntOrFPConstant(V)) {
++NumConstantLanes;
if (!ConstantValue.getNode())
ConstantValue = V;
else if (ConstantValue != V)
usesOnlyOneConstantValue = false;
}
if (!Value.getNode())
Value = V;
else if (V != Value) {
usesOnlyOneValue = false;
++NumDifferentLanes;
}
}
if (!Value.getNode()) {
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: value undefined, creating undef node\n");
return DAG.getUNDEF(VT);
}
// Convert BUILD_VECTOR where all elements but the lowest are undef into
// SCALAR_TO_VECTOR, except for when we have a single-element constant vector
// as SimplifyDemandedBits will just turn that back into BUILD_VECTOR.
if (isOnlyLowElement && !(NumElts == 1 && isIntOrFPConstant(Value))) {
LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: only low element used, creating 1 "
"SCALAR_TO_VECTOR node\n");
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
}
if (AllLanesExtractElt) {
SDNode *Vector = nullptr;
bool Even = false;
bool Odd = false;
// Check whether the extract elements match the Even pattern <0,2,4,...> or
// the Odd pattern <1,3,5,...>.
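// For example, collecting elements <0,2,4,...> of a double-width vector is
// UZP1 of its low and high halves; <1,3,5,...> is UZP2.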
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
const SDNode *N = V.getNode();
if (!isa<ConstantSDNode>(N->getOperand(1)))
break;
SDValue N0 = N->getOperand(0);
// All elements are extracted from the same vector.
if (!Vector) {
Vector = N0.getNode();
// Check that the type of EXTRACT_VECTOR_ELT matches the type of
// BUILD_VECTOR.
if (VT.getVectorElementType() !=
N0.getValueType().getVectorElementType())
break;
} else if (Vector != N0.getNode()) {
Odd = false;
Even = false;
break;
}
// Extracted values are either at Even indices <0,2,4,...> or at Odd
// indices <1,3,5,...>.
uint64_t Val = N->getConstantOperandVal(1);
if (Val == 2 * i) {
Even = true;
continue;
}
if (Val - 1 == 2 * i) {
Odd = true;
continue;
}
// Something does not match: abort.
Odd = false;
Even = false;
break;
}
if (Even || Odd) {
SDValue LHS =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
DAG.getConstant(0, dl, MVT::i64));
SDValue RHS =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
DAG.getConstant(NumElts, dl, MVT::i64));
if (Even && !Odd)
return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), LHS,
RHS);
if (Odd && !Even)
return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), LHS,
RHS);
}
}
// Use DUP for non-constant splats. For f32 constant splats, reduce to
// i32 and try again.
if (usesOnlyOneValue) {
if (!isConstant) {
if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
Value.getValueType() != VT) {
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: use DUP for non-constant splats\n");
return DAG.getNode(AArch64ISD::DUP, dl, VT, Value);
}
// This is actually a DUPLANExx operation, which keeps everything vectory.
SDValue Lane = Value.getOperand(1);
Value = Value.getOperand(0);
if (Value.getValueSizeInBits() == 64) {
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
"widening it\n");
Value = WidenVector(Value, DAG);
}
unsigned Opcode = getDUPLANEOp(VT.getVectorElementType());
return DAG.getNode(Opcode, dl, VT, Value, Lane);
}
if (VT.getVectorElementType().isFloatingPoint()) {
SmallVector<SDValue, 8> Ops;
EVT EltTy = VT.getVectorElementType();
assert ((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 ||
EltTy == MVT::f64) && "Unsupported floating-point vector type");
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: float constant splats, creating int "
"BITCASTS, and try again\n");
MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: trying to lower new vector: ";
Val.dump(););
Val = LowerBUILD_VECTOR(Val, DAG);
if (Val.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
}
// If we need to insert a small number of different non-constant elements and
// the vector width is sufficiently large, prefer using DUP with the common
// value and INSERT_VECTOR_ELT for the different lanes. If DUP is preferred,
// skip the constant lane handling below.
bool PreferDUPAndInsert =
!isConstant && NumDifferentLanes >= 1 &&
NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
NumDifferentLanes >= NumConstantLanes;
// If only one constant value was used, and it was used for more than one
// lane, start by splatting that value, then replace the non-constant lanes. This
// is better than the default, which will perform a separate initialization
// for each lane.
if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
// Firstly, try to materialize the splat constant.
SDValue Vec = DAG.getSplatBuildVector(VT, dl, ConstantValue),
Val = ConstantBuildVector(Vec, DAG);
if (!Val) {
// Otherwise, materialize the constant and splat it.
Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
DAG.ReplaceAllUsesWith(Vec.getNode(), &Val);
}
// Now insert the non-constant lanes.
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
if (!isIntOrFPConstant(V))
// Note that type legalization likely mucked about with the VT of the
// source operand, so we may have to convert it here before inserting.
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx);
}
return Val;
}
// This will generate a load from the constant pool.
if (isConstant) {
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: all elements are constant, use default "
"expansion\n");
return SDValue();
}
// Empirical tests suggest this is rarely worth it for vectors of length <= 2.
if (NumElts >= 4) {
if (SDValue shuffle = ReconstructShuffle(Op, DAG))
return shuffle;
}
if (PreferDUPAndInsert) {
// First, build a constant vector with the common element.
SmallVector<SDValue, 8> Ops(NumElts, Value);
SDValue NewVector = LowerBUILD_VECTOR(DAG.getBuildVector(VT, dl, Ops), DAG);
// Next, insert the elements that do not match the common value.
for (unsigned I = 0; I < NumElts; ++I)
if (Op.getOperand(I) != Value)
NewVector =
DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, NewVector,
Op.getOperand(I), DAG.getConstant(I, dl, MVT::i64));
return NewVector;
}
// If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
// know the default expansion would otherwise fall back on something even
// worse. For a vector with one or two non-undef values, that's
// scalar_to_vector for the elements followed by a shuffle (provided the
// shuffle is valid for the target) and materialization element by element
// on the stack followed by a load for everything else.
if (!isConstant && !usesOnlyOneValue) {
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: alternatives failed, creating sequence "
"of INSERT_VECTOR_ELT\n");
SDValue Vec = DAG.getUNDEF(VT);
SDValue Op0 = Op.getOperand(0);
unsigned i = 0;
// Use SCALAR_TO_VECTOR for lane zero to
// a) Avoid a RMW dependency on the full vector register, and
// b) Allow the register coalescer to fold away the copy if the
// value is already in an S or D register, and we're forced to emit an
// INSERT_SUBREG that we can't fold anywhere.
//
// We also allow types like i8 and i16 which are illegal scalar but legal
// vector element types. After type-legalization the inserted value is
// extended (i32) and it is safe to cast them to the vector type by ignoring
// the upper bits of the lowest lane (e.g. v8i8, v4i16).
if (!Op0.isUndef()) {
LLVM_DEBUG(dbgs() << "Creating node for op0, it is not undefined:\n");
Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op0);
++i;
}
LLVM_DEBUG(if (i < NumElts) dbgs()
<< "Creating nodes for the other vector elements:\n";);
for (; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.isUndef())
continue;
SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
}
return Vec;
}
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: use default expansion, failed to find "
"better alternative\n");
return SDValue();
}
SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
SelectionDAG &DAG) const {
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerFixedLengthConcatVectorsToSVE(Op, DAG);
assert(Op.getValueType().isScalableVector() &&
isTypeLegal(Op.getValueType()) &&
"Expected legal scalable vector type!");
if (isTypeLegal(Op.getOperand(0).getValueType())) {
unsigned NumOperands = Op->getNumOperands();
assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
"Unexpected number of operands in CONCAT_VECTORS");
if (NumOperands == 2)
return Op;
// Concat each pair of subvectors and pack into the lower half of the array.
SmallVector<SDValue> ConcatOps(Op->op_begin(), Op->op_end());
while (ConcatOps.size() > 1) {
for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
SDValue V1 = ConcatOps[I];
SDValue V2 = ConcatOps[I + 1];
EVT SubVT = V1.getValueType();
EVT PairVT = SubVT.getDoubleNumVectorElementsVT(*DAG.getContext());
ConcatOps[I / 2] =
DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), PairVT, V1, V2);
}
ConcatOps.resize(ConcatOps.size() / 2);
}
return ConcatOps[0];
}
return SDValue();
}
SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerFixedLengthInsertVectorElt(Op, DAG);
// Check for non-constant or out of range lane.
EVT VT = Op.getOperand(0).getValueType();
if (VT.getScalarType() == MVT::i1) {
EVT VectorVT = getPromotedVTForPredicate(VT);
SDLoc DL(Op);
SDValue ExtendedVector =
DAG.getAnyExtOrTrunc(Op.getOperand(0), DL, VectorVT);
SDValue ExtendedValue =
DAG.getAnyExtOrTrunc(Op.getOperand(1), DL,
VectorVT.getScalarType().getSizeInBits() < 32
? MVT::i32
: VectorVT.getScalarType());
ExtendedVector =
DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VectorVT, ExtendedVector,
ExtendedValue, Op.getOperand(2));
return DAG.getAnyExtOrTrunc(ExtendedVector, DL, VT);
}
ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(2));
if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
return SDValue();
// Insertion/extraction are legal for V128 types.
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
VT == MVT::v8f16 || VT == MVT::v8bf16)
return Op;
if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
VT != MVT::v4bf16)
return SDValue();
// For V64 types, we perform insertion by expanding the value
// to a V128 type and performing the insertion on that.
SDLoc DL(Op);
SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
EVT WideTy = WideVec.getValueType();
SDValue Node = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideTy, WideVec,
Op.getOperand(1), Op.getOperand(2));
// Re-narrow the resultant vector.
return NarrowVector(Node, DAG);
}
SDValue
AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
EVT VT = Op.getOperand(0).getValueType();
if (VT.getScalarType() == MVT::i1) {
// We can't directly extract from an SVE predicate; extend it first.
// (This isn't the only possible lowering, but it's straightforward.)
EVT VectorVT = getPromotedVTForPredicate(VT);
SDLoc DL(Op);
SDValue Extend =
DAG.getNode(ISD::ANY_EXTEND, DL, VectorVT, Op.getOperand(0));
MVT ExtractTy = VectorVT == MVT::nxv2i64 ? MVT::i64 : MVT::i32;
SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractTy,
Extend, Op.getOperand(1));
return DAG.getAnyExtOrTrunc(Extract, DL, Op.getValueType());
}
if (useSVEForFixedLengthVectorVT(VT))
return LowerFixedLengthExtractVectorElt(Op, DAG);
// Check for non-constant or out of range lane.
ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(1));
if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
return SDValue();
// Insertion/extraction are legal for V128 types.
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
VT == MVT::v8f16 || VT == MVT::v8bf16)
return Op;
if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
VT != MVT::v4bf16)
return SDValue();
// For V64 types, we perform extraction by expanding the value
// to a V128 type and performing the extraction on that.
SDLoc DL(Op);
SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
EVT WideTy = WideVec.getValueType();
EVT ExtrTy = WideTy.getVectorElementType();
if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
ExtrTy = MVT::i32;
// For extractions, we just return the result directly.
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrTy, WideVec,
Op.getOperand(1));
}
SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getValueType().isFixedLengthVector() &&
"Only cases that extract a fixed length vector are supported!");
EVT InVT = Op.getOperand(0).getValueType();
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
unsigned Size = Op.getValueSizeInBits();
// If we don't have legal types yet, do nothing
if (!DAG.getTargetLoweringInfo().isTypeLegal(InVT))
return SDValue();
if (InVT.isScalableVector()) {
// This will be matched by custom code during ISelDAGToDAG.
if (Idx == 0 && isPackedVectorType(InVT, DAG))
return Op;
return SDValue();
}
// This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
if (Idx == 0 && InVT.getSizeInBits() <= 128)
return Op;
// If this is extracting the upper 64-bits of a 128-bit vector, we match
// that directly.
if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64 &&
InVT.getSizeInBits() == 128)
return Op;
if (useSVEForFixedLengthVectorVT(InVT)) {
SDLoc DL(Op);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
SDValue NewInVec =
convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
SDValue Splice = DAG.getNode(ISD::VECTOR_SPLICE, DL, ContainerVT, NewInVec,
NewInVec, DAG.getConstant(Idx, DL, MVT::i64));
return convertFromScalableVector(DAG, Op.getValueType(), Splice);
}
return SDValue();
}
SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getValueType().isScalableVector() &&
"Only expect to lower inserts into scalable vectors!");
EVT InVT = Op.getOperand(1).getValueType();
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
SDValue Vec0 = Op.getOperand(0);
SDValue Vec1 = Op.getOperand(1);
SDLoc DL(Op);
EVT VT = Op.getValueType();
if (InVT.isScalableVector()) {
if (!isTypeLegal(VT))
return SDValue();
// Break down insert_subvector into simpler parts.
if (VT.getVectorElementType() == MVT::i1) {
unsigned NumElts = VT.getVectorMinNumElements();
EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
SDValue Lo, Hi;
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Vec0,
DAG.getVectorIdxConstant(0, DL));
Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Vec0,
DAG.getVectorIdxConstant(NumElts / 2, DL));
if (Idx < (NumElts / 2)) {
SDValue NewLo = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, HalfVT, Lo, Vec1,
DAG.getVectorIdxConstant(Idx, DL));
return DAG.getNode(AArch64ISD::UZP1, DL, VT, NewLo, Hi);
} else {
SDValue NewHi =
DAG.getNode(ISD::INSERT_SUBVECTOR, DL, HalfVT, Hi, Vec1,
DAG.getVectorIdxConstant(Idx - (NumElts / 2), DL));
return DAG.getNode(AArch64ISD::UZP1, DL, VT, Lo, NewHi);
}
}
// Ensure the subvector is half the size of the main vector.
if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2))
return SDValue();
EVT WideVT;
SDValue ExtVec;
if (VT.isFloatingPoint()) {
// The InVT type should be legal. We can safely cast the unpacked
// subvector from InVT -> VT.
WideVT = VT;
ExtVec = getSVESafeBitCast(VT, Vec1, DAG);
} else {
// Extend elements of smaller vector...
WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext()));
ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1);
}
if (Idx == 0) {
SDValue HiVec0 = DAG.getNode(AArch64ISD::UUNPKHI, DL, WideVT, Vec0);
return DAG.getNode(AArch64ISD::UZP1, DL, VT, ExtVec, HiVec0);
} else if (Idx == InVT.getVectorMinNumElements()) {
SDValue LoVec0 = DAG.getNode(AArch64ISD::UUNPKLO, DL, WideVT, Vec0);
return DAG.getNode(AArch64ISD::UZP1, DL, VT, LoVec0, ExtVec);
}
return SDValue();
}
if (Idx == 0 && isPackedVectorType(VT, DAG)) {
// This will be matched by custom code during ISelDAGToDAG.
if (Vec0.isUndef())
return Op;
Optional<unsigned> PredPattern =
getSVEPredPatternFromNumElements(InVT.getVectorNumElements());
auto PredTy = VT.changeVectorElementType(MVT::i1);
SDValue PTrue = getPTrue(DAG, DL, PredTy, *PredPattern);
SDValue ScalableVec1 = convertToScalableVector(DAG, VT, Vec1);
return DAG.getNode(ISD::VSELECT, DL, VT, PTrue, ScalableVec1, Vec0);
}
return SDValue();
}
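// Return true if Op is a constant splat (DUP, SPLAT_VECTOR or BUILD_VECTOR)
// whose value is a power of two or the negation of one. SplatVal returns the
// positive power of two and Negated records whether the original value was
// negative.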
static bool isPow2Splat(SDValue Op, uint64_t &SplatVal, bool &Negated) {
if (Op.getOpcode() != AArch64ISD::DUP &&
Op.getOpcode() != ISD::SPLAT_VECTOR &&
Op.getOpcode() != ISD::BUILD_VECTOR)
return false;
if (Op.getOpcode() == ISD::BUILD_VECTOR &&
!isAllConstantBuildVector(Op, SplatVal))
return false;
if (Op.getOpcode() != ISD::BUILD_VECTOR &&
!isa<ConstantSDNode>(Op->getOperand(0)))
return false;
SplatVal = Op->getConstantOperandVal(0);
if (Op.getValueType().getVectorElementType() != MVT::i64)
SplatVal = (int32_t)SplatVal;
Negated = false;
if (isPowerOf2_64(SplatVal))
return true;
Negated = true;
if (isPowerOf2_64(-SplatVal)) {
SplatVal = -SplatVal;
return true;
}
return false;
}
SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc dl(Op);
if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
return LowerFixedLengthVectorIntDivideToSVE(Op, DAG);
assert(VT.isScalableVector() && "Expected a scalable vector.");
bool Signed = Op.getOpcode() == ISD::SDIV;
unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
bool Negated;
uint64_t SplatVal;
if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated)) {
SDValue Pg = getPredicateForScalableVector(DAG, dl, VT);
SDValue Res =
DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, VT, Pg, Op->getOperand(0),
DAG.getTargetConstant(Log2_64(SplatVal), dl, MVT::i32));
if (Negated)
Res = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Res);
return Res;
}
if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
return LowerToPredicatedOp(Op, DAG, PredOpcode);
// SVE doesn't have i8 and i16 DIV operations; widen them to 32-bit
// operations, and truncate the result.
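// For example, an nxv16i8 division is unpacked into two nxv8i16 divisions
// (each of which is widened again to nxv4i32), and the halves are packed back
// together with UZP1.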
EVT WidenedVT;
if (VT == MVT::nxv16i8)
WidenedVT = MVT::nxv8i16;
else if (VT == MVT::nxv8i16)
WidenedVT = MVT::nxv4i32;
else
llvm_unreachable("Unexpected Custom DIV operation");
unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
SDValue Op0Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(0));
SDValue Op1Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(1));
SDValue Op0Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(0));
SDValue Op1Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(1));
SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Lo, Op1Lo);
SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Hi, Op1Hi);
return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLo, ResultHi);
}
bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
// Currently no fixed length shuffles that require SVE are legal.
if (useSVEForFixedLengthVectorVT(VT))
return false;
if (VT.getVectorNumElements() == 4 &&
(VT.is128BitVector() || VT.is64BitVector())) {
unsigned PFIndexes[4];
for (unsigned i = 0; i != 4; ++i) {
if (M[i] < 0)
PFIndexes[i] = 8;
else
PFIndexes[i] = M[i];
}
// Compute the index in the perfect shuffle table.
unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
PFIndexes[2] * 9 + PFIndexes[3];
unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
unsigned Cost = (PFEntry >> 30);
if (Cost <= 4)
return true;
}
bool DummyBool;
int DummyInt;
unsigned DummyUnsigned;
return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isREVMask(M, VT, 64) ||
isREVMask(M, VT, 32) || isREVMask(M, VT, 16) ||
isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
// isTBLMask(M, VT) || // FIXME: Port TBL support from ARM.
isTRNMask(M, VT, DummyUnsigned) || isUZPMask(M, VT, DummyUnsigned) ||
isZIPMask(M, VT, DummyUnsigned) ||
isTRN_v_undef_Mask(M, VT, DummyUnsigned) ||
isUZP_v_undef_Mask(M, VT, DummyUnsigned) ||
isZIP_v_undef_Mask(M, VT, DummyUnsigned) ||
isINSMask(M, VT.getVectorNumElements(), DummyBool, DummyInt) ||
isConcatMask(M, VT, VT.getSizeInBits() == 128));
}
/// getVShiftImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift operation, where all the elements of the
/// build_vector must have the same constant integer value.
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
// Ignore bit_converts.
while (Op.getOpcode() == ISD::BITCAST)
Op = Op.getOperand(0);
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
HasAnyUndefs, ElementBits) ||
SplatBitSize > ElementBits)
return false;
Cnt = SplatBits.getSExtValue();
return true;
}
/// isVShiftLImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift left operation. That value must be in the range:
/// 0 <= Value < ElementBits for a left shift; or
/// 0 <= Value <= ElementBits for a long left shift.
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
int64_t ElementBits = VT.getScalarSizeInBits();
if (!getVShiftImm(Op, ElementBits, Cnt))
return false;
return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
}
/// isVShiftRImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift right operation. The value must be in the range:
/// 1 <= Value <= ElementBits for a right shift; or
/// 1 <= Value <= ElementBits / 2 for a narrowing right shift.
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
int64_t ElementBits = VT.getScalarSizeInBits();
if (!getVShiftImm(Op, ElementBits, Cnt))
return false;
return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
}
SDValue AArch64TargetLowering::LowerTRUNCATE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.getScalarType() == MVT::i1) {
// Lower i1 truncate to `(x & 1) != 0`.
SDLoc dl(Op);
EVT OpVT = Op.getOperand(0).getValueType();
SDValue Zero = DAG.getConstant(0, dl, OpVT);
SDValue One = DAG.getConstant(1, dl, OpVT);
SDValue And = DAG.getNode(ISD::AND, dl, OpVT, Op.getOperand(0), One);
return DAG.getSetCC(dl, VT, And, Zero, ISD::SETNE);
}
if (!VT.isVector() || VT.isScalableVector())
return SDValue();
if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
return LowerFixedLengthVectorTruncateToSVE(Op, DAG);
return SDValue();
}
SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
int64_t Cnt;
if (!Op.getOperand(1).getValueType().isVector())
return Op;
unsigned EltSize = VT.getScalarSizeInBits();
switch (Op.getOpcode()) {
default:
llvm_unreachable("unexpected shift opcode");
case ISD::SHL:
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);
if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
DAG.getConstant(Cnt, DL, MVT::i32));
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
MVT::i32),
Op.getOperand(0), Op.getOperand(1));
case ISD::SRA:
case ISD::SRL:
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) {
unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED
: AArch64ISD::SRL_PRED;
return LowerToPredicatedOp(Op, DAG, Opc);
}
// Right shift immediate
if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) {
unsigned Opc =
(Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
DAG.getConstant(Cnt, DL, MVT::i32));
}
// Right shift register. Note, there is not a shift right register
// instruction, but the shift left register instruction takes a signed
// value, where negative numbers specify a right shift.
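// For example, a variable right shift x >> y becomes sshl/ushl(x, 0 - y).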
unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::aarch64_neon_sshl
: Intrinsic::aarch64_neon_ushl;
// negate the shift amount
SDValue NegShift = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
Op.getOperand(1));
SDValue NegShiftLeft =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0),
NegShift);
return NegShiftLeft;
}
return SDValue();
}
static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
AArch64CC::CondCode CC, bool NoNans, EVT VT,
const SDLoc &dl, SelectionDAG &DAG) {
EVT SrcVT = LHS.getValueType();
assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
"function only supposed to emit natural comparisons");
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
APInt CnstBits(VT.getSizeInBits(), 0);
APInt UndefBits(VT.getSizeInBits(), 0);
bool IsCnst = BVN && resolveBuildVector(BVN, CnstBits, UndefBits);
bool IsZero = IsCnst && (CnstBits == 0);
if (SrcVT.getVectorElementType().isFloatingPoint()) {
switch (CC) {
default:
return SDValue();
case AArch64CC::NE: {
SDValue Fcmeq;
if (IsZero)
Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
else
Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
return DAG.getNOT(dl, Fcmeq, VT);
}
case AArch64CC::EQ:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
case AArch64CC::GE:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
case AArch64CC::GT:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
case AArch64CC::LS:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
case AArch64CC::LT:
if (!NoNans)
return SDValue();
// If we ignore NaNs then we can use the MI implementation.
LLVM_FALLTHROUGH;
case AArch64CC::MI:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS);
}
}
switch (CC) {
default:
return SDValue();
case AArch64CC::NE: {
SDValue Cmeq;
if (IsZero)
Cmeq = DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
else
Cmeq = DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
return DAG.getNOT(dl, Cmeq, VT);
}
case AArch64CC::EQ:
if (IsZero)
return DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
case AArch64CC::GE:
if (IsZero)
return DAG.getNode(AArch64ISD::CMGEz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::CMGE, dl, VT, LHS, RHS);
case AArch64CC::GT:
if (IsZero)
return DAG.getNode(AArch64ISD::CMGTz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::CMGT, dl, VT, LHS, RHS);
case AArch64CC::LE:
if (IsZero)
return DAG.getNode(AArch64ISD::CMLEz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::CMGE, dl, VT, RHS, LHS);
case AArch64CC::LS:
return DAG.getNode(AArch64ISD::CMHS, dl, VT, RHS, LHS);
case AArch64CC::LO:
return DAG.getNode(AArch64ISD::CMHI, dl, VT, RHS, LHS);
case AArch64CC::LT:
if (IsZero)
return DAG.getNode(AArch64ISD::CMLTz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::CMGT, dl, VT, RHS, LHS);
case AArch64CC::HI:
return DAG.getNode(AArch64ISD::CMHI, dl, VT, LHS, RHS);
case AArch64CC::HS:
return DAG.getNode(AArch64ISD::CMHS, dl, VT, LHS, RHS);
}
}
SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
SelectionDAG &DAG) const {
if (Op.getValueType().isScalableVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
return LowerFixedLengthVectorSetccToSVE(Op, DAG);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();
SDLoc dl(Op);
if (LHS.getValueType().getVectorElementType().isInteger()) {
assert(LHS.getValueType() == RHS.getValueType());
AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
SDValue Cmp =
EmitVectorComparison(LHS, RHS, AArch64CC, false, CmpVT, dl, DAG);
return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
}
const bool FullFP16 =
static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
// Make v4f16 (only) fcmp operations utilise vector instructions
// v8f16 support will be a little more complicated
if (!FullFP16 && LHS.getValueType().getVectorElementType() == MVT::f16) {
if (LHS.getValueType().getVectorNumElements() == 4) {
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, LHS);
RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, RHS);
SDValue NewSetcc = DAG.getSetCC(dl, MVT::v4i16, LHS, RHS, CC);
DAG.ReplaceAllUsesWith(Op, NewSetcc);
CmpVT = MVT::v4i32;
} else
return SDValue();
}
assert((!FullFP16 && LHS.getValueType().getVectorElementType() != MVT::f16) ||
LHS.getValueType().getVectorElementType() != MVT::f128);
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
// clean. Some of them require two branches to implement.
AArch64CC::CondCode CC1, CC2;
bool ShouldInvert;
changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert);
bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
SDValue Cmp =
EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
if (!Cmp.getNode())
return SDValue();
if (CC2 != AArch64CC::AL) {
SDValue Cmp2 =
EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);
if (!Cmp2.getNode())
return SDValue();
Cmp = DAG.getNode(ISD::OR, dl, CmpVT, Cmp, Cmp2);
}
Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
if (ShouldInvert)
Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
return Cmp;
}
static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp,
SelectionDAG &DAG) {
SDValue VecOp = ScalarOp.getOperand(0);
auto Rdx = DAG.getNode(Op, DL, VecOp.getSimpleValueType(), VecOp);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarOp.getValueType(), Rdx,
DAG.getConstant(0, DL, MVT::i64));
}
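// For example (sketch): an i32 vecreduce_add of a v4i32 operand takes the NEON
// path below, producing AArch64ISD::UADDV followed by an extract of lane 0,
// which typically selects to a single ADDV plus a move out of the vector
// register.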
SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
SelectionDAG &DAG) const {
SDValue Src = Op.getOperand(0);
// Try to lower fixed length reductions to SVE.
EVT SrcVT = Src.getValueType();
bool OverrideNEON = Op.getOpcode() == ISD::VECREDUCE_AND ||
Op.getOpcode() == ISD::VECREDUCE_OR ||
Op.getOpcode() == ISD::VECREDUCE_XOR ||
Op.getOpcode() == ISD::VECREDUCE_FADD ||
(Op.getOpcode() != ISD::VECREDUCE_ADD &&
SrcVT.getVectorElementType() == MVT::i64);
if (SrcVT.isScalableVector() ||
useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) {
if (SrcVT.getVectorElementType() == MVT::i1)
return LowerPredReductionToSVE(Op, DAG);
switch (Op.getOpcode()) {
case ISD::VECREDUCE_ADD:
return LowerReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG);
case ISD::VECREDUCE_AND:
return LowerReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG);
case ISD::VECREDUCE_OR:
return LowerReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG);
case ISD::VECREDUCE_SMAX:
return LowerReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG);
case ISD::VECREDUCE_SMIN:
return LowerReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG);
case ISD::VECREDUCE_UMAX:
return LowerReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG);
case ISD::VECREDUCE_UMIN:
return LowerReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG);
case ISD::VECREDUCE_XOR:
return LowerReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG);
case ISD::VECREDUCE_FADD:
return LowerReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG);
case ISD::VECREDUCE_FMAX:
return LowerReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);
case ISD::VECREDUCE_FMIN:
return LowerReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);
default:
llvm_unreachable("Unhandled fixed length reduction");
}
}
// Lower NEON reductions.
SDLoc dl(Op);
switch (Op.getOpcode()) {
case ISD::VECREDUCE_ADD:
return getReductionSDNode(AArch64ISD::UADDV, dl, Op, DAG);
case ISD::VECREDUCE_SMAX:
return getReductionSDNode(AArch64ISD::SMAXV, dl, Op, DAG);
case ISD::VECREDUCE_SMIN:
return getReductionSDNode(AArch64ISD::SMINV, dl, Op, DAG);
case ISD::VECREDUCE_UMAX:
return getReductionSDNode(AArch64ISD::UMAXV, dl, Op, DAG);
case ISD::VECREDUCE_UMIN:
return getReductionSDNode(AArch64ISD::UMINV, dl, Op, DAG);
case ISD::VECREDUCE_FMAX: {
return DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32),
Src);
}
case ISD::VECREDUCE_FMIN: {
return DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32),
Src);
}
default:
llvm_unreachable("Unhandled reduction");
}
}
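// For example: with LSE, "atomicrmw sub" is handled below by negating the
// operand and emitting ATOMIC_LOAD_ADD, which selects to an LDADD variant.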
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
SelectionDAG &DAG) const {
auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
return SDValue();
// LSE has an atomic load-add instruction, but not a load-sub.
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
SDValue RHS = Op.getOperand(2);
AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
RHS = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), RHS);
return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, AN->getMemoryVT(),
Op.getOperand(0), Op.getOperand(1), RHS,
AN->getMemOperand());
}
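// Similarly, "atomicrmw and" is rewritten below as a load-clear of the
// complemented mask (and(x, m) == clr(x, ~m)), selecting to LDCLR under LSE.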
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
SelectionDAG &DAG) const {
auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
return SDValue();
// LSE has an atomic load-clear instruction, but not a load-and.
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
SDValue RHS = Op.getOperand(2);
AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
RHS = DAG.getNode(ISD::XOR, dl, VT, DAG.getConstant(-1ULL, dl, VT), RHS);
return DAG.getAtomic(ISD::ATOMIC_LOAD_CLR, dl, AN->getMemoryVT(),
Op.getOperand(0), Op.getOperand(1), RHS,
AN->getMemOperand());
}
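// Note (illustrative): __chkstk on Windows/AArch64 expects the requested size
// in X15 in units of 16 bytes, hence the SRL by 4 before the call below and
// the SHL by 4 afterwards to recover the byte count.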
SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
SDValue Op, SDValue Chain, SDValue &Size, SelectionDAG &DAG) const {
SDLoc dl(Op);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Callee = DAG.getTargetExternalSymbol("__chkstk", PtrVT, 0);
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *Mask = TRI->getWindowsStackProbePreservedMask();
if (Subtarget->hasCustomCallingConv())
TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
Size = DAG.getNode(ISD::SRL, dl, MVT::i64, Size,
DAG.getConstant(4, dl, MVT::i64));
Chain = DAG.getCopyToReg(Chain, dl, AArch64::X15, Size, SDValue());
Chain =
DAG.getNode(AArch64ISD::CALL, dl, DAG.getVTList(MVT::Other, MVT::Glue),
Chain, Callee, DAG.getRegister(AArch64::X15, MVT::i64),
DAG.getRegisterMask(Mask), Chain.getValue(1));
// To match the actual intent better, we should read the output from X15 here
// again (instead of potentially spilling it to the stack), but rereading Size
// from X15 here doesn't work at -O0, since it thinks that X15 is undefined
// here.
Size = DAG.getNode(ISD::SHL, dl, MVT::i64, Size,
DAG.getConstant(4, dl, MVT::i64));
return Chain;
}
SDValue
AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetWindows() &&
"Only Windows alloca probing supported");
SDLoc dl(Op);
// Get the inputs.
SDNode *Node = Op.getNode();
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
MaybeAlign Align =
cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
EVT VT = Node->getValueType(0);
if (DAG.getMachineFunction().getFunction().hasFnAttribute(
"no-stack-arg-probe")) {
SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
Chain = SP.getValue(1);
SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
if (Align)
SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
SDValue Ops[2] = {SP, Chain};
return DAG.getMergeValues(Ops, dl);
}
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
Chain = LowerWindowsDYNAMIC_STACKALLOC(Op, Chain, Size, DAG);
SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
Chain = SP.getValue(1);
SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
if (Align)
SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
SDValue Ops[2] = {SP, Chain};
return DAG.getMergeValues(Ops, dl);
}
SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT != MVT::i64 && "Expected illegal VSCALE node");
SDLoc DL(Op);
APInt MulImm = cast<ConstantSDNode>(Op.getOperand(0))->getAPIntValue();
return DAG.getZExtOrTrunc(DAG.getVScale(DL, MVT::i64, MulImm.sextOrSelf(64)),
DL, VT);
}
/// Set the IntrinsicInfo for the `aarch64_sve_st<N>` intrinsics.
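/// For example (sketch): for aarch64_sve_st2 with two nxv4i32 data operands,
/// memVT becomes nxv8i32 (the element count scaled by NumVecs) and ptrVal is
/// the trailing pointer argument.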
template <unsigned NumVecs>
static bool
setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL,
AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI) {
Info.opc = ISD::INTRINSIC_VOID;
// Retrieve EC from first vector argument.
const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType());
ElementCount EC = VT.getVectorElementCount();
#ifndef NDEBUG
// Check the assumption that all input vectors are the same type.
for (unsigned I = 0; I < NumVecs; ++I)
assert(VT == TLI.getMemValueType(DL, CI.getArgOperand(I)->getType()) &&
"Invalid type.");
#endif
// memVT is `NumVecs * VT`.
Info.memVT = EVT::getVectorVT(CI.getType()->getContext(), VT.getScalarType(),
EC * NumVecs);
Info.ptrVal = CI.getArgOperand(CI.arg_size() - 1);
Info.offset = 0;
Info.align.reset();
Info.flags = MachineMemOperand::MOStore;
return true;
}
/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
/// specified in the intrinsic calls.
bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const {
auto &DL = I.getModule()->getDataLayout();
switch (Intrinsic) {
case Intrinsic::aarch64_sve_st2:
return setInfoSVEStN<2>(*this, DL, Info, I);
case Intrinsic::aarch64_sve_st3:
return setInfoSVEStN<3>(*this, DL, Info, I);
case Intrinsic::aarch64_sve_st4:
return setInfoSVEStN<4>(*this, DL, Info, I);
case Intrinsic::aarch64_neon_ld2:
case Intrinsic::aarch64_neon_ld3:
case Intrinsic::aarch64_neon_ld4:
case Intrinsic::aarch64_neon_ld1x2:
case Intrinsic::aarch64_neon_ld1x3:
case Intrinsic::aarch64_neon_ld1x4:
case Intrinsic::aarch64_neon_ld2lane:
case Intrinsic::aarch64_neon_ld3lane:
case Intrinsic::aarch64_neon_ld4lane:
case Intrinsic::aarch64_neon_ld2r:
case Intrinsic::aarch64_neon_ld3r:
case Intrinsic::aarch64_neon_ld4r: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
// Conservatively set memVT to the entire set of vectors loaded.
uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
Info.offset = 0;
Info.align.reset();
// volatile loads with NEON intrinsics not supported
Info.flags = MachineMemOperand::MOLoad;
return true;
}
case Intrinsic::aarch64_neon_st2:
case Intrinsic::aarch64_neon_st3:
case Intrinsic::aarch64_neon_st4:
case Intrinsic::aarch64_neon_st1x2:
case Intrinsic::aarch64_neon_st1x3:
case Intrinsic::aarch64_neon_st1x4:
case Intrinsic::aarch64_neon_st2lane:
case Intrinsic::aarch64_neon_st3lane:
case Intrinsic::aarch64_neon_st4lane: {
Info.opc = ISD::INTRINSIC_VOID;
// Conservatively set memVT to the entire set of vectors stored.
unsigned NumElts = 0;
for (const Value *Arg : I.args()) {
Type *ArgTy = Arg->getType();
if (!ArgTy->isVectorTy())
break;
NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
}
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
Info.offset = 0;
Info.align.reset();
// volatile stores with NEON intrinsics not supported
Info.flags = MachineMemOperand::MOStore;
return true;
}
case Intrinsic::aarch64_ldaxr:
case Intrinsic::aarch64_ldxr: {
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(PtrTy->getPointerElementType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::aarch64_stlxr:
case Intrinsic::aarch64_stxr: {
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(PtrTy->getPointerElementType());
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::aarch64_ldaxp:
case Intrinsic::aarch64_ldxp:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i128;
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = Align(16);
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
return true;
case Intrinsic::aarch64_stlxp:
case Intrinsic::aarch64_stxp:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i128;
Info.ptrVal = I.getArgOperand(2);
Info.offset = 0;
Info.align = Align(16);
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
case Intrinsic::aarch64_sve_ldnt1: {
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(I.getType());
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MONonTemporal;
return true;
}
case Intrinsic::aarch64_sve_stnt1: {
PointerType *PtrTy = cast<PointerType>(I.getArgOperand(2)->getType());
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(I.getOperand(0)->getType());
Info.ptrVal = I.getArgOperand(2);
Info.offset = 0;
Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal;
return true;
}
case Intrinsic::aarch64_mops_memset_tag: {
Value *Dst = I.getArgOperand(0);
Value *Val = I.getArgOperand(1);
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(Val->getType());
Info.ptrVal = Dst;
Info.offset = 0;
Info.align = I.getParamAlign(0).valueOrOne();
Info.flags = MachineMemOperand::MOStore;
// The size of the memory being operated on is unknown at this point
Info.size = MemoryLocation::UnknownSize;
return true;
}
default:
break;
}
return false;
}
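// Example for the check below (sketch): if a load's address is
// add(x, shl(y, 3)) and the load is 8 bytes wide, the shift matches the access
// size and can be folded into the addressing mode ("ldr x0, [x1, x2, lsl #3]"),
// so the load width is not reduced in that case.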
bool AArch64TargetLowering::shouldReduceLoadWidth(SDNode *Load,
ISD::LoadExtType ExtTy,
EVT NewVT) const {
// TODO: This may be worth removing. Check regression tests for diffs.
if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
return false;
// If we're reducing the load width in order to avoid having to use an extra
// instruction to do extension then it's probably a good idea.
if (ExtTy != ISD::NON_EXTLOAD)
return true;
// Don't reduce load width if it would prevent us from combining a shift into
// the offset.
MemSDNode *Mem = dyn_cast<MemSDNode>(Load);
assert(Mem);
const SDValue &Base = Mem->getBasePtr();
if (Base.getOpcode() == ISD::ADD &&
Base.getOperand(1).getOpcode() == ISD::SHL &&
Base.getOperand(1).hasOneUse() &&
Base.getOperand(1).getOperand(1).getOpcode() == ISD::Constant) {
// It's unknown whether a scalable vector has a power-of-2 bitwidth.
if (Mem->getMemoryVT().isScalableVector())
return false;
// The shift can be combined if it matches the size of the value being
// loaded (and so reducing the width would make it not match).
uint64_t ShiftAmount = Base.getOperand(1).getConstantOperandVal(1);
uint64_t LoadBytes = Mem->getMemoryVT().getSizeInBits()/8;
if (ShiftAmount == Log2_32(LoadBytes))
return false;
}
// We have no reason to disallow reducing the load width, so allow it.
return true;
}
// Truncations from 64-bit GPR to 32-bit GPR is free.
bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
uint64_t NumBits1 = Ty1->getPrimitiveSizeInBits().getFixedSize();
uint64_t NumBits2 = Ty2->getPrimitiveSizeInBits().getFixedSize();
return NumBits1 > NumBits2;
}
bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
return false;
uint64_t NumBits1 = VT1.getFixedSizeInBits();
uint64_t NumBits2 = VT2.getFixedSizeInBits();
return NumBits1 > NumBits2;
}
/// Check if it is profitable to hoist instruction in then/else to if.
/// Not profitable if I and its user can form an FMA instruction
/// because we prefer FMSUB/FMADD.
bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
if (I->getOpcode() != Instruction::FMul)
return true;
if (!I->hasOneUse())
return true;
Instruction *User = I->user_back();
if (User &&
!(User->getOpcode() == Instruction::FSub ||
User->getOpcode() == Instruction::FAdd))
return true;
const TargetOptions &Options = getTargetMachine().Options;
const Function *F = I->getFunction();
const DataLayout &DL = F->getParent()->getDataLayout();
Type *Ty = User->getOperand(0)->getType();
return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
(Options.AllowFPOpFusion == FPOpFusion::Fast ||
Options.UnsafeFPMath));
}
// All 32-bit GPR operations implicitly zero the high-half of the corresponding
// 64-bit GPR.
bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
return NumBits1 == 32 && NumBits2 == 64;
}
bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
return false;
unsigned NumBits1 = VT1.getSizeInBits();
unsigned NumBits2 = VT2.getSizeInBits();
return NumBits1 == 32 && NumBits2 == 64;
}
bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
EVT VT1 = Val.getValueType();
if (isZExtFree(VT1, VT2)) {
return true;
}
if (Val.getOpcode() != ISD::LOAD)
return false;
// 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
return (VT1.isSimple() && !VT1.isVector() && VT1.isInteger() &&
VT2.isSimple() && !VT2.isVector() && VT2.isInteger() &&
VT1.getSizeInBits() <= 32);
}
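// For example (illustrative): a zero-extend whose only uses are a
// shift-by-constant or a scaled GEP index can often be folded into the
// instruction's extend/shift operand ("ldr x0, [x1, w2, uxtw #3]"), which is
// what isExtFreeImpl below checks for.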
bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
if (isa<FPExtInst>(Ext))
return false;
// Vector types are not free.
if (Ext->getType()->isVectorTy())
return false;
for (const Use &U : Ext->uses()) {
// The extension is free if we can fold it with a left shift in an
// addressing mode or an arithmetic operation: add, sub, and cmp.
// Is there a shift?
const Instruction *Instr = cast<Instruction>(U.getUser());
// Is this a constant shift?
switch (Instr->getOpcode()) {
case Instruction::Shl:
if (!isa<ConstantInt>(Instr->getOperand(1)))
return false;
break;
case Instruction::GetElementPtr: {
gep_type_iterator GTI = gep_type_begin(Instr);
auto &DL = Ext->getModule()->getDataLayout();
std::advance(GTI, U.getOperandNo()-1);
Type *IdxTy = GTI.getIndexedType();
// This extension will end up with a shift because of the scaling factor.
// 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
// Get the shift amount based on the scaling factor:
// log2(sizeof(IdxTy)) - log2(8).
uint64_t ShiftAmt =
countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy).getFixedSize()) - 3;
// Is the constant foldable in the shift of the addressing mode?
// I.e., shift amount is between 1 and 4 inclusive.
if (ShiftAmt == 0 || ShiftAmt > 4)
return false;
break;
}
case Instruction::Trunc:
// Check if this is a noop.
// trunc(sext ty1 to ty2) to ty1.
if (Instr->getType() == Ext->getOperand(0)->getType())
continue;
LLVM_FALLTHROUGH;
default:
return false;
}
// At this point we can use the bfm family, so this extension is free
// for that use.
}
return true;
}
/// Check if both Op1 and Op2 are shufflevector extracts of either the lower
/// or upper half of the vector elements.
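/// For example (sketch): Op1 = shufflevector <8 x i16> %a, undef, <4,5,6,7>
/// and Op2 = shufflevector <8 x i16> %b, undef, <4,5,6,7> both extract the
/// upper halves of their inputs, which is the shape needed to form
/// *mull2/*subl2 style instructions.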
static bool areExtractShuffleVectors(Value *Op1, Value *Op2) {
auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
auto *FullTy = FullV->getType();
auto *HalfTy = HalfV->getType();
return FullTy->getPrimitiveSizeInBits().getFixedSize() ==
2 * HalfTy->getPrimitiveSizeInBits().getFixedSize();
};
auto extractHalf = [](Value *FullV, Value *HalfV) {
auto *FullVT = cast<FixedVectorType>(FullV->getType());
auto *HalfVT = cast<FixedVectorType>(HalfV->getType());
return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
};
ArrayRef<int> M1, M2;
Value *S1Op1, *S2Op1;
if (!match(Op1, m_Shuffle(m_Value(S1Op1), m_Undef(), m_Mask(M1))) ||
!match(Op2, m_Shuffle(m_Value(S2Op1), m_Undef(), m_Mask(M2))))
return false;
// Check that the operands are half as wide as the result and we extract
// half of the elements of the input vectors.
if (!areTypesHalfed(S1Op1, Op1) || !areTypesHalfed(S2Op1, Op2) ||
!extractHalf(S1Op1, Op1) || !extractHalf(S2Op1, Op2))
return false;
// Check the mask extracts either the lower or upper half of vector
// elements.
int M1Start = -1;
int M2Start = -1;
int NumElements = cast<FixedVectorType>(Op1->getType())->getNumElements() * 2;
if (!ShuffleVectorInst::isExtractSubvectorMask(M1, NumElements, M1Start) ||
!ShuffleVectorInst::isExtractSubvectorMask(M2, NumElements, M2Start) ||
M1Start != M2Start || (M1Start != 0 && M2Start != (NumElements / 2)))
return false;
return true;
}
/// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth
/// of the vector elements.
static bool areExtractExts(Value *Ext1, Value *Ext2) {
auto areExtDoubled = [](Instruction *Ext) {
return Ext->getType()->getScalarSizeInBits() ==
2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
};
if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
!match(Ext2, m_ZExtOrSExt(m_Value())) ||
!areExtDoubled(cast<Instruction>(Ext1)) ||
!areExtDoubled(cast<Instruction>(Ext2)))
return false;
return true;
}
/// Check if Op could be used with vmull_high_p64 intrinsic.
static bool isOperandOfVmullHighP64(Value *Op) {
Value *VectorOperand = nullptr;
ConstantInt *ElementIndex = nullptr;
return match(Op, m_ExtractElt(m_Value(VectorOperand),
m_ConstantInt(ElementIndex))) &&
ElementIndex->getValue() == 1 &&
isa<FixedVectorType>(VectorOperand->getType()) &&
cast<FixedVectorType>(VectorOperand->getType())->getNumElements() == 2;
}
/// Check if Op1 and Op2 could be used with vmull_high_p64 intrinsic.
static bool areOperandsOfVmullHighP64(Value *Op1, Value *Op2) {
return isOperandOfVmullHighP64(Op1) && isOperandOfVmullHighP64(Op2);
}
static bool isSplatShuffle(Value *V) {
if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V))
return is_splat(Shuf->getShuffleMask());
return false;
}
/// Check if sinking \p I's operands to I's basic block is profitable, because
/// the operands can be folded into a target instruction, e.g.
/// shufflevectors extracts and/or sext/zext can be folded into (u,s)subl(2).
bool AArch64TargetLowering::shouldSinkOperands(
Instruction *I, SmallVectorImpl<Use *> &Ops) const {
if (!I->getType()->isVectorTy())
return false;
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
case Intrinsic::aarch64_neon_smull:
case Intrinsic::aarch64_neon_umull:
if (areExtractShuffleVectors(II->getOperand(0), II->getOperand(1))) {
Ops.push_back(&II->getOperandUse(0));
Ops.push_back(&II->getOperandUse(1));
return true;
}
LLVM_FALLTHROUGH;
case Intrinsic::aarch64_neon_sqdmull:
case Intrinsic::aarch64_neon_sqdmulh:
case Intrinsic::aarch64_neon_sqrdmulh:
// Sink splats for index lane variants
if (isSplatShuffle(II->getOperand(0)))
Ops.push_back(&II->getOperandUse(0));
if (isSplatShuffle(II->getOperand(1)))
Ops.push_back(&II->getOperandUse(1));
return !Ops.empty();
case Intrinsic::aarch64_neon_pmull64:
if (!areOperandsOfVmullHighP64(II->getArgOperand(0),
II->getArgOperand(1)))
return false;
Ops.push_back(&II->getArgOperandUse(0));
Ops.push_back(&II->getArgOperandUse(1));
return true;
default:
return false;
}
}
switch (I->getOpcode()) {
case Instruction::Sub:
case Instruction::Add: {
if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
return false;
// If the exts' operands extract either the lower or upper elements, we
// can sink them too.
auto Ext1 = cast<Instruction>(I->getOperand(0));
auto Ext2 = cast<Instruction>(I->getOperand(1));
if (areExtractShuffleVectors(Ext1->getOperand(0), Ext2->getOperand(0))) {
Ops.push_back(&Ext1->getOperandUse(0));
Ops.push_back(&Ext2->getOperandUse(0));
}
Ops.push_back(&I->getOperandUse(0));
Ops.push_back(&I->getOperandUse(1));
return true;
}
case Instruction::Mul: {
bool IsProfitable = false;
for (auto &Op : I->operands()) {
// Make sure we are not already sinking this operand
if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
continue;
ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(Op);
if (!Shuffle || !Shuffle->isZeroEltSplat())
continue;
Value *ShuffleOperand = Shuffle->getOperand(0);
InsertElementInst *Insert = dyn_cast<InsertElementInst>(ShuffleOperand);
if (!Insert)
continue;
Instruction *OperandInstr = dyn_cast<Instruction>(Insert->getOperand(1));
if (!OperandInstr)
continue;
ConstantInt *ElementConstant =
dyn_cast<ConstantInt>(Insert->getOperand(2));
// Check that the insertelement is inserting into element 0
if (!ElementConstant || ElementConstant->getZExtValue() != 0)
continue;
unsigned Opcode = OperandInstr->getOpcode();
if (Opcode != Instruction::SExt && Opcode != Instruction::ZExt)
continue;
Ops.push_back(&Shuffle->getOperandUse(0));
Ops.push_back(&Op);
IsProfitable = true;
}
return IsProfitable;
}
default:
return false;
}
return false;
}
bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
Align &RequiredAligment) const {
if (!LoadedType.isSimple() ||
(!LoadedType.isInteger() && !LoadedType.isFloatingPoint()))
return false;
// Cyclone supports unaligned accesses.
RequiredAligment = Align(1);
unsigned NumBits = LoadedType.getSizeInBits();
return NumBits == 32 || NumBits == 64;
}
/// A helper function for determining the number of interleaved accesses we
/// will generate when lowering accesses of the given type.
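/// For example: a <16 x i32> value (512 bits) accessed with 128-bit NEON
/// vectors requires (512 + 127) / 128 = 4 interleaved accesses.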
unsigned AArch64TargetLowering::getNumInterleavedAccesses(
VectorType *VecTy, const DataLayout &DL, bool UseScalable) const {
unsigned VecSize = UseScalable ? Subtarget->getMinSVEVectorSizeInBits() : 128;
return std::max<unsigned>(1, (DL.getTypeSizeInBits(VecTy) + 127) / VecSize);
}
MachineMemOperand::Flags
AArch64TargetLowering::getTargetMMOFlags(const Instruction &I) const {
if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
I.getMetadata(FALKOR_STRIDED_ACCESS_MD) != nullptr)
return MOStridedAccess;
return MachineMemOperand::MONone;
}
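// For example (assuming fixed-length SVE is not in use): <4 x i32> (128 bits)
// and <16 x i32> (512 bits, split into multiple accesses) are legal
// interleaved access types below, while <3 x i32> (96 bits) is not.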
bool AArch64TargetLowering::isLegalInterleavedAccessType(
VectorType *VecTy, const DataLayout &DL, bool &UseScalable) const {
unsigned VecSize = DL.getTypeSizeInBits(VecTy);
unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
UseScalable = false;
// Ensure the number of vector elements is greater than 1.
if (NumElements < 2)
return false;
// Ensure the element type is legal.
if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
return false;
if (Subtarget->useSVEForFixedLengthVectors() &&
(VecSize % Subtarget->getMinSVEVectorSizeInBits() == 0 ||
(VecSize < Subtarget->getMinSVEVectorSizeInBits() &&
isPowerOf2_32(NumElements) && VecSize > 128))) {
UseScalable = true;
return true;
}
// Ensure the total vector size is 64 or a multiple of 128. Types larger than
// 128 will be split into multiple interleaved accesses.
return VecSize == 64 || VecSize % 128 == 0;
}
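// For example: a fixed <4 x float> maps to the SVE container type
// <vscale x 4 x float> below, and <8 x i8> maps to <vscale x 16 x i8>.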
static ScalableVectorType *getSVEContainerIRType(FixedVectorType *VTy) {
if (VTy->getElementType() == Type::getDoubleTy(VTy->getContext()))
return ScalableVectorType::get(VTy->getElementType(), 2);
if (VTy->getElementType() == Type::getFloatTy(VTy->getContext()))
return ScalableVectorType::get(VTy->getElementType(), 4);
if (VTy->getElementType() == Type::getBFloatTy(VTy->getContext()))
return ScalableVectorType::get(VTy->getElementType(), 8);
if (VTy->getElementType() == Type::getHalfTy(VTy->getContext()))
return ScalableVectorType::get(VTy->getElementType(), 8);
if (VTy->getElementType() == Type::getInt64Ty(VTy->getContext()))
return ScalableVectorType::get(VTy->getElementType(), 2);
if (VTy->getElementType() == Type::getInt32Ty(VTy->getContext()))
return ScalableVectorType::get(VTy->getElementType(), 4);
if (VTy->getElementType() == Type::getInt16Ty(VTy->getContext()))
return ScalableVectorType::get(VTy->getElementType(), 8);
if (VTy->getElementType() == Type::getInt8Ty(VTy->getContext()))
return ScalableVectorType::get(VTy->getElementType(), 16);
llvm_unreachable("Cannot handle input vector type");
}
/// Lower an interleaved load into a ldN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
///
/// Into:
/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.aarch64.neon.ld2(%ptr)
/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
bool AArch64TargetLowering::lowerInterleavedLoad(
LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
ArrayRef<unsigned> Indices, unsigned Factor) const {
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
assert(!Shuffles.empty() && "Empty shufflevector input");
assert(Shuffles.size() == Indices.size() &&
"Unmatched number of shufflevectors and indices");
const DataLayout &DL = LI->getModule()->getDataLayout();
VectorType *VTy = Shuffles[0]->getType();
// Skip if we do not have NEON or if the vector type is illegal. We can
// "legalize" wide vector types into multiple interleaved accesses as long as
// the vector types are divisible by 128.
bool UseScalable;
if (!Subtarget->hasNEON() ||
!isLegalInterleavedAccessType(VTy, DL, UseScalable))
return false;
unsigned NumLoads = getNumInterleavedAccesses(VTy, DL, UseScalable);
auto *FVTy = cast<FixedVectorType>(VTy);
// A pointer vector can not be the return type of the ldN intrinsics. Need to
// load integer vectors first and then convert to pointer vectors.
Type *EltTy = FVTy->getElementType();
if (EltTy->isPointerTy())
FVTy =
FixedVectorType::get(DL.getIntPtrType(EltTy), FVTy->getNumElements());
// If we're going to generate more than one load, reset the sub-vector type
// to something legal.
FVTy = FixedVectorType::get(FVTy->getElementType(),
FVTy->getNumElements() / NumLoads);
auto *LDVTy =
UseScalable ? cast<VectorType>(getSVEContainerIRType(FVTy)) : FVTy;
IRBuilder<> Builder(LI);
// The base address of the load.
Value *BaseAddr = LI->getPointerOperand();
if (NumLoads > 1) {
// We will compute the pointer operand of each load from the original base
// address using GEPs. Cast the base address to a pointer to the scalar
// element type.
BaseAddr = Builder.CreateBitCast(
BaseAddr,
LDVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
}
Type *PtrTy =
UseScalable
? LDVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace())
: LDVTy->getPointerTo(LI->getPointerAddressSpace());
Type *PredTy = VectorType::get(Type::getInt1Ty(LDVTy->getContext()),
LDVTy->getElementCount());
static const Intrinsic::ID SVELoadIntrs[3] = {
Intrinsic::aarch64_sve_ld2_sret, Intrinsic::aarch64_sve_ld3_sret,
Intrinsic::aarch64_sve_ld4_sret};
static const Intrinsic::ID NEONLoadIntrs[3] = {Intrinsic::aarch64_neon_ld2,
Intrinsic::aarch64_neon_ld3,
Intrinsic::aarch64_neon_ld4};
Function *LdNFunc;
if (UseScalable)
LdNFunc = Intrinsic::getDeclaration(LI->getModule(),
SVELoadIntrs[Factor - 2], {LDVTy});
else
LdNFunc = Intrinsic::getDeclaration(
LI->getModule(), NEONLoadIntrs[Factor - 2], {LDVTy, PtrTy});
// Holds sub-vectors extracted from the load intrinsic return values. The
// sub-vectors are associated with the shufflevector instructions they will
// replace.
DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
Value *PTrue = nullptr;
if (UseScalable) {
Optional<unsigned> PgPattern =
getSVEPredPatternFromNumElements(FVTy->getNumElements());
if (Subtarget->getMinSVEVectorSizeInBits() ==
Subtarget->getMaxSVEVectorSizeInBits() &&
Subtarget->getMinSVEVectorSizeInBits() == DL.getTypeSizeInBits(FVTy))
PgPattern = AArch64SVEPredPattern::all;
auto *PTruePat =
ConstantInt::get(Type::getInt32Ty(LDVTy->getContext()), *PgPattern);
PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
{PTruePat});
}
for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
// If we're generating more than one load, compute the base address of
// subsequent loads as an offset from the previous.
if (LoadCount > 0)
BaseAddr = Builder.CreateConstGEP1_32(LDVTy->getElementType(), BaseAddr,
FVTy->getNumElements() * Factor);
CallInst *LdN;
if (UseScalable)
LdN = Builder.CreateCall(
LdNFunc, {PTrue, Builder.CreateBitCast(BaseAddr, PtrTy)}, "ldN");
else
LdN = Builder.CreateCall(LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy),
"ldN");
// Extract and store the sub-vectors returned by the load intrinsic.
for (unsigned i = 0; i < Shuffles.size(); i++) {
ShuffleVectorInst *SVI = Shuffles[i];
unsigned Index = Indices[i];
Value *SubVec = Builder.CreateExtractValue(LdN, Index);
if (UseScalable)
SubVec = Builder.CreateExtractVector(
FVTy, SubVec,
ConstantInt::get(Type::getInt64Ty(VTy->getContext()), 0));
// Convert the integer vector to pointer vector if the element is pointer.
if (EltTy->isPointerTy())
SubVec = Builder.CreateIntToPtr(
SubVec, FixedVectorType::get(SVI->getType()->getElementType(),
FVTy->getNumElements()));
SubVecs[SVI].push_back(SubVec);
}
}
// Replace uses of the shufflevector instructions with the sub-vectors
// returned by the load intrinsic. If a shufflevector instruction is
// associated with more than one sub-vector, those sub-vectors will be
// concatenated into a single wide vector.
for (ShuffleVectorInst *SVI : Shuffles) {
auto &SubVec = SubVecs[SVI];
auto *WideVec =
SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
SVI->replaceAllUsesWith(WideVec);
}
return true;
}
/// Lower an interleaved store into a stN intrinsic.
///
/// E.g. Lower an interleaved store (Factor = 3):
/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
/// store <12 x i32> %i.vec, <12 x i32>* %ptr
///
/// Into:
/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
/// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
///
/// Note that the new shufflevectors will be removed and we'll only generate one
/// st3 instruction in CodeGen.
///
/// Example for a more general valid mask (Factor 3). Lower:
/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
/// store <12 x i32> %i.vec, <12 x i32>* %ptr
///
/// Into:
/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
/// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
ShuffleVectorInst *SVI,
unsigned Factor) const {
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
auto *VecTy = cast<FixedVectorType>(SVI->getType());
assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
unsigned LaneLen = VecTy->getNumElements() / Factor;
Type *EltTy = VecTy->getElementType();
auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
const DataLayout &DL = SI->getModule()->getDataLayout();
bool UseScalable;
// Skip if we do not have NEON or if the vector type is illegal. We can
// "legalize" wide vector types into multiple interleaved accesses as long as
// the vector types are divisible by 128.
if (!Subtarget->hasNEON() ||
!isLegalInterleavedAccessType(SubVecTy, DL, UseScalable))
return false;
unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL, UseScalable);
Value *Op0 = SVI->getOperand(0);
Value *Op1 = SVI->getOperand(1);
IRBuilder<> Builder(SI);
// StN intrinsics don't support pointer vectors as arguments. Convert pointer
// vectors to integer vectors.
if (EltTy->isPointerTy()) {
Type *IntTy = DL.getIntPtrType(EltTy);
unsigned NumOpElts =
cast<FixedVectorType>(Op0->getType())->getNumElements();
// Convert to the corresponding integer vector.
auto *IntVecTy = FixedVectorType::get(IntTy, NumOpElts);
Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
SubVecTy = FixedVectorType::get(IntTy, LaneLen);
}
// If we're going to generate more than one store, reset the lane length
// and sub-vector type to something legal.
LaneLen /= NumStores;
SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
auto *STVTy = UseScalable ? cast<VectorType>(getSVEContainerIRType(SubVecTy))
: SubVecTy;
// The base address of the store.
Value *BaseAddr = SI->getPointerOperand();
if (NumStores > 1) {
// We will compute the pointer operand of each store from the original base
// address using GEPs. Cast the base address to a pointer to the scalar
// element type.
BaseAddr = Builder.CreateBitCast(
BaseAddr,
SubVecTy->getElementType()->getPointerTo(SI->getPointerAddressSpace()));
}
auto Mask = SVI->getShuffleMask();
Type *PtrTy =
UseScalable
? STVTy->getElementType()->getPointerTo(SI->getPointerAddressSpace())
: STVTy->getPointerTo(SI->getPointerAddressSpace());
Type *PredTy = VectorType::get(Type::getInt1Ty(STVTy->getContext()),
STVTy->getElementCount());
static const Intrinsic::ID SVEStoreIntrs[3] = {Intrinsic::aarch64_sve_st2,
Intrinsic::aarch64_sve_st3,
Intrinsic::aarch64_sve_st4};
static const Intrinsic::ID NEONStoreIntrs[3] = {Intrinsic::aarch64_neon_st2,
Intrinsic::aarch64_neon_st3,
Intrinsic::aarch64_neon_st4};
Function *StNFunc;
if (UseScalable)
StNFunc = Intrinsic::getDeclaration(SI->getModule(),
SVEStoreIntrs[Factor - 2], {STVTy});
else
StNFunc = Intrinsic::getDeclaration(
SI->getModule(), NEONStoreIntrs[Factor - 2], {STVTy, PtrTy});
Value *PTrue = nullptr;
if (UseScalable) {
Optional<unsigned> PgPattern =
getSVEPredPatternFromNumElements(SubVecTy->getNumElements());
if (Subtarget->getMinSVEVectorSizeInBits() ==
Subtarget->getMaxSVEVectorSizeInBits() &&
Subtarget->getMinSVEVectorSizeInBits() ==
DL.getTypeSizeInBits(SubVecTy))
PgPattern = AArch64SVEPredPattern::all;
auto *PTruePat =
ConstantInt::get(Type::getInt32Ty(STVTy->getContext()), *PgPattern);
PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
{PTruePat});
}
for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
SmallVector<Value *, 5> Ops;
// Split the shufflevector operands into sub vectors for the new stN call.
for (unsigned i = 0; i < Factor; i++) {
Value *Shuffle;
unsigned IdxI = StoreCount * LaneLen * Factor + i;
if (Mask[IdxI] >= 0) {
Shuffle = Builder.CreateShuffleVector(
Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0));
} else {
unsigned StartMask = 0;
for (unsigned j = 1; j < LaneLen; j++) {
unsigned IdxJ = StoreCount * LaneLen * Factor + j;
if (Mask[IdxJ * Factor + IdxI] >= 0) {
StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
break;
}
}
// Note: Filling undef gaps with random elements is OK, since those elements
// were being written anyway (with undefs).
// In the case of all undefs we default to using elements from lane 0.
// Note: StartMask cannot be negative; it's checked in isReInterleaveMask.
Shuffle = Builder.CreateShuffleVector(
Op0, Op1, createSequentialMask(StartMask, LaneLen, 0));
}
if (UseScalable)
Shuffle = Builder.CreateInsertVector(
STVTy, UndefValue::get(STVTy), Shuffle,
ConstantInt::get(Type::getInt64Ty(STVTy->getContext()), 0));
Ops.push_back(Shuffle);
}
if (UseScalable)
Ops.push_back(PTrue);
// If we're generating more than one store, we compute the base address of
// subsequent stores as an offset from the previous.
if (StoreCount > 0)
BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
BaseAddr, LaneLen * Factor);
Ops.push_back(Builder.CreateBitCast(BaseAddr, PtrTy));
Builder.CreateCall(StNFunc, Ops);
}
return true;
}
// Lower an SVE structured load intrinsic returning a tuple type to target
// specific intrinsic taking the same input but returning a multi-result value
// of the split tuple type.
//
// E.g. Lowering an LD3:
//
// call <vscale x 12 x i32> @llvm.aarch64.sve.ld3.nxv12i32(
// <vscale x 4 x i1> %pred,
// <vscale x 4 x i32>* %addr)
//
// Output DAG:
//
// t0: ch = EntryToken
// t2: nxv4i1,ch = CopyFromReg t0, Register:nxv4i1 %0
// t4: i64,ch = CopyFromReg t0, Register:i64 %1
// t5: nxv4i32,nxv4i32,nxv4i32,ch = AArch64ISD::SVE_LD3 t0, t2, t4
// t6: nxv12i32 = concat_vectors t5, t5:1, t5:2
//
// This is called pre-legalization to avoid widening/splitting issues with
// non-power-of-2 tuple types used for LD3, such as nxv12i32.
SDValue AArch64TargetLowering::LowerSVEStructLoad(unsigned Intrinsic,
ArrayRef<SDValue> LoadOps,
EVT VT, SelectionDAG &DAG,
const SDLoc &DL) const {
assert(VT.isScalableVector() && "Can only lower scalable vectors");
unsigned N, Opcode;
static std::map<unsigned, std::pair<unsigned, unsigned>> IntrinsicMap = {
{Intrinsic::aarch64_sve_ld2, {2, AArch64ISD::SVE_LD2_MERGE_ZERO}},
{Intrinsic::aarch64_sve_ld3, {3, AArch64ISD::SVE_LD3_MERGE_ZERO}},
{Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4_MERGE_ZERO}}};
std::tie(N, Opcode) = IntrinsicMap[Intrinsic];
assert(VT.getVectorElementCount().getKnownMinValue() % N == 0 &&
"invalid tuple vector type!");
EVT SplitVT =
EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
VT.getVectorElementCount().divideCoefficientBy(N));
assert(isTypeLegal(SplitVT));
SmallVector<EVT, 5> VTs(N, SplitVT);
VTs.push_back(MVT::Other); // Chain
SDVTList NodeTys = DAG.getVTList(VTs);
SDValue PseudoLoad = DAG.getNode(Opcode, DL, NodeTys, LoadOps);
SmallVector<SDValue, 4> PseudoLoadOps;
for (unsigned I = 0; I < N; ++I)
PseudoLoadOps.push_back(SDValue(PseudoLoad.getNode(), I));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, PseudoLoadOps);
}
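// For example (sketch): a 64-byte, 16-byte-aligned memset can be implemented
// below with MVT::v16i8 stores when NEON is usable; when only 8-byte alignment
// can be assumed and misaligned accesses are not fast, operations of 8 bytes
// or more fall back to MVT::i64.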
EVT AArch64TargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
// Only use AdvSIMD to implement memsets of 32 bytes and above. It would have
// taken one instruction to materialize the v2i64 zero and one store (with
// restrictive addressing mode). Just do i64 stores.
bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
if (Op.isAligned(AlignCheck))
return true;
bool Fast;
return allowsMisalignedMemoryAccesses(VT, 0, Align(1),
MachineMemOperand::MONone, &Fast) &&
Fast;
};
if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
AlignmentIsAcceptable(MVT::v16i8, Align(16)))
return MVT::v16i8;
if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
return MVT::f128;
if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
return MVT::i64;
if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
return MVT::i32;
return MVT::Other;
}
LLT AArch64TargetLowering::getOptimalMemOpLLT(
const MemOp &Op, const AttributeList &FuncAttributes) const {
bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
// Only use AdvSIMD to implement memsets of 32 bytes and above. It would have
// taken one instruction to materialize the v2i64 zero and one store (with
// restrictive addressing mode). Just do i64 stores.
bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
if (Op.isAligned(AlignCheck))
return true;
bool Fast;
return allowsMisalignedMemoryAccesses(VT, 0, Align(1),
MachineMemOperand::MONone, &Fast) &&
Fast;
};
if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
AlignmentIsAcceptable(MVT::v2i64, Align(16)))
return LLT::fixed_vector(2, 64);
if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
return LLT::scalar(128);
if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
return LLT::scalar(64);
if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
return LLT::scalar(32);
return LLT();
}
// 12-bit optionally shifted immediates are legal for adds.
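// For example: 4095 and 0x5000 are legal add immediates (imm12, optionally
// shifted left by 12 bits), while 0x1001 is not and must be materialized
// separately.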
bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
if (Immed == std::numeric_limits<int64_t>::min()) {
LLVM_DEBUG(dbgs() << "Illegal add imm " << Immed
<< ": avoid UB for INT64_MIN\n");
return false;
}
// Same encoding for add/sub, just flip the sign.
Immed = std::abs(Immed);
bool IsLegal = ((Immed >> 12) == 0 ||
((Immed & 0xfff) == 0 && Immed >> 24 == 0));
LLVM_DEBUG(dbgs() << "Is " << Immed
<< " legal add imm: " << (IsLegal ? "yes" : "no") << "\n");
return IsLegal;
}
// Return false to prevent folding
// (mul (add x, c1), c2) -> (add (mul x, c2), c2*c1) in DAGCombine,
// if the folding leads to worse code.
bool AArch64TargetLowering::isMulAddWithConstProfitable(
const SDValue &AddNode, const SDValue &ConstNode) const {
// Let the DAGCombiner decide for vector types and large types.
const EVT VT = AddNode.getValueType();
if (VT.isVector() || VT.getScalarSizeInBits() > 64)
return true;
// It is worse if c1 is a legal add immediate while c1*c2 is not, and has to
// be composed of at least two instructions.
const ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
const ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
const int64_t C1 = C1Node->getSExtValue();
const APInt C1C2 = C1Node->getAPIntValue() * C2Node->getAPIntValue();
if (!isLegalAddImmediate(C1) || isLegalAddImmediate(C1C2.getSExtValue()))
return true;
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
AArch64_IMM::expandMOVImm(C1C2.getZExtValue(), VT.getSizeInBits(), Insn);
if (Insn.size() > 1)
return false;
// Default to true and let the DAGCombiner decide.
return true;
}
// Integer comparisons are implemented with ADDS/SUBS, so the range of valid
// immediates is the same as for an add or a sub.
bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
return isLegalAddImmediate(Immed);
}
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
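/// For example, for an i32 access: [Xn, #4092] (uimm12 scaled by 4) and
/// [Xn, Xm, lsl #2] are legal, while [Xn, Xm, lsl #3] is not, since the scale
/// must match the access size.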
bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS, Instruction *I) const {
// AArch64 has five basic addressing modes:
// reg
// reg + 9-bit signed offset
// reg + SIZE_IN_BYTES * 12-bit unsigned offset
// reg1 + reg2
// reg + SIZE_IN_BYTES * reg
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
// No reg+reg+imm addressing.
if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
return false;
// FIXME: Update this method to support scalable addressing modes.
if (isa<ScalableVectorType>(Ty)) {
uint64_t VecElemNumBytes =
DL.getTypeSizeInBits(cast<VectorType>(Ty)->getElementType()) / 8;
return AM.HasBaseReg && !AM.BaseOffs &&
(AM.Scale == 0 || (uint64_t)AM.Scale == VecElemNumBytes);
}
// check reg + imm case:
// i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
uint64_t NumBytes = 0;
if (Ty->isSized()) {
uint64_t NumBits = DL.getTypeSizeInBits(Ty);
NumBytes = NumBits / 8;
if (!isPowerOf2_64(NumBits))
NumBytes = 0;
}
if (!AM.Scale) {
int64_t Offset = AM.BaseOffs;
// 9-bit signed offset
if (isInt<9>(Offset))
return true;
// 12-bit unsigned offset
unsigned shift = Log2_64(NumBytes);
if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
// Must be a multiple of NumBytes (NumBytes is a power of 2)
(Offset >> shift) << shift == Offset)
return true;
return false;
}
// Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
return AM.Scale == 1 || (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes);
}
bool AArch64TargetLowering::shouldConsiderGEPOffsetSplit() const {
// Consider splitting large offset of struct or array.
return true;
}
InstructionCost AArch64TargetLowering::getScalingFactorCost(
const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const {
// Scaling factors are not free at all.
// Operands | Rt Latency
// -------------------------------------------
// Rt, [Xn, Xm] | 4
// -------------------------------------------
// Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5
// Rt, [Xn, Wm, <extend> #imm] |
if (isLegalAddressingMode(DL, AM, Ty, AS))
// Scale represents reg2 * scale, thus account for 1 if
// it is not equal to 0 or 1.
return AM.Scale != 0 && AM.Scale != 1;
return -1;
}
bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(
const MachineFunction &MF, EVT VT) const {
VT = VT.getScalarType();
if (!VT.isSimple())
return false;
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f16:
return Subtarget->hasFullFP16();
case MVT::f32:
case MVT::f64:
return true;
default:
break;
}
return false;
}
bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
Type *Ty) const {
switch (Ty->getScalarType()->getTypeID()) {
case Type::FloatTyID:
case Type::DoubleTyID:
return true;
default:
return false;
}
}
bool AArch64TargetLowering::generateFMAsInMachineCombiner(
EVT VT, CodeGenOpt::Level OptLevel) const {
return (OptLevel >= CodeGenOpt::Aggressive) && !VT.isScalableVector() &&
!useSVEForFixedLengthVectorVT(VT);
}
const MCPhysReg *
AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
// LR is a callee-save register, but we must treat it as clobbered by any call
// site. Hence we include LR in the scratch registers, which are in turn added
// as implicit-defs for stackmaps and patchpoints.
static const MCPhysReg ScratchRegs[] = {
AArch64::X16, AArch64::X17, AArch64::LR, 0
};
return ScratchRegs;
}
bool
AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
CombineLevel Level) const {
N = N->getOperand(0).getNode();
EVT VT = N->getValueType(0);
// If N is unsigned bit extraction: ((x >> C) & mask), then do not combine
// it with shift to let it be lowered to UBFX.
if (N->getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
isa<ConstantSDNode>(N->getOperand(1))) {
uint64_t TruncMask = N->getConstantOperandVal(1);
if (isMask_64(TruncMask) &&
N->getOperand(0).getOpcode() == ISD::SRL &&
isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
return false;
}
return true;
}
bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
if (BitSize == 0)
return false;
int64_t Val = Imm.getSExtValue();
if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, BitSize))
return true;
if ((int64_t)Val < 0)
Val = ~Val;
if (BitSize == 32)
Val &= (1LL << 32) - 1;
unsigned LZ = countLeadingZeros((uint64_t)Val);
unsigned Shift = (63 - LZ) / 16;
// MOVZ is free so return true for one or fewer MOVK.
return Shift < 3;
}
bool AArch64TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const {
if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
return false;
return (Index == 0 || Index == ResVT.getVectorMinNumElements());
}
/// Turn vector tests of the signbit in the form of:
/// xor (sra X, elt_size(X)-1), -1
/// into:
/// cmge X, X, #0
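/// For example (illustrative): for a v4i32 X, xor(vashr(X, 31), -1) computes a
/// per-lane "X >= 0" mask, which is exactly what CMGE (zero) produces.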
static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
EVT VT = N->getValueType(0);
if (!Subtarget->hasNEON() || !VT.isVector())
return SDValue();
// There must be a shift right algebraic before the xor, and the xor must be a
// 'not' operation.
SDValue Shift = N->getOperand(0);
SDValue Ones = N->getOperand(1);
if (Shift.getOpcode() != AArch64ISD::VASHR || !Shift.hasOneUse() ||
!ISD::isBuildVectorAllOnes(Ones.getNode()))
return SDValue();
// The shift should be smearing the sign bit across each vector element.
auto *ShiftAmt = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
EVT ShiftEltTy = Shift.getValueType().getVectorElementType();
if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
return SDValue();
return DAG.getNode(AArch64ISD::CMGEz, SDLoc(N), VT, Shift.getOperand(0));
}
// Given a vecreduce_add node, detect the below pattern and convert it to the
// node sequence with UABDL, [S|U]ABD and UADDLP.
//
// i32 vecreduce_add(
// v16i32 abs(
// v16i32 sub(
// v16i32 [sign|zero]_extend(v16i8 a), v16i32 [sign|zero]_extend(v16i8 b))))
// =================>
// i32 vecreduce_add(
// v4i32 UADDLP(
// v8i16 add(
// v8i16 zext(
// v8i8 [S|U]ABD low8:v16i8 a, low8:v16i8 b
// v8i16 zext(
// v8i8 [S|U]ABD high8:v16i8 a, high8:v16i8 b
static SDValue performVecReduceAddCombineWithUADDLP(SDNode *N,
SelectionDAG &DAG) {
// Assumed i32 vecreduce_add
if (N->getValueType(0) != MVT::i32)
return SDValue();
SDValue VecReduceOp0 = N->getOperand(0);
unsigned Opcode = VecReduceOp0.getOpcode();
// Assumed v16i32 abs
if (Opcode != ISD::ABS || VecReduceOp0->getValueType(0) != MVT::v16i32)
return SDValue();
SDValue ABS = VecReduceOp0;
// Assumed v16i32 sub
if (ABS->getOperand(0)->getOpcode() != ISD::SUB ||
ABS->getOperand(0)->getValueType(0) != MVT::v16i32)
return SDValue();
SDValue SUB = ABS->getOperand(0);
unsigned Opcode0 = SUB->getOperand(0).getOpcode();
unsigned Opcode1 = SUB->getOperand(1).getOpcode();
// Assumed v16i32 type
if (SUB->getOperand(0)->getValueType(0) != MVT::v16i32 ||
SUB->getOperand(1)->getValueType(0) != MVT::v16i32)
return SDValue();
// Assumed zext or sext
bool IsZExt = false;
if (Opcode0 == ISD::ZERO_EXTEND && Opcode1 == ISD::ZERO_EXTEND) {
IsZExt = true;
} else if (Opcode0 == ISD::SIGN_EXTEND && Opcode1 == ISD::SIGN_EXTEND) {
IsZExt = false;
} else
return SDValue();
SDValue EXT0 = SUB->getOperand(0);
SDValue EXT1 = SUB->getOperand(1);
// Assumed zext's operand has v16i8 type
if (EXT0->getOperand(0)->getValueType(0) != MVT::v16i8 ||
EXT1->getOperand(0)->getValueType(0) != MVT::v16i8)
return SDValue();
// Pattern is detected. Let's convert it to a sequence of nodes.
SDLoc DL(N);
// First, create the node pattern of UABD/SABD.
SDValue UABDHigh8Op0 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
DAG.getConstant(8, DL, MVT::i64));
SDValue UABDHigh8Op1 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
DAG.getConstant(8, DL, MVT::i64));
SDValue UABDHigh8 = DAG.getNode(IsZExt ? ISD::ABDU : ISD::ABDS, DL, MVT::v8i8,
UABDHigh8Op0, UABDHigh8Op1);
SDValue UABDL = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, UABDHigh8);
// Second, create the node pattern of UABAL.
SDValue UABDLo8Op0 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
DAG.getConstant(0, DL, MVT::i64));
SDValue UABDLo8Op1 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
DAG.getConstant(0, DL, MVT::i64));
SDValue UABDLo8 = DAG.getNode(IsZExt ? ISD::ABDU : ISD::ABDS, DL, MVT::v8i8,
UABDLo8Op0, UABDLo8Op1);
SDValue ZExtUABD = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, UABDLo8);
SDValue UABAL = DAG.getNode(ISD::ADD, DL, MVT::v8i16, UABDL, ZExtUABD);
// Third, create the node of UADDLP.
SDValue UADDLP = DAG.getNode(AArch64ISD::UADDLP, DL, MVT::v4i32, UABAL);
// Fourth, create the node of VECREDUCE_ADD.
return DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, UADDLP);
}
// Turn a v8i8/v16i8 extended vecreduce into a udot/sdot and vecreduce
// vecreduce.add(ext(A)) to vecreduce.add(DOT(zero, A, one))
// vecreduce.add(mul(ext(A), ext(B))) to vecreduce.add(DOT(zero, A, B))
static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *ST) {
if (!ST->hasDotProd())
return performVecReduceAddCombineWithUADDLP(N, DAG);
SDValue Op0 = N->getOperand(0);
if (N->getValueType(0) != MVT::i32 ||
Op0.getValueType().getVectorElementType() != MVT::i32)
return SDValue();
unsigned ExtOpcode = Op0.getOpcode();
SDValue A = Op0;
SDValue B;
if (ExtOpcode == ISD::MUL) {
A = Op0.getOperand(0);
B = Op0.getOperand(1);
if (A.getOpcode() != B.getOpcode() ||
A.getOperand(0).getValueType() != B.getOperand(0).getValueType())
return SDValue();
ExtOpcode = A.getOpcode();
}
if (ExtOpcode != ISD::ZERO_EXTEND && ExtOpcode != ISD::SIGN_EXTEND)
return SDValue();
EVT Op0VT = A.getOperand(0).getValueType();
if (Op0VT != MVT::v8i8 && Op0VT != MVT::v16i8)
return SDValue();
SDLoc DL(Op0);
// For non-mla reductions B can be set to 1. For MLA we take the operand of
// the extend B.
if (!B)
B = DAG.getConstant(1, DL, Op0VT);
else
B = B.getOperand(0);
SDValue Zeros =
DAG.getConstant(0, DL, Op0VT == MVT::v8i8 ? MVT::v2i32 : MVT::v4i32);
auto DotOpcode =
(ExtOpcode == ISD::ZERO_EXTEND) ? AArch64ISD::UDOT : AArch64ISD::SDOT;
SDValue Dot = DAG.getNode(DotOpcode, DL, Zeros.getValueType(), Zeros,
A.getOperand(0), B);
return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
}
static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
return foldVectorXorShiftIntoCmp(N, DAG, Subtarget);
}
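// Worked example for the lowering below (sketch): "sdiv i32 %x, 8" becomes
// roughly
//   cmp  w0, #0
//   add  w8, w0, #7
//   csel w8, w8, w0, lt
//   asr  w0, w8, #3
// with a final negate when the divisor is a negated power of two.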
SDValue
AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N,0); // Lower SDIV as SDIV
EVT VT = N->getValueType(0);
// For scalable and fixed types, mark them as cheap so we can handle them much
// later. This allows us to handle larger-than-legal types.
if (VT.isScalableVector() || Subtarget->useSVEForFixedLengthVectors())
return SDValue(N, 0);
// fold (sdiv X, pow2)
if ((VT != MVT::i32 && VT != MVT::i64) ||
!(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
return SDValue();
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
unsigned Lg2 = Divisor.countTrailingZeros();
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
// Add (N0 < 0) ? Pow2 - 1 : 0;
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL);
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);
Created.push_back(Cmp.getNode());
Created.push_back(Add.getNode());
Created.push_back(CSel.getNode());
// Divide by pow2.
SDValue SRA =
DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64));
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
if (Divisor.isNonNegative())
return SRA;
Created.push_back(SRA.getNode());
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
}
static bool IsSVECntIntrinsic(SDValue S) {
switch(getIntrinsicID(S.getNode())) {
default:
break;
case Intrinsic::aarch64_sve_cntb:
case Intrinsic::aarch64_sve_cnth:
case Intrinsic::aarch64_sve_cntw:
case Intrinsic::aarch64_sve_cntd:
return true;
}
return false;
}
/// Calculates what the pre-extend type is, based on the extension
/// operation node provided by \p Extend.
///
/// In the case that \p Extend is a SIGN_EXTEND or a ZERO_EXTEND, the
/// pre-extend type is pulled directly from the operand, while other extend
/// operations need a bit more inspection to get this information.
///
/// \param Extend The SDNode from the DAG that represents the extend operation
/// \param DAG The SelectionDAG hosting the \p Extend node
///
/// \returns The type representing the \p Extend source type, or \p MVT::Other
/// if no valid type can be determined
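///
/// For example (illustrative only): an (and x, 0xFF) node is reported as an
/// i8 pre-extend type, while AssertSext/AssertZext/SIGN_EXTEND_INREG report
/// the VT carried in their second operand.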
static EVT calculatePreExtendType(SDValue Extend, SelectionDAG &DAG) {
switch (Extend.getOpcode()) {
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
return Extend.getOperand(0).getValueType();
case ISD::AssertSext:
case ISD::AssertZext:
case ISD::SIGN_EXTEND_INREG: {
VTSDNode *TypeNode = dyn_cast<VTSDNode>(Extend.getOperand(1));
if (!TypeNode)
return MVT::Other;
return TypeNode->getVT();
}
case ISD::AND: {
ConstantSDNode *Constant =
dyn_cast<ConstantSDNode>(Extend.getOperand(1).getNode());
if (!Constant)
return MVT::Other;
uint32_t Mask = Constant->getZExtValue();
if (Mask == UCHAR_MAX)
return MVT::i8;
else if (Mask == USHRT_MAX)
return MVT::i16;
else if (Mask == UINT_MAX)
return MVT::i32;
return MVT::Other;
}
default:
return MVT::Other;
}
llvm_unreachable("Code path unhandled in calculatePreExtendType!");
}
/// Combines a dup(sext/zext) node pattern into sext/zext(dup)
/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
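///
/// Rough sketch of the rewrite (types illustrative, not the literal nodes):
///   v8i16 dup(sext i8 %s to i16)        ; scalar extend, then splat
///     --> sext(v8i8 dup(%s)) to v8i16   ; splat first, then one vector extend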
static SDValue performCommonVectorExtendCombine(SDValue VectorShuffle,
SelectionDAG &DAG) {
ShuffleVectorSDNode *ShuffleNode =
dyn_cast<ShuffleVectorSDNode>(VectorShuffle.getNode());
if (!ShuffleNode)
return SDValue();
// Ensure the shuffle mask is all zeroes (a splat of lane 0) before continuing
if (!ShuffleNode->isSplat() || ShuffleNode->getSplatIndex() != 0)
return SDValue();
SDValue InsertVectorElt = VectorShuffle.getOperand(0);
if (InsertVectorElt.getOpcode() != ISD::INSERT_VECTOR_ELT)
return SDValue();
SDValue InsertLane = InsertVectorElt.getOperand(2);
ConstantSDNode *Constant = dyn_cast<ConstantSDNode>(InsertLane.getNode());
// Ensures the insert is inserting into lane 0
if (!Constant || Constant->getZExtValue() != 0)
return SDValue();
SDValue Extend = InsertVectorElt.getOperand(1);
unsigned ExtendOpcode = Extend.getOpcode();
bool IsSExt = ExtendOpcode == ISD::SIGN_EXTEND ||
ExtendOpcode == ISD::SIGN_EXTEND_INREG ||
ExtendOpcode == ISD::AssertSext;
if (!IsSExt && ExtendOpcode != ISD::ZERO_EXTEND &&
ExtendOpcode != ISD::AssertZext && ExtendOpcode != ISD::AND)
return SDValue();
EVT TargetType = VectorShuffle.getValueType();
EVT PreExtendType = calculatePreExtendType(Extend, DAG);
if ((TargetType != MVT::v8i16 && TargetType != MVT::v4i32 &&
TargetType != MVT::v2i64) ||
(PreExtendType == MVT::Other))
return SDValue();
// Restrict valid pre-extend data type
if (PreExtendType != MVT::i8 && PreExtendType != MVT::i16 &&
PreExtendType != MVT::i32)
return SDValue();
EVT PreExtendVT = TargetType.changeVectorElementType(PreExtendType);
if (PreExtendVT.getVectorElementCount() != TargetType.getVectorElementCount())
return SDValue();
if (TargetType.getScalarSizeInBits() != PreExtendVT.getScalarSizeInBits() * 2)
return SDValue();
SDLoc DL(VectorShuffle);
SDValue InsertVectorNode = DAG.getNode(
InsertVectorElt.getOpcode(), DL, PreExtendVT, DAG.getUNDEF(PreExtendVT),
DAG.getAnyExtOrTrunc(Extend.getOperand(0), DL, PreExtendType),
DAG.getConstant(0, DL, MVT::i64));
std::vector<int> ShuffleMask(TargetType.getVectorNumElements());
SDValue VectorShuffleNode =
DAG.getVectorShuffle(PreExtendVT, DL, InsertVectorNode,
DAG.getUNDEF(PreExtendVT), ShuffleMask);
SDValue ExtendNode = DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
DL, TargetType, VectorShuffleNode);
return ExtendNode;
}
/// Combines a mul(dup(sext/zext)) node pattern into mul(sext/zext(dup))
/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
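///
/// A minimal sketch of the overall effect, assuming both multiplicands are
/// such dups:
///   mul(dup(sext a), dup(sext b)) --> mul(sext(dup a), sext(dup b))
/// which is the shape the widening-multiply patterns expect to match.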
static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG) {
// If the value type isn't a vector, none of the operands are going to be dups
if (!Mul->getValueType(0).isVector())
return SDValue();
SDValue Op0 = performCommonVectorExtendCombine(Mul->getOperand(0), DAG);
SDValue Op1 = performCommonVectorExtendCombine(Mul->getOperand(1), DAG);
// If neither operand has been changed, don't make any further changes
if (!Op0 && !Op1)
return SDValue();
SDLoc DL(Mul);
return DAG.getNode(Mul->getOpcode(), DL, Mul->getValueType(0),
Op0 ? Op0 : Mul->getOperand(0),
Op1 ? Op1 : Mul->getOperand(1));
}
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
if (SDValue Ext = performMulVectorExtendCombine(N, DAG))
return Ext;
if (DCI.isBeforeLegalizeOps())
return SDValue();
// Canonicalize X*(Y+1) -> X*Y+X and (X+1)*Y -> X*Y+Y,
// and in MachineCombiner pass, add+mul will be combined into madd.
// Similarly, X*(1-Y) -> X - X*Y and (1-Y)*X -> X - Y*X.
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue MulOper;
unsigned AddSubOpc;
auto IsAddSubWith1 = [&](SDValue V) -> bool {
AddSubOpc = V->getOpcode();
if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
SDValue Opnd = V->getOperand(1);
MulOper = V->getOperand(0);
if (AddSubOpc == ISD::SUB)
std::swap(Opnd, MulOper);
if (auto C = dyn_cast<ConstantSDNode>(Opnd))
return C->isOne();
}
return false;
};
if (IsAddSubWith1(N0)) {
SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
}
if (IsAddSubWith1(N1)) {
SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
}
// The below optimizations require a constant RHS.
if (!isa<ConstantSDNode>(N1))
return SDValue();
ConstantSDNode *C = cast<ConstantSDNode>(N1);
const APInt &ConstValue = C->getAPIntValue();
// Allow the scaling to be folded into the `cnt` instruction by preventing
// the scaling from being obscured here. This makes it easier to pattern match.
if (IsSVECntIntrinsic(N0) ||
(N0->getOpcode() == ISD::TRUNCATE &&
(IsSVECntIntrinsic(N0->getOperand(0)))))
if (ConstValue.sge(1) && ConstValue.sle(16))
return SDValue();
// Multiplication by a power of two plus/minus one can be done more
// cheaply as a shift+add/sub. For now, this is true unilaterally. If
// future CPUs have a cheaper MADD instruction, this may need to be
// gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
// 64-bit is 5 cycles, so this is always a win.
// More aggressively, some multiplications N0 * C can be lowered to
// shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,
// e.g. 6=3*2=(2+1)*2.
// TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
// which equals (1+2)*16-(1+2).
// TrailingZeroes is used to test if the mul can be lowered to
// shift+add+shift.
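// Worked example (illustrative): for C = 6 = (2+1)*2, TrailingZeroes is 1 and
// ShiftedConstValue is 3, so the lowering below produces
//   x * 6  -->  (shl (add (shl x, 1), x), 1)
// i.e. a shift+add building 3*x, followed by the final shift by the trailing
// zero count.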
unsigned TrailingZeroes = ConstValue.countTrailingZeros();
if (TrailingZeroes) {
// Conservatively do not lower to shift+add+shift if the mul might be
// folded into smul or umul.
if (N0->hasOneUse() && (isSignExtended(N0.getNode(), DAG) ||
isZeroExtended(N0.getNode(), DAG)))
return SDValue();
// Conservatively do not lower to shift+add+shift if the mul might be
// folded into madd or msub.
if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ADD ||
N->use_begin()->getOpcode() == ISD::SUB))
return SDValue();
}
// Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
// and shift+add+shift.
APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
unsigned ShiftAmt;
// Is the shifted value the LHS operand of the add/sub?
bool ShiftValUseIsN0 = true;
// Do we need to negate the result?
bool NegateResult = false;
if (ConstValue.isNonNegative()) {
// (mul x, 2^N + 1) => (add (shl x, N), x)
// (mul x, 2^N - 1) => (sub (shl x, N), x)
// (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
APInt SCVMinus1 = ShiftedConstValue - 1;
APInt CVPlus1 = ConstValue + 1;
if (SCVMinus1.isPowerOf2()) {
ShiftAmt = SCVMinus1.logBase2();
AddSubOpc = ISD::ADD;
} else if (CVPlus1.isPowerOf2()) {
ShiftAmt = CVPlus1.logBase2();
AddSubOpc = ISD::SUB;
} else
return SDValue();
} else {
// (mul x, -(2^N - 1)) => (sub x, (shl x, N))
// (mul x, -(2^N + 1)) => - (add (shl x, N), x)
APInt CVNegPlus1 = -ConstValue + 1;
APInt CVNegMinus1 = -ConstValue - 1;
if (CVNegPlus1.isPowerOf2()) {
ShiftAmt = CVNegPlus1.logBase2();
AddSubOpc = ISD::SUB;
ShiftValUseIsN0 = false;
} else if (CVNegMinus1.isPowerOf2()) {
ShiftAmt = CVNegMinus1.logBase2();
AddSubOpc = ISD::ADD;
NegateResult = true;
} else
return SDValue();
}
SDValue ShiftedVal = DAG.getNode(ISD::SHL, DL, VT, N0,
DAG.getConstant(ShiftAmt, DL, MVT::i64));
SDValue AddSubN0 = ShiftValUseIsN0 ? ShiftedVal : N0;
SDValue AddSubN1 = ShiftValUseIsN0 ? N0 : ShiftedVal;
SDValue Res = DAG.getNode(AddSubOpc, DL, VT, AddSubN0, AddSubN1);
assert(!(NegateResult && TrailingZeroes) &&
"NegateResult and TrailingZeroes cannot both be true for now.");
// Negate the result.
if (NegateResult)
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
// Shift the result.
if (TrailingZeroes)
return DAG.getNode(ISD::SHL, DL, VT, Res,
DAG.getConstant(TrailingZeroes, DL, MVT::i64));
return Res;
}
static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
SelectionDAG &DAG) {
// Take advantage of vector comparisons producing 0 or -1 in each lane to
// optimize away the operation when its input is a compare result masked with
// a constant.
//
// The general transformation is:
// UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
// AND(VECTOR_CMP(x,y), constant2)
// constant2 = UNARYOP(constant)
// Early exit if this isn't a vector operation, the operand of the
// unary operation isn't a bitwise AND, or if the sizes of the operations
// aren't the same.
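//
// A hedged example of the intent (types only illustrative):
//   sint_to_fp(and(setcc(x, y), splat(i32 1)))
//     --> bitcast(and(setcc(x, y), bitcast(splat(float 1.0))))
// i.e. the unary op is applied once to the constant instead of to every lane
// of the masked result at run time.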
EVT VT = N->getValueType(0);
if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
return SDValue();
// Now check that the other operand of the AND is a constant. We could
// make the transformation for non-constant splats as well, but it's unclear
// that would be a benefit as it would not eliminate any operations, just
// perform one more step in scalar code before moving to the vector unit.
if (BuildVectorSDNode *BV =
dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
// Bail out if the vector isn't a constant.
if (!BV->isConstant())
return SDValue();
// Everything checks out. Build up the new and improved node.
SDLoc DL(N);
EVT IntVT = BV->getValueType(0);
// Create a new constant of the appropriate type for the transformed
// DAG.
SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
// The AND node needs bitcasts to/from an integer vector type around it.
SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst);
SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
N->getOperand(0)->getOperand(0), MaskConst);
SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd);
return Res;
}
return SDValue();
}
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
// First try to optimize away the conversion when it's conditionally from
// a constant. Vectors only.
if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
return Res;
EVT VT = N->getValueType(0);
if (VT != MVT::f32 && VT != MVT::f64)
return SDValue();
// Only optimize when the source and destination types have the same width.
if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
return SDValue();
// If the result of an integer load is only used by an integer-to-float
// conversion, use an fp load and an AdvSIMD scalar {S|U}CVTF instead.
// This eliminates an "integer-to-vector-move" UOP and improves throughput.
SDValue N0 = N->getOperand(0);
if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
// Do not change the width of a volatile load.
!cast<LoadSDNode>(N0)->isVolatile()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
LN0->getPointerInfo(), LN0->getAlignment(),
LN0->getMemOperand()->getFlags());
// Make sure successors of the original load stay after it by updating them
// to use the new Chain.
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
unsigned Opcode =
(N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF;
return DAG.getNode(Opcode, SDLoc(N), VT, Load);
}
return SDValue();
}
/// Fold a floating-point multiply by power of two into floating-point to
/// fixed-point conversion.
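///
/// Illustrative only (for a v4f32 input):
///   fptosi(fmul %x, splat(8.0))  ~~>  llvm.aarch64.neon.vcvtfp2fxs(%x, 3)
/// since multiplying by 2^3 before the conversion is equivalent to converting
/// with 3 fractional bits.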
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
if (!Subtarget->hasNEON())
return SDValue();
if (!N->getValueType(0).isSimple())
return SDValue();
SDValue Op = N->getOperand(0);
- if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
- Op.getOpcode() != ISD::FMUL)
+ if (!Op.getValueType().isSimple() || Op.getOpcode() != ISD::FMUL)
+ return SDValue();
+
+ if (!Op.getValueType().is64BitVector() && !Op.getValueType().is128BitVector())
return SDValue();
SDValue ConstVec = Op->getOperand(1);
if (!isa<BuildVectorSDNode>(ConstVec))
return SDValue();
MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
uint32_t FloatBits = FloatTy.getSizeInBits();
if (FloatBits != 32 && FloatBits != 64 &&
(FloatBits != 16 || !Subtarget->hasFullFP16()))
return SDValue();
MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
uint32_t IntBits = IntTy.getSizeInBits();
if (IntBits != 16 && IntBits != 32 && IntBits != 64)
return SDValue();
// Avoid conversions where iN is larger than the float (e.g., float -> i64).
if (IntBits > FloatBits)
return SDValue();
BitVector UndefElements;
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
int32_t Bits = IntBits == 64 ? 64 : 32;
int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, Bits + 1);
if (C == -1 || C == 0 || C > Bits)
return SDValue();
EVT ResTy = Op.getValueType().changeVectorElementTypeToInteger();
if (!DAG.getTargetLoweringInfo().isTypeLegal(ResTy))
return SDValue();
if (N->getOpcode() == ISD::FP_TO_SINT_SAT ||
N->getOpcode() == ISD::FP_TO_UINT_SAT) {
EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
if (SatVT.getScalarSizeInBits() != IntBits)
return SDValue();
}
SDLoc DL(N);
bool IsSigned = (N->getOpcode() == ISD::FP_TO_SINT ||
N->getOpcode() == ISD::FP_TO_SINT_SAT);
unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
: Intrinsic::aarch64_neon_vcvtfp2fxu;
SDValue FixConv =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ResTy,
DAG.getConstant(IntrinsicOpcode, DL, MVT::i32),
Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32));
// We can handle smaller integers by generating an extra trunc.
if (IntBits < FloatBits)
FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv);
return FixConv;
}
/// Fold a floating-point divide by power of two into fixed-point to
/// floating-point conversion.
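///
/// Illustrative only (for a v2i32 input):
///   fdiv(sitofp(%x), splat(16.0))  ~~>  llvm.aarch64.neon.vcvtfxs2fp(%x, 4)
/// i.e. the divide by 2^4 is folded into the fixed-point-to-float convert.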
static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
if (!Subtarget->hasNEON())
return SDValue();
SDValue Op = N->getOperand(0);
unsigned Opc = Op->getOpcode();
if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
!Op.getOperand(0).getValueType().isSimple() ||
(Opc != ISD::SINT_TO_FP && Opc != ISD::UINT_TO_FP))
return SDValue();
SDValue ConstVec = N->getOperand(1);
if (!isa<BuildVectorSDNode>(ConstVec))
return SDValue();
MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
int32_t IntBits = IntTy.getSizeInBits();
if (IntBits != 16 && IntBits != 32 && IntBits != 64)
return SDValue();
MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
int32_t FloatBits = FloatTy.getSizeInBits();
if (FloatBits != 32 && FloatBits != 64)
return SDValue();
// Avoid conversions where iN is larger than the float (e.g., i64 -> float).
if (IntBits > FloatBits)
return SDValue();
BitVector UndefElements;
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1);
if (C == -1 || C == 0 || C > FloatBits)
return SDValue();
MVT ResTy;
unsigned NumLanes = Op.getValueType().getVectorNumElements();
switch (NumLanes) {
default:
return SDValue();
case 2:
ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
break;
case 4:
ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
break;
}
if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
return SDValue();
SDLoc DL(N);
SDValue ConvInput = Op.getOperand(0);
bool IsSigned = Opc == ISD::SINT_TO_FP;
if (IntBits < FloatBits)
ConvInput = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
ResTy, ConvInput);
unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfxs2fp
: Intrinsic::aarch64_neon_vcvtfxu2fp;
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
DAG.getConstant(C, DL, MVT::i32));
}
/// An EXTR instruction is made up of two shifts, ORed together. This helper
/// searches for and classifies those shifts.
static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
bool &FromHi) {
if (N.getOpcode() == ISD::SHL)
FromHi = false;
else if (N.getOpcode() == ISD::SRL)
FromHi = true;
else
return false;
if (!isa<ConstantSDNode>(N.getOperand(1)))
return false;
ShiftAmount = N->getConstantOperandVal(1);
Src = N->getOperand(0);
return true;
}
/// EXTR instruction extracts a contiguous chunk of bits from two existing
/// registers viewed as a high/low pair. This function looks for the pattern:
/// <tt>(or (shl VAL1, \#N), (srl VAL2, \#RegWidth-N))</tt> and replaces it
/// with an EXTR. Can't quite be done in TableGen because the two immediates
/// aren't independent.
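///
/// A hedged i64 example: (or (shl x, 16), (srl y, 48)) becomes
///   EXTR x, y, #48
/// because the two shift amounts sum to the register width.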
static SDValue tryCombineToEXTR(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
EVT VT = N->getValueType(0);
assert(N->getOpcode() == ISD::OR && "Unexpected root");
if (VT != MVT::i32 && VT != MVT::i64)
return SDValue();
SDValue LHS;
uint32_t ShiftLHS = 0;
bool LHSFromHi = false;
if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
return SDValue();
SDValue RHS;
uint32_t ShiftRHS = 0;
bool RHSFromHi = false;
if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
return SDValue();
// If they're both trying to come from the high part of the register, they're
// not really an EXTR.
if (LHSFromHi == RHSFromHi)
return SDValue();
if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
return SDValue();
if (LHSFromHi) {
std::swap(LHS, RHS);
std::swap(ShiftLHS, ShiftRHS);
}
return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS,
DAG.getConstant(ShiftRHS, DL, MVT::i64));
}
static SDValue tryCombineToBSL(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
if (!VT.isVector())
return SDValue();
// The combining code currently only works for NEON vectors. In particular,
// it does not work for SVE when dealing with vectors wider than 128 bits.
if (!VT.is64BitVector() && !VT.is128BitVector())
return SDValue();
SDValue N0 = N->getOperand(0);
if (N0.getOpcode() != ISD::AND)
return SDValue();
SDValue N1 = N->getOperand(1);
if (N1.getOpcode() != ISD::AND)
return SDValue();
// InstCombine does (not (neg a)) => (add a -1).
// Try: (or (and (neg a) b) (and (add a -1) c)) => (bsl (neg a) b c)
// Loop over all combinations of AND operands.
for (int i = 1; i >= 0; --i) {
for (int j = 1; j >= 0; --j) {
SDValue O0 = N0->getOperand(i);
SDValue O1 = N1->getOperand(j);
SDValue Sub, Add, SubSibling, AddSibling;
// Find a SUB and an ADD operand, one from each AND.
if (O0.getOpcode() == ISD::SUB && O1.getOpcode() == ISD::ADD) {
Sub = O0;
Add = O1;
SubSibling = N0->getOperand(1 - i);
AddSibling = N1->getOperand(1 - j);
} else if (O0.getOpcode() == ISD::ADD && O1.getOpcode() == ISD::SUB) {
Add = O0;
Sub = O1;
AddSibling = N0->getOperand(1 - i);
SubSibling = N1->getOperand(1 - j);
} else
continue;
if (!ISD::isBuildVectorAllZeros(Sub.getOperand(0).getNode()))
continue;
// The all-ones constant is always the right-hand operand of the Add.
if (!ISD::isBuildVectorAllOnes(Add.getOperand(1).getNode()))
continue;
if (Sub.getOperand(1) != Add.getOperand(0))
continue;
return DAG.getNode(AArch64ISD::BSP, DL, VT, Sub, SubSibling, AddSibling);
}
}
// (or (and a b) (and (not a) c)) => (bsl a b c)
// We only have to look for constant vectors here since the general, variable
// case can be handled in TableGen.
unsigned Bits = VT.getScalarSizeInBits();
uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
for (int i = 1; i >= 0; --i)
for (int j = 1; j >= 0; --j) {
BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(i));
BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(j));
if (!BVN0 || !BVN1)
continue;
bool FoundMatch = true;
for (unsigned k = 0; k < VT.getVectorNumElements(); ++k) {
ConstantSDNode *CN0 = dyn_cast<ConstantSDNode>(BVN0->getOperand(k));
ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(BVN1->getOperand(k));
if (!CN0 || !CN1 ||
CN0->getZExtValue() != (BitMask & ~CN1->getZExtValue())) {
FoundMatch = false;
break;
}
}
if (FoundMatch)
return DAG.getNode(AArch64ISD::BSP, DL, VT, SDValue(BVN0, 0),
N0->getOperand(1 - i), N1->getOperand(1 - j));
}
return SDValue();
}
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
// Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
if (SDValue Res = tryCombineToEXTR(N, DCI))
return Res;
if (SDValue Res = tryCombineToBSL(N, DCI))
return Res;
return SDValue();
}
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT MemVT) {
if (!MemVT.getVectorElementType().isSimple())
return false;
uint64_t MaskForTy = 0ull;
switch (MemVT.getVectorElementType().getSimpleVT().SimpleTy) {
case MVT::i8:
MaskForTy = 0xffull;
break;
case MVT::i16:
MaskForTy = 0xffffull;
break;
case MVT::i32:
MaskForTy = 0xffffffffull;
break;
default:
return false;
break;
}
if (N->getOpcode() == AArch64ISD::DUP || N->getOpcode() == ISD::SPLAT_VECTOR)
if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0)))
return Op0->getAPIntValue().getLimitedValue() == MaskForTy;
return false;
}
static SDValue performSVEAndCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDValue Src = N->getOperand(0);
unsigned Opc = Src->getOpcode();
// Zero/any extend of an unsigned unpack
if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
SDValue UnpkOp = Src->getOperand(0);
SDValue Dup = N->getOperand(1);
if (Dup.getOpcode() != AArch64ISD::DUP)
return SDValue();
SDLoc DL(N);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Dup->getOperand(0));
if (!C)
return SDValue();
uint64_t ExtVal = C->getZExtValue();
// If the mask is fully covered by the unpack, we don't need to push
// a new AND onto the operand
EVT EltTy = UnpkOp->getValueType(0).getVectorElementType();
if ((ExtVal == 0xFF && EltTy == MVT::i8) ||
(ExtVal == 0xFFFF && EltTy == MVT::i16) ||
(ExtVal == 0xFFFFFFFF && EltTy == MVT::i32))
return Src;
// Truncate to prevent a DUP with an overly wide constant
APInt Mask = C->getAPIntValue().trunc(EltTy.getSizeInBits());
// Otherwise, make sure we propagate the AND to the operand
// of the unpack
Dup = DAG.getNode(AArch64ISD::DUP, DL,
UnpkOp->getValueType(0),
DAG.getConstant(Mask.zextOrTrunc(32), DL, MVT::i32));
SDValue And = DAG.getNode(ISD::AND, DL,
UnpkOp->getValueType(0), UnpkOp, Dup);
return DAG.getNode(Opc, DL, N->getValueType(0), And);
}
if (!EnableCombineMGatherIntrinsics)
return SDValue();
SDValue Mask = N->getOperand(1);
if (!Src.hasOneUse())
return SDValue();
EVT MemVT;
// SVE load instructions perform an implicit zero-extend, which makes them
// perfect candidates for combining.
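// For instance (hedged sketch): and(LD1B_MERGE_ZERO ... i8, splat(0xff)) is
// just the load itself, because the i8 elements were already zero-extended by
// the load.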
switch (Opc) {
case AArch64ISD::LD1_MERGE_ZERO:
case AArch64ISD::LDNF1_MERGE_ZERO:
case AArch64ISD::LDFF1_MERGE_ZERO:
MemVT = cast<VTSDNode>(Src->getOperand(3))->getVT();
break;
case AArch64ISD::GLD1_MERGE_ZERO:
case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_IMM_MERGE_ZERO:
case AArch64ISD::GLDFF1_MERGE_ZERO:
case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
case AArch64ISD::GLDNT1_MERGE_ZERO:
MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
break;
default:
return SDValue();
}
if (isConstantSplatVectorMaskForType(Mask.getNode(), MemVT))
return Src;
return SDValue();
}
static SDValue performANDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
SDValue LHS = N->getOperand(0);
EVT VT = N->getValueType(0);
if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
if (VT.isScalableVector())
return performSVEAndCombine(N, DCI);
// The combining code below works only for NEON vectors. In particular, it
// does not work for SVE when dealing with vectors wider than 128 bits.
if (!(VT.is64BitVector() || VT.is128BitVector()))
return SDValue();
BuildVectorSDNode *BVN =
dyn_cast<BuildVectorSDNode>(N->getOperand(1).getNode());
if (!BVN)
return SDValue();
// AND does not accept an immediate, so check if we can use a BIC immediate
// instruction instead. We do this here instead of using a (and x, (mvni imm))
// pattern in isel, because some immediates may be lowered to the preferred
// (and x, (movi imm)) form, even though an mvni representation also exists.
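// For example (hedged): (and v4i32 x, splat(0xffffff00)) can be selected as
// BIC Vd.4S, #0xff, since the inverted mask fits the modified-immediate form.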
APInt DefBits(VT.getSizeInBits(), 0);
APInt UndefBits(VT.getSizeInBits(), 0);
if (resolveBuildVector(BVN, DefBits, UndefBits)) {
SDValue NewOp;
DefBits = ~DefBits;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
DefBits, &LHS)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
DefBits, &LHS)))
return NewOp;
UndefBits = ~UndefBits;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
UndefBits, &LHS)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
UndefBits, &LHS)))
return NewOp;
}
return SDValue();
}
// Attempt to form urhadd(OpA, OpB) from
// truncate(vlshr(sub(zext(OpB), xor(zext(OpA), Ones(ElemSizeInBits))), 1))
// or uhadd(OpA, OpB) from truncate(vlshr(add(zext(OpA), zext(OpB)), 1)).
// The original form of the first expression is
// truncate(srl(add(zext(OpB), add(zext(OpA), 1)), 1)) and the
// (OpA + OpB + 1) subexpression will have been changed to (OpB - (~OpA)).
// Before this function is called the srl will have been lowered to
// AArch64ISD::VLSHR.
// This pass can also recognize signed variants of the patterns that use sign
// extension instead of zero extension and form a srhadd(OpA, OpB) or a
// shadd(OpA, OpB) from them.
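//
// A rough example of the unsigned, non-rounding case this recognizes:
//   trunc(vlshr(add(zext(v8i8 OpA), zext(v8i8 OpB)), 1))  -->  uhadd(OpA, OpB)
// while the sub/xor form above maps onto urhadd, the rounding variant.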
static SDValue
performVectorTruncateCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
// Since we are looking for a right shift by a constant value of 1 and we are
// operating on types at least 16 bits in length (sign/zero extended OpA and
// OpB, which are at least 8 bits), it follows that the truncate will always
// discard the shifted-in bit and therefore the right shift will be logical
// regardless of the signedness of OpA and OpB.
SDValue Shift = N->getOperand(0);
if (Shift.getOpcode() != AArch64ISD::VLSHR)
return SDValue();
// Is the right shift using an immediate value of 1?
uint64_t ShiftAmount = Shift.getConstantOperandVal(1);
if (ShiftAmount != 1)
return SDValue();
SDValue ExtendOpA, ExtendOpB;
SDValue ShiftOp0 = Shift.getOperand(0);
unsigned ShiftOp0Opc = ShiftOp0.getOpcode();
if (ShiftOp0Opc == ISD::SUB) {
SDValue Xor = ShiftOp0.getOperand(1);
if (Xor.getOpcode() != ISD::XOR)
return SDValue();
// Is the XOR using an all-ones constant as its right-hand operand?
uint64_t C;
if (!isAllConstantBuildVector(Xor.getOperand(1), C))
return SDValue();
unsigned ElemSizeInBits = VT.getScalarSizeInBits();
APInt CAsAPInt(ElemSizeInBits, C);
if (CAsAPInt != APInt::getAllOnes(ElemSizeInBits))
return SDValue();
ExtendOpA = Xor.getOperand(0);
ExtendOpB = ShiftOp0.getOperand(0);
} else if (ShiftOp0Opc == ISD::ADD) {
ExtendOpA = ShiftOp0.getOperand(0);
ExtendOpB = ShiftOp0.getOperand(1);
} else
return SDValue();
unsigned ExtendOpAOpc = ExtendOpA.getOpcode();
unsigned ExtendOpBOpc = ExtendOpB.getOpcode();
if (!(ExtendOpAOpc == ExtendOpBOpc &&
(ExtendOpAOpc == ISD::ZERO_EXTEND || ExtendOpAOpc == ISD::SIGN_EXTEND)))
return SDValue();
// Is the result of the right shift being truncated to the same value type as
// the original operands, OpA and OpB?
SDValue OpA = ExtendOpA.getOperand(0);
SDValue OpB = ExtendOpB.getOperand(0);
EVT OpAVT = OpA.getValueType();
assert(ExtendOpA.getValueType() == ExtendOpB.getValueType());
if (!(VT == OpAVT && OpAVT == OpB.getValueType()))
return SDValue();
SDLoc DL(N);
bool IsSignExtend = ExtendOpAOpc == ISD::SIGN_EXTEND;
bool IsRHADD = ShiftOp0Opc == ISD::SUB;
unsigned HADDOpc = IsSignExtend
? (IsRHADD ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
: (IsRHADD ? AArch64ISD::URHADD : AArch64ISD::UHADD);
SDValue ResultHADD = DAG.getNode(HADDOpc, DL, VT, OpA, OpB);
return ResultHADD;
}
static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16) {
switch (Opcode) {
case ISD::FADD:
return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64;
case ISD::ADD:
return VT == MVT::i64;
default:
return false;
}
}
static SDValue performExtractVectorEltCombine(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
ConstantSDNode *ConstantN1 = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
const bool FullFP16 =
static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
// Rewrite for pairwise fadd pattern
// (f32 (extract_vector_elt
// (fadd (vXf32 Other)
// (vector_shuffle (vXf32 Other) undef <1,X,...> )) 0))
// ->
// (f32 (fadd (extract_vector_elt (vXf32 Other) 0)
// (extract_vector_elt (vXf32 Other) 1))
if (ConstantN1 && ConstantN1->getZExtValue() == 0 &&
hasPairwiseAdd(N0->getOpcode(), VT, FullFP16)) {
SDLoc DL(N0);
SDValue N00 = N0->getOperand(0);
SDValue N01 = N0->getOperand(1);
ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(N01);
SDValue Other = N00;
// And handle the commutative case.
if (!Shuffle) {
Shuffle = dyn_cast<ShuffleVectorSDNode>(N00);
Other = N01;
}
if (Shuffle && Shuffle->getMaskElt(0) == 1 &&
Other == Shuffle->getOperand(0)) {
return DAG.getNode(N0->getOpcode(), DL, VT,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
DAG.getConstant(0, DL, MVT::i64)),
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
DAG.getConstant(1, DL, MVT::i64)));
}
}
return SDValue();
}
static SDValue performConcatVectorsCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
unsigned N0Opc = N0->getOpcode(), N1Opc = N1->getOpcode();
if (VT.isScalableVector())
return SDValue();
// Optimize concat_vectors of truncated vectors, where the intermediate
// type is illegal, to avoid said illegality, e.g.,
// (v4i16 (concat_vectors (v2i16 (truncate (v2i64))),
// (v2i16 (truncate (v2i64)))))
// ->
// (v4i16 (truncate (vector_shuffle (v4i32 (bitcast (v2i64))),
// (v4i32 (bitcast (v2i64))),
// <0, 2, 4, 6>)))
// This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
// on both input and result type, so we might generate worse code.
// On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
if (N->getNumOperands() == 2 && N0Opc == ISD::TRUNCATE &&
N1Opc == ISD::TRUNCATE) {
SDValue N00 = N0->getOperand(0);
SDValue N10 = N1->getOperand(0);
EVT N00VT = N00.getValueType();
if (N00VT == N10.getValueType() &&
(N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
N00VT.getScalarSizeInBits() == 4 * VT.getScalarSizeInBits()) {
MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
SmallVector<int, 8> Mask(MidVT.getVectorNumElements());
for (size_t i = 0; i < Mask.size(); ++i)
Mask[i] = i * 2;
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getVectorShuffle(
MidVT, dl,
DAG.getNode(ISD::BITCAST, dl, MidVT, N00),
DAG.getNode(ISD::BITCAST, dl, MidVT, N10), Mask));
}
}
// Wait 'til after everything is legalized to try this. That way we have
// legal vector types and such.
if (DCI.isBeforeLegalizeOps())
return SDValue();
// Optimise concat_vectors of two [us]rhadds or [us]hadds that use extracted
// subvectors from the same original vectors. Combine these into a single
// [us]rhadd or [us]hadd that operates on the two original vectors. Example:
// (v16i8 (concat_vectors (v8i8 (urhadd (extract_subvector (v16i8 OpA, <0>),
// extract_subvector (v16i8 OpB,
// <0>))),
// (v8i8 (urhadd (extract_subvector (v16i8 OpA, <8>),
// extract_subvector (v16i8 OpB,
// <8>)))))
// ->
// (v16i8(urhadd(v16i8 OpA, v16i8 OpB)))
if (N->getNumOperands() == 2 && N0Opc == N1Opc &&
(N0Opc == AArch64ISD::URHADD || N0Opc == AArch64ISD::SRHADD ||
N0Opc == AArch64ISD::UHADD || N0Opc == AArch64ISD::SHADD)) {
SDValue N00 = N0->getOperand(0);
SDValue N01 = N0->getOperand(1);
SDValue N10 = N1->getOperand(0);
SDValue N11 = N1->getOperand(1);
EVT N00VT = N00.getValueType();
EVT N10VT = N10.getValueType();
if (N00->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N01->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N10->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N11->getOpcode() == ISD::EXTRACT_SUBVECTOR && N00VT == N10VT) {
SDValue N00Source = N00->getOperand(0);
SDValue N01Source = N01->getOperand(0);
SDValue N10Source = N10->getOperand(0);
SDValue N11Source = N11->getOperand(0);
if (N00Source == N10Source && N01Source == N11Source &&
N00Source.getValueType() == VT && N01Source.getValueType() == VT) {
assert(N0.getValueType() == N1.getValueType());
uint64_t N00Index = N00.getConstantOperandVal(1);
uint64_t N01Index = N01.getConstantOperandVal(1);
uint64_t N10Index = N10.getConstantOperandVal(1);
uint64_t N11Index = N11.getConstantOperandVal(1);
if (N00Index == N01Index && N10Index == N11Index && N00Index == 0 &&
N10Index == N00VT.getVectorNumElements())
return DAG.getNode(N0Opc, dl, VT, N00Source, N01Source);
}
}
}
// If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
// splat. The indexed instructions are going to be expecting a DUPLANE64, so
// canonicalise to that.
if (N->getNumOperands() == 2 && N0 == N1 && VT.getVectorNumElements() == 2) {
assert(VT.getScalarSizeInBits() == 64);
return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG),
DAG.getConstant(0, dl, MVT::i64));
}
// Canonicalise concat_vectors so that the right-hand vector has as few
// bit-casts as possible before its real operation. The primary matching
// destination for these operations will be the narrowing "2" instructions,
// which depend on the operation being performed on this right-hand vector.
// For example,
// (concat_vectors LHS, (v1i64 (bitconvert (v4i16 RHS))))
// becomes
// (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
if (N->getNumOperands() != 2 || N1Opc != ISD::BITCAST)
return SDValue();
SDValue RHS = N1->getOperand(0);
MVT RHSTy = RHS.getValueType().getSimpleVT();
// If the RHS is not a vector, this is not the pattern we're looking for.
if (!RHSTy.isVector())
return SDValue();
LLVM_DEBUG(
dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n");
MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
RHSTy.getVectorNumElements() * 2);
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
DAG.getNode(ISD::BITCAST, dl, RHSTy, N0),
RHS));
}
static SDValue
performInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Vec = N->getOperand(0);
SDValue SubVec = N->getOperand(1);
uint64_t IdxVal = N->getConstantOperandVal(2);
EVT VecVT = Vec.getValueType();
EVT SubVT = SubVec.getValueType();
// Only do this for legal fixed vector types.
if (!VecVT.isFixedLengthVector() ||
!DAG.getTargetLoweringInfo().isTypeLegal(VecVT) ||
!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
return SDValue();
// Ignore widening patterns.
if (IdxVal == 0 && Vec.isUndef())
return SDValue();
// Subvector must be half the width and an "aligned" insertion.
unsigned NumSubElts = SubVT.getVectorNumElements();
if ((SubVT.getSizeInBits() * 2) != VecVT.getSizeInBits() ||
(IdxVal != 0 && IdxVal != NumSubElts))
return SDValue();
// Fold insert_subvector -> concat_vectors
// insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
// insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
SDValue Lo, Hi;
if (IdxVal == 0) {
Lo = SubVec;
Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
DAG.getVectorIdxConstant(NumSubElts, DL));
} else {
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
DAG.getVectorIdxConstant(0, DL));
Hi = SubVec;
}
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo, Hi);
}
static SDValue tryCombineFixedPointConvert(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
// Wait until after everything is legalized to try this. That way we have
// legal vector types and such.
if (DCI.isBeforeLegalizeOps())
return SDValue();
// Transform a scalar conversion of a value from a lane extract into a
// lane extract of a vector conversion. E.g., from foo1 to foo2:
// double foo1(int64x2_t a) { return vcvtd_n_f64_s64(a[1], 9); }
// double foo2(int64x2_t a) { return vcvtq_n_f64_s64(a, 9)[1]; }
//
// The second form interacts better with instruction selection and the
// register allocator to avoid cross-class register copies that aren't
// coalescable due to a lane reference.
// Check the operand and see if it originates from a lane extract.
SDValue Op1 = N->getOperand(1);
if (Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
// Yep, no additional predication needed. Perform the transform.
SDValue IID = N->getOperand(0);
SDValue Shift = N->getOperand(2);
SDValue Vec = Op1.getOperand(0);
SDValue Lane = Op1.getOperand(1);
EVT ResTy = N->getValueType(0);
EVT VecResTy;
SDLoc DL(N);
// The vector width should be 128 bits by the time we get here, even
// if it started as 64 bits (the extract_vector handling will have
// widened it by then).
assert(Vec.getValueSizeInBits() == 128 &&
"unexpected vector size on extract_vector_elt!");
if (Vec.getValueType() == MVT::v4i32)
VecResTy = MVT::v4f32;
else if (Vec.getValueType() == MVT::v2i64)
VecResTy = MVT::v2f64;
else
llvm_unreachable("unexpected vector type!");
SDValue Convert =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResTy, Convert, Lane);
}
return SDValue();
}
// AArch64 high-vector "long" operations are formed by performing the non-high
// version on an extract_subvector of each operand which gets the high half:
//
// (longop2 LHS, RHS) == (longop (extract_high LHS), (extract_high RHS))
//
// However, there are cases which don't have an extract_high explicitly, but
// have another operation that can be made compatible with one for free. For
// example:
//
// (dupv64 scalar) --> (extract_high (dup128 scalar))
//
// This routine does the actual conversion of such DUPs, once outer routines
// have determined that everything else is in order.
// It also supports immediate DUP-like nodes (MOVI/MVNi), which we can fold
// similarly here.
static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
switch (N.getOpcode()) {
case AArch64ISD::DUP:
case AArch64ISD::DUPLANE8:
case AArch64ISD::DUPLANE16:
case AArch64ISD::DUPLANE32:
case AArch64ISD::DUPLANE64:
case AArch64ISD::MOVI:
case AArch64ISD::MOVIshift:
case AArch64ISD::MOVIedit:
case AArch64ISD::MOVImsl:
case AArch64ISD::MVNIshift:
case AArch64ISD::MVNImsl:
break;
default:
// FMOV could be supported, but isn't very useful, as it would only occur
// if you passed a bitcast'd floating-point immediate to an eligible long
// integer op (addl, smull, ...).
return SDValue();
}
MVT NarrowTy = N.getSimpleValueType();
if (!NarrowTy.is64BitVector())
return SDValue();
MVT ElementTy = NarrowTy.getVectorElementType();
unsigned NumElems = NarrowTy.getVectorNumElements();
MVT NewVT = MVT::getVectorVT(ElementTy, NumElems * 2);
SDLoc dl(N);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NarrowTy,
DAG.getNode(N->getOpcode(), dl, NewVT, N->ops()),
DAG.getConstant(NumElems, dl, MVT::i64));
}
static bool isEssentiallyExtractHighSubvector(SDValue N) {
if (N.getOpcode() == ISD::BITCAST)
N = N.getOperand(0);
if (N.getOpcode() != ISD::EXTRACT_SUBVECTOR)
return false;
if (N.getOperand(0).getValueType().isScalableVector())
return false;
return cast<ConstantSDNode>(N.getOperand(1))->getAPIntValue() ==
N.getOperand(0).getValueType().getVectorNumElements() / 2;
}
/// Helper structure to keep track of ISD::SET_CC operands.
struct GenericSetCCInfo {
const SDValue *Opnd0;
const SDValue *Opnd1;
ISD::CondCode CC;
};
/// Helper structure to keep track of a SET_CC lowered into AArch64 code.
struct AArch64SetCCInfo {
const SDValue *Cmp;
AArch64CC::CondCode CC;
};
/// Helper structure to keep track of SetCC information.
union SetCCInfo {
GenericSetCCInfo Generic;
AArch64SetCCInfo AArch64;
};
/// Helper structure to be able to read SetCC information. If the IsAArch64
/// field is set to true, Info is an AArch64SetCCInfo; otherwise Info is a
/// GenericSetCCInfo.
struct SetCCInfoAndKind {
SetCCInfo Info;
bool IsAArch64;
};
/// Check whether or not \p Op is a SET_CC operation, either a generic or an
/// AArch64 lowered one.
/// \p SetCCInfo is filled accordingly.
/// \post SetCCInfo is meaningful only when this function returns true.
/// \return True when Op is a kind of SET_CC operation.
static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) {
// If this is a setcc, this is straightforward.
if (Op.getOpcode() == ISD::SETCC) {
SetCCInfo.Info.Generic.Opnd0 = &Op.getOperand(0);
SetCCInfo.Info.Generic.Opnd1 = &Op.getOperand(1);
SetCCInfo.Info.Generic.CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SetCCInfo.IsAArch64 = false;
return true;
}
// Otherwise, check if this is a matching csel instruction.
// In other words:
// - csel 1, 0, cc
// - csel 0, 1, !cc
if (Op.getOpcode() != AArch64ISD::CSEL)
return false;
// Set the information about the operands.
// TODO: we want the operands of the Cmp not the csel
SetCCInfo.Info.AArch64.Cmp = &Op.getOperand(3);
SetCCInfo.IsAArch64 = true;
SetCCInfo.Info.AArch64.CC = static_cast<AArch64CC::CondCode>(
cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
// Check that the operands match the constraints:
// (1) Both operands must be constants.
// (2) One must be 1 and the other must be 0.
ConstantSDNode *TValue = dyn_cast<ConstantSDNode>(Op.getOperand(0));
ConstantSDNode *FValue = dyn_cast<ConstantSDNode>(Op.getOperand(1));
// Check (1).
if (!TValue || !FValue)
return false;
// Check (2).
if (!TValue->isOne()) {
// Update the comparison when we are interested in !cc.
std::swap(TValue, FValue);
SetCCInfo.Info.AArch64.CC =
AArch64CC::getInvertedCondCode(SetCCInfo.Info.AArch64.CC);
}
return TValue->isOne() && FValue->isZero();
}
// Returns true if Op is setcc or zext of setcc.
static bool isSetCCOrZExtSetCC(const SDValue& Op, SetCCInfoAndKind &Info) {
if (isSetCC(Op, Info))
return true;
return ((Op.getOpcode() == ISD::ZERO_EXTEND) &&
isSetCC(Op->getOperand(0), Info));
}
// The folding we want to perform is:
// (add x, [zext] (setcc cc ...) )
// -->
// (csel x, (add x, 1), !cc ...)
//
// The latter will get matched to a CSINC instruction.
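//
// Sketch (illustrative register choice, i32 operands):
//   %r = add i32 %x, zext(icmp eq i32 %a, %b)
// can become, roughly,
//   cmp   w1, w2
//   csinc w0, w0, w0, ne    ; x if a != b, x + 1 otherwise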
static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!");
SDValue LHS = Op->getOperand(0);
SDValue RHS = Op->getOperand(1);
SetCCInfoAndKind InfoAndKind;
// If both operands are a SET_CC, then we don't want to perform this
// folding and create another csel as this results in more instructions
// (and higher register usage).
if (isSetCCOrZExtSetCC(LHS, InfoAndKind) &&
isSetCCOrZExtSetCC(RHS, InfoAndKind))
return SDValue();
// If neither operand is a SET_CC, give up.
if (!isSetCCOrZExtSetCC(LHS, InfoAndKind)) {
std::swap(LHS, RHS);
if (!isSetCCOrZExtSetCC(LHS, InfoAndKind))
return SDValue();
}
// FIXME: This could be generalized to work for FP comparisons.
EVT CmpVT = InfoAndKind.IsAArch64
? InfoAndKind.Info.AArch64.Cmp->getOperand(0).getValueType()
: InfoAndKind.Info.Generic.Opnd0->getValueType();
if (CmpVT != MVT::i32 && CmpVT != MVT::i64)
return SDValue();
SDValue CCVal;
SDValue Cmp;
SDLoc dl(Op);
if (InfoAndKind.IsAArch64) {
CCVal = DAG.getConstant(
AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), dl,
MVT::i32);
Cmp = *InfoAndKind.Info.AArch64.Cmp;
} else
Cmp = getAArch64Cmp(
*InfoAndKind.Info.Generic.Opnd0, *InfoAndKind.Info.Generic.Opnd1,
ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, CmpVT), CCVal, DAG,
dl);
EVT VT = Op->getValueType(0);
LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT));
return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
}
// ADD(UADDV a, UADDV b) --> UADDV(ADD a, b)
static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
// Only scalar integer and vector types.
if (N->getOpcode() != ISD::ADD || !VT.isScalarInteger())
return SDValue();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || LHS.getValueType() != VT)
return SDValue();
auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1));
if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isZero())
return SDValue();
SDValue Op1 = LHS->getOperand(0);
SDValue Op2 = RHS->getOperand(0);
EVT OpVT1 = Op1.getValueType();
EVT OpVT2 = Op2.getValueType();
if (Op1.getOpcode() != AArch64ISD::UADDV || OpVT1 != OpVT2 ||
Op2.getOpcode() != AArch64ISD::UADDV ||
OpVT1.getVectorElementType() != VT)
return SDValue();
SDValue Val1 = Op1.getOperand(0);
SDValue Val2 = Op2.getOperand(0);
EVT ValVT = Val1->getValueType(0);
SDLoc DL(N);
SDValue AddVal = DAG.getNode(ISD::ADD, DL, ValVT, Val1, Val2);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
DAG.getNode(AArch64ISD::UADDV, DL, ValVT, AddVal),
DAG.getConstant(0, DL, MVT::i64));
}
// ADD(UDOT(zero, x, y), A) --> UDOT(A, x, y)
static SDValue performAddDotCombine(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
if (N->getOpcode() != ISD::ADD)
return SDValue();
SDValue Dot = N->getOperand(0);
SDValue A = N->getOperand(1);
// Handle commutativity
auto isZeroDot = [](SDValue Dot) {
return (Dot.getOpcode() == AArch64ISD::UDOT ||
Dot.getOpcode() == AArch64ISD::SDOT) &&
isZerosVector(Dot.getOperand(0).getNode());
};
if (!isZeroDot(Dot))
std::swap(Dot, A);
if (!isZeroDot(Dot))
return SDValue();
return DAG.getNode(Dot.getOpcode(), SDLoc(N), VT, A, Dot.getOperand(1),
Dot.getOperand(2));
}
// The basic add/sub long vector instructions have variants with "2" on the end
// which act on the high-half of their inputs. They are normally matched by
// patterns like:
//
// (add (zeroext (extract_high LHS)),
// (zeroext (extract_high RHS)))
// -> uaddl2 vD, vN, vM
//
// However, if one of the extracts is something like a duplicate, this
// instruction can still be used profitably. This function puts the DAG into a
// more appropriate form for those patterns to trigger.
static SDValue performAddSubLongCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
MVT VT = N->getSimpleValueType(0);
if (!VT.is128BitVector()) {
if (N->getOpcode() == ISD::ADD)
return performSetccAddFolding(N, DAG);
return SDValue();
}
// Make sure both branches are extended in the same way.
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if ((LHS.getOpcode() != ISD::ZERO_EXTEND &&
LHS.getOpcode() != ISD::SIGN_EXTEND) ||
LHS.getOpcode() != RHS.getOpcode())
return SDValue();
unsigned ExtType = LHS.getOpcode();
// It's not worth doing if at least one of the inputs isn't already an
// extract, but we don't know which it'll be so we have to try both.
if (isEssentiallyExtractHighSubvector(LHS.getOperand(0))) {
RHS = tryExtendDUPToExtractHigh(RHS.getOperand(0), DAG);
if (!RHS.getNode())
return SDValue();
RHS = DAG.getNode(ExtType, SDLoc(N), VT, RHS);
} else if (isEssentiallyExtractHighSubvector(RHS.getOperand(0))) {
LHS = tryExtendDUPToExtractHigh(LHS.getOperand(0), DAG);
if (!LHS.getNode())
return SDValue();
LHS = DAG.getNode(ExtType, SDLoc(N), VT, LHS);
}
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
}
static SDValue performAddSubCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
// Try to change sum of two reductions.
if (SDValue Val = performUADDVCombine(N, DAG))
return Val;
if (SDValue Val = performAddDotCombine(N, DAG))
return Val;
return performAddSubLongCombine(N, DCI, DAG);
}
// Massage DAGs which we can use the high-half "long" operations on into
// something isel will recognize better. E.g.
//
// (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->
// (aarch64_neon_umull (extract_high (v2i64 vec)))
// (extract_high (v2i64 (dup128 scalar)))))
//
static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SDValue LHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 0 : 1);
SDValue RHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 1 : 2);
assert(LHS.getValueType().is64BitVector() &&
RHS.getValueType().is64BitVector() &&
"unexpected shape for long operation");
// Either node could be a DUP, but it's not worth doing both of them (you
// might just as well use the non-high version), so look for a corresponding
// extract operation on the other "wing".
if (isEssentiallyExtractHighSubvector(LHS)) {
RHS = tryExtendDUPToExtractHigh(RHS, DAG);
if (!RHS.getNode())
return SDValue();
} else if (isEssentiallyExtractHighSubvector(RHS)) {
LHS = tryExtendDUPToExtractHigh(LHS, DAG);
if (!LHS.getNode())
return SDValue();
}
if (IID == Intrinsic::not_intrinsic)
return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), LHS, RHS);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
N->getOperand(0), LHS, RHS);
}
static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
MVT ElemTy = N->getSimpleValueType(0).getScalarType();
unsigned ElemBits = ElemTy.getSizeInBits();
int64_t ShiftAmount;
if (BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(2))) {
APInt SplatValue, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
HasAnyUndefs, ElemBits) ||
SplatBitSize != ElemBits)
return SDValue();
ShiftAmount = SplatValue.getSExtValue();
} else if (ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
ShiftAmount = CVN->getSExtValue();
} else
return SDValue();
unsigned Opcode;
bool IsRightShift;
switch (IID) {
default:
llvm_unreachable("Unknown shift intrinsic");
case Intrinsic::aarch64_neon_sqshl:
Opcode = AArch64ISD::SQSHL_I;
IsRightShift = false;
break;
case Intrinsic::aarch64_neon_uqshl:
Opcode = AArch64ISD::UQSHL_I;
IsRightShift = false;
break;
case Intrinsic::aarch64_neon_srshl:
Opcode = AArch64ISD::SRSHR_I;
IsRightShift = true;
break;
case Intrinsic::aarch64_neon_urshl:
Opcode = AArch64ISD::URSHR_I;
IsRightShift = true;
break;
case Intrinsic::aarch64_neon_sqshlu:
Opcode = AArch64ISD::SQSHLU_I;
IsRightShift = false;
break;
case Intrinsic::aarch64_neon_sshl:
case Intrinsic::aarch64_neon_ushl:
// For positive shift amounts we can use SHL, as ushl/sshl perform a regular
// left shift for positive shift amounts. Below, we only replace the current
// node with VSHL if this condition is met.
Opcode = AArch64ISD::VSHL;
IsRightShift = false;
break;
}
if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
SDLoc dl(N);
return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
DAG.getConstant(-ShiftAmount, dl, MVT::i32));
} else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
SDLoc dl(N);
return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
DAG.getConstant(ShiftAmount, dl, MVT::i32));
}
return SDValue();
}
// The CRC32[BH] instructions ignore the high bits of their data operand. Since
// the intrinsics must be legal and take an i32, this means there's almost
// certainly going to be a zext in the DAG which we can eliminate.
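//
// For instance (hedged sketch):
//   llvm.aarch64.crc32b(%crc, and(%data, 255))
//     --> llvm.aarch64.crc32b(%crc, %data)
// since CRC32B only reads the low 8 bits of the data operand anyway.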
static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) {
SDValue AndN = N->getOperand(2);
if (AndN.getOpcode() != ISD::AND)
return SDValue();
ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(AndN.getOperand(1));
if (!CMask || CMask->getZExtValue() != Mask)
return SDValue();
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), MVT::i32,
N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
}
static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N,
SelectionDAG &DAG) {
SDLoc dl(N);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0),
DAG.getNode(Opc, dl,
N->getOperand(1).getSimpleValueType(),
N->getOperand(1)),
DAG.getConstant(0, dl, MVT::i64));
}
static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Op1 = N->getOperand(1);
SDValue Op2 = N->getOperand(2);
EVT ScalarTy = Op2.getValueType();
if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
ScalarTy = MVT::i32;
// Lower index_vector(base, step) to mul(step_vector(1), splat(step)) + splat(base).
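// e.g. (illustrative) index(2, 3) produces the sequence <2, 5, 8, 11, ...>.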
SDValue StepVector = DAG.getStepVector(DL, N->getValueType(0));
SDValue Step = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op2);
SDValue Mul = DAG.getNode(ISD::MUL, DL, N->getValueType(0), StepVector, Step);
SDValue Base = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op1);
return DAG.getNode(ISD::ADD, DL, N->getValueType(0), Mul, Base);
}
static SDValue LowerSVEIntrinsicDUP(SDNode *N, SelectionDAG &DAG) {
SDLoc dl(N);
SDValue Scalar = N->getOperand(3);
EVT ScalarTy = Scalar.getValueType();
if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
SDValue Passthru = N->getOperand(1);
SDValue Pred = N->getOperand(2);
return DAG.getNode(AArch64ISD::DUP_MERGE_PASSTHRU, dl, N->getValueType(0),
Pred, Scalar, Passthru);
}
static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG) {
SDLoc dl(N);
LLVMContext &Ctx = *DAG.getContext();
EVT VT = N->getValueType(0);
assert(VT.isScalableVector() && "Expected a scalable vector.");
// Current lowering only supports the SVE-ACLE types.
if (VT.getSizeInBits().getKnownMinSize() != AArch64::SVEBitsPerBlock)
return SDValue();
unsigned ElemSize = VT.getVectorElementType().getSizeInBits() / 8;
unsigned ByteSize = VT.getSizeInBits().getKnownMinSize() / 8;
EVT ByteVT =
EVT::getVectorVT(Ctx, MVT::i8, ElementCount::getScalable(ByteSize));
// Convert everything to the domain of EXT (i.e. bytes).
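// For example (illustrative), an EXT of nxv4i32 by 3 elements becomes an EXT of
// nxv16i8 by 12 bytes (3 * 4-byte elements).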
SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(1));
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(2));
SDValue Op2 = DAG.getNode(ISD::MUL, dl, MVT::i32, N->getOperand(3),
DAG.getConstant(ElemSize, dl, MVT::i32));
SDValue EXT = DAG.getNode(AArch64ISD::EXT, dl, ByteVT, Op0, Op1, Op2);
return DAG.getNode(ISD::BITCAST, dl, VT, EXT);
}
static SDValue tryConvertSVEWideCompare(SDNode *N, ISD::CondCode CC,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalize())
return SDValue();
SDValue Comparator = N->getOperand(3);
if (Comparator.getOpcode() == AArch64ISD::DUP ||
Comparator.getOpcode() == ISD::SPLAT_VECTOR) {
unsigned IID = getIntrinsicID(N);
EVT VT = N->getValueType(0);
EVT CmpVT = N->getOperand(2).getValueType();
SDValue Pred = N->getOperand(1);
SDValue Imm;
SDLoc DL(N);
switch (IID) {
default:
llvm_unreachable("Called with wrong intrinsic!");
break;
// Signed comparisons
case Intrinsic::aarch64_sve_cmpeq_wide:
case Intrinsic::aarch64_sve_cmpne_wide:
case Intrinsic::aarch64_sve_cmpge_wide:
case Intrinsic::aarch64_sve_cmpgt_wide:
case Intrinsic::aarch64_sve_cmplt_wide:
case Intrinsic::aarch64_sve_cmple_wide: {
if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
int64_t ImmVal = CN->getSExtValue();
if (ImmVal >= -16 && ImmVal <= 15)
Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
else
return SDValue();
}
break;
}
// Unsigned comparisons
case Intrinsic::aarch64_sve_cmphs_wide:
case Intrinsic::aarch64_sve_cmphi_wide:
case Intrinsic::aarch64_sve_cmplo_wide:
case Intrinsic::aarch64_sve_cmpls_wide: {
if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
uint64_t ImmVal = CN->getZExtValue();
if (ImmVal <= 127)
Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
else
return SDValue();
}
break;
}
}
if (!Imm)
return SDValue();
SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, CmpVT, Imm);
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, VT, Pred,
N->getOperand(2), Splat, DAG.getCondCode(CC));
}
return SDValue();
}
static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
AArch64CC::CondCode Cond) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc DL(Op);
assert(Op.getValueType().isScalableVector() &&
TLI.isTypeLegal(Op.getValueType()) &&
"Expected legal scalable vector type!");
// Ensure target specific opcodes are using legal type.
EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue TVal = DAG.getConstant(1, DL, OutVT);
SDValue FVal = DAG.getConstant(0, DL, OutVT);
// Set condition code (CC) flags.
SDValue Test = DAG.getNode(AArch64ISD::PTEST, DL, MVT::Other, Pg, Op);
// Convert CC to integer based on requested condition.
// NOTE: Cond is inverted to promote CSEL's removal when it feeds a compare.
SDValue CC = DAG.getConstant(getInvertedCondCode(Cond), DL, MVT::i32);
SDValue Res = DAG.getNode(AArch64ISD::CSEL, DL, OutVT, FVal, TVal, CC, Test);
return DAG.getZExtOrTrunc(Res, DL, VT);
}
static SDValue combineSVEReductionInt(SDNode *N, unsigned Opc,
SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Pred = N->getOperand(1);
SDValue VecToReduce = N->getOperand(2);
// NOTE: The integer reduction's result type is not always linked to the
// operand's element type so we construct it from the intrinsic's result type.
EVT ReduceVT = getPackedSVEVectorVT(N->getValueType(0));
SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
// SVE reductions set the whole vector register with the first element
// containing the reduction result, which we'll now extract.
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
Zero);
}
static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc,
SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Pred = N->getOperand(1);
SDValue VecToReduce = N->getOperand(2);
EVT ReduceVT = VecToReduce.getValueType();
SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
// SVE reductions set the whole vector register with the first element
// containing the reduction result, which we'll now extract.
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
Zero);
}
static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc,
SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Pred = N->getOperand(1);
SDValue InitVal = N->getOperand(2);
SDValue VecToReduce = N->getOperand(3);
EVT ReduceVT = VecToReduce.getValueType();
// Ordered reductions use the first lane of the result vector as the
// reduction's initial value.
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
InitVal = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ReduceVT,
DAG.getUNDEF(ReduceVT), InitVal, Zero);
SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, InitVal, VecToReduce);
// SVE reductions set the whole vector register with the first element
// containing the reduction result, which we'll now extract.
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
Zero);
}
static bool isAllInactivePredicate(SDValue N) {
// Look through cast.
while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST)
N = N.getOperand(0);
return ISD::isConstantSplatVectorAllZeros(N.getNode());
}
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) {
unsigned NumElts = N.getValueType().getVectorMinNumElements();
// Look through cast.
while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST) {
N = N.getOperand(0);
// When reinterpreting from a type with fewer elements the "new" elements
// are not active, so bail if they're likely to be used.
if (N.getValueType().getVectorMinNumElements() < NumElts)
return false;
}
// "ptrue p.<ty>, all" can be considered all active when <ty> is the same size
// or smaller than the implicit element type represented by N.
// NOTE: A larger element count implies a smaller element type.
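// For example (illustrative), a "ptrue p0.b, all" reinterpreted as a predicate
// for .s elements still has every needed lane set, whereas the reverse is not
// guaranteed.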
if (N.getOpcode() == AArch64ISD::PTRUE &&
N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
return N.getValueType().getVectorMinNumElements() >= NumElts;
// If we're compiling for a specific vector-length, we can check if the
// pattern's VL equals that of the scalable vector at runtime.
if (N.getOpcode() == AArch64ISD::PTRUE) {
const auto &Subtarget =
static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
if (MaxSVESize && MinSVESize == MaxSVESize) {
unsigned VScale = MaxSVESize / AArch64::SVEBitsPerBlock;
unsigned PatNumElts =
getNumElementsFromSVEPredPattern(N.getConstantOperandVal(0));
return PatNumElts == (NumElts * VScale);
}
}
return false;
}
// If a merged operation has no inactive lanes we can relax it to a predicated
// or unpredicated operation, which potentially allows better isel (perhaps
// using immediate forms) or relaxing register reuse requirements.
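// For example (illustrative), sve.add(pg, x, y) with an all-active pg can be
// emitted as a plain ISD::ADD of x and y.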
static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc,
SelectionDAG &DAG, bool UnpredOp = false,
bool SwapOperands = false) {
assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!");
assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!");
SDValue Pg = N->getOperand(1);
SDValue Op1 = N->getOperand(SwapOperands ? 3 : 2);
SDValue Op2 = N->getOperand(SwapOperands ? 2 : 3);
// ISD way to specify an all active predicate.
if (isAllActivePredicate(DAG, Pg)) {
if (UnpredOp)
return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op1, Op2);
return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Pg, Op1, Op2);
}
// FUTURE: SplatVector(true)
return SDValue();
}
static SDValue performIntrinsicCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
SelectionDAG &DAG = DCI.DAG;
unsigned IID = getIntrinsicID(N);
switch (IID) {
default:
break;
case Intrinsic::aarch64_neon_vcvtfxs2fp:
case Intrinsic::aarch64_neon_vcvtfxu2fp:
return tryCombineFixedPointConvert(N, DCI, DAG);
case Intrinsic::aarch64_neon_saddv:
return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG);
case Intrinsic::aarch64_neon_uaddv:
return combineAcrossLanesIntrinsic(AArch64ISD::UADDV, N, DAG);
case Intrinsic::aarch64_neon_sminv:
return combineAcrossLanesIntrinsic(AArch64ISD::SMINV, N, DAG);
case Intrinsic::aarch64_neon_uminv:
return combineAcrossLanesIntrinsic(AArch64ISD::UMINV, N, DAG);
case Intrinsic::aarch64_neon_smaxv:
return combineAcrossLanesIntrinsic(AArch64ISD::SMAXV, N, DAG);
case Intrinsic::aarch64_neon_umaxv:
return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
case Intrinsic::aarch64_neon_fmax:
return DAG.getNode(ISD::FMAXIMUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_fmin:
return DAG.getNode(ISD::FMINIMUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_fmaxnm:
return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_fminnm:
return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_smull:
case Intrinsic::aarch64_neon_umull:
case Intrinsic::aarch64_neon_pmull:
case Intrinsic::aarch64_neon_sqdmull:
return tryCombineLongOpWithDup(IID, N, DCI, DAG);
case Intrinsic::aarch64_neon_sqshl:
case Intrinsic::aarch64_neon_uqshl:
case Intrinsic::aarch64_neon_sqshlu:
case Intrinsic::aarch64_neon_srshl:
case Intrinsic::aarch64_neon_urshl:
case Intrinsic::aarch64_neon_sshl:
case Intrinsic::aarch64_neon_ushl:
return tryCombineShiftImm(IID, N, DAG);
case Intrinsic::aarch64_crc32b:
case Intrinsic::aarch64_crc32cb:
return tryCombineCRC32(0xff, N, DAG);
case Intrinsic::aarch64_crc32h:
case Intrinsic::aarch64_crc32ch:
return tryCombineCRC32(0xffff, N, DAG);
case Intrinsic::aarch64_sve_saddv:
// There is no i64 version of SADDV because the sign is irrelevant.
if (N->getOperand(2)->getValueType(0).getVectorElementType() == MVT::i64)
return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
else
return combineSVEReductionInt(N, AArch64ISD::SADDV_PRED, DAG);
case Intrinsic::aarch64_sve_uaddv:
return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
case Intrinsic::aarch64_sve_smaxv:
return combineSVEReductionInt(N, AArch64ISD::SMAXV_PRED, DAG);
case Intrinsic::aarch64_sve_umaxv:
return combineSVEReductionInt(N, AArch64ISD::UMAXV_PRED, DAG);
case Intrinsic::aarch64_sve_sminv:
return combineSVEReductionInt(N, AArch64ISD::SMINV_PRED, DAG);
case Intrinsic::aarch64_sve_uminv:
return combineSVEReductionInt(N, AArch64ISD::UMINV_PRED, DAG);
case Intrinsic::aarch64_sve_orv:
return combineSVEReductionInt(N, AArch64ISD::ORV_PRED, DAG);
case Intrinsic::aarch64_sve_eorv:
return combineSVEReductionInt(N, AArch64ISD::EORV_PRED, DAG);
case Intrinsic::aarch64_sve_andv:
return combineSVEReductionInt(N, AArch64ISD::ANDV_PRED, DAG);
case Intrinsic::aarch64_sve_index:
return LowerSVEIntrinsicIndex(N, DAG);
case Intrinsic::aarch64_sve_dup:
return LowerSVEIntrinsicDUP(N, DAG);
case Intrinsic::aarch64_sve_dup_x:
return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), N->getValueType(0),
N->getOperand(1));
case Intrinsic::aarch64_sve_ext:
return LowerSVEIntrinsicEXT(N, DAG);
case Intrinsic::aarch64_sve_mul:
return convertMergedOpToPredOp(N, AArch64ISD::MUL_PRED, DAG);
case Intrinsic::aarch64_sve_smulh:
return convertMergedOpToPredOp(N, AArch64ISD::MULHS_PRED, DAG);
case Intrinsic::aarch64_sve_umulh:
return convertMergedOpToPredOp(N, AArch64ISD::MULHU_PRED, DAG);
case Intrinsic::aarch64_sve_smin:
return convertMergedOpToPredOp(N, AArch64ISD::SMIN_PRED, DAG);
case Intrinsic::aarch64_sve_umin:
return convertMergedOpToPredOp(N, AArch64ISD::UMIN_PRED, DAG);
case Intrinsic::aarch64_sve_smax:
return convertMergedOpToPredOp(N, AArch64ISD::SMAX_PRED, DAG);
case Intrinsic::aarch64_sve_umax:
return convertMergedOpToPredOp(N, AArch64ISD::UMAX_PRED, DAG);
case Intrinsic::aarch64_sve_lsl:
return convertMergedOpToPredOp(N, AArch64ISD::SHL_PRED, DAG);
case Intrinsic::aarch64_sve_lsr:
return convertMergedOpToPredOp(N, AArch64ISD::SRL_PRED, DAG);
case Intrinsic::aarch64_sve_asr:
return convertMergedOpToPredOp(N, AArch64ISD::SRA_PRED, DAG);
case Intrinsic::aarch64_sve_fadd:
return convertMergedOpToPredOp(N, AArch64ISD::FADD_PRED, DAG);
case Intrinsic::aarch64_sve_fsub:
return convertMergedOpToPredOp(N, AArch64ISD::FSUB_PRED, DAG);
case Intrinsic::aarch64_sve_fmul:
return convertMergedOpToPredOp(N, AArch64ISD::FMUL_PRED, DAG);
case Intrinsic::aarch64_sve_add:
return convertMergedOpToPredOp(N, ISD::ADD, DAG, true);
case Intrinsic::aarch64_sve_sub:
return convertMergedOpToPredOp(N, ISD::SUB, DAG, true);
case Intrinsic::aarch64_sve_subr:
return convertMergedOpToPredOp(N, ISD::SUB, DAG, true, true);
case Intrinsic::aarch64_sve_and:
return convertMergedOpToPredOp(N, ISD::AND, DAG, true);
case Intrinsic::aarch64_sve_bic:
return convertMergedOpToPredOp(N, AArch64ISD::BIC, DAG, true);
case Intrinsic::aarch64_sve_eor:
return convertMergedOpToPredOp(N, ISD::XOR, DAG, true);
case Intrinsic::aarch64_sve_orr:
return convertMergedOpToPredOp(N, ISD::OR, DAG, true);
case Intrinsic::aarch64_sve_sqadd:
return convertMergedOpToPredOp(N, ISD::SADDSAT, DAG, true);
case Intrinsic::aarch64_sve_sqsub:
return convertMergedOpToPredOp(N, ISD::SSUBSAT, DAG, true);
case Intrinsic::aarch64_sve_uqadd:
return convertMergedOpToPredOp(N, ISD::UADDSAT, DAG, true);
case Intrinsic::aarch64_sve_uqsub:
return convertMergedOpToPredOp(N, ISD::USUBSAT, DAG, true);
case Intrinsic::aarch64_sve_sqadd_x:
return DAG.getNode(ISD::SADDSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_sve_sqsub_x:
return DAG.getNode(ISD::SSUBSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_sve_uqadd_x:
return DAG.getNode(ISD::UADDSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_sve_uqsub_x:
return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_sve_asrd:
return DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_cmphs:
if (!N->getOperand(2).getValueType().isFloatingPoint())
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETUGE));
break;
case Intrinsic::aarch64_sve_cmphi:
if (!N->getOperand(2).getValueType().isFloatingPoint())
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETUGT));
break;
case Intrinsic::aarch64_sve_fcmpge:
case Intrinsic::aarch64_sve_cmpge:
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETGE));
break;
case Intrinsic::aarch64_sve_fcmpgt:
case Intrinsic::aarch64_sve_cmpgt:
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETGT));
break;
case Intrinsic::aarch64_sve_fcmpeq:
case Intrinsic::aarch64_sve_cmpeq:
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETEQ));
break;
case Intrinsic::aarch64_sve_fcmpne:
case Intrinsic::aarch64_sve_cmpne:
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETNE));
break;
case Intrinsic::aarch64_sve_fcmpuo:
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETUO));
break;
case Intrinsic::aarch64_sve_fadda:
return combineSVEReductionOrderedFP(N, AArch64ISD::FADDA_PRED, DAG);
case Intrinsic::aarch64_sve_faddv:
return combineSVEReductionFP(N, AArch64ISD::FADDV_PRED, DAG);
case Intrinsic::aarch64_sve_fmaxnmv:
return combineSVEReductionFP(N, AArch64ISD::FMAXNMV_PRED, DAG);
case Intrinsic::aarch64_sve_fmaxv:
return combineSVEReductionFP(N, AArch64ISD::FMAXV_PRED, DAG);
case Intrinsic::aarch64_sve_fminnmv:
return combineSVEReductionFP(N, AArch64ISD::FMINNMV_PRED, DAG);
case Intrinsic::aarch64_sve_fminv:
return combineSVEReductionFP(N, AArch64ISD::FMINV_PRED, DAG);
case Intrinsic::aarch64_sve_sel:
return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_cmpeq_wide:
return tryConvertSVEWideCompare(N, ISD::SETEQ, DCI, DAG);
case Intrinsic::aarch64_sve_cmpne_wide:
return tryConvertSVEWideCompare(N, ISD::SETNE, DCI, DAG);
case Intrinsic::aarch64_sve_cmpge_wide:
return tryConvertSVEWideCompare(N, ISD::SETGE, DCI, DAG);
case Intrinsic::aarch64_sve_cmpgt_wide:
return tryConvertSVEWideCompare(N, ISD::SETGT, DCI, DAG);
case Intrinsic::aarch64_sve_cmplt_wide:
return tryConvertSVEWideCompare(N, ISD::SETLT, DCI, DAG);
case Intrinsic::aarch64_sve_cmple_wide:
return tryConvertSVEWideCompare(N, ISD::SETLE, DCI, DAG);
case Intrinsic::aarch64_sve_cmphs_wide:
return tryConvertSVEWideCompare(N, ISD::SETUGE, DCI, DAG);
case Intrinsic::aarch64_sve_cmphi_wide:
return tryConvertSVEWideCompare(N, ISD::SETUGT, DCI, DAG);
case Intrinsic::aarch64_sve_cmplo_wide:
return tryConvertSVEWideCompare(N, ISD::SETULT, DCI, DAG);
case Intrinsic::aarch64_sve_cmpls_wide:
return tryConvertSVEWideCompare(N, ISD::SETULE, DCI, DAG);
case Intrinsic::aarch64_sve_ptest_any:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::ANY_ACTIVE);
case Intrinsic::aarch64_sve_ptest_first:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::FIRST_ACTIVE);
case Intrinsic::aarch64_sve_ptest_last:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::LAST_ACTIVE);
}
return SDValue();
}
static bool isCheapToExtend(const SDValue &N) {
unsigned OC = N->getOpcode();
return OC == ISD::LOAD || OC == ISD::MLOAD ||
ISD::isConstantSplatVectorAllZeros(N.getNode());
}
static SDValue
performSignExtendSetCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
// If we have (sext (setcc A B)) and A and B are cheap to extend,
// we can move the sext into the arguments and have the same result. For
// example, if A and B are both loads, we can make those extending loads and
// avoid an extra instruction. This pattern appears often in VLS code
// generation, where the inputs to the setcc have a different size from the
// type expected by the instruction that wants to use the result of the setcc.
assert(N->getOpcode() == ISD::SIGN_EXTEND &&
N->getOperand(0)->getOpcode() == ISD::SETCC);
const SDValue SetCC = N->getOperand(0);
const SDValue CCOp0 = SetCC.getOperand(0);
const SDValue CCOp1 = SetCC.getOperand(1);
if (!CCOp0->getValueType(0).isInteger() ||
!CCOp1->getValueType(0).isInteger())
return SDValue();
ISD::CondCode Code =
cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get();
ISD::NodeType ExtType =
isSignedIntSetCC(Code) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
if (isCheapToExtend(SetCC.getOperand(0)) &&
isCheapToExtend(SetCC.getOperand(1))) {
const SDValue Ext1 =
DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp0);
const SDValue Ext2 =
DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp1);
return DAG.getSetCC(
SDLoc(SetCC), N->getValueType(0), Ext1, Ext2,
cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get());
}
return SDValue();
}
static SDValue performExtendCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
// If we see something like (zext (sabd (extract_high ...), (DUP ...))) then
// we can convert that DUP into another extract_high (of a bigger DUP), which
// helps the backend to decide that an sabdl2 would be useful, saving a real
// extract_high operation.
if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
(N->getOperand(0).getOpcode() == ISD::ABDU ||
N->getOperand(0).getOpcode() == ISD::ABDS)) {
SDNode *ABDNode = N->getOperand(0).getNode();
SDValue NewABD =
tryCombineLongOpWithDup(Intrinsic::not_intrinsic, ABDNode, DCI, DAG);
if (!NewABD.getNode())
return SDValue();
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD);
}
if (N->getValueType(0).isFixedLengthVector() &&
N->getOpcode() == ISD::SIGN_EXTEND &&
N->getOperand(0)->getOpcode() == ISD::SETCC)
return performSignExtendSetCCCombine(N, DCI, DAG);
return SDValue();
}
static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
SDValue SplatVal, unsigned NumVecElts) {
assert(!St.isTruncatingStore() && "cannot split truncating vector store");
unsigned OrigAlignment = St.getAlignment();
unsigned EltOffset = SplatVal.getValueType().getSizeInBits() / 8;
// Create scalar stores. This is at least as good as the code sequence for a
// split unaligned store which is a dup.s, ext.b, and two stores.
// Most of the time the three stores should be replaced by store pair
// instructions (stp).
SDLoc DL(&St);
SDValue BasePtr = St.getBasePtr();
uint64_t BaseOffset = 0;
const MachinePointerInfo &PtrInfo = St.getPointerInfo();
SDValue NewST1 =
DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo,
OrigAlignment, St.getMemOperand()->getFlags());
// As this is in ISel, we will not merge this add, which may degrade results.
if (BasePtr->getOpcode() == ISD::ADD &&
isa<ConstantSDNode>(BasePtr->getOperand(1))) {
BaseOffset = cast<ConstantSDNode>(BasePtr->getOperand(1))->getSExtValue();
BasePtr = BasePtr->getOperand(0);
}
unsigned Offset = EltOffset;
while (--NumVecElts) {
unsigned Alignment = MinAlign(OrigAlignment, Offset);
SDValue OffsetPtr =
DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
DAG.getConstant(BaseOffset + Offset, DL, MVT::i64));
NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
PtrInfo.getWithOffset(Offset), Alignment,
St.getMemOperand()->getFlags());
Offset += EltOffset;
}
return NewST1;
}
// Returns an SVE type that ContentTy can be trivially sign or zero extended
// into.
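// For example (illustrative), nxv2i8, nxv2i32 and nxv2f64 all map to the
// nxv2i64 container, since each holds two elements per 128-bit granule.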
static MVT getSVEContainerType(EVT ContentTy) {
assert(ContentTy.isSimple() && "No SVE containers for extended types");
switch (ContentTy.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("No known SVE container for this MVT type");
case MVT::nxv2i8:
case MVT::nxv2i16:
case MVT::nxv2i32:
case MVT::nxv2i64:
case MVT::nxv2f32:
case MVT::nxv2f64:
return MVT::nxv2i64;
case MVT::nxv4i8:
case MVT::nxv4i16:
case MVT::nxv4i32:
case MVT::nxv4f32:
return MVT::nxv4i32;
case MVT::nxv8i8:
case MVT::nxv8i16:
case MVT::nxv8f16:
case MVT::nxv8bf16:
return MVT::nxv8i16;
case MVT::nxv16i8:
return MVT::nxv16i8;
}
}
static SDValue performLD1Combine(SDNode *N, SelectionDAG &DAG, unsigned Opc) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
if (VT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
return SDValue();
EVT ContainerVT = VT;
if (ContainerVT.isInteger())
ContainerVT = getSVEContainerType(ContainerVT);
SDVTList VTs = DAG.getVTList(ContainerVT, MVT::Other);
SDValue Ops[] = { N->getOperand(0), // Chain
N->getOperand(2), // Pg
N->getOperand(3), // Base
DAG.getValueType(VT) };
SDValue Load = DAG.getNode(Opc, DL, VTs, Ops);
SDValue LoadChain = SDValue(Load.getNode(), 1);
if (ContainerVT.isInteger() && (VT != ContainerVT))
Load = DAG.getNode(ISD::TRUNCATE, DL, VT, Load.getValue(0));
return DAG.getMergeValues({ Load, LoadChain }, DL);
}
static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
EVT PtrTy = N->getOperand(3).getValueType();
if (VT == MVT::nxv8bf16 &&
!static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
return SDValue();
EVT LoadVT = VT;
if (VT.isFloatingPoint())
LoadVT = VT.changeTypeToInteger();
auto *MINode = cast<MemIntrinsicSDNode>(N);
SDValue PassThru = DAG.getConstant(0, DL, LoadVT);
SDValue L = DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(),
MINode->getOperand(3), DAG.getUNDEF(PtrTy),
MINode->getOperand(2), PassThru,
MINode->getMemoryVT(), MINode->getMemOperand(),
ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
if (VT.isFloatingPoint()) {
SDValue Ops[] = { DAG.getNode(ISD::BITCAST, DL, VT, L), L.getValue(1) };
return DAG.getMergeValues(Ops, DL);
}
return L;
}
template <unsigned Opcode>
static SDValue performLD1ReplicateCombine(SDNode *N, SelectionDAG &DAG) {
static_assert(Opcode == AArch64ISD::LD1RQ_MERGE_ZERO ||
Opcode == AArch64ISD::LD1RO_MERGE_ZERO,
"Unsupported opcode.");
SDLoc DL(N);
EVT VT = N->getValueType(0);
if (VT == MVT::nxv8bf16 &&
!static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
return SDValue();
EVT LoadVT = VT;
if (VT.isFloatingPoint())
LoadVT = VT.changeTypeToInteger();
SDValue Ops[] = {N->getOperand(0), N->getOperand(2), N->getOperand(3)};
SDValue Load = DAG.getNode(Opcode, DL, {LoadVT, MVT::Other}, Ops);
SDValue LoadChain = SDValue(Load.getNode(), 1);
if (VT.isFloatingPoint())
Load = DAG.getNode(ISD::BITCAST, DL, VT, Load.getValue(0));
return DAG.getMergeValues({Load, LoadChain}, DL);
}
static SDValue performST1Combine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Data = N->getOperand(2);
EVT DataVT = Data.getValueType();
EVT HwSrcVt = getSVEContainerType(DataVT);
SDValue InputVT = DAG.getValueType(DataVT);
if (DataVT == MVT::nxv8bf16 &&
!static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
return SDValue();
if (DataVT.isFloatingPoint())
InputVT = DAG.getValueType(HwSrcVt);
SDValue SrcNew;
if (Data.getValueType().isFloatingPoint())
SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Data);
else
SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Data);
SDValue Ops[] = { N->getOperand(0), // Chain
SrcNew,
N->getOperand(4), // Base
N->getOperand(3), // Pg
InputVT
};
return DAG.getNode(AArch64ISD::ST1_PRED, DL, N->getValueType(0), Ops);
}
static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Data = N->getOperand(2);
EVT DataVT = Data.getValueType();
EVT PtrTy = N->getOperand(4).getValueType();
if (DataVT == MVT::nxv8bf16 &&
!static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
return SDValue();
if (DataVT.isFloatingPoint())
Data = DAG.getNode(ISD::BITCAST, DL, DataVT.changeTypeToInteger(), Data);
auto *MINode = cast<MemIntrinsicSDNode>(N);
return DAG.getMaskedStore(MINode->getChain(), DL, Data, MINode->getOperand(4),
DAG.getUNDEF(PtrTy), MINode->getOperand(3),
MINode->getMemoryVT(), MINode->getMemOperand(),
ISD::UNINDEXED, false, false);
}
/// Replace a store of a splat of zeros with scalar stores of WZR/XZR. The
/// load/store optimizer pass will merge them into store-pair stores. This
/// should be better than a movi to create the vector zero followed by a vector
/// store if the zero constant is not re-used, since one instruction and one
/// register live range will be removed.
///
/// For example, the final generated code should be:
///
/// stp xzr, xzr, [x0]
///
/// instead of:
///
/// movi v0.2d, #0
/// str q0, [x0]
///
static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
SDValue StVal = St.getValue();
EVT VT = StVal.getValueType();
// Avoid scalarizing zero splat stores for scalable vectors.
if (VT.isScalableVector())
return SDValue();
// It is beneficial to scalarize a zero splat store for 2 or 3 i64 elements or
// 2, 3 or 4 i32 elements.
int NumVecElts = VT.getVectorNumElements();
if (!(((NumVecElts == 2 || NumVecElts == 3) &&
VT.getVectorElementType().getSizeInBits() == 64) ||
((NumVecElts == 2 || NumVecElts == 3 || NumVecElts == 4) &&
VT.getVectorElementType().getSizeInBits() == 32)))
return SDValue();
if (StVal.getOpcode() != ISD::BUILD_VECTOR)
return SDValue();
// If the zero constant has more than one use then the vector store could be
// better, since the constant mov will be amortized and stp q instructions
// can be formed.
if (!StVal.hasOneUse())
return SDValue();
// If the store is truncating then it's going down to i16 or smaller, which
// means it can be implemented in a single store anyway.
if (St.isTruncatingStore())
return SDValue();
// If the immediate offset of the address operand is too large for the stp
// instruction, then bail out.
if (DAG.isBaseWithConstantOffset(St.getBasePtr())) {
int64_t Offset = St.getBasePtr()->getConstantOperandVal(1);
if (Offset < -512 || Offset > 504)
return SDValue();
}
for (int I = 0; I < NumVecElts; ++I) {
SDValue EltVal = StVal.getOperand(I);
if (!isNullConstant(EltVal) && !isNullFPConstant(EltVal))
return SDValue();
}
// Use a CopyFromReg WZR/XZR here to prevent
// DAGCombiner::MergeConsecutiveStores from undoing this transformation.
SDLoc DL(&St);
unsigned ZeroReg;
EVT ZeroVT;
if (VT.getVectorElementType().getSizeInBits() == 32) {
ZeroReg = AArch64::WZR;
ZeroVT = MVT::i32;
} else {
ZeroReg = AArch64::XZR;
ZeroVT = MVT::i64;
}
SDValue SplatVal =
DAG.getCopyFromReg(DAG.getEntryNode(), DL, ZeroReg, ZeroVT);
return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
}
/// Replace a store of a splat of a scalar with scalar stores of the scalar
/// value. The load/store optimizer pass will merge them into store-pair stores.
/// This has better performance than a splat of the scalar followed by a split
/// vector store. Even if the stores are not merged, it is four stores vs. a
/// dup followed by an ext.b and two stores.
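/// For example (illustrative), for a v2i64 splat of x1 the final code should be
///
///   stp x1, x1, [x0]
///
/// instead of
///
///   dup v0.2d, x1
///   str q0, [x0]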
static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
SDValue StVal = St.getValue();
EVT VT = StVal.getValueType();
// Don't replace floating point stores, they possibly won't be transformed to
// stp because of the store pair suppress pass.
if (VT.isFloatingPoint())
return SDValue();
// We can express a splat as store pair(s) for 2 or 4 elements.
unsigned NumVecElts = VT.getVectorNumElements();
if (NumVecElts != 4 && NumVecElts != 2)
return SDValue();
// If the store is truncating then it's going down to i16 or smaller, which
// means it can be implemented in a single store anyway.
if (St.isTruncatingStore())
return SDValue();
// Check that this is a splat.
// Make sure that each of the relevant vector element locations is inserted
// to, i.e. 0 and 1 for v2i64 and 0, 1, 2, 3 for v4i32.
std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
SDValue SplatVal;
for (unsigned I = 0; I < NumVecElts; ++I) {
// Check for insert vector elements.
if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
return SDValue();
// Check that same value is inserted at each vector element.
if (I == 0)
SplatVal = StVal.getOperand(1);
else if (StVal.getOperand(1) != SplatVal)
return SDValue();
// Check insert element index.
ConstantSDNode *CIndex = dyn_cast<ConstantSDNode>(StVal.getOperand(2));
if (!CIndex)
return SDValue();
uint64_t IndexVal = CIndex->getZExtValue();
if (IndexVal >= NumVecElts)
return SDValue();
IndexNotInserted.reset(IndexVal);
StVal = StVal.getOperand(0);
}
// Check that all vector element locations were inserted to.
if (IndexNotInserted.any())
return SDValue();
return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
}
static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
StoreSDNode *S = cast<StoreSDNode>(N);
if (S->isVolatile() || S->isIndexed())
return SDValue();
SDValue StVal = S->getValue();
EVT VT = StVal.getValueType();
if (!VT.isFixedLengthVector())
return SDValue();
// If we get a splat of zeros, convert this vector store to a store of
// scalars. They will be merged into store pairs of xzr thereby removing one
// instruction and one register.
if (SDValue ReplacedZeroSplat = replaceZeroVectorStore(DAG, *S))
return ReplacedZeroSplat;
// FIXME: The logic for deciding if an unaligned store should be split should
// be included in TLI.allowsMisalignedMemoryAccesses(), and there should be
// a call to that function here.
if (!Subtarget->isMisaligned128StoreSlow())
return SDValue();
// Don't split at -Oz.
if (DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();
// Don't split v2i64 vectors. Memcpy lowering produces those and splitting
// those up regresses performance on micro-benchmarks and olden/bh.
if (VT.getVectorNumElements() < 2 || VT == MVT::v2i64)
return SDValue();
// Split unaligned 16B stores. They are terrible for performance.
// Don't split stores with alignment of 1 or 2. Code that uses clang vector
// extensions can use this to mark that it does not want splitting to happen
// (by underspecifying alignment to be 1 or 2). Furthermore, the chance of
// eliminating alignment hazards is only 1 in 8 for alignment of 2.
if (VT.getSizeInBits() != 128 || S->getAlignment() >= 16 ||
S->getAlignment() <= 2)
return SDValue();
// If we get a splat of a scalar convert this vector store to a store of
// scalars. They will be merged into store pairs thereby removing two
// instructions.
if (SDValue ReplacedSplat = replaceSplatVectorStore(DAG, *S))
return ReplacedSplat;
SDLoc DL(S);
// Split VT into two.
EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
unsigned NumElts = HalfVT.getVectorNumElements();
SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
DAG.getConstant(0, DL, MVT::i64));
SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
DAG.getConstant(NumElts, DL, MVT::i64));
SDValue BasePtr = S->getBasePtr();
SDValue NewST1 =
DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
S->getAlignment(), S->getMemOperand()->getFlags());
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
DAG.getConstant(8, DL, MVT::i64));
return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr,
S->getPointerInfo(), S->getAlignment(),
S->getMemOperand()->getFlags());
}
static SDValue performSpliceCombine(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == AArch64ISD::SPLICE && "Unexpected Opcode!");
// splice(pg, op1, undef) -> op1
if (N->getOperand(2).isUndef())
return N->getOperand(1);
return SDValue();
}
static SDValue performUnpackCombine(SDNode *N, SelectionDAG &DAG) {
assert((N->getOpcode() == AArch64ISD::UUNPKHI ||
N->getOpcode() == AArch64ISD::UUNPKLO) &&
"Unexpected Opcode!");
// uunpklo/hi undef -> undef
if (N->getOperand(0).isUndef())
return DAG.getUNDEF(N->getValueType(0));
return SDValue();
}
static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
EVT ResVT = N->getValueType(0);
// uzp1(unpklo(uzp1(x, y)), z) => uzp1(x, z)
if (Op0.getOpcode() == AArch64ISD::UUNPKLO) {
if (Op0.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
SDValue X = Op0.getOperand(0).getOperand(0);
return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, X, Op1);
}
}
// uzp1(x, unpkhi(uzp1(y, z))) => uzp1(x, z)
if (Op1.getOpcode() == AArch64ISD::UUNPKHI) {
if (Op1.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
SDValue Z = Op1.getOperand(0).getOperand(1);
return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, Z);
}
}
return SDValue();
}
static SDValue performGLD1Combine(SDNode *N, SelectionDAG &DAG) {
unsigned Opc = N->getOpcode();
assert(((Opc >= AArch64ISD::GLD1_MERGE_ZERO && // unsigned gather loads
Opc <= AArch64ISD::GLD1_IMM_MERGE_ZERO) ||
(Opc >= AArch64ISD::GLD1S_MERGE_ZERO && // signed gather loads
Opc <= AArch64ISD::GLD1S_IMM_MERGE_ZERO)) &&
"Invalid opcode.");
const bool Scaled = Opc == AArch64ISD::GLD1_SCALED_MERGE_ZERO ||
Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
const bool Signed = Opc == AArch64ISD::GLD1S_MERGE_ZERO ||
Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
const bool Extended = Opc == AArch64ISD::GLD1_SXTW_MERGE_ZERO ||
Opc == AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO ||
Opc == AArch64ISD::GLD1_UXTW_MERGE_ZERO ||
Opc == AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO;
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SDValue Pg = N->getOperand(1);
SDValue Base = N->getOperand(2);
SDValue Offset = N->getOperand(3);
SDValue Ty = N->getOperand(4);
EVT ResVT = N->getValueType(0);
const auto OffsetOpc = Offset.getOpcode();
const bool OffsetIsZExt =
OffsetOpc == AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU;
const bool OffsetIsSExt =
OffsetOpc == AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU;
// Fold sign/zero extensions of vector offsets into GLD1 nodes where possible.
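// For example (illustrative), a gather whose 64-bit offsets come from a
// predicated sign-extension of 32-bit values can instead use the SXTW-addressed
// gather form directly on the unextended offsets.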
if (!Extended && (OffsetIsSExt || OffsetIsZExt)) {
SDValue ExtPg = Offset.getOperand(0);
VTSDNode *ExtFrom = cast<VTSDNode>(Offset.getOperand(2).getNode());
EVT ExtFromEVT = ExtFrom->getVT().getVectorElementType();
// If the predicate for the sign- or zero-extended offset is the
// same as the predicate used for this load and the sign-/zero-extension
// was from a 32-bit value...
if (ExtPg == Pg && ExtFromEVT == MVT::i32) {
SDValue UnextendedOffset = Offset.getOperand(1);
unsigned NewOpc = getGatherVecOpcode(Scaled, OffsetIsSExt, true);
if (Signed)
NewOpc = getSignExtendedGatherOpcode(NewOpc);
return DAG.getNode(NewOpc, DL, {ResVT, MVT::Other},
{Chain, Pg, Base, UnextendedOffset, Ty});
}
}
return SDValue();
}
/// Optimize a vector shift instruction and its operand if shifted out
/// bits are not used.
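/// For example (illustrative), in (vlshr (or x, 0xff), 8) on 16-bit lanes the
/// OR only affects bits that are shifted out, so SimplifyDemandedBits can drop
/// it.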
static SDValue performVectorShiftCombine(SDNode *N,
const AArch64TargetLowering &TLI,
TargetLowering::DAGCombinerInfo &DCI) {
assert(N->getOpcode() == AArch64ISD::VASHR ||
N->getOpcode() == AArch64ISD::VLSHR);
SDValue Op = N->getOperand(0);
unsigned OpScalarSize = Op.getScalarValueSizeInBits();
unsigned ShiftImm = N->getConstantOperandVal(1);
assert(OpScalarSize > ShiftImm && "Invalid shift imm");
APInt ShiftedOutBits = APInt::getLowBitsSet(OpScalarSize, ShiftImm);
APInt DemandedMask = ~ShiftedOutBits;
if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
return SDValue(N, 0);
return SDValue();
}
static SDValue performSunpkloCombine(SDNode *N, SelectionDAG &DAG) {
// sunpklo(sext(pred)) -> sext(extract_low_half(pred))
// This transform works in partnership with performSetCCPunpkCombine to
// remove unnecessary transfer of predicates into standard registers and back
if (N->getOperand(0).getOpcode() == ISD::SIGN_EXTEND &&
N->getOperand(0)->getOperand(0)->getValueType(0).getScalarType() ==
MVT::i1) {
SDValue CC = N->getOperand(0)->getOperand(0);
auto VT = CC->getValueType(0).getHalfNumVectorElementsVT(*DAG.getContext());
SDValue Unpk = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, CC,
DAG.getVectorIdxConstant(0, SDLoc(N)));
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), N->getValueType(0), Unpk);
}
return SDValue();
}
/// Target-specific DAG combine function for post-increment LD1 (lane) and
/// post-increment LD1R.
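/// For example (illustrative), a scalar load feeding a lane insert whose
/// address is also incremented by the element size can become a single
/// post-indexed LD1LANEpost that yields the vector, the updated pointer and
/// the chain.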
static SDValue performPostLD1Combine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
bool IsLaneOp) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
if (!VT.is128BitVector() && !VT.is64BitVector())
return SDValue();
unsigned LoadIdx = IsLaneOp ? 1 : 0;
SDNode *LD = N->getOperand(LoadIdx).getNode();
// If it is not LOAD, can not do such combine.
if (LD->getOpcode() != ISD::LOAD)
return SDValue();
// The vector lane must be a constant in the LD1LANE opcode.
SDValue Lane;
if (IsLaneOp) {
Lane = N->getOperand(2);
auto *LaneC = dyn_cast<ConstantSDNode>(Lane);
if (!LaneC || LaneC->getZExtValue() >= VT.getVectorNumElements())
return SDValue();
}
LoadSDNode *LoadSDN = cast<LoadSDNode>(LD);
EVT MemVT = LoadSDN->getMemoryVT();
// Check if memory operand is the same type as the vector element.
if (MemVT != VT.getVectorElementType())
return SDValue();
// Check if there are other uses. If so, do not combine as it will introduce
// an extra load.
for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); UI != UE;
++UI) {
if (UI.getUse().getResNo() == 1) // Ignore uses of the chain result.
continue;
if (*UI != N)
return SDValue();
}
SDValue Addr = LD->getOperand(1);
SDValue Vector = N->getOperand(0);
// Search for a use of the address operand that is an increment.
for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE =
Addr.getNode()->use_end(); UI != UE; ++UI) {
SDNode *User = *UI;
if (User->getOpcode() != ISD::ADD
|| UI.getUse().getResNo() != Addr.getResNo())
continue;
// If the increment is a constant, it must match the memory ref size.
SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
uint32_t IncVal = CInc->getZExtValue();
unsigned NumBytes = VT.getScalarSizeInBits() / 8;
if (IncVal != NumBytes)
continue;
Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
}
// To avoid constructing a cycle, make sure that neither the load nor the add
// is a predecessor of the other or of the Vector.
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
Visited.insert(Addr.getNode());
Worklist.push_back(User);
Worklist.push_back(LD);
Worklist.push_back(Vector.getNode());
if (SDNode::hasPredecessorHelper(LD, Visited, Worklist) ||
SDNode::hasPredecessorHelper(User, Visited, Worklist))
continue;
SmallVector<SDValue, 8> Ops;
Ops.push_back(LD->getOperand(0)); // Chain
if (IsLaneOp) {
Ops.push_back(Vector); // The vector to be inserted
Ops.push_back(Lane); // The lane to be inserted in the vector
}
Ops.push_back(Addr);
Ops.push_back(Inc);
EVT Tys[3] = { VT, MVT::i64, MVT::Other };
SDVTList SDTys = DAG.getVTList(Tys);
unsigned NewOp = IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost;
SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops,
MemVT,
LoadSDN->getMemOperand());
// Update the uses.
SDValue NewResults[] = {
SDValue(LD, 0), // The result of load
SDValue(UpdN.getNode(), 2) // Chain
};
DCI.CombineTo(LD, NewResults);
DCI.CombineTo(N, SDValue(UpdN.getNode(), 0)); // Dup/Inserted Result
DCI.CombineTo(User, SDValue(UpdN.getNode(), 1)); // Write back register
break;
}
return SDValue();
}
/// Simplify ``Addr`` given that the top byte of it is ignored by HW during
/// address translation.
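/// For example (illustrative), with TBI an AND of a store address with
/// 0x00ffffffffffffff is redundant, because bits [63:56] are ignored by the
/// hardware anyway, so SimplifyDemandedBits can strip it.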
static bool performTBISimplification(SDValue Addr,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
APInt DemandedMask = APInt::getLowBitsSet(64, 56);
KnownBits Known;
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.SimplifyDemandedBits(Addr, DemandedMask, Known, TLO)) {
DCI.CommitTargetLoweringOpt(TLO);
return true;
}
return false;
}
static SDValue foldTruncStoreOfExt(SelectionDAG &DAG, SDNode *N) {
assert((N->getOpcode() == ISD::STORE || N->getOpcode() == ISD::MSTORE) &&
"Expected STORE dag node in input!");
if (auto Store = dyn_cast<StoreSDNode>(N)) {
if (!Store->isTruncatingStore() || Store->isIndexed())
return SDValue();
SDValue Ext = Store->getValue();
auto ExtOpCode = Ext.getOpcode();
if (ExtOpCode != ISD::ZERO_EXTEND && ExtOpCode != ISD::SIGN_EXTEND &&
ExtOpCode != ISD::ANY_EXTEND)
return SDValue();
SDValue Orig = Ext->getOperand(0);
if (Store->getMemoryVT() != Orig.getValueType())
return SDValue();
return DAG.getStore(Store->getChain(), SDLoc(Store), Orig,
Store->getBasePtr(), Store->getMemOperand());
}
return SDValue();
}
static SDValue performSTORECombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
SDValue Value = ST->getValue();
SDValue Ptr = ST->getBasePtr();
// If this is an FP_ROUND followed by a store, fold this into a truncating
// store. We can do this even if this is already a truncstore.
// We purposefully don't care about legality of the nodes here as we know
// they can be split down into something legal.
if (DCI.isBeforeLegalizeOps() && Value.getOpcode() == ISD::FP_ROUND &&
Value.getNode()->hasOneUse() && ST->isUnindexed() &&
Subtarget->useSVEForFixedLengthVectors() &&
Value.getValueType().isFixedLengthVector() &&
Value.getValueType().getFixedSizeInBits() >=
Subtarget->getMinSVEVectorSizeInBits())
return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
ST->getMemoryVT(), ST->getMemOperand());
if (SDValue Split = splitStores(N, DCI, DAG, Subtarget))
return Split;
if (Subtarget->supportsAddressTopByteIgnored() &&
performTBISimplification(N->getOperand(2), DCI, DAG))
return SDValue(N, 0);
if (SDValue Store = foldTruncStoreOfExt(DAG, N))
return Store;
return SDValue();
}
/// Target-specific DAG combine function for NEON load/store intrinsics
/// to merge base address updates.
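/// For example (illustrative):
///
///   ld2 { v0.4s, v1.4s }, [x0]
///   add x0, x0, #32
///
/// can be merged into the post-indexed form
///
///   ld2 { v0.4s, v1.4s }, [x0], #32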
static SDValue performNEONPostLDSTCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
unsigned AddrOpIdx = N->getNumOperands() - 1;
SDValue Addr = N->getOperand(AddrOpIdx);
// Search for a use of the address operand that is an increment.
for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
SDNode *User = *UI;
if (User->getOpcode() != ISD::ADD ||
UI.getUse().getResNo() != Addr.getResNo())
continue;
// Check that the add is independent of the load/store. Otherwise, folding
// it would create a cycle.
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
Visited.insert(Addr.getNode());
Worklist.push_back(N);
Worklist.push_back(User);
if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
SDNode::hasPredecessorHelper(User, Visited, Worklist))
continue;
// Find the new opcode for the updating load/store.
bool IsStore = false;
bool IsLaneOp = false;
bool IsDupOp = false;
unsigned NewOpc = 0;
unsigned NumVecs = 0;
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
switch (IntNo) {
default: llvm_unreachable("unexpected intrinsic for Neon base update");
case Intrinsic::aarch64_neon_ld2: NewOpc = AArch64ISD::LD2post;
NumVecs = 2; break;
case Intrinsic::aarch64_neon_ld3: NewOpc = AArch64ISD::LD3post;
NumVecs = 3; break;
case Intrinsic::aarch64_neon_ld4: NewOpc = AArch64ISD::LD4post;
NumVecs = 4; break;
case Intrinsic::aarch64_neon_st2: NewOpc = AArch64ISD::ST2post;
NumVecs = 2; IsStore = true; break;
case Intrinsic::aarch64_neon_st3: NewOpc = AArch64ISD::ST3post;
NumVecs = 3; IsStore = true; break;
case Intrinsic::aarch64_neon_st4: NewOpc = AArch64ISD::ST4post;
NumVecs = 4; IsStore = true; break;
case Intrinsic::aarch64_neon_ld1x2: NewOpc = AArch64ISD::LD1x2post;
NumVecs = 2; break;
case Intrinsic::aarch64_neon_ld1x3: NewOpc = AArch64ISD::LD1x3post;
NumVecs = 3; break;
case Intrinsic::aarch64_neon_ld1x4: NewOpc = AArch64ISD::LD1x4post;
NumVecs = 4; break;
case Intrinsic::aarch64_neon_st1x2: NewOpc = AArch64ISD::ST1x2post;
NumVecs = 2; IsStore = true; break;
case Intrinsic::aarch64_neon_st1x3: NewOpc = AArch64ISD::ST1x3post;
NumVecs = 3; IsStore = true; break;
case Intrinsic::aarch64_neon_st1x4: NewOpc = AArch64ISD::ST1x4post;
NumVecs = 4; IsStore = true; break;
case Intrinsic::aarch64_neon_ld2r: NewOpc = AArch64ISD::LD2DUPpost;
NumVecs = 2; IsDupOp = true; break;
case Intrinsic::aarch64_neon_ld3r: NewOpc = AArch64ISD::LD3DUPpost;
NumVecs = 3; IsDupOp = true; break;
case Intrinsic::aarch64_neon_ld4r: NewOpc = AArch64ISD::LD4DUPpost;
NumVecs = 4; IsDupOp = true; break;
case Intrinsic::aarch64_neon_ld2lane: NewOpc = AArch64ISD::LD2LANEpost;
NumVecs = 2; IsLaneOp = true; break;
case Intrinsic::aarch64_neon_ld3lane: NewOpc = AArch64ISD::LD3LANEpost;
NumVecs = 3; IsLaneOp = true; break;
case Intrinsic::aarch64_neon_ld4lane: NewOpc = AArch64ISD::LD4LANEpost;
NumVecs = 4; IsLaneOp = true; break;
case Intrinsic::aarch64_neon_st2lane: NewOpc = AArch64ISD::ST2LANEpost;
NumVecs = 2; IsStore = true; IsLaneOp = true; break;
case Intrinsic::aarch64_neon_st3lane: NewOpc = AArch64ISD::ST3LANEpost;
NumVecs = 3; IsStore = true; IsLaneOp = true; break;
case Intrinsic::aarch64_neon_st4lane: NewOpc = AArch64ISD::ST4LANEpost;
NumVecs = 4; IsStore = true; IsLaneOp = true; break;
}
EVT VecTy;
if (IsStore)
VecTy = N->getOperand(2).getValueType();
else
VecTy = N->getValueType(0);
// If the increment is a constant, it must match the memory ref size.
SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
uint32_t IncVal = CInc->getZExtValue();
unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
if (IsLaneOp || IsDupOp)
NumBytes /= VecTy.getVectorNumElements();
if (IncVal != NumBytes)
continue;
Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
}
SmallVector<SDValue, 8> Ops;
Ops.push_back(N->getOperand(0)); // Incoming chain
// Load lane and store have vector list as input.
if (IsLaneOp || IsStore)
for (unsigned i = 2; i < AddrOpIdx; ++i)
Ops.push_back(N->getOperand(i));
Ops.push_back(Addr); // Base register
Ops.push_back(Inc);
// Return Types.
EVT Tys[6];
unsigned NumResultVecs = (IsStore ? 0 : NumVecs);
unsigned n;
for (n = 0; n < NumResultVecs; ++n)
Tys[n] = VecTy;
Tys[n++] = MVT::i64; // Type of write back register
Tys[n] = MVT::Other; // Type of the chain
SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs + 2));
MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops,
MemInt->getMemoryVT(),
MemInt->getMemOperand());
// Update the uses.
std::vector<SDValue> NewResults;
for (unsigned i = 0; i < NumResultVecs; ++i) {
NewResults.push_back(SDValue(UpdN.getNode(), i));
}
NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1));
DCI.CombineTo(N, NewResults);
DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
break;
}
return SDValue();
}
// Checks to see if the value is the prescribed width and returns information
// about its extension mode.
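// For example (illustrative), an 8-bit sign-extending load checked with
// width == 8 returns true and reports ISD::SEXTLOAD.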
static
bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType) {
ExtType = ISD::NON_EXTLOAD;
switch(V.getNode()->getOpcode()) {
default:
return false;
case ISD::LOAD: {
LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8)
|| (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) {
ExtType = LoadNode->getExtensionType();
return true;
}
return false;
}
case ISD::AssertSext: {
VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
if ((TypeNode->getVT() == MVT::i8 && width == 8)
|| (TypeNode->getVT() == MVT::i16 && width == 16)) {
ExtType = ISD::SEXTLOAD;
return true;
}
return false;
}
case ISD::AssertZext: {
VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
if ((TypeNode->getVT() == MVT::i8 && width == 8)
|| (TypeNode->getVT() == MVT::i16 && width == 16)) {
ExtType = ISD::ZEXTLOAD;
return true;
}
return false;
}
case ISD::Constant:
case ISD::TargetConstant: {
return std::abs(cast<ConstantSDNode>(V.getNode())->getSExtValue()) <
1LL << (width - 1);
}
}
return true;
}
// This function does a whole lot of voodoo to determine if the tests are
// equivalent without and with a mask. Essentially what happens is that given a
// DAG resembling:
//
// +-------------+ +-------------+ +-------------+ +-------------+
// | Input | | AddConstant | | CompConstant| | CC |
// +-------------+ +-------------+ +-------------+ +-------------+
// | | | |
// V V | +----------+
// +-------------+ +----+ | |
// | ADD | |0xff| | |
// +-------------+ +----+ | |
// | | | |
// V V | |
// +-------------+ | |
// | AND | | |
// +-------------+ | |
// | | |
// +-----+ | |
// | | |
// V V V
// +-------------+
// | CMP |
// +-------------+
//
// The AND node may be safely removed for some combinations of inputs. In
// particular we need to take into account the extension type of the Input,
// the exact values of AddConstant, CompConstant, and CC, along with the nominal
// width of the input (this can work for any width inputs, the above graph is
// specific to 8 bits).
//
// The specific equations were worked out by generating output tables for each
// AArch64CC value in terms of the AddConstant (w1) and CompConstant (w2). The
// problem was simplified by working with 4 bit inputs, which means we only
// needed to reason about 24 distinct bit patterns: 8 patterns unique to zero
// extension (8,15), 8 patterns unique to sign extensions (-8,-1), and 8
// patterns present in both extensions (0,7). For every distinct set of
// AddConstant and CompConstants bit patterns we can consider the masked and
// unmasked versions to be equivalent if the result of this function is true for
// all 16 distinct bit patterns for the current extension type of Input (w0).
//
// sub w8, w0, w1
// and w10, w8, #0x0f
// cmp w8, w2
// cset w9, AArch64CC
// cmp w10, w2
// cset w11, AArch64CC
// cmp w9, w11
// cset w0, eq
// ret
//
// Since the above function shows when the outputs are equivalent it defines
// when it is safe to remove the AND. Unfortunately it only runs on AArch64 and
// would be expensive to run during compiles. The equations below were written
// in a test harness that confirmed they gave outputs equivalent to the above
// function for all inputs, so they can instead be used to determine if the
// removal is legal.
//
// isEquivalentMaskless() is the code for testing if the AND can be removed,
// factored out of the DAG recognition since the DAG can take several forms.
static bool isEquivalentMaskless(unsigned CC, unsigned width,
ISD::LoadExtType ExtType, int AddConstant,
int CompConstant) {
// By being careful about our equations and only writing them in terms of
// symbolic values and well-known constants (0, 1, -1, MaxUInt) we can
// make them generally applicable to all bit widths.
int MaxUInt = (1 << width);
// For the purposes of these comparisons sign extending the type is
// equivalent to zero extending the add and displacing it by half the integer
// width. Provided we are careful and make sure our equations are valid over
// the whole range we can just adjust the input and avoid writing equations
// for sign extended inputs.
if (ExtType == ISD::SEXTLOAD)
AddConstant -= (1 << (width-1));
switch(CC) {
case AArch64CC::LE:
case AArch64CC::GT:
if ((AddConstant == 0) ||
(CompConstant == MaxUInt - 1 && AddConstant < 0) ||
(AddConstant >= 0 && CompConstant < 0) ||
(AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant))
return true;
break;
case AArch64CC::LT:
case AArch64CC::GE:
if ((AddConstant == 0) ||
(AddConstant >= 0 && CompConstant <= 0) ||
(AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
return true;
break;
case AArch64CC::HI:
case AArch64CC::LS:
if ((AddConstant >= 0 && CompConstant < 0) ||
(AddConstant <= 0 && CompConstant >= -1 &&
CompConstant < AddConstant + MaxUInt))
return true;
break;
case AArch64CC::PL:
case AArch64CC::MI:
if ((AddConstant == 0) ||
(AddConstant > 0 && CompConstant <= 0) ||
(AddConstant < 0 && CompConstant <= AddConstant))
return true;
break;
case AArch64CC::LO:
case AArch64CC::HS:
if ((AddConstant >= 0 && CompConstant <= 0) ||
(AddConstant <= 0 && CompConstant >= 0 &&
CompConstant <= AddConstant + MaxUInt))
return true;
break;
case AArch64CC::EQ:
case AArch64CC::NE:
if ((AddConstant > 0 && CompConstant < 0) ||
(AddConstant < 0 && CompConstant >= 0 &&
CompConstant < AddConstant + MaxUInt) ||
(AddConstant >= 0 && CompConstant >= 0 &&
CompConstant >= AddConstant) ||
(AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant))
return true;
break;
case AArch64CC::VS:
case AArch64CC::VC:
case AArch64CC::AL:
case AArch64CC::NV:
return true;
case AArch64CC::Invalid:
break;
}
return false;
}
static
SDValue performCONDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG, unsigned CCIndex,
unsigned CmpIndex) {
unsigned CC = cast<ConstantSDNode>(N->getOperand(CCIndex))->getSExtValue();
SDNode *SubsNode = N->getOperand(CmpIndex).getNode();
unsigned CondOpcode = SubsNode->getOpcode();
if (CondOpcode != AArch64ISD::SUBS)
return SDValue();
// There is a SUBS feeding this condition. Is it fed by a mask we can
// use?
SDNode *AndNode = SubsNode->getOperand(0).getNode();
unsigned MaskBits = 0;
if (AndNode->getOpcode() != ISD::AND)
return SDValue();
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
uint32_t CNV = CN->getZExtValue();
if (CNV == 255)
MaskBits = 8;
else if (CNV == 65535)
MaskBits = 16;
}
if (!MaskBits)
return SDValue();
SDValue AddValue = AndNode->getOperand(0);
if (AddValue.getOpcode() != ISD::ADD)
return SDValue();
// The basic dag structure is correct, grab the inputs and validate them.
SDValue AddInputValue1 = AddValue.getNode()->getOperand(0);
SDValue AddInputValue2 = AddValue.getNode()->getOperand(1);
SDValue SubsInputValue = SubsNode->getOperand(1);
// The mask is present and the provenance of all the values is a smaller type,
// let's see if the mask is superfluous.
if (!isa<ConstantSDNode>(AddInputValue2.getNode()) ||
!isa<ConstantSDNode>(SubsInputValue.getNode()))
return SDValue();
ISD::LoadExtType ExtType;
if (!checkValueWidth(SubsInputValue, MaskBits, ExtType) ||
!checkValueWidth(AddInputValue2, MaskBits, ExtType) ||
!checkValueWidth(AddInputValue1, MaskBits, ExtType) )
return SDValue();
if(!isEquivalentMaskless(CC, MaskBits, ExtType,
cast<ConstantSDNode>(AddInputValue2.getNode())->getSExtValue(),
cast<ConstantSDNode>(SubsInputValue.getNode())->getSExtValue()))
return SDValue();
// The AND is not necessary, remove it.
SDVTList VTs = DAG.getVTList(SubsNode->getValueType(0),
SubsNode->getValueType(1));
SDValue Ops[] = { AddValue, SubsNode->getOperand(1) };
SDValue NewValue = DAG.getNode(CondOpcode, SDLoc(SubsNode), VTs, Ops);
DAG.ReplaceAllUsesWith(SubsNode, NewValue.getNode());
return SDValue(N, 0);
}
// Optimize compare with zero and branch.
static SDValue performBRCONDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
// Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
// will not be produced, as they are conditional branch instructions that do
// not set flags.
if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
return SDValue();
if (SDValue NV = performCONDCombine(N, DCI, DAG, 2, 3))
N = NV.getNode();
SDValue Chain = N->getOperand(0);
SDValue Dest = N->getOperand(1);
SDValue CCVal = N->getOperand(2);
SDValue Cmp = N->getOperand(3);
assert(isa<ConstantSDNode>(CCVal) && "Expected a ConstantSDNode here!");
unsigned CC = cast<ConstantSDNode>(CCVal)->getZExtValue();
if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
return SDValue();
unsigned CmpOpc = Cmp.getOpcode();
if (CmpOpc != AArch64ISD::ADDS && CmpOpc != AArch64ISD::SUBS)
return SDValue();
// Only attempt folding if there is only one use of the flag and no use of the
// value.
if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
return SDValue();
SDValue LHS = Cmp.getOperand(0);
SDValue RHS = Cmp.getOperand(1);
assert(LHS.getValueType() == RHS.getValueType() &&
"Expected the value type to be the same for both operands!");
if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
return SDValue();
if (isNullConstant(LHS))
std::swap(LHS, RHS);
if (!isNullConstant(RHS))
return SDValue();
if (LHS.getOpcode() == ISD::SHL || LHS.getOpcode() == ISD::SRA ||
LHS.getOpcode() == ISD::SRL)
return SDValue();
// Fold the compare into the branch instruction.
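// e.g. a BRCOND on (SUBS x, 0) with EQ becomes (CBZ x, dest), and with NE it
// becomes (CBNZ x, dest).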
SDValue BR;
if (CC == AArch64CC::EQ)
BR = DAG.getNode(AArch64ISD::CBZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
else
BR = DAG.getNode(AArch64ISD::CBNZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, BR, false);
return SDValue();
}
// Optimize CSEL instructions
static SDValue performCSELCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
// CSEL x, x, cc -> x
if (N->getOperand(0) == N->getOperand(1))
return N->getOperand(0);
return performCONDCombine(N, DCI, DAG, 2, 3);
}
static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == ISD::SETCC && "Unexpected opcode!");
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
// setcc (csel 0, 1, cond, X), 1, ne ==> csel 0, 1, !cond, X
if (Cond == ISD::SETNE && isOneConstant(RHS) &&
LHS->getOpcode() == AArch64ISD::CSEL &&
isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
LHS->hasOneUse()) {
SDLoc DL(N);
// Invert CSEL's condition.
auto *OpCC = cast<ConstantSDNode>(LHS.getOperand(2));
auto OldCond = static_cast<AArch64CC::CondCode>(OpCC->getZExtValue());
auto NewCond = getInvertedCondCode(OldCond);
// csel 0, 1, !cond, X
SDValue CSEL =
DAG.getNode(AArch64ISD::CSEL, DL, LHS.getValueType(), LHS.getOperand(0),
LHS.getOperand(1), DAG.getConstant(NewCond, DL, MVT::i32),
LHS.getOperand(3));
return DAG.getZExtOrTrunc(CSEL, DL, N->getValueType(0));
}
return SDValue();
}
static SDValue performSetCCPunpkCombine(SDNode *N, SelectionDAG &DAG) {
// setcc_merge_zero pred
// (sign_extend (extract_subvector (setcc_merge_zero ... pred ...))), 0, ne
// => extract_subvector (inner setcc_merge_zero)
SDValue Pred = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(3))->get();
if (Cond != ISD::SETNE || !isZerosVector(RHS.getNode()) ||
LHS->getOpcode() != ISD::SIGN_EXTEND)
return SDValue();
SDValue Extract = LHS->getOperand(0);
if (Extract->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
Extract->getValueType(0) != N->getValueType(0) ||
Extract->getConstantOperandVal(1) != 0)
return SDValue();
SDValue InnerSetCC = Extract->getOperand(0);
if (InnerSetCC->getOpcode() != AArch64ISD::SETCC_MERGE_ZERO)
return SDValue();
// By this point we've effectively got
// zero_inactive_lanes_and_trunc_i1(sext_i1(A)). If we can prove A's inactive
// lanes are already zero then the trunc(sext()) sequence is redundant and we
// can operate on A directly.
SDValue InnerPred = InnerSetCC.getOperand(0);
if (Pred.getOpcode() == AArch64ISD::PTRUE &&
InnerPred.getOpcode() == AArch64ISD::PTRUE &&
Pred.getConstantOperandVal(0) == InnerPred.getConstantOperandVal(0) &&
Pred->getConstantOperandVal(0) >= AArch64SVEPredPattern::vl1 &&
Pred->getConstantOperandVal(0) <= AArch64SVEPredPattern::vl256)
return Extract;
return SDValue();
}
static SDValue performSetccMergeZeroCombine(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
"Unexpected opcode!");
SDValue Pred = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(3))->get();
// setcc_merge_zero pred (sign_extend (setcc_merge_zero ... pred ...)), 0, ne
// => inner setcc_merge_zero
if (Cond == ISD::SETNE && isZerosVector(RHS.getNode()) &&
LHS->getOpcode() == ISD::SIGN_EXTEND &&
LHS->getOperand(0)->getValueType(0) == N->getValueType(0) &&
LHS->getOperand(0)->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
LHS->getOperand(0)->getOperand(0) == Pred)
return LHS->getOperand(0);
if (SDValue V = performSetCCPunpkCombine(N, DAG))
return V;
return SDValue();
}
// Optimize some simple tbz/tbnz cases. Returns the new operand and bit to test
// as well as whether the test should be inverted. This code is required to
// catch these cases (as opposed to standard dag combines) because
// AArch64ISD::TBZ is matched during legalization.
static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert,
SelectionDAG &DAG) {
if (!Op->hasOneUse())
return Op;
// We don't handle undef/constant-fold cases below, as they should have
// already been taken care of (e.g. and of 0, test of undefined shifted bits,
// etc.)
// (tbz (trunc x), b) -> (tbz x, b)
// This case is just here to enable more of the below cases to be caught.
if (Op->getOpcode() == ISD::TRUNCATE &&
Bit < Op->getValueType(0).getSizeInBits()) {
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
// (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
if (Op->getOpcode() == ISD::ANY_EXTEND &&
Bit < Op->getOperand(0).getValueSizeInBits()) {
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
if (Op->getNumOperands() != 2)
return Op;
auto *C = dyn_cast<ConstantSDNode>(Op->getOperand(1));
if (!C)
return Op;
switch (Op->getOpcode()) {
default:
return Op;
// (tbz (and x, m), b) -> (tbz x, b)
case ISD::AND:
if ((C->getZExtValue() >> Bit) & 1)
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
return Op;
// (tbz (shl x, c), b) -> (tbz x, b-c)
case ISD::SHL:
if (C->getZExtValue() <= Bit &&
(Bit - C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
Bit = Bit - C->getZExtValue();
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
return Op;
// (tbz (sra x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits in x
case ISD::SRA:
Bit = Bit + C->getZExtValue();
if (Bit >= Op->getValueType(0).getSizeInBits())
Bit = Op->getValueType(0).getSizeInBits() - 1;
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
// (tbz (srl x, c), b) -> (tbz x, b+c)
case ISD::SRL:
if ((Bit + C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
Bit = Bit + C->getZExtValue();
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
return Op;
// (tbz (xor x, -1), b) -> (tbnz x, b)
case ISD::XOR:
if ((C->getZExtValue() >> Bit) & 1)
Invert = !Invert;
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
}
// Optimize test single bit zero/non-zero and branch.
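// e.g. via getTestBitOperand, (tbz (shl x, 2), 3) is rewritten as (tbz x, 1).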
static SDValue performTBZCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
unsigned Bit = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
bool Invert = false;
SDValue TestSrc = N->getOperand(1);
SDValue NewTestSrc = getTestBitOperand(TestSrc, Bit, Invert, DAG);
if (TestSrc == NewTestSrc)
return SDValue();
unsigned NewOpc = N->getOpcode();
if (Invert) {
if (NewOpc == AArch64ISD::TBZ)
NewOpc = AArch64ISD::TBNZ;
else {
assert(NewOpc == AArch64ISD::TBNZ);
NewOpc = AArch64ISD::TBZ;
}
}
SDLoc DL(N);
return DAG.getNode(NewOpc, DL, MVT::Other, N->getOperand(0), NewTestSrc,
DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3));
}
// vselect (v1i1 setcc) ->
// vselect (v1iXX setcc) (XX is the size of the compared operand type)
// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
// condition. If it can legalize "VSELECT v1i1" correctly, no need to combine
// such VSELECT.
static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
EVT CCVT = N0.getValueType();
if (isAllActivePredicate(DAG, N0))
return N->getOperand(1);
if (isAllInactivePredicate(N0))
return N->getOperand(2);
// Check for sign pattern (VSELECT setgt, iN lhs, -1, 1, -1) and transform
// into (OR (ASR lhs, N-1), 1), which requires fewer instructions for the
// supported types.
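// e.g. (vselect (setgt x, splat(-1)), splat(1), splat(-1))
//   -> (or (sra x, EltBits - 1), splat(1))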
SDValue SetCC = N->getOperand(0);
if (SetCC.getOpcode() == ISD::SETCC &&
SetCC.getOperand(2) == DAG.getCondCode(ISD::SETGT)) {
SDValue CmpLHS = SetCC.getOperand(0);
EVT VT = CmpLHS.getValueType();
SDNode *CmpRHS = SetCC.getOperand(1).getNode();
SDNode *SplatLHS = N->getOperand(1).getNode();
SDNode *SplatRHS = N->getOperand(2).getNode();
APInt SplatLHSVal;
if (CmpLHS.getValueType() == N->getOperand(1).getValueType() &&
VT.isSimple() &&
is_contained(
makeArrayRef({MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
MVT::v2i32, MVT::v4i32, MVT::v2i64}),
VT.getSimpleVT().SimpleTy) &&
ISD::isConstantSplatVector(SplatLHS, SplatLHSVal) &&
SplatLHSVal.isOne() && ISD::isConstantSplatVectorAllOnes(CmpRHS) &&
ISD::isConstantSplatVectorAllOnes(SplatRHS)) {
unsigned NumElts = VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops(
NumElts, DAG.getConstant(VT.getScalarSizeInBits() - 1, SDLoc(N),
VT.getScalarType()));
SDValue Val = DAG.getBuildVector(VT, SDLoc(N), Ops);
auto Shift = DAG.getNode(ISD::SRA, SDLoc(N), VT, CmpLHS, Val);
auto Or = DAG.getNode(ISD::OR, SDLoc(N), VT, Shift, N->getOperand(1));
return Or;
}
}
if (N0.getOpcode() != ISD::SETCC ||
CCVT.getVectorElementCount() != ElementCount::getFixed(1) ||
CCVT.getVectorElementType() != MVT::i1)
return SDValue();
EVT ResVT = N->getValueType(0);
EVT CmpVT = N0.getOperand(0).getValueType();
// Only combine when the result type is of the same size as the compared
// operands.
if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
return SDValue();
SDValue IfTrue = N->getOperand(1);
SDValue IfFalse = N->getOperand(2);
SetCC = DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
N0.getOperand(0), N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,
IfTrue, IfFalse);
}
/// A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with
/// the compare-mask instructions rather than going via NZCV, even if LHS and
/// RHS are really scalar. This replaces any scalar setcc in the above pattern
/// with a vector one followed by a DUP shuffle on the result.
static SDValue performSelectCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0);
EVT ResVT = N->getValueType(0);
if (N0.getOpcode() != ISD::SETCC)
return SDValue();
if (ResVT.isScalableVector())
return SDValue();
// Make sure the SETCC result is either i1 (initial DAG), or i32, the lowered
// scalar SetCCResultType. We also don't expect vectors, because we assume
// that selects fed by vector SETCCs are canonicalized to VSELECT.
assert((N0.getValueType() == MVT::i1 || N0.getValueType() == MVT::i32) &&
"Scalar-SETCC feeding SELECT has unexpected result type!");
// If NumMaskElts == 0, the comparison is larger than the select result. The
// largest real NEON comparison is 64 bits per lane, which means the result is
// at most 32 bits and an illegal vector. Just bail out for now.
EVT SrcVT = N0.getOperand(0).getValueType();
// Don't try to do this optimization when the setcc itself has i1 operands.
// There are no legal vectors of i1, so this would be pointless.
if (SrcVT == MVT::i1)
return SDValue();
int NumMaskElts = ResVT.getSizeInBits() / SrcVT.getSizeInBits();
if (!ResVT.isVector() || NumMaskElts == 0)
return SDValue();
SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumMaskElts);
EVT CCVT = SrcVT.changeVectorElementTypeToInteger();
// Also bail out if the vector CCVT isn't the same size as ResVT.
// This can happen if the SETCC operand size doesn't divide the ResVT size
// (e.g., f64 vs v3f32).
if (CCVT.getSizeInBits() != ResVT.getSizeInBits())
return SDValue();
// Make sure we didn't create illegal types, if we're not supposed to.
assert(DCI.isBeforeLegalize() ||
DAG.getTargetLoweringInfo().isTypeLegal(SrcVT));
// First perform a vector comparison, where lane 0 is the one we're interested
// in.
SDLoc DL(N0);
SDValue LHS =
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0));
SDValue RHS =
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(1));
SDValue SetCC = DAG.getNode(ISD::SETCC, DL, CCVT, LHS, RHS, N0.getOperand(2));
// Now duplicate the comparison mask we want across all other lanes.
SmallVector<int, 8> DUPMask(CCVT.getVectorNumElements(), 0);
SDValue Mask = DAG.getVectorShuffle(CCVT, DL, SetCC, SetCC, DUPMask);
Mask = DAG.getNode(ISD::BITCAST, DL,
ResVT.changeVectorElementTypeToInteger(), Mask);
return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
}
/// Get rid of unnecessary NVCASTs (that don't change the type).
static SDValue performNVCASTCombine(SDNode *N) {
if (N->getValueType(0) == N->getOperand(0).getValueType())
return N->getOperand(0);
return SDValue();
}
// If all users of the globaladdr are of the form (globaladdr + constant), find
// the smallest constant, fold it into the globaladdr's offset and rewrite the
// globaladdr as (globaladdr + constant) - constant.
static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *Subtarget,
const TargetMachine &TM) {
auto *GN = cast<GlobalAddressSDNode>(N);
if (Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
AArch64II::MO_NO_FLAG)
return SDValue();
uint64_t MinOffset = -1ull;
for (SDNode *N : GN->uses()) {
if (N->getOpcode() != ISD::ADD)
return SDValue();
auto *C = dyn_cast<ConstantSDNode>(N->getOperand(0));
if (!C)
C = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!C)
return SDValue();
MinOffset = std::min(MinOffset, C->getZExtValue());
}
uint64_t Offset = MinOffset + GN->getOffset();
// Require that the new offset is larger than the existing one. Otherwise, we
// can end up oscillating between two possible DAGs, for example,
// (add (add globaladdr + 10, -1), 1) and (add globaladdr + 9, 1).
if (Offset <= uint64_t(GN->getOffset()))
return SDValue();
// Check whether folding this offset is legal. It must not go out of bounds of
// the referenced object to avoid violating the code model, and must be
// smaller than 2^21 because this is the largest offset expressible in all
// object formats.
//
// This check also prevents us from folding negative offsets, which will end
// up being treated in the same way as large positive ones. They could also
// cause code model violations, and aren't really common enough to matter.
if (Offset >= (1 << 21))
return SDValue();
const GlobalValue *GV = GN->getGlobal();
Type *T = GV->getValueType();
if (!T->isSized() ||
Offset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
return SDValue();
SDLoc DL(GN);
SDValue Result = DAG.getGlobalAddress(GV, DL, MVT::i64, Offset);
return DAG.getNode(ISD::SUB, DL, MVT::i64, Result,
DAG.getConstant(MinOffset, DL, MVT::i64));
}
// Turns the vector of indices into a vector of byte offsets by scaling Offset
// by (BitWidth / 8).
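// e.g. for 32-bit elements each index is shifted left by 2, i.e. multiplied
// by 4.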
static SDValue getScaledOffsetForBitWidth(SelectionDAG &DAG, SDValue Offset,
SDLoc DL, unsigned BitWidth) {
assert(Offset.getValueType().isScalableVector() &&
"This method is only for scalable vectors of offsets");
SDValue Shift = DAG.getConstant(Log2_32(BitWidth / 8), DL, MVT::i64);
SDValue SplatShift = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Shift);
return DAG.getNode(ISD::SHL, DL, MVT::nxv2i64, Offset, SplatShift);
}
/// Check if the value of \p OffsetInBytes can be used as an immediate for
/// the gather load/prefetch and scatter store instructions with vector base and
/// immediate offset addressing mode:
///
/// [<Zn>.[S|D]{, #<imm>}]
///
/// where <imm> = sizeof(<T>) * k, for k = 0, 1, ..., 31.
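/// E.g. with 4-byte elements the valid immediates are 0, 4, 8, ..., 124.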
inline static bool isValidImmForSVEVecImmAddrMode(unsigned OffsetInBytes,
unsigned ScalarSizeInBytes) {
// The immediate is not a multiple of the scalar size.
if (OffsetInBytes % ScalarSizeInBytes)
return false;
// The immediate is out of range.
if (OffsetInBytes / ScalarSizeInBytes > 31)
return false;
return true;
}
/// Check if the value of \p Offset represents a valid immediate for the SVE
/// gather load/prefetch and scatter store instructions with vector base and
/// immediate offset addressing mode:
///
/// [<Zn>.[S|D]{, #<imm>}]
///
/// where <imm> = sizeof(<T>) * k, for k = 0, 1, ..., 31.
static bool isValidImmForSVEVecImmAddrMode(SDValue Offset,
unsigned ScalarSizeInBytes) {
ConstantSDNode *OffsetConst = dyn_cast<ConstantSDNode>(Offset.getNode());
return OffsetConst && isValidImmForSVEVecImmAddrMode(
OffsetConst->getZExtValue(), ScalarSizeInBytes);
}
static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG,
unsigned Opcode,
bool OnlyPackedOffsets = true) {
const SDValue Src = N->getOperand(2);
const EVT SrcVT = Src->getValueType(0);
assert(SrcVT.isScalableVector() &&
"Scatter stores are only possible for SVE vectors");
SDLoc DL(N);
MVT SrcElVT = SrcVT.getVectorElementType().getSimpleVT();
// Make sure that source data will fit into an SVE register
if (SrcVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
return SDValue();
// For FPs, ACLE only supports _packed_ single and double precision types.
if (SrcElVT.isFloatingPoint())
if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64))
return SDValue();
// Depending on the addressing mode, this is either a pointer or a vector of
// pointers (that fits into one register)
SDValue Base = N->getOperand(4);
// Depending on the addressing mode, this is either a single offset or a
// vector of offsets (that fits into one register)
SDValue Offset = N->getOperand(5);
// For "scalar + vector of indices", just scale the indices. This only
// applies to non-temporal scatters because there's no instruction that takes
// indices.
if (Opcode == AArch64ISD::SSTNT1_INDEX_PRED) {
Offset =
getScaledOffsetForBitWidth(DAG, Offset, DL, SrcElVT.getSizeInBits());
Opcode = AArch64ISD::SSTNT1_PRED;
}
// In the case of non-temporal scatter stores there's only one SVE instruction
// per data-size: "scalar + vector", i.e.
// * stnt1{b|h|w|d} { z0.s }, p0, [z0.s, x0]
// Since we do have intrinsics that allow the arguments to be in a different
// order, we may need to swap them to match the spec.
if (Opcode == AArch64ISD::SSTNT1_PRED && Offset.getValueType().isVector())
std::swap(Base, Offset);
// SST1_IMM requires that the offset is an immediate that is:
// * a multiple of #SizeInBytes,
// * in the range [0, 31 x #SizeInBytes],
// where #SizeInBytes is the size in bytes of the stored items. For
// immediates outside that range and non-immediate scalar offsets use SST1 or
// SST1_UXTW instead.
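// e.g. for 8-byte (doubleword) stores the offset must be one of 0, 8, ..., 248.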
if (Opcode == AArch64ISD::SST1_IMM_PRED) {
if (!isValidImmForSVEVecImmAddrMode(Offset,
SrcVT.getScalarSizeInBits() / 8)) {
if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
Opcode = AArch64ISD::SST1_UXTW_PRED;
else
Opcode = AArch64ISD::SST1_PRED;
std::swap(Base, Offset);
}
}
auto &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isTypeLegal(Base.getValueType()))
return SDValue();
// Some scatter store variants allow unpacked offsets, but only as nxv2i32
// vectors. These are implicitly sign (sxtw) or zero (uxtw) extended to
// nxv2i64. Legalize accordingly.
if (!OnlyPackedOffsets &&
Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset).getValue(0);
if (!TLI.isTypeLegal(Offset.getValueType()))
return SDValue();
// Source value type that is representable in hardware
EVT HwSrcVt = getSVEContainerType(SrcVT);
// Keep the original type of the input data to store - this is needed to be
// able to select the correct instruction, e.g. ST1B, ST1H, ST1W and ST1D. For
// FP values we want the integer equivalent, so just use HwSrcVt.
SDValue InputVT = DAG.getValueType(SrcVT);
if (SrcVT.isFloatingPoint())
InputVT = DAG.getValueType(HwSrcVt);
SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue SrcNew;
if (Src.getValueType().isFloatingPoint())
SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Src);
else
SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Src);
SDValue Ops[] = {N->getOperand(0), // Chain
SrcNew,
N->getOperand(3), // Pg
Base,
Offset,
InputVT};
return DAG.getNode(Opcode, DL, VTs, Ops);
}
static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG,
unsigned Opcode,
bool OnlyPackedOffsets = true) {
const EVT RetVT = N->getValueType(0);
assert(RetVT.isScalableVector() &&
"Gather loads are only possible for SVE vectors");
SDLoc DL(N);
// Make sure that the loaded data will fit into an SVE register
if (RetVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
return SDValue();
// Depending on the addressing mode, this is either a pointer or a vector of
// pointers (that fits into one register)
SDValue Base = N->getOperand(3);
// Depending on the addressing mode, this is either a single offset or a
// vector of offsets (that fits into one register)
SDValue Offset = N->getOperand(4);
// For "scalar + vector of indices", just scale the indices. This only
// applies to non-temporal gathers because there's no instruction that takes
// indices.
if (Opcode == AArch64ISD::GLDNT1_INDEX_MERGE_ZERO) {
Offset = getScaledOffsetForBitWidth(DAG, Offset, DL,
RetVT.getScalarSizeInBits());
Opcode = AArch64ISD::GLDNT1_MERGE_ZERO;
}
// In the case of non-temporal gather loads there's only one SVE instruction
// per data-size: "scalar + vector", i.e.
// * ldnt1{b|h|w|d} { z0.s }, p0/z, [z0.s, x0]
// Since we do have intrinsics that allow the arguments to be in a different
// order, we may need to swap them to match the spec.
if (Opcode == AArch64ISD::GLDNT1_MERGE_ZERO &&
Offset.getValueType().isVector())
std::swap(Base, Offset);
// GLD{FF}1_IMM requires that the offset is an immediate that is:
// * a multiple of #SizeInBytes,
// * in the range [0, 31 x #SizeInBytes],
// where #SizeInBytes is the size in bytes of the loaded items. For
// immediates outside that range and non-immediate scalar offsets use
// GLD1_MERGE_ZERO or GLD1_UXTW_MERGE_ZERO instead.
if (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO ||
Opcode == AArch64ISD::GLDFF1_IMM_MERGE_ZERO) {
if (!isValidImmForSVEVecImmAddrMode(Offset,
RetVT.getScalarSizeInBits() / 8)) {
if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
? AArch64ISD::GLD1_UXTW_MERGE_ZERO
: AArch64ISD::GLDFF1_UXTW_MERGE_ZERO;
else
Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
? AArch64ISD::GLD1_MERGE_ZERO
: AArch64ISD::GLDFF1_MERGE_ZERO;
std::swap(Base, Offset);
}
}
auto &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isTypeLegal(Base.getValueType()))
return SDValue();
// Some gather load variants allow unpacked offsets, but only as nxv2i32
// vectors. These are implicitly sign (sxtw) or zero (uxtw) extended to
// nxv2i64. Legalize accordingly.
if (!OnlyPackedOffsets &&
Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset).getValue(0);
// Return value type that is representable in hardware
EVT HwRetVt = getSVEContainerType(RetVT);
// Keep the original output value type around - this is needed to be able to
// select the correct instruction, e.g. LD1B, LD1H, LD1W and LD1D. For FP
// values we want the integer equivalent, so just use HwRetVt.
SDValue OutVT = DAG.getValueType(RetVT);
if (RetVT.isFloatingPoint())
OutVT = DAG.getValueType(HwRetVt);
SDVTList VTs = DAG.getVTList(HwRetVt, MVT::Other);
SDValue Ops[] = {N->getOperand(0), // Chain
N->getOperand(2), // Pg
Base, Offset, OutVT};
SDValue Load = DAG.getNode(Opcode, DL, VTs, Ops);
SDValue LoadChain = SDValue(Load.getNode(), 1);
if (RetVT.isInteger() && (RetVT != HwRetVt))
Load = DAG.getNode(ISD::TRUNCATE, DL, RetVT, Load.getValue(0));
// If the original return value was FP, bitcast accordingly. Doing it here
// means that we can avoid adding TableGen patterns for FPs.
if (RetVT.isFloatingPoint())
Load = DAG.getNode(ISD::BITCAST, DL, RetVT, Load.getValue(0));
return DAG.getMergeValues({Load, LoadChain}, DL);
}
static SDValue
performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Src = N->getOperand(0);
unsigned Opc = Src->getOpcode();
// Sign extend of an unsigned unpack -> signed unpack
if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
unsigned SOpc = Opc == AArch64ISD::UUNPKHI ? AArch64ISD::SUNPKHI
: AArch64ISD::SUNPKLO;
// Push the sign extend to the operand of the unpack
// This is necessary where, for example, the operand of the unpack
// is another unpack:
// 4i32 sign_extend_inreg (4i32 uunpklo(8i16 uunpklo (16i8 opnd)), from 4i8)
// ->
// 4i32 sunpklo (8i16 sign_extend_inreg(8i16 uunpklo (16i8 opnd), from 8i8)
// ->
// 4i32 sunpklo(8i16 sunpklo(16i8 opnd))
SDValue ExtOp = Src->getOperand(0);
auto VT = cast<VTSDNode>(N->getOperand(1))->getVT();
EVT EltTy = VT.getVectorElementType();
(void)EltTy;
assert((EltTy == MVT::i8 || EltTy == MVT::i16 || EltTy == MVT::i32) &&
"Sign extending from an invalid type");
EVT ExtVT = VT.getDoubleNumVectorElementsVT(*DAG.getContext());
SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ExtOp.getValueType(),
ExtOp, DAG.getValueType(ExtVT));
return DAG.getNode(SOpc, DL, N->getValueType(0), Ext);
}
if (DCI.isBeforeLegalizeOps())
return SDValue();
if (!EnableCombineMGatherIntrinsics)
return SDValue();
// SVE load nodes (e.g. AArch64ISD::GLD1) are straightforward candidates
// for DAG Combine with SIGN_EXTEND_INREG. Bail out for all other nodes.
unsigned NewOpc;
unsigned MemVTOpNum = 4;
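// The memory VT is operand 3 for the contiguous LD1/LDNF1/LDFF1 nodes and
// operand 4 for the gather (GLD*) nodes, hence the per-case overrides below.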
switch (Opc) {
case AArch64ISD::LD1_MERGE_ZERO:
NewOpc = AArch64ISD::LD1S_MERGE_ZERO;
MemVTOpNum = 3;
break;
case AArch64ISD::LDNF1_MERGE_ZERO:
NewOpc = AArch64ISD::LDNF1S_MERGE_ZERO;
MemVTOpNum = 3;
break;
case AArch64ISD::LDFF1_MERGE_ZERO:
NewOpc = AArch64ISD::LDFF1S_MERGE_ZERO;
MemVTOpNum = 3;
break;
case AArch64ISD::GLD1_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_MERGE_ZERO;
break;
case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
break;
case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
break;
case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLD1_IMM_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_IMM_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_IMM_MERGE_ZERO;
break;
case AArch64ISD::GLDNT1_MERGE_ZERO:
NewOpc = AArch64ISD::GLDNT1S_MERGE_ZERO;
break;
default:
return SDValue();
}
EVT SignExtSrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
EVT SrcMemVT = cast<VTSDNode>(Src->getOperand(MemVTOpNum))->getVT();
if ((SignExtSrcVT != SrcMemVT) || !Src.hasOneUse())
return SDValue();
EVT DstVT = N->getValueType(0);
SDVTList VTs = DAG.getVTList(DstVT, MVT::Other);
SmallVector<SDValue, 5> Ops;
for (unsigned I = 0; I < Src->getNumOperands(); ++I)
Ops.push_back(Src->getOperand(I));
SDValue ExtLoad = DAG.getNode(NewOpc, SDLoc(N), VTs, Ops);
DCI.CombineTo(N, ExtLoad);
DCI.CombineTo(Src.getNode(), ExtLoad, ExtLoad.getValue(1));
// Return N so it doesn't get rechecked
return SDValue(N, 0);
}
/// Legalize the gather prefetch (scalar + vector addressing mode) when the
/// offset vector is an unpacked 32-bit scalable vector. The other cases (Offset
/// != nxv2i32) do not need legalization.
static SDValue legalizeSVEGatherPrefetchOffsVec(SDNode *N, SelectionDAG &DAG) {
const unsigned OffsetPos = 4;
SDValue Offset = N->getOperand(OffsetPos);
// Not an unpacked vector, bail out.
if (Offset.getValueType().getSimpleVT().SimpleTy != MVT::nxv2i32)
return SDValue();
// Extend the unpacked offset vector to 64-bit lanes.
SDLoc DL(N);
Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset);
SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
// Replace the offset operand with the 64-bit one.
Ops[OffsetPos] = Offset;
return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
}
/// Combines a node carrying the intrinsic
/// `aarch64_sve_prf<T>_gather_scalar_offset` into a node that uses
/// `aarch64_sve_prfb_gather_uxtw_index` when the scalar offset passed to
/// `aarch64_sve_prf<T>_gather_scalar_offset` is not a valid immediate for the
/// sve gather prefetch instruction with vector plus immediate addressing mode.
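/// In practice this swaps the vector base and the scalar offset operands so
/// that the node matches the (scalar base + vector of indices) form used by
/// the uxtw-index intrinsic.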
static SDValue combineSVEPrefetchVecBaseImmOff(SDNode *N, SelectionDAG &DAG,
unsigned ScalarSizeInBytes) {
const unsigned ImmPos = 4, OffsetPos = 3;
// No need to combine the node if the immediate is valid...
if (isValidImmForSVEVecImmAddrMode(N->getOperand(ImmPos), ScalarSizeInBytes))
return SDValue();
// ...otherwise swap the vector base with the scalar offset...
SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
std::swap(Ops[ImmPos], Ops[OffsetPos]);
// ...and remap the intrinsic `aarch64_sve_prf<T>_gather_scalar_offset` to
// `aarch64_sve_prfb_gather_uxtw_index`.
SDLoc DL(N);
Ops[1] = DAG.getConstant(Intrinsic::aarch64_sve_prfb_gather_uxtw_index, DL,
MVT::i64);
return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
}
// Return true if the vector operation can guarantee that only the first lane
// of its result contains data, with all bits in other lanes set to zero.
static bool isLanes1toNKnownZero(SDValue Op) {
switch (Op.getOpcode()) {
default:
return false;
case AArch64ISD::ANDV_PRED:
case AArch64ISD::EORV_PRED:
case AArch64ISD::FADDA_PRED:
case AArch64ISD::FADDV_PRED:
case AArch64ISD::FMAXNMV_PRED:
case AArch64ISD::FMAXV_PRED:
case AArch64ISD::FMINNMV_PRED:
case AArch64ISD::FMINV_PRED:
case AArch64ISD::ORV_PRED:
case AArch64ISD::SADDV_PRED:
case AArch64ISD::SMAXV_PRED:
case AArch64ISD::SMINV_PRED:
case AArch64ISD::UADDV_PRED:
case AArch64ISD::UMAXV_PRED:
case AArch64ISD::UMINV_PRED:
return true;
}
}
static SDValue removeRedundantInsertVectorElt(SDNode *N) {
assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && "Unexpected node!");
SDValue InsertVec = N->getOperand(0);
SDValue InsertElt = N->getOperand(1);
SDValue InsertIdx = N->getOperand(2);
// We only care about inserts into the first element...
if (!isNullConstant(InsertIdx))
return SDValue();
// ...of a zero'd vector...
if (!ISD::isConstantSplatVectorAllZeros(InsertVec.getNode()))
return SDValue();
// ...where the inserted data was previously extracted...
if (InsertElt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
SDValue ExtractVec = InsertElt.getOperand(0);
SDValue ExtractIdx = InsertElt.getOperand(1);
// ...from the first element of a vector.
if (!isNullConstant(ExtractIdx))
return SDValue();
// If we get here we are effectively trying to zero lanes 1-N of a vector.
// Ensure there's no type conversion going on.
if (N->getValueType(0) != ExtractVec.getValueType())
return SDValue();
if (!isLanes1toNKnownZero(ExtractVec))
return SDValue();
// The explicit zeroing is redundant.
return ExtractVec;
}
static SDValue
performInsertVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
if (SDValue Res = removeRedundantInsertVectorElt(N))
return Res;
return performPostLD1Combine(N, DCI, true);
}
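// Perform a floating-point VECTOR_SPLICE on a packed SVE integer container
// type: bitcast the operands to integers, extend, splice, then truncate and
// bitcast back to the original floating-point type.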
SDValue performSVESpliceCombine(SDNode *N, SelectionDAG &DAG) {
EVT Ty = N->getValueType(0);
if (Ty.isInteger())
return SDValue();
EVT IntTy = Ty.changeVectorElementTypeToInteger();
EVT ExtIntTy = getPackedSVEVectorVT(IntTy.getVectorElementCount());
if (ExtIntTy.getVectorElementType().getScalarSizeInBits() <
IntTy.getVectorElementType().getScalarSizeInBits())
return SDValue();
SDLoc DL(N);
SDValue LHS = DAG.getAnyExtOrTrunc(DAG.getBitcast(IntTy, N->getOperand(0)),
DL, ExtIntTy);
SDValue RHS = DAG.getAnyExtOrTrunc(DAG.getBitcast(IntTy, N->getOperand(1)),
DL, ExtIntTy);
SDValue Idx = N->getOperand(2);
SDValue Splice = DAG.getNode(ISD::VECTOR_SPLICE, DL, ExtIntTy, LHS, RHS, Idx);
SDValue Trunc = DAG.getAnyExtOrTrunc(Splice, DL, IntTy);
return DAG.getBitcast(Ty, Trunc);
}
SDValue performFPExtendCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::FP_ROUND)
return SDValue();
// fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
// We purposefully don't care about legality of the nodes here as we know
// they can be split down into something legal.
if (DCI.isBeforeLegalizeOps() && ISD::isNormalLoad(N0.getNode()) &&
N0.hasOneUse() && Subtarget->useSVEForFixedLengthVectors() &&
VT.isFixedLengthVector() &&
VT.getFixedSizeInBits() >= Subtarget->getMinSVEVectorSizeInBits()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
LN0->getChain(), LN0->getBasePtr(),
N0.getValueType(), LN0->getMemOperand());
DCI.CombineTo(N, ExtLoad);
DCI.CombineTo(N0.getNode(),
DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(),
ExtLoad, DAG.getIntPtrConstant(1, SDLoc(N0))),
ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
return SDValue();
}
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default:
LLVM_DEBUG(dbgs() << "Custom combining: skipping\n");
break;
case ISD::ADD:
case ISD::SUB:
return performAddSubCombine(N, DCI, DAG);
case ISD::XOR:
return performXorCombine(N, DAG, DCI, Subtarget);
case ISD::MUL:
return performMulCombine(N, DAG, DCI, Subtarget);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
return performIntToFpCombine(N, DAG, Subtarget);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
return performFpToIntCombine(N, DAG, DCI, Subtarget);
case ISD::FDIV:
return performFDivCombine(N, DAG, DCI, Subtarget);
case ISD::OR:
return performORCombine(N, DCI, Subtarget);
case ISD::AND:
return performANDCombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN:
return performIntrinsicCombine(N, DCI, Subtarget);
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
return performExtendCombine(N, DCI, DAG);
case ISD::SIGN_EXTEND_INREG:
return performSignExtendInRegCombine(N, DCI, DAG);
case ISD::TRUNCATE:
return performVectorTruncateCombine(N, DCI, DAG);
case ISD::CONCAT_VECTORS:
return performConcatVectorsCombine(N, DCI, DAG);
case ISD::INSERT_SUBVECTOR:
return performInsertSubvectorCombine(N, DCI, DAG);
case ISD::SELECT:
return performSelectCombine(N, DCI);
case ISD::VSELECT:
return performVSelectCombine(N, DCI.DAG);
case ISD::SETCC:
return performSETCCCombine(N, DAG);
case ISD::LOAD:
if (performTBISimplification(N->getOperand(1), DCI, DAG))
return SDValue(N, 0);
break;
case ISD::STORE:
return performSTORECombine(N, DCI, DAG, Subtarget);
case ISD::VECTOR_SPLICE:
return performSVESpliceCombine(N, DAG);
case ISD::FP_EXTEND:
return performFPExtendCombine(N, DAG, DCI, Subtarget);
case AArch64ISD::BRCOND:
return performBRCONDCombine(N, DCI, DAG);
case AArch64ISD::TBNZ:
case AArch64ISD::TBZ:
return performTBZCombine(N, DCI, DAG);
case AArch64ISD::CSEL:
return performCSELCombine(N, DCI, DAG);
case AArch64ISD::DUP:
return performPostLD1Combine(N, DCI, false);
case AArch64ISD::NVCAST:
return performNVCASTCombine(N);
case AArch64ISD::SPLICE:
return performSpliceCombine(N, DAG);
case AArch64ISD::UUNPKLO:
case AArch64ISD::UUNPKHI:
return performUnpackCombine(N, DAG);
case AArch64ISD::UZP1:
return performUzpCombine(N, DAG);
case AArch64ISD::SETCC_MERGE_ZERO:
return performSetccMergeZeroCombine(N, DAG);
case AArch64ISD::GLD1_MERGE_ZERO:
case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_IMM_MERGE_ZERO:
case AArch64ISD::GLD1S_MERGE_ZERO:
case AArch64ISD::GLD1S_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1S_UXTW_MERGE_ZERO:
case AArch64ISD::GLD1S_SXTW_MERGE_ZERO:
case AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1S_IMM_MERGE_ZERO:
return performGLD1Combine(N, DAG);
case AArch64ISD::VASHR:
case AArch64ISD::VLSHR:
return performVectorShiftCombine(N, *this, DCI);
case AArch64ISD::SUNPKLO:
return performSunpkloCombine(N, DAG);
case ISD::INSERT_VECTOR_ELT:
return performInsertVectorEltCombine(N, DCI);
case ISD::EXTRACT_VECTOR_ELT:
return performExtractVectorEltCombine(N, DAG);
case ISD::VECREDUCE_ADD:
return performVecReduceAddCombine(N, DCI.DAG, Subtarget);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
return combineSVEPrefetchVecBaseImmOff(N, DAG, 1 /*=ScalarSizeInBytes*/);
case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
return combineSVEPrefetchVecBaseImmOff(N, DAG, 2 /*=ScalarSizeInBytes*/);
case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
return combineSVEPrefetchVecBaseImmOff(N, DAG, 4 /*=ScalarSizeInBytes*/);
case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
return combineSVEPrefetchVecBaseImmOff(N, DAG, 8 /*=ScalarSizeInBytes*/);
case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
return legalizeSVEGatherPrefetchOffsVec(N, DAG);
case Intrinsic::aarch64_neon_ld2:
case Intrinsic::aarch64_neon_ld3:
case Intrinsic::aarch64_neon_ld4:
case Intrinsic::aarch64_neon_ld1x2:
case Intrinsic::aarch64_neon_ld1x3:
case Intrinsic::aarch64_neon_ld1x4:
case Intrinsic::aarch64_neon_ld2lane:
case Intrinsic::aarch64_neon_ld3lane:
case Intrinsic::aarch64_neon_ld4lane:
case Intrinsic::aarch64_neon_ld2r:
case Intrinsic::aarch64_neon_ld3r:
case Intrinsic::aarch64_neon_ld4r:
case Intrinsic::aarch64_neon_st2:
case Intrinsic::aarch64_neon_st3:
case Intrinsic::aarch64_neon_st4:
case Intrinsic::aarch64_neon_st1x2:
case Intrinsic::aarch64_neon_st1x3:
case Intrinsic::aarch64_neon_st1x4:
case Intrinsic::aarch64_neon_st2lane:
case Intrinsic::aarch64_neon_st3lane:
case Intrinsic::aarch64_neon_st4lane:
return performNEONPostLDSTCombine(N, DCI, DAG);
case Intrinsic::aarch64_sve_ldnt1:
return performLDNT1Combine(N, DAG);
case Intrinsic::aarch64_sve_ld1rq:
return performLD1ReplicateCombine<AArch64ISD::LD1RQ_MERGE_ZERO>(N, DAG);
case Intrinsic::aarch64_sve_ld1ro:
return performLD1ReplicateCombine<AArch64ISD::LD1RO_MERGE_ZERO>(N, DAG);
case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldnt1_gather:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldnt1_gather_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDNT1_INDEX_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ld1:
return performLD1Combine(N, DAG, AArch64ISD::LD1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldnf1:
return performLD1Combine(N, DAG, AArch64ISD::LDNF1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldff1:
return performLD1Combine(N, DAG, AArch64ISD::LDFF1_MERGE_ZERO);
case Intrinsic::aarch64_sve_st1:
return performST1Combine(N, DAG);
case Intrinsic::aarch64_sve_stnt1:
return performSTNT1Combine(N, DAG);
case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
case Intrinsic::aarch64_sve_stnt1_scatter:
return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
case Intrinsic::aarch64_sve_stnt1_scatter_index:
return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_INDEX_PRED);
case Intrinsic::aarch64_sve_ld1_gather:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ld1_gather_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLD1_SCALED_MERGE_ZERO);
case Intrinsic::aarch64_sve_ld1_gather_sxtw:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_SXTW_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_uxtw:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_UXTW_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_IMM_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldff1_gather:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldff1_gather_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_SCALED_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_SXTW_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_UXTW_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_IMM_MERGE_ZERO);
case Intrinsic::aarch64_sve_st1_scatter:
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_PRED);
case Intrinsic::aarch64_sve_st1_scatter_index:
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SCALED_PRED);
case Intrinsic::aarch64_sve_st1_scatter_sxtw:
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SXTW_PRED,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_uxtw:
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_UXTW_PRED,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
return performScatterStoreCombine(N, DAG,
AArch64ISD::SST1_SXTW_SCALED_PRED,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
return performScatterStoreCombine(N, DAG,
AArch64ISD::SST1_UXTW_SCALED_PRED,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_IMM_PRED);
case Intrinsic::aarch64_sve_tuple_get: {
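// Extract the IdxConst'th subvector of the tuple as an EXTRACT_SUBVECTOR at
// element index IdxConst * NumLanes.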
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SDValue Src1 = N->getOperand(2);
SDValue Idx = N->getOperand(3);
uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
EVT ResVT = N->getValueType(0);
uint64_t NumLanes = ResVT.getVectorElementCount().getKnownMinValue();
SDValue ExtIdx = DAG.getVectorIdxConstant(IdxConst * NumLanes, DL);
SDValue Val =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, ExtIdx);
return DAG.getMergeValues({Val, Chain}, DL);
}
case Intrinsic::aarch64_sve_tuple_set: {
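// Rebuild the tuple as a CONCAT_VECTORS of its subvectors, with the subvector
// at index IdxConst replaced by Vec.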
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SDValue Tuple = N->getOperand(2);
SDValue Idx = N->getOperand(3);
SDValue Vec = N->getOperand(4);
EVT TupleVT = Tuple.getValueType();
uint64_t TupleLanes = TupleVT.getVectorElementCount().getKnownMinValue();
uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
uint64_t NumLanes =
Vec.getValueType().getVectorElementCount().getKnownMinValue();
if ((TupleLanes % NumLanes) != 0)
report_fatal_error("invalid tuple vector!");
uint64_t NumVecs = TupleLanes / NumLanes;
SmallVector<SDValue, 4> Opnds;
for (unsigned I = 0; I < NumVecs; ++I) {
if (I == IdxConst)
Opnds.push_back(Vec);
else {
SDValue ExtIdx = DAG.getVectorIdxConstant(I * NumLanes, DL);
Opnds.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
Vec.getValueType(), Tuple, ExtIdx));
}
}
SDValue Concat =
DAG.getNode(ISD::CONCAT_VECTORS, DL, Tuple.getValueType(), Opnds);
return DAG.getMergeValues({Concat, Chain}, DL);
}
case Intrinsic::aarch64_sve_tuple_create2:
case Intrinsic::aarch64_sve_tuple_create3:
case Intrinsic::aarch64_sve_tuple_create4: {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SmallVector<SDValue, 4> Opnds;
for (unsigned I = 2; I < N->getNumOperands(); ++I)
Opnds.push_back(N->getOperand(I));
EVT VT = Opnds[0].getValueType();
EVT EltVT = VT.getVectorElementType();
EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
VT.getVectorElementCount() *
(N->getNumOperands() - 2));
SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, DestVT, Opnds);
return DAG.getMergeValues({Concat, Chain}, DL);
}
case Intrinsic::aarch64_sve_ld2:
case Intrinsic::aarch64_sve_ld3:
case Intrinsic::aarch64_sve_ld4: {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SDValue Mask = N->getOperand(2);
SDValue BasePtr = N->getOperand(3);
SDValue LoadOps[] = {Chain, Mask, BasePtr};
unsigned IntrinsicID =
cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
SDValue Result =
LowerSVEStructLoad(IntrinsicID, LoadOps, N->getValueType(0), DAG, DL);
return DAG.getMergeValues({Result, Chain}, DL);
}
case Intrinsic::aarch64_rndr:
case Intrinsic::aarch64_rndrrs: {
unsigned IntrinsicID =
cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
auto Register =
(IntrinsicID == Intrinsic::aarch64_rndr ? AArch64SysReg::RNDR
: AArch64SysReg::RNDRRS);
SDLoc DL(N);
SDValue A = DAG.getNode(
AArch64ISD::MRS, DL, DAG.getVTList(MVT::i64, MVT::Glue, MVT::Other),
N->getOperand(0), DAG.getConstant(Register, DL, MVT::i64));
SDValue B = DAG.getNode(
AArch64ISD::CSINC, DL, MVT::i32, DAG.getConstant(0, DL, MVT::i32),
DAG.getConstant(0, DL, MVT::i32),
DAG.getConstant(AArch64CC::NE, DL, MVT::i32), A.getValue(1));
return DAG.getMergeValues(
{A, DAG.getZExtOrTrunc(B, DL, MVT::i1), A.getValue(2)}, DL);
}
default:
break;
}
break;
case ISD::GlobalAddress:
return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
}
return SDValue();
}
// Check if the return value is used as only a return value, as otherwise
// we can't perform a tail-call. In particular, we need to check for
// target ISD nodes that are returns and any other "odd" constructs
// that the generic analysis code won't necessarily catch.
bool AArch64TargetLowering::isUsedByReturnOnly(SDNode *N,
SDValue &Chain) const {
if (N->getNumValues() != 1)
return false;
if (!N->hasNUsesOfValue(1, 0))
return false;
SDValue TCChain = Chain;
SDNode *Copy = *N->use_begin();
if (Copy->getOpcode() == ISD::CopyToReg) {
// If the copy has a glue operand, we conservatively assume it isn't safe to
// perform a tail call.
if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
MVT::Glue)
return false;
TCChain = Copy->getOperand(0);
} else if (Copy->getOpcode() != ISD::FP_EXTEND)
return false;
bool HasRet = false;
for (SDNode *Node : Copy->uses()) {
if (Node->getOpcode() != AArch64ISD::RET_FLAG)
return false;
HasRet = true;
}
if (!HasRet)
return false;
Chain = TCChain;
return true;
}
// Return whether an instruction can potentially be optimized to a tail
// call. This will cause the optimizers to attempt to move, or duplicate,
// return instructions to help enable tail call optimizations for this
// instruction.
bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return CI->isTailCall();
}
bool AArch64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
SDValue &Offset,
ISD::MemIndexedMode &AM,
bool &IsInc,
SelectionDAG &DAG) const {
if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
return false;
Base = Op->getOperand(0);
// All of the indexed addressing mode instructions take a signed
// 9-bit immediate offset.
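// e.g. an offset of 16 can be pre/post-indexed (ldr x0, [x1, #16]!), whereas
// anything outside [-256, 255] cannot.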
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
int64_t RHSC = RHS->getSExtValue();
if (Op->getOpcode() == ISD::SUB)
RHSC = -(uint64_t)RHSC;
if (!isInt<9>(RHSC))
return false;
IsInc = (Op->getOpcode() == ISD::ADD);
Offset = Op->getOperand(1);
return true;
}
return false;
}
bool AArch64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const {
EVT VT;
SDValue Ptr;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
VT = LD->getMemoryVT();
Ptr = LD->getBasePtr();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
VT = ST->getMemoryVT();
Ptr = ST->getBasePtr();
} else
return false;
bool IsInc;
if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG))
return false;
AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC;
return true;
}
bool AArch64TargetLowering::getPostIndexedAddressParts(
SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset,
ISD::MemIndexedMode &AM, SelectionDAG &DAG) const {
EVT VT;
SDValue Ptr;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
VT = LD->getMemoryVT();
Ptr = LD->getBasePtr();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
VT = ST->getMemoryVT();
Ptr = ST->getBasePtr();
} else
return false;
bool IsInc;
if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG))
return false;
// Post-indexing updates the base, so it's not a valid transform
// if that's not the same as the load's pointer.
if (Ptr != Base)
return false;
AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
return true;
}
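// Expand BITCAST nodes with illegal result types: scalable fp-to-int bitcasts
// go via the corresponding SVE container type, while f16/bf16 -> i16 bitcasts
// go via an f32/i32 round trip.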
void AArch64TargetLowering::ReplaceBITCASTResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
SDLoc DL(N);
SDValue Op = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT SrcVT = Op.getValueType();
if (VT.isScalableVector() && !isTypeLegal(VT) && isTypeLegal(SrcVT)) {
assert(!VT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
"Expected fp->int bitcast!");
SDValue CastResult = getSVESafeBitCast(getSVEContainerType(VT), Op, DAG);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, CastResult));
return;
}
if (VT != MVT::i16 || (SrcVT != MVT::f16 && SrcVT != MVT::bf16))
return;
Op = SDValue(
DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
DAG.getUNDEF(MVT::i32), Op,
DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
0);
Op = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
}
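// Expand a vector reduction whose input type is too wide: split the operand
// in half, combine the halves with InterOp, then reduce across lanes with
// AcrossOp.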
static void ReplaceReductionResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG, unsigned InterOp,
unsigned AcrossOp) {
EVT LoVT, HiVT;
SDValue Lo, Hi;
SDLoc dl(N);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
SDValue InterVal = DAG.getNode(InterOp, dl, LoVT, Lo, Hi);
SDValue SplitVal = DAG.getNode(AcrossOp, dl, LoVT, InterVal);
Results.push_back(SplitVal);
}
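// Split an i128 value into its 64-bit halves: Lo = bits [63:0],
// Hi = bits [127:64].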
static std::pair<SDValue, SDValue> splitInt128(SDValue N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, N);
SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
DAG.getNode(ISD::SRL, DL, MVT::i128, N,
DAG.getConstant(64, DL, MVT::i64)));
return std::make_pair(Lo, Hi);
}
void AArch64TargetLowering::ReplaceExtractSubVectorResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
SDValue In = N->getOperand(0);
EVT InVT = In.getValueType();
// Common code will handle these just fine.
if (!InVT.isScalableVector() || !InVT.isInteger())
return;
SDLoc DL(N);
EVT VT = N->getValueType(0);
// The following checks bail if this is not a halving operation.
ElementCount ResEC = VT.getVectorElementCount();
if (InVT.getVectorElementCount() != (ResEC * 2))
return;
auto *CIndex = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!CIndex)
return;
unsigned Index = CIndex->getZExtValue();
if ((Index != 0) && (Index != ResEC.getKnownMinValue()))
return;
unsigned Opcode = (Index == 0) ? AArch64ISD::UUNPKLO : AArch64ISD::UUNPKHI;
EVT ExtendedHalfVT = VT.widenIntegerVectorElementType(*DAG.getContext());
SDValue Half = DAG.getNode(Opcode, DL, ExtendedHalfVT, N->getOperand(0));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Half));
}
// Create an even/odd pair of X registers holding integer value V.
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
SDLoc dl(V.getNode());
SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i64);
SDValue VHi = DAG.getAnyExtOrTrunc(
DAG.getNode(ISD::SRL, dl, MVT::i128, V, DAG.getConstant(64, dl, MVT::i64)),
dl, MVT::i64);
if (DAG.getDataLayout().isBigEndian())
std::swap (VLo, VHi);
SDValue RegClass =
DAG.getTargetConstant(AArch64::XSeqPairsClassRegClassID, dl, MVT::i32);
SDValue SubReg0 = DAG.getTargetConstant(AArch64::sube64, dl, MVT::i32);
SDValue SubReg1 = DAG.getTargetConstant(AArch64::subo64, dl, MVT::i32);
const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
return SDValue(
DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
}
static void ReplaceCMP_SWAP_128Results(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
assert(N->getValueType(0) == MVT::i128 &&
"AtomicCmpSwap on types less than 128 should be legal");
MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
// LSE has a 128-bit compare and swap (CASP), but i128 is not a legal type,
// so lower it here, wrapped in REG_SEQUENCE and EXTRACT_SUBREG.
SDValue Ops[] = {
createGPRPairNode(DAG, N->getOperand(2)), // Compare value
createGPRPairNode(DAG, N->getOperand(3)), // Store value
N->getOperand(1), // Ptr
N->getOperand(0), // Chain in
};
unsigned Opcode;
switch (MemOp->getMergedOrdering()) {
case AtomicOrdering::Monotonic:
Opcode = AArch64::CASPX;
break;
case AtomicOrdering::Acquire:
Opcode = AArch64::CASPAX;
break;
case AtomicOrdering::Release:
Opcode = AArch64::CASPLX;
break;
case AtomicOrdering::AcquireRelease:
case AtomicOrdering::SequentiallyConsistent:
Opcode = AArch64::CASPALX;
break;
default:
llvm_unreachable("Unexpected ordering!");
}
MachineSDNode *CmpSwap = DAG.getMachineNode(
Opcode, SDLoc(N), DAG.getVTList(MVT::Untyped, MVT::Other), Ops);
DAG.setNodeMemRefs(CmpSwap, {MemOp});
unsigned SubReg1 = AArch64::sube64, SubReg2 = AArch64::subo64;
if (DAG.getDataLayout().isBigEndian())
std::swap(SubReg1, SubReg2);
SDValue Lo = DAG.getTargetExtractSubreg(SubReg1, SDLoc(N), MVT::i64,
SDValue(CmpSwap, 0));
SDValue Hi = DAG.getTargetExtractSubreg(SubReg2, SDLoc(N), MVT::i64,
SDValue(CmpSwap, 0));
Results.push_back(
DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128, Lo, Hi));
Results.push_back(SDValue(CmpSwap, 1)); // Chain out
return;
}
unsigned Opcode;
switch (MemOp->getMergedOrdering()) {
case AtomicOrdering::Monotonic:
Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
break;
case AtomicOrdering::Acquire:
Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
break;
case AtomicOrdering::Release:
Opcode = AArch64::CMP_SWAP_128_RELEASE;
break;
case AtomicOrdering::AcquireRelease:
case AtomicOrdering::SequentiallyConsistent:
Opcode = AArch64::CMP_SWAP_128;
break;
default:
llvm_unreachable("Unexpected ordering!");
}
auto Desired = splitInt128(N->getOperand(2), DAG);
auto New = splitInt128(N->getOperand(3), DAG);
SDValue Ops[] = {N->getOperand(1), Desired.first, Desired.second,
New.first, New.second, N->getOperand(0)};
SDNode *CmpSwap = DAG.getMachineNode(
Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i32, MVT::Other),
Ops);
DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
Results.push_back(SDValue(CmpSwap, 3));
}
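// ReplaceNodeResults is the custom type-legalisation hook: it is called for
// nodes whose result type is illegal (typically i128 here). Each case either
// pushes legal replacement values onto Results or returns without adding
// anything, which tells the common legalizer to handle the node itself.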
void AArch64TargetLowering::ReplaceNodeResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
switch (N->getOpcode()) {
default:
llvm_unreachable("Don't know how to custom expand this");
case ISD::BITCAST:
ReplaceBITCASTResults(N, Results, DAG);
return;
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
Results.push_back(LowerVECREDUCE(SDValue(N, 0), DAG));
return;
case ISD::CTPOP:
if (SDValue Result = LowerCTPOP(SDValue(N, 0), DAG))
Results.push_back(Result);
return;
case AArch64ISD::SADDV:
ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::SADDV);
return;
case AArch64ISD::UADDV:
ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::UADDV);
return;
case AArch64ISD::SMINV:
ReplaceReductionResults(N, Results, DAG, ISD::SMIN, AArch64ISD::SMINV);
return;
case AArch64ISD::UMINV:
ReplaceReductionResults(N, Results, DAG, ISD::UMIN, AArch64ISD::UMINV);
return;
case AArch64ISD::SMAXV:
ReplaceReductionResults(N, Results, DAG, ISD::SMAX, AArch64ISD::SMAXV);
return;
case AArch64ISD::UMAXV:
ReplaceReductionResults(N, Results, DAG, ISD::UMAX, AArch64ISD::UMAXV);
return;
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT:
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
// Let normal code take care of it by not adding anything to Results.
return;
case ISD::ATOMIC_CMP_SWAP:
ReplaceCMP_SWAP_128Results(N, Results, DAG, Subtarget);
return;
case ISD::ATOMIC_LOAD:
case ISD::LOAD: {
assert(SDValue(N, 0).getValueType() == MVT::i128 &&
"unexpected load's value type");
MemSDNode *LoadNode = cast<MemSDNode>(N);
if ((!LoadNode->isVolatile() && !LoadNode->isAtomic()) ||
LoadNode->getMemoryVT() != MVT::i128) {
// Loads that are neither volatile nor atomic, or that are not 128 bits wide,
// are left for AArch64's load/store optimizer.
return;
}
SDValue Result = DAG.getMemIntrinsicNode(
AArch64ISD::LDP, SDLoc(N),
DAG.getVTList({MVT::i64, MVT::i64, MVT::Other}),
{LoadNode->getChain(), LoadNode->getBasePtr()}, LoadNode->getMemoryVT(),
LoadNode->getMemOperand());
SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
Result.getValue(0), Result.getValue(1));
Results.append({Pair, Result.getValue(2) /* Chain */});
return;
}
case ISD::EXTRACT_SUBVECTOR:
ReplaceExtractSubVectorResults(N, Results, DAG);
return;
case ISD::INSERT_SUBVECTOR:
// Custom lowering has been requested for INSERT_SUBVECTOR -- but delegate
// to common code for result type legalisation
return;
case ISD::INTRINSIC_WO_CHAIN: {
EVT VT = N->getValueType(0);
assert((VT == MVT::i8 || VT == MVT::i16) &&
"custom lowering for unexpected type");
ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(0));
Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
switch (IntID) {
default:
return;
case Intrinsic::aarch64_sve_clasta_n: {
SDLoc DL(N);
auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
auto V = DAG.getNode(AArch64ISD::CLASTA_N, DL, MVT::i32,
N->getOperand(1), Op2, N->getOperand(3));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
case Intrinsic::aarch64_sve_clastb_n: {
SDLoc DL(N);
auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
auto V = DAG.getNode(AArch64ISD::CLASTB_N, DL, MVT::i32,
N->getOperand(1), Op2, N->getOperand(3));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
case Intrinsic::aarch64_sve_lasta: {
SDLoc DL(N);
auto V = DAG.getNode(AArch64ISD::LASTA, DL, MVT::i32,
N->getOperand(1), N->getOperand(2));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
case Intrinsic::aarch64_sve_lastb: {
SDLoc DL(N);
auto V = DAG.getNode(AArch64ISD::LASTB, DL, MVT::i32,
N->getOperand(1), N->getOperand(2));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
}
}
}
}
bool AArch64TargetLowering::useLoadStackGuardNode() const {
if (Subtarget->isTargetAndroid() || Subtarget->isTargetFuchsia())
return TargetLowering::useLoadStackGuardNode();
return true;
}
unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const {
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal if there are three or more FDIVs.
return 3;
}
TargetLoweringBase::LegalizeTypeAction
AArch64TargetLowering::getPreferredVectorAction(MVT VT) const {
// During type legalization, we prefer to widen v1i8, v1i16, v1i32 to v8i8,
// v4i16, v2i32 respectively, instead of promoting them.
if (VT == MVT::v1i8 || VT == MVT::v1i16 || VT == MVT::v1i32 ||
VT == MVT::v1f32)
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
// In v8.4a, ldp and stp instructions are guaranteed to be single-copy atomic
// provided the address is 16-byte aligned.
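// For example, with LSE2 a 16-byte-aligned atomic i128 load can then be
// selected to a single LDP instead of being expanded to a CAS or LL/SC
// sequence.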
bool AArch64TargetLowering::isOpSuitableForLDPSTP(const Instruction *I) const {
if (!Subtarget->hasLSE2())
return false;
if (auto LI = dyn_cast<LoadInst>(I))
return LI->getType()->getPrimitiveSizeInBits() == 128 &&
LI->getAlignment() >= 16;
if (auto SI = dyn_cast<StoreInst>(I))
return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
SI->getAlignment() >= 16;
return false;
}
bool AArch64TargetLowering::shouldInsertFencesForAtomic(
const Instruction *I) const {
return isOpSuitableForLDPSTP(I);
}
// Loads and stores less than 128-bits are already atomic; ones above that
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong.
bool AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
if (Size != 128)
return false;
return !isOpSuitableForLDPSTP(SI);
}
// Loads and stores less than 128-bits are already atomic; ones above that
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong.
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
unsigned Size = LI->getType()->getPrimitiveSizeInBits();
if (Size != 128 || isOpSuitableForLDPSTP(LI))
return AtomicExpansionKind::None;
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
// implement atomicrmw without spilling. If the target address is also on the
// stack and close enough to the spill slot, this can lead to a situation
// where the monitor always gets cleared and the atomic operation can never
// succeed. So at -O0 lower this operation to a CAS loop.
if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
return AtomicExpansionKind::CmpXChg;
return AtomicExpansionKind::LLSC;
}
// For the real atomic operations, we have ldxr/stxr up to 128 bits.
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
if (AI->isFloatingPointOperation())
return AtomicExpansionKind::CmpXChg;
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
if (Size > 128) return AtomicExpansionKind::None;
// Nand is not supported in LSE.
// Leave 128 bits to LLSC or CmpXChg.
if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
if (Subtarget->hasLSE())
return AtomicExpansionKind::None;
if (Subtarget->outlineAtomics()) {
// [U]Min/[U]Max RMW atomics are used in __sync_fetch_ libcalls so far.
// Don't outline them unless
// (1) high level <atomic> support approved:
// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
// (2) low level libgcc and compiler-rt support implemented by:
// min/max outline atomics helpers
if (AI->getOperation() != AtomicRMWInst::Min &&
AI->getOperation() != AtomicRMWInst::Max &&
AI->getOperation() != AtomicRMWInst::UMin &&
AI->getOperation() != AtomicRMWInst::UMax) {
return AtomicExpansionKind::None;
}
}
}
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
// implement atomicrmw without spilling. If the target address is also on the
// stack and close enough to the spill slot, this can lead to a situation
// where the monitor always gets cleared and the atomic operation can never
// succeed. So at -O0 lower this operation to a CAS loop.
if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
return AtomicExpansionKind::CmpXChg;
return AtomicExpansionKind::LLSC;
}
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
AtomicCmpXchgInst *AI) const {
// If subtarget has LSE, leave cmpxchg intact for codegen.
if (Subtarget->hasLSE() || Subtarget->outlineAtomics())
return AtomicExpansionKind::None;
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
// implement cmpxchg without spilling. If the address being exchanged is also
// on the stack and close enough to the spill slot, this can lead to a
// situation where the monitor always gets cleared and the atomic operation
// can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
return AtomicExpansionKind::None;
// 128-bit atomic cmpxchg is weird; AtomicExpand doesn't know how to expand
// it.
unsigned Size = AI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
if (Size > 64)
return AtomicExpansionKind::None;
return AtomicExpansionKind::LLSC;
}
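// emitLoadLinked and emitStoreConditional below supply the target pieces of
// the LL/SC loops built by the AtomicExpand pass: ldxr/ldaxr paired with
// stxr/stlxr for ordinary sizes, and ldxp/ldaxp with stxp/stlxp for i128,
// where the value is split into (or recombined from) two i64 halves because
// i128 is not a legal type.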
Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
Type *ValueTy, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
bool IsAcquire = isAcquireOrStronger(Ord);
// Since i128 isn't legal and intrinsics don't get type-lowered, the ldrexd
// intrinsic must return {i64, i64} and we have to recombine them into a
// single i128 here.
if (ValueTy->getPrimitiveSizeInBits() == 128) {
Intrinsic::ID Int =
IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
Function *Ldxr = Intrinsic::getDeclaration(M, Int);
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi");
Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
return Builder.CreateOr(
Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 64)), "val64");
}
Type *Tys[] = { Addr->getType() };
Intrinsic::ID Int =
IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);
const DataLayout &DL = M->getDataLayout();
IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(ValueTy));
Value *Trunc = Builder.CreateTrunc(Builder.CreateCall(Ldxr, Addr), IntEltTy);
return Builder.CreateBitCast(Trunc, ValueTy);
}
void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
IRBuilderBase &Builder) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
}
Value *AArch64TargetLowering::emitStoreConditional(IRBuilderBase &Builder,
Value *Val, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
bool IsRelease = isReleaseOrStronger(Ord);
// Since the intrinsics must have legal type, the i128 intrinsics take two
// parameters: "i64, i64". We must marshal Val into the appropriate form
// before the call.
if (Val->getType()->getPrimitiveSizeInBits() == 128) {
Intrinsic::ID Int =
IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
Function *Stxr = Intrinsic::getDeclaration(M, Int);
Type *Int64Ty = Type::getInt64Ty(M->getContext());
Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
}
Intrinsic::ID Int =
IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr;
Type *Tys[] = { Addr->getType() };
Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
const DataLayout &DL = M->getDataLayout();
IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType()));
Val = Builder.CreateBitCast(Val, IntValTy);
return Builder.CreateCall(Stxr,
{Builder.CreateZExtOrBitCast(
Val, Stxr->getFunctionType()->getParamType(0)),
Addr});
}
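// Consecutive registers are required for scalable types wider than a single
// SVE register (known minimum size greater than 128 bits), and for array
// types whose flattened members all have the same value type.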
bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg,
const DataLayout &DL) const {
if (!Ty->isArrayTy()) {
const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
return TySize.isScalable() && TySize.getKnownMinSize() > 128;
}
// All non-aggregate members of the type must have the same type.
SmallVector<EVT> ValueVTs;
ComputeValueVTs(*this, DL, Ty, ValueVTs);
return is_splat(ValueVTs);
}
bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
EVT) const {
return false;
}
static Value *UseTlsOffset(IRBuilderBase &IRB, unsigned Offset) {
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
Function *ThreadPointerFunc =
Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
return IRB.CreatePointerCast(
IRB.CreateConstGEP1_32(IRB.getInt8Ty(), IRB.CreateCall(ThreadPointerFunc),
Offset),
IRB.getInt8PtrTy()->getPointerTo(0));
}
Value *AArch64TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
// Android provides a fixed TLS slot for the stack cookie. See the definition
// of TLS_SLOT_STACK_GUARD in
// https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
if (Subtarget->isTargetAndroid())
return UseTlsOffset(IRB, 0x28);
// Fuchsia is similar.
// <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
if (Subtarget->isTargetFuchsia())
return UseTlsOffset(IRB, -0x10);
return TargetLowering::getIRStackGuard(IRB);
}
void AArch64TargetLowering::insertSSPDeclarations(Module &M) const {
// MSVC CRT provides functionalities for stack protection.
if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) {
// MSVC CRT has a global variable holding security cookie.
M.getOrInsertGlobal("__security_cookie",
Type::getInt8PtrTy(M.getContext()));
// MSVC CRT has a function to validate security cookie.
FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
"__security_check_cookie", Type::getVoidTy(M.getContext()),
Type::getInt8PtrTy(M.getContext()));
if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
F->setCallingConv(CallingConv::Win64);
F->addParamAttr(0, Attribute::AttrKind::InReg);
}
return;
}
TargetLowering::insertSSPDeclarations(M);
}
Value *AArch64TargetLowering::getSDagStackGuard(const Module &M) const {
// MSVC CRT has a global variable holding security cookie.
if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
return M.getGlobalVariable("__security_cookie");
return TargetLowering::getSDagStackGuard(M);
}
Function *AArch64TargetLowering::getSSPStackGuardCheck(const Module &M) const {
// MSVC CRT has a function to validate security cookie.
if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
return M.getFunction("__security_check_cookie");
return TargetLowering::getSSPStackGuardCheck(M);
}
Value *
AArch64TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
// Android provides a fixed TLS slot for the SafeStack pointer. See the
// definition of TLS_SLOT_SAFESTACK in
// https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
if (Subtarget->isTargetAndroid())
return UseTlsOffset(IRB, 0x48);
// Fuchsia is similar.
// <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
if (Subtarget->isTargetFuchsia())
return UseTlsOffset(IRB, -0x8);
return TargetLowering::getSafeStackPointerLocation(IRB);
}
bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial(
const Instruction &AndI) const {
// Only sink the 'and' mask to the cmp's use block if it is masking a single
// bit, since this likely allows the and/cmp/br to be folded into a single tbz
// instruction. It may be beneficial to sink in other cases, but we would have
// to check that the cmp would not get folded into the br to form a cbz for
// these to be beneficial.
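// For example, `(x & 0x10) == 0` feeding a conditional branch can be
// selected as a single `tbz` on bit 4.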
ConstantInt* Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
if (!Mask)
return false;
return Mask->getValue().isPowerOf2();
}
bool AArch64TargetLowering::
shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
unsigned OldShiftOpcode, unsigned NewShiftOpcode,
SelectionDAG &DAG) const {
// Does baseline recommend not to perform the fold by default?
if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
return false;
// Else, if this is a vector shift, prefer 'shl'.
return X.getValueType().isScalarInteger() || NewShiftOpcode == ISD::SHL;
}
bool AArch64TargetLowering::shouldExpandShift(SelectionDAG &DAG,
SDNode *N) const {
if (DAG.getMachineFunction().getFunction().hasMinSize() &&
!Subtarget->isTargetWindows() && !Subtarget->isTargetDarwin())
return false;
return true;
}
void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
// Update IsSplitCSR in AArch64FunctionInfo.
AArch64FunctionInfo *AFI = Entry->getParent()->getInfo<AArch64FunctionInfo>();
AFI->setIsSplitCSR(true);
}
void AArch64TargetLowering::insertCopiesSplitCSR(
MachineBasicBlock *Entry,
const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
if (!IStart)
return;
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
MachineBasicBlock::iterator MBBI = Entry->begin();
for (const MCPhysReg *I = IStart; *I; ++I) {
const TargetRegisterClass *RC = nullptr;
if (AArch64::GPR64RegClass.contains(*I))
RC = &AArch64::GPR64RegClass;
else if (AArch64::FPR64RegClass.contains(*I))
RC = &AArch64::FPR64RegClass;
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
Register NewVR = MRI->createVirtualRegister(RC);
// Create copy from CSR to a virtual register.
// FIXME: this currently does not emit CFI pseudo-instructions, it works
// fine for CXX_FAST_TLS since the C++-style TLS access functions should be
// nounwind. If we want to generalize this later, we may need to emit
// CFI pseudo-instructions.
assert(Entry->getParent()->getFunction().hasFnAttribute(
Attribute::NoUnwind) &&
"Function should be nounwind in insertCopiesSplitCSR!");
Entry->addLiveIn(*I);
BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
.addReg(*I);
// Insert the copy-back instructions right before the terminator.
for (auto *Exit : Exits)
BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
TII->get(TargetOpcode::COPY), *I)
.addReg(NewVR);
}
}
bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
// Integer division on AArch64 is expensive. However, when aggressively
// optimizing for code size, we prefer to use a div instruction, as it is
// usually smaller than the alternative sequence.
// The exception to this is vector division. Since AArch64 doesn't have vector
// integer division, leaving the division as-is is a loss even in terms of
// size, because it will have to be scalarized, while the alternative code
// sequence can be performed in vector form.
bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
return OptSize && !VT.isVector();
}
bool AArch64TargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
// We want inc-of-add for scalars and sub-of-not for vectors.
return VT.isScalarInteger();
}
bool AArch64TargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
EVT VT) const {
// v8f16 without fp16 need to be extended to v8f32, which is more difficult to
// legalize.
if (FPVT == MVT::v8f16 && !Subtarget->hasFullFP16())
return false;
return TargetLowering::shouldConvertFpToSat(Op, FPVT, VT);
}
bool AArch64TargetLowering::enableAggressiveFMAFusion(EVT VT) const {
return Subtarget->hasAggressiveFMA() && VT.isFloatingPoint();
}
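// On AAPCS64 targets, va_list is a struct of three pointers (__stack,
// __gr_top, __vr_top) and two 32-bit offsets (__gr_offs, __vr_offs), hence
// three pointer-sized fields plus 2 * 32 bits below. Darwin and Windows use
// a single pointer instead.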
unsigned
AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const {
if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
return getPointerTy(DL).getSizeInBits();
return 3 * getPointerTy(DL).getSizeInBits() + 2 * 32;
}
void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
MachineFrameInfo &MFI = MF.getFrameInfo();
// If we have any vulnerable SVE stack objects then the stack protector
// needs to be placed at the top of the SVE stack area, as the SVE locals
// are placed above the other locals, so we allocate it as if it were a
// scalable vector.
// FIXME: It may be worthwhile having a specific interface for this rather
// than doing it here in finalizeLowering.
if (MFI.hasStackProtectorIndex()) {
for (unsigned int i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
if (MFI.getStackID(i) == TargetStackID::ScalableVector &&
MFI.getObjectSSPLayout(i) != MachineFrameInfo::SSPLK_None) {
MFI.setStackID(MFI.getStackProtectorIndex(),
TargetStackID::ScalableVector);
MFI.setObjectAlignment(MFI.getStackProtectorIndex(), Align(16));
break;
}
}
}
MFI.computeMaxCallFrameSize(MF);
TargetLoweringBase::finalizeLowering(MF);
}
// Unlike X86, we let frame lowering assign offsets to all catch objects.
bool AArch64TargetLowering::needsFixedCatchObjects() const {
return false;
}
bool AArch64TargetLowering::shouldLocalize(
const MachineInstr &MI, const TargetTransformInfo *TTI) const {
switch (MI.getOpcode()) {
case TargetOpcode::G_GLOBAL_VALUE: {
// On Darwin, TLS global vars get selected into function calls, which
// we don't want localized, as they can get moved into the middle of
// another call sequence.
const GlobalValue &GV = *MI.getOperand(1).getGlobal();
if (GV.isThreadLocal() && Subtarget->isTargetMachO())
return false;
break;
}
// If we legalized G_GLOBAL_VALUE into ADRP + G_ADD_LOW, mark both as being
// localizable.
case AArch64::ADRP:
case AArch64::G_ADD_LOW:
return true;
default:
break;
}
return TargetLoweringBase::shouldLocalize(MI, TTI);
}
bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
if (isa<ScalableVectorType>(Inst.getType()))
return true;
for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
if (isa<ScalableVectorType>(Inst.getOperand(i)->getType()))
return true;
if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
if (isa<ScalableVectorType>(AI->getAllocatedType()))
return true;
}
return false;
}
// Return the largest legal scalable vector type that matches VT's element type.
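// Only the element type matters here; for example, v8i32, v4i32 and v2i32
// all map to the nxv4i32 container.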
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT) {
assert(VT.isFixedLengthVector() &&
DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Expected legal fixed length vector!");
switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
default:
llvm_unreachable("unexpected element type for SVE container");
case MVT::i8:
return EVT(MVT::nxv16i8);
case MVT::i16:
return EVT(MVT::nxv8i16);
case MVT::i32:
return EVT(MVT::nxv4i32);
case MVT::i64:
return EVT(MVT::nxv2i64);
case MVT::f16:
return EVT(MVT::nxv8f16);
case MVT::f32:
return EVT(MVT::nxv4f32);
case MVT::f64:
return EVT(MVT::nxv2f64);
}
}
// Return a PTRUE with active lanes corresponding to the extent of VT.
static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL,
EVT VT) {
assert(VT.isFixedLengthVector() &&
DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Expected legal fixed length vector!");
Optional<unsigned> PgPattern =
getSVEPredPatternFromNumElements(VT.getVectorNumElements());
assert(PgPattern && "Unexpected element count for SVE predicate");
// For vectors that are exactly getMaxSVEVectorSizeInBits big, we can use
// AArch64SVEPredPattern::all, which can enable the use of unpredicated
// variants of instructions when available.
const auto &Subtarget =
static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
if (MaxSVESize && MinSVESize == MaxSVESize &&
MaxSVESize == VT.getSizeInBits())
PgPattern = AArch64SVEPredPattern::all;
MVT MaskVT;
switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
default:
llvm_unreachable("unexpected element type for SVE predicate");
case MVT::i8:
MaskVT = MVT::nxv16i1;
break;
case MVT::i16:
case MVT::f16:
MaskVT = MVT::nxv8i1;
break;
case MVT::i32:
case MVT::f32:
MaskVT = MVT::nxv4i1;
break;
case MVT::i64:
case MVT::f64:
MaskVT = MVT::nxv2i1;
break;
}
return getPTrue(DAG, DL, MaskVT, *PgPattern);
}
static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL,
EVT VT) {
assert(VT.isScalableVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Expected legal scalable vector!");
auto PredTy = VT.changeVectorElementType(MVT::i1);
return getPTrue(DAG, DL, PredTy, AArch64SVEPredPattern::all);
}
static SDValue getPredicateForVector(SelectionDAG &DAG, SDLoc &DL, EVT VT) {
if (VT.isFixedLengthVector())
return getPredicateForFixedLengthVector(DAG, DL, VT);
return getPredicateForScalableVector(DAG, DL, VT);
}
// Grow V to consume an entire SVE register.
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
assert(VT.isScalableVector() &&
"Expected to convert into a scalable vector!");
assert(V.getValueType().isFixedLengthVector() &&
"Expected a fixed length vector operand!");
SDLoc DL(V);
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
}
// Shrink V so it's just big enough to maintain a VT's worth of data.
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
assert(VT.isFixedLengthVector() &&
"Expected to convert into a fixed length vector!");
assert(V.getValueType().isScalableVector() &&
"Expected a scalable vector operand!");
SDLoc DL(V);
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
}
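// Together, convertToScalableVector and convertFromScalableVector bridge the
// fixed-length and scalable worlds: the fixed-length value lives in the low
// elements of the scalable container (inserted at, and extracted from, index
// 0), and any lanes beyond the fixed length are undefined.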
// Convert all fixed length vector loads larger than NEON to masked_loads.
SDValue AArch64TargetLowering::LowerFixedLengthVectorLoadToSVE(
SDValue Op, SelectionDAG &DAG) const {
auto Load = cast<LoadSDNode>(Op);
SDLoc DL(Op);
EVT VT = Op.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
EVT LoadVT = ContainerVT;
EVT MemVT = Load->getMemoryVT();
auto Pg = getPredicateForFixedLengthVector(DAG, DL, VT);
if (VT.isFloatingPoint() && Load->getExtensionType() == ISD::EXTLOAD) {
LoadVT = ContainerVT.changeTypeToInteger();
MemVT = MemVT.changeTypeToInteger();
}
auto NewLoad = DAG.getMaskedLoad(
LoadVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(), Pg,
DAG.getUNDEF(LoadVT), MemVT, Load->getMemOperand(),
Load->getAddressingMode(), Load->getExtensionType());
if (VT.isFloatingPoint() && Load->getExtensionType() == ISD::EXTLOAD) {
EVT ExtendVT = ContainerVT.changeVectorElementType(
Load->getMemoryVT().getVectorElementType());
NewLoad = getSVESafeBitCast(ExtendVT, NewLoad, DAG);
NewLoad = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT,
Pg, NewLoad, DAG.getUNDEF(ContainerVT));
}
auto Result = convertFromScalableVector(DAG, VT, NewLoad);
SDValue MergedValues[2] = {Result, Load->getChain()};
return DAG.getMergeValues(MergedValues, DL);
}
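// Convert a fixed-length vector mask into an SVE predicate: an all-ones mask
// simply reuses the PTRUE for the fixed length, otherwise the mask is widened
// to the container type and compared against zero via SETCC_MERGE_ZERO.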
static SDValue convertFixedMaskToScalableVector(SDValue Mask,
SelectionDAG &DAG) {
SDLoc DL(Mask);
EVT InVT = Mask.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
if (ISD::isBuildVectorAllOnes(Mask.getNode()))
return Pg;
auto Op1 = convertToScalableVector(DAG, ContainerVT, Mask);
auto Op2 = DAG.getConstant(0, DL, ContainerVT);
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, Pg.getValueType(),
{Pg, Op1, Op2, DAG.getCondCode(ISD::SETNE)});
}
// Convert all fixed length vector loads larger than NEON to masked_loads.
SDValue AArch64TargetLowering::LowerFixedLengthVectorMLoadToSVE(
SDValue Op, SelectionDAG &DAG) const {
auto Load = cast<MaskedLoadSDNode>(Op);
SDLoc DL(Op);
EVT VT = Op.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
SDValue Mask = convertFixedMaskToScalableVector(Load->getMask(), DAG);
SDValue PassThru;
bool IsPassThruZeroOrUndef = false;
if (Load->getPassThru()->isUndef()) {
PassThru = DAG.getUNDEF(ContainerVT);
IsPassThruZeroOrUndef = true;
} else {
if (ContainerVT.isInteger())
PassThru = DAG.getConstant(0, DL, ContainerVT);
else
PassThru = DAG.getConstantFP(0, DL, ContainerVT);
if (isZerosVector(Load->getPassThru().getNode()))
IsPassThruZeroOrUndef = true;
}
auto NewLoad = DAG.getMaskedLoad(
ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
Mask, PassThru, Load->getMemoryVT(), Load->getMemOperand(),
Load->getAddressingMode(), Load->getExtensionType());
if (!IsPassThruZeroOrUndef) {
SDValue OldPassThru =
convertToScalableVector(DAG, ContainerVT, Load->getPassThru());
NewLoad = DAG.getSelect(DL, ContainerVT, Mask, NewLoad, OldPassThru);
}
auto Result = convertFromScalableVector(DAG, VT, NewLoad);
SDValue MergedValues[2] = {Result, Load->getChain()};
return DAG.getMergeValues(MergedValues, DL);
}
// Convert all fixed length vector stores larger than NEON to masked_stores.
SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
SDValue Op, SelectionDAG &DAG) const {
auto Store = cast<StoreSDNode>(Op);
SDLoc DL(Op);
EVT VT = Store->getValue().getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
EVT MemVT = Store->getMemoryVT();
auto Pg = getPredicateForFixedLengthVector(DAG, DL, VT);
auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
if (VT.isFloatingPoint() && Store->isTruncatingStore()) {
EVT TruncVT = ContainerVT.changeVectorElementType(
Store->getMemoryVT().getVectorElementType());
MemVT = MemVT.changeTypeToInteger();
NewValue = DAG.getNode(AArch64ISD::FP_ROUND_MERGE_PASSTHRU, DL, TruncVT, Pg,
NewValue, DAG.getTargetConstant(0, DL, MVT::i64),
DAG.getUNDEF(TruncVT));
NewValue =
getSVESafeBitCast(ContainerVT.changeTypeToInteger(), NewValue, DAG);
}
return DAG.getMaskedStore(Store->getChain(), DL, NewValue,
Store->getBasePtr(), Store->getOffset(), Pg, MemVT,
Store->getMemOperand(), Store->getAddressingMode(),
Store->isTruncatingStore());
}
SDValue AArch64TargetLowering::LowerFixedLengthVectorMStoreToSVE(
SDValue Op, SelectionDAG &DAG) const {
auto *Store = cast<MaskedStoreSDNode>(Op);
SDLoc DL(Op);
EVT VT = Store->getValue().getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
SDValue Mask = convertFixedMaskToScalableVector(Store->getMask(), DAG);
return DAG.getMaskedStore(
Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(),
Mask, Store->getMemoryVT(), Store->getMemOperand(),
Store->getAddressingMode(), Store->isTruncatingStore());
}
SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
EVT VT = Op.getValueType();
EVT EltVT = VT.getVectorElementType();
bool Signed = Op.getOpcode() == ISD::SDIV;
unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
bool Negated;
uint64_t SplatVal;
if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated)) {
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
SDValue Op2 = DAG.getTargetConstant(Log2_64(SplatVal), dl, MVT::i32);
SDValue Pg = getPredicateForFixedLengthVector(DAG, dl, VT);
SDValue Res = DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, ContainerVT, Pg, Op1, Op2);
if (Negated)
Res = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Res);
return convertFromScalableVector(DAG, VT, Res);
}
// Scalable vector i32/i64 DIV is supported.
if (EltVT == MVT::i32 || EltVT == MVT::i64)
return LowerToPredicatedOp(Op, DAG, PredOpcode, /*OverrideNEON=*/true);
// Scalable vector i8/i16 DIV is not supported. Promote it to i32.
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
EVT FixedWidenedVT = HalfVT.widenIntegerVectorElementType(*DAG.getContext());
EVT ScalableWidenedVT = getContainerForFixedLengthVector(DAG, FixedWidenedVT);
// If this is not a full vector, extend, div, and truncate it.
EVT WidenedVT = VT.widenIntegerVectorElementType(*DAG.getContext());
if (DAG.getTargetLoweringInfo().isTypeLegal(WidenedVT)) {
unsigned ExtendOpcode = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
SDValue Op0 = DAG.getNode(ExtendOpcode, dl, WidenedVT, Op.getOperand(0));
SDValue Op1 = DAG.getNode(ExtendOpcode, dl, WidenedVT, Op.getOperand(1));
SDValue Div = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0, Op1);
return DAG.getNode(ISD::TRUNCATE, dl, VT, Div);
}
// Convert the operands to scalable vectors.
SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
// Extend the scalable operands.
unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
SDValue Op0Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op0);
SDValue Op1Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op1);
SDValue Op0Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op0);
SDValue Op1Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op1);
// Convert back to fixed vectors so the DIV can be further lowered.
Op0Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op0Lo);
Op1Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op1Lo);
Op0Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op0Hi);
Op1Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op1Hi);
SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
Op0Lo, Op1Lo);
SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
Op0Hi, Op1Hi);
// Convert again to scalable vectors to truncate.
ResultLo = convertToScalableVector(DAG, ScalableWidenedVT, ResultLo);
ResultHi = convertToScalableVector(DAG, ScalableWidenedVT, ResultHi);
SDValue ScalableResult = DAG.getNode(AArch64ISD::UZP1, dl, ContainerVT,
ResultLo, ResultHi);
return convertFromScalableVector(DAG, VT, ScalableResult);
}
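// Fixed-length integer extends are lowered by repeatedly unpacking the low
// half with SUNPKLO/UUNPKLO until the requested element width is reached,
// e.g. extending from an nxv16i8 container steps through nxv8i16 and nxv4i32
// on the way to nxv2i64, stopping as soon as the element type matches.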
SDValue AArch64TargetLowering::LowerFixedLengthVectorIntExtendToSVE(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
Val = convertToScalableVector(DAG, ContainerVT, Val);
bool Signed = Op.getOpcode() == ISD::SIGN_EXTEND;
unsigned ExtendOpc = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
// Repeatedly unpack Val until the result is of the desired element type.
switch (ContainerVT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("unimplemented container type");
case MVT::nxv16i8:
Val = DAG.getNode(ExtendOpc, DL, MVT::nxv8i16, Val);
if (VT.getVectorElementType() == MVT::i16)
break;
LLVM_FALLTHROUGH;
case MVT::nxv8i16:
Val = DAG.getNode(ExtendOpc, DL, MVT::nxv4i32, Val);
if (VT.getVectorElementType() == MVT::i32)
break;
LLVM_FALLTHROUGH;
case MVT::nxv4i32:
Val = DAG.getNode(ExtendOpc, DL, MVT::nxv2i64, Val);
assert(VT.getVectorElementType() == MVT::i64 && "Unexpected element type!");
break;
}
return convertFromScalableVector(DAG, VT, Val);
}
SDValue AArch64TargetLowering::LowerFixedLengthVectorTruncateToSVE(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
Val = convertToScalableVector(DAG, ContainerVT, Val);
// Repeatedly truncate Val until the result is of the desired element type.
switch (ContainerVT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("unimplemented container type");
case MVT::nxv2i64:
Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv4i32, Val);
Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv4i32, Val, Val);
if (VT.getVectorElementType() == MVT::i32)
break;
LLVM_FALLTHROUGH;
case MVT::nxv4i32:
Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv8i16, Val);
Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv8i16, Val, Val);
if (VT.getVectorElementType() == MVT::i16)
break;
LLVM_FALLTHROUGH;
case MVT::nxv8i16:
Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv16i8, Val);
Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv16i8, Val, Val);
assert(VT.getVectorElementType() == MVT::i8 && "Unexpected element type!");
break;
}
return convertFromScalableVector(DAG, VT, Val);
}
SDValue AArch64TargetLowering::LowerFixedLengthExtractVectorElt(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
EVT InVT = Op.getOperand(0).getValueType();
assert(InVT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Op.getOperand(1));
}
SDValue AArch64TargetLowering::LowerFixedLengthInsertVectorElt(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
EVT InVT = Op.getOperand(0).getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
auto ScalableRes = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT, Op0,
Op.getOperand(1), Op.getOperand(2));
return convertFromScalableVector(DAG, VT, ScalableRes);
}
// Convert vector operation 'Op' to an equivalent predicated operation whereby
// the original operation's type is used to construct a suitable predicate.
// NOTE: The results for inactive lanes are undefined.
SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
SelectionDAG &DAG,
unsigned NewOp,
bool OverrideNEON) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
auto Pg = getPredicateForVector(DAG, DL, VT);
if (useSVEForFixedLengthVectorVT(VT, OverrideNEON)) {
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
// Create list of operands by converting existing ones to scalable types.
SmallVector<SDValue, 4> Operands = {Pg};
for (const SDValue &V : Op->op_values()) {
if (isa<CondCodeSDNode>(V)) {
Operands.push_back(V);
continue;
}
if (const VTSDNode *VTNode = dyn_cast<VTSDNode>(V)) {
EVT VTArg = VTNode->getVT().getVectorElementType();
EVT NewVTArg = ContainerVT.changeVectorElementType(VTArg);
Operands.push_back(DAG.getValueType(NewVTArg));
continue;
}
assert(useSVEForFixedLengthVectorVT(V.getValueType(), OverrideNEON) &&
"Only fixed length vectors are supported!");
Operands.push_back(convertToScalableVector(DAG, ContainerVT, V));
}
if (isMergePassthruOpcode(NewOp))
Operands.push_back(DAG.getUNDEF(ContainerVT));
auto ScalableRes = DAG.getNode(NewOp, DL, ContainerVT, Operands);
return convertFromScalableVector(DAG, VT, ScalableRes);
}
assert(VT.isScalableVector() && "Only expect to lower scalable vector op!");
SmallVector<SDValue, 4> Operands = {Pg};
for (const SDValue &V : Op->op_values()) {
assert((!V.getValueType().isVector() ||
V.getValueType().isScalableVector()) &&
"Only scalable vectors are supported!");
Operands.push_back(V);
}
if (isMergePassthruOpcode(NewOp))
Operands.push_back(DAG.getUNDEF(VT));
return DAG.getNode(NewOp, DL, VT, Operands, Op->getFlags());
}
// If a fixed length vector operation has no side effects when applied to
// undefined elements, we can safely use scalable vectors to perform the same
// operation without needing to worry about predication.
SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(useSVEForFixedLengthVectorVT(VT) &&
"Only expected to lower fixed length vector operation!");
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
// Create list of operands by converting existing ones to scalable types.
SmallVector<SDValue, 4> Ops;
for (const SDValue &V : Op->op_values()) {
assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
// Pass through non-vector operands.
if (!V.getValueType().isVector()) {
Ops.push_back(V);
continue;
}
// "cast" fixed length vector to a scalable vector.
assert(useSVEForFixedLengthVectorVT(V.getValueType()) &&
"Only fixed length vectors are supported!");
Ops.push_back(convertToScalableVector(DAG, ContainerVT, V));
}
auto ScalableRes = DAG.getNode(Op.getOpcode(), SDLoc(Op), ContainerVT, Ops);
return convertFromScalableVector(DAG, VT, ScalableRes);
}
SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp,
SelectionDAG &DAG) const {
SDLoc DL(ScalarOp);
SDValue AccOp = ScalarOp.getOperand(0);
SDValue VecOp = ScalarOp.getOperand(1);
EVT SrcVT = VecOp.getValueType();
EVT ResVT = SrcVT.getVectorElementType();
EVT ContainerVT = SrcVT;
if (SrcVT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
}
SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
// Convert the accumulator operand to a scalable vector.
AccOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT,
DAG.getUNDEF(ContainerVT), AccOp, Zero);
// Perform reduction.
SDValue Rdx = DAG.getNode(AArch64ISD::FADDA_PRED, DL, ContainerVT,
Pg, AccOp, VecOp);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx, Zero);
}
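// Reductions over i1 (predicate) vectors map onto predicate instructions:
// VECREDUCE_OR becomes a PTEST for ANY_ACTIVE, VECREDUCE_AND inverts the
// operand and tests for NONE_ACTIVE, and VECREDUCE_XOR takes the low bit of
// CNTP, i.e. the parity of the number of active lanes.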
SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
SelectionDAG &DAG) const {
SDLoc DL(ReduceOp);
SDValue Op = ReduceOp.getOperand(0);
EVT OpVT = Op.getValueType();
EVT VT = ReduceOp.getValueType();
if (!OpVT.isScalableVector() || OpVT.getVectorElementType() != MVT::i1)
return SDValue();
SDValue Pg = getPredicateForVector(DAG, DL, OpVT);
switch (ReduceOp.getOpcode()) {
default:
return SDValue();
case ISD::VECREDUCE_OR:
if (isAllActivePredicate(DAG, Pg))
// The predicate can be 'Op' because
// vecreduce_or(Op & <all true>) <=> vecreduce_or(Op).
return getPTest(DAG, VT, Op, Op, AArch64CC::ANY_ACTIVE);
else
return getPTest(DAG, VT, Pg, Op, AArch64CC::ANY_ACTIVE);
case ISD::VECREDUCE_AND: {
Op = DAG.getNode(ISD::XOR, DL, OpVT, Op, Pg);
return getPTest(DAG, VT, Pg, Op, AArch64CC::NONE_ACTIVE);
}
case ISD::VECREDUCE_XOR: {
SDValue ID =
DAG.getTargetConstant(Intrinsic::aarch64_sve_cntp, DL, MVT::i64);
SDValue Cntp =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i64, ID, Pg, Op);
return DAG.getAnyExtOrTrunc(Cntp, DL, VT);
}
}
return SDValue();
}
SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
SDValue ScalarOp,
SelectionDAG &DAG) const {
SDLoc DL(ScalarOp);
SDValue VecOp = ScalarOp.getOperand(0);
EVT SrcVT = VecOp.getValueType();
if (useSVEForFixedLengthVectorVT(SrcVT, true)) {
EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
}
// UADDV always returns an i64 result.
EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 :
SrcVT.getVectorElementType();
EVT RdxVT = SrcVT;
if (SrcVT.isFixedLengthVector() || Opcode == AArch64ISD::UADDV_PRED)
RdxVT = getPackedSVEVectorVT(ResVT);
SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
SDValue Rdx = DAG.getNode(Opcode, DL, RdxVT, Pg, VecOp);
SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT,
Rdx, DAG.getConstant(0, DL, MVT::i64));
// The VEC_REDUCE nodes expect an element size result.
if (ResVT != ScalarOp.getValueType())
Res = DAG.getAnyExtOrTrunc(Res, DL, ScalarOp.getValueType());
return Res;
}
SDValue
AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
EVT InVT = Op.getOperand(1).getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(1));
SDValue Op2 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(2));
// Convert the mask to a predicate (NOTE: we don't need to worry about
// inactive lanes since VSELECT is safe when given undefined elements).
EVT MaskVT = Op.getOperand(0).getValueType();
EVT MaskContainerVT = getContainerForFixedLengthVector(DAG, MaskVT);
auto Mask = convertToScalableVector(DAG, MaskContainerVT, Op.getOperand(0));
Mask = DAG.getNode(ISD::TRUNCATE, DL,
MaskContainerVT.changeVectorElementType(MVT::i1), Mask);
auto ScalableRes = DAG.getNode(ISD::VSELECT, DL, ContainerVT,
Mask, Op1, Op2);
return convertFromScalableVector(DAG, VT, ScalableRes);
}
SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE(
SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT InVT = Op.getOperand(0).getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
assert(useSVEForFixedLengthVectorVT(InVT) &&
"Only expected to lower fixed length vector operation!");
assert(Op.getValueType() == InVT.changeTypeToInteger() &&
"Expected integer result of the same bit length as the inputs!");
auto Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
auto Op2 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
EVT CmpVT = Pg.getValueType();
auto Cmp = DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT,
{Pg, Op1, Op2, Op.getOperand(2)});
EVT PromoteVT = ContainerVT.changeTypeToInteger();
auto Promote = DAG.getBoolExtOrTrunc(Cmp, DL, PromoteVT, InVT);
return convertFromScalableVector(DAG, Op.getValueType(), Promote);
}
SDValue
AArch64TargetLowering::LowerFixedLengthBitcastToSVE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
auto SrcOp = Op.getOperand(0);
EVT VT = Op.getValueType();
EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
EVT ContainerSrcVT =
getContainerForFixedLengthVector(DAG, SrcOp.getValueType());
SrcOp = convertToScalableVector(DAG, ContainerSrcVT, SrcOp);
Op = DAG.getNode(ISD::BITCAST, DL, ContainerDstVT, SrcOp);
return convertFromScalableVector(DAG, VT, Op);
}
SDValue AArch64TargetLowering::LowerFixedLengthConcatVectorsToSVE(
SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
unsigned NumOperands = Op->getNumOperands();
assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
"Unexpected number of operands in CONCAT_VECTORS");
auto SrcOp1 = Op.getOperand(0);
auto SrcOp2 = Op.getOperand(1);
EVT VT = Op.getValueType();
EVT SrcVT = SrcOp1.getValueType();
if (NumOperands > 2) {
SmallVector<SDValue, 4> Ops;
EVT PairVT = SrcVT.getDoubleNumVectorElementsVT(*DAG.getContext());
for (unsigned I = 0; I < NumOperands; I += 2)
Ops.push_back(DAG.getNode(ISD::CONCAT_VECTORS, DL, PairVT,
Op->getOperand(I), Op->getOperand(I + 1)));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Ops);
}
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
SDValue Pg = getPredicateForFixedLengthVector(DAG, DL, SrcVT);
SrcOp1 = convertToScalableVector(DAG, ContainerVT, SrcOp1);
SrcOp2 = convertToScalableVector(DAG, ContainerVT, SrcOp2);
Op = DAG.getNode(AArch64ISD::SPLICE, DL, ContainerVT, Pg, SrcOp1, SrcOp2);
return convertFromScalableVector(DAG, VT, Op);
}
SDValue
AArch64TargetLowering::LowerFixedLengthFPExtendToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
SDValue Pg = getPredicateForVector(DAG, DL, VT);
EVT SrcVT = Val.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
EVT ExtendVT = ContainerVT.changeVectorElementType(
SrcVT.getVectorElementType());
Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT.changeTypeToInteger(), Val);
Val = convertToScalableVector(DAG, ContainerVT.changeTypeToInteger(), Val);
Val = getSVESafeBitCast(ExtendVT, Val, DAG);
Val = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT,
Pg, Val, DAG.getUNDEF(ContainerVT));
return convertFromScalableVector(DAG, VT, Val);
}
SDValue
AArch64TargetLowering::LowerFixedLengthFPRoundToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
EVT SrcVT = Val.getValueType();
EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
EVT RoundVT = ContainerSrcVT.changeVectorElementType(
VT.getVectorElementType());
SDValue Pg = getPredicateForVector(DAG, DL, RoundVT);
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = DAG.getNode(AArch64ISD::FP_ROUND_MERGE_PASSTHRU, DL, RoundVT, Pg, Val,
Op.getOperand(1), DAG.getUNDEF(RoundVT));
Val = getSVESafeBitCast(ContainerSrcVT.changeTypeToInteger(), Val, DAG);
Val = convertFromScalableVector(DAG, SrcVT.changeTypeToInteger(), Val);
Val = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Val);
return DAG.getNode(ISD::BITCAST, DL, VT, Val);
}
SDValue
AArch64TargetLowering::LowerFixedLengthIntToFPToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP;
unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
: AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
EVT SrcVT = Val.getValueType();
EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
ContainerDstVT.getVectorElementType().getSizeInBits()) {
SDValue Pg = getPredicateForVector(DAG, DL, VT);
Val = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
VT.changeTypeToInteger(), Val);
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = getSVESafeBitCast(ContainerDstVT.changeTypeToInteger(), Val, DAG);
// Safe to use a larger than specified operand since we just unpacked the
// data, hence the upper bits are zero.
Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
DAG.getUNDEF(ContainerDstVT));
return convertFromScalableVector(DAG, VT, Val);
} else {
EVT CvtVT = ContainerSrcVT.changeVectorElementType(
ContainerDstVT.getVectorElementType());
SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
Val = getSVESafeBitCast(ContainerSrcVT, Val, DAG);
Val = convertFromScalableVector(DAG, SrcVT, Val);
Val = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Val);
return DAG.getNode(ISD::BITCAST, DL, VT, Val);
}
}
SDValue
AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT;
unsigned Opcode = IsSigned ? AArch64ISD::FCVTZS_MERGE_PASSTHRU
: AArch64ISD::FCVTZU_MERGE_PASSTHRU;
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
EVT SrcVT = Val.getValueType();
EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
ContainerDstVT.getVectorElementType().getSizeInBits()) {
EVT CvtVT = ContainerDstVT.changeVectorElementType(
ContainerSrcVT.getVectorElementType());
SDValue Pg = getPredicateForVector(DAG, DL, VT);
Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Val);
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = getSVESafeBitCast(CvtVT, Val, DAG);
Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
DAG.getUNDEF(ContainerDstVT));
return convertFromScalableVector(DAG, VT, Val);
} else {
EVT CvtVT = ContainerSrcVT.changeTypeToInteger();
SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
// Safe to use a larger than specified result since an fp_to_int where the
// result doesn't fit into the destination is undefined.
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
Val = convertFromScalableVector(DAG, SrcVT.changeTypeToInteger(), Val);
return DAG.getNode(ISD::TRUNCATE, DL, VT, Val);
}
}
SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
auto *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
auto ShuffleMask = SVN->getMask();
SDLoc DL(Op);
SDValue Op1 = Op.getOperand(0);
SDValue Op2 = Op.getOperand(1);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
Op1 = convertToScalableVector(DAG, ContainerVT, Op1);
Op2 = convertToScalableVector(DAG, ContainerVT, Op2);
bool ReverseEXT = false;
unsigned Imm;
if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm) &&
Imm == VT.getVectorNumElements() - 1) {
if (ReverseEXT)
std::swap(Op1, Op2);
EVT ScalarTy = VT.getVectorElementType();
if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
ScalarTy = MVT::i32;
SDValue Scalar = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, ScalarTy, Op1,
DAG.getConstant(VT.getVectorNumElements() - 1, DL, MVT::i64));
Op = DAG.getNode(AArch64ISD::INSR, DL, ContainerVT, Op2, Scalar);
return convertFromScalableVector(DAG, VT, Op);
}
for (unsigned LaneSize : {64U, 32U, 16U}) {
if (isREVMask(ShuffleMask, VT, LaneSize)) {
EVT NewVT =
getPackedSVEVectorVT(EVT::getIntegerVT(*DAG.getContext(), LaneSize));
unsigned RevOp;
unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 8)
RevOp = AArch64ISD::BSWAP_MERGE_PASSTHRU;
else if (EltSz == 16)
RevOp = AArch64ISD::REVH_MERGE_PASSTHRU;
else
RevOp = AArch64ISD::REVW_MERGE_PASSTHRU;
Op = DAG.getNode(ISD::BITCAST, DL, NewVT, Op1);
Op = LowerToPredicatedOp(Op, DAG, RevOp);
Op = DAG.getNode(ISD::BITCAST, DL, ContainerVT, Op);
return convertFromScalableVector(DAG, VT, Op);
}
}
unsigned WhichResult;
if (isZIPMask(ShuffleMask, VT, WhichResult) && WhichResult == 0)
return convertFromScalableVector(
DAG, VT, DAG.getNode(AArch64ISD::ZIP1, DL, ContainerVT, Op1, Op2));
if (isTRNMask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
return convertFromScalableVector(
DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op2));
}
if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult) && WhichResult == 0)
return convertFromScalableVector(
DAG, VT, DAG.getNode(AArch64ISD::ZIP1, DL, ContainerVT, Op1, Op1));
if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
return convertFromScalableVector(
DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op1));
}
// Functions like isZIPMask return true when a ISD::VECTOR_SHUFFLE's mask
// represents the same logical operation as performed by a ZIP instruction. In
// isolation these functions do not mean the ISD::VECTOR_SHUFFLE is exactly
// equivalent to an AArch64 instruction. There's the extra component of
// ISD::VECTOR_SHUFFLE's value type to consider. Prior to SVE these functions
// only operated on 64/128bit vector types that have a direct mapping to a
// target register and so an exact mapping is implied.
// However, when using SVE for fixed length vectors, most legal vector types
// are actually sub-vectors of a larger SVE register. When mapping
// ISD::VECTOR_SHUFFLE to an SVE instruction care must be taken to consider
// how the mask's indices translate. Specifically, when the mapping requires
// an exact meaning for a specific vector index (e.g. Index X is the last
// vector element in the register) then such mappings are often only safe when
// the exact SVE register size is know. The main exception to this is when
// indices are logically relative to the first element of either
// ISD::VECTOR_SHUFFLE operand because these relative indices don't change
// when converting from fixed-length to scalable vector types (i.e. the start
// of a fixed length vector is always the start of a scalable vector).
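// For example, with VT = v4i32 the ZIP1 mask <0,4,1,5> only refers to the
// leading elements of each operand, so it lowers safely regardless of the
// actual SVE register width, whereas the reverse mask <3,2,1,0> requires
// element 3 to be the last element of the register, which only holds in the
// MinSVESize == MaxSVESize == VT.getSizeInBits() case handled below.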
unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
unsigned MaxSVESize = Subtarget->getMaxSVEVectorSizeInBits();
if (MinSVESize == MaxSVESize && MaxSVESize == VT.getSizeInBits()) {
if (ShuffleVectorInst::isReverseMask(ShuffleMask) && Op2.isUndef()) {
Op = DAG.getNode(ISD::VECTOR_REVERSE, DL, ContainerVT, Op1);
return convertFromScalableVector(DAG, VT, Op);
}
if (isZIPMask(ShuffleMask, VT, WhichResult) && WhichResult != 0)
return convertFromScalableVector(
DAG, VT, DAG.getNode(AArch64ISD::ZIP2, DL, ContainerVT, Op1, Op2));
if (isUZPMask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
return convertFromScalableVector(
DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op2));
}
if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult) && WhichResult != 0)
return convertFromScalableVector(
DAG, VT, DAG.getNode(AArch64ISD::ZIP2, DL, ContainerVT, Op1, Op1));
if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
return convertFromScalableVector(
DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op1));
}
}
return SDValue();
}
SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT InVT = Op.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
(void)TLI;
assert(VT.isScalableVector() && TLI.isTypeLegal(VT) &&
InVT.isScalableVector() && TLI.isTypeLegal(InVT) &&
"Only expect to cast between legal scalable vector types!");
assert((VT.getVectorElementType() == MVT::i1) ==
(InVT.getVectorElementType() == MVT::i1) &&
"Cannot cast between data and predicate scalable vector types!");
if (InVT == VT)
return Op;
if (VT.getVectorElementType() == MVT::i1)
return DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
EVT PackedVT = getPackedSVEVectorVT(VT.getVectorElementType());
EVT PackedInVT = getPackedSVEVectorVT(InVT.getVectorElementType());
// Pack input if required.
if (InVT != PackedInVT)
Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, PackedInVT, Op);
Op = DAG.getNode(ISD::BITCAST, DL, PackedVT, Op);
// Unpack result if required.
if (VT != PackedVT)
Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
return Op;
}
bool AArch64TargetLowering::isAllActivePredicate(SelectionDAG &DAG,
SDValue N) const {
return ::isAllActivePredicate(DAG, N);
}
EVT AArch64TargetLowering::getPromotedVTForPredicate(EVT VT) const {
return ::getPromotedVTForPredicate(VT);
}
bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
SDValue Op, const APInt &OriginalDemandedBits,
const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
unsigned Depth) const {
unsigned Opc = Op.getOpcode();
switch (Opc) {
case AArch64ISD::VSHL: {
// Match (VSHL (VLSHR Val X) X)
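// i.e. (Val >>u X) << X, which zeroes the low X bits of each element; if
// none of those low bits are demanded, the whole pair can be replaced by Val
// (e.g. for i32 elements with X = 8, bits [7:0] are cleared and, when they
// are unused, the shifts are dropped below).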
SDValue ShiftL = Op;
SDValue ShiftR = Op->getOperand(0);
if (ShiftR->getOpcode() != AArch64ISD::VLSHR)
return false;
if (!ShiftL.hasOneUse() || !ShiftR.hasOneUse())
return false;
unsigned ShiftLBits = ShiftL->getConstantOperandVal(1);
unsigned ShiftRBits = ShiftR->getConstantOperandVal(1);
// Other cases can be handled as well, but this is not
// implemented.
if (ShiftRBits != ShiftLBits)
return false;
unsigned ScalarSize = Op.getScalarValueSizeInBits();
assert(ScalarSize > ShiftLBits && "Invalid shift imm");
APInt ZeroBits = APInt::getLowBitsSet(ScalarSize, ShiftLBits);
APInt UnusedBits = ~OriginalDemandedBits;
if ((ZeroBits & UnusedBits) != ZeroBits)
return false;
// All bits that are zeroed by (VSHL (VLSHR Val X) X) are not
// used - simplify to just Val.
return TLO.CombineTo(Op, ShiftR->getOperand(0));
}
}
return TargetLowering::SimplifyDemandedBitsForTargetNode(
Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
}
bool AArch64TargetLowering::isConstantUnsignedBitfieldExtractLegal(
unsigned Opc, LLT Ty1, LLT Ty2) const {
return Ty1 == Ty2 && (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64));
}
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index a9191924129c..ea9c1b620065 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1,7745 +1,7810 @@
//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
using namespace llvm;
#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"
static cl::opt<unsigned> TBZDisplacementBits(
"aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
static cl::opt<unsigned> CBZDisplacementBits(
"aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
static cl::opt<unsigned>
BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
cl::desc("Restrict range of Bcc instructions (DEBUG)"));
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
: AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
AArch64::CATCHRET),
RI(STI.getTargetTriple()), Subtarget(STI) {}
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
const MachineBasicBlock &MBB = *MI.getParent();
const MachineFunction *MF = MBB.getParent();
const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
{
auto Op = MI.getOpcode();
if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
}
// Meta-instructions emit no code.
if (MI.isMetaInstruction())
return 0;
// FIXME: We currently only handle pseudoinstructions that don't get expanded
// before the assembly printer.
unsigned NumBytes = 0;
const MCInstrDesc &Desc = MI.getDesc();
// The size should preferably be set in
// llvm/lib/Target/AArch64/AArch64InstrInfo.td (the default case).
// Specific cases below handle instructions of variable size.
switch (Desc.getOpcode()) {
default:
if (Desc.getSize())
return Desc.getSize();
// Anything not explicitly designated otherwise (i.e. pseudo-instructions
// with fixed constant size but not specified in .td file) is a normal
// 4-byte insn.
NumBytes = 4;
break;
case TargetOpcode::STACKMAP:
// The upper bound for a stackmap intrinsic is the full length of its shadow
NumBytes = StackMapOpers(&MI).getNumPatchBytes();
assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
break;
case TargetOpcode::PATCHPOINT:
// The size of the patchpoint intrinsic is the number of bytes requested
NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
break;
case TargetOpcode::STATEPOINT:
NumBytes = StatepointOpers(&MI).getNumPatchBytes();
assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
// No patch bytes means a normal call inst is emitted
if (NumBytes == 0)
NumBytes = 4;
break;
case AArch64::SPACE:
NumBytes = MI.getOperand(1).getImm();
break;
case TargetOpcode::BUNDLE:
NumBytes = getInstBundleLength(MI);
break;
}
return NumBytes;
}
unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
unsigned Size = 0;
MachineBasicBlock::const_instr_iterator I = MI.getIterator();
MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
assert(!I->isBundle() && "No nested bundle!");
Size += getInstSizeInBytes(*I);
}
return Size;
}
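// parseCondBranch fills Cond with a target-specific encoding of the branch:
// for Bcc it is [cond], for CB(N)Z it is [-1, opcode, reg] and for TB(N)Z it
// is [-1, opcode, reg, bit]. instantiateCondBranch, reverseBranchCondition
// and insertSelect below all consume this encoding.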
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
SmallVectorImpl<MachineOperand> &Cond) {
// Block ends with fall-through condbranch.
switch (LastInst->getOpcode()) {
default:
llvm_unreachable("Unknown branch instruction?");
case AArch64::Bcc:
Target = LastInst->getOperand(1).getMBB();
Cond.push_back(LastInst->getOperand(0));
break;
case AArch64::CBZW:
case AArch64::CBZX:
case AArch64::CBNZW:
case AArch64::CBNZX:
Target = LastInst->getOperand(1).getMBB();
Cond.push_back(MachineOperand::CreateImm(-1));
Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
Cond.push_back(LastInst->getOperand(0));
break;
case AArch64::TBZW:
case AArch64::TBZX:
case AArch64::TBNZW:
case AArch64::TBNZX:
Target = LastInst->getOperand(2).getMBB();
Cond.push_back(MachineOperand::CreateImm(-1));
Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
Cond.push_back(LastInst->getOperand(0));
Cond.push_back(LastInst->getOperand(1));
}
}
static unsigned getBranchDisplacementBits(unsigned Opc) {
switch (Opc) {
default:
llvm_unreachable("unexpected opcode!");
case AArch64::B:
return 64;
case AArch64::TBNZW:
case AArch64::TBZW:
case AArch64::TBNZX:
case AArch64::TBZX:
return TBZDisplacementBits;
case AArch64::CBNZW:
case AArch64::CBZW:
case AArch64::CBNZX:
case AArch64::CBZX:
return CBZDisplacementBits;
case AArch64::Bcc:
return BCCDisplacementBits;
}
}
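// With the default option values above, TB(N)Z can reach roughly +/-32KiB
// (14 bits, scaled by 4) and CB(N)Z / B.cc roughly +/-1MiB (19 bits, scaled
// by 4); an unconditional B is reported as 64 bits, i.e. effectively never
// out of range for this check.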
bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
int64_t BrOffset) const {
unsigned Bits = getBranchDisplacementBits(BranchOp);
assert(Bits >= 3 && "max branch displacement must be enough to jump "
"over conditional branch expansion");
return isIntN(Bits, BrOffset / 4);
}
MachineBasicBlock *
AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
default:
llvm_unreachable("unexpected opcode!");
case AArch64::B:
return MI.getOperand(0).getMBB();
case AArch64::TBZW:
case AArch64::TBNZW:
case AArch64::TBZX:
case AArch64::TBNZX:
return MI.getOperand(2).getMBB();
case AArch64::CBZW:
case AArch64::CBNZW:
case AArch64::CBZX:
case AArch64::CBNZX:
case AArch64::Bcc:
return MI.getOperand(1).getMBB();
}
}
// Branch analysis.
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
if (I == MBB.end())
return false;
// Skip over SpeculationBarrierEndBB terminators
if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
--I;
}
if (!isUnpredicatedTerminator(*I))
return false;
// Get the last instruction in the block.
MachineInstr *LastInst = &*I;
// If there is only one terminator instruction, process it.
unsigned LastOpc = LastInst->getOpcode();
if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
if (isUncondBranchOpcode(LastOpc)) {
TBB = LastInst->getOperand(0).getMBB();
return false;
}
if (isCondBranchOpcode(LastOpc)) {
// Block ends with fall-through condbranch.
parseCondBranch(LastInst, TBB, Cond);
return false;
}
return true; // Can't handle indirect branch.
}
// Get the instruction before it if it is a terminator.
MachineInstr *SecondLastInst = &*I;
unsigned SecondLastOpc = SecondLastInst->getOpcode();
// If AllowModify is true and the block ends with two or more unconditional
// branches, delete all but the first unconditional branch.
if (AllowModify && isUncondBranchOpcode(LastOpc)) {
while (isUncondBranchOpcode(SecondLastOpc)) {
LastInst->eraseFromParent();
LastInst = SecondLastInst;
LastOpc = LastInst->getOpcode();
if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
// Return now; the only terminator is an unconditional branch.
TBB = LastInst->getOperand(0).getMBB();
return false;
} else {
SecondLastInst = &*I;
SecondLastOpc = SecondLastInst->getOpcode();
}
}
}
// If we're allowed to modify and the block ends in an unconditional branch
// which could simply fallthrough, remove the branch. (Note: This case only
// matters when we can't understand the whole sequence, otherwise it's also
// handled by BranchFolding.cpp.)
if (AllowModify && isUncondBranchOpcode(LastOpc) &&
MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
LastInst->eraseFromParent();
LastInst = SecondLastInst;
LastOpc = LastInst->getOpcode();
if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
assert(!isUncondBranchOpcode(LastOpc) &&
"unreachable unconditional branches removed above");
if (isCondBranchOpcode(LastOpc)) {
// Block ends with fall-through condbranch.
parseCondBranch(LastInst, TBB, Cond);
return false;
}
return true; // Can't handle indirect branch.
} else {
SecondLastInst = &*I;
SecondLastOpc = SecondLastInst->getOpcode();
}
}
// If there are three terminators, we don't know what sort of block this is.
if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
return true;
// If the block ends with a B and a Bcc, handle it.
if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
parseCondBranch(SecondLastInst, TBB, Cond);
FBB = LastInst->getOperand(0).getMBB();
return false;
}
// If the block ends with two unconditional branches, handle it. The second
// one is not executed, so remove it.
if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
TBB = SecondLastInst->getOperand(0).getMBB();
I = LastInst;
if (AllowModify)
I->eraseFromParent();
return false;
}
// ...likewise if it ends with an indirect branch followed by an unconditional
// branch.
if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
I = LastInst;
if (AllowModify)
I->eraseFromParent();
return true;
}
// Otherwise, can't handle this.
return true;
}
bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
MachineBranchPredicate &MBP,
bool AllowModify) const {
// For the moment, handle only a block which ends with a cb(n)zx followed by
// a fallthrough. Why this? Because it is a common form.
// TODO: Should we handle b.cc?
MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
if (I == MBB.end())
return true;
// Skip over SpeculationBarrierEndBB terminators
if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
--I;
}
if (!isUnpredicatedTerminator(*I))
return true;
// Get the last instruction in the block.
MachineInstr *LastInst = &*I;
unsigned LastOpc = LastInst->getOpcode();
if (!isCondBranchOpcode(LastOpc))
return true;
switch (LastOpc) {
default:
return true;
case AArch64::CBZW:
case AArch64::CBZX:
case AArch64::CBNZW:
case AArch64::CBNZX:
break;
};
MBP.TrueDest = LastInst->getOperand(1).getMBB();
assert(MBP.TrueDest && "expected!");
MBP.FalseDest = MBB.getNextNode();
MBP.ConditionDef = nullptr;
MBP.SingleUseCondition = false;
MBP.LHS = LastInst->getOperand(0);
MBP.RHS = MachineOperand::CreateImm(0);
MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
: MachineBranchPredicate::PRED_EQ;
return false;
}
bool AArch64InstrInfo::reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const {
if (Cond[0].getImm() != -1) {
// Regular Bcc
AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
} else {
// Folded compare-and-branch
switch (Cond[1].getImm()) {
default:
llvm_unreachable("Unknown conditional branch!");
case AArch64::CBZW:
Cond[1].setImm(AArch64::CBNZW);
break;
case AArch64::CBNZW:
Cond[1].setImm(AArch64::CBZW);
break;
case AArch64::CBZX:
Cond[1].setImm(AArch64::CBNZX);
break;
case AArch64::CBNZX:
Cond[1].setImm(AArch64::CBZX);
break;
case AArch64::TBZW:
Cond[1].setImm(AArch64::TBNZW);
break;
case AArch64::TBNZW:
Cond[1].setImm(AArch64::TBZW);
break;
case AArch64::TBZX:
Cond[1].setImm(AArch64::TBNZX);
break;
case AArch64::TBNZX:
Cond[1].setImm(AArch64::TBZX);
break;
}
}
return false;
}
unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved) const {
MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
if (I == MBB.end())
return 0;
if (!isUncondBranchOpcode(I->getOpcode()) &&
!isCondBranchOpcode(I->getOpcode()))
return 0;
// Remove the branch.
I->eraseFromParent();
I = MBB.end();
if (I == MBB.begin()) {
if (BytesRemoved)
*BytesRemoved = 4;
return 1;
}
--I;
if (!isCondBranchOpcode(I->getOpcode())) {
if (BytesRemoved)
*BytesRemoved = 4;
return 1;
}
// Remove the branch.
I->eraseFromParent();
if (BytesRemoved)
*BytesRemoved = 8;
return 2;
}
void AArch64InstrInfo::instantiateCondBranch(
MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
ArrayRef<MachineOperand> Cond) const {
if (Cond[0].getImm() != -1) {
// Regular Bcc
BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
} else {
// Folded compare-and-branch
// Note that we use add() instead of addReg() to keep the flags.
const MachineInstrBuilder MIB =
BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
if (Cond.size() > 3)
MIB.addImm(Cond[3].getImm());
MIB.addMBB(TBB);
}
}
unsigned AArch64InstrInfo::insertBranch(
MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
// Shouldn't be a fall through.
assert(TBB && "insertBranch must not be told to insert a fallthrough");
if (!FBB) {
if (Cond.empty()) // Unconditional branch?
BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
else
instantiateCondBranch(MBB, DL, TBB, Cond);
if (BytesAdded)
*BytesAdded = 4;
return 1;
}
// Two-way conditional branch.
instantiateCondBranch(MBB, DL, TBB, Cond);
BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
if (BytesAdded)
*BytesAdded = 8;
return 2;
}
// Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
while (Register::isVirtualRegister(VReg)) {
const MachineInstr *DefMI = MRI.getVRegDef(VReg);
if (!DefMI->isFullCopy())
return VReg;
VReg = DefMI->getOperand(1).getReg();
}
return VReg;
}
// Determine if VReg is defined by an instruction that can be folded into a
// csel instruction. If so, return the folded opcode, and the replacement
// register.
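// For example, if VReg is defined by 'ADDWri %x, 1, 0' (an add of the
// immediate 1), this returns AArch64::CSINCWr and sets *NewVReg = %x; when
// that add defines the csel's true operand, insertSelect below inverts the
// condition and uses %x as the csinc's false operand so that the implicit
// increment replaces the add.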
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
unsigned *NewVReg = nullptr) {
VReg = removeCopies(MRI, VReg);
if (!Register::isVirtualRegister(VReg))
return 0;
bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
const MachineInstr *DefMI = MRI.getVRegDef(VReg);
unsigned Opc = 0;
unsigned SrcOpNum = 0;
switch (DefMI->getOpcode()) {
case AArch64::ADDSXri:
case AArch64::ADDSWri:
// if NZCV is used, do not fold.
if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
return 0;
// fall-through to ADDXri and ADDWri.
LLVM_FALLTHROUGH;
case AArch64::ADDXri:
case AArch64::ADDWri:
// add x, 1 -> csinc.
if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
DefMI->getOperand(3).getImm() != 0)
return 0;
SrcOpNum = 1;
Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
break;
case AArch64::ORNXrr:
case AArch64::ORNWrr: {
// not x -> csinv, represented as orn dst, xzr, src.
unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
return 0;
SrcOpNum = 2;
Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
break;
}
case AArch64::SUBSXrr:
case AArch64::SUBSWrr:
// if NZCV is used, do not fold.
if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
return 0;
// fall-through to SUBXrr and SUBWrr.
LLVM_FALLTHROUGH;
case AArch64::SUBXrr:
case AArch64::SUBWrr: {
// neg x -> csneg, represented as sub dst, xzr, src.
unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
return 0;
SrcOpNum = 2;
Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
break;
}
default:
return 0;
}
assert(Opc && SrcOpNum && "Missing parameters");
if (NewVReg)
*NewVReg = DefMI->getOperand(SrcOpNum).getReg();
return Opc;
}
bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Cond,
Register DstReg, Register TrueReg,
Register FalseReg, int &CondCycles,
int &TrueCycles,
int &FalseCycles) const {
// Check register classes.
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const TargetRegisterClass *RC =
RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
if (!RC)
return false;
// Also need to check the dest regclass, in case we're trying to optimize
// something like:
// %1(gpr) = PHI %2(fpr), bb1, %3(fpr), bb2
if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
return false;
// Expanding cbz/tbz requires an extra cycle of latency on the condition.
unsigned ExtraCondLat = Cond.size() != 1;
// GPRs are handled by csel.
// FIXME: Fold in x+1, -x, and ~x when applicable.
if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
// Single-cycle csel, csinc, csinv, and csneg.
CondCycles = 1 + ExtraCondLat;
TrueCycles = FalseCycles = 1;
if (canFoldIntoCSel(MRI, TrueReg))
TrueCycles = 0;
else if (canFoldIntoCSel(MRI, FalseReg))
FalseCycles = 0;
return true;
}
// Scalar floating point is handled by fcsel.
// FIXME: Form fabs, fmin, and fmax when applicable.
if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
AArch64::FPR32RegClass.hasSubClassEq(RC)) {
CondCycles = 5 + ExtraCondLat;
TrueCycles = FalseCycles = 2;
return true;
}
// Can't do vectors.
return false;
}
void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, Register DstReg,
ArrayRef<MachineOperand> Cond,
Register TrueReg, Register FalseReg) const {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
// Parse the condition code, see parseCondBranch() above.
AArch64CC::CondCode CC;
switch (Cond.size()) {
default:
llvm_unreachable("Unknown condition opcode in Cond");
case 1: // b.cc
CC = AArch64CC::CondCode(Cond[0].getImm());
break;
case 3: { // cbz/cbnz
// We must insert a compare against 0.
bool Is64Bit;
switch (Cond[1].getImm()) {
default:
llvm_unreachable("Unknown branch opcode in Cond");
case AArch64::CBZW:
Is64Bit = false;
CC = AArch64CC::EQ;
break;
case AArch64::CBZX:
Is64Bit = true;
CC = AArch64CC::EQ;
break;
case AArch64::CBNZW:
Is64Bit = false;
CC = AArch64CC::NE;
break;
case AArch64::CBNZX:
Is64Bit = true;
CC = AArch64CC::NE;
break;
}
Register SrcReg = Cond[2].getReg();
if (Is64Bit) {
// cmp reg, #0 is actually subs xzr, reg, #0.
MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
.addReg(SrcReg)
.addImm(0)
.addImm(0);
} else {
MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
.addReg(SrcReg)
.addImm(0)
.addImm(0);
}
break;
}
case 4: { // tbz/tbnz
// We must insert a tst instruction.
switch (Cond[1].getImm()) {
default:
llvm_unreachable("Unknown branch opcode in Cond");
case AArch64::TBZW:
case AArch64::TBZX:
CC = AArch64CC::EQ;
break;
case AArch64::TBNZW:
case AArch64::TBNZX:
CC = AArch64CC::NE;
break;
}
// cmp reg, #foo is actually ands xzr, reg, #1<<foo.
if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
.addReg(Cond[2].getReg())
.addImm(
AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
else
BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
.addReg(Cond[2].getReg())
.addImm(
AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
break;
}
}
unsigned Opc = 0;
const TargetRegisterClass *RC = nullptr;
bool TryFold = false;
if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
RC = &AArch64::GPR64RegClass;
Opc = AArch64::CSELXr;
TryFold = true;
} else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
RC = &AArch64::GPR32RegClass;
Opc = AArch64::CSELWr;
TryFold = true;
} else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
RC = &AArch64::FPR64RegClass;
Opc = AArch64::FCSELDrrr;
} else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
RC = &AArch64::FPR32RegClass;
Opc = AArch64::FCSELSrrr;
}
assert(RC && "Unsupported regclass");
// Try folding simple instructions into the csel.
if (TryFold) {
unsigned NewVReg = 0;
unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
if (FoldedOpc) {
// The folded opcodes csinc, csinv and csneg apply the operation to
// FalseReg, so we need to invert the condition.
CC = AArch64CC::getInvertedCondCode(CC);
TrueReg = FalseReg;
} else
FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
// Fold the operation. Leave any dead instructions for DCE to clean up.
if (FoldedOpc) {
FalseReg = NewVReg;
Opc = FoldedOpc;
// This extends the live range of NewVReg.
MRI.clearKillFlags(NewVReg);
}
}
// Pull all virtual registers into the appropriate class.
MRI.constrainRegClass(TrueReg, RC);
MRI.constrainRegClass(FalseReg, RC);
// Insert the csel.
BuildMI(MBB, I, DL, get(Opc), DstReg)
.addReg(TrueReg)
.addReg(FalseReg)
.addImm(CC);
}
/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
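/// For instance, MOVi32imm #0x00ff00ff can be expanded to ORRWri with the
/// zero register because 0x00ff00ff is a valid logical immediate (a repeating
/// rotated run of ones), whereas #0x12345678 is not encodable and so cannot.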
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
uint64_t Imm = MI.getOperand(1).getImm();
uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
uint64_t Encoding;
return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
}
// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
if (!Subtarget.hasCustomCheapAsMoveHandling())
return MI.isAsCheapAsAMove();
const unsigned Opcode = MI.getOpcode();
// Firstly, check cases gated by features.
if (Subtarget.hasZeroCycleZeroingFP()) {
if (Opcode == AArch64::FMOVH0 ||
Opcode == AArch64::FMOVS0 ||
Opcode == AArch64::FMOVD0)
return true;
}
if (Subtarget.hasZeroCycleZeroingGP()) {
if (Opcode == TargetOpcode::COPY &&
(MI.getOperand(1).getReg() == AArch64::WZR ||
MI.getOperand(1).getReg() == AArch64::XZR))
return true;
}
// Secondly, check cases specific to sub-targets.
if (Subtarget.hasExynosCheapAsMoveHandling()) {
if (isExynosCheapAsMove(MI))
return true;
return MI.isAsCheapAsAMove();
}
// Finally, check generic cases.
switch (Opcode) {
default:
return false;
// add/sub on register without shift
case AArch64::ADDWri:
case AArch64::ADDXri:
case AArch64::SUBWri:
case AArch64::SUBXri:
return (MI.getOperand(3).getImm() == 0);
// logical ops on immediate
case AArch64::ANDWri:
case AArch64::ANDXri:
case AArch64::EORWri:
case AArch64::EORXri:
case AArch64::ORRWri:
case AArch64::ORRXri:
return true;
// logical ops on register without shift
case AArch64::ANDWrr:
case AArch64::ANDXrr:
case AArch64::BICWrr:
case AArch64::BICXrr:
case AArch64::EONWrr:
case AArch64::EONXrr:
case AArch64::EORWrr:
case AArch64::EORXrr:
case AArch64::ORNWrr:
case AArch64::ORNXrr:
case AArch64::ORRWrr:
case AArch64::ORRXrr:
return true;
// If MOVi32imm or MOVi64imm can be expanded into ORRWri or
// ORRXri, it is as cheap as MOV
case AArch64::MOVi32imm:
return canBeExpandedToORR(MI, 32);
case AArch64::MOVi64imm:
return canBeExpandedToORR(MI, 64);
}
llvm_unreachable("Unknown opcode to check as cheap as a move!");
}
bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
return false;
case AArch64::ADDWrs:
case AArch64::ADDXrs:
case AArch64::ADDSWrs:
case AArch64::ADDSXrs: {
unsigned Imm = MI.getOperand(3).getImm();
unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
if (ShiftVal == 0)
return true;
return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
}
case AArch64::ADDWrx:
case AArch64::ADDXrx:
case AArch64::ADDXrx64:
case AArch64::ADDSWrx:
case AArch64::ADDSXrx:
case AArch64::ADDSXrx64: {
unsigned Imm = MI.getOperand(3).getImm();
switch (AArch64_AM::getArithExtendType(Imm)) {
default:
return false;
case AArch64_AM::UXTB:
case AArch64_AM::UXTH:
case AArch64_AM::UXTW:
case AArch64_AM::UXTX:
return AArch64_AM::getArithShiftValue(Imm) <= 4;
}
}
case AArch64::SUBWrs:
case AArch64::SUBSWrs: {
unsigned Imm = MI.getOperand(3).getImm();
unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
return ShiftVal == 0 ||
(AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
}
case AArch64::SUBXrs:
case AArch64::SUBSXrs: {
unsigned Imm = MI.getOperand(3).getImm();
unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
return ShiftVal == 0 ||
(AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
}
case AArch64::SUBWrx:
case AArch64::SUBXrx:
case AArch64::SUBXrx64:
case AArch64::SUBSWrx:
case AArch64::SUBSXrx:
case AArch64::SUBSXrx64: {
unsigned Imm = MI.getOperand(3).getImm();
switch (AArch64_AM::getArithExtendType(Imm)) {
default:
return false;
case AArch64_AM::UXTB:
case AArch64_AM::UXTH:
case AArch64_AM::UXTW:
case AArch64_AM::UXTX:
return AArch64_AM::getArithShiftValue(Imm) == 0;
}
}
case AArch64::LDRBBroW:
case AArch64::LDRBBroX:
case AArch64::LDRBroW:
case AArch64::LDRBroX:
case AArch64::LDRDroW:
case AArch64::LDRDroX:
case AArch64::LDRHHroW:
case AArch64::LDRHHroX:
case AArch64::LDRHroW:
case AArch64::LDRHroX:
case AArch64::LDRQroW:
case AArch64::LDRQroX:
case AArch64::LDRSBWroW:
case AArch64::LDRSBWroX:
case AArch64::LDRSBXroW:
case AArch64::LDRSBXroX:
case AArch64::LDRSHWroW:
case AArch64::LDRSHWroX:
case AArch64::LDRSHXroW:
case AArch64::LDRSHXroX:
case AArch64::LDRSWroW:
case AArch64::LDRSWroX:
case AArch64::LDRSroW:
case AArch64::LDRSroX:
case AArch64::LDRWroW:
case AArch64::LDRWroX:
case AArch64::LDRXroW:
case AArch64::LDRXroX:
case AArch64::PRFMroW:
case AArch64::PRFMroX:
case AArch64::STRBBroW:
case AArch64::STRBBroX:
case AArch64::STRBroW:
case AArch64::STRBroX:
case AArch64::STRDroW:
case AArch64::STRDroX:
case AArch64::STRHHroW:
case AArch64::STRHHroX:
case AArch64::STRHroW:
case AArch64::STRHroX:
case AArch64::STRQroW:
case AArch64::STRQroX:
case AArch64::STRSroW:
case AArch64::STRSroX:
case AArch64::STRWroW:
case AArch64::STRWroX:
case AArch64::STRXroW:
case AArch64::STRXroX: {
unsigned IsSigned = MI.getOperand(3).getImm();
return !IsSigned;
}
}
}
bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
switch (Opc) {
default:
return false;
case AArch64::SEH_StackAlloc:
case AArch64::SEH_SaveFPLR:
case AArch64::SEH_SaveFPLR_X:
case AArch64::SEH_SaveReg:
case AArch64::SEH_SaveReg_X:
case AArch64::SEH_SaveRegP:
case AArch64::SEH_SaveRegP_X:
case AArch64::SEH_SaveFReg:
case AArch64::SEH_SaveFReg_X:
case AArch64::SEH_SaveFRegP:
case AArch64::SEH_SaveFRegP_X:
case AArch64::SEH_SetFP:
case AArch64::SEH_AddFP:
case AArch64::SEH_Nop:
case AArch64::SEH_PrologEnd:
case AArch64::SEH_EpilogStart:
case AArch64::SEH_EpilogEnd:
return true;
}
}
bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
Register &SrcReg, Register &DstReg,
unsigned &SubIdx) const {
switch (MI.getOpcode()) {
default:
return false;
case AArch64::SBFMXri: // aka sxtw
case AArch64::UBFMXri: // aka uxtw
// Check for the 32 -> 64 bit extension case; these instructions can do
// much more.
if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
return false;
// This is a signed or unsigned 32 -> 64 bit extension.
SrcReg = MI.getOperand(1).getReg();
DstReg = MI.getOperand(0).getReg();
SubIdx = AArch64::sub_32;
return true;
}
}
bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
const MachineInstr &MIa, const MachineInstr &MIb) const {
const TargetRegisterInfo *TRI = &getRegisterInfo();
const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
int64_t OffsetA = 0, OffsetB = 0;
unsigned WidthA = 0, WidthB = 0;
bool OffsetAIsScalable = false, OffsetBIsScalable = false;
assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
return false;
// Retrieve the base, the offset from the base, and the width. The width
// is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
// the bases are identical, and the offset of the lower memory access plus
// its width does not overlap the offset of the higher memory access,
// then the memory accesses are distinct.
// If OffsetAIsScalable and OffsetBIsScalable are both true, they
// are assumed to have the same scale (vscale).
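// For example, two 8-byte accesses off the same base at offsets #0 and #8
// are disjoint because 0 + 8 <= 8.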
if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
WidthA, TRI) &&
getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
WidthB, TRI)) {
if (BaseOpA->isIdenticalTo(*BaseOpB) &&
OffsetAIsScalable == OffsetBIsScalable) {
int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
if (LowOffset + LowWidth <= HighOffset)
return true;
}
}
return false;
}
bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
const MachineBasicBlock *MBB,
const MachineFunction &MF) const {
if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
return true;
switch (MI.getOpcode()) {
case AArch64::HINT:
// CSDB hints are scheduling barriers.
if (MI.getOperand(0).getImm() == 0x14)
return true;
break;
case AArch64::DSB:
case AArch64::ISB:
// DSB and ISB also are scheduling barriers.
return true;
default:;
}
return isSEHInstruction(MI);
}
/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
Register &SrcReg2, int64_t &CmpMask,
int64_t &CmpValue) const {
// The first operand can be a frame index where we'd normally expect a
// register.
assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
if (!MI.getOperand(1).isReg())
return false;
switch (MI.getOpcode()) {
default:
break;
case AArch64::PTEST_PP:
SrcReg = MI.getOperand(0).getReg();
SrcReg2 = MI.getOperand(1).getReg();
// Not sure about the mask and value for now...
CmpMask = ~0;
CmpValue = 0;
return true;
case AArch64::SUBSWrr:
case AArch64::SUBSWrs:
case AArch64::SUBSWrx:
case AArch64::SUBSXrr:
case AArch64::SUBSXrs:
case AArch64::SUBSXrx:
case AArch64::ADDSWrr:
case AArch64::ADDSWrs:
case AArch64::ADDSWrx:
case AArch64::ADDSXrr:
case AArch64::ADDSXrs:
case AArch64::ADDSXrx:
// Replace SUBSWrr with SUBWrr if NZCV is not used.
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = MI.getOperand(2).getReg();
CmpMask = ~0;
CmpValue = 0;
return true;
case AArch64::SUBSWri:
case AArch64::ADDSWri:
case AArch64::SUBSXri:
case AArch64::ADDSXri:
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0;
CmpMask = ~0;
CmpValue = MI.getOperand(2).getImm();
return true;
case AArch64::ANDSWri:
case AArch64::ANDSXri:
// ANDS does not use the same encoding scheme as the other xxxS
// instructions.
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0;
CmpMask = ~0;
CmpValue = AArch64_AM::decodeLogicalImmediate(
MI.getOperand(2).getImm(),
MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
return true;
}
return false;
}
static bool UpdateOperandRegClass(MachineInstr &Instr) {
MachineBasicBlock *MBB = Instr.getParent();
assert(MBB && "Can't get MachineBasicBlock here");
MachineFunction *MF = MBB->getParent();
assert(MF && "Can't get MachineFunction here");
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
MachineRegisterInfo *MRI = &MF->getRegInfo();
for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
++OpIdx) {
MachineOperand &MO = Instr.getOperand(OpIdx);
const TargetRegisterClass *OpRegCstraints =
Instr.getRegClassConstraint(OpIdx, TII, TRI);
// If there's no constraint, there's nothing to do.
if (!OpRegCstraints)
continue;
// If the operand is a frame index, there's nothing to do here.
// A frame index operand will resolve correctly during PEI.
if (MO.isFI())
continue;
assert(MO.isReg() &&
"Operand has register constraints without being a register!");
Register Reg = MO.getReg();
if (Register::isPhysicalRegister(Reg)) {
if (!OpRegCstraints->contains(Reg))
return false;
} else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
!MRI->constrainRegClass(Reg, OpRegCstraints))
return false;
}
return true;
}
/// Return the opcode that does not set flags when possible - otherwise
/// return the original opcode. The caller is responsible for doing the actual
/// substitution and legality checking.
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
// Don't convert all compare instructions, because for some the zero register
// encoding becomes the sp register.
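// For example, 'subs wzr, w1, #4' (a cmp) must stay SUBSWri: in the
// non-flag-setting SUBWri encoding the destination field would denote WSP
// rather than WZR.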
bool MIDefinesZeroReg = false;
if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
MIDefinesZeroReg = true;
switch (MI.getOpcode()) {
default:
return MI.getOpcode();
case AArch64::ADDSWrr:
return AArch64::ADDWrr;
case AArch64::ADDSWri:
return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
case AArch64::ADDSWrs:
return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
case AArch64::ADDSWrx:
return AArch64::ADDWrx;
case AArch64::ADDSXrr:
return AArch64::ADDXrr;
case AArch64::ADDSXri:
return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
case AArch64::ADDSXrs:
return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
case AArch64::ADDSXrx:
return AArch64::ADDXrx;
case AArch64::SUBSWrr:
return AArch64::SUBWrr;
case AArch64::SUBSWri:
return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
case AArch64::SUBSWrs:
return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
case AArch64::SUBSWrx:
return AArch64::SUBWrx;
case AArch64::SUBSXrr:
return AArch64::SUBXrr;
case AArch64::SUBSXri:
return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
case AArch64::SUBSXrs:
return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
case AArch64::SUBSXrx:
return AArch64::SUBXrx;
}
}
enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
/// True when condition flags are accessed (either by writing or reading)
/// on the instruction trace starting at From and ending at To.
///
/// Note: If From and To are in different blocks, the condition flags are
/// assumed to be accessed on the path.
static bool areCFlagsAccessedBetweenInstrs(
MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
// Early exit if To is at the beginning of the BB.
if (To == To->getParent()->begin())
return true;
// Check whether the instructions are in the same basic block
// If not, assume the condition flags might get modified somewhere.
if (To->getParent() != From->getParent())
return true;
// From must be above To.
assert(std::any_of(
++To.getReverse(), To->getParent()->rend(),
[From](MachineInstr &MI) { return MI.getIterator() == From; }));
// We iterate backward starting at \p To until we hit \p From.
for (const MachineInstr &Instr :
instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
if (((AccessToCheck & AK_Write) &&
Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
return true;
}
return false;
}
/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
/// operation which could set the flags in an identical manner
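/// e.g. given 'ptrue p0.s' and 'whilelo p1.s, x0, x1', a following
/// 'ptest p0, p1.b' is redundant because the whilelo already sets NZCV for
/// every lane governed by the all-true mask.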
bool AArch64InstrInfo::optimizePTestInstr(
MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
const MachineRegisterInfo *MRI) const {
auto *Mask = MRI->getUniqueVRegDef(MaskReg);
auto *Pred = MRI->getUniqueVRegDef(PredReg);
auto NewOp = Pred->getOpcode();
bool OpChanged = false;
unsigned MaskOpcode = Mask->getOpcode();
unsigned PredOpcode = Pred->getOpcode();
bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
bool PredIsWhileLike = isWhileOpcode(PredOpcode);
if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike)) {
// For PTEST(PTRUE, OTHER_INST), PTEST is redundant when PTRUE doesn't
// deactivate any lanes OTHER_INST might set.
uint64_t MaskElementSize = getElementSizeForOpcode(MaskOpcode);
uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
// Must be an all active predicate of matching element size.
if ((PredElementSize != MaskElementSize) ||
(Mask->getOperand(1).getImm() != 31))
return false;
// Fallthrough to simply remove the PTEST.
} else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike)) {
// For PTEST(PG, PG), PTEST is redundant when PG is the result of an
// instruction that sets the flags as PTEST would.
// Fallthrough to simply remove the PTEST.
} else if (PredIsPTestLike) {
// For PTEST(PG_1, PTEST_LIKE(PG2, ...)), PTEST is redundant when both
// instructions use the same predicate.
auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
if (Mask != PTestLikeMask)
return false;
// Fallthrough to simply remove the PTEST.
} else {
switch (Pred->getOpcode()) {
case AArch64::BRKB_PPzP:
case AArch64::BRKPB_PPzPP: {
// Op 0 is chain, 1 is the mask, 2 the previous predicate to
// propagate, 3 the new predicate.
// Check to see if our mask is the same as the brkpb's. If
// not, the resulting flag bits may be different and we
// can't remove the ptest.
auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
if (Mask != PredMask)
return false;
// Switch to the new opcode
NewOp = Pred->getOpcode() == AArch64::BRKB_PPzP ? AArch64::BRKBS_PPzP
: AArch64::BRKPBS_PPzPP;
OpChanged = true;
break;
}
case AArch64::BRKN_PPzP: {
auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
if (Mask != PredMask)
return false;
NewOp = AArch64::BRKNS_PPzP;
OpChanged = true;
break;
}
case AArch64::RDFFR_PPz: {
// rdffr p1.b, PredMask=p0/z <--- Definition of Pred
// ptest Mask=p0, Pred=p1.b <--- If equal masks, remove this and use
// `rdffrs p1.b, p0/z` above.
auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
if (Mask != PredMask)
return false;
NewOp = AArch64::RDFFRS_PPz;
OpChanged = true;
break;
}
default:
// Bail out if we don't recognize the input
return false;
}
}
const TargetRegisterInfo *TRI = &getRegisterInfo();
// If another instruction between Pred and PTest accesses flags, don't remove
// the ptest or update the earlier instruction to modify them.
if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
return false;
// If we pass all the checks, it's safe to remove the PTEST and use the flags
// as they are prior to PTEST. Sometimes this requires the tested PTEST
// operand to be replaced with an equivalent instruction that also sets the
// flags.
Pred->setDesc(get(NewOp));
PTest->eraseFromParent();
if (OpChanged) {
bool succeeded = UpdateOperandRegClass(*Pred);
(void)succeeded;
assert(succeeded && "Operands have incompatible register classes!");
Pred->addRegisterDefined(AArch64::NZCV, TRI);
}
// Ensure that the flags def is live.
if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
unsigned i = 0, e = Pred->getNumOperands();
for (; i != e; ++i) {
MachineOperand &MO = Pred->getOperand(i);
if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
MO.setIsDead(false);
break;
}
}
}
return true;
}
/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It can be treated as a true
/// compare instruction when there are no uses of its destination register.
///
/// The following steps are tried in order:
/// 1. Convert CmpInstr into an unconditional version.
/// 2. Remove CmpInstr if there is an instruction above it producing the
/// needed condition code, or an instruction which can be converted into such
/// an instruction.
/// Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
int64_t CmpValue, const MachineRegisterInfo *MRI) const {
assert(CmpInstr.getParent());
assert(MRI);
// Replace SUBSWrr with SUBWrr if NZCV is not used.
int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
if (DeadNZCVIdx != -1) {
if (CmpInstr.definesRegister(AArch64::WZR) ||
CmpInstr.definesRegister(AArch64::XZR)) {
CmpInstr.eraseFromParent();
return true;
}
unsigned Opc = CmpInstr.getOpcode();
unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
if (NewOpc == Opc)
return false;
const MCInstrDesc &MCID = get(NewOpc);
CmpInstr.setDesc(MCID);
CmpInstr.RemoveOperand(DeadNZCVIdx);
bool succeeded = UpdateOperandRegClass(CmpInstr);
(void)succeeded;
assert(succeeded && "Some operands reg class are incompatible!");
return true;
}
if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
if (SrcReg2 != 0)
return false;
// CmpInstr is a compare instruction if its destination register is not used.
if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
return false;
if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
return true;
return (CmpValue == 0 || CmpValue == 1) &&
removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
}
/// Get the opcode of the S (flag-setting) version of Instr.
/// If Instr is already an S version, its opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
/// version or we are not interested in it.
static unsigned sForm(MachineInstr &Instr) {
switch (Instr.getOpcode()) {
default:
return AArch64::INSTRUCTION_LIST_END;
case AArch64::ADDSWrr:
case AArch64::ADDSWri:
case AArch64::ADDSXrr:
case AArch64::ADDSXri:
case AArch64::SUBSWrr:
case AArch64::SUBSWri:
case AArch64::SUBSXrr:
case AArch64::SUBSXri:
return Instr.getOpcode();
case AArch64::ADDWrr:
return AArch64::ADDSWrr;
case AArch64::ADDWri:
return AArch64::ADDSWri;
case AArch64::ADDXrr:
return AArch64::ADDSXrr;
case AArch64::ADDXri:
return AArch64::ADDSXri;
case AArch64::ADCWr:
return AArch64::ADCSWr;
case AArch64::ADCXr:
return AArch64::ADCSXr;
case AArch64::SUBWrr:
return AArch64::SUBSWrr;
case AArch64::SUBWri:
return AArch64::SUBSWri;
case AArch64::SUBXrr:
return AArch64::SUBSXrr;
case AArch64::SUBXri:
return AArch64::SUBSXri;
case AArch64::SBCWr:
return AArch64::SBCSWr;
case AArch64::SBCXr:
return AArch64::SBCSXr;
case AArch64::ANDWri:
return AArch64::ANDSWri;
case AArch64::ANDXri:
return AArch64::ANDSXri;
}
}
/// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
for (auto *BB : MBB->successors())
if (BB->isLiveIn(AArch64::NZCV))
return true;
return false;
}
/// \returns The condition code operand index for \p Instr if it is a branch
/// or select and -1 otherwise.
static int
findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
switch (Instr.getOpcode()) {
default:
return -1;
case AArch64::Bcc: {
int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
assert(Idx >= 2);
return Idx - 2;
}
case AArch64::CSINVWr:
case AArch64::CSINVXr:
case AArch64::CSINCWr:
case AArch64::CSINCXr:
case AArch64::CSELWr:
case AArch64::CSELXr:
case AArch64::CSNEGWr:
case AArch64::CSNEGXr:
case AArch64::FCSELSrrr:
case AArch64::FCSELDrrr: {
int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
assert(Idx >= 1);
return Idx - 1;
}
}
}
namespace {
struct UsedNZCV {
bool N = false;
bool Z = false;
bool C = false;
bool V = false;
UsedNZCV() = default;
UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
this->N |= UsedFlags.N;
this->Z |= UsedFlags.Z;
this->C |= UsedFlags.C;
this->V |= UsedFlags.V;
return *this;
}
};
} // end anonymous namespace
/// Find a condition code used by the instruction.
/// Returns AArch64CC::Invalid if either the instruction does not use condition
/// codes or we don't optimize CmpInstr in the presence of such instructions.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
Instr.getOperand(CCIdx).getImm())
: AArch64CC::Invalid;
}
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
assert(CC != AArch64CC::Invalid);
UsedNZCV UsedFlags;
switch (CC) {
default:
break;
case AArch64CC::EQ: // Z set
case AArch64CC::NE: // Z clear
UsedFlags.Z = true;
break;
case AArch64CC::HI: // Z clear and C set
case AArch64CC::LS: // Z set or C clear
UsedFlags.Z = true;
LLVM_FALLTHROUGH;
case AArch64CC::HS: // C set
case AArch64CC::LO: // C clear
UsedFlags.C = true;
break;
case AArch64CC::MI: // N set
case AArch64CC::PL: // N clear
UsedFlags.N = true;
break;
case AArch64CC::VS: // V set
case AArch64CC::VC: // V clear
UsedFlags.V = true;
break;
case AArch64CC::GT: // Z clear, N and V the same
case AArch64CC::LE: // Z set, N and V differ
UsedFlags.Z = true;
LLVM_FALLTHROUGH;
case AArch64CC::GE: // N and V the same
case AArch64CC::LT: // N and V differ
UsedFlags.N = true;
UsedFlags.V = true;
break;
}
return UsedFlags;
}
/// \returns the condition flags used after \p CmpInstr in its MachineBB if
/// they do not include the C or V flags and the NZCV flags are not alive in
/// the successors of the block containing both \p CmpInstr and \p MI.
/// \returns None otherwise.
///
/// Collects the instructions using those flags in \p CCUseInstrs if provided.
static Optional<UsedNZCV>
examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
const TargetRegisterInfo &TRI,
SmallVectorImpl<MachineInstr *> *CCUseInstrs = nullptr) {
MachineBasicBlock *CmpParent = CmpInstr.getParent();
if (MI.getParent() != CmpParent)
return None;
if (areCFlagsAliveInSuccessors(CmpParent))
return None;
UsedNZCV NZCVUsedAfterCmp;
for (MachineInstr &Instr : instructionsWithoutDebug(
std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
return None;
NZCVUsedAfterCmp |= getUsedNZCV(CC);
if (CCUseInstrs)
CCUseInstrs->push_back(&Instr);
}
if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
break;
}
if (NZCVUsedAfterCmp.C || NZCVUsedAfterCmp.V)
return None;
return NZCVUsedAfterCmp;
}
static bool isADDSRegImm(unsigned Opcode) {
return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
}
static bool isSUBSRegImm(unsigned Opcode) {
return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
}
/// Check if CmpInstr can be substituted by MI.
///
/// CmpInstr can be substituted:
/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
/// - and, MI and CmpInstr are from the same MachineBB
/// - and, condition flags are not alive in successors of the CmpInstr parent
/// - and, if MI opcode is the S form there must be no defs of flags between
/// MI and CmpInstr
/// or if MI opcode is not the S form there must be neither defs of flags
/// nor uses of flags between MI and CmpInstr.
/// - and C/V flags are not used after CmpInstr
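///
/// For example, given 'w8 = SUBWrr w0, w1' followed later by 'cmp w8, #0' and
/// 'b.eq', the compare can be removed by rewriting the sub as SUBSWrr so that
/// it sets NZCV itself.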
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
const TargetRegisterInfo &TRI) {
assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
const unsigned CmpOpcode = CmpInstr.getOpcode();
if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
return false;
if (!examineCFlagsUse(MI, CmpInstr, TRI))
return false;
AccessKind AccessToCheck = AK_Write;
if (sForm(MI) != MI.getOpcode())
AccessToCheck = AK_All;
return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
}
/// Substitute an instruction comparing to zero with another instruction
/// which produces needed condition flags.
///
/// Return true on success.
bool AArch64InstrInfo::substituteCmpToZero(
MachineInstr &CmpInstr, unsigned SrcReg,
const MachineRegisterInfo &MRI) const {
// Get the unique definition of SrcReg.
MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
if (!MI)
return false;
const TargetRegisterInfo &TRI = getRegisterInfo();
unsigned NewOpc = sForm(*MI);
if (NewOpc == AArch64::INSTRUCTION_LIST_END)
return false;
if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
return false;
// Update the instruction to set NZCV.
MI->setDesc(get(NewOpc));
CmpInstr.eraseFromParent();
bool succeeded = UpdateOperandRegClass(*MI);
(void)succeeded;
assert(succeeded && "Some operands reg class are incompatible!");
MI->addRegisterDefined(AArch64::NZCV, &TRI);
return true;
}
/// \returns True if \p CmpInstr can be removed.
///
/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
/// codes used in \p CCUseInstrs must be inverted.
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
int CmpValue, const TargetRegisterInfo &TRI,
SmallVectorImpl<MachineInstr *> &CCUseInstrs,
bool &IsInvertCC) {
assert((CmpValue == 0 || CmpValue == 1) &&
"Only comparisons to 0 or 1 considered for removal!");
// MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
unsigned MIOpc = MI.getOpcode();
if (MIOpc == AArch64::CSINCWr) {
if (MI.getOperand(1).getReg() != AArch64::WZR ||
MI.getOperand(2).getReg() != AArch64::WZR)
return false;
} else if (MIOpc == AArch64::CSINCXr) {
if (MI.getOperand(1).getReg() != AArch64::XZR ||
MI.getOperand(2).getReg() != AArch64::XZR)
return false;
} else {
return false;
}
AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
if (MICC == AArch64CC::Invalid)
return false;
// NZCV needs to be defined
if (MI.findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
return false;
// CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
const unsigned CmpOpcode = CmpInstr.getOpcode();
bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
if (CmpValue && !IsSubsRegImm)
return false;
if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
return false;
// MI conditions allowed: eq, ne, mi, pl
UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
if (MIUsedNZCV.C || MIUsedNZCV.V)
return false;
Optional<UsedNZCV> NZCVUsedAfterCmp =
examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
// Condition flags are not used in the successors of CmpInstr's basic block
// and only the Z or N flags are allowed to be used after CmpInstr within its
// basic block.
if (!NZCVUsedAfterCmp)
return false;
// Z or N flag used after CmpInstr must correspond to the flag used in MI
if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
(MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
return false;
// If CmpInstr is comparison to zero MI conditions are limited to eq, ne
if (MIUsedNZCV.N && !CmpValue)
return false;
// There must be no defs of flags between MI and CmpInstr
if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
return false;
// Condition code is inverted in the following cases:
// 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
// 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
(!CmpValue && MICC == AArch64CC::NE);
return true;
}
/// Remove the comparison in a csinc-cmp sequence.
///
/// Examples:
/// 1. \code
/// csinc w9, wzr, wzr, ne
/// cmp w9, #0
/// b.eq
/// \endcode
/// to
/// \code
/// csinc w9, wzr, wzr, ne
/// b.ne
/// \endcode
///
/// 2. \code
/// csinc x2, xzr, xzr, mi
/// cmp x2, #1
/// b.pl
/// \endcode
/// to
/// \code
/// csinc x2, xzr, xzr, mi
/// b.pl
/// \endcode
///
/// \param CmpInstr comparison instruction
/// \return True when comparison removed
bool AArch64InstrInfo::removeCmpToZeroOrOne(
MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
const MachineRegisterInfo &MRI) const {
MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
if (!MI)
return false;
const TargetRegisterInfo &TRI = getRegisterInfo();
SmallVector<MachineInstr *, 4> CCUseInstrs;
bool IsInvertCC = false;
if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
IsInvertCC))
return false;
// Make transformation
CmpInstr.eraseFromParent();
if (IsInvertCC) {
// Invert condition codes in CmpInstr CC users
for (MachineInstr *CCUseInstr : CCUseInstrs) {
int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
assert(Idx >= 0 && "Unexpected instruction using CC.");
MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
CCOperand.setImm(CCUse);
}
}
return true;
}
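// Expand the CATCHRET and LOAD_STACK_GUARD pseudos after register allocation.
// CATCHRET materializes the funclet target address with ADRP/ADDXri ahead of
// the epilog; LOAD_STACK_GUARD loads the guard value either from a system
// register ("sysreg" guards) or from the guard global via GOT, a MOVZ/MOVK
// sequence (large code model), ADR (tiny) or ADRP+LDR.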
bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
MI.getOpcode() != AArch64::CATCHRET)
return false;
MachineBasicBlock &MBB = *MI.getParent();
auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
auto TRI = Subtarget.getRegisterInfo();
DebugLoc DL = MI.getDebugLoc();
if (MI.getOpcode() == AArch64::CATCHRET) {
// Skip to the first instruction before the epilog.
const TargetInstrInfo *TII =
MBB.getParent()->getSubtarget().getInstrInfo();
MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
auto MBBI = MachineBasicBlock::iterator(MI);
MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
FirstEpilogSEH != MBB.begin())
FirstEpilogSEH = std::prev(FirstEpilogSEH);
if (FirstEpilogSEH != MBB.begin())
FirstEpilogSEH = std::next(FirstEpilogSEH);
BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
.addReg(AArch64::X0, RegState::Define)
.addMBB(TargetMBB);
BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
.addReg(AArch64::X0, RegState::Define)
.addReg(AArch64::X0)
.addMBB(TargetMBB)
.addImm(0);
return true;
}
Register Reg = MI.getOperand(0).getReg();
Module &M = *MBB.getParent()->getFunction().getParent();
if (M.getStackProtectorGuard() == "sysreg") {
const AArch64SysReg::SysReg *SrcReg =
AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
if (!SrcReg)
report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
// mrs xN, sysreg
BuildMI(MBB, MI, DL, get(AArch64::MRS))
.addDef(Reg, RegState::Renamable)
.addImm(SrcReg->Encoding);
int Offset = M.getStackProtectorGuardOffset();
if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
// ldr xN, [xN, #offset]
BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
.addDef(Reg)
.addUse(Reg, RegState::Kill)
.addImm(Offset / 8);
} else if (Offset >= -256 && Offset <= 255) {
// ldur xN, [xN, #offset]
BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
.addDef(Reg)
.addUse(Reg, RegState::Kill)
.addImm(Offset);
} else if (Offset >= -4095 && Offset <= 4095) {
if (Offset > 0) {
// add xN, xN, #offset
BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
.addDef(Reg)
.addUse(Reg, RegState::Kill)
.addImm(Offset)
.addImm(0);
} else {
// sub xN, xN, #offset
BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
.addDef(Reg)
.addUse(Reg, RegState::Kill)
.addImm(-Offset)
.addImm(0);
}
// ldr xN, [xN]
BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
.addDef(Reg)
.addUse(Reg, RegState::Kill)
.addImm(0);
} else {
// Cases that are larger than +/- 4095 and not a multiple of 8, or larger
// than 32760.
// It might be nice to use AArch64::MOVi32imm here, which would get
// expanded in PreSched2 after PostRA, but our lone scratch Reg already
// contains the MRS result. findScratchNonCalleeSaveRegister() in
// AArch64FrameLowering might help us find such a scratch register
// though. If we failed to find a scratch register, we could emit a
// stream of add instructions to build up the immediate. Or, we could try
// to insert a AArch64::MOVi32imm before register allocation so that we
// didn't need to scavenge for a scratch register.
report_fatal_error("Unable to encode Stack Protector Guard Offset");
}
MBB.erase(MI);
return true;
}
const GlobalValue *GV =
cast<GlobalValue>((*MI.memoperands_begin())->getValue());
const TargetMachine &TM = MBB.getParent()->getTarget();
unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
const unsigned char MO_NC = AArch64II::MO_NC;
if ((OpFlags & AArch64II::MO_GOT) != 0) {
BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
.addGlobalAddress(GV, 0, OpFlags);
if (Subtarget.isTargetILP32()) {
unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
.addDef(Reg32, RegState::Dead)
.addUse(Reg, RegState::Kill)
.addImm(0)
.addMemOperand(*MI.memoperands_begin())
.addDef(Reg, RegState::Implicit);
} else {
BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
.addReg(Reg, RegState::Kill)
.addImm(0)
.addMemOperand(*MI.memoperands_begin());
}
} else if (TM.getCodeModel() == CodeModel::Large) {
assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
.addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
.addImm(0);
BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
.addReg(Reg, RegState::Kill)
.addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
.addImm(16);
BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
.addReg(Reg, RegState::Kill)
.addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
.addImm(32);
BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
.addReg(Reg, RegState::Kill)
.addGlobalAddress(GV, 0, AArch64II::MO_G3)
.addImm(48);
BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
.addReg(Reg, RegState::Kill)
.addImm(0)
.addMemOperand(*MI.memoperands_begin());
} else if (TM.getCodeModel() == CodeModel::Tiny) {
BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
.addGlobalAddress(GV, 0, OpFlags);
} else {
BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
.addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
if (Subtarget.isTargetILP32()) {
unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
.addDef(Reg32, RegState::Dead)
.addUse(Reg, RegState::Kill)
.addGlobalAddress(GV, 0, LoFlags)
.addMemOperand(*MI.memoperands_begin())
.addDef(Reg, RegState::Implicit);
} else {
BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
.addReg(Reg, RegState::Kill)
.addGlobalAddress(GV, 0, LoFlags)
.addMemOperand(*MI.memoperands_begin());
}
}
MBB.erase(MI);
return true;
}
// Return true if this instruction simply sets its single destination register
// to zero. This is equivalent to a register rename of the zero-register.
bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
break;
case AArch64::MOVZWi:
case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
assert(MI.getDesc().getNumOperands() == 3 &&
MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
return true;
}
break;
case AArch64::ANDWri: // and Rd, Rzr, #imm
return MI.getOperand(1).getReg() == AArch64::WZR;
case AArch64::ANDXri:
return MI.getOperand(1).getReg() == AArch64::XZR;
case TargetOpcode::COPY:
return MI.getOperand(1).getReg() == AArch64::WZR;
}
return false;
}
// Return true if this instruction simply renames a general register without
// modifying bits.
bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
break;
case TargetOpcode::COPY: {
// GPR32 copies will be lowered to ORRXrs
Register DstReg = MI.getOperand(0).getReg();
return (AArch64::GPR32RegClass.contains(DstReg) ||
AArch64::GPR64RegClass.contains(DstReg));
}
case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
if (MI.getOperand(1).getReg() == AArch64::XZR) {
assert(MI.getDesc().getNumOperands() == 4 &&
MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
return true;
}
break;
case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
if (MI.getOperand(2).getImm() == 0) {
assert(MI.getDesc().getNumOperands() == 4 &&
MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
return true;
}
break;
}
return false;
}
// Return true if this instruction simply renames an FP/vector register
// without modifying bits.
bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
break;
case TargetOpcode::COPY: {
Register DstReg = MI.getOperand(0).getReg();
return AArch64::FPR128RegClass.contains(DstReg);
}
case AArch64::ORRv16i8:
if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
"invalid ORRv16i8 operands");
return true;
}
break;
}
return false;
}
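// If MI is a load from a frame index with a zero immediate offset, return the
// destination register and record the slot in FrameIndex; otherwise return 0.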
unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
switch (MI.getOpcode()) {
default:
break;
case AArch64::LDRWui:
case AArch64::LDRXui:
case AArch64::LDRBui:
case AArch64::LDRHui:
case AArch64::LDRSui:
case AArch64::LDRDui:
case AArch64::LDRQui:
if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
FrameIndex = MI.getOperand(1).getIndex();
return MI.getOperand(0).getReg();
}
break;
}
return 0;
}
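// Mirror of isLoadFromStackSlot for stores: return the register and frame
// index when MI accesses a stack slot directly with a zero offset, else 0.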
unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
switch (MI.getOpcode()) {
default:
break;
case AArch64::STRWui:
case AArch64::STRXui:
case AArch64::STRBui:
case AArch64::STRHui:
case AArch64::STRSui:
case AArch64::STRDui:
case AArch64::STRQui:
case AArch64::LDR_PXI:
case AArch64::STR_PXI:
if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
FrameIndex = MI.getOperand(1).getIndex();
return MI.getOperand(0).getReg();
}
break;
}
return 0;
}
/// Check all MachineMemOperands for a hint to suppress pairing.
bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
return MMO->getFlags() & MOSuppressPair;
});
}
/// Set a flag on the first MachineMemOperand to suppress pairing.
void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
if (MI.memoperands_empty())
return;
(*MI.memoperands_begin())->setFlags(MOSuppressPair);
}
/// Check all MachineMemOperands for a hint that the load/store is strided.
bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
return MMO->getFlags() & MOStridedAccess;
});
}
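// Return true if Opc is a load/store that takes an unscaled immediate offset
// (the LDUR/STUR family and the pre-indexed forms listed below).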
bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
switch (Opc) {
default:
return false;
case AArch64::STURSi:
case AArch64::STRSpre:
case AArch64::STURDi:
case AArch64::STRDpre:
case AArch64::STURQi:
case AArch64::STRQpre:
case AArch64::STURBBi:
case AArch64::STURHHi:
case AArch64::STURWi:
case AArch64::STRWpre:
case AArch64::STURXi:
case AArch64::STRXpre:
case AArch64::LDURSi:
case AArch64::LDRSpre:
case AArch64::LDURDi:
case AArch64::LDRDpre:
case AArch64::LDURQi:
case AArch64::LDRQpre:
case AArch64::LDURWi:
case AArch64::LDRWpre:
case AArch64::LDURXi:
case AArch64::LDRXpre:
case AArch64::LDURSWi:
case AArch64::LDURHHi:
case AArch64::LDURBBi:
case AArch64::LDURSBWi:
case AArch64::LDURSHWi:
return true;
}
}
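// Map a scaled load/store opcode to its unscaled (LDUR/STUR) equivalent, or
// return None if no such form exists.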
Optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
switch (Opc) {
default: return {};
case AArch64::PRFMui: return AArch64::PRFUMi;
case AArch64::LDRXui: return AArch64::LDURXi;
case AArch64::LDRWui: return AArch64::LDURWi;
case AArch64::LDRBui: return AArch64::LDURBi;
case AArch64::LDRHui: return AArch64::LDURHi;
case AArch64::LDRSui: return AArch64::LDURSi;
case AArch64::LDRDui: return AArch64::LDURDi;
case AArch64::LDRQui: return AArch64::LDURQi;
case AArch64::LDRBBui: return AArch64::LDURBBi;
case AArch64::LDRHHui: return AArch64::LDURHHi;
case AArch64::LDRSBXui: return AArch64::LDURSBXi;
case AArch64::LDRSBWui: return AArch64::LDURSBWi;
case AArch64::LDRSHXui: return AArch64::LDURSHXi;
case AArch64::LDRSHWui: return AArch64::LDURSHWi;
case AArch64::LDRSWui: return AArch64::LDURSWi;
case AArch64::STRXui: return AArch64::STURXi;
case AArch64::STRWui: return AArch64::STURWi;
case AArch64::STRBui: return AArch64::STURBi;
case AArch64::STRHui: return AArch64::STURHi;
case AArch64::STRSui: return AArch64::STURSi;
case AArch64::STRDui: return AArch64::STURDi;
case AArch64::STRQui: return AArch64::STURQi;
case AArch64::STRBBui: return AArch64::STURBBi;
case AArch64::STRHHui: return AArch64::STURHHi;
}
}
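// Return which operand index holds the immediate offset for the given
// load/store opcode.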
unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
switch (Opc) {
default:
return 2;
case AArch64::LDPXi:
case AArch64::LDPDi:
case AArch64::STPXi:
case AArch64::STPDi:
case AArch64::LDNPXi:
case AArch64::LDNPDi:
case AArch64::STNPXi:
case AArch64::STNPDi:
case AArch64::LDPQi:
case AArch64::STPQi:
case AArch64::LDNPQi:
case AArch64::STNPQi:
case AArch64::LDPWi:
case AArch64::LDPSi:
case AArch64::STPWi:
case AArch64::STPSi:
case AArch64::LDNPWi:
case AArch64::LDNPSi:
case AArch64::STNPWi:
case AArch64::STNPSi:
case AArch64::LDG:
case AArch64::STGPi:
case AArch64::LD1B_IMM:
case AArch64::LD1B_H_IMM:
case AArch64::LD1B_S_IMM:
case AArch64::LD1B_D_IMM:
case AArch64::LD1SB_H_IMM:
case AArch64::LD1SB_S_IMM:
case AArch64::LD1SB_D_IMM:
case AArch64::LD1H_IMM:
case AArch64::LD1H_S_IMM:
case AArch64::LD1H_D_IMM:
case AArch64::LD1SH_S_IMM:
case AArch64::LD1SH_D_IMM:
case AArch64::LD1W_IMM:
case AArch64::LD1W_D_IMM:
case AArch64::LD1SW_D_IMM:
case AArch64::LD1D_IMM:
+ case AArch64::LD2B_IMM:
+ case AArch64::LD2H_IMM:
+ case AArch64::LD2W_IMM:
+ case AArch64::LD2D_IMM:
+ case AArch64::LD3B_IMM:
+ case AArch64::LD3H_IMM:
+ case AArch64::LD3W_IMM:
+ case AArch64::LD3D_IMM:
+ case AArch64::LD4B_IMM:
+ case AArch64::LD4H_IMM:
+ case AArch64::LD4W_IMM:
+ case AArch64::LD4D_IMM:
+
case AArch64::ST1B_IMM:
case AArch64::ST1B_H_IMM:
case AArch64::ST1B_S_IMM:
case AArch64::ST1B_D_IMM:
case AArch64::ST1H_IMM:
case AArch64::ST1H_S_IMM:
case AArch64::ST1H_D_IMM:
case AArch64::ST1W_IMM:
case AArch64::ST1W_D_IMM:
case AArch64::ST1D_IMM:
+ case AArch64::ST2B_IMM:
+ case AArch64::ST2H_IMM:
+ case AArch64::ST2W_IMM:
+ case AArch64::ST2D_IMM:
+ case AArch64::ST3B_IMM:
+ case AArch64::ST3H_IMM:
+ case AArch64::ST3W_IMM:
+ case AArch64::ST3D_IMM:
+ case AArch64::ST4B_IMM:
+ case AArch64::ST4H_IMM:
+ case AArch64::ST4W_IMM:
+ case AArch64::ST4D_IMM:
+
case AArch64::LD1RB_IMM:
case AArch64::LD1RB_H_IMM:
case AArch64::LD1RB_S_IMM:
case AArch64::LD1RB_D_IMM:
case AArch64::LD1RSB_H_IMM:
case AArch64::LD1RSB_S_IMM:
case AArch64::LD1RSB_D_IMM:
case AArch64::LD1RH_IMM:
case AArch64::LD1RH_S_IMM:
case AArch64::LD1RH_D_IMM:
case AArch64::LD1RSH_S_IMM:
case AArch64::LD1RSH_D_IMM:
case AArch64::LD1RW_IMM:
case AArch64::LD1RW_D_IMM:
case AArch64::LD1RSW_IMM:
case AArch64::LD1RD_IMM:
case AArch64::LDNT1B_ZRI:
case AArch64::LDNT1H_ZRI:
case AArch64::LDNT1W_ZRI:
case AArch64::LDNT1D_ZRI:
case AArch64::STNT1B_ZRI:
case AArch64::STNT1H_ZRI:
case AArch64::STNT1W_ZRI:
case AArch64::STNT1D_ZRI:
case AArch64::LDNF1B_IMM:
case AArch64::LDNF1B_H_IMM:
case AArch64::LDNF1B_S_IMM:
case AArch64::LDNF1B_D_IMM:
case AArch64::LDNF1SB_H_IMM:
case AArch64::LDNF1SB_S_IMM:
case AArch64::LDNF1SB_D_IMM:
case AArch64::LDNF1H_IMM:
case AArch64::LDNF1H_S_IMM:
case AArch64::LDNF1H_D_IMM:
case AArch64::LDNF1SH_S_IMM:
case AArch64::LDNF1SH_D_IMM:
case AArch64::LDNF1W_IMM:
case AArch64::LDNF1W_D_IMM:
case AArch64::LDNF1SW_D_IMM:
case AArch64::LDNF1D_IMM:
return 3;
case AArch64::ADDG:
case AArch64::STGOffset:
case AArch64::LDR_PXI:
case AArch64::STR_PXI:
return 2;
}
}
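// Return true if MI is a single-register load/store whose opcode can be
// combined into a load/store pair (the scaled and unscaled forms below).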
bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
return false;
// Scaled instructions.
case AArch64::STRSui:
case AArch64::STRDui:
case AArch64::STRQui:
case AArch64::STRXui:
case AArch64::STRWui:
case AArch64::LDRSui:
case AArch64::LDRDui:
case AArch64::LDRQui:
case AArch64::LDRXui:
case AArch64::LDRWui:
case AArch64::LDRSWui:
// Unscaled instructions.
case AArch64::STURSi:
case AArch64::STRSpre:
case AArch64::STURDi:
case AArch64::STRDpre:
case AArch64::STURQi:
case AArch64::STRQpre:
case AArch64::STURWi:
case AArch64::STRWpre:
case AArch64::STURXi:
case AArch64::STRXpre:
case AArch64::LDURSi:
case AArch64::LDRSpre:
case AArch64::LDURDi:
case AArch64::LDRDpre:
case AArch64::LDURQi:
case AArch64::LDRQpre:
case AArch64::LDURWi:
case AArch64::LDRWpre:
case AArch64::LDURXi:
case AArch64::LDRXpre:
case AArch64::LDURSWi:
return true;
}
}
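// Return the flag-setting (NZCV-defining) variant of an ADD/SUB/AND/BIC
// opcode, reporting via Is64Bit whether it operates on X registers.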
unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
bool &Is64Bit) {
switch (Opc) {
default:
llvm_unreachable("Opcode has no flag setting equivalent!");
// 32-bit cases:
case AArch64::ADDWri:
Is64Bit = false;
return AArch64::ADDSWri;
case AArch64::ADDWrr:
Is64Bit = false;
return AArch64::ADDSWrr;
case AArch64::ADDWrs:
Is64Bit = false;
return AArch64::ADDSWrs;
case AArch64::ADDWrx:
Is64Bit = false;
return AArch64::ADDSWrx;
case AArch64::ANDWri:
Is64Bit = false;
return AArch64::ANDSWri;
case AArch64::ANDWrr:
Is64Bit = false;
return AArch64::ANDSWrr;
case AArch64::ANDWrs:
Is64Bit = false;
return AArch64::ANDSWrs;
case AArch64::BICWrr:
Is64Bit = false;
return AArch64::BICSWrr;
case AArch64::BICWrs:
Is64Bit = false;
return AArch64::BICSWrs;
case AArch64::SUBWri:
Is64Bit = false;
return AArch64::SUBSWri;
case AArch64::SUBWrr:
Is64Bit = false;
return AArch64::SUBSWrr;
case AArch64::SUBWrs:
Is64Bit = false;
return AArch64::SUBSWrs;
case AArch64::SUBWrx:
Is64Bit = false;
return AArch64::SUBSWrx;
// 64-bit cases:
case AArch64::ADDXri:
Is64Bit = true;
return AArch64::ADDSXri;
case AArch64::ADDXrr:
Is64Bit = true;
return AArch64::ADDSXrr;
case AArch64::ADDXrs:
Is64Bit = true;
return AArch64::ADDSXrs;
case AArch64::ADDXrx:
Is64Bit = true;
return AArch64::ADDSXrx;
case AArch64::ANDXri:
Is64Bit = true;
return AArch64::ANDSXri;
case AArch64::ANDXrr:
Is64Bit = true;
return AArch64::ANDSXrr;
case AArch64::ANDXrs:
Is64Bit = true;
return AArch64::ANDSXrs;
case AArch64::BICXrr:
Is64Bit = true;
return AArch64::BICSXrr;
case AArch64::BICXrs:
Is64Bit = true;
return AArch64::BICSXrs;
case AArch64::SUBXri:
Is64Bit = true;
return AArch64::SUBSXri;
case AArch64::SUBXrr:
Is64Bit = true;
return AArch64::SUBSXrr;
case AArch64::SUBXrs:
Is64Bit = true;
return AArch64::SUBSXrs;
case AArch64::SUBXrx:
Is64Bit = true;
return AArch64::SUBSXrx;
}
}
// Is this a candidate for ld/st merging or pairing? For example, we don't
// touch volatiles or load/stores that have a hint to avoid pair formation.
bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
bool IsPreLdSt = isPreLdSt(MI);
// If this is a volatile load/store, don't mess with it.
if (MI.hasOrderedMemoryRef())
return false;
// Make sure this is a reg/fi+imm (as opposed to an address reloc).
// For Pre-inc LD/ST, the operand is shifted by one.
assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
"Expected a reg or frame index operand.");
// For Pre-indexed addressing quadword instructions, the third operand is the
// immediate value.
bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
return false;
// Can't merge/pair if the instruction modifies the base register.
// e.g., ldr x0, [x0]
// This case will never occur with an FI base.
// However, if the instruction is an LDR/STR<S,D,Q,W,X>pre, it can be merged.
// For example:
// ldr q0, [x11, #32]!
// ldr q1, [x11, #16]
// to
// ldp q0, q1, [x11, #32]!
if (MI.getOperand(1).isReg() && !IsPreLdSt) {
Register BaseReg = MI.getOperand(1).getReg();
const TargetRegisterInfo *TRI = &getRegisterInfo();
if (MI.modifiesRegister(BaseReg, TRI))
return false;
}
// Check if this load/store has a hint to avoid pair formation.
// MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
if (isLdStPairSuppressed(MI))
return false;
// Do not pair any callee-save store/reload instructions in the
// prologue/epilogue if the CFI information encoded the operations as separate
// instructions, as that will cause the size of the actual prologue to mismatch
// with the prologue size recorded in the Windows CFI.
const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
bool NeedsWinCFI = MAI->usesWindowsCFI() &&
MI.getMF()->getFunction().needsUnwindTableEntry();
if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
MI.getFlag(MachineInstr::FrameDestroy)))
return false;
// On some CPUs quad load/store pairs are slower than two single load/stores.
if (Subtarget.isPaired128Slow()) {
switch (MI.getOpcode()) {
default:
break;
case AArch64::LDURQi:
case AArch64::STURQi:
case AArch64::LDRQui:
case AArch64::STRQui:
return false;
}
}
return true;
}
bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
const TargetRegisterInfo *TRI) const {
if (!LdSt.mayLoadOrStore())
return false;
const MachineOperand *BaseOp;
if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
Width, TRI))
return false;
BaseOps.push_back(BaseOp);
return true;
}
Optional<ExtAddrMode>
AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
const TargetRegisterInfo *TRI) const {
const MachineOperand *Base; // Filled with the base operand of MI.
int64_t Offset; // Filled with the offset of MI.
bool OffsetIsScalable;
if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
return None;
if (!Base->isReg())
return None;
ExtAddrMode AM;
AM.BaseReg = Base->getReg();
AM.Displacement = Offset;
AM.ScaledReg = 0;
AM.Scale = 0;
return AM;
}
bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
bool &OffsetIsScalable, unsigned &Width,
const TargetRegisterInfo *TRI) const {
assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
// Handle only loads/stores with base register followed by immediate offset.
if (LdSt.getNumExplicitOperands() == 3) {
// Non-paired instruction (e.g., ldr x1, [x0, #8]).
if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
!LdSt.getOperand(2).isImm())
return false;
} else if (LdSt.getNumExplicitOperands() == 4) {
// Paired instruction (e.g., ldp x1, x2, [x0, #8]).
if (!LdSt.getOperand(1).isReg() ||
(!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
!LdSt.getOperand(3).isImm())
return false;
} else
return false;
// Get the scaling factor for the instruction and set the width for the
// instruction.
TypeSize Scale(0U, false);
int64_t Dummy1, Dummy2;
// If this returns false, then it's an instruction we don't want to handle.
if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
return false;
// Compute the offset. Offset is calculated as the immediate operand
// multiplied by the scaling factor. Unscaled instructions have scaling factor
// set to 1.
if (LdSt.getNumExplicitOperands() == 3) {
BaseOp = &LdSt.getOperand(1);
Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinSize();
} else {
assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
BaseOp = &LdSt.getOperand(2);
Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinSize();
}
OffsetIsScalable = Scale.isScalable();
if (!BaseOp->isReg() && !BaseOp->isFI())
return false;
return true;
}
MachineOperand &
AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
return OfsOp;
}
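// For a load/store opcode, report the scale applied to its immediate operand,
// the width of the access in bytes, and the legal range of the immediate.
// Returns false for opcodes this table does not cover.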
bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
unsigned &Width, int64_t &MinOffset,
int64_t &MaxOffset) {
const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8;
switch (Opcode) {
// Not a memory operation, or not something we want to handle.
default:
Scale = TypeSize::Fixed(0);
Width = 0;
MinOffset = MaxOffset = 0;
return false;
case AArch64::STRWpost:
case AArch64::LDRWpost:
Width = 32;
Scale = TypeSize::Fixed(4);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::LDURQi:
case AArch64::STURQi:
Width = 16;
Scale = TypeSize::Fixed(1);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::PRFUMi:
case AArch64::LDURXi:
case AArch64::LDURDi:
case AArch64::STURXi:
case AArch64::STURDi:
Width = 8;
Scale = TypeSize::Fixed(1);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::LDURWi:
case AArch64::LDURSi:
case AArch64::LDURSWi:
case AArch64::STURWi:
case AArch64::STURSi:
Width = 4;
Scale = TypeSize::Fixed(1);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::LDURHi:
case AArch64::LDURHHi:
case AArch64::LDURSHXi:
case AArch64::LDURSHWi:
case AArch64::STURHi:
case AArch64::STURHHi:
Width = 2;
Scale = TypeSize::Fixed(1);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::LDURBi:
case AArch64::LDURBBi:
case AArch64::LDURSBXi:
case AArch64::LDURSBWi:
case AArch64::STURBi:
case AArch64::STURBBi:
Width = 1;
Scale = TypeSize::Fixed(1);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::LDPQi:
case AArch64::LDNPQi:
case AArch64::STPQi:
case AArch64::STNPQi:
Scale = TypeSize::Fixed(16);
Width = 32;
MinOffset = -64;
MaxOffset = 63;
break;
case AArch64::LDRQui:
case AArch64::STRQui:
Scale = TypeSize::Fixed(16);
Width = 16;
MinOffset = 0;
MaxOffset = 4095;
break;
case AArch64::LDPXi:
case AArch64::LDPDi:
case AArch64::LDNPXi:
case AArch64::LDNPDi:
case AArch64::STPXi:
case AArch64::STPDi:
case AArch64::STNPXi:
case AArch64::STNPDi:
Scale = TypeSize::Fixed(8);
Width = 16;
MinOffset = -64;
MaxOffset = 63;
break;
case AArch64::PRFMui:
case AArch64::LDRXui:
case AArch64::LDRDui:
case AArch64::STRXui:
case AArch64::STRDui:
Scale = TypeSize::Fixed(8);
Width = 8;
MinOffset = 0;
MaxOffset = 4095;
break;
case AArch64::StoreSwiftAsyncContext:
// Store is an STRXui, but there might be an ADDXri in the expansion too.
Scale = TypeSize::Fixed(1);
Width = 8;
MinOffset = 0;
MaxOffset = 4095;
break;
case AArch64::LDPWi:
case AArch64::LDPSi:
case AArch64::LDNPWi:
case AArch64::LDNPSi:
case AArch64::STPWi:
case AArch64::STPSi:
case AArch64::STNPWi:
case AArch64::STNPSi:
Scale = TypeSize::Fixed(4);
Width = 8;
MinOffset = -64;
MaxOffset = 63;
break;
case AArch64::LDRWui:
case AArch64::LDRSui:
case AArch64::LDRSWui:
case AArch64::STRWui:
case AArch64::STRSui:
Scale = TypeSize::Fixed(4);
Width = 4;
MinOffset = 0;
MaxOffset = 4095;
break;
case AArch64::LDRHui:
case AArch64::LDRHHui:
case AArch64::LDRSHWui:
case AArch64::LDRSHXui:
case AArch64::STRHui:
case AArch64::STRHHui:
Scale = TypeSize::Fixed(2);
Width = 2;
MinOffset = 0;
MaxOffset = 4095;
break;
case AArch64::LDRBui:
case AArch64::LDRBBui:
case AArch64::LDRSBWui:
case AArch64::LDRSBXui:
case AArch64::STRBui:
case AArch64::STRBBui:
Scale = TypeSize::Fixed(1);
Width = 1;
MinOffset = 0;
MaxOffset = 4095;
break;
case AArch64::STPXpre:
case AArch64::LDPXpost:
case AArch64::STPDpre:
case AArch64::LDPDpost:
Scale = TypeSize::Fixed(8);
Width = 8;
MinOffset = -512;
MaxOffset = 504;
break;
case AArch64::STPQpre:
case AArch64::LDPQpost:
Scale = TypeSize::Fixed(16);
Width = 16;
MinOffset = -1024;
MaxOffset = 1008;
break;
case AArch64::STRXpre:
case AArch64::STRDpre:
case AArch64::LDRXpost:
case AArch64::LDRDpost:
Scale = TypeSize::Fixed(1);
Width = 8;
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::STRQpre:
case AArch64::LDRQpost:
Scale = TypeSize::Fixed(1);
Width = 16;
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::ADDG:
Scale = TypeSize::Fixed(16);
Width = 0;
MinOffset = 0;
MaxOffset = 63;
break;
case AArch64::TAGPstack:
Scale = TypeSize::Fixed(16);
Width = 0;
// TAGP with a negative offset turns into SUBP, which has a maximum offset
// of 63 (not 64!).
MinOffset = -63;
MaxOffset = 63;
break;
case AArch64::LDG:
case AArch64::STGOffset:
case AArch64::STZGOffset:
Scale = TypeSize::Fixed(16);
Width = 16;
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::STR_ZZZZXI:
case AArch64::LDR_ZZZZXI:
Scale = TypeSize::Scalable(16);
Width = SVEMaxBytesPerVector * 4;
MinOffset = -256;
MaxOffset = 252;
break;
case AArch64::STR_ZZZXI:
case AArch64::LDR_ZZZXI:
Scale = TypeSize::Scalable(16);
Width = SVEMaxBytesPerVector * 3;
MinOffset = -256;
MaxOffset = 253;
break;
case AArch64::STR_ZZXI:
case AArch64::LDR_ZZXI:
Scale = TypeSize::Scalable(16);
Width = SVEMaxBytesPerVector * 2;
MinOffset = -256;
MaxOffset = 254;
break;
case AArch64::LDR_PXI:
case AArch64::STR_PXI:
Scale = TypeSize::Scalable(2);
Width = SVEMaxBytesPerVector / 8;
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::LDR_ZXI:
case AArch64::STR_ZXI:
Scale = TypeSize::Scalable(16);
Width = SVEMaxBytesPerVector;
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::LD1B_IMM:
case AArch64::LD1H_IMM:
case AArch64::LD1W_IMM:
case AArch64::LD1D_IMM:
case AArch64::LDNT1B_ZRI:
case AArch64::LDNT1H_ZRI:
case AArch64::LDNT1W_ZRI:
case AArch64::LDNT1D_ZRI:
case AArch64::ST1B_IMM:
case AArch64::ST1H_IMM:
case AArch64::ST1W_IMM:
case AArch64::ST1D_IMM:
case AArch64::STNT1B_ZRI:
case AArch64::STNT1H_ZRI:
case AArch64::STNT1W_ZRI:
case AArch64::STNT1D_ZRI:
case AArch64::LDNF1B_IMM:
case AArch64::LDNF1H_IMM:
case AArch64::LDNF1W_IMM:
case AArch64::LDNF1D_IMM:
// A full vector's worth of data
// Width = mbytes * elements
Scale = TypeSize::Scalable(16);
Width = SVEMaxBytesPerVector;
MinOffset = -8;
MaxOffset = 7;
break;
+ case AArch64::LD2B_IMM:
+ case AArch64::LD2H_IMM:
+ case AArch64::LD2W_IMM:
+ case AArch64::LD2D_IMM:
+ case AArch64::ST2B_IMM:
+ case AArch64::ST2H_IMM:
+ case AArch64::ST2W_IMM:
+ case AArch64::ST2D_IMM:
+ Scale = TypeSize::Scalable(32);
+ Width = SVEMaxBytesPerVector * 2;
+ MinOffset = -8;
+ MaxOffset = 7;
+ break;
+ case AArch64::LD3B_IMM:
+ case AArch64::LD3H_IMM:
+ case AArch64::LD3W_IMM:
+ case AArch64::LD3D_IMM:
+ case AArch64::ST3B_IMM:
+ case AArch64::ST3H_IMM:
+ case AArch64::ST3W_IMM:
+ case AArch64::ST3D_IMM:
+ Scale = TypeSize::Scalable(48);
+ Width = SVEMaxBytesPerVector * 3;
+ MinOffset = -8;
+ MaxOffset = 7;
+ break;
+ case AArch64::LD4B_IMM:
+ case AArch64::LD4H_IMM:
+ case AArch64::LD4W_IMM:
+ case AArch64::LD4D_IMM:
+ case AArch64::ST4B_IMM:
+ case AArch64::ST4H_IMM:
+ case AArch64::ST4W_IMM:
+ case AArch64::ST4D_IMM:
+ Scale = TypeSize::Scalable(64);
+ Width = SVEMaxBytesPerVector * 4;
+ MinOffset = -8;
+ MaxOffset = 7;
+ break;
case AArch64::LD1B_H_IMM:
case AArch64::LD1SB_H_IMM:
case AArch64::LD1H_S_IMM:
case AArch64::LD1SH_S_IMM:
case AArch64::LD1W_D_IMM:
case AArch64::LD1SW_D_IMM:
case AArch64::ST1B_H_IMM:
case AArch64::ST1H_S_IMM:
case AArch64::ST1W_D_IMM:
case AArch64::LDNF1B_H_IMM:
case AArch64::LDNF1SB_H_IMM:
case AArch64::LDNF1H_S_IMM:
case AArch64::LDNF1SH_S_IMM:
case AArch64::LDNF1W_D_IMM:
case AArch64::LDNF1SW_D_IMM:
// A half vector's worth of data
// Width = mbytes * elements
Scale = TypeSize::Scalable(8);
Width = SVEMaxBytesPerVector / 2;
MinOffset = -8;
MaxOffset = 7;
break;
case AArch64::LD1B_S_IMM:
case AArch64::LD1SB_S_IMM:
case AArch64::LD1H_D_IMM:
case AArch64::LD1SH_D_IMM:
case AArch64::ST1B_S_IMM:
case AArch64::ST1H_D_IMM:
case AArch64::LDNF1B_S_IMM:
case AArch64::LDNF1SB_S_IMM:
case AArch64::LDNF1H_D_IMM:
case AArch64::LDNF1SH_D_IMM:
// A quarter vector's worth of data
// Width = mbytes * elements
Scale = TypeSize::Scalable(4);
Width = SVEMaxBytesPerVector / 4;
MinOffset = -8;
MaxOffset = 7;
break;
case AArch64::LD1B_D_IMM:
case AArch64::LD1SB_D_IMM:
case AArch64::ST1B_D_IMM:
case AArch64::LDNF1B_D_IMM:
case AArch64::LDNF1SB_D_IMM:
// An eighth vector's worth of data
// Width = mbytes * elements
Scale = TypeSize::Scalable(2);
Width = SVEMaxBytesPerVector / 8;
MinOffset = -8;
MaxOffset = 7;
break;
case AArch64::ST2GOffset:
case AArch64::STZ2GOffset:
Scale = TypeSize::Fixed(16);
Width = 32;
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::STGPi:
Scale = TypeSize::Fixed(16);
Width = 16;
MinOffset = -64;
MaxOffset = 63;
break;
case AArch64::LD1RB_IMM:
case AArch64::LD1RB_H_IMM:
case AArch64::LD1RB_S_IMM:
case AArch64::LD1RB_D_IMM:
case AArch64::LD1RSB_H_IMM:
case AArch64::LD1RSB_S_IMM:
case AArch64::LD1RSB_D_IMM:
Scale = TypeSize::Fixed(1);
Width = 1;
MinOffset = 0;
MaxOffset = 63;
break;
case AArch64::LD1RH_IMM:
case AArch64::LD1RH_S_IMM:
case AArch64::LD1RH_D_IMM:
case AArch64::LD1RSH_S_IMM:
case AArch64::LD1RSH_D_IMM:
Scale = TypeSize::Fixed(2);
Width = 2;
MinOffset = 0;
MaxOffset = 63;
break;
case AArch64::LD1RW_IMM:
case AArch64::LD1RW_D_IMM:
case AArch64::LD1RSW_IMM:
Scale = TypeSize::Fixed(4);
Width = 4;
MinOffset = 0;
MaxOffset = 63;
break;
case AArch64::LD1RD_IMM:
Scale = TypeSize::Fixed(8);
Width = 8;
MinOffset = 0;
MaxOffset = 63;
break;
}
return true;
}
// Scaling factor for unscaled load or store.
int AArch64InstrInfo::getMemScale(unsigned Opc) {
switch (Opc) {
default:
llvm_unreachable("Opcode has unknown scale!");
case AArch64::LDRBBui:
case AArch64::LDURBBi:
case AArch64::LDRSBWui:
case AArch64::LDURSBWi:
case AArch64::STRBBui:
case AArch64::STURBBi:
return 1;
case AArch64::LDRHHui:
case AArch64::LDURHHi:
case AArch64::LDRSHWui:
case AArch64::LDURSHWi:
case AArch64::STRHHui:
case AArch64::STURHHi:
return 2;
case AArch64::LDRSui:
case AArch64::LDURSi:
case AArch64::LDRSpre:
case AArch64::LDRSWui:
case AArch64::LDURSWi:
case AArch64::LDRWpre:
case AArch64::LDRWui:
case AArch64::LDURWi:
case AArch64::STRSui:
case AArch64::STURSi:
case AArch64::STRSpre:
case AArch64::STRWui:
case AArch64::STURWi:
case AArch64::STRWpre:
case AArch64::LDPSi:
case AArch64::LDPSWi:
case AArch64::LDPWi:
case AArch64::STPSi:
case AArch64::STPWi:
return 4;
case AArch64::LDRDui:
case AArch64::LDURDi:
case AArch64::LDRDpre:
case AArch64::LDRXui:
case AArch64::LDURXi:
case AArch64::LDRXpre:
case AArch64::STRDui:
case AArch64::STURDi:
case AArch64::STRDpre:
case AArch64::STRXui:
case AArch64::STURXi:
case AArch64::STRXpre:
case AArch64::LDPDi:
case AArch64::LDPXi:
case AArch64::STPDi:
case AArch64::STPXi:
return 8;
case AArch64::LDRQui:
case AArch64::LDURQi:
case AArch64::STRQui:
case AArch64::STURQi:
case AArch64::STRQpre:
case AArch64::LDPQi:
case AArch64::LDRQpre:
case AArch64::STPQi:
case AArch64::STGOffset:
case AArch64::STZGOffset:
case AArch64::ST2GOffset:
case AArch64::STZ2GOffset:
case AArch64::STGPi:
return 16;
}
}
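// Return true if MI is a pre-indexed (writeback) load.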
bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
return false;
case AArch64::LDRWpre:
case AArch64::LDRXpre:
case AArch64::LDRSpre:
case AArch64::LDRDpre:
case AArch64::LDRQpre:
return true;
}
}
bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
return false;
case AArch64::STRWpre:
case AArch64::STRXpre:
case AArch64::STRSpre:
case AArch64::STRDpre:
case AArch64::STRQpre:
return true;
}
}
bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
return isPreLd(MI) || isPreSt(MI);
}
// Scale the unscaled offsets. Returns false if the unscaled offset can't be
// scaled.
static bool scaleOffset(unsigned Opc, int64_t &Offset) {
int Scale = AArch64InstrInfo::getMemScale(Opc);
// If the byte-offset isn't a multiple of the stride, we can't scale this
// offset.
if (Offset % Scale != 0)
return false;
// Convert the byte-offset used by unscaled instructions into an "element"
// offset used by the scaled pair load/store instructions.
Offset /= Scale;
return true;
}
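// Return true if two load/store opcodes may be combined into a pair: identical
// opcodes always can, and a 32-bit zero-extending load can pair with its
// sign-extending counterpart.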
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
if (FirstOpc == SecondOpc)
return true;
// We can also pair sign-ext and zero-ext instructions.
switch (FirstOpc) {
default:
return false;
case AArch64::LDRWui:
case AArch64::LDURWi:
return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
case AArch64::LDRSWui:
case AArch64::LDURSWi:
return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
}
// These instructions can't be paired based on their opcodes.
return false;
}
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
int64_t Offset1, unsigned Opcode1, int FI2,
int64_t Offset2, unsigned Opcode2) {
// Accesses through fixed stack object frame indices may access a different
// fixed stack slot. Check that the object offsets + offsets match.
if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
// Convert to scaled object offsets.
int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
if (ObjectOffset1 % Scale1 != 0)
return false;
ObjectOffset1 /= Scale1;
int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
if (ObjectOffset2 % Scale2 != 0)
return false;
ObjectOffset2 /= Scale2;
ObjectOffset1 += Offset1;
ObjectOffset2 += Offset2;
return ObjectOffset1 + 1 == ObjectOffset2;
}
return FI1 == FI2;
}
/// Detect opportunities for ldp/stp formation.
///
/// Only called for LdSt for which getMemOperandWithOffset returns true.
bool AArch64InstrInfo::shouldClusterMemOps(
ArrayRef<const MachineOperand *> BaseOps1,
ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads,
unsigned NumBytes) const {
assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
const MachineOperand &BaseOp1 = *BaseOps1.front();
const MachineOperand &BaseOp2 = *BaseOps2.front();
const MachineInstr &FirstLdSt = *BaseOp1.getParent();
const MachineInstr &SecondLdSt = *BaseOp2.getParent();
if (BaseOp1.getType() != BaseOp2.getType())
return false;
assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
"Only base registers and frame indices are supported.");
// Check for both base regs and base FI.
if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
return false;
// Only cluster up to a single pair.
if (NumLoads > 2)
return false;
if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
return false;
// Can we pair these instructions based on their opcodes?
unsigned FirstOpc = FirstLdSt.getOpcode();
unsigned SecondOpc = SecondLdSt.getOpcode();
if (!canPairLdStOpc(FirstOpc, SecondOpc))
return false;
// Can't merge volatiles or load/stores that have a hint to avoid pair
// formation, for example.
if (!isCandidateToMergeOrPair(FirstLdSt) ||
!isCandidateToMergeOrPair(SecondLdSt))
return false;
// isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
return false;
int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
return false;
// Pairwise instructions have a 7-bit signed offset field.
if (Offset1 > 63 || Offset1 < -64)
return false;
// The caller should already have ordered First/SecondLdSt by offset.
// Note: except for non-equal frame index bases
if (BaseOp1.isFI()) {
assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
"Caller should have ordered offsets.");
const MachineFrameInfo &MFI =
FirstLdSt.getParent()->getParent()->getFrameInfo();
return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
BaseOp2.getIndex(), Offset2, SecondOpc);
}
assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
return Offset1 + 1 == Offset2;
}
static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
unsigned Reg, unsigned SubIdx,
unsigned State,
const TargetRegisterInfo *TRI) {
if (!SubIdx)
return MIB.addReg(Reg, State);
if (Register::isPhysicalRegister(Reg))
return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
return MIB.addReg(Reg, State, SubIdx);
}
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
unsigned NumRegs) {
// We really want the positive remainder mod 32 here; that happens to be
// easily obtainable with a mask.
return ((DestReg - SrcReg) & 0x1f) < NumRegs;
}
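// Copy a register tuple one sub-register at a time, iterating in reverse when
// a forward copy would clobber source sub-registers not yet copied.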
void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg,
MCRegister SrcReg, bool KillSrc,
unsigned Opcode,
ArrayRef<unsigned> Indices) const {
assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
const TargetRegisterInfo *TRI = &getRegisterInfo();
uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
unsigned NumRegs = Indices.size();
int SubReg = 0, End = NumRegs, Incr = 1;
if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
SubReg = NumRegs - 1;
End = -1;
Incr = -1;
}
for (; SubReg != End; SubReg += Incr) {
const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
}
}
void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
DebugLoc DL, unsigned DestReg,
unsigned SrcReg, bool KillSrc,
unsigned Opcode, unsigned ZeroReg,
llvm::ArrayRef<unsigned> Indices) const {
const TargetRegisterInfo *TRI = &getRegisterInfo();
unsigned NumRegs = Indices.size();
#ifndef NDEBUG
uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
"GPR reg sequences should not be able to overlap");
#endif
for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
MIB.addReg(ZeroReg);
AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
MIB.addImm(0);
}
}
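// Emit a physical register-to-register copy, choosing the instruction by
// register class: ADD/ORR/MOVZ for GPRs, ORR for SVE and NEON registers and
// tuples, FMOV for FP registers and GPR<->FPR transfers, and MSR/MRS for
// copies involving NZCV.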
void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg,
MCRegister SrcReg, bool KillSrc) const {
if (AArch64::GPR32spRegClass.contains(DestReg) &&
(AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
const TargetRegisterInfo *TRI = &getRegisterInfo();
if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
// If either operand is WSP, expand to ADD #0.
if (Subtarget.hasZeroCycleRegMove()) {
// Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
MCRegister DestRegX = TRI->getMatchingSuperReg(
DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
MCRegister SrcRegX = TRI->getMatchingSuperReg(
SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
// This instruction is reading and writing X registers. This may upset
// the register scavenger and machine verifier, so we need to indicate
// that we are reading an undefined value from SrcRegX, but a proper
// value from SrcReg.
BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
.addReg(SrcRegX, RegState::Undef)
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
.addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
} else {
BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc))
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
}
} else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
} else {
if (Subtarget.hasZeroCycleRegMove()) {
// Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
MCRegister DestRegX = TRI->getMatchingSuperReg(
DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
MCRegister SrcRegX = TRI->getMatchingSuperReg(
SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
// This instruction is reading and writing X registers. This may upset
// the register scavenger and machine verifier, so we need to indicate
// that we are reading an undefined value from SrcRegX, but a proper
// value from SrcReg.
BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
.addReg(AArch64::XZR)
.addReg(SrcRegX, RegState::Undef)
.addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
} else {
// Otherwise, expand to ORR WZR.
BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
.addReg(AArch64::WZR)
.addReg(SrcReg, getKillRegState(KillSrc));
}
}
return;
}
// Copy a Predicate register by ORRing with itself.
if (AArch64::PPRRegClass.contains(DestReg) &&
AArch64::PPRRegClass.contains(SrcReg)) {
assert(Subtarget.hasSVE() && "Unexpected SVE register.");
BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
.addReg(SrcReg) // Pg
.addReg(SrcReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
// Copy a Z register by ORRing with itself.
if (AArch64::ZPRRegClass.contains(DestReg) &&
AArch64::ZPRRegClass.contains(SrcReg)) {
assert(Subtarget.hasSVE() && "Unexpected SVE register.");
BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
.addReg(SrcReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
// Copy a Z register pair by copying the individual sub-registers.
if (AArch64::ZPR2RegClass.contains(DestReg) &&
AArch64::ZPR2RegClass.contains(SrcReg)) {
static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
Indices);
return;
}
// Copy a Z register triple by copying the individual sub-registers.
if (AArch64::ZPR3RegClass.contains(DestReg) &&
AArch64::ZPR3RegClass.contains(SrcReg)) {
static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
AArch64::zsub2};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
Indices);
return;
}
// Copy a Z register quad by copying the individual sub-registers.
if (AArch64::ZPR4RegClass.contains(DestReg) &&
AArch64::ZPR4RegClass.contains(SrcReg)) {
static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
AArch64::zsub2, AArch64::zsub3};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
Indices);
return;
}
if (AArch64::GPR64spRegClass.contains(DestReg) &&
(AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
// If either operand is SP, expand to ADD #0.
BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc))
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
} else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
} else {
// Otherwise, expand to ORR XZR.
BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
.addReg(AArch64::XZR)
.addReg(SrcReg, getKillRegState(KillSrc));
}
return;
}
// Copy a DDDD register quad by copying the individual sub-registers.
if (AArch64::DDDDRegClass.contains(DestReg) &&
AArch64::DDDDRegClass.contains(SrcReg)) {
static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
AArch64::dsub2, AArch64::dsub3};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
Indices);
return;
}
// Copy a DDD register triple by copying the individual sub-registers.
if (AArch64::DDDRegClass.contains(DestReg) &&
AArch64::DDDRegClass.contains(SrcReg)) {
static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
AArch64::dsub2};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
Indices);
return;
}
// Copy a DD register pair by copying the individual sub-registers.
if (AArch64::DDRegClass.contains(DestReg) &&
AArch64::DDRegClass.contains(SrcReg)) {
static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
Indices);
return;
}
// Copy a QQQQ register quad by copying the individual sub-registers.
if (AArch64::QQQQRegClass.contains(DestReg) &&
AArch64::QQQQRegClass.contains(SrcReg)) {
static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
AArch64::qsub2, AArch64::qsub3};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
Indices);
return;
}
// Copy a QQQ register triple by copying the individual sub-registers.
if (AArch64::QQQRegClass.contains(DestReg) &&
AArch64::QQQRegClass.contains(SrcReg)) {
static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
AArch64::qsub2};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
Indices);
return;
}
// Copy a QQ register pair by copying the individual sub-registers.
if (AArch64::QQRegClass.contains(DestReg) &&
AArch64::QQRegClass.contains(SrcReg)) {
static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
Indices);
return;
}
if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
AArch64::XZR, Indices);
return;
}
if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
AArch64::WZR, Indices);
return;
}
if (AArch64::FPR128RegClass.contains(DestReg) &&
AArch64::FPR128RegClass.contains(SrcReg)) {
if (Subtarget.hasNEON()) {
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
.addReg(SrcReg)
.addReg(SrcReg, getKillRegState(KillSrc));
} else {
BuildMI(MBB, I, DL, get(AArch64::STRQpre))
.addReg(AArch64::SP, RegState::Define)
.addReg(SrcReg, getKillRegState(KillSrc))
.addReg(AArch64::SP)
.addImm(-16);
BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
.addReg(AArch64::SP, RegState::Define)
.addReg(DestReg, RegState::Define)
.addReg(AArch64::SP)
.addImm(16);
}
return;
}
if (AArch64::FPR64RegClass.contains(DestReg) &&
AArch64::FPR64RegClass.contains(SrcReg)) {
BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (AArch64::FPR32RegClass.contains(DestReg) &&
AArch64::FPR32RegClass.contains(SrcReg)) {
BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (AArch64::FPR16RegClass.contains(DestReg) &&
AArch64::FPR16RegClass.contains(SrcReg)) {
DestReg =
RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
SrcReg =
RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (AArch64::FPR8RegClass.contains(DestReg) &&
AArch64::FPR8RegClass.contains(SrcReg)) {
DestReg =
RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
SrcReg =
RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
// Copies between GPR64 and FPR64.
if (AArch64::FPR64RegClass.contains(DestReg) &&
AArch64::GPR64RegClass.contains(SrcReg)) {
BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (AArch64::GPR64RegClass.contains(DestReg) &&
AArch64::FPR64RegClass.contains(SrcReg)) {
BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
// Copies between GPR32 and FPR32.
if (AArch64::FPR32RegClass.contains(DestReg) &&
AArch64::GPR32RegClass.contains(SrcReg)) {
BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (AArch64::GPR32RegClass.contains(DestReg) &&
AArch64::FPR32RegClass.contains(SrcReg)) {
BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (DestReg == AArch64::NZCV) {
assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
BuildMI(MBB, I, DL, get(AArch64::MSR))
.addImm(AArch64SysReg::NZCV)
.addReg(SrcReg, getKillRegState(KillSrc))
.addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
return;
}
if (SrcReg == AArch64::NZCV) {
assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
.addImm(AArch64SysReg::NZCV)
.addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
return;
}
#ifndef NDEBUG
const TargetRegisterInfo &TRI = getRegisterInfo();
errs() << TRI.getRegAsmName(DestReg) << " = COPY "
<< TRI.getRegAsmName(SrcReg) << "\n";
#endif
llvm_unreachable("unimplemented reg-to-reg copy");
}
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
const MCInstrDesc &MCID,
Register SrcReg, bool IsKill,
unsigned SubIdx0, unsigned SubIdx1, int FI,
MachineMemOperand *MMO) {
Register SrcReg0 = SrcReg;
Register SrcReg1 = SrcReg;
if (Register::isPhysicalRegister(SrcReg)) {
SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
SubIdx0 = 0;
SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
SubIdx1 = 0;
}
BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
.addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
.addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO);
}
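// Spill SrcReg to stack slot FI, choosing the store opcode (and, for SVE
// classes, the scalable stack ID) from the register class's spill size.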
void AArch64InstrInfo::storeRegToStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
bool isKill, int FI, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
unsigned Opc = 0;
bool Offset = true;
unsigned StackID = TargetStackID::Default;
switch (TRI->getSpillSize(*RC)) {
case 1:
if (AArch64::FPR8RegClass.hasSubClassEq(RC))
Opc = AArch64::STRBui;
break;
case 2:
if (AArch64::FPR16RegClass.hasSubClassEq(RC))
Opc = AArch64::STRHui;
else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
Opc = AArch64::STR_PXI;
StackID = TargetStackID::ScalableVector;
}
break;
case 4:
if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
Opc = AArch64::STRWui;
if (Register::isVirtualRegister(SrcReg))
MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
else
assert(SrcReg != AArch64::WSP);
} else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
Opc = AArch64::STRSui;
break;
case 8:
if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
Opc = AArch64::STRXui;
if (Register::isVirtualRegister(SrcReg))
MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
else
assert(SrcReg != AArch64::SP);
} else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
Opc = AArch64::STRDui;
} else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
get(AArch64::STPWi), SrcReg, isKill,
AArch64::sube32, AArch64::subo32, FI, MMO);
return;
}
break;
case 16:
if (AArch64::FPR128RegClass.hasSubClassEq(RC))
Opc = AArch64::STRQui;
else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Twov1d;
Offset = false;
} else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
get(AArch64::STPXi), SrcReg, isKill,
AArch64::sube64, AArch64::subo64, FI, MMO);
return;
} else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
Opc = AArch64::STR_ZXI;
StackID = TargetStackID::ScalableVector;
}
break;
case 24:
if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Threev1d;
Offset = false;
}
break;
case 32:
if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Fourv1d;
Offset = false;
} else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Twov2d;
Offset = false;
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
Opc = AArch64::STR_ZZXI;
StackID = TargetStackID::ScalableVector;
}
break;
case 48:
if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Threev2d;
Offset = false;
} else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
Opc = AArch64::STR_ZZZXI;
StackID = TargetStackID::ScalableVector;
}
break;
case 64:
if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
Opc = AArch64::ST1Fourv2d;
Offset = false;
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
Opc = AArch64::STR_ZZZZXI;
StackID = TargetStackID::ScalableVector;
}
break;
}
assert(Opc && "Unknown register class");
MFI.setStackID(FI, StackID);
const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI);
if (Offset)
MI.addImm(0);
MI.addMemOperand(MMO);
}
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
const MCInstrDesc &MCID,
Register DestReg, unsigned SubIdx0,
unsigned SubIdx1, int FI,
MachineMemOperand *MMO) {
Register DestReg0 = DestReg;
Register DestReg1 = DestReg;
bool IsUndef = true;
if (Register::isPhysicalRegister(DestReg)) {
DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
SubIdx0 = 0;
DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
SubIdx1 = 0;
IsUndef = false;
}
BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
.addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
.addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO);
}
void AArch64InstrInfo::loadRegFromStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
int FI, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
unsigned Opc = 0;
bool Offset = true;
unsigned StackID = TargetStackID::Default;
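// Mirror of storeRegToStackSlot: pick a load opcode by spill size, marking
// SVE fills as scalable-vector stack accesses and dropping the immediate
// offset for the NEON LD1 tuple forms.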
switch (TRI->getSpillSize(*RC)) {
case 1:
if (AArch64::FPR8RegClass.hasSubClassEq(RC))
Opc = AArch64::LDRBui;
break;
case 2:
if (AArch64::FPR16RegClass.hasSubClassEq(RC))
Opc = AArch64::LDRHui;
else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
Opc = AArch64::LDR_PXI;
StackID = TargetStackID::ScalableVector;
}
break;
case 4:
if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
Opc = AArch64::LDRWui;
if (Register::isVirtualRegister(DestReg))
MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
else
assert(DestReg != AArch64::WSP);
} else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
Opc = AArch64::LDRSui;
break;
case 8:
if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
Opc = AArch64::LDRXui;
if (Register::isVirtualRegister(DestReg))
MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
else
assert(DestReg != AArch64::SP);
} else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
Opc = AArch64::LDRDui;
} else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
get(AArch64::LDPWi), DestReg, AArch64::sube32,
AArch64::subo32, FI, MMO);
return;
}
break;
case 16:
if (AArch64::FPR128RegClass.hasSubClassEq(RC))
Opc = AArch64::LDRQui;
else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Twov1d;
Offset = false;
} else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
get(AArch64::LDPXi), DestReg, AArch64::sube64,
AArch64::subo64, FI, MMO);
return;
} else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
Opc = AArch64::LDR_ZXI;
StackID = TargetStackID::ScalableVector;
}
break;
case 24:
if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Threev1d;
Offset = false;
}
break;
case 32:
if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Fourv1d;
Offset = false;
} else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Twov2d;
Offset = false;
} else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
Opc = AArch64::LDR_ZZXI;
StackID = TargetStackID::ScalableVector;
}
break;
case 48:
if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Threev2d;
Offset = false;
} else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
Opc = AArch64::LDR_ZZZXI;
StackID = TargetStackID::ScalableVector;
}
break;
case 64:
if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
Opc = AArch64::LD1Fourv2d;
Offset = false;
} else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
Opc = AArch64::LDR_ZZZZXI;
StackID = TargetStackID::ScalableVector;
}
break;
}
assert(Opc && "Unknown register class");
MFI.setStackID(FI, StackID);
const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
.addReg(DestReg, getDefRegState(true))
.addFrameIndex(FI);
if (Offset)
MI.addImm(0);
MI.addMemOperand(MMO);
}
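// Returns true if any instruction strictly after DefMI and before UseMI
// (debug instructions excluded) reads or writes the NZCV flags.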
bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
const MachineInstr &UseMI,
const TargetRegisterInfo *TRI) {
return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
UseMI.getIterator()),
[TRI](const MachineInstr &I) {
return I.modifiesRegister(AArch64::NZCV, TRI) ||
I.readsRegister(AArch64::NZCV, TRI);
});
}
void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
// The smallest scalable elements supported by scaled SVE addressing
// modes are predicates, which are 2 scalable bytes in size. So the scalable
// byte offset must always be a multiple of 2.
assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
// VGSized offsets are divided by '2', because the VG register is the
// number of 64bit granules as opposed to 128bit vector chunks,
// which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
// So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
// VG = n * 2 and the dwarf offset must be VG * 8 bytes.
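// For example, StackOffset(Fixed=16, Scalable=32) decomposes into
// ByteSized=16 and VGSized=16.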
ByteSized = Offset.getFixed();
VGSized = Offset.getScalable() / 2;
}
/// Returns, via the out parameters, the parts into which this frame offset
/// can be decomposed for the purpose of describing a frame offset.
/// For non-scalable offsets this is simply the byte size.
void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
int64_t &NumDataVectors) {
// The smallest scalable elements supported by scaled SVE addressing
// modes are predicates, which are 2 scalable bytes in size. So the scalable
// byte offset must always be a multiple of 2.
assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
NumBytes = Offset.getFixed();
NumDataVectors = 0;
NumPredicateVectors = Offset.getScalable() / 2;
// This method is used to get the offsets to adjust the frame offset.
// If the function requires ADDPL to be used and needs more than two ADDPL
// instructions, part of the offset is folded into NumDataVectors so that it
// uses ADDVL for part of it, reducing the number of ADDPL instructions.
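// For example, a scalable offset of 48 bytes corresponds to 24 predicate
// granules; since 24 is a multiple of 8, it is re-expressed as 3 data
// vectors (ADDVL) and 0 predicate vectors (ADDPL).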
if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
NumPredicateVectors > 62) {
NumDataVectors = NumPredicateVectors / 8;
NumPredicateVectors -= NumDataVectors * 8;
}
}
// Helper function to emit a frame offset adjustment from a given
// pointer (SrcReg), stored into DestReg. This function is explicit
// in that it requires the opcode.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, unsigned DestReg,
unsigned SrcReg, int64_t Offset, unsigned Opc,
const TargetInstrInfo *TII,
MachineInstr::MIFlag Flag, bool NeedsWinCFI,
bool *HasWinCFI) {
int Sign = 1;
unsigned MaxEncoding, ShiftSize;
switch (Opc) {
case AArch64::ADDXri:
case AArch64::ADDSXri:
case AArch64::SUBXri:
case AArch64::SUBSXri:
MaxEncoding = 0xfff;
ShiftSize = 12;
break;
case AArch64::ADDVL_XXI:
case AArch64::ADDPL_XXI:
MaxEncoding = 31;
ShiftSize = 0;
if (Offset < 0) {
MaxEncoding = 32;
Sign = -1;
Offset = -Offset;
}
break;
default:
llvm_unreachable("Unsupported opcode");
}
// FIXME: If the offset won't fit in 24-bits, compute the offset into a
// scratch register. If DestReg is a virtual register, use it as the
// scratch register; otherwise, create a new virtual register (to be
// replaced by the scavenger at the end of PEI). That case can be optimized
// slightly if DestReg is SP which is always 16-byte aligned, so the scratch
// register can be loaded with offset%8 and the add/sub can use an extending
// instruction with LSL#3.
// Currently the function handles any offsets but generates a poor sequence
// of code.
// assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
Register TmpReg = DestReg;
if (TmpReg == AArch64::XZR)
TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
&AArch64::GPR64RegClass);
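// Emit a chain of adds/subs, each covering as much of the remaining offset
// as the encoding allows. When the destination is XZR, intermediate sums go
// through a scratch virtual register and only the last instruction targets
// XZR.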
do {
uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
unsigned LocalShiftSize = 0;
if (ThisVal > MaxEncoding) {
ThisVal = ThisVal >> ShiftSize;
LocalShiftSize = ShiftSize;
}
assert((ThisVal >> ShiftSize) <= MaxEncoding &&
"Encoding cannot handle value that big");
Offset -= ThisVal << LocalShiftSize;
if (Offset == 0)
TmpReg = DestReg;
auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
.addReg(SrcReg)
.addImm(Sign * (int)ThisVal);
if (ShiftSize)
MBI = MBI.addImm(
AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize));
MBI = MBI.setMIFlag(Flag);
if (NeedsWinCFI) {
assert(Sign == 1 && "SEH directives should always have a positive sign");
int Imm = (int)(ThisVal << LocalShiftSize);
if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
(SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
if (HasWinCFI)
*HasWinCFI = true;
if (Imm == 0)
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
else
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
.addImm(Imm)
.setMIFlag(Flag);
assert(Offset == 0 && "Expected remaining offset to be zero to "
"emit a single SEH directive");
} else if (DestReg == AArch64::SP) {
if (HasWinCFI)
*HasWinCFI = true;
assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
.addImm(Imm)
.setMIFlag(Flag);
}
if (HasWinCFI)
*HasWinCFI = true;
}
SrcReg = TmpReg;
} while (Offset);
}
void llvm::emitFrameOffset(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
unsigned DestReg, unsigned SrcReg,
StackOffset Offset, const TargetInstrInfo *TII,
MachineInstr::MIFlag Flag, bool SetNZCV,
bool NeedsWinCFI, bool *HasWinCFI) {
int64_t Bytes, NumPredicateVectors, NumDataVectors;
AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
Offset, Bytes, NumPredicateVectors, NumDataVectors);
// First emit non-scalable frame offsets, or a simple 'mov'.
if (Bytes || (!Offset && SrcReg != DestReg)) {
assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
"SP increment/decrement not 8-byte aligned");
unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
if (Bytes < 0) {
Bytes = -Bytes;
Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
}
emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
NeedsWinCFI, HasWinCFI);
SrcReg = DestReg;
}
assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
"SetNZCV not supported with SVE vectors");
assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) &&
"WinCFI not supported with SVE vectors");
if (NumDataVectors) {
emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
AArch64::ADDVL_XXI, TII, Flag, NeedsWinCFI, nullptr);
SrcReg = DestReg;
}
if (NumPredicateVectors) {
assert(DestReg != AArch64::SP && "Unaligned access to SP");
emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
AArch64::ADDPL_XXI, TII, Flag, NeedsWinCFI, nullptr);
}
}
MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, int FrameIndex,
LiveIntervals *LIS, VirtRegMap *VRM) const {
// This is a bit of a hack. Consider this instruction:
//
// %0 = COPY %sp; GPR64all:%0
//
// We explicitly chose GPR64all for the virtual register so such a copy might
// be eliminated by RegisterCoalescer. However, that may not be possible, and
// %0 may even spill. We can't spill %sp, and since it is in the GPR64all
// register class, TargetInstrInfo::foldMemoryOperand() is going to try.
//
// To prevent that, we are going to constrain the %0 register class here.
//
// <rdar://problem/11522048>
//
if (MI.isFullCopy()) {
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
if (SrcReg == AArch64::SP && Register::isVirtualRegister(DstReg)) {
MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
return nullptr;
}
if (DstReg == AArch64::SP && Register::isVirtualRegister(SrcReg)) {
MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
return nullptr;
}
}
// Handle the case where a copy is being spilled or filled but the source
// and destination register class don't match. For example:
//
// %0 = COPY %xzr; GPR64common:%0
//
// In this case we can still safely fold away the COPY and generate the
// following spill code:
//
// STRXui %xzr, %stack.0
//
// This also eliminates spilled cross register class COPYs (e.g. between x and
// d regs) of the same size. For example:
//
// %0 = COPY %1; GPR64:%0, FPR64:%1
//
// will be filled as
//
// LDRDui %0, fi<#0>
//
// instead of
//
// LDRXui %Temp, fi<#0>
// %0 = FMOV %Temp
//
if (MI.isCopy() && Ops.size() == 1 &&
// Make sure we're only folding the explicit COPY defs/uses.
(Ops[0] == 0 || Ops[0] == 1)) {
bool IsSpill = Ops[0] == 0;
bool IsFill = !IsSpill;
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const MachineRegisterInfo &MRI = MF.getRegInfo();
MachineBasicBlock &MBB = *MI.getParent();
const MachineOperand &DstMO = MI.getOperand(0);
const MachineOperand &SrcMO = MI.getOperand(1);
Register DstReg = DstMO.getReg();
Register SrcReg = SrcMO.getReg();
// This is slightly expensive to compute for physical regs since
// getMinimalPhysRegClass is slow.
auto getRegClass = [&](unsigned Reg) {
return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
: TRI.getMinimalPhysRegClass(Reg);
};
if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
"Mismatched register size in non subreg COPY");
if (IsSpill)
storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
getRegClass(SrcReg), &TRI);
else
loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
getRegClass(DstReg), &TRI);
return &*--InsertPt;
}
// Handle cases like spilling def of:
//
// %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
//
// where the physical register source can be widened and stored to the full
// virtual reg destination stack slot, in this case producing:
//
// STRXui %xzr, %stack.0
//
if (IsSpill && DstMO.isUndef() && Register::isPhysicalRegister(SrcReg)) {
assert(SrcMO.getSubReg() == 0 &&
"Unexpected subreg on physical register");
const TargetRegisterClass *SpillRC;
unsigned SpillSubreg;
switch (DstMO.getSubReg()) {
default:
SpillRC = nullptr;
break;
case AArch64::sub_32:
case AArch64::ssub:
if (AArch64::GPR32RegClass.contains(SrcReg)) {
SpillRC = &AArch64::GPR64RegClass;
SpillSubreg = AArch64::sub_32;
} else if (AArch64::FPR32RegClass.contains(SrcReg)) {
SpillRC = &AArch64::FPR64RegClass;
SpillSubreg = AArch64::ssub;
} else
SpillRC = nullptr;
break;
case AArch64::dsub:
if (AArch64::FPR64RegClass.contains(SrcReg)) {
SpillRC = &AArch64::FPR128RegClass;
SpillSubreg = AArch64::dsub;
} else
SpillRC = nullptr;
break;
}
if (SpillRC)
if (unsigned WidenedSrcReg =
TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
FrameIndex, SpillRC, &TRI);
return &*--InsertPt;
}
}
// Handle cases like filling use of:
//
// %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
//
// where we can load the full virtual reg source stack slot, into the subreg
// destination, in this case producing:
//
// LDRWui %0:sub_32<def,read-undef>, %stack.0
//
if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
const TargetRegisterClass *FillRC;
switch (DstMO.getSubReg()) {
default:
FillRC = nullptr;
break;
case AArch64::sub_32:
FillRC = &AArch64::GPR32RegClass;
break;
case AArch64::ssub:
FillRC = &AArch64::FPR32RegClass;
break;
case AArch64::dsub:
FillRC = &AArch64::FPR64RegClass;
break;
}
if (FillRC) {
assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
TRI.getRegSizeInBits(*FillRC) &&
"Mismatched regclass size on folded subreg COPY");
loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
MachineInstr &LoadMI = *--InsertPt;
MachineOperand &LoadDst = LoadMI.getOperand(0);
assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
LoadDst.setSubReg(DstMO.getSubReg());
LoadDst.setIsUndef();
return &LoadMI;
}
}
}
// Cannot fold.
return nullptr;
}
int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
StackOffset &SOffset,
bool *OutUseUnscaledOp,
unsigned *OutUnscaledOp,
int64_t *EmittableOffset) {
// Set output values in case of early exit.
if (EmittableOffset)
*EmittableOffset = 0;
if (OutUseUnscaledOp)
*OutUseUnscaledOp = false;
if (OutUnscaledOp)
*OutUnscaledOp = 0;
// Exit early for structured vector spills/fills as they can't take an
// immediate offset.
switch (MI.getOpcode()) {
default:
break;
case AArch64::LD1Twov2d:
case AArch64::LD1Threev2d:
case AArch64::LD1Fourv2d:
case AArch64::LD1Twov1d:
case AArch64::LD1Threev1d:
case AArch64::LD1Fourv1d:
case AArch64::ST1Twov2d:
case AArch64::ST1Threev2d:
case AArch64::ST1Fourv2d:
case AArch64::ST1Twov1d:
case AArch64::ST1Threev1d:
case AArch64::ST1Fourv1d:
case AArch64::ST1i8:
case AArch64::ST1i16:
case AArch64::ST1i32:
case AArch64::ST1i64:
case AArch64::IRG:
case AArch64::IRGstack:
case AArch64::STGloop:
case AArch64::STZGloop:
return AArch64FrameOffsetCannotUpdate;
}
// Get the min/max offset and the scale.
TypeSize ScaleValue(0U, false);
unsigned Width;
int64_t MinOff, MaxOff;
if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
MaxOff))
llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
// Construct the complete offset.
bool IsMulVL = ScaleValue.isScalable();
unsigned Scale = ScaleValue.getKnownMinSize();
int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
const MachineOperand &ImmOpnd =
MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
Offset += ImmOpnd.getImm() * Scale;
// If the offset doesn't match the scale, we rewrite the instruction to
// use the unscaled instruction instead. Likewise, if we have a negative
// offset and there is an unscaled op to use.
Optional<unsigned> UnscaledOp =
AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
if (useUnscaledOp &&
!AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
MaxOff))
llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
Scale = ScaleValue.getKnownMinSize();
assert(IsMulVL == ScaleValue.isScalable() &&
"Unscaled opcode has different value for scalable");
int64_t Remainder = Offset % Scale;
assert(!(Remainder && useUnscaledOp) &&
"Cannot have remainder when using unscaled op");
assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
int64_t NewOffset = Offset / Scale;
if (MinOff <= NewOffset && NewOffset <= MaxOff)
Offset = Remainder;
else {
NewOffset = NewOffset < 0 ? MinOff : MaxOff;
Offset = Offset - NewOffset * Scale + Remainder;
}
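// Report the directly encodable part (NewOffset) via the out parameters and
// leave any remainder in SOffset for the caller to materialize.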
if (EmittableOffset)
*EmittableOffset = NewOffset;
if (OutUseUnscaledOp)
*OutUseUnscaledOp = useUnscaledOp;
if (OutUnscaledOp && UnscaledOp)
*OutUnscaledOp = *UnscaledOp;
if (IsMulVL)
SOffset = StackOffset::get(SOffset.getFixed(), Offset);
else
SOffset = StackOffset::get(Offset, SOffset.getScalable());
return AArch64FrameOffsetCanUpdate |
(SOffset ? 0 : AArch64FrameOffsetIsLegal);
}
bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned FrameReg, StackOffset &Offset,
const AArch64InstrInfo *TII) {
unsigned Opcode = MI.getOpcode();
unsigned ImmIdx = FrameRegIdx + 1;
if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
MI.getOperand(0).getReg(), FrameReg, Offset, TII,
MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
MI.eraseFromParent();
Offset = StackOffset();
return true;
}
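// For other (load/store style) instructions, ask isAArch64FrameOffsetLegal
// how much of the offset can be encoded directly, update the instruction
// accordingly, and return true only when no residual offset remains.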
int64_t NewOffset;
unsigned UnscaledOp;
bool UseUnscaledOp;
int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
&UnscaledOp, &NewOffset);
if (Status & AArch64FrameOffsetCanUpdate) {
if (Status & AArch64FrameOffsetIsLegal)
// Replace the FrameIndex with FrameReg.
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
if (UseUnscaledOp)
MI.setDesc(TII->get(UnscaledOp));
MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
return !Offset;
}
return false;
}
MCInst AArch64InstrInfo::getNop() const {
return MCInstBuilder(AArch64::HINT).addImm(0);
}
// AArch64 supports MachineCombiner.
bool AArch64InstrInfo::useMachineCombiner() const { return true; }
// True when Opc sets the condition flags (NZCV)
static bool isCombineInstrSettingFlag(unsigned Opc) {
switch (Opc) {
case AArch64::ADDSWrr:
case AArch64::ADDSWri:
case AArch64::ADDSXrr:
case AArch64::ADDSXri:
case AArch64::SUBSWrr:
case AArch64::SUBSXrr:
// Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
case AArch64::SUBSWri:
case AArch64::SUBSXri:
return true;
default:
break;
}
return false;
}
// 32b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate32(unsigned Opc) {
switch (Opc) {
case AArch64::ADDWrr:
case AArch64::ADDWri:
case AArch64::SUBWrr:
case AArch64::ADDSWrr:
case AArch64::ADDSWri:
case AArch64::SUBSWrr:
// Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
case AArch64::SUBWri:
case AArch64::SUBSWri:
return true;
default:
break;
}
return false;
}
// 64b Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate64(unsigned Opc) {
switch (Opc) {
case AArch64::ADDXrr:
case AArch64::ADDXri:
case AArch64::SUBXrr:
case AArch64::ADDSXrr:
case AArch64::ADDSXri:
case AArch64::SUBSXrr:
// Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
case AArch64::SUBXri:
case AArch64::SUBSXri:
case AArch64::ADDv8i8:
case AArch64::ADDv16i8:
case AArch64::ADDv4i16:
case AArch64::ADDv8i16:
case AArch64::ADDv2i32:
case AArch64::ADDv4i32:
case AArch64::SUBv8i8:
case AArch64::SUBv16i8:
case AArch64::SUBv4i16:
case AArch64::SUBv8i16:
case AArch64::SUBv2i32:
case AArch64::SUBv4i32:
return true;
default:
break;
}
return false;
}
// FP Opcodes that can be combined with a FMUL.
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
switch (Inst.getOpcode()) {
default:
break;
case AArch64::FADDHrr:
case AArch64::FADDSrr:
case AArch64::FADDDrr:
case AArch64::FADDv4f16:
case AArch64::FADDv8f16:
case AArch64::FADDv2f32:
case AArch64::FADDv2f64:
case AArch64::FADDv4f32:
case AArch64::FSUBHrr:
case AArch64::FSUBSrr:
case AArch64::FSUBDrr:
case AArch64::FSUBv4f16:
case AArch64::FSUBv8f16:
case AArch64::FSUBv2f32:
case AArch64::FSUBv2f64:
case AArch64::FSUBv4f32:
TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
// We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
// the target options or if FADD/FSUB has the contract fast-math flag.
return Options.UnsafeFPMath ||
Options.AllowFPOpFusion == FPOpFusion::Fast ||
Inst.getFlag(MachineInstr::FmContract);
return true;
}
return false;
}
// Opcodes that can be combined with a MUL
static bool isCombineInstrCandidate(unsigned Opc) {
return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
}
//
// Utility routine that checks if \param MO is defined by an
// \param CombineOpc instruction in the basic block \param MBB
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
unsigned CombineOpc, unsigned ZeroReg = 0,
bool CheckZeroReg = false) {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
MachineInstr *MI = nullptr;
if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
MI = MRI.getUniqueVRegDef(MO.getReg());
// And it needs to be in the trace (otherwise, it won't have a depth).
if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
return false;
// Must only be used by the user we combine with.
if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
return false;
if (CheckZeroReg) {
assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
// The third input reg must be zero.
if (MI->getOperand(3).getReg() != ZeroReg)
return false;
}
return true;
}
//
// Is \param MO defined by an integer multiply and can be combined?
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
unsigned MulOpc, unsigned ZeroReg) {
return canCombine(MBB, MO, MulOpc, ZeroReg, true);
}
//
// Is \param MO defined by a floating-point multiply and can be combined?
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
unsigned MulOpc) {
return canCombine(MBB, MO, MulOpc);
}
// TODO: There are many more machine instruction opcodes to match:
// 1. Other data types (integer, vectors)
// 2. Other math / logic operations (xor, or)
// 3. Other forms of the same operation (intrinsics and other variants)
bool AArch64InstrInfo::isAssociativeAndCommutative(
const MachineInstr &Inst) const {
switch (Inst.getOpcode()) {
case AArch64::FADDDrr:
case AArch64::FADDSrr:
case AArch64::FADDv2f32:
case AArch64::FADDv2f64:
case AArch64::FADDv4f32:
case AArch64::FMULDrr:
case AArch64::FMULSrr:
case AArch64::FMULX32:
case AArch64::FMULX64:
case AArch64::FMULXv2f32:
case AArch64::FMULXv2f64:
case AArch64::FMULXv4f32:
case AArch64::FMULv2f32:
case AArch64::FMULv2f64:
case AArch64::FMULv4f32:
return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
default:
return false;
}
}
/// Find instructions that can be turned into madd.
static bool getMaddPatterns(MachineInstr &Root,
SmallVectorImpl<MachineCombinerPattern> &Patterns) {
unsigned Opc = Root.getOpcode();
MachineBasicBlock &MBB = *Root.getParent();
bool Found = false;
if (!isCombineInstrCandidate(Opc))
return false;
if (isCombineInstrSettingFlag(Opc)) {
int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
// When NZCV is live bail out.
if (Cmp_NZCV == -1)
return false;
unsigned NewOpc = convertToNonFlagSettingOpc(Root);
// When opcode can't change bail out.
// CHECKME: do we miss any cases for opcode conversion?
if (NewOpc == Opc)
return false;
Opc = NewOpc;
}
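// setFound matches scalar candidates where the operand is defined by a
// MADD/MSUB-style instruction whose third source is the zero register
// (i.e. a plain multiply); setVFound matches vector multiplies directly,
// with no zero-register check.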
auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
MachineCombinerPattern Pattern) {
if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
Patterns.push_back(Pattern);
Found = true;
}
};
auto setVFound = [&](int Opcode, int Operand, MachineCombinerPattern Pattern) {
if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
Patterns.push_back(Pattern);
Found = true;
}
};
typedef MachineCombinerPattern MCP;
switch (Opc) {
default:
break;
case AArch64::ADDWrr:
assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
"ADDWrr does not have register operands");
setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
break;
case AArch64::ADDXrr:
setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
break;
case AArch64::SUBWrr:
setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
break;
case AArch64::SUBXrr:
setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
break;
case AArch64::ADDWri:
setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
break;
case AArch64::ADDXri:
setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
break;
case AArch64::SUBWri:
setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
break;
case AArch64::SUBXri:
setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
break;
case AArch64::ADDv8i8:
setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
break;
case AArch64::ADDv16i8:
setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
break;
case AArch64::ADDv4i16:
setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
break;
case AArch64::ADDv8i16:
setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
break;
case AArch64::ADDv2i32:
setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
break;
case AArch64::ADDv4i32:
setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
break;
case AArch64::SUBv8i8:
setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
break;
case AArch64::SUBv16i8:
setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
break;
case AArch64::SUBv4i16:
setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
break;
case AArch64::SUBv8i16:
setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
break;
case AArch64::SUBv2i32:
setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
break;
case AArch64::SUBv4i32:
setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
break;
}
return Found;
}
/// Floating-Point Support
/// Find instructions that can be turned into madd.
static bool getFMAPatterns(MachineInstr &Root,
SmallVectorImpl<MachineCombinerPattern> &Patterns) {
if (!isCombineInstrCandidateFP(Root))
return false;
MachineBasicBlock &MBB = *Root.getParent();
bool Found = false;
auto Match = [&](int Opcode, int Operand,
MachineCombinerPattern Pattern) -> bool {
if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
Patterns.push_back(Pattern);
return true;
}
return false;
};
typedef MachineCombinerPattern MCP;
switch (Root.getOpcode()) {
default:
assert(false && "Unsupported FP instruction in combiner\n");
break;
case AArch64::FADDHrr:
assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
"FADDHrr does not have register operands");
Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
break;
case AArch64::FADDSrr:
assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
"FADDSrr does not have register operands");
Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
break;
case AArch64::FADDDrr:
Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
break;
case AArch64::FADDv4f16:
Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
break;
case AArch64::FADDv8f16:
Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
break;
case AArch64::FADDv2f32:
Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
break;
case AArch64::FADDv2f64:
Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
break;
case AArch64::FADDv4f32:
Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
break;
case AArch64::FSUBHrr:
Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
break;
case AArch64::FSUBSrr:
Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
break;
case AArch64::FSUBDrr:
Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
break;
case AArch64::FSUBv4f16:
Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
break;
case AArch64::FSUBv8f16:
Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
break;
case AArch64::FSUBv2f32:
Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
break;
case AArch64::FSUBv2f64:
Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
break;
case AArch64::FSUBv4f32:
Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
break;
}
return Found;
}
static bool getFMULPatterns(MachineInstr &Root,
SmallVectorImpl<MachineCombinerPattern> &Patterns) {
MachineBasicBlock &MBB = *Root.getParent();
bool Found = false;
auto Match = [&](unsigned Opcode, int Operand,
MachineCombinerPattern Pattern) -> bool {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
MachineOperand &MO = Root.getOperand(Operand);
MachineInstr *MI = nullptr;
if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
MI = MRI.getUniqueVRegDef(MO.getReg());
if (MI && MI->getOpcode() == Opcode) {
Patterns.push_back(Pattern);
return true;
}
return false;
};
typedef MachineCombinerPattern MCP;
switch (Root.getOpcode()) {
default:
return false;
case AArch64::FMULv2f32:
Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
break;
case AArch64::FMULv2f64:
Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
break;
case AArch64::FMULv4f16:
Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
break;
case AArch64::FMULv4f32:
Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
break;
case AArch64::FMULv8f16:
Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
break;
}
return Found;
}
/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
/// \param Pattern - combiner pattern
bool AArch64InstrInfo::isThroughputPattern(
MachineCombinerPattern Pattern) const {
switch (Pattern) {
default:
break;
case MachineCombinerPattern::FMULADDH_OP1:
case MachineCombinerPattern::FMULADDH_OP2:
case MachineCombinerPattern::FMULSUBH_OP1:
case MachineCombinerPattern::FMULSUBH_OP2:
case MachineCombinerPattern::FMULADDS_OP1:
case MachineCombinerPattern::FMULADDS_OP2:
case MachineCombinerPattern::FMULSUBS_OP1:
case MachineCombinerPattern::FMULSUBS_OP2:
case MachineCombinerPattern::FMULADDD_OP1:
case MachineCombinerPattern::FMULADDD_OP2:
case MachineCombinerPattern::FMULSUBD_OP1:
case MachineCombinerPattern::FMULSUBD_OP2:
case MachineCombinerPattern::FNMULSUBH_OP1:
case MachineCombinerPattern::FNMULSUBS_OP1:
case MachineCombinerPattern::FNMULSUBD_OP1:
case MachineCombinerPattern::FMLAv4i16_indexed_OP1:
case MachineCombinerPattern::FMLAv4i16_indexed_OP2:
case MachineCombinerPattern::FMLAv8i16_indexed_OP1:
case MachineCombinerPattern::FMLAv8i16_indexed_OP2:
case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
case MachineCombinerPattern::FMLAv4f16_OP2:
case MachineCombinerPattern::FMLAv4f16_OP1:
case MachineCombinerPattern::FMLAv8f16_OP1:
case MachineCombinerPattern::FMLAv8f16_OP2:
case MachineCombinerPattern::FMLAv2f32_OP2:
case MachineCombinerPattern::FMLAv2f32_OP1:
case MachineCombinerPattern::FMLAv2f64_OP1:
case MachineCombinerPattern::FMLAv2f64_OP2:
case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
case MachineCombinerPattern::FMLAv4f32_OP1:
case MachineCombinerPattern::FMLAv4f32_OP2:
case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
case MachineCombinerPattern::FMLSv4i16_indexed_OP1:
case MachineCombinerPattern::FMLSv4i16_indexed_OP2:
case MachineCombinerPattern::FMLSv8i16_indexed_OP1:
case MachineCombinerPattern::FMLSv8i16_indexed_OP2:
case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
case MachineCombinerPattern::FMLSv4f16_OP1:
case MachineCombinerPattern::FMLSv4f16_OP2:
case MachineCombinerPattern::FMLSv8f16_OP1:
case MachineCombinerPattern::FMLSv8f16_OP2:
case MachineCombinerPattern::FMLSv2f32_OP2:
case MachineCombinerPattern::FMLSv2f64_OP2:
case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
case MachineCombinerPattern::FMLSv4f32_OP2:
case MachineCombinerPattern::FMULv2i32_indexed_OP1:
case MachineCombinerPattern::FMULv2i32_indexed_OP2:
case MachineCombinerPattern::FMULv2i64_indexed_OP1:
case MachineCombinerPattern::FMULv2i64_indexed_OP2:
case MachineCombinerPattern::FMULv4i16_indexed_OP1:
case MachineCombinerPattern::FMULv4i16_indexed_OP2:
case MachineCombinerPattern::FMULv4i32_indexed_OP1:
case MachineCombinerPattern::FMULv4i32_indexed_OP2:
case MachineCombinerPattern::FMULv8i16_indexed_OP1:
case MachineCombinerPattern::FMULv8i16_indexed_OP2:
case MachineCombinerPattern::MULADDv8i8_OP1:
case MachineCombinerPattern::MULADDv8i8_OP2:
case MachineCombinerPattern::MULADDv16i8_OP1:
case MachineCombinerPattern::MULADDv16i8_OP2:
case MachineCombinerPattern::MULADDv4i16_OP1:
case MachineCombinerPattern::MULADDv4i16_OP2:
case MachineCombinerPattern::MULADDv8i16_OP1:
case MachineCombinerPattern::MULADDv8i16_OP2:
case MachineCombinerPattern::MULADDv2i32_OP1:
case MachineCombinerPattern::MULADDv2i32_OP2:
case MachineCombinerPattern::MULADDv4i32_OP1:
case MachineCombinerPattern::MULADDv4i32_OP2:
case MachineCombinerPattern::MULSUBv8i8_OP1:
case MachineCombinerPattern::MULSUBv8i8_OP2:
case MachineCombinerPattern::MULSUBv16i8_OP1:
case MachineCombinerPattern::MULSUBv16i8_OP2:
case MachineCombinerPattern::MULSUBv4i16_OP1:
case MachineCombinerPattern::MULSUBv4i16_OP2:
case MachineCombinerPattern::MULSUBv8i16_OP1:
case MachineCombinerPattern::MULSUBv8i16_OP2:
case MachineCombinerPattern::MULSUBv2i32_OP1:
case MachineCombinerPattern::MULSUBv2i32_OP2:
case MachineCombinerPattern::MULSUBv4i32_OP1:
case MachineCombinerPattern::MULSUBv4i32_OP2:
case MachineCombinerPattern::MULADDv4i16_indexed_OP1:
case MachineCombinerPattern::MULADDv4i16_indexed_OP2:
case MachineCombinerPattern::MULADDv8i16_indexed_OP1:
case MachineCombinerPattern::MULADDv8i16_indexed_OP2:
case MachineCombinerPattern::MULADDv2i32_indexed_OP1:
case MachineCombinerPattern::MULADDv2i32_indexed_OP2:
case MachineCombinerPattern::MULADDv4i32_indexed_OP1:
case MachineCombinerPattern::MULADDv4i32_indexed_OP2:
case MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
case MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
case MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
case MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
case MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
case MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
case MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
case MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
return true;
} // end switch (Pattern)
return false;
}
/// Return true when there is potentially a faster code sequence for an
/// instruction chain ending in \p Root. All potential patterns are listed in
/// the \p Pattern vector. Pattern should be sorted in priority order since the
/// pattern evaluator stops checking as soon as it finds a faster sequence.
bool AArch64InstrInfo::getMachineCombinerPatterns(
MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
bool DoRegPressureReduce) const {
// Integer patterns
if (getMaddPatterns(Root, Patterns))
return true;
// Floating point patterns
if (getFMULPatterns(Root, Patterns))
return true;
if (getFMAPatterns(Root, Patterns))
return true;
return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
DoRegPressureReduce);
}
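// Operand layout used when building the fused instruction below: Default is
// MADD-style (mul operands first, accumulator last), Accumulator is MLA/FMLA
// style (accumulator first), and Indexed additionally appends the lane
// immediate taken from the original multiply.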
enum class FMAInstKind { Default, Indexed, Accumulator };
/// genFusedMultiply - Generate fused multiply instructions.
/// This function supports both integer and floating point instructions.
/// A typical example:
/// F|MUL I=A,B,0
/// F|ADD R,I,C
/// ==> F|MADD R,A,B,C
/// \param MF Containing MachineFunction
/// \param MRI Register information
/// \param TII Target information
/// \param Root is the F|ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the F|MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the f|madd instruction
/// \param RC Register class of operands
/// \param kind of fma instruction (addressing mode) to be generated
/// \param ReplacedAddend is the result register from the instruction
/// replacing the non-combined operand, if any.
static MachineInstr *
genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
const TargetInstrInfo *TII, MachineInstr &Root,
SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
unsigned MaddOpc, const TargetRegisterClass *RC,
FMAInstKind kind = FMAInstKind::Default,
const Register *ReplacedAddend = nullptr) {
assert(IdxMulOpd == 1 || IdxMulOpd == 2);
unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
Register ResultReg = Root.getOperand(0).getReg();
Register SrcReg0 = MUL->getOperand(1).getReg();
bool Src0IsKill = MUL->getOperand(1).isKill();
Register SrcReg1 = MUL->getOperand(2).getReg();
bool Src1IsKill = MUL->getOperand(2).isKill();
unsigned SrcReg2;
bool Src2IsKill;
if (ReplacedAddend) {
// If we just generated a new addend, we must be its only use.
SrcReg2 = *ReplacedAddend;
Src2IsKill = true;
} else {
SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
}
if (Register::isVirtualRegister(ResultReg))
MRI.constrainRegClass(ResultReg, RC);
if (Register::isVirtualRegister(SrcReg0))
MRI.constrainRegClass(SrcReg0, RC);
if (Register::isVirtualRegister(SrcReg1))
MRI.constrainRegClass(SrcReg1, RC);
if (Register::isVirtualRegister(SrcReg2))
MRI.constrainRegClass(SrcReg2, RC);
MachineInstrBuilder MIB;
if (kind == FMAInstKind::Default)
MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
.addReg(SrcReg0, getKillRegState(Src0IsKill))
.addReg(SrcReg1, getKillRegState(Src1IsKill))
.addReg(SrcReg2, getKillRegState(Src2IsKill));
else if (kind == FMAInstKind::Indexed)
MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
.addReg(SrcReg2, getKillRegState(Src2IsKill))
.addReg(SrcReg0, getKillRegState(Src0IsKill))
.addReg(SrcReg1, getKillRegState(Src1IsKill))
.addImm(MUL->getOperand(3).getImm());
else if (kind == FMAInstKind::Accumulator)
MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
.addReg(SrcReg2, getKillRegState(Src2IsKill))
.addReg(SrcReg0, getKillRegState(Src0IsKill))
.addReg(SrcReg1, getKillRegState(Src1IsKill));
else
assert(false && "Invalid FMA instruction kind \n");
// Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
InsInstrs.push_back(MIB);
return MUL;
}
/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
static MachineInstr *
genIndexedMultiply(MachineInstr &Root,
SmallVectorImpl<MachineInstr *> &InsInstrs,
unsigned IdxDupOp, unsigned MulOpc,
const TargetRegisterClass *RC, MachineRegisterInfo &MRI) {
assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
"Invalid index of FMUL operand");
MachineFunction &MF = *Root.getMF();
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
MachineInstr *Dup =
MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
Register DupSrcReg = Dup->getOperand(1).getReg();
MRI.clearKillFlags(DupSrcReg);
MRI.constrainRegClass(DupSrcReg, RC);
unsigned DupSrcLane = Dup->getOperand(2).getImm();
unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
MachineOperand &MulOp = Root.getOperand(IdxMulOp);
Register ResultReg = Root.getOperand(0).getReg();
MachineInstrBuilder MIB;
MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MulOpc), ResultReg)
.add(MulOp)
.addReg(DupSrcReg)
.addImm(DupSrcLane);
InsInstrs.push_back(MIB);
return &Root;
}
/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
/// instructions.
///
/// \see genFusedMultiply
static MachineInstr *genFusedMultiplyAcc(
MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
FMAInstKind::Accumulator);
}
/// genNeg - Helper to generate an intermediate negation of the second operand
/// of Root
static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
const TargetInstrInfo *TII, MachineInstr &Root,
SmallVectorImpl<MachineInstr *> &InsInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
unsigned MnegOpc, const TargetRegisterClass *RC) {
Register NewVR = MRI.createVirtualRegister(RC);
MachineInstrBuilder MIB =
BuildMI(MF, Root.getDebugLoc(), TII->get(MnegOpc), NewVR)
.add(Root.getOperand(2));
InsInstrs.push_back(MIB);
assert(InstrIdxForVirtReg.empty());
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
return NewVR;
}
/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
/// instructions with an additional negation of the accumulator
static MachineInstr *genFusedMultiplyAccNeg(
MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
assert(IdxMulOpd == 1);
Register NewVR =
genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
FMAInstKind::Accumulator, &NewVR);
}
/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
/// instructions.
///
/// \see genFusedMultiply
static MachineInstr *genFusedMultiplyIdx(
MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
FMAInstKind::Indexed);
}
/// genFusedMultiplyIdxNeg - Helper to generate fused multiply (indexed)
/// instructions with an additional negation of the accumulator
static MachineInstr *genFusedMultiplyIdxNeg(
MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
assert(IdxMulOpd == 1);
Register NewVR =
genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
FMAInstKind::Indexed, &NewVR);
}
/// genMaddR - Generate madd instruction and combine mul and add using
/// an extra virtual register
/// Example - an ADD intermediate needs to be stored in a register:
/// MUL I=A,B,0
/// ADD R,I,Imm
/// ==> ORR V, ZR, Imm
/// ==> MADD R,A,B,V
/// \param MF Containing MachineFunction
/// \param MRI Register information
/// \param TII Target information
/// \param Root is the ADD instruction
/// \param [out] InsInstrs is a vector of machine instructions and will
/// contain the generated madd instruction
/// \param IdxMulOpd is index of operand in Root that is the result of
/// the MUL. In the example above IdxMulOpd is 1.
/// \param MaddOpc the opcode of the madd instruction
/// \param VR is a virtual register that holds the value of an ADD operand
/// (V in the example above).
/// \param RC Register class of operands
static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
const TargetInstrInfo *TII, MachineInstr &Root,
SmallVectorImpl<MachineInstr *> &InsInstrs,
unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
const TargetRegisterClass *RC) {
assert(IdxMulOpd == 1 || IdxMulOpd == 2);
MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
Register ResultReg = Root.getOperand(0).getReg();
Register SrcReg0 = MUL->getOperand(1).getReg();
bool Src0IsKill = MUL->getOperand(1).isKill();
Register SrcReg1 = MUL->getOperand(2).getReg();
bool Src1IsKill = MUL->getOperand(2).isKill();
if (Register::isVirtualRegister(ResultReg))
MRI.constrainRegClass(ResultReg, RC);
if (Register::isVirtualRegister(SrcReg0))
MRI.constrainRegClass(SrcReg0, RC);
if (Register::isVirtualRegister(SrcReg1))
MRI.constrainRegClass(SrcReg1, RC);
if (Register::isVirtualRegister(VR))
MRI.constrainRegClass(VR, RC);
MachineInstrBuilder MIB =
BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
.addReg(SrcReg0, getKillRegState(Src0IsKill))
.addReg(SrcReg1, getKillRegState(Src1IsKill))
.addReg(VR);
// Insert the MADD
InsInstrs.push_back(MIB);
return MUL;
}
/// When getMachineCombinerPatterns() finds potential patterns,
/// this function generates the instructions that could replace the
/// original code sequence
void AArch64InstrInfo::genAlternativeCodeSequence(
MachineInstr &Root, MachineCombinerPattern Pattern,
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
MachineBasicBlock &MBB = *Root.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
MachineInstr *MUL = nullptr;
const TargetRegisterClass *RC;
unsigned Opc;
switch (Pattern) {
default:
// Reassociate instructions.
TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
DelInstrs, InstrIdxForVirtReg);
return;
case MachineCombinerPattern::MULADDW_OP1:
case MachineCombinerPattern::MULADDX_OP1:
// MUL I=A,B,0
// ADD R,I,C
// ==> MADD R,A,B,C
// --- Create(MADD);
if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
Opc = AArch64::MADDWrrr;
RC = &AArch64::GPR32RegClass;
} else {
Opc = AArch64::MADDXrrr;
RC = &AArch64::GPR64RegClass;
}
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case MachineCombinerPattern::MULADDW_OP2:
case MachineCombinerPattern::MULADDX_OP2:
// MUL I=A,B,0
// ADD R,C,I
// ==> MADD R,A,B,C
// --- Create(MADD);
if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
Opc = AArch64::MADDWrrr;
RC = &AArch64::GPR32RegClass;
} else {
Opc = AArch64::MADDXrrr;
RC = &AArch64::GPR64RegClass;
}
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULADDWI_OP1:
case MachineCombinerPattern::MULADDXI_OP1: {
// MUL I=A,B,0
// ADD R,I,Imm
// ==> ORR V, ZR, Imm
// ==> MADD R,A,B,V
// --- Create(MADD);
const TargetRegisterClass *OrrRC;
unsigned BitSize, OrrOpc, ZeroReg;
if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
OrrOpc = AArch64::ORRWri;
OrrRC = &AArch64::GPR32spRegClass;
BitSize = 32;
ZeroReg = AArch64::WZR;
Opc = AArch64::MADDWrrr;
RC = &AArch64::GPR32RegClass;
} else {
OrrOpc = AArch64::ORRXri;
OrrRC = &AArch64::GPR64spRegClass;
BitSize = 64;
ZeroReg = AArch64::XZR;
Opc = AArch64::MADDXrrr;
RC = &AArch64::GPR64RegClass;
}
Register NewVR = MRI.createVirtualRegister(OrrRC);
uint64_t Imm = Root.getOperand(2).getImm();
if (Root.getOperand(3).isImm()) {
unsigned Val = Root.getOperand(3).getImm();
Imm = Imm << Val;
}
uint64_t UImm = SignExtend64(Imm, BitSize);
uint64_t Encoding;
if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding))
return;
MachineInstrBuilder MIB1 =
BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
.addReg(ZeroReg)
.addImm(Encoding);
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
break;
}
case MachineCombinerPattern::MULSUBW_OP1:
case MachineCombinerPattern::MULSUBX_OP1: {
// MUL I=A,B,0
// SUB R,I, C
// ==> SUB V, 0, C
// ==> MADD R,A,B,V // = -C + A*B
// --- Create(MADD);
const TargetRegisterClass *SubRC;
unsigned SubOpc, ZeroReg;
if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
SubOpc = AArch64::SUBWrr;
SubRC = &AArch64::GPR32spRegClass;
ZeroReg = AArch64::WZR;
Opc = AArch64::MADDWrrr;
RC = &AArch64::GPR32RegClass;
} else {
SubOpc = AArch64::SUBXrr;
SubRC = &AArch64::GPR64spRegClass;
ZeroReg = AArch64::XZR;
Opc = AArch64::MADDXrrr;
RC = &AArch64::GPR64RegClass;
}
Register NewVR = MRI.createVirtualRegister(SubRC);
// SUB NewVR, 0, C
MachineInstrBuilder MIB1 =
BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
.addReg(ZeroReg)
.add(Root.getOperand(2));
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
break;
}
case MachineCombinerPattern::MULSUBW_OP2:
case MachineCombinerPattern::MULSUBX_OP2:
// MUL I=A,B,0
// SUB R,C,I
// ==> MSUB R,A,B,C (computes C - A*B)
// --- Create(MSUB);
if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
Opc = AArch64::MSUBWrrr;
RC = &AArch64::GPR32RegClass;
} else {
Opc = AArch64::MSUBXrrr;
RC = &AArch64::GPR64RegClass;
}
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULSUBWI_OP1:
case MachineCombinerPattern::MULSUBXI_OP1: {
// MUL I=A,B,0
// SUB R,I, Imm
// ==> ORR V, ZR, -Imm
// ==> MADD R,A,B,V // = -Imm + A*B
// --- Create(MADD);
const TargetRegisterClass *OrrRC;
unsigned BitSize, OrrOpc, ZeroReg;
if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
OrrOpc = AArch64::ORRWri;
OrrRC = &AArch64::GPR32spRegClass;
BitSize = 32;
ZeroReg = AArch64::WZR;
Opc = AArch64::MADDWrrr;
RC = &AArch64::GPR32RegClass;
} else {
OrrOpc = AArch64::ORRXri;
OrrRC = &AArch64::GPR64spRegClass;
BitSize = 64;
ZeroReg = AArch64::XZR;
Opc = AArch64::MADDXrrr;
RC = &AArch64::GPR64RegClass;
}
Register NewVR = MRI.createVirtualRegister(OrrRC);
uint64_t Imm = Root.getOperand(2).getImm();
if (Root.getOperand(3).isImm()) {
unsigned Val = Root.getOperand(3).getImm();
Imm = Imm << Val;
}
uint64_t UImm = SignExtend64(-Imm, BitSize);
uint64_t Encoding;
if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding))
return;
MachineInstrBuilder MIB1 =
BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
.addReg(ZeroReg)
.addImm(Encoding);
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
break;
}
case MachineCombinerPattern::MULADDv8i8_OP1:
Opc = AArch64::MLAv8i8;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case MachineCombinerPattern::MULADDv8i8_OP2:
Opc = AArch64::MLAv8i8;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULADDv16i8_OP1:
Opc = AArch64::MLAv16i8;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case MachineCombinerPattern::MULADDv16i8_OP2:
Opc = AArch64::MLAv16i8;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULADDv4i16_OP1:
Opc = AArch64::MLAv4i16;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case MachineCombinerPattern::MULADDv4i16_OP2:
Opc = AArch64::MLAv4i16;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULADDv8i16_OP1:
Opc = AArch64::MLAv8i16;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case MachineCombinerPattern::MULADDv8i16_OP2:
Opc = AArch64::MLAv8i16;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULADDv2i32_OP1:
Opc = AArch64::MLAv2i32;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case MachineCombinerPattern::MULADDv2i32_OP2:
Opc = AArch64::MLAv2i32;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULADDv4i32_OP1:
Opc = AArch64::MLAv4i32;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case MachineCombinerPattern::MULADDv4i32_OP2:
Opc = AArch64::MLAv4i32;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULSUBv8i8_OP1:
Opc = AArch64::MLAv8i8;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
RC);
break;
case MachineCombinerPattern::MULSUBv8i8_OP2:
Opc = AArch64::MLSv8i8;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULSUBv16i8_OP1:
Opc = AArch64::MLAv16i8;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
RC);
break;
case MachineCombinerPattern::MULSUBv16i8_OP2:
Opc = AArch64::MLSv16i8;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULSUBv4i16_OP1:
Opc = AArch64::MLAv4i16;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
RC);
break;
case MachineCombinerPattern::MULSUBv4i16_OP2:
Opc = AArch64::MLSv4i16;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULSUBv8i16_OP1:
Opc = AArch64::MLAv8i16;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
RC);
break;
case MachineCombinerPattern::MULSUBv8i16_OP2:
Opc = AArch64::MLSv8i16;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULSUBv2i32_OP1:
Opc = AArch64::MLAv2i32;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
RC);
break;
case MachineCombinerPattern::MULSUBv2i32_OP2:
Opc = AArch64::MLSv2i32;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULSUBv4i32_OP1:
Opc = AArch64::MLAv4i32;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
RC);
break;
case MachineCombinerPattern::MULSUBv4i32_OP2:
Opc = AArch64::MLSv4i32;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULADDv4i16_indexed_OP1:
Opc = AArch64::MLAv4i16_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case MachineCombinerPattern::MULADDv4i16_indexed_OP2:
Opc = AArch64::MLAv4i16_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULADDv8i16_indexed_OP1:
Opc = AArch64::MLAv8i16_indexed;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case MachineCombinerPattern::MULADDv8i16_indexed_OP2:
Opc = AArch64::MLAv8i16_indexed;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULADDv2i32_indexed_OP1:
Opc = AArch64::MLAv2i32_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case MachineCombinerPattern::MULADDv2i32_indexed_OP2:
Opc = AArch64::MLAv2i32_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULADDv4i32_indexed_OP1:
Opc = AArch64::MLAv4i32_indexed;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case MachineCombinerPattern::MULADDv4i32_indexed_OP2:
Opc = AArch64::MLAv4i32_indexed;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
Opc = AArch64::MLAv4i16_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
RC);
break;
case MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
Opc = AArch64::MLSv4i16_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
Opc = AArch64::MLAv8i16_indexed;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
RC);
break;
case MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
Opc = AArch64::MLSv8i16_indexed;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
Opc = AArch64::MLAv2i32_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
RC);
break;
case MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
Opc = AArch64::MLSv2i32_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
Opc = AArch64::MLAv4i32_indexed;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
RC);
break;
case MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
Opc = AArch64::MLSv4i32_indexed;
RC = &AArch64::FPR128RegClass;
MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
// Floating Point Support
case MachineCombinerPattern::FMULADDH_OP1:
Opc = AArch64::FMADDHrrr;
RC = &AArch64::FPR16RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case MachineCombinerPattern::FMULADDS_OP1:
Opc = AArch64::FMADDSrrr;
RC = &AArch64::FPR32RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case MachineCombinerPattern::FMULADDD_OP1:
Opc = AArch64::FMADDDrrr;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case MachineCombinerPattern::FMULADDH_OP2:
Opc = AArch64::FMADDHrrr;
RC = &AArch64::FPR16RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::FMULADDS_OP2:
Opc = AArch64::FMADDSrrr;
RC = &AArch64::FPR32RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::FMULADDD_OP2:
Opc = AArch64::FMADDDrrr;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
Opc = AArch64::FMLAv1i32_indexed;
RC = &AArch64::FPR32RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed);
break;
case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
Opc = AArch64::FMLAv1i32_indexed;
RC = &AArch64::FPR32RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
break;
case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
Opc = AArch64::FMLAv1i64_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed);
break;
case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
Opc = AArch64::FMLAv1i64_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
break;
case MachineCombinerPattern::FMLAv4i16_indexed_OP1:
RC = &AArch64::FPR64RegClass;
Opc = AArch64::FMLAv4i16_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed);
break;
case MachineCombinerPattern::FMLAv4f16_OP1:
RC = &AArch64::FPR64RegClass;
Opc = AArch64::FMLAv4f16;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Accumulator);
break;
case MachineCombinerPattern::FMLAv4i16_indexed_OP2:
RC = &AArch64::FPR64RegClass;
Opc = AArch64::FMLAv4i16_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
break;
case MachineCombinerPattern::FMLAv4f16_OP2:
RC = &AArch64::FPR64RegClass;
Opc = AArch64::FMLAv4f16;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Accumulator);
break;
case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
case MachineCombinerPattern::FMLAv2f32_OP1:
RC = &AArch64::FPR64RegClass;
if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
Opc = AArch64::FMLAv2i32_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed);
} else {
Opc = AArch64::FMLAv2f32;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Accumulator);
}
break;
case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
case MachineCombinerPattern::FMLAv2f32_OP2:
RC = &AArch64::FPR64RegClass;
if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
Opc = AArch64::FMLAv2i32_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
} else {
Opc = AArch64::FMLAv2f32;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Accumulator);
}
break;
case MachineCombinerPattern::FMLAv8i16_indexed_OP1:
RC = &AArch64::FPR128RegClass;
Opc = AArch64::FMLAv8i16_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed);
break;
case MachineCombinerPattern::FMLAv8f16_OP1:
RC = &AArch64::FPR128RegClass;
Opc = AArch64::FMLAv8f16;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Accumulator);
break;
case MachineCombinerPattern::FMLAv8i16_indexed_OP2:
RC = &AArch64::FPR128RegClass;
Opc = AArch64::FMLAv8i16_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
break;
case MachineCombinerPattern::FMLAv8f16_OP2:
RC = &AArch64::FPR128RegClass;
Opc = AArch64::FMLAv8f16;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Accumulator);
break;
case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
case MachineCombinerPattern::FMLAv2f64_OP1:
RC = &AArch64::FPR128RegClass;
if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
Opc = AArch64::FMLAv2i64_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed);
} else {
Opc = AArch64::FMLAv2f64;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Accumulator);
}
break;
case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
case MachineCombinerPattern::FMLAv2f64_OP2:
RC = &AArch64::FPR128RegClass;
if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
Opc = AArch64::FMLAv2i64_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
} else {
Opc = AArch64::FMLAv2f64;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Accumulator);
}
break;
case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
case MachineCombinerPattern::FMLAv4f32_OP1:
RC = &AArch64::FPR128RegClass;
if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
Opc = AArch64::FMLAv4i32_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed);
} else {
Opc = AArch64::FMLAv4f32;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Accumulator);
}
break;
case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
case MachineCombinerPattern::FMLAv4f32_OP2:
RC = &AArch64::FPR128RegClass;
if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
Opc = AArch64::FMLAv4i32_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
} else {
Opc = AArch64::FMLAv4f32;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Accumulator);
}
break;
case MachineCombinerPattern::FMULSUBH_OP1:
Opc = AArch64::FNMSUBHrrr;
RC = &AArch64::FPR16RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case MachineCombinerPattern::FMULSUBS_OP1:
Opc = AArch64::FNMSUBSrrr;
RC = &AArch64::FPR32RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case MachineCombinerPattern::FMULSUBD_OP1:
Opc = AArch64::FNMSUBDrrr;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case MachineCombinerPattern::FNMULSUBH_OP1:
Opc = AArch64::FNMADDHrrr;
RC = &AArch64::FPR16RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case MachineCombinerPattern::FNMULSUBS_OP1:
Opc = AArch64::FNMADDSrrr;
RC = &AArch64::FPR32RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case MachineCombinerPattern::FNMULSUBD_OP1:
Opc = AArch64::FNMADDDrrr;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
break;
case MachineCombinerPattern::FMULSUBH_OP2:
Opc = AArch64::FMSUBHrrr;
RC = &AArch64::FPR16RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::FMULSUBS_OP2:
Opc = AArch64::FMSUBSrrr;
RC = &AArch64::FPR32RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::FMULSUBD_OP2:
Opc = AArch64::FMSUBDrrr;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
break;
case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
Opc = AArch64::FMLSv1i32_indexed;
RC = &AArch64::FPR32RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
break;
case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
Opc = AArch64::FMLSv1i64_indexed;
RC = &AArch64::FPR64RegClass;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
break;
case MachineCombinerPattern::FMLSv4f16_OP1:
case MachineCombinerPattern::FMLSv4i16_indexed_OP1: {
RC = &AArch64::FPR64RegClass;
Register NewVR = MRI.createVirtualRegister(RC);
MachineInstrBuilder MIB1 =
BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f16), NewVR)
.add(Root.getOperand(2));
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
if (Pattern == MachineCombinerPattern::FMLSv4f16_OP1) {
Opc = AArch64::FMLAv4f16;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Accumulator, &NewVR);
} else {
Opc = AArch64::FMLAv4i16_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed, &NewVR);
}
break;
}
case MachineCombinerPattern::FMLSv4f16_OP2:
RC = &AArch64::FPR64RegClass;
Opc = AArch64::FMLSv4f16;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Accumulator);
break;
case MachineCombinerPattern::FMLSv4i16_indexed_OP2:
RC = &AArch64::FPR64RegClass;
Opc = AArch64::FMLSv4i16_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
break;
case MachineCombinerPattern::FMLSv2f32_OP2:
case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
RC = &AArch64::FPR64RegClass;
if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
Opc = AArch64::FMLSv2i32_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
} else {
Opc = AArch64::FMLSv2f32;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Accumulator);
}
break;
case MachineCombinerPattern::FMLSv8f16_OP1:
case MachineCombinerPattern::FMLSv8i16_indexed_OP1: {
RC = &AArch64::FPR128RegClass;
Register NewVR = MRI.createVirtualRegister(RC);
MachineInstrBuilder MIB1 =
BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv8f16), NewVR)
.add(Root.getOperand(2));
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
if (Pattern == MachineCombinerPattern::FMLSv8f16_OP1) {
Opc = AArch64::FMLAv8f16;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Accumulator, &NewVR);
} else {
Opc = AArch64::FMLAv8i16_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed, &NewVR);
}
break;
}
case MachineCombinerPattern::FMLSv8f16_OP2:
RC = &AArch64::FPR128RegClass;
Opc = AArch64::FMLSv8f16;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Accumulator);
break;
case MachineCombinerPattern::FMLSv8i16_indexed_OP2:
RC = &AArch64::FPR128RegClass;
Opc = AArch64::FMLSv8i16_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
break;
case MachineCombinerPattern::FMLSv2f64_OP2:
case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
RC = &AArch64::FPR128RegClass;
if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
Opc = AArch64::FMLSv2i64_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
} else {
Opc = AArch64::FMLSv2f64;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Accumulator);
}
break;
case MachineCombinerPattern::FMLSv4f32_OP2:
case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
RC = &AArch64::FPR128RegClass;
if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
Opc = AArch64::FMLSv4i32_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Indexed);
} else {
Opc = AArch64::FMLSv4f32;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
FMAInstKind::Accumulator);
}
break;
case MachineCombinerPattern::FMLSv2f32_OP1:
case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
RC = &AArch64::FPR64RegClass;
Register NewVR = MRI.createVirtualRegister(RC);
MachineInstrBuilder MIB1 =
BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
.add(Root.getOperand(2));
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
Opc = AArch64::FMLAv2i32_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed, &NewVR);
} else {
Opc = AArch64::FMLAv2f32;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Accumulator, &NewVR);
}
break;
}
case MachineCombinerPattern::FMLSv4f32_OP1:
case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
RC = &AArch64::FPR128RegClass;
Register NewVR = MRI.createVirtualRegister(RC);
MachineInstrBuilder MIB1 =
BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
.add(Root.getOperand(2));
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
Opc = AArch64::FMLAv4i32_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed, &NewVR);
} else {
Opc = AArch64::FMLAv4f32;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Accumulator, &NewVR);
}
break;
}
case MachineCombinerPattern::FMLSv2f64_OP1:
case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
RC = &AArch64::FPR128RegClass;
Register NewVR = MRI.createVirtualRegister(RC);
MachineInstrBuilder MIB1 =
BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
.add(Root.getOperand(2));
InsInstrs.push_back(MIB1);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
Opc = AArch64::FMLAv2i64_indexed;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Indexed, &NewVR);
} else {
Opc = AArch64::FMLAv2f64;
MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
FMAInstKind::Accumulator, &NewVR);
}
break;
}
case MachineCombinerPattern::FMULv2i32_indexed_OP1:
case MachineCombinerPattern::FMULv2i32_indexed_OP2: {
unsigned IdxDupOp =
(Pattern == MachineCombinerPattern::FMULv2i32_indexed_OP1) ? 1 : 2;
genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
&AArch64::FPR128RegClass, MRI);
break;
}
case MachineCombinerPattern::FMULv2i64_indexed_OP1:
case MachineCombinerPattern::FMULv2i64_indexed_OP2: {
unsigned IdxDupOp =
(Pattern == MachineCombinerPattern::FMULv2i64_indexed_OP1) ? 1 : 2;
genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
&AArch64::FPR128RegClass, MRI);
break;
}
case MachineCombinerPattern::FMULv4i16_indexed_OP1:
case MachineCombinerPattern::FMULv4i16_indexed_OP2: {
unsigned IdxDupOp =
(Pattern == MachineCombinerPattern::FMULv4i16_indexed_OP1) ? 1 : 2;
genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
&AArch64::FPR128_loRegClass, MRI);
break;
}
case MachineCombinerPattern::FMULv4i32_indexed_OP1:
case MachineCombinerPattern::FMULv4i32_indexed_OP2: {
unsigned IdxDupOp =
(Pattern == MachineCombinerPattern::FMULv4i32_indexed_OP1) ? 1 : 2;
genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
&AArch64::FPR128RegClass, MRI);
break;
}
case MachineCombinerPattern::FMULv8i16_indexed_OP1:
case MachineCombinerPattern::FMULv8i16_indexed_OP2: {
unsigned IdxDupOp =
(Pattern == MachineCombinerPattern::FMULv8i16_indexed_OP1) ? 1 : 2;
genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
&AArch64::FPR128_loRegClass, MRI);
break;
}
} // end switch (Pattern)
// Record MUL and ADD/SUB for deletion
if (MUL)
DelInstrs.push_back(MUL);
DelInstrs.push_back(&Root);
}
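// Editor's note: the following is an illustrative sketch added for this
// review, not part of the original file. The MULADDWI_OP1 and MULSUBWI_OP1
// cases above fold the ADD/SUB immediate by applying the optional shift from
// operand 3, negating it for the SUB form, sign-extending to the register
// width, and bailing out unless the result is encodable as a logical
// immediate (AArch64_AM::processLogicalImmediate). The hypothetical helper
// foldedAccumulatorImm() restates just that arithmetic; it relies only on
// headers this file already includes.
static inline uint64_t foldedAccumulatorImm(uint64_t Imm, unsigned Shift,
                                            unsigned BitSize, bool IsSub) {
  Imm <<= Shift;   // Optional LSL carried by operand 3 of the ADD/SUB.
  if (IsSub)
    Imm = 0 - Imm; // SUB R,I,Imm becomes MADD with -Imm as the accumulator.
  // Sign-extend the low BitSize bits to 64 bits, matching SignExtend64 above.
  unsigned Drop = 64 - BitSize;
  return static_cast<uint64_t>(static_cast<int64_t>(Imm << Drop) >> Drop);
}
// For example, foldedAccumulatorImm(1, 12, 32, /*IsSub=*/true) yields the
// 32-bit sign-extended value of -4096, which is then tested for logical
// immediate encodability before the ORR + MADD pair is emitted.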
/// Replace a csinc-branch sequence with a simple conditional branch
///
/// Examples:
/// 1. \code
/// csinc w9, wzr, wzr, <condition code>
/// tbnz w9, #0, 0x44
/// \endcode
/// to
/// \code
/// b.<inverted condition code>
/// \endcode
///
/// 2. \code
/// csinc w9, wzr, wzr, <condition code>
/// tbz w9, #0, 0x44
/// \endcode
/// to
/// \code
/// b.<condition code>
/// \endcode
///
/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
/// compare's constant operand is a power of 2.
///
/// Examples:
/// \code
/// and w8, w8, #0x400
/// cbnz w8, L1
/// \endcode
/// to
/// \code
/// tbnz w8, #10, L1
/// \endcode
///
/// \param MI Conditional Branch
/// \return True when the simple conditional branch is generated
///
bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
bool IsNegativeBranch = false;
bool IsTestAndBranch = false;
unsigned TargetBBInMI = 0;
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unknown branch instruction?");
case AArch64::Bcc:
return false;
case AArch64::CBZW:
case AArch64::CBZX:
TargetBBInMI = 1;
break;
case AArch64::CBNZW:
case AArch64::CBNZX:
TargetBBInMI = 1;
IsNegativeBranch = true;
break;
case AArch64::TBZW:
case AArch64::TBZX:
TargetBBInMI = 2;
IsTestAndBranch = true;
break;
case AArch64::TBNZW:
case AArch64::TBNZX:
TargetBBInMI = 2;
IsNegativeBranch = true;
IsTestAndBranch = true;
break;
}
// So we increment a zero register and test for bits other
// than bit 0? Conservatively bail out in case the verifier
// missed this case.
if (IsTestAndBranch && MI.getOperand(1).getImm())
return false;
// Find Definition.
assert(MI.getParent() && "Incomplete machine instruction\n");
MachineBasicBlock *MBB = MI.getParent();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
Register VReg = MI.getOperand(0).getReg();
if (!Register::isVirtualRegister(VReg))
return false;
MachineInstr *DefMI = MRI->getVRegDef(VReg);
// Look through COPY instructions to find definition.
while (DefMI->isCopy()) {
Register CopyVReg = DefMI->getOperand(1).getReg();
if (!MRI->hasOneNonDBGUse(CopyVReg))
return false;
if (!MRI->hasOneDef(CopyVReg))
return false;
DefMI = MRI->getVRegDef(CopyVReg);
}
switch (DefMI->getOpcode()) {
default:
return false;
// Fold AND into a TBZ/TBNZ if constant operand is power of 2.
case AArch64::ANDWri:
case AArch64::ANDXri: {
if (IsTestAndBranch)
return false;
if (DefMI->getParent() != MBB)
return false;
if (!MRI->hasOneNonDBGUse(VReg))
return false;
bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
if (!isPowerOf2_64(Mask))
return false;
MachineOperand &MO = DefMI->getOperand(1);
Register NewReg = MO.getReg();
if (!Register::isVirtualRegister(NewReg))
return false;
assert(!MRI->def_empty(NewReg) && "Register must be defined.");
MachineBasicBlock &RefToMBB = *MBB;
MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
DebugLoc DL = MI.getDebugLoc();
unsigned Imm = Log2_64(Mask);
unsigned Opc = (Imm < 32)
? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
: (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
.addReg(NewReg)
.addImm(Imm)
.addMBB(TBB);
// The register now lives on to the new TBZ/TBNZ.
MO.setIsKill(false);
// For immediates smaller than 32, we need to use the 32-bit
// variant (W) in all cases, since the 64-bit variant cannot
// encode them. Therefore, if the input register is 64-bit, we
// need to take its 32-bit sub-register.
if (!Is32Bit && Imm < 32)
NewMI->getOperand(0).setSubReg(AArch64::sub_32);
MI.eraseFromParent();
return true;
}
// Look for CSINC
case AArch64::CSINCWr:
case AArch64::CSINCXr: {
if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
DefMI->getOperand(2).getReg() == AArch64::WZR) &&
!(DefMI->getOperand(1).getReg() == AArch64::XZR &&
DefMI->getOperand(2).getReg() == AArch64::XZR))
return false;
if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
return false;
AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
// Convert only when the condition code is not modified between
// the CSINC and the branch. The CC may be used by other
// instructions in between.
if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
return false;
MachineBasicBlock &RefToMBB = *MBB;
MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
DebugLoc DL = MI.getDebugLoc();
if (IsNegativeBranch)
CC = AArch64CC::getInvertedCondCode(CC);
BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
MI.eraseFromParent();
return true;
}
}
}
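// Editor's note: illustrative sketch added for this review, not part of the
// original file. The ANDWri/ANDXri case in optimizeCondBranch() above turns a
// single-bit AND mask into a TBZ/TBNZ bit number and, because the 64-bit
// TB(N)ZX form cannot encode bit numbers below 32, falls back to the W form
// (taking the 32-bit sub-register when the input was 64-bit). The helper name
// pickTestBranchOpcode() is hypothetical; the real code inlines this logic,
// using Log2_64/isPowerOf2_64 from headers already included here.
static inline unsigned pickTestBranchOpcode(uint64_t Mask, bool IsNegativeBranch,
                                            unsigned &BitIndex) {
  assert(isPowerOf2_64(Mask) && "Only single-bit masks map onto TBZ/TBNZ");
  BitIndex = Log2_64(Mask); // The bit the new TBZ/TBNZ will test.
  if (BitIndex < 32)
    return IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW;
  return IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX;
}
// For the documented example "and w8, w8, #0x400; cbnz w8, L1", the mask is
// 0x400, BitIndex becomes 10, and the W-form TBNZ is selected, giving
// "tbnz w8, #10, L1".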
std::pair<unsigned, unsigned>
AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
const unsigned Mask = AArch64II::MO_FRAGMENT;
return std::make_pair(TF & Mask, TF & ~Mask);
}
ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
using namespace AArch64II;
static const std::pair<unsigned, const char *> TargetFlags[] = {
{MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
{MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
{MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
{MO_HI12, "aarch64-hi12"}};
return makeArrayRef(TargetFlags);
}
ArrayRef<std::pair<unsigned, const char *>>
AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
using namespace AArch64II;
static const std::pair<unsigned, const char *> TargetFlags[] = {
{MO_COFFSTUB, "aarch64-coffstub"},
{MO_GOT, "aarch64-got"},
{MO_NC, "aarch64-nc"},
{MO_S, "aarch64-s"},
{MO_TLS, "aarch64-tls"},
{MO_DLLIMPORT, "aarch64-dllimport"},
{MO_PREL, "aarch64-prel"},
{MO_TAGGED, "aarch64-tagged"}};
return makeArrayRef(TargetFlags);
}
ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
{{MOSuppressPair, "aarch64-suppress-pair"},
{MOStridedAccess, "aarch64-strided-access"}};
return makeArrayRef(TargetFlags);
}
/// Constants defining how certain sequences should be outlined.
/// This encompasses how an outlined function should be called, and what kind of
/// frame should be emitted for that outlined function.
///
/// \p MachineOutlinerDefault implies that the function should be called with
/// a save and restore of LR to the stack.
///
/// That is,
///
/// I1 Save LR OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION I1
/// I3 Restore LR I2
/// I3
/// RET
///
/// * Call construction overhead: 3 (save + BL + restore)
/// * Frame construction overhead: 1 (ret)
/// * Requires stack fixups? Yes
///
/// \p MachineOutlinerTailCall implies that the function is being created from
/// a sequence of instructions ending in a return.
///
/// That is,
///
/// I1 OUTLINED_FUNCTION:
/// I2 --> B OUTLINED_FUNCTION I1
/// RET I2
/// RET
///
/// * Call construction overhead: 1 (B)
/// * Frame construction overhead: 0 (Return included in sequence)
/// * Requires stack fixups? No
///
/// \p MachineOutlinerNoLRSave implies that the function should be called using
/// a BL instruction, but doesn't require LR to be saved and restored. This
/// happens when LR is known to be dead.
///
/// That is,
///
/// I1 OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION I1
/// I3 I2
/// I3
/// RET
///
/// * Call construction overhead: 1 (BL)
/// * Frame construction overhead: 1 (RET)
/// * Requires stack fixups? No
///
/// \p MachineOutlinerThunk implies that the function is being created from
/// a sequence of instructions ending in a call. The outlined function is
/// called with a BL instruction, and the outlined function tail-calls the
/// original call destination.
///
/// That is,
///
/// I1 OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION I1
/// BL f I2
/// B f
/// * Call construction overhead: 1 (BL)
/// * Frame construction overhead: 0
/// * Requires stack fixups? No
///
/// \p MachineOutlinerRegSave implies that the function should be called with a
/// save and restore of LR to an available register. This allows us to avoid
/// stack fixups. Note that this outlining variant is compatible with the
/// NoLRSave case.
///
/// That is,
///
/// I1 Save LR OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION I1
/// I3 Restore LR I2
/// I3
/// RET
///
/// * Call construction overhead: 3 (save + BL + restore)
/// * Frame construction overhead: 1 (ret)
/// * Requires stack fixups? No
enum MachineOutlinerClass {
MachineOutlinerDefault, /// Emit a save, restore, call, and return.
MachineOutlinerTailCall, /// Only emit a branch.
MachineOutlinerNoLRSave, /// Emit a call and return.
MachineOutlinerThunk, /// Emit a call and tail-call.
MachineOutlinerRegSave /// Same as default, but save to a register.
};
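// Editor's note: illustrative sketch added for this review, not part of the
// original file. The per-variant overheads documented above feed the
// target-independent MachineOutliner's benefit estimate: every occurrence of
// the sequence shrinks to the call overhead, while one outlined copy plus its
// frame overhead is paid once. approxOutliningBenefit() is a hypothetical
// restatement of that accounting (all quantities in the same units, e.g.
// bytes); it is not the outliner's actual cost function.
static inline int64_t approxOutliningBenefit(unsigned NumOccurrences,
                                             unsigned SequenceSize,
                                             unsigned CallOverhead,
                                             unsigned FrameOverhead) {
  int64_t Removed = int64_t(NumOccurrences) * SequenceSize;
  int64_t Added = int64_t(NumOccurrences) * CallOverhead +
                  int64_t(SequenceSize) + FrameOverhead;
  // A positive result means outlining is expected to shrink the program.
  return Removed - Added;
}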
enum MachineOutlinerMBBFlags {
LRUnavailableSomewhere = 0x2,
HasCalls = 0x4,
UnsafeRegsDead = 0x8
};
unsigned
AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
assert(C.LRUWasSet && "LRU wasn't set?");
MachineFunction *MF = C.getMF();
const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
MF->getSubtarget().getRegisterInfo());
// Check if there is an available register across the sequence that we can
// use.
for (unsigned Reg : AArch64::GPR64RegClass) {
if (!ARI->isReservedReg(*MF, Reg) &&
Reg != AArch64::LR && // LR is not reserved, but don't use it.
Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
Reg != AArch64::X17 && // Ditto for X17.
C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
return Reg;
}
// No suitable register. Return 0.
return 0u;
}
static bool
outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
const outliner::Candidate &b) {
const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) &&
MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true);
}
static bool
outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
const outliner::Candidate &b) {
const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
}
static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
const outliner::Candidate &b) {
const AArch64Subtarget &SubtargetA =
a.getMF()->getSubtarget<AArch64Subtarget>();
const AArch64Subtarget &SubtargetB =
b.getMF()->getSubtarget<AArch64Subtarget>();
return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
}
outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
unsigned SequenceSize =
std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
[this](unsigned Sum, const MachineInstr &MI) {
return Sum + getInstSizeInBytes(MI);
});
unsigned NumBytesToCreateFrame = 0;
// We only allow outlining for functions having exactly matching return
// address signing attributes, i.e., all share the same value for the
// attribute "sign-return-address" and all share the same type of key they
// are signed with.
// Additionally, we require that either all functions support v8.3a features
// or none of them do. Otherwise an outlined function could get signed using
// dedicated v8.3 instructions, and a call from a function that doesn't
// support v8.3 instructions would be invalid.
if (std::adjacent_find(
RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
[](const outliner::Candidate &a, const outliner::Candidate &b) {
// Return true if a and b are non-equal w.r.t. return address
// signing or support of v8.3a features
if (outliningCandidatesSigningScopeConsensus(a, b) &&
outliningCandidatesSigningKeyConsensus(a, b) &&
outliningCandidatesV8_3OpsConsensus(a, b)) {
return false;
}
return true;
}) != RepeatedSequenceLocs.end()) {
return outliner::OutlinedFunction();
}
// Since at this point all candidates agree on their return address signing,
// picking just one is fine. If the candidate functions potentially sign their
// return addresses, the outlined function should do the same. Note that in
// the case of "sign-return-address"="non-leaf" this is an assumption: It is
// not certainly true that the outlined function will have to sign its return
// address but this decision is made later, when the decision to outline
// has already been made.
// The same holds for the number of additional instructions we need: On
// v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
// necessary. However, at this point we don't know if the outlined function
// will have a RET instruction so we assume the worst.
const TargetRegisterInfo &TRI = getRegisterInfo();
if (FirstCand.getMF()
->getInfo<AArch64FunctionInfo>()
->shouldSignReturnAddress(true)) {
// One PAC and one AUT instruction.
NumBytesToCreateFrame += 8;
// We have to check whether sp-modifying instructions would get outlined.
// If so, we only allow outlining if sp is unchanged overall, so matching
// sub and add instructions are okay to outline; all other sp modifications
// are not.
auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
int SPValue = 0;
MachineBasicBlock::iterator MBBI = C.front();
for (;;) {
if (MBBI->modifiesRegister(AArch64::SP, &TRI)) {
switch (MBBI->getOpcode()) {
case AArch64::ADDXri:
case AArch64::ADDWri:
assert(MBBI->getNumOperands() == 4 && "Wrong number of operands");
assert(MBBI->getOperand(2).isImm() &&
"Expected operand to be immediate");
assert(MBBI->getOperand(1).isReg() &&
"Expected operand to be a register");
// Check if the add just increments sp. If so, we search for
// matching sub instructions that decrement sp. If not, the
// modification is illegal
if (MBBI->getOperand(1).getReg() == AArch64::SP)
SPValue += MBBI->getOperand(2).getImm();
else
return true;
break;
case AArch64::SUBXri:
case AArch64::SUBWri:
assert(MBBI->getNumOperands() == 4 && "Wrong number of operands");
assert(MBBI->getOperand(2).isImm() &&
"Expected operand to be immediate");
assert(MBBI->getOperand(1).isReg() &&
"Expected operand to be a register");
// Check if the sub just decrements sp. If so, we search for
// matching add instructions that increment sp. If not, the
// modification is illegal
if (MBBI->getOperand(1).getReg() == AArch64::SP)
SPValue -= MBBI->getOperand(2).getImm();
else
return true;
break;
default:
return true;
}
}
if (MBBI == C.back())
break;
++MBBI;
}
if (SPValue)
return true;
return false;
};
// Remove candidates with illegal stack modifying instructions
llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
// If the sequence doesn't have enough candidates left, then we're done.
if (RepeatedSequenceLocs.size() < 2)
return outliner::OutlinedFunction();
}
// Properties about candidate MBBs that hold for all of them.
unsigned FlagsSetInAll = 0xF;
// Compute liveness information for each candidate, and set FlagsSetInAll.
std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
[&FlagsSetInAll](outliner::Candidate &C) {
FlagsSetInAll &= C.Flags;
});
// According to the AArch64 Procedure Call Standard, the following are
// undefined on entry/exit from a function call:
//
// * Registers x16, x17, (and thus w16, w17)
// * Condition codes (and thus the NZCV register)
//
// Because of this, we can't outline any sequence of instructions where one
// of these registers is live into/across it. Thus, we need to delete those
// candidates.
auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
// If the unsafe registers in this block are all dead, then we don't need
// to compute liveness here.
if (C.Flags & UnsafeRegsDead)
return false;
C.initLRU(TRI);
LiveRegUnits LRU = C.LRU;
return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
!LRU.available(AArch64::NZCV));
};
// Are there any candidates where those registers are live?
if (!(FlagsSetInAll & UnsafeRegsDead)) {
// Erase every candidate that violates the restrictions above. (It could be
// true that we have viable candidates, so it's not worth bailing out in
// the case that, say, 1 out of 20 candidates violates the restrictions.)
llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);
// If the sequence doesn't have enough candidates left, then we're done.
if (RepeatedSequenceLocs.size() < 2)
return outliner::OutlinedFunction();
}
// At this point, we have only "safe" candidates to outline. Figure out
// frame + call instruction information.
unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
// Helper lambda which sets call information for every candidate.
auto SetCandidateCallInfo =
[&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
for (outliner::Candidate &C : RepeatedSequenceLocs)
C.setCallInfo(CallID, NumBytesForCall);
};
unsigned FrameID = MachineOutlinerDefault;
NumBytesToCreateFrame += 4;
bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
});
// We check to see if CFI Instructions are present, and if they are
// we find the number of CFI Instructions in the candidates.
unsigned CFICount = 0;
MachineBasicBlock::iterator MBBI = RepeatedSequenceLocs[0].front();
for (unsigned Loc = RepeatedSequenceLocs[0].getStartIdx();
Loc < RepeatedSequenceLocs[0].getEndIdx() + 1; Loc++) {
if (MBBI->isCFIInstruction())
CFICount++;
MBBI++;
}
// We compare the number of found CFI Instructions to the number of CFI
// instructions in the parent function for each candidate. We must check this
// since if we outline one of the CFI instructions in a function, we have to
// outline them all for correctness. If we do not, the address offsets will be
// incorrect between the two sections of the program.
for (outliner::Candidate &C : RepeatedSequenceLocs) {
std::vector<MCCFIInstruction> CFIInstructions =
C.getMF()->getFrameInstructions();
if (CFICount > 0 && CFICount != CFIInstructions.size())
return outliner::OutlinedFunction();
}
// Returns true if an instruction is safe to fix up, false otherwise.
auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
if (MI.isCall())
return true;
if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
!MI.readsRegister(AArch64::SP, &TRI))
return true;
// Any modification of SP will break our code to save/restore LR.
// FIXME: We could handle some instructions which add a constant
// offset to SP, with a bit more work.
if (MI.modifiesRegister(AArch64::SP, &TRI))
return false;
// At this point, we have a stack instruction that we might need to
// fix up. We'll handle it if it's a load or store.
if (MI.mayLoadOrStore()) {
const MachineOperand *Base; // Filled with the base operand of MI.
int64_t Offset; // Filled with the offset of MI.
bool OffsetIsScalable;
// Does it allow us to offset the base operand and is the base the
// register SP?
if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
!Base->isReg() || Base->getReg() != AArch64::SP)
return false;
// Fix-up code below assumes byte offsets.
if (OffsetIsScalable)
return false;
// Find the minimum/maximum offset for this instruction and check
// if fixing it up would be in range.
int64_t MinOffset,
MaxOffset; // Unscaled offsets for the instruction.
TypeSize Scale(0U, false); // The scale to multiply the offsets by.
unsigned DummyWidth;
getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
Offset += 16; // Update the offset to what it would be if we outlined.
if (Offset < MinOffset * (int64_t)Scale.getFixedSize() ||
Offset > MaxOffset * (int64_t)Scale.getFixedSize())
return false;
// It's in range, so we can outline it.
return true;
}
// FIXME: Add handling for instructions like "add x0, sp, #8".
// We can't fix it up, so don't outline it.
return false;
};
// True if it's possible to fix up each stack instruction in this sequence.
// Important for frames/call variants that modify the stack.
bool AllStackInstrsSafe = std::all_of(
FirstCand.front(), std::next(FirstCand.back()), IsSafeToFixup);
// If the last instruction in any candidate is a terminator, then we should
// tail call all of the candidates.
if (RepeatedSequenceLocs[0].back()->isTerminator()) {
FrameID = MachineOutlinerTailCall;
NumBytesToCreateFrame = 0;
SetCandidateCallInfo(MachineOutlinerTailCall, 4);
}
else if (LastInstrOpcode == AArch64::BL ||
((LastInstrOpcode == AArch64::BLR ||
LastInstrOpcode == AArch64::BLRNoIP) &&
!HasBTI)) {
// FIXME: Do we need to check if the code after this uses the value of LR?
FrameID = MachineOutlinerThunk;
NumBytesToCreateFrame = 0;
SetCandidateCallInfo(MachineOutlinerThunk, 4);
}
else {
// We need to decide how to emit calls + frames. We can always emit the same
// frame if we don't need to save to the stack. If we have to save to the
// stack, then we need a different frame.
unsigned NumBytesNoStackCalls = 0;
std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
// Check if we have to save LR.
for (outliner::Candidate &C : RepeatedSequenceLocs) {
C.initLRU(TRI);
// If we have a noreturn caller, then we're going to be conservative and
// say that we have to save LR. If we don't have a ret at the end of the
// block, then we can't reason about liveness accurately.
//
// FIXME: We can probably do better than always disabling this in
// noreturn functions by fixing up the liveness info.
bool IsNoReturn =
C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
// Is LR available? If so, we don't need a save.
if (C.LRU.available(AArch64::LR) && !IsNoReturn) {
NumBytesNoStackCalls += 4;
C.setCallInfo(MachineOutlinerNoLRSave, 4);
CandidatesWithoutStackFixups.push_back(C);
}
// Is an unused register available? If so, we won't modify the stack, so
// we can outline with the same frame type as those that don't save LR.
else if (findRegisterToSaveLRTo(C)) {
NumBytesNoStackCalls += 12;
C.setCallInfo(MachineOutlinerRegSave, 12);
CandidatesWithoutStackFixups.push_back(C);
}
// Is SP used in the sequence at all? If not, we don't have to modify
// the stack, so we are guaranteed to get the same frame.
else if (C.UsedInSequence.available(AArch64::SP)) {
NumBytesNoStackCalls += 12;
C.setCallInfo(MachineOutlinerDefault, 12);
CandidatesWithoutStackFixups.push_back(C);
}
// If we outline this, we need to modify the stack. Pretend we don't
// outline this by saving all of its bytes.
else {
NumBytesNoStackCalls += SequenceSize;
}
}
// If there are no places where we have to save LR, then note that we
// don't have to update the stack. Otherwise, give every candidate the
// default call type, as long as it's safe to do so.
if (!AllStackInstrsSafe ||
NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
RepeatedSequenceLocs = CandidatesWithoutStackFixups;
FrameID = MachineOutlinerNoLRSave;
} else {
SetCandidateCallInfo(MachineOutlinerDefault, 12);
// Bugzilla ID: 46767
// TODO: Check if fixing up the stack more than once is safe so we can
// outline these.
//
// An outline resulting in a caller that requires stack fixups at the
// callsite to a callee that also requires stack fixups can happen when
// there are no available registers at the candidate callsite for a
// candidate that itself also has calls.
//
// In other words, if function_containing_sequence in the following pseudo
// assembly requires that we save LR at the point of the call but there
// are no available registers, we save LR using SP, and as a result the
// SP offsets require stack fixups in multiples of 16.
//
// function_containing_sequence:
// ...
// save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
// call OUTLINED_FUNCTION_N
// restore LR from SP
// ...
//
// OUTLINED_FUNCTION_N:
// save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
// ...
// bl foo
// restore LR from SP
// ret
//
// Because the code to handle more than one stack fixup does not
// currently have the proper legality checks, these cases will assert
// in the AArch64 MachineOutliner. The code needs more hardening,
// testing, and better checks that the generated code is legal, and it
// is only verified to handle a single pass of stack fixup.
//
// The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
// these cases until they are known to be handled. Bugzilla 46767 is
// referenced in comments at the assert site.
//
// To avoid asserting (or generating illegal code in no-assert builds),
// we remove all candidates that would need more than one stack fixup by
// pruning the cases where the candidate has calls while also having no
// available LR and no available general-purpose register to copy LR to
// (i.e., one extra stack save/restore).
//
if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
erase_if(RepeatedSequenceLocs, [this](outliner::Candidate &C) {
return (std::any_of(
C.front(), std::next(C.back()),
[](const MachineInstr &MI) { return MI.isCall(); })) &&
(!C.LRU.available(AArch64::LR) || !findRegisterToSaveLRTo(C));
});
}
}
// If we dropped all of the candidates, bail out here.
if (RepeatedSequenceLocs.size() < 2) {
RepeatedSequenceLocs.clear();
return outliner::OutlinedFunction();
}
}
// Does every candidate's MBB contain a call? If so, then we might have a call
// in the range.
if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
// Check if the range contains a call. These require a save + restore of the
// link register.
bool ModStackToSaveLR = false;
if (std::any_of(FirstCand.front(), FirstCand.back(),
[](const MachineInstr &MI) { return MI.isCall(); }))
ModStackToSaveLR = true;
// Handle the last instruction separately. If this is a tail call, then the
// last instruction is a call. We don't want to save + restore in this case.
// However, it could be possible that the last instruction is a call without
// it being valid to tail call this sequence. We should consider this as
// well.
else if (FrameID != MachineOutlinerThunk &&
FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall())
ModStackToSaveLR = true;
if (ModStackToSaveLR) {
// We can't fix up the stack. Bail out.
if (!AllStackInstrsSafe) {
RepeatedSequenceLocs.clear();
return outliner::OutlinedFunction();
}
// Save + restore LR.
NumBytesToCreateFrame += 8;
}
}
// If we have CFI instructions, we can only outline if the outlined section
// can be a tail call
if (FrameID != MachineOutlinerTailCall && CFICount > 0)
return outliner::OutlinedFunction();
return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
NumBytesToCreateFrame, FrameID);
}
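// Editor's note: illustrative sketch added for this review, not part of the
// original file. When LR has to be preserved, getOutliningCandidateInfo()
// above charges each candidate 4 bytes if LR is already available, 12 bytes
// if LR can be kept in a spare register or saved via SP, and otherwise counts
// the whole sequence as if it were not outlined. approxCallCostBytes() is a
// hypothetical restatement of that per-candidate choice.
static inline unsigned approxCallCostBytes(bool LRIsAvailable, bool HasSpareReg,
                                           bool SPUnusedInSequence,
                                           unsigned SequenceSize) {
  if (LRIsAvailable)
    return 4;          // Plain BL: MachineOutlinerNoLRSave.
  if (HasSpareReg)
    return 12;         // Save/restore LR in a register: MachineOutlinerRegSave.
  if (SPUnusedInSequence)
    return 12;         // Save/restore LR on the stack: MachineOutlinerDefault.
  return SequenceSize; // Pretend this candidate is not outlined at all.
}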
bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
const Function &F = MF.getFunction();
// Can F be deduplicated by the linker? If it can, don't outline from it.
if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
return false;
// Don't outline from functions with section markings; the program could
// expect that all the code is in the named section.
// FIXME: Allow outlining from multiple functions with the same section
// marking.
if (F.hasSection())
return false;
// Outlining from functions with redzones is unsafe since the outliner may
// modify the stack. Check if hasRedZone is true or unknown; if yes, don't
// outline from it.
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
if (!AFI || AFI->hasRedZone().getValueOr(true))
return false;
// FIXME: Teach the outliner to generate/handle Windows unwind info.
if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
return false;
// It's safe to outline from MF.
return true;
}
bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
unsigned &Flags) const {
if (!TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags))
return false;
// Check if LR is available through all of the MBB. If it's not, then set
// a flag.
assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
"Suitable Machine Function for outlining must track liveness");
LiveRegUnits LRU(getRegisterInfo());
std::for_each(MBB.rbegin(), MBB.rend(),
[&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
// Check if each of the unsafe registers are available...
bool W16AvailableInBlock = LRU.available(AArch64::W16);
bool W17AvailableInBlock = LRU.available(AArch64::W17);
bool NZCVAvailableInBlock = LRU.available(AArch64::NZCV);
// If all of these are dead (and not live out), we know we don't have to check
// them later.
if (W16AvailableInBlock && W17AvailableInBlock && NZCVAvailableInBlock)
Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
// Now, add the live outs to the set.
LRU.addLiveOuts(MBB);
// If any of these registers is available in the MBB, but also a live out of
// the block, then we know outlining is unsafe.
if (W16AvailableInBlock && !LRU.available(AArch64::W16))
return false;
if (W17AvailableInBlock && !LRU.available(AArch64::W17))
return false;
if (NZCVAvailableInBlock && !LRU.available(AArch64::NZCV))
return false;
// Check if there's a call inside this MachineBasicBlock. If there is, then
// set a flag.
if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
Flags |= MachineOutlinerMBBFlags::HasCalls;
MachineFunction *MF = MBB.getParent();
// In the event that we outline, we may have to save LR. If there is an
// available register in the MBB, then we'll always save LR there. Check if
// this is true.
bool CanSaveLR = false;
const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
MF->getSubtarget().getRegisterInfo());
// Check if there is an available register across the sequence that we can
// use.
for (unsigned Reg : AArch64::GPR64RegClass) {
if (!ARI->isReservedReg(*MF, Reg) && Reg != AArch64::LR &&
Reg != AArch64::X16 && Reg != AArch64::X17 && LRU.available(Reg)) {
CanSaveLR = true;
break;
}
}
// Check if we have a register we can save LR to, and if LR was used
// somewhere. If both of those things are true, then we need to evaluate the
// safety of outlining stack instructions later.
if (!CanSaveLR && !LRU.available(AArch64::LR))
Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
return true;
}
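// Editor's note: illustrative sketch added for this review, not part of the
// original file. isMBBSafeToOutlineFrom() above reduces the block to three
// flags: UnsafeRegsDead when W16, W17 and NZCV are all free across the block,
// HasCalls when any call appears, and LRUnavailableSomewhere when LR is live
// somewhere and no spare GPR64 could hold it. approxMBBOutlineFlags() is a
// hypothetical restatement of how those flags combine.
static inline unsigned approxMBBOutlineFlags(bool UnsafeRegsFree, bool HasAnyCall,
                                             bool LRLiveSomewhere,
                                             bool CanSaveLRInRegister) {
  unsigned Flags = 0;
  if (UnsafeRegsFree)
    Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
  if (HasAnyCall)
    Flags |= MachineOutlinerMBBFlags::HasCalls;
  if (LRLiveSomewhere && !CanSaveLRInRegister)
    Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
  return Flags;
}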
outliner::InstrType
AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
unsigned Flags) const {
MachineInstr &MI = *MIT;
MachineBasicBlock *MBB = MI.getParent();
MachineFunction *MF = MBB->getParent();
AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
// Don't outline anything used for return address signing. The outlined
// function will get signed later if needed
switch (MI.getOpcode()) {
case AArch64::PACIASP:
case AArch64::PACIBSP:
case AArch64::AUTIASP:
case AArch64::AUTIBSP:
case AArch64::RETAA:
case AArch64::RETAB:
case AArch64::EMITBKEY:
return outliner::InstrType::Illegal;
}
// Don't outline LOHs.
if (FuncInfo->getLOHRelated().count(&MI))
return outliner::InstrType::Illegal;
// We can only outline these if we will tail call the outlined function, or
// fix up the CFI offsets. Currently, CFI instructions are outlined only if
// they are in a tail call.
//
// FIXME: If the proper fixups for the offset are implemented, this should be
// possible.
if (MI.isCFIInstruction())
return outliner::InstrType::Legal;
// Don't allow debug values to impact outlining type.
if (MI.isDebugInstr() || MI.isIndirectDebugValue())
return outliner::InstrType::Invisible;
// At this point, KILL instructions don't really tell us much so we can go
// ahead and skip over them.
if (MI.isKill())
return outliner::InstrType::Invisible;
// Is this a terminator for a basic block?
if (MI.isTerminator()) {
// Is this the end of a function?
if (MI.getParent()->succ_empty())
return outliner::InstrType::Legal;
// It's not, so don't outline it.
return outliner::InstrType::Illegal;
}
// Make sure none of the operands are un-outlinable.
for (const MachineOperand &MOP : MI.operands()) {
if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
MOP.isTargetIndex())
return outliner::InstrType::Illegal;
// If it uses LR or W30 explicitly, then don't touch it.
if (MOP.isReg() && !MOP.isImplicit() &&
(MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
return outliner::InstrType::Illegal;
}
// Special cases for instructions that can always be outlined, but will fail
// the later tests, e.g. ADRPs, which are PC-relative or use LR, but can always
// be outlined because they don't require a *specific* value to be in LR.
if (MI.getOpcode() == AArch64::ADRP)
return outliner::InstrType::Legal;
// If MI is a call we might be able to outline it. We don't want to outline
// any calls that rely on the position of items on the stack. When we outline
// something containing a call, we have to emit a save and restore of LR in
// the outlined function. Currently, this always happens by saving LR to the
// stack. Thus, if we outline, say, half the parameters for a function call
// plus the call, then we'll break the callee's expectations for the layout
// of the stack.
//
// FIXME: Allow calls to functions which construct a stack frame, as long
// as they don't access arguments on the stack.
// FIXME: Figure out some way to analyze functions defined in other modules.
// We should be able to compute the memory usage based on the IR calling
// convention, even if we can't see the definition.
if (MI.isCall()) {
// Get the function associated with the call. Look at each operand and find
// the one that represents the callee and get its name.
const Function *Callee = nullptr;
for (const MachineOperand &MOP : MI.operands()) {
if (MOP.isGlobal()) {
Callee = dyn_cast<Function>(MOP.getGlobal());
break;
}
}
// Never outline calls to mcount. There isn't any rule that would require
// this, but the Linux kernel's "ftrace" feature depends on it.
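// (The leading '\01' marks the symbol name as literal: LLVM emits it verbatim,
// without prepending the target's usual global prefix.)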
if (Callee && Callee->getName() == "\01_mcount")
return outliner::InstrType::Illegal;
// If we don't know anything about the callee, assume it depends on the
// stack layout of the caller. In that case, it's only legal to outline
// as a tail-call. Explicitly list the call instructions we know about so we
// don't get unexpected results with call pseudo-instructions.
auto UnknownCallOutlineType = outliner::InstrType::Illegal;
if (MI.getOpcode() == AArch64::BLR ||
MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
if (!Callee)
return UnknownCallOutlineType;
// We have a function we have information about. Check if it's something we
// can safely outline.
MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
// We don't know what's going on with the callee at all. Don't touch it.
if (!CalleeMF)
return UnknownCallOutlineType;
// Check if we know anything about the callee saves on the function. If we
// don't, then don't touch it, since that implies that we haven't
// computed anything about its stack frame yet.
MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
MFI.getNumObjects() > 0)
return UnknownCallOutlineType;
// At this point, we can say that CalleeMF ought to not pass anything on the
// stack. Therefore, we can outline it.
return outliner::InstrType::Legal;
}
// Don't outline positions.
if (MI.isPosition())
return outliner::InstrType::Illegal;
// Don't touch the link register or W30.
if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
return outliner::InstrType::Illegal;
// Don't outline BTI instructions, because that will prevent the outlining
// site from being indirectly callable.
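// (HINT #32, #34, #36 and #38 are the encodings of BTI, BTI C, BTI J and
// BTI JC respectively.)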
if (MI.getOpcode() == AArch64::HINT) {
int64_t Imm = MI.getOperand(0).getImm();
if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
return outliner::InstrType::Illegal;
}
return outliner::InstrType::Legal;
}
void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
for (MachineInstr &MI : MBB) {
const MachineOperand *Base;
unsigned Width;
int64_t Offset;
bool OffsetIsScalable;
// Is this a load or store with an immediate offset with SP as the base?
if (!MI.mayLoadOrStore() ||
!getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
&RI) ||
(Base->isReg() && Base->getReg() != AArch64::SP))
continue;
// It is, so we have to fix it up.
TypeSize Scale(0U, false);
int64_t Dummy1, Dummy2;
MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
assert(Scale != 0 && "Unexpected opcode!");
assert(!OffsetIsScalable && "Expected offset to be a byte offset");
// We've pushed the return address to the stack, so add 16 to the offset.
// This is safe, since we already checked if it would overflow when we
// checked if this instruction was legal to outline.
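// For example, with the 16-byte LR spill in place, an access that was
// "ldr x0, [sp, #8]" (Offset = 8, Scale = 8) is rewritten to
// "ldr x0, [sp, #24]", i.e. NewImm = (8 + 16) / 8 = 3.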
int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedSize();
StackOffsetOperand.setImm(NewImm);
}
}
static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
bool ShouldSignReturnAddr,
bool ShouldSignReturnAddrWithAKey) {
if (ShouldSignReturnAddr) {
MachineBasicBlock::iterator MBBPAC = MBB.begin();
MachineBasicBlock::iterator MBBAUT = MBB.getFirstTerminator();
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL;
if (MBBAUT != MBB.end())
DL = MBBAUT->getDebugLoc();
// At the very beginning of the basic block we insert the following
// depending on the key type
//
// a_key: b_key:
// PACIASP EMITBKEY
// CFI_INSTRUCTION PACIBSP
// CFI_INSTRUCTION
unsigned PACI;
if (ShouldSignReturnAddrWithAKey) {
PACI = Subtarget.hasPAuth() ? AArch64::PACIA : AArch64::PACIASP;
} else {
BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::EMITBKEY))
.setMIFlag(MachineInstr::FrameSetup);
PACI = Subtarget.hasPAuth() ? AArch64::PACIB : AArch64::PACIBSP;
}
auto MI = BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(PACI));
if (Subtarget.hasPAuth())
MI.addReg(AArch64::LR, RegState::Define)
.addReg(AArch64::LR)
.addReg(AArch64::SP, RegState::InternalRead);
MI.setMIFlag(MachineInstr::FrameSetup);
unsigned CFIIndex =
MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
// If v8.3a features are available we can replace a RET instruction by
// RETAA or RETAB and omit the AUT instructions
if (Subtarget.hasPAuth() && MBBAUT != MBB.end() &&
MBBAUT->getOpcode() == AArch64::RET) {
BuildMI(MBB, MBBAUT, DL,
TII->get(ShouldSignReturnAddrWithAKey ? AArch64::RETAA
: AArch64::RETAB))
.copyImplicitOps(*MBBAUT);
MBB.erase(MBBAUT);
} else {
BuildMI(MBB, MBBAUT, DL,
TII->get(ShouldSignReturnAddrWithAKey ? AArch64::AUTIASP
: AArch64::AUTIBSP))
.setMIFlag(MachineInstr::FrameDestroy);
}
}
}
void AArch64InstrInfo::buildOutlinedFrame(
MachineBasicBlock &MBB, MachineFunction &MF,
const outliner::OutlinedFunction &OF) const {
AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
if (OF.FrameConstructionID == MachineOutlinerTailCall)
FI->setOutliningStyle("Tail Call");
else if (OF.FrameConstructionID == MachineOutlinerThunk) {
// For thunk outlining, rewrite the last instruction from a call to a
// tail-call.
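// (Roughly: a trailing "bl callee" becomes a direct tail call, TCRETURNdi,
// emitted as "b callee", while "blr xN" becomes a register tail call.)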
MachineInstr *Call = &*--MBB.instr_end();
unsigned TailOpcode;
if (Call->getOpcode() == AArch64::BL) {
TailOpcode = AArch64::TCRETURNdi;
} else {
assert(Call->getOpcode() == AArch64::BLR ||
Call->getOpcode() == AArch64::BLRNoIP);
TailOpcode = AArch64::TCRETURNriALL;
}
MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
.add(Call->getOperand(0))
.addImm(0);
MBB.insert(MBB.end(), TC);
Call->eraseFromParent();
FI->setOutliningStyle("Thunk");
}
bool IsLeafFunction = true;
// Is there a call in the outlined range?
auto IsNonTailCall = [](const MachineInstr &MI) {
return MI.isCall() && !MI.isReturn();
};
if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
// Fix up the instructions in the range, since we're going to modify the
// stack.
// Bugzilla ID: 46767
// TODO: Check if fixing up twice is safe so we can outline these.
assert(OF.FrameConstructionID != MachineOutlinerDefault &&
"Can only fix up stack references once");
fixupPostOutline(MBB);
IsLeafFunction = false;
// LR has to be a live in so that we can save it.
if (!MBB.isLiveIn(AArch64::LR))
MBB.addLiveIn(AArch64::LR);
MachineBasicBlock::iterator It = MBB.begin();
MachineBasicBlock::iterator Et = MBB.end();
if (OF.FrameConstructionID == MachineOutlinerTailCall ||
OF.FrameConstructionID == MachineOutlinerThunk)
Et = std::prev(MBB.end());
// Insert a save before the outlined region
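// (The STRXpre of LR with an offset of -16 is the pre-indexed spill
// "str x30, [sp, #-16]!".)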
MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
.addReg(AArch64::SP, RegState::Define)
.addReg(AArch64::LR)
.addReg(AArch64::SP)
.addImm(-16);
It = MBB.insert(It, STRXpre);
const TargetSubtargetInfo &STI = MF.getSubtarget();
const MCRegisterInfo *MRI = STI.getRegisterInfo();
unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
// Add a CFI saying the stack was moved 16 B down.
int64_t StackPosEntry =
MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 16));
BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
.addCFIIndex(StackPosEntry)
.setMIFlags(MachineInstr::FrameSetup);
// Add a CFI saying that the LR that we want to find is now 16 B higher than
// before.
int64_t LRPosEntry =
MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, -16));
BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
.addCFIIndex(LRPosEntry)
.setMIFlags(MachineInstr::FrameSetup);
// Insert a restore before the terminator for the function.
MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
.addReg(AArch64::SP, RegState::Define)
.addReg(AArch64::LR, RegState::Define)
.addReg(AArch64::SP)
.addImm(16);
Et = MBB.insert(Et, LDRXpost);
}
// If multiple candidates reach this point they must agree on their return
// address signing. It is therefore enough to consider the signing behaviour of
// just one of them.
const auto &MFI = *OF.Candidates.front().getMF()->getInfo<AArch64FunctionInfo>();
bool ShouldSignReturnAddr = MFI.shouldSignReturnAddress(!IsLeafFunction);
// a_key is the default
bool ShouldSignReturnAddrWithAKey = !MFI.shouldSignWithBKey();
// If this is a tail call outlined function, then there's already a return.
if (OF.FrameConstructionID == MachineOutlinerTailCall ||
OF.FrameConstructionID == MachineOutlinerThunk) {
signOutlinedFunction(MF, MBB, ShouldSignReturnAddr,
ShouldSignReturnAddrWithAKey);
return;
}
// It's not a tail call, so we have to insert the return ourselves.
// LR has to be a live in so that we can return to it.
if (!MBB.isLiveIn(AArch64::LR))
MBB.addLiveIn(AArch64::LR);
MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
.addReg(AArch64::LR);
MBB.insert(MBB.end(), ret);
signOutlinedFunction(MF, MBB, ShouldSignReturnAddr,
ShouldSignReturnAddrWithAKey);
FI->setOutliningStyle("Function");
// Did we have to modify the stack by saving the link register?
if (OF.FrameConstructionID != MachineOutlinerDefault)
return;
// We modified the stack.
// Walk over the basic block and fix up all the stack accesses.
fixupPostOutline(MBB);
}
MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
MachineFunction &MF, const outliner::Candidate &C) const {
// Are we tail calling?
if (C.CallConstructionID == MachineOutlinerTailCall) {
// If yes, then we can just branch to the label.
It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
.addGlobalAddress(M.getNamedValue(MF.getName()))
.addImm(0));
return It;
}
// Are we saving the link register?
if (C.CallConstructionID == MachineOutlinerNoLRSave ||
C.CallConstructionID == MachineOutlinerThunk) {
// No, so just insert the call.
It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
.addGlobalAddress(M.getNamedValue(MF.getName())));
return It;
}
// We want to return the spot where we inserted the call.
MachineBasicBlock::iterator CallPt;
// Instructions for saving and restoring LR around the call instruction we're
// going to insert.
MachineInstr *Save;
MachineInstr *Restore;
// Can we save to a register?
if (C.CallConstructionID == MachineOutlinerRegSave) {
// FIXME: This logic should be sunk into a target-specific interface so that
// we don't have to recompute the register.
unsigned Reg = findRegisterToSaveLRTo(C);
assert(Reg != 0 && "No callee-saved register available?");
// LR has to be a live in so that we can save it.
if (!MBB.isLiveIn(AArch64::LR))
MBB.addLiveIn(AArch64::LR);
// Save and restore LR from Reg.
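// (ORRXrs Rd, XZR, Rm, #0 is the canonical encoding of "mov Rd, Rm", so these
// are plain register-to-register moves.)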
Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
.addReg(AArch64::XZR)
.addReg(AArch64::LR)
.addImm(0);
Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
.addReg(AArch64::XZR)
.addReg(Reg)
.addImm(0);
} else {
// We have the default case. Save and restore from SP.
Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
.addReg(AArch64::SP, RegState::Define)
.addReg(AArch64::LR)
.addReg(AArch64::SP)
.addImm(-16);
Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
.addReg(AArch64::SP, RegState::Define)
.addReg(AArch64::LR, RegState::Define)
.addReg(AArch64::SP)
.addImm(16);
}
It = MBB.insert(It, Save);
It++;
// Insert the call.
It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
.addGlobalAddress(M.getNamedValue(MF.getName())));
CallPt = It;
It++;
It = MBB.insert(It, Restore);
return CallPt;
}
bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
MachineFunction &MF) const {
return MF.getFunction().hasMinSize();
}
Optional<DestSourcePair>
AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
// AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR as the first source
// register and a zero shift immediate are used as an alias for the mov
// instruction.
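// For example, "orr w0, wzr, w1" is the preferred encoding of "mov w0, w1",
// so its destination/source pair is (w0, w1).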
if (MI.getOpcode() == AArch64::ORRWrs &&
MI.getOperand(1).getReg() == AArch64::WZR &&
MI.getOperand(3).getImm() == 0x0) {
return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
}
if (MI.getOpcode() == AArch64::ORRXrs &&
MI.getOperand(1).getReg() == AArch64::XZR &&
MI.getOperand(3).getImm() == 0x0) {
return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
}
return None;
}
Optional<RegImmPair> AArch64InstrInfo::isAddImmediate(const MachineInstr &MI,
Register Reg) const {
int Sign = 1;
int64_t Offset = 0;
// TODO: Handle cases where Reg is a super- or sub-register of the
// destination register.
const MachineOperand &Op0 = MI.getOperand(0);
if (!Op0.isReg() || Reg != Op0.getReg())
return None;
switch (MI.getOpcode()) {
default:
return None;
case AArch64::SUBWri:
case AArch64::SUBXri:
case AArch64::SUBSWri:
case AArch64::SUBSXri:
Sign *= -1;
LLVM_FALLTHROUGH;
case AArch64::ADDSWri:
case AArch64::ADDSXri:
case AArch64::ADDWri:
case AArch64::ADDXri: {
// TODO: Third operand can be global address (usually some string).
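// For example, "add x0, x1, #3, lsl #12" is described as the pair
// {Reg = x1, Offset = +12288}; the SUB forms simply negate the offset.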
if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
!MI.getOperand(2).isImm())
return None;
int Shift = MI.getOperand(3).getImm();
assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
Offset = Sign * (MI.getOperand(2).getImm() << Shift);
}
}
return RegImmPair{MI.getOperand(1).getReg(), Offset};
}
/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
/// the destination register then, if possible, describe the value in terms of
/// the source register.
static Optional<ParamLoadedValue>
describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
auto DestSrc = TII->isCopyInstr(MI);
if (!DestSrc)
return None;
Register DestReg = DestSrc->Destination->getReg();
Register SrcReg = DestSrc->Source->getReg();
auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
// If the described register is the destination, just return the source.
if (DestReg == DescribedReg)
return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
// ORRWrs zero-extends to 64 bits, so we need to consider such cases.
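// For example, a 32-bit "mov w1, w5" also defines x1 (the upper 32 bits are
// cleared), so a query about the X super-register can still be answered in
// terms of the W source register.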
if (MI.getOpcode() == AArch64::ORRWrs &&
TRI->isSuperRegister(DestReg, DescribedReg))
return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
// We may need to describe the lower part of a ORRXrs move.
if (MI.getOpcode() == AArch64::ORRXrs &&
TRI->isSubRegister(DestReg, DescribedReg)) {
Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
}
assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
"Unhandled ORR[XW]rs copy case");
return None;
}
Optional<ParamLoadedValue>
AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
Register Reg) const {
const MachineFunction *MF = MI.getMF();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
switch (MI.getOpcode()) {
case AArch64::MOVZWi:
case AArch64::MOVZXi: {
// MOVZWi may be used for producing zero-extended 32-bit immediates in
// 64-bit parameters, so we need to consider super-registers.
if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
return None;
if (!MI.getOperand(1).isImm())
return None;
int64_t Immediate = MI.getOperand(1).getImm();
int Shift = MI.getOperand(2).getImm();
return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
nullptr);
}
case AArch64::ORRWrs:
case AArch64::ORRXrs:
return describeORRLoadedValue(MI, Reg, this, TRI);
}
return TargetInstrInfo::describeLoadedValue(MI, Reg);
}
bool AArch64InstrInfo::isExtendLikelyToBeFolded(
MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
// Anyexts are nops.
if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
return true;
Register DefReg = ExtMI.getOperand(0).getReg();
if (!MRI.hasOneNonDBGUse(DefReg))
return false;
// It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
// addressing mode.
auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
}
uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
return get(Opc).TSFlags & AArch64::ElementSizeMask;
}
bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
}
bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
}
unsigned int
AArch64InstrInfo::getTailDuplicateSize(CodeGenOpt::Level OptLevel) const {
return OptLevel >= CodeGenOpt::Aggressive ? 6 : 2;
}
unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
return AArch64::BLRNoIP;
else
return AArch64::BLR;
}
#define GET_INSTRINFO_HELPERS
#define GET_INSTRMAP_INFO
#include "AArch64GenInstrInfo.inc"
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 83bf89ff97c5..1316161f05f1 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1,8414 +1,8414 @@
//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// AArch64 Instruction definitions.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">,
AssemblerPredicate<(all_of HasV8_1aOps), "armv8.1a">;
def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">,
AssemblerPredicate<(all_of HasV8_2aOps), "armv8.2a">;
def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">,
AssemblerPredicate<(all_of HasV8_3aOps), "armv8.3a">;
def HasV8_4a : Predicate<"Subtarget->hasV8_4aOps()">,
AssemblerPredicate<(all_of HasV8_4aOps), "armv8.4a">;
def HasV8_5a : Predicate<"Subtarget->hasV8_5aOps()">,
AssemblerPredicate<(all_of HasV8_5aOps), "armv8.5a">;
def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">,
AssemblerPredicate<(all_of HasV8_6aOps), "armv8.6a">;
def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">,
AssemblerPredicate<(all_of HasV8_7aOps), "armv8.7a">;
def HasV9_0a : Predicate<"Subtarget->hasV9_0aOps()">,
AssemblerPredicate<(all_of HasV9_0aOps), "armv9-a">;
def HasV9_1a : Predicate<"Subtarget->hasV9_1aOps()">,
AssemblerPredicate<(all_of HasV9_1aOps), "armv9.1a">;
def HasV9_2a : Predicate<"Subtarget->hasV9_2aOps()">,
AssemblerPredicate<(all_of HasV9_2aOps), "armv9.2a">;
def HasV9_3a : Predicate<"Subtarget->hasV9_3aOps()">,
AssemblerPredicate<(all_of HasV9_3aOps), "armv9.3a">;
def HasV8_0r : Predicate<"Subtarget->hasV8_0rOps()">,
AssemblerPredicate<(all_of HasV8_0rOps), "armv8-r">;
def HasEL2VMSA : Predicate<"Subtarget->hasEL2VMSA()">,
AssemblerPredicate<(all_of FeatureEL2VMSA), "el2vmsa">;
def HasEL3 : Predicate<"Subtarget->hasEL3()">,
AssemblerPredicate<(all_of FeatureEL3), "el3">;
def HasVH : Predicate<"Subtarget->hasVH()">,
AssemblerPredicate<(all_of FeatureVH), "vh">;
def HasLOR : Predicate<"Subtarget->hasLOR()">,
AssemblerPredicate<(all_of FeatureLOR), "lor">;
def HasPAuth : Predicate<"Subtarget->hasPAuth()">,
AssemblerPredicate<(all_of FeaturePAuth), "pauth">;
def HasJS : Predicate<"Subtarget->hasJS()">,
AssemblerPredicate<(all_of FeatureJS), "jsconv">;
def HasCCIDX : Predicate<"Subtarget->hasCCIDX()">,
AssemblerPredicate<(all_of FeatureCCIDX), "ccidx">;
def HasComplxNum : Predicate<"Subtarget->hasComplxNum()">,
AssemblerPredicate<(all_of FeatureComplxNum), "complxnum">;
def HasNV : Predicate<"Subtarget->hasNV()">,
AssemblerPredicate<(all_of FeatureNV), "nv">;
def HasMPAM : Predicate<"Subtarget->hasMPAM()">,
AssemblerPredicate<(all_of FeatureMPAM), "mpam">;
def HasDIT : Predicate<"Subtarget->hasDIT()">,
AssemblerPredicate<(all_of FeatureDIT), "dit">;
def HasTRACEV8_4 : Predicate<"Subtarget->hasTRACEV8_4()">,
AssemblerPredicate<(all_of FeatureTRACEV8_4), "tracev8.4">;
def HasAM : Predicate<"Subtarget->hasAM()">,
AssemblerPredicate<(all_of FeatureAM), "am">;
def HasSEL2 : Predicate<"Subtarget->hasSEL2()">,
AssemblerPredicate<(all_of FeatureSEL2), "sel2">;
def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">,
AssemblerPredicate<(all_of FeatureTLB_RMI), "tlb-rmi">;
def HasFlagM : Predicate<"Subtarget->hasFlagM()">,
AssemblerPredicate<(all_of FeatureFlagM), "flagm">;
def HasRCPC_IMMO : Predicate<"Subtarget->hasRCPCImm()">,
AssemblerPredicate<(all_of FeatureRCPC_IMMO), "rcpc-immo">;
def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
AssemblerPredicate<(all_of FeatureFPARMv8), "fp-armv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
AssemblerPredicate<(all_of FeatureNEON), "neon">;
def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
AssemblerPredicate<(all_of FeatureCrypto), "crypto">;
def HasSM4 : Predicate<"Subtarget->hasSM4()">,
AssemblerPredicate<(all_of FeatureSM4), "sm4">;
def HasSHA3 : Predicate<"Subtarget->hasSHA3()">,
AssemblerPredicate<(all_of FeatureSHA3), "sha3">;
def HasSHA2 : Predicate<"Subtarget->hasSHA2()">,
AssemblerPredicate<(all_of FeatureSHA2), "sha2">;
def HasAES : Predicate<"Subtarget->hasAES()">,
AssemblerPredicate<(all_of FeatureAES), "aes">;
def HasDotProd : Predicate<"Subtarget->hasDotProd()">,
AssemblerPredicate<(all_of FeatureDotProd), "dotprod">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
AssemblerPredicate<(all_of FeatureCRC), "crc">;
def HasLSE : Predicate<"Subtarget->hasLSE()">,
AssemblerPredicate<(all_of FeatureLSE), "lse">;
def HasNoLSE : Predicate<"!Subtarget->hasLSE()">;
def HasRAS : Predicate<"Subtarget->hasRAS()">,
AssemblerPredicate<(all_of FeatureRAS), "ras">;
def HasRDM : Predicate<"Subtarget->hasRDM()">,
AssemblerPredicate<(all_of FeatureRDM), "rdm">;
def HasPerfMon : Predicate<"Subtarget->hasPerfMon()">;
def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
AssemblerPredicate<(all_of FeatureFullFP16), "fullfp16">;
def HasFP16FML : Predicate<"Subtarget->hasFP16FML()">,
AssemblerPredicate<(all_of FeatureFP16FML), "fp16fml">;
def HasSPE : Predicate<"Subtarget->hasSPE()">,
AssemblerPredicate<(all_of FeatureSPE), "spe">;
def HasFuseAES : Predicate<"Subtarget->hasFuseAES()">,
AssemblerPredicate<(all_of FeatureFuseAES),
"fuse-aes">;
def HasSVE : Predicate<"Subtarget->hasSVE()">,
AssemblerPredicate<(all_of FeatureSVE), "sve">;
def HasSVE2 : Predicate<"Subtarget->hasSVE2()">,
AssemblerPredicate<(all_of FeatureSVE2), "sve2">;
def HasSVE2AES : Predicate<"Subtarget->hasSVE2AES()">,
AssemblerPredicate<(all_of FeatureSVE2AES), "sve2-aes">;
def HasSVE2SM4 : Predicate<"Subtarget->hasSVE2SM4()">,
AssemblerPredicate<(all_of FeatureSVE2SM4), "sve2-sm4">;
def HasSVE2SHA3 : Predicate<"Subtarget->hasSVE2SHA3()">,
AssemblerPredicate<(all_of FeatureSVE2SHA3), "sve2-sha3">;
def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2BitPerm()">,
AssemblerPredicate<(all_of FeatureSVE2BitPerm), "sve2-bitperm">;
def HasSME : Predicate<"Subtarget->hasSME()">,
AssemblerPredicate<(all_of FeatureSME), "sme">;
def HasSMEF64 : Predicate<"Subtarget->hasSMEF64()">,
AssemblerPredicate<(all_of FeatureSMEF64), "sme-f64">;
def HasSMEI64 : Predicate<"Subtarget->hasSMEI64()">,
AssemblerPredicate<(all_of FeatureSMEI64), "sme-i64">;
def HasStreamingSVE : Predicate<"Subtarget->hasStreamingSVE()">,
- AssemblerPredicate<(all_of FeatureStreamingSVE), "streaming-sve">;
+ AssemblerPredicate<(all_of FeatureStreamingSVE), "sme">;
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode;
// they should be enabled if either feature has been specified.
def HasSVEorStreamingSVE
: Predicate<"Subtarget->hasSVE() || Subtarget->hasStreamingSVE()">,
AssemblerPredicate<(any_of FeatureSVE, FeatureStreamingSVE),
- "streaming-sve or sve">;
+ "sve or sme">;
def HasSVE2orStreamingSVE
: Predicate<"Subtarget->hasSVE2() || Subtarget->hasStreamingSVE()">,
AssemblerPredicate<(any_of FeatureSVE2, FeatureStreamingSVE),
- "streaming-sve or sve2">;
+ "sve2 or sme">;
// A subset of NEON instructions are legal in Streaming SVE execution mode;
// they should be enabled if either feature has been specified.
def HasNEONorStreamingSVE
: Predicate<"Subtarget->hasNEON() || Subtarget->hasStreamingSVE()">,
AssemblerPredicate<(any_of FeatureNEON, FeatureStreamingSVE),
- "streaming-sve or neon">;
+ "neon or sme">;
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
AssemblerPredicate<(all_of FeatureRCPC), "rcpc">;
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
AssemblerPredicate<(all_of FeatureAltFPCmp), "altnzcv">;
def HasFRInt3264 : Predicate<"Subtarget->hasFRInt3264()">,
AssemblerPredicate<(all_of FeatureFRInt3264), "frint3264">;
def HasSB : Predicate<"Subtarget->hasSB()">,
AssemblerPredicate<(all_of FeatureSB), "sb">;
def HasPredRes : Predicate<"Subtarget->hasPredRes()">,
AssemblerPredicate<(all_of FeaturePredRes), "predres">;
def HasCCDP : Predicate<"Subtarget->hasCCDP()">,
AssemblerPredicate<(all_of FeatureCacheDeepPersist), "ccdp">;
def HasBTI : Predicate<"Subtarget->hasBTI()">,
AssemblerPredicate<(all_of FeatureBranchTargetId), "bti">;
def HasMTE : Predicate<"Subtarget->hasMTE()">,
AssemblerPredicate<(all_of FeatureMTE), "mte">;
def HasTME : Predicate<"Subtarget->hasTME()">,
AssemblerPredicate<(all_of FeatureTME), "tme">;
def HasETE : Predicate<"Subtarget->hasETE()">,
AssemblerPredicate<(all_of FeatureETE), "ete">;
def HasTRBE : Predicate<"Subtarget->hasTRBE()">,
AssemblerPredicate<(all_of FeatureTRBE), "trbe">;
def HasBF16 : Predicate<"Subtarget->hasBF16()">,
AssemblerPredicate<(all_of FeatureBF16), "bf16">;
def HasMatMulInt8 : Predicate<"Subtarget->hasMatMulInt8()">,
AssemblerPredicate<(all_of FeatureMatMulInt8), "i8mm">;
def HasMatMulFP32 : Predicate<"Subtarget->hasMatMulFP32()">,
AssemblerPredicate<(all_of FeatureMatMulFP32), "f32mm">;
def HasMatMulFP64 : Predicate<"Subtarget->hasMatMulFP64()">,
AssemblerPredicate<(all_of FeatureMatMulFP64), "f64mm">;
def HasXS : Predicate<"Subtarget->hasXS()">,
AssemblerPredicate<(all_of FeatureXS), "xs">;
def HasWFxT : Predicate<"Subtarget->hasWFxT()">,
AssemblerPredicate<(all_of FeatureWFxT), "wfxt">;
def HasLS64 : Predicate<"Subtarget->hasLS64()">,
AssemblerPredicate<(all_of FeatureLS64), "ls64">;
def HasBRBE : Predicate<"Subtarget->hasBRBE()">,
AssemblerPredicate<(all_of FeatureBRBE), "brbe">;
def HasSPE_EEF : Predicate<"Subtarget->hasSPE_EEF()">,
AssemblerPredicate<(all_of FeatureSPE_EEF), "spe-eef">;
def HasHBC : Predicate<"Subtarget->hasHBC()">,
AssemblerPredicate<(all_of FeatureHBC), "hbc">;
def HasMOPS : Predicate<"Subtarget->hasMOPS()">,
AssemblerPredicate<(all_of FeatureMOPS), "mops">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
def UseExperimentalZeroingPseudos
: Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
def UseAlternateSExtLoadCVTF32
: Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;
def UseNegativeImmediates
: Predicate<"false">, AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)),
"NegativeImmediates">;
def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;
def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
SDTCisInt<1>]>>;
//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
//
// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS
def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2,
[SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
SDTCisInt<0>, SDTCisVT<1, i32>]>;
// SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3,
[SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisInt<0>,
SDTCisVT<3, i32>]>;
// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
[SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
SDTCisInt<0>,
SDTCisVT<1, i32>,
SDTCisVT<4, i32>]>;
def SDT_AArch64Brcond : SDTypeProfile<0, 3,
[SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>,
SDTCisVT<2, i32>]>;
def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
SDTCisVT<2, OtherVT>]>;
def SDT_AArch64CSel : SDTypeProfile<1, 4,
[SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisInt<3>,
SDTCisVT<4, i32>]>;
def SDT_AArch64CCMP : SDTypeProfile<1, 5,
[SDTCisVT<0, i32>,
SDTCisInt<1>,
SDTCisSameAs<1, 2>,
SDTCisInt<3>,
SDTCisInt<4>,
SDTCisVT<5, i32>]>;
def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
[SDTCisVT<0, i32>,
SDTCisFP<1>,
SDTCisSameAs<1, 2>,
SDTCisInt<3>,
SDTCisInt<4>,
SDTCisVT<5, i32>]>;
def SDT_AArch64FCmp : SDTypeProfile<0, 2,
[SDTCisFP<0>,
SDTCisSameAs<0, 1>]>;
def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
def SDT_AArch64Insr : SDTypeProfile<1, 2, [SDTCisVec<0>]>;
def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>]>;
def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisInt<2>, SDTCisInt<3>]>;
def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisInt<3>]>;
def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;
def SDT_AArch64Dot: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisVec<2>, SDTCisSameAs<2,3>]>;
def SDT_AArch64vshiftinsert : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<3>,
SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>;
def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>;
def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>,
SDTCisSameAs<0,3>]>;
def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;
def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
SDTCisPtrTy<1>]>;
def SDT_AArch64uaddlp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
// Generates the general dynamic sequences, i.e.
// adrp x0, :tlsdesc:var
// ldr x1, [x0, #:tlsdesc_lo12:var]
// add x0, x0, #:tlsdesc_lo12:var
// .tlsdesccall var
// blr x1
// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here)
// The single operand is the TLS variable being accessed.
def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1,
[SDTCisPtrTy<0>]>;
def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
[SDTCisVT<0, i64>, SDTCisVT<1, i32>,
SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
SDTCisSameAs<1, 4>]>;
def SDT_AArch64TBL : SDTypeProfile<1, 2, [
SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>
]>;
// non-extending masked load fragment.
def nonext_masked_load :
PatFrag<(ops node:$ptr, node:$pred, node:$def),
(masked_ld node:$ptr, undef, node:$pred, node:$def), [{
return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
cast<MaskedLoadSDNode>(N)->isUnindexed() &&
!cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;
// sign extending masked load fragments.
def asext_masked_load :
PatFrag<(ops node:$ptr, node:$pred, node:$def),
(masked_ld node:$ptr, undef, node:$pred, node:$def),[{
return (cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD ||
cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD) &&
cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def asext_masked_load_i8 :
PatFrag<(ops node:$ptr, node:$pred, node:$def),
(asext_masked_load node:$ptr, node:$pred, node:$def), [{
return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def asext_masked_load_i16 :
PatFrag<(ops node:$ptr, node:$pred, node:$def),
(asext_masked_load node:$ptr, node:$pred, node:$def), [{
return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def asext_masked_load_i32 :
PatFrag<(ops node:$ptr, node:$pred, node:$def),
(asext_masked_load node:$ptr, node:$pred, node:$def), [{
return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
// zero extending masked load fragments.
def zext_masked_load :
PatFrag<(ops node:$ptr, node:$pred, node:$def),
(masked_ld node:$ptr, undef, node:$pred, node:$def), [{
return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD &&
cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def zext_masked_load_i8 :
PatFrag<(ops node:$ptr, node:$pred, node:$def),
(zext_masked_load node:$ptr, node:$pred, node:$def), [{
return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def zext_masked_load_i16 :
PatFrag<(ops node:$ptr, node:$pred, node:$def),
(zext_masked_load node:$ptr, node:$pred, node:$def), [{
return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def zext_masked_load_i32 :
PatFrag<(ops node:$ptr, node:$pred, node:$def),
(zext_masked_load node:$ptr, node:$pred, node:$def), [{
return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
def non_temporal_load :
PatFrag<(ops node:$ptr, node:$pred, node:$def),
(masked_ld node:$ptr, undef, node:$pred, node:$def), [{
return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
cast<MaskedLoadSDNode>(N)->isUnindexed() &&
cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;
// non-truncating masked store fragment.
def nontrunc_masked_store :
PatFrag<(ops node:$val, node:$ptr, node:$pred),
(masked_st node:$val, node:$ptr, undef, node:$pred), [{
return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
cast<MaskedStoreSDNode>(N)->isUnindexed() &&
!cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;
// truncating masked store fragments.
def trunc_masked_store :
PatFrag<(ops node:$val, node:$ptr, node:$pred),
(masked_st node:$val, node:$ptr, undef, node:$pred), [{
return cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
cast<MaskedStoreSDNode>(N)->isUnindexed();
}]>;
def trunc_masked_store_i8 :
PatFrag<(ops node:$val, node:$ptr, node:$pred),
(trunc_masked_store node:$val, node:$ptr, node:$pred), [{
return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def trunc_masked_store_i16 :
PatFrag<(ops node:$val, node:$ptr, node:$pred),
(trunc_masked_store node:$val, node:$ptr, node:$pred), [{
return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def trunc_masked_store_i32 :
PatFrag<(ops node:$val, node:$ptr, node:$pred),
(trunc_masked_store node:$val, node:$ptr, node:$pred), [{
return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
def non_temporal_store :
PatFrag<(ops node:$val, node:$ptr, node:$pred),
(masked_st node:$val, node:$ptr, undef, node:$pred), [{
return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
cast<MaskedStoreSDNode>(N)->isUnindexed() &&
cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;
// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
def top16Zero: PatLeaf<(i32 GPR32:$src), [{
return SDValue(N,0)->getValueType(0) == MVT::i32 &&
CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
}]>;
// top32Zero - answer true if the upper 32 bits of $src are 0, false otherwise
def top32Zero: PatLeaf<(i64 GPR64:$src), [{
return SDValue(N,0)->getValueType(0) == MVT::i64 &&
CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 32));
}]>;
// Node definitions.
def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
def AArch64adr : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
def AArch64addlow : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
def AArch64LOADgot : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>;
def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START",
SDCallSeqStart<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>,
[SDNPHasChain, SDNPOutGlue]>;
def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END",
SDCallSeqEnd<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def AArch64call : SDNode<"AArch64ISD::CALL",
SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER",
SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def AArch64brcond : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond,
[SDNPHasChain]>;
def AArch64cbz : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz,
[SDNPHasChain]>;
def AArch64cbnz : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz,
[SDNPHasChain]>;
def AArch64tbz : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz,
[SDNPHasChain]>;
def AArch64tbnz : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz,
[SDNPHasChain]>;
def AArch64csel : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>;
def AArch64csinv : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>;
def AArch64csneg : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>;
def AArch64csinc : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>;
def AArch64retflag : SDNode<"AArch64ISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def AArch64adc : SDNode<"AArch64ISD::ADC", SDTBinaryArithWithFlagsIn >;
def AArch64sbc : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>;
def AArch64add_flag : SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut,
[SDNPCommutative]>;
def AArch64sub_flag : SDNode<"AArch64ISD::SUBS", SDTBinaryArithWithFlagsOut>;
def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut,
[SDNPCommutative]>;
def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>;
def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>;
def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>;
def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>;
def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>;
def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;
def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;
def AArch64strict_fcmp : SDNode<"AArch64ISD::STRICT_FCMP", SDT_AArch64FCmp,
[SDNPHasChain]>;
def AArch64strict_fcmpe : SDNode<"AArch64ISD::STRICT_FCMPE", SDT_AArch64FCmp,
[SDNPHasChain]>;
def AArch64any_fcmp : PatFrags<(ops node:$lhs, node:$rhs),
[(AArch64strict_fcmp node:$lhs, node:$rhs),
(AArch64fcmp node:$lhs, node:$rhs)]>;
def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>;
def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>;
def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>;
def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>;
def AArch64insr : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>;
def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>;
def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>;
def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>;
def AArch64uzp2 : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>;
def AArch64trn1 : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>;
def AArch64trn2 : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>;
def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>;
def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>;
def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>;
def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>;
def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>;
def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>;
def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>;
def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>;
def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>;
def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>;
def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>;
def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>;
def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;
def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;
def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>;
def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;
def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;
def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;
def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;
def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>;
def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;
def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>;
def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>;
def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>;
def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>;
def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>;
def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>;
def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>;
def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>;
def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>;
def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>;
def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>;
def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>;
def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>;
def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
(vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;
def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;
def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
[SDNPHasChain, SDNPSideEffect]>;
def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;
def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
SDT_AArch64TLSDescCallSeq,
[SDNPInGlue, SDNPOutGlue, SDNPHasChain,
SDNPVariadic]>;
def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
SDT_AArch64WrapperLarge>;
def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>;
def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
SDTCisSameAs<1, 2>]>;
def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>;
def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>;
def AArch64frecpe : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>;
def AArch64frecps : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>;
def AArch64frsqrte : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>;
def AArch64frsqrts : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>;
def AArch64sdot : SDNode<"AArch64ISD::SDOT", SDT_AArch64Dot>;
def AArch64udot : SDNode<"AArch64ISD::UDOT", SDT_AArch64Dot>;
def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
def AArch64sminv : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
def AArch64srhadd : SDNode<"AArch64ISD::SRHADD", SDT_AArch64binvec>;
def AArch64urhadd : SDNode<"AArch64ISD::URHADD", SDT_AArch64binvec>;
def AArch64shadd : SDNode<"AArch64ISD::SHADD", SDT_AArch64binvec>;
def AArch64uhadd : SDNode<"AArch64ISD::UHADD", SDT_AArch64binvec>;
def AArch64uabd : PatFrags<(ops node:$lhs, node:$rhs),
[(abdu node:$lhs, node:$rhs),
(int_aarch64_neon_uabd node:$lhs, node:$rhs)]>;
def AArch64sabd : PatFrags<(ops node:$lhs, node:$rhs),
[(abds node:$lhs, node:$rhs),
(int_aarch64_neon_sabd node:$lhs, node:$rhs)]>;
def AArch64uaddlp_n : SDNode<"AArch64ISD::UADDLP", SDT_AArch64uaddlp>;
def AArch64uaddlp : PatFrags<(ops node:$src),
[(AArch64uaddlp_n node:$src),
(int_aarch64_neon_uaddlp node:$src)]>;
def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64st2g : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def SDT_AArch64unpk : SDTypeProfile<1, 1, [
SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>
]>;
def AArch64sunpkhi : SDNode<"AArch64ISD::SUNPKHI", SDT_AArch64unpk>;
def AArch64sunpklo : SDNode<"AArch64ISD::SUNPKLO", SDT_AArch64unpk>;
def AArch64uunpkhi : SDNode<"AArch64ISD::UUNPKHI", SDT_AArch64unpk>;
def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>;
def AArch64ldp : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64stp : SDNode<"AArch64ISD::STP", SDT_AArch64stp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;
def AArch64mrs : SDNode<"AArch64ISD::MRS",
SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>]>,
[SDNPHasChain, SDNPOutGlue]>;
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// AArch64 Instruction Predicate Definitions.
// We could compute these on a per-module basis but doing so requires accessing
// the Function object through the <Target>Subtarget and objections were raised
// to that (see post-commit review comments for r301750).
let RecomputePerFunction = 1 in {
def ForCodeSize : Predicate<"shouldOptForSize(MF)">;
def NotForCodeSize : Predicate<"!shouldOptForSize(MF)">;
// Avoid generating STRQro if it is slow, unless we're optimizing for code size.
def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">;
def UseBTI : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;
def NotUseBTI : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;
def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
// Toggles patterns which aren't beneficial in GlobalISel when we aren't
// optimizing. This allows us to selectively use patterns without impacting
// SelectionDAG's behaviour.
// FIXME: One day there will probably be a nicer way to check for this, but
// today is not that day.
def OptimizedGISelOrOtherSelector : Predicate<"!MF->getFunction().hasOptNone() || MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || !MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">;
}
include "AArch64InstrFormats.td"
include "SVEInstrFormats.td"
include "SMEInstrFormats.td"
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Miscellaneous instructions.
//===----------------------------------------------------------------------===//
let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in {
// We set Sched to an empty list because we expect these instructions to simply
// get removed in most cases.
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
[(AArch64callseq_start timm:$amt1, timm:$amt2)]>,
Sched<[]>;
def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
[(AArch64callseq_end timm:$amt1, timm:$amt2)]>,
Sched<[]>;
} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1
let isReMaterializable = 1, isCodeGenOnly = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
// cannot handle multiple instructions. When that changes, they can be
// removed, along with the AArch64Wrapper node.
let AddedComplexity = 10 in
def LOADgot : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr),
[(set GPR64common:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
Sched<[WriteLDAdr]>;
// The MOVaddr instruction should match only when the add is not folded
// into a load or store address.
def MOVaddr
: Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
[(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
tglobaladdr:$low))]>,
Sched<[WriteAdrAdr]>;
def MOVaddrJT
: Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
[(set GPR64common:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
tjumptable:$low))]>,
Sched<[WriteAdrAdr]>;
def MOVaddrCP
: Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
[(set GPR64common:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
tconstpool:$low))]>,
Sched<[WriteAdrAdr]>;
def MOVaddrBA
: Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
[(set GPR64common:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
tblockaddress:$low))]>,
Sched<[WriteAdrAdr]>;
def MOVaddrTLS
: Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
[(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
tglobaltlsaddr:$low))]>,
Sched<[WriteAdrAdr]>;
def MOVaddrEXT
: Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
[(set GPR64common:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
texternalsym:$low))]>,
Sched<[WriteAdrAdr]>;
// Normally AArch64addlow either gets folded into a following ldr/str,
// or together with an adrp into MOVaddr above. For cases with TLS, it
// might appear without either of them, so allow lowering it into a plain
// add.
def ADDlowTLS
: Pseudo<(outs GPR64sp:$dst), (ins GPR64sp:$src, i64imm:$low),
[(set GPR64sp:$dst, (AArch64addlow GPR64sp:$src,
tglobaltlsaddr:$low))]>,
Sched<[WriteAdr]>;
} // isReMaterializable, isCodeGenOnly
def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr),
(LOADgot tglobaltlsaddr:$addr)>;
def : Pat<(AArch64LOADgot texternalsym:$addr),
(LOADgot texternalsym:$addr)>;
def : Pat<(AArch64LOADgot tconstpool:$addr),
(LOADgot tconstpool:$addr)>;
// In general these get lowered into a sequence of three 4-byte instructions.
// A 32-bit jump table destination actually needs only 2 instructions, since we
// can use the table itself as a PC-relative base. But that optimization occurs
// after branch relaxation, so be pessimistic.
let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch",
isNotDuplicable = 1 in {
def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
(ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
Sched<[]>;
def JumpTableDest16 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
(ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
Sched<[]>;
def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
(ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
Sched<[]>;
}
// Space-consuming pseudo to aid testing of placement and reachability
// algorithms. Immediate operand is the number of bytes this "instruction"
// occupies; register operands can be used to enforce dependency and constrain
// the scheduler.
let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
def SPACE : Pseudo<(outs GPR64:$Rd), (ins i32imm:$size, GPR64:$Rn),
[(set GPR64:$Rd, (int_aarch64_space imm:$size, GPR64:$Rn))]>,
Sched<[]>;
let hasSideEffects = 1, isCodeGenOnly = 1 in {
def SpeculationSafeValueX
: Pseudo<(outs GPR64:$dst), (ins GPR64:$src), []>, Sched<[]>;
def SpeculationSafeValueW
: Pseudo<(outs GPR32:$dst), (ins GPR32:$src), []>, Sched<[]>;
}
// SpeculationBarrierEndBB must only be used after an unconditional control
// flow, i.e. after a terminator for which isBarrier is True.
let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
// This gets lowered to a pair of 4-byte instructions.
let Size = 8 in
def SpeculationBarrierISBDSBEndBB
: Pseudo<(outs), (ins), []>, Sched<[]>;
// This gets lowered to a 4-byte instruction.
let Size = 4 in
def SpeculationBarrierSBEndBB
: Pseudo<(outs), (ins), []>, Sched<[]>;
}
//===----------------------------------------------------------------------===//
// System instructions.
//===----------------------------------------------------------------------===//
def HINT : HintI<"hint">;
def : InstAlias<"nop", (HINT 0b000)>;
def : InstAlias<"yield",(HINT 0b001)>;
def : InstAlias<"wfe", (HINT 0b010)>;
def : InstAlias<"wfi", (HINT 0b011)>;
def : InstAlias<"sev", (HINT 0b100)>;
def : InstAlias<"sevl", (HINT 0b101)>;
def : InstAlias<"dgh", (HINT 0b110)>;
def : InstAlias<"esb", (HINT 0b10000)>, Requires<[HasRAS]>;
def : InstAlias<"csdb", (HINT 20)>;
// In order to be able to write readable assembly, LLVM should accept assembly
// inputs that use Branch Target Identification mnemonics, even with BTI disabled.
// However, in order to be compatible with other assemblers (e.g. GAS), LLVM
// should not emit these mnemonics unless BTI is enabled.
def : InstAlias<"bti", (HINT 32), 0>;
def : InstAlias<"bti $op", (HINT btihint_op:$op), 0>;
def : InstAlias<"bti", (HINT 32)>, Requires<[HasBTI]>;
def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>;
// v8.2a Statistical Profiling extension
def : InstAlias<"psb $op", (HINT psbhint_op:$op)>, Requires<[HasSPE]>;
// As far as LLVM is concerned this writes to the system's exclusive monitors.
let mayLoad = 1, mayStore = 1 in
def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;
// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
// model patterns with sufficiently fine granularity.
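// ('?' is TableGen's uninitialized value, so mayLoad/mayStore are deliberately
// left unset here rather than being pinned to 0 or 1.)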
let mayLoad = ?, mayStore = ? in {
def DMB : CRmSystemI<barrier_op, 0b101, "dmb",
[(int_aarch64_dmb (i32 imm32_0_15:$CRm))]>;
def DSB : CRmSystemI<barrier_op, 0b100, "dsb",
[(int_aarch64_dsb (i32 imm32_0_15:$CRm))]>;
def ISB : CRmSystemI<barrier_op, 0b110, "isb",
[(int_aarch64_isb (i32 imm32_0_15:$CRm))]>;
def TSB : CRmSystemI<barrier_op, 0b010, "tsb", []> {
let CRm = 0b0010;
let Inst{12} = 0;
let Predicates = [HasTRACEV8_4];
}
def DSBnXS : CRmSystemI<barrier_nxs_op, 0b001, "dsb"> {
let CRm{1-0} = 0b11;
let Inst{9-8} = 0b10;
let Predicates = [HasXS];
}
let Predicates = [HasWFxT] in {
def WFET : RegInputSystemI<0b0000, 0b000, "wfet">;
def WFIT : RegInputSystemI<0b0000, 0b001, "wfit">;
}
// Branch Record Buffer two-word mnemonic instructions
class BRBEI<bits<3> op2, string keyword>
: SimpleSystemI<0, (ins), "brb", keyword>, Sched<[WriteSys]> {
let Inst{31-8} = 0b110101010000100101110010;
let Inst{7-5} = op2;
let Predicates = [HasBRBE];
}
def BRB_IALL: BRBEI<0b100, "\tiall">;
def BRB_INJ: BRBEI<0b101, "\tinj">;
}
// Allow uppercase and lowercase keyword arguments for BRB IALL and BRB INJ
def : TokenAlias<"INJ", "inj">;
def : TokenAlias<"IALL", "iall">;
// ARMv8.2-A Dot Product
let Predicates = [HasDotProd] in {
defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", AArch64sdot>;
defm UDOT : SIMDThreeSameVectorDot<1, 0, "udot", AArch64udot>;
defm SDOTlane : SIMDThreeSameVectorDotIndex<0, 0, 0b10, "sdot", AArch64sdot>;
defm UDOTlane : SIMDThreeSameVectorDotIndex<1, 0, 0b10, "udot", AArch64udot>;
}
// ARMv8.6-A BFloat
let Predicates = [HasNEON, HasBF16] in {
defm BFDOT : SIMDThreeSameVectorBFDot<1, "bfdot">;
defm BF16DOTlane : SIMDThreeSameVectorBF16DotI<0, "bfdot">;
def BFMMLA : SIMDThreeSameVectorBF16MatrixMul<"bfmmla">;
def BFMLALB : SIMDBF16MLAL<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
def BFMLALT : SIMDBF16MLAL<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
def BFMLALBIdx : SIMDBF16MLALIndex<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
def BFMLALTIdx : SIMDBF16MLALIndex<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
def BFCVTN : SIMD_BFCVTN;
def BFCVTN2 : SIMD_BFCVTN2;
// Vector-scalar BFDOT:
// The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit
// register (the instruction uses a single 32-bit lane from it), so the pattern
// is a bit tricky.
def : Pat<(v2f32 (int_aarch64_neon_bfdot
(v2f32 V64:$Rd), (v4bf16 V64:$Rn),
(v4bf16 (bitconvert
(v2i32 (AArch64duplane32
(v4i32 (bitconvert
(v8bf16 (insert_subvector undef,
(v4bf16 V64:$Rm),
(i64 0))))),
VectorIndexS:$idx)))))),
(BF16DOTlanev4bf16 (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
(SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
VectorIndexS:$idx)>;
}
let Predicates = [HasNEONorStreamingSVE, HasBF16] in {
def BFCVT : BF16ToSinglePrecision<"bfcvt">;
}
// ARMv8.6A AArch64 matrix multiplication
let Predicates = [HasMatMulInt8] in {
def SMMLA : SIMDThreeSameVectorMatMul<0, 0, "smmla", int_aarch64_neon_smmla>;
def UMMLA : SIMDThreeSameVectorMatMul<0, 1, "ummla", int_aarch64_neon_ummla>;
def USMMLA : SIMDThreeSameVectorMatMul<1, 0, "usmmla", int_aarch64_neon_usmmla>;
defm USDOT : SIMDThreeSameVectorDot<0, 1, "usdot", int_aarch64_neon_usdot>;
defm USDOTlane : SIMDThreeSameVectorDotIndex<0, 1, 0b10, "usdot", int_aarch64_neon_usdot>;
// sudot lane has a pattern where usdot is expected (there is no sudot).
// The second operand is used in the dup operation to repeat the indexed
// element.
class BaseSIMDSUDOTIndex<bit Q, string dst_kind, string lhs_kind,
string rhs_kind, RegisterOperand RegType,
ValueType AccumType, ValueType InputType>
: BaseSIMDThreeSameVectorDotIndex<Q, 0, 1, 0b00, "sudot", dst_kind,
lhs_kind, rhs_kind, RegType, AccumType,
InputType, null_frag> {
let Pattern = [(set (AccumType RegType:$dst),
(AccumType (int_aarch64_neon_usdot (AccumType RegType:$Rd),
(InputType (bitconvert (AccumType
(AArch64duplane32 (v4i32 V128:$Rm),
VectorIndexS:$idx)))),
(InputType RegType:$Rn))))];
}
multiclass SIMDSUDOTIndex {
def v8i8 : BaseSIMDSUDOTIndex<0, ".2s", ".8b", ".4b", V64, v2i32, v8i8>;
def v16i8 : BaseSIMDSUDOTIndex<1, ".4s", ".16b", ".4b", V128, v4i32, v16i8>;
}
defm SUDOTlane : SIMDSUDOTIndex;
}
// ARMv8.2-A FP16 Fused Multiply-Add Long
let Predicates = [HasNEON, HasFP16FML] in {
defm FMLAL : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>;
defm FMLSL : SIMDThreeSameVectorFML<0, 1, 0b101, "fmlsl", int_aarch64_neon_fmlsl>;
defm FMLAL2 : SIMDThreeSameVectorFML<1, 0, 0b001, "fmlal2", int_aarch64_neon_fmlal2>;
defm FMLSL2 : SIMDThreeSameVectorFML<1, 0, 0b101, "fmlsl2", int_aarch64_neon_fmlsl2>;
defm FMLALlane : SIMDThreeSameVectorFMLIndex<0, 0b0000, "fmlal", int_aarch64_neon_fmlal>;
defm FMLSLlane : SIMDThreeSameVectorFMLIndex<0, 0b0100, "fmlsl", int_aarch64_neon_fmlsl>;
defm FMLAL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1000, "fmlal2", int_aarch64_neon_fmlal2>;
defm FMLSL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1100, "fmlsl2", int_aarch64_neon_fmlsl2>;
}
// Armv8.2-A Crypto extensions
let Predicates = [HasSHA3] in {
def SHA512H : CryptoRRRTied<0b0, 0b00, "sha512h">;
def SHA512H2 : CryptoRRRTied<0b0, 0b01, "sha512h2">;
def SHA512SU0 : CryptoRRTied_2D<0b0, 0b00, "sha512su0">;
def SHA512SU1 : CryptoRRRTied_2D<0b0, 0b10, "sha512su1">;
def RAX1 : CryptoRRR_2D<0b0,0b11, "rax1">;
def EOR3 : CryptoRRRR_16B<0b00, "eor3">;
def BCAX : CryptoRRRR_16B<0b01, "bcax">;
def XAR : CryptoRRRi6<"xar">;
class SHA3_pattern<Instruction INST, Intrinsic OpNode, ValueType VecTy>
: Pat<(VecTy (OpNode (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))),
(INST (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))>;
def : Pat<(v2i64 (int_aarch64_crypto_sha512su0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
(SHA512SU0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
def : SHA3_pattern<SHA512H, int_aarch64_crypto_sha512h, v2i64>;
def : SHA3_pattern<SHA512H2, int_aarch64_crypto_sha512h2, v2i64>;
def : SHA3_pattern<SHA512SU1, int_aarch64_crypto_sha512su1, v2i64>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v16i8>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v8i16>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v4i32>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v2i64>;
class EOR3_pattern<ValueType VecTy>
: Pat<(xor (xor (VecTy V128:$Vn), (VecTy V128:$Vm)), (VecTy V128:$Va)),
(EOR3 (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;
def : EOR3_pattern<v16i8>;
def : EOR3_pattern<v8i16>;
def : EOR3_pattern<v4i32>;
def : EOR3_pattern<v2i64>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v16i8>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v8i16>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v4i32>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v2i64>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v16i8>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v8i16>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v4i32>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v2i64>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v16i8>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v8i16>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v4i32>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v2i64>;
def : Pat<(v2i64 (int_aarch64_crypto_rax1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
(RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
def : Pat<(v2i64 (int_aarch64_crypto_xar (v2i64 V128:$Vn), (v2i64 V128:$Vm), (i64 timm0_63:$imm))),
(XAR (v2i64 V128:$Vn), (v2i64 V128:$Vm), (timm0_63:$imm))>;
} // HasSHA3
let Predicates = [HasSM4] in {
def SM3TT1A : CryptoRRRi2Tied<0b0, 0b00, "sm3tt1a">;
def SM3TT1B : CryptoRRRi2Tied<0b0, 0b01, "sm3tt1b">;
def SM3TT2A : CryptoRRRi2Tied<0b0, 0b10, "sm3tt2a">;
def SM3TT2B : CryptoRRRi2Tied<0b0, 0b11, "sm3tt2b">;
def SM3SS1 : CryptoRRRR_4S<0b10, "sm3ss1">;
def SM3PARTW1 : CryptoRRRTied_4S<0b1, 0b00, "sm3partw1">;
def SM3PARTW2 : CryptoRRRTied_4S<0b1, 0b01, "sm3partw2">;
def SM4ENCKEY : CryptoRRR_4S<0b1, 0b10, "sm4ekey">;
def SM4E : CryptoRRTied_4S<0b0, 0b01, "sm4e">;
def : Pat<(v4i32 (int_aarch64_crypto_sm3ss1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))),
(SM3SS1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))>;
class SM3PARTW_pattern<Instruction INST, Intrinsic OpNode>
: Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))),
(INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))>;
class SM3TT_pattern<Instruction INST, Intrinsic OpNode>
: Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (i64 VectorIndexS_timm:$imm) )),
(INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (VectorIndexS_timm:$imm))>;
class SM4_pattern<Instruction INST, Intrinsic OpNode>
: Pat<(v4i32 (OpNode (v4i32 V128:$Vn), (v4i32 V128:$Vm))),
(INST (v4i32 V128:$Vn), (v4i32 V128:$Vm))>;
def : SM3PARTW_pattern<SM3PARTW1, int_aarch64_crypto_sm3partw1>;
def : SM3PARTW_pattern<SM3PARTW2, int_aarch64_crypto_sm3partw2>;
def : SM3TT_pattern<SM3TT1A, int_aarch64_crypto_sm3tt1a>;
def : SM3TT_pattern<SM3TT1B, int_aarch64_crypto_sm3tt1b>;
def : SM3TT_pattern<SM3TT2A, int_aarch64_crypto_sm3tt2a>;
def : SM3TT_pattern<SM3TT2B, int_aarch64_crypto_sm3tt2b>;
def : SM4_pattern<SM4ENCKEY, int_aarch64_crypto_sm4ekey>;
def : SM4_pattern<SM4E, int_aarch64_crypto_sm4e>;
} // HasSM4
let Predicates = [HasRCPC] in {
// v8.3 Release Consistent Processor Consistent support, optional in v8.2.
def LDAPRB : RCPCLoad<0b00, "ldaprb", GPR32>;
def LDAPRH : RCPCLoad<0b01, "ldaprh", GPR32>;
def LDAPRW : RCPCLoad<0b10, "ldapr", GPR32>;
def LDAPRX : RCPCLoad<0b11, "ldapr", GPR64>;
}
// v8.3a complex add and multiply-accumulate. No predicate here; that is done
// inside the multiclass, as the FP16 versions need different predicates.
defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop,
"fcmla", null_frag>;
defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd,
"fcadd", null_frag>;
defm FCMLA : SIMDIndexedTiedComplexHSD<0, 1, complexrotateop, "fcmla">;
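// In the FCADD patterns below, the final immediate selects the rotation:
// 0 = #90, 1 = #270 (FCADD only supports those two rotations).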
let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot90 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
(FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 0))>;
def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot270 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
(FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 1))>;
def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot90 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
(FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 0))>;
def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
(FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>;
}
let Predicates = [HasComplxNum, HasNEON] in {
def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
(FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>;
def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot270 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
(FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 1))>;
foreach Ty = [v4f32, v2f64] in {
def : Pat<(Ty (int_aarch64_neon_vcadd_rot90 (Ty V128:$Rn), (Ty V128:$Rm))),
(!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 0))>;
def : Pat<(Ty (int_aarch64_neon_vcadd_rot270 (Ty V128:$Rn), (Ty V128:$Rm))),
(!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 1))>;
}
}
multiclass FCMLA_PATS<ValueType ty, DAGOperand Reg> {
def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
(!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 0)>;
def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
(!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 1)>;
def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
(!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 2)>;
def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
(!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 3)>;
}
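// In the FCMLA patterns above and below, the trailing immediate encodes the
// rotation: 0, 1, 2 and 3 select #0, #90, #180 and #270 respectively.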
multiclass FCMLA_LANE_PATS<ValueType ty, DAGOperand Reg, dag RHSDup> {
def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
(!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 0)>;
def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
(!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 1)>;
def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
(!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 2)>;
def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
(!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 3)>;
}
let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
defm : FCMLA_PATS<v4f16, V64>;
defm : FCMLA_PATS<v8f16, V128>;
defm : FCMLA_LANE_PATS<v4f16, V64,
(v4f16 (bitconvert (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexD:$idx))))>;
defm : FCMLA_LANE_PATS<v8f16, V128,
(v8f16 (bitconvert (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))>;
}
let Predicates = [HasComplxNum, HasNEON] in {
defm : FCMLA_PATS<v2f32, V64>;
defm : FCMLA_PATS<v4f32, V128>;
defm : FCMLA_PATS<v2f64, V128>;
defm : FCMLA_LANE_PATS<v4f32, V128,
(v4f32 (bitconvert (v2i64 (AArch64duplane64 (v2i64 V128:$Rm), VectorIndexD:$idx))))>;
}
// v8.3a Pointer Authentication
// These instructions inhabit part of the hint space and so can be used on any
// armv8 target. Keeping the old HINT mnemonic when compiling without PA is
// important for compatibility with other assemblers (e.g. GAS) when building
// software that must run on CPUs both with and without PA.
let Uses = [LR], Defs = [LR] in {
def PACIAZ : SystemNoOperands<0b000, "hint\t#24">;
def PACIBZ : SystemNoOperands<0b010, "hint\t#26">;
let isAuthenticated = 1 in {
def AUTIAZ : SystemNoOperands<0b100, "hint\t#28">;
def AUTIBZ : SystemNoOperands<0b110, "hint\t#30">;
}
}
let Uses = [LR, SP], Defs = [LR] in {
def PACIASP : SystemNoOperands<0b001, "hint\t#25">;
def PACIBSP : SystemNoOperands<0b011, "hint\t#27">;
let isAuthenticated = 1 in {
def AUTIASP : SystemNoOperands<0b101, "hint\t#29">;
def AUTIBSP : SystemNoOperands<0b111, "hint\t#31">;
}
}
let Uses = [X16, X17], Defs = [X17], CRm = 0b0001 in {
def PACIA1716 : SystemNoOperands<0b000, "hint\t#8">;
def PACIB1716 : SystemNoOperands<0b010, "hint\t#10">;
let isAuthenticated = 1 in {
def AUTIA1716 : SystemNoOperands<0b100, "hint\t#12">;
def AUTIB1716 : SystemNoOperands<0b110, "hint\t#14">;
}
}
let Uses = [LR], Defs = [LR], CRm = 0b0000 in {
def XPACLRI : SystemNoOperands<0b111, "hint\t#7">;
}
// In order to be able to write readable assembly, LLVM should accept assembly
// inputs that use pointer authentication mnemonics, even with PA disabled.
// However, in order to be compatible with other assemblers (e.g. GAS), LLVM
// should not emit these mnemonics unless PA is enabled.
def : InstAlias<"paciaz", (PACIAZ), 0>;
def : InstAlias<"pacibz", (PACIBZ), 0>;
def : InstAlias<"autiaz", (AUTIAZ), 0>;
def : InstAlias<"autibz", (AUTIBZ), 0>;
def : InstAlias<"paciasp", (PACIASP), 0>;
def : InstAlias<"pacibsp", (PACIBSP), 0>;
def : InstAlias<"autiasp", (AUTIASP), 0>;
def : InstAlias<"autibsp", (AUTIBSP), 0>;
def : InstAlias<"pacia1716", (PACIA1716), 0>;
def : InstAlias<"pacib1716", (PACIB1716), 0>;
def : InstAlias<"autia1716", (AUTIA1716), 0>;
def : InstAlias<"autib1716", (AUTIB1716), 0>;
def : InstAlias<"xpaclri", (XPACLRI), 0>;
// These pointer authentication instructions require armv8.3a
let Predicates = [HasPAuth] in {
// When PA is enabled, a better mnemonic should be emitted.
def : InstAlias<"paciaz", (PACIAZ), 1>;
def : InstAlias<"pacibz", (PACIBZ), 1>;
def : InstAlias<"autiaz", (AUTIAZ), 1>;
def : InstAlias<"autibz", (AUTIBZ), 1>;
def : InstAlias<"paciasp", (PACIASP), 1>;
def : InstAlias<"pacibsp", (PACIBSP), 1>;
def : InstAlias<"autiasp", (AUTIASP), 1>;
def : InstAlias<"autibsp", (AUTIBSP), 1>;
def : InstAlias<"pacia1716", (PACIA1716), 1>;
def : InstAlias<"pacib1716", (PACIB1716), 1>;
def : InstAlias<"autia1716", (AUTIA1716), 1>;
def : InstAlias<"autib1716", (AUTIB1716), 1>;
def : InstAlias<"xpaclri", (XPACLRI), 1>;
multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm,
SDPatternOperator op> {
def IA : SignAuthOneData<prefix, 0b00, !strconcat(asm, "ia"), op>;
def IB : SignAuthOneData<prefix, 0b01, !strconcat(asm, "ib"), op>;
def DA : SignAuthOneData<prefix, 0b10, !strconcat(asm, "da"), op>;
def DB : SignAuthOneData<prefix, 0b11, !strconcat(asm, "db"), op>;
def IZA : SignAuthZero<prefix_z, 0b00, !strconcat(asm, "iza"), op>;
def DZA : SignAuthZero<prefix_z, 0b10, !strconcat(asm, "dza"), op>;
def IZB : SignAuthZero<prefix_z, 0b01, !strconcat(asm, "izb"), op>;
def DZB : SignAuthZero<prefix_z, 0b11, !strconcat(asm, "dzb"), op>;
}
defm PAC : SignAuth<0b000, 0b010, "pac", int_ptrauth_sign>;
defm AUT : SignAuth<0b001, 0b011, "aut", null_frag>;
def XPACI : ClearAuth<0, "xpaci">;
def XPACD : ClearAuth<1, "xpacd">;
def PACGA : SignAuthTwoOperand<0b1100, "pacga", int_ptrauth_sign_generic>;
// Combined Instructions
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def BRAA : AuthBranchTwoOperands<0, 0, "braa">;
def BRAB : AuthBranchTwoOperands<0, 1, "brab">;
}
let isCall = 1, Defs = [LR], Uses = [SP] in {
def BLRAA : AuthBranchTwoOperands<1, 0, "blraa">;
def BLRAB : AuthBranchTwoOperands<1, 1, "blrab">;
}
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def BRAAZ : AuthOneOperand<0b000, 0, "braaz">;
def BRABZ : AuthOneOperand<0b000, 1, "brabz">;
}
let isCall = 1, Defs = [LR], Uses = [SP] in {
def BLRAAZ : AuthOneOperand<0b001, 0, "blraaz">;
def BLRABZ : AuthOneOperand<0b001, 1, "blrabz">;
}
let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def RETAA : AuthReturn<0b010, 0, "retaa">;
def RETAB : AuthReturn<0b010, 1, "retab">;
def ERETAA : AuthReturn<0b100, 0, "eretaa">;
def ERETAB : AuthReturn<0b100, 1, "eretab">;
}
defm LDRAA : AuthLoad<0, "ldraa", simm10Scaled>;
defm LDRAB : AuthLoad<1, "ldrab", simm10Scaled>;
}
// v8.3a floating point conversion for javascript
let Predicates = [HasJS, HasFPARMv8], Defs = [NZCV] in
def FJCVTZS : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
"fjcvtzs",
[(set GPR32:$Rd,
(int_aarch64_fjcvtzs FPR64:$Rn))]> {
let Inst{31} = 0;
} // HasJS, HasFPARMv8
// v8.4 Flag manipulation instructions
let Predicates = [HasFlagM], Defs = [NZCV], Uses = [NZCV] in {
def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> {
let Inst{20-5} = 0b0000001000000000;
}
def SETF8 : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">;
def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">;
def RMIF : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif",
"{\t$Rn, $imm, $mask}">;
} // HasFlagM
// v8.5 flag manipulation instructions
let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in {
def XAFLAG : PstateWriteSimple<(ins), "xaflag", "">, Sched<[WriteSys]> {
let Inst{18-16} = 0b000;
let Inst{11-8} = 0b0000;
let Unpredictable{11-8} = 0b1111;
let Inst{7-5} = 0b001;
}
def AXFLAG : PstateWriteSimple<(ins), "axflag", "">, Sched<[WriteSys]> {
let Inst{18-16} = 0b000;
let Inst{11-8} = 0b0000;
let Unpredictable{11-8} = 0b1111;
let Inst{7-5} = 0b010;
}
} // HasAltNZCV
// Armv8.5-A speculation barrier
def SB : SimpleSystemI<0, (ins), "sb", "">, Sched<[]> {
let Inst{20-5} = 0b0001100110000111;
let Unpredictable{11-8} = 0b1111;
let Predicates = [HasSB];
let hasSideEffects = 1;
}
def : InstAlias<"clrex", (CLREX 0xf)>;
def : InstAlias<"isb", (ISB 0xf)>;
def : InstAlias<"ssbb", (DSB 0)>;
def : InstAlias<"pssbb", (DSB 4)>;
def : InstAlias<"dfb", (DSB 0b1100)>, Requires<[HasV8_0r]>;
def MRS : MRSI;
def MSR : MSRI;
def MSRpstateImm1 : MSRpstateImm0_1;
def MSRpstateImm4 : MSRpstateImm0_15;
def : Pat<(AArch64mrs imm:$id),
(MRS imm:$id)>;
// The thread pointer (on Linux, at least, where this has been implemented) is
// TPIDR_EL0.
def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
[(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>;
let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in {
def HWASAN_CHECK_MEMACCESS : Pseudo<
(outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
[(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
Sched<[]>;
}
let Uses = [ X20 ], Defs = [ X16, X17, LR, NZCV ] in {
def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo<
(outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
[(int_hwasan_check_memaccess_shortgranules X20, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
Sched<[]>;
}
// The cycle counter PMC register is PMCCNTR_EL0.
let Predicates = [HasPerfMon] in
def : Pat<(readcyclecounter), (MRS 0xdce8)>;
// FPCR register
def : Pat<(i64 (int_aarch64_get_fpcr)), (MRS 0xda20)>;
def : Pat<(int_aarch64_set_fpcr i64:$val), (MSR 0xda20, GPR64:$val)>;
// Generic system instructions
def SYSxt : SystemXtI<0, "sys">;
def SYSLxt : SystemLXtI<1, "sysl">;
def : InstAlias<"sys $op1, $Cn, $Cm, $op2",
(SYSxt imm0_7:$op1, sys_cr_op:$Cn,
sys_cr_op:$Cm, imm0_7:$op2, XZR)>;
let Predicates = [HasTME] in {
def TSTART : TMSystemI<0b0000, "tstart",
[(set GPR64:$Rt, (int_aarch64_tstart))]>;
def TCOMMIT : TMSystemINoOperand<0b0000, "tcommit", [(int_aarch64_tcommit)]>;
def TCANCEL : TMSystemException<0b011, "tcancel",
[(int_aarch64_tcancel timm64_0_65535:$imm)]>;
def TTEST : TMSystemI<0b0001, "ttest", [(set GPR64:$Rt, (int_aarch64_ttest))]> {
let mayLoad = 0;
let mayStore = 0;
}
} // HasTME
//===----------------------------------------------------------------------===//
// Move immediate instructions.
//===----------------------------------------------------------------------===//
defm MOVK : InsertImmediate<0b11, "movk">;
defm MOVN : MoveImmediate<0b00, "movn">;
let PostEncoderMethod = "fixMOVZ" in
defm MOVZ : MoveImmediate<0b10, "movz">;
// First group of aliases covers an implicit "lsl #0".
def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, timm32_0_65535:$imm, 0), 0>;
def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, timm32_0_65535:$imm, 0), 0>;
def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, timm32_0_65535:$imm, 0)>;
def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, timm32_0_65535:$imm, 0)>;
def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, timm32_0_65535:$imm, 0)>;
def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, timm32_0_65535:$imm, 0)>;
// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax.
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>;
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>;
def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g3:$sym, 48), 0>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g2:$sym, 32), 0>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g1:$sym, 16), 0>;
def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g0:$sym, 0), 0>;
def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>;
def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>;
def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>;
def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>;
def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g1:$sym, 16), 0>;
def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g0:$sym, 0), 0>;
// Final group of aliases covers true "mov $Rd, $imm" cases.
multiclass movw_mov_alias<string basename,Instruction INST, RegisterClass GPR,
int width, int shift> {
def _asmoperand : AsmOperandClass {
let Name = basename # width # "_lsl" # shift # "MovAlias";
let PredicateMethod = "is" # basename # "MovAlias<" # width # ", "
# shift # ">";
let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">";
}
def _movimm : Operand<i32> {
let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand");
}
def : InstAlias<"mov $Rd, $imm",
(INST GPR:$Rd, !cast<Operand>(NAME # "_movimm"):$imm, shift)>;
}
defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>;
defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>;
defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>;
defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>;
defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>;
defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>;
let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1,
isAsCheapAsAMove = 1 in {
// FIXME: The following pseudo instructions are only needed because remat
// cannot handle multiple instructions. When that changes, we can select
// directly to the real instructions and get rid of these pseudos.
def MOVi32imm
: Pseudo<(outs GPR32:$dst), (ins i32imm:$src),
[(set GPR32:$dst, imm:$src)]>,
Sched<[WriteImm]>;
def MOVi64imm
: Pseudo<(outs GPR64:$dst), (ins i64imm:$src),
[(set GPR64:$dst, imm:$src)]>,
Sched<[WriteImm]>;
} // isReMaterializable, isCodeGenOnly
// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the
// eventual expansion code fewer bits to worry about getting right. Marshalling
// the types is a little tricky though:
def i64imm_32bit : ImmLeaf<i64, [{
return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
}]>;
def s64imm_32bit : ImmLeaf<i64, [{
int64_t Imm64 = static_cast<int64_t>(Imm);
return Imm64 >= std::numeric_limits<int32_t>::min() &&
Imm64 <= std::numeric_limits<int32_t>::max();
}]>;
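// For example, 0xffffffff (4294967295) satisfies i64imm_32bit (its upper 32
// bits are zero) but not s64imm_32bit, while -1 satisfies s64imm_32bit but not
// i64imm_32bit.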
def trunc_imm : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
def gi_trunc_imm : GICustomOperandRenderer<"renderTruncImm">,
GISDNodeXFormEquiv<trunc_imm>;
let Predicates = [OptimizedGISelOrOtherSelector] in {
// The SUBREG_TO_REG isn't eliminated at -O0, which can result in pointless
// copies.
def : Pat<(i64 i64imm_32bit:$src),
(SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>;
}
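// For example, an i64 constant such as 0x12345 is built with a single 32-bit
// MOVi32imm and then widened to 64 bits via SUBREG_TO_REG, since its upper
// 32 bits are known to be zero.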
// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model).
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
}]>;
def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def : Pat<(f32 fpimm:$in),
(COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>;
def : Pat<(f64 fpimm:$in),
(COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>;
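// For example, the f32 constant 1.0 has the bit pattern 0x3f800000, so it is
// materialised as MOVi32imm 0x3f800000 and then moved into an FPR32 with
// COPY_TO_REGCLASS.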
// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK
// sequences.
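// Each step fills in one 16-bit chunk of the address: MOVZ seeds bits [15:0],
// and the three MOVKs insert bits [31:16], [47:32] and [63:48].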
def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2,
tglobaladdr:$g1, tglobaladdr:$g0),
(MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g0, 0),
tglobaladdr:$g1, 16),
tglobaladdr:$g2, 32),
tglobaladdr:$g3, 48)>;
def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2,
tblockaddress:$g1, tblockaddress:$g0),
(MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g0, 0),
tblockaddress:$g1, 16),
tblockaddress:$g2, 32),
tblockaddress:$g3, 48)>;
def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2,
tconstpool:$g1, tconstpool:$g0),
(MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g0, 0),
tconstpool:$g1, 16),
tconstpool:$g2, 32),
tconstpool:$g3, 48)>;
def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2,
tjumptable:$g1, tjumptable:$g0),
(MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g0, 0),
tjumptable:$g1, 16),
tjumptable:$g2, 32),
tjumptable:$g3, 48)>;
//===----------------------------------------------------------------------===//
// Arithmetic instructions.
//===----------------------------------------------------------------------===//
// Add/subtract with carry.
defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>;
defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>;
def : InstAlias<"ngc $dst, $src", (SBCWr GPR32:$dst, WZR, GPR32:$src)>;
def : InstAlias<"ngc $dst, $src", (SBCXr GPR64:$dst, XZR, GPR64:$src)>;
def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>;
def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>;
// Add/subtract
defm ADD : AddSub<0, "add", "sub", add>;
defm SUB : AddSub<1, "sub", "add">;
def : InstAlias<"mov $dst, $src",
(ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src",
(ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src",
(ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>;
def : InstAlias<"mov $dst, $src",
(ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>;
defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">;
defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">;
// Use SUBS instead of SUB to enable CSE between SUBS and SUB.
def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm),
(SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>;
def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm),
(SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>;
def : Pat<(sub GPR32:$Rn, GPR32:$Rm),
(SUBSWrr GPR32:$Rn, GPR32:$Rm)>;
def : Pat<(sub GPR64:$Rn, GPR64:$Rm),
(SUBSXrr GPR64:$Rn, GPR64:$Rm)>;
def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm),
(SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>;
def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm),
(SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>;
let AddedComplexity = 1 in {
def : Pat<(sub GPR32sp:$R2, arith_extended_reg32_i32:$R3),
(SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>;
def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3),
(SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>;
}
// Because of the immediate format for add/sub-imm instructions, the
// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
// These patterns capture that transformation.
let AddedComplexity = 1 in {
def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
(SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
(SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
(ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
(ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
}
// The same immediate-format constraint applies to the flag-setting variants:
// (add x, -1) must likewise be selected as a subtract of the negated immediate.
// These patterns capture that transformation.
let AddedComplexity = 1 in {
def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
(SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
(SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
(ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
(ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
}
def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
def : InstAlias<"neg $dst, $src$shift",
(SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
def : InstAlias<"neg $dst, $src$shift",
(SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;
def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
def : InstAlias<"negs $dst, $src$shift",
(SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
def : InstAlias<"negs $dst, $src$shift",
(SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;
// Unsigned/Signed divide
defm UDIV : Div<0, "udiv", udiv>;
defm SDIV : Div<1, "sdiv", sdiv>;
def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr GPR32:$Rn, GPR32:$Rm)>;
def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr GPR64:$Rn, GPR64:$Rm)>;
def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr GPR32:$Rn, GPR32:$Rm)>;
def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr GPR64:$Rn, GPR64:$Rm)>;
// Variable shift
defm ASRV : Shift<0b10, "asr", sra>;
defm LSLV : Shift<0b00, "lsl", shl>;
defm LSRV : Shift<0b01, "lsr", srl>;
defm RORV : Shift<0b11, "ror", rotr>;
def : ShiftAlias<"asrv", ASRVWr, GPR32>;
def : ShiftAlias<"asrv", ASRVXr, GPR64>;
def : ShiftAlias<"lslv", LSLVWr, GPR32>;
def : ShiftAlias<"lslv", LSLVXr, GPR64>;
def : ShiftAlias<"lsrv", LSRVWr, GPR32>;
def : ShiftAlias<"lsrv", LSRVXr, GPR64>;
def : ShiftAlias<"rorv", RORVWr, GPR32>;
def : ShiftAlias<"rorv", RORVXr, GPR64>;
// Multiply-add
let AddedComplexity = 5 in {
defm MADD : MulAccum<0, "madd">;
defm MSUB : MulAccum<1, "msub">;
def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)),
(MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)),
(MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))),
(MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))),
(MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)),
(MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)),
(MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
} // AddedComplexity = 5
let AddedComplexity = 5 in {
def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>;
def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>;
def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>;
def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>;
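// The patterns below recognise 64-bit multiplies whose operands are really
// sign- or zero-extended 32-bit values and select the widening
// SMADDL/UMADDL/SMSUBL/UMSUBL forms instead of a full 64-bit multiply.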
def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext_inreg GPR64:$Rm, i32))),
(SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext GPR32:$Rm))),
(SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))),
(SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (and GPR64:$Rm, 0xFFFFFFFF))),
(UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (zext GPR32:$Rm))),
(UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))),
(UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))),
(SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))),
(UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))),
(SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$C))),
(UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C))),
(SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
(MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
(SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
(UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))),
(SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
(MOVi32imm (trunc_imm imm:$C)), XZR)>;
def : Pat<(i64 (add (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)),
(SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (add (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)),
(UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (add (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)),
GPR64:$Ra)),
(SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
(MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (sub GPR64:$Ra, (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
(SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (sub GPR64:$Ra, (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
(UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
def : Pat<(i64 (sub GPR64:$Ra, (mul (sext_inreg GPR64:$Rn, i32),
(s64imm_32bit:$C)))),
(SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
(MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
} // AddedComplexity = 5
def : MulAccumWAlias<"mul", MADDWrrr>;
def : MulAccumXAlias<"mul", MADDXrrr>;
def : MulAccumWAlias<"mneg", MSUBWrrr>;
def : MulAccumXAlias<"mneg", MSUBXrrr>;
def : WideMulAccumAlias<"smull", SMADDLrrr>;
def : WideMulAccumAlias<"smnegl", SMSUBLrrr>;
def : WideMulAccumAlias<"umull", UMADDLrrr>;
def : WideMulAccumAlias<"umnegl", UMSUBLrrr>;
// Multiply-high
def SMULHrr : MulHi<0b010, "smulh", mulhs>;
def UMULHrr : MulHi<0b110, "umulh", mulhu>;
// CRC32
def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">;
def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">;
def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">;
def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">;
def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">;
def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">;
def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">;
def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">;
// v8.1 atomic CAS
defm CAS : CompareAndSwap<0, 0, "">;
defm CASA : CompareAndSwap<1, 0, "a">;
defm CASL : CompareAndSwap<0, 1, "l">;
defm CASAL : CompareAndSwap<1, 1, "al">;
// v8.1 atomic CASP
defm CASP : CompareAndSwapPair<0, 0, "">;
defm CASPA : CompareAndSwapPair<1, 0, "a">;
defm CASPL : CompareAndSwapPair<0, 1, "l">;
defm CASPAL : CompareAndSwapPair<1, 1, "al">;
// v8.1 atomic SWP
defm SWP : Swap<0, 0, "">;
defm SWPA : Swap<1, 0, "a">;
defm SWPL : Swap<0, 1, "l">;
defm SWPAL : Swap<1, 1, "al">;
// v8.1 atomic LD<OP>(register): performs the load, applies <OP> with the
// register operand and stores the result back, returning the original value.
defm LDADD : LDOPregister<0b000, "add", 0, 0, "">;
defm LDADDA : LDOPregister<0b000, "add", 1, 0, "a">;
defm LDADDL : LDOPregister<0b000, "add", 0, 1, "l">;
defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">;
defm LDCLR : LDOPregister<0b001, "clr", 0, 0, "">;
defm LDCLRA : LDOPregister<0b001, "clr", 1, 0, "a">;
defm LDCLRL : LDOPregister<0b001, "clr", 0, 1, "l">;
defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">;
defm LDEOR : LDOPregister<0b010, "eor", 0, 0, "">;
defm LDEORA : LDOPregister<0b010, "eor", 1, 0, "a">;
defm LDEORL : LDOPregister<0b010, "eor", 0, 1, "l">;
defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">;
defm LDSET : LDOPregister<0b011, "set", 0, 0, "">;
defm LDSETA : LDOPregister<0b011, "set", 1, 0, "a">;
defm LDSETL : LDOPregister<0b011, "set", 0, 1, "l">;
defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">;
defm LDSMAX : LDOPregister<0b100, "smax", 0, 0, "">;
defm LDSMAXA : LDOPregister<0b100, "smax", 1, 0, "a">;
defm LDSMAXL : LDOPregister<0b100, "smax", 0, 1, "l">;
defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">;
defm LDSMIN : LDOPregister<0b101, "smin", 0, 0, "">;
defm LDSMINA : LDOPregister<0b101, "smin", 1, 0, "a">;
defm LDSMINL : LDOPregister<0b101, "smin", 0, 1, "l">;
defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">;
defm LDUMAX : LDOPregister<0b110, "umax", 0, 0, "">;
defm LDUMAXA : LDOPregister<0b110, "umax", 1, 0, "a">;
defm LDUMAXL : LDOPregister<0b110, "umax", 0, 1, "l">;
defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">;
defm LDUMIN : LDOPregister<0b111, "umin", 0, 0, "">;
defm LDUMINA : LDOPregister<0b111, "umin", 1, 0, "a">;
defm LDUMINL : LDOPregister<0b111, "umin", 0, 1, "l">;
defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">;
// v8.1 atomic ST<OP>(register), defined as aliases of LD<OP>(register) with Rt = XZR
defm : STOPregister<"stadd","LDADD">; // STADDx
defm : STOPregister<"stclr","LDCLR">; // STCLRx
defm : STOPregister<"steor","LDEOR">; // STEORx
defm : STOPregister<"stset","LDSET">; // STSETx
defm : STOPregister<"stsmax","LDSMAX">;// STSMAXx
defm : STOPregister<"stsmin","LDSMIN">;// STSMINx
defm : STOPregister<"stumax","LDUMAX">;// STUMAXx
defm : STOPregister<"stumin","LDUMIN">;// STUMINx
// v8.5 Memory Tagging Extension
let Predicates = [HasMTE] in {
def IRG : BaseTwoOperand<0b0100, GPR64sp, "irg", int_aarch64_irg, GPR64sp, GPR64>,
Sched<[]>{
let Inst{31} = 1;
}
def GMI : BaseTwoOperand<0b0101, GPR64, "gmi", int_aarch64_gmi, GPR64sp>, Sched<[]>{
let Inst{31} = 1;
let isNotDuplicable = 1;
}
def ADDG : AddSubG<0, "addg", null_frag>;
def SUBG : AddSubG<1, "subg", null_frag>;
def : InstAlias<"irg $dst, $src", (IRG GPR64sp:$dst, GPR64sp:$src, XZR), 1>;
def SUBP : SUBP<0, "subp", int_aarch64_subp>, Sched<[]>;
def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>{
let Defs = [NZCV];
}
def : InstAlias<"cmpp $lhs, $rhs", (SUBPS XZR, GPR64sp:$lhs, GPR64sp:$rhs), 0>;
def LDG : MemTagLoad<"ldg", "\t$Rt, [$Rn, $offset]">;
def : Pat<(int_aarch64_addg (am_indexedu6s128 GPR64sp:$Rn, uimm6s16:$imm6), imm0_15:$imm4),
(ADDG GPR64sp:$Rn, imm0_63:$imm6, imm0_15:$imm4)>;
def : Pat<(int_aarch64_ldg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)),
(LDG GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;
def : InstAlias<"ldg $Rt, [$Rn]", (LDG GPR64:$Rt, GPR64sp:$Rn, 0), 1>;
def LDGM : MemTagVector<1, "ldgm", "\t$Rt, [$Rn]",
(outs GPR64:$Rt), (ins GPR64sp:$Rn)>;
def STGM : MemTagVector<0, "stgm", "\t$Rt, [$Rn]",
(outs), (ins GPR64:$Rt, GPR64sp:$Rn)>;
def STZGM : MemTagVector<0, "stzgm", "\t$Rt, [$Rn]",
(outs), (ins GPR64:$Rt, GPR64sp:$Rn)> {
let Inst{23} = 0;
}
defm STG : MemTagStore<0b00, "stg">;
defm STZG : MemTagStore<0b01, "stzg">;
defm ST2G : MemTagStore<0b10, "st2g">;
defm STZ2G : MemTagStore<0b11, "stz2g">;
def : Pat<(AArch64stg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
(STGOffset $Rn, $Rm, $imm)>;
def : Pat<(AArch64stzg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
(STZGOffset $Rn, $Rm, $imm)>;
def : Pat<(AArch64st2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
(ST2GOffset $Rn, $Rm, $imm)>;
def : Pat<(AArch64stz2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
(STZ2GOffset $Rn, $Rm, $imm)>;
defm STGP : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">;
def STGPpre : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">;
def STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">;
def : Pat<(int_aarch64_stg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)),
(STGOffset GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;
def : Pat<(int_aarch64_stgp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$imm), GPR64:$Rt, GPR64:$Rt2),
(STGPi $Rt, $Rt2, $Rn, $imm)>;
def IRGstack
: Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rsp, GPR64:$Rm), []>,
Sched<[]>;
def TAGPstack
: Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, uimm6s16:$imm6, GPR64sp:$Rm, imm0_15:$imm4), []>,
Sched<[]>;
// Explicit SP in the first operand prevents ShrinkWrap optimization
// from leaving this instruction out of the stack frame. When IRGstack
// is transformed into IRG, this operand is replaced with the actual
// register / expression for the tagged base pointer of the current function.
def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>;
// Large STG to be expanded into a loop. $sz is the size, $Rn is the start address.
// $Rn_wback is one past the end of the range. $Rm is the loop counter.
let isCodeGenOnly=1, mayStore=1 in {
def STGloop_wback
: Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn),
[], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >,
Sched<[WriteAdr, WriteST]>;
def STZGloop_wback
: Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn),
[], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >,
Sched<[WriteAdr, WriteST]>;
// Variants of the above where $Rn2 is an independent register, not tied to the input register $Rn.
// They exist so that a FrameIndex can be used as $Rn (a FrameIndex, of course, cannot be written back).
def STGloop
: Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn),
[], "@earlyclobber $Rn2,@earlyclobber $Rm" >,
Sched<[WriteAdr, WriteST]>;
def STZGloop
: Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn),
[], "@earlyclobber $Rn2,@earlyclobber $Rm" >,
Sched<[WriteAdr, WriteST]>;
}
} // Predicates = [HasMTE]
//===----------------------------------------------------------------------===//
// Logical instructions.
//===----------------------------------------------------------------------===//
// (immediate)
defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">;
defm AND : LogicalImm<0b00, "and", and, "bic">;
defm EOR : LogicalImm<0b10, "eor", xor, "eon">;
defm ORR : LogicalImm<0b01, "orr", or, "orn">;
// FIXME: these aliases *are* canonical sometimes (when movz can't be
// used). Actually, it seems to be working right now, but putting logical_immXX
// here is a bit dodgy on the AsmParser side too.
def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR,
logical_imm32:$imm), 0>;
def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR,
logical_imm64:$imm), 0>;
// (register)
defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>;
defm BICS : LogicalRegS<0b11, 1, "bics",
BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>;
defm AND : LogicalReg<0b00, 0, "and", and>;
defm BIC : LogicalReg<0b00, 1, "bic",
BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
defm EON : LogicalReg<0b10, 1, "eon",
BinOpFrag<(not (xor node:$LHS, node:$RHS))>>;
defm EOR : LogicalReg<0b10, 0, "eor", xor>;
defm ORN : LogicalReg<0b01, 1, "orn",
BinOpFrag<(or node:$LHS, (not node:$RHS))>>;
defm ORR : LogicalReg<0b01, 0, "orr", or>;
def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>;
def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>;
def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>;
def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>;
def : InstAlias<"mvn $Wd, $Wm$sh",
(ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>;
def : InstAlias<"mvn $Xd, $Xm$sh",
(ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>;
def : InstAlias<"tst $src1, $src2",
(ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>;
def : InstAlias<"tst $src1, $src2",
(ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>;
def : InstAlias<"tst $src1, $src2",
(ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>;
def : InstAlias<"tst $src1, $src2",
(ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>;
def : InstAlias<"tst $src1, $src2$sh",
(ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>;
def : InstAlias<"tst $src1, $src2$sh",
(ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>;
def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>;
def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>;
//===----------------------------------------------------------------------===//
// One operand data processing instructions.
//===----------------------------------------------------------------------===//
defm CLS : OneOperandData<0b101, "cls">;
defm CLZ : OneOperandData<0b100, "clz", ctlz>;
defm RBIT : OneOperandData<0b000, "rbit", bitreverse>;
def REV16Wr : OneWRegData<0b001, "rev16",
UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
def REV16Xr : OneXRegData<0b001, "rev16", null_frag>;
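// rev16 reverses the bytes within each 16-bit halfword, which is equivalent to
// byte-swapping the whole register and then rotating it right by 16 bits, hence
// the (rotr (bswap x), 16) fragment above.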
def : Pat<(cttz GPR32:$Rn),
(CLZWr (RBITWr GPR32:$Rn))>;
def : Pat<(cttz GPR64:$Rn),
(CLZXr (RBITXr GPR64:$Rn))>;
def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)),
(i32 1))),
(CLSWr GPR32:$Rn)>;
def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)),
(i64 1))),
(CLSXr GPR64:$Rn)>;
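// These implement the identity cls(x) == ctlz(((x ^ (x >> (N-1))) << 1) | 1)
// for an N-bit value: the xor with the sign-extended sign bit folds negative
// values onto non-negative ones, and the trailing 1 keeps the argument non-zero.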
def : Pat<(int_aarch64_cls GPR32:$Rn), (CLSWr GPR32:$Rn)>;
def : Pat<(int_aarch64_cls64 GPR64:$Rm), (EXTRACT_SUBREG (CLSXr GPR64:$Rm), sub_32)>;
// Unlike the other one-operand instructions, the instructions with the "rev"
// mnemonic do *not* just differ in the size bit; they actually use different
// opcode bits for the different sizes.
def REVWr : OneWRegData<0b010, "rev", bswap>;
def REVXr : OneXRegData<0b011, "rev", bswap>;
def REV32Xr : OneXRegData<0b010, "rev32",
UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;
def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>;
// The bswap commutes with the rotr so we want a pattern for both possible
// orders.
def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;
// Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero.
def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>;
def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>;
//===----------------------------------------------------------------------===//
// Bitfield immediate extraction instruction.
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in
defm EXTR : ExtractImm<"extr">;
def : InstAlias<"ror $dst, $src, $shift",
(EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
def : InstAlias<"ror $dst, $src, $shift",
(EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>;
def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)),
(EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>;
def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
(EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>;
//===----------------------------------------------------------------------===//
// Other bitfield immediate instructions.
//===----------------------------------------------------------------------===//
let hasSideEffects = 0 in {
defm BFM : BitfieldImmWith2RegArgs<0b01, "bfm">;
defm SBFM : BitfieldImm<0b00, "sbfm">;
defm UBFM : BitfieldImm<0b10, "ubfm">;
}
def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 31 - N->getZExtValue();
return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
// min(7, 31 - shift_amt)
def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 31 - N->getZExtValue();
enc = enc > 7 ? 7 : enc;
return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
// min(15, 31 - shift_amt)
def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 31 - N->getZExtValue();
enc = enc > 15 ? 15 : enc;
return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 63 - N->getZExtValue();
return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
// min(7, 63 - shift_amt)
def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 63 - N->getZExtValue();
enc = enc > 7 ? 7 : enc;
return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
// min(15, 63 - shift_amt)
def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 63 - N->getZExtValue();
enc = enc > 15 ? 15 : enc;
return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
// min(31, 63 - shift_amt)
def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{
uint64_t enc = 63 - N->getZExtValue();
enc = enc > 31 ? 31 : enc;
return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
}]>;
def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)),
(UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
(i64 (i32shift_b imm0_31:$imm)))>;
def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)),
(UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
(i64 (i64shift_b imm0_63:$imm)))>;
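// For example, "lsl w0, w1, #4" is selected as UBFMWri w0, w1, #28, #27
// (immr = (32 - 4) & 31, imms = 31 - 4), matching the architectural LSL alias.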
let AddedComplexity = 10 in {
def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)),
(SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)),
(SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
}
def : InstAlias<"asr $dst, $src, $shift",
(SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
def : InstAlias<"asr $dst, $src, $shift",
(SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)),
(UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)),
(UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
def : InstAlias<"lsr $dst, $src, $shift",
(UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
def : InstAlias<"lsr $dst, $src, $shift",
(UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
//===----------------------------------------------------------------------===//
// Conditional comparison instructions.
//===----------------------------------------------------------------------===//
defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>;
defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>;
//===----------------------------------------------------------------------===//
// Conditional select instructions.
//===----------------------------------------------------------------------===//
defm CSEL : CondSelect<0, 0b00, "csel">;
def inc : PatFrag<(ops node:$in), (add node:$in, 1)>;
defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>;
defm CSINV : CondSelectOp<1, 0b00, "csinv", not>;
defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>;
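// Architecturally: CSINC Rd, Rn, Rm, cc yields Rn if cc holds and Rm+1 otherwise;
// CSINV yields ~Rm and CSNEG yields -Rm in the not-taken case. The patterns
// below exploit this to materialise 0/1 and 0/-1 selections without a separate
// constant.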
def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
(CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
(CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
(CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
(CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
(CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
(CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV),
(CSINCWr WZR, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV),
(CSINCXr XZR, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV),
(CSINCWr GPR32:$tval, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR64:$tval, (i64 1), (i32 imm:$cc), NZCV),
(CSINCXr GPR64:$tval, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i32 1), GPR32:$fval, (i32 imm:$cc), NZCV),
(CSINCWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
def : Pat<(AArch64csel (i64 1), GPR64:$fval, (i32 imm:$cc), NZCV),
(CSINCXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;
def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV),
(CSINVWr WZR, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV),
(CSINVXr XZR, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV),
(CSINVWr GPR32:$tval, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV),
(CSINVXr GPR64:$tval, XZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV),
(CSINVWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV),
(CSINVXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;
def : Pat<(add GPR32:$val, (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
(CSINCWr GPR32:$val, GPR32:$val, (i32 imm:$cc))>;
def : Pat<(add GPR64:$val, (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
(CSINCXr GPR64:$val, GPR64:$val, (i32 imm:$cc))>;
// The aliased instruction uses the inverse of the condition code written in
// the alias; the parser already performs this inversion for these aliases
// (see the expansion example after the alias definitions).
def : InstAlias<"cset $dst, $cc",
(CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
def : InstAlias<"cset $dst, $cc",
(CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;
def : InstAlias<"csetm $dst, $cc",
(CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
def : InstAlias<"csetm $dst, $cc",
(CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;
def : InstAlias<"cinc $dst, $src, $cc",
(CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinc $dst, $src, $cc",
(CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
def : InstAlias<"cinv $dst, $src, $cc",
(CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cinv $dst, $src, $cc",
(CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
def : InstAlias<"cneg $dst, $src, $cc",
(CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
def : InstAlias<"cneg $dst, $src, $cc",
(CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
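// For illustration (a sketch of how the aliases above are resolved, not an
// extra definition): "cset w0, eq" is accepted by the parser and encoded as
// "csinc w0, wzr, wzr, ne", i.e. the condition written in the alias is
// inverted before it reaches the underlying CSINC/CSINV/CSNEG instruction.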
//===----------------------------------------------------------------------===//
// PC-relative instructions.
//===----------------------------------------------------------------------===//
let isReMaterializable = 1 in {
let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
def ADR : ADRI<0, "adr", adrlabel,
[(set GPR64:$Xd, (AArch64adr tglobaladdr:$label))]>;
} // hasSideEffects = 0
def ADRP : ADRI<1, "adrp", adrplabel,
[(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>;
} // isReMaterializable = 1
// Direct (ADR) and page (ADRP) addresses of constant pool entries, block
// addresses and other symbol operands.
def : Pat<(AArch64adr tconstpool:$cp), (ADR tconstpool:$cp)>;
def : Pat<(AArch64adr tblockaddress:$cp), (ADR tblockaddress:$cp)>;
def : Pat<(AArch64adr texternalsym:$sym), (ADR texternalsym:$sym)>;
def : Pat<(AArch64adr tjumptable:$sym), (ADR tjumptable:$sym)>;
def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;
def : Pat<(AArch64adrp texternalsym:$sym), (ADRP texternalsym:$sym)>;
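// For reference (illustrative only): a global's address is typically
// materialised in two steps,
//   adrp x0, sym            // 4 KiB page address of sym
//   add  x0, x0, :lo12:sym  // low 12 bits of sym
// which is why the ADRP patterns above accept global, constant-pool, block
// and external-symbol operands.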
//===----------------------------------------------------------------------===//
// Unconditional branch (register) instructions.
//===----------------------------------------------------------------------===//
let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def RET : BranchReg<0b0010, "ret", []>;
def DRPS : SpecialReturn<0b0101, "drps">;
def ERET : SpecialReturn<0b0100, "eret">;
} // isReturn = 1, isTerminator = 1, isBarrier = 1
// Default to the LR register.
def : InstAlias<"ret", (RET LR)>;
let isCall = 1, Defs = [LR], Uses = [SP] in {
def BLR : BranchReg<0b0001, "blr", []>;
def BLRNoIP : Pseudo<(outs), (ins GPR64noip:$Rn), []>,
Sched<[WriteBrReg]>,
PseudoInstExpansion<(BLR GPR64:$Rn)>;
def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>,
Sched<[WriteBrReg]>;
} // isCall
def : Pat<(AArch64call GPR64:$Rn),
(BLR GPR64:$Rn)>,
Requires<[NoSLSBLRMitigation]>;
def : Pat<(AArch64call GPR64noip:$Rn),
(BLRNoIP GPR64noip:$Rn)>,
Requires<[SLSBLRMitigation]>;
def : Pat<(AArch64call_rvmarker (i64 tglobaladdr:$rvfunc), GPR64:$Rn),
(BLR_RVMARKER tglobaladdr:$rvfunc, GPR64:$Rn)>,
Requires<[NoSLSBLRMitigation]>;
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
} // isBranch, isTerminator, isBarrier, isIndirectBranch
// Create a separate pseudo-instruction for codegen to use so that we don't
// flag lr as used in every function. It'll be restored before the RET by the
// epilogue if it's legitimately used.
def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]>,
Sched<[WriteBrReg]> {
let isTerminator = 1;
let isBarrier = 1;
let isReturn = 1;
}
// This is a directive-like pseudo-instruction. The purpose is to insert an
// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction
// (which in the usual case is a BLR).
let hasSideEffects = 1 in
def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> {
let AsmString = ".tlsdesccall $sym";
}
// Pseudo instruction to tell the streamer to emit a 'B' character into the
// augmentation string.
def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {}
// FIXME: maybe the scratch register used shouldn't be fixed to X1?
// FIXME: can "hasSideEffects be dropped?
// This gets lowered to an instruction sequence which takes 16 bytes
let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1, Size = 16,
isCodeGenOnly = 1 in
def TLSDESC_CALLSEQ
: Pseudo<(outs), (ins i64imm:$sym),
[(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>,
Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>;
def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
(TLSDESC_CALLSEQ texternalsym:$sym)>;
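// For illustration, the 16-byte sequence that TLSDESC_CALLSEQ is lowered to
// looks roughly like this (TLS-descriptor access; the exact relocations are
// emitted by the MC layer):
//   adrp x0, :tlsdesc:var
//   ldr  x1, [x0, :tlsdesc_lo12:var]
//   add  x0, x0, :tlsdesc_lo12:var
//   .tlsdesccall var
//   blr  x1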
//===----------------------------------------------------------------------===//
// Conditional branch (immediate) instruction.
//===----------------------------------------------------------------------===//
def Bcc : BranchCond<0, "b">;
// Armv8.8-A variant form which hints to the branch predictor that
// this branch is very likely to go the same way nearly all the time
// (even though it is not known at compile time _which_ way that is).
def BCcc : BranchCond<1, "bc">, Requires<[HasHBC]>;
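// For illustration: "bc.eq label" assembles like "b.eq label" except for a
// single hint bit in the encoding, so it is only accepted on targets with
// HBC (FEAT_HBC).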
//===----------------------------------------------------------------------===//
// Compare-and-branch instructions.
//===----------------------------------------------------------------------===//
defm CBZ : CmpBranch<0, "cbz", AArch64cbz>;
defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>;
//===----------------------------------------------------------------------===//
// Test-bit-and-branch instructions.
//===----------------------------------------------------------------------===//
defm TBZ : TestBranch<0, "tbz", AArch64tbz>;
defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>;
//===----------------------------------------------------------------------===//
// Unconditional branch (immediate) instructions.
//===----------------------------------------------------------------------===//
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
def B : BranchImm<0, "b", [(br bb:$addr)]>;
} // isBranch, isTerminator, isBarrier
let isCall = 1, Defs = [LR], Uses = [SP] in {
def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>;
} // isCall
def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>;
//===----------------------------------------------------------------------===//
// Exception generation instructions.
//===----------------------------------------------------------------------===//
let isTrap = 1 in {
def BRK : ExceptionGeneration<0b001, 0b00, "brk">;
}
def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">;
def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">;
def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">, Requires<[HasEL3]>;
def HLT : ExceptionGeneration<0b010, 0b00, "hlt">;
def HVC : ExceptionGeneration<0b000, 0b10, "hvc">;
def SMC : ExceptionGeneration<0b000, 0b11, "smc">, Requires<[HasEL3]>;
def SVC : ExceptionGeneration<0b000, 0b01, "svc">;
// DCPSn defaults to an immediate operand of zero if unspecified.
def : InstAlias<"dcps1", (DCPS1 0)>;
def : InstAlias<"dcps2", (DCPS2 0)>;
def : InstAlias<"dcps3", (DCPS3 0)>, Requires<[HasEL3]>;
def UDF : UDFType<0, "udf">;
//===----------------------------------------------------------------------===//
// Load instructions.
//===----------------------------------------------------------------------===//
// Pair (indexed, offset)
defm LDPW : LoadPairOffset<0b00, 0, GPR32z, simm7s4, "ldp">;
defm LDPX : LoadPairOffset<0b10, 0, GPR64z, simm7s8, "ldp">;
defm LDPS : LoadPairOffset<0b00, 1, FPR32Op, simm7s4, "ldp">;
defm LDPD : LoadPairOffset<0b01, 1, FPR64Op, simm7s8, "ldp">;
defm LDPQ : LoadPairOffset<0b10, 1, FPR128Op, simm7s16, "ldp">;
defm LDPSW : LoadPairOffset<0b01, 0, GPR64z, simm7s4, "ldpsw">;
// Pair (pre-indexed)
def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;
def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;
// Pair (post-indexed)
def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;
def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;
// Pair (no allocate)
defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32z, simm7s4, "ldnp">;
defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64z, simm7s8, "ldnp">;
defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32Op, simm7s4, "ldnp">;
defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64Op, simm7s8, "ldnp">;
defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128Op, simm7s16, "ldnp">;
def : Pat<(AArch64ldp (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
(LDPXi GPR64sp:$Rn, simm7s8:$offset)>;
//---
// (register offset)
//---
// Integer
defm LDRBB : Load8RO<0b00, 0, 0b01, GPR32, "ldrb", i32, zextloadi8>;
defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>;
defm LDRW : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>;
defm LDRX : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>;
// Floating-point
defm LDRB : Load8RO<0b00, 1, 0b01, FPR8Op, "ldr", untyped, load>;
defm LDRH : Load16RO<0b01, 1, 0b01, FPR16Op, "ldr", f16, load>;
defm LDRS : Load32RO<0b10, 1, 0b01, FPR32Op, "ldr", f32, load>;
defm LDRD : Load64RO<0b11, 1, 0b01, FPR64Op, "ldr", f64, load>;
defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128Op, "ldr", f128, load>;
// Load sign-extended half-word
defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>;
defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>;
// Load sign-extended byte
defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>;
defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>;
// Load sign-extended word
defm LDRSW : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>;
// Pre-fetch.
defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">;
// Regular loads have no alignment requirement, so it is safe to map the
// vector loads below directly onto the more interesting addressing modes.
// FIXME: We could do the same for bitconvert to floating point vectors.
multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop,
ValueType ScalTy, ValueType VecTy,
Instruction LOADW, Instruction LOADX,
SubRegIndex sub> {
def : Pat<(VecTy (scalar_to_vector (ScalTy
(loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))),
(INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
(LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset),
sub)>;
def : Pat<(VecTy (scalar_to_vector (ScalTy
(loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))),
(INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
(LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset),
sub)>;
}
let AddedComplexity = 10 in {
defm : ScalToVecROLoadPat<ro8, extloadi8, i32, v8i8, LDRBroW, LDRBroX, bsub>;
defm : ScalToVecROLoadPat<ro8, extloadi8, i32, v16i8, LDRBroW, LDRBroX, bsub>;
defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, load, i32, v4f16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, load, i32, v8f16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro32, load, i32, v2i32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load, i32, v4i32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load, f32, v2f32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load, f32, v4f32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro64, load, i64, v2i64, LDRDroW, LDRDroX, dsub>;
defm : ScalToVecROLoadPat<ro64, load, f64, v2f64, LDRDroW, LDRDroX, dsub>;
def : Pat <(v1i64 (scalar_to_vector (i64
(load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend64:$extend))))),
(LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
def : Pat <(v1i64 (scalar_to_vector (i64
(load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend64:$extend))))),
(LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
}
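// A sketch of what one of the instantiations above expands to (illustrative
// only; the real patterns come from the multiclass): the line
//   defm : ScalToVecROLoadPat<ro8, extloadi8, i32, v8i8, LDRBroW, LDRBroX, bsub>;
// yields patterns that select "scalar_to_vector of an extending i8 load" as a
// single FP/SIMD byte load whose result is inserted into the bsub
// subregister of an IMPLICIT_DEF vector, instead of a GPR load followed by a
// lane insert.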
// Match all 64-bit-wide loads whose type is compatible with FPR64
multiclass VecROLoadPat<ROAddrMode ro, ValueType VecTy,
Instruction LOADW, Instruction LOADX> {
def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
(LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
(LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}
let AddedComplexity = 10 in {
let Predicates = [IsLE] in {
// We must do vector loads with LD1 in big-endian.
defm : VecROLoadPat<ro64, v2i32, LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v2f32, LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v8i8, LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v4i16, LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v4f16, LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v4bf16, LDRDroW, LDRDroX>;
}
defm : VecROLoadPat<ro64, v1i64, LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v1f64, LDRDroW, LDRDroX>;
// Match all 128-bit-wide loads whose type is compatible with FPR128
let Predicates = [IsLE] in {
// We must do vector loads with LD1 in big-endian.
defm : VecROLoadPat<ro128, v2i64, LDRQroW, LDRQroX>;
defm : VecROLoadPat<ro128, v2f64, LDRQroW, LDRQroX>;
defm : VecROLoadPat<ro128, v4i32, LDRQroW, LDRQroX>;
defm : VecROLoadPat<ro128, v4f32, LDRQroW, LDRQroX>;
defm : VecROLoadPat<ro128, v8i16, LDRQroW, LDRQroX>;
defm : VecROLoadPat<ro128, v8f16, LDRQroW, LDRQroX>;
defm : VecROLoadPat<ro128, v8bf16, LDRQroW, LDRQroX>;
defm : VecROLoadPat<ro128, v16i8, LDRQroW, LDRQroX>;
}
} // AddedComplexity = 10
// zextload -> i64
multiclass ExtLoadTo64ROPat<ROAddrMode ro, SDPatternOperator loadop,
Instruction INSTW, Instruction INSTX> {
def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
(SUBREG_TO_REG (i64 0),
(INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
sub_32)>;
def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
(SUBREG_TO_REG (i64 0),
(INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
sub_32)>;
}
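// Note on the SUBREG_TO_REG idiom above: any write to a W register zeroes
// bits [63:32] of the corresponding X register, so a zero-extending load to
// i64 needs no extra instruction. For example, (i64 (zextloadi8 addr))
// selects to a single LDRBB whose W result is simply reinterpreted as an X
// register via SUBREG_TO_REG.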
let AddedComplexity = 10 in {
defm : ExtLoadTo64ROPat<ro8, zextloadi8, LDRBBroW, LDRBBroX>;
defm : ExtLoadTo64ROPat<ro16, zextloadi16, LDRHHroW, LDRHHroX>;
defm : ExtLoadTo64ROPat<ro32, zextloadi32, LDRWroW, LDRWroX>;
// zextloadi1 -> zextloadi8
defm : ExtLoadTo64ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>;
// extload -> zextload
defm : ExtLoadTo64ROPat<ro8, extloadi8, LDRBBroW, LDRBBroX>;
defm : ExtLoadTo64ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>;
defm : ExtLoadTo64ROPat<ro32, extloadi32, LDRWroW, LDRWroX>;
// extloadi1 -> zextloadi8
defm : ExtLoadTo64ROPat<ro8, extloadi1, LDRBBroW, LDRBBroX>;
}
// extload/zextload -> i32
multiclass ExtLoadTo32ROPat<ROAddrMode ro, SDPatternOperator loadop,
Instruction INSTW, Instruction INSTX> {
def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
(INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
(INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}
let AddedComplexity = 10 in {
// extload -> zextload
defm : ExtLoadTo32ROPat<ro8, extloadi8, LDRBBroW, LDRBBroX>;
defm : ExtLoadTo32ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>;
defm : ExtLoadTo32ROPat<ro32, extloadi32, LDRWroW, LDRWroX>;
// zextloadi1 -> zextloadi8
defm : ExtLoadTo32ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>;
}
//---
// (unsigned immediate)
//---
defm LDRX : LoadUI<0b11, 0, 0b01, GPR64z, uimm12s8, "ldr",
[(set GPR64z:$Rt,
(load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRW : LoadUI<0b10, 0, 0b01, GPR32z, uimm12s4, "ldr",
[(set GPR32z:$Rt,
(load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
defm LDRB : LoadUI<0b00, 1, 0b01, FPR8Op, uimm12s1, "ldr",
[(set FPR8Op:$Rt,
(load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
defm LDRH : LoadUI<0b01, 1, 0b01, FPR16Op, uimm12s2, "ldr",
[(set (f16 FPR16Op:$Rt),
(load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>;
defm LDRS : LoadUI<0b10, 1, 0b01, FPR32Op, uimm12s4, "ldr",
[(set (f32 FPR32Op:$Rt),
(load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
defm LDRD : LoadUI<0b11, 1, 0b01, FPR64Op, uimm12s8, "ldr",
[(set (f64 FPR64Op:$Rt),
(load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128Op, uimm12s16, "ldr",
[(set (f128 FPR128Op:$Rt),
(load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>;
// bf16 load pattern
def : Pat <(bf16 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
(LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
// Regular loads have no alignment requirement, so it is safe to map the
// vector loads below directly onto the more interesting addressing modes.
// FIXME: We could do the same for bitconvert to floating point vectors.
def : Pat <(v8i8 (scalar_to_vector (i32
(extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
(INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
(LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
def : Pat <(v16i8 (scalar_to_vector (i32
(extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
def : Pat <(v4i16 (scalar_to_vector (i32
(extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
(INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
(LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
def : Pat <(v8i16 (scalar_to_vector (i32
(extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
(INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
(LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
def : Pat <(v2i32 (scalar_to_vector (i32
(load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
(INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
(LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
def : Pat <(v4i32 (scalar_to_vector (i32
(load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
(LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
def : Pat <(v1i64 (scalar_to_vector (i64
(load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat <(v2i64 (scalar_to_vector (i64
(load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>;
// Match all 64-bit-wide loads whose type is compatible with FPR64
let Predicates = [IsLE] in {
// We must use LD1 to perform vector loads in big-endian.
def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v4bf16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
}
def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
(LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
// Match all 128-bit-wide loads whose type is compatible with FPR128
let Predicates = [IsLE] in {
// We must use LD1 to perform vector loads in big-endian.
def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
def : Pat<(v8bf16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
}
def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
(LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh",
[(set GPR32:$Rt,
(zextloadi16 (am_indexed16 GPR64sp:$Rn,
uimm12s2:$offset)))]>;
defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb",
[(set GPR32:$Rt,
(zextloadi8 (am_indexed8 GPR64sp:$Rn,
uimm12s1:$offset)))]>;
// zextload -> i64
def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
(SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
(SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;
// zextloadi1 -> zextloadi8
def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
(LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
(SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
// extload -> zextload
def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
(LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
(LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
(LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
(SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
(SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;
def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
(SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
(SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
// load sign-extended half-word
defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh",
[(set GPR32:$Rt,
(sextloadi16 (am_indexed16 GPR64sp:$Rn,
uimm12s2:$offset)))]>;
defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh",
[(set GPR64:$Rt,
(sextloadi16 (am_indexed16 GPR64sp:$Rn,
uimm12s2:$offset)))]>;
// load sign-extended byte
defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb",
[(set GPR32:$Rt,
(sextloadi8 (am_indexed8 GPR64sp:$Rn,
uimm12s1:$offset)))]>;
defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb",
[(set GPR64:$Rt,
(sextloadi8 (am_indexed8 GPR64sp:$Rn,
uimm12s1:$offset)))]>;
// load sign-extended word
defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
[(set GPR64:$Rt,
(sextloadi32 (am_indexed32 GPR64sp:$Rn,
uimm12s4:$offset)))]>;
// load zero-extended word
def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
(SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
// Pre-fetch.
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
[(AArch64Prefetch imm:$Rt,
(am_indexed64 GPR64sp:$Rn,
uimm12s8:$offset))]>;
def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>;
//---
// (literal)
def alignedglobal : PatLeaf<(iPTR iPTR:$label), [{
if (auto *G = dyn_cast<GlobalAddressSDNode>(N)) {
const DataLayout &DL = MF->getDataLayout();
Align Align = G->getGlobal()->getPointerAlignment(DL);
return Align >= 4 && G->getOffset() % 4 == 0;
}
if (auto *C = dyn_cast<ConstantPoolSDNode>(N))
return C->getAlign() >= 4 && C->getOffset() % 4 == 0;
return false;
}]>;
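// The alignedglobal predicate above restricts the literal forms to targets
// known to be at least 4-byte aligned (at a 4-byte-aligned offset), matching
// the LDR (literal) encoding, whose 19-bit immediate is a word offset from
// the PC.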
def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr",
[(set GPR32z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr",
[(set GPR64z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr",
[(set (f32 FPR32Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr",
[(set (f64 FPR64Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr",
[(set (f128 FPR128Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
// load sign-extended word
def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw",
[(set GPR64z:$Rt, (sextloadi32 (AArch64adr alignedglobal:$label)))]>;
let AddedComplexity = 20 in {
def : Pat<(i64 (zextloadi32 (AArch64adr alignedglobal:$label))),
(SUBREG_TO_REG (i64 0), (LDRWl $label), sub_32)>;
}
// prefetch
def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>;
// [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>;
//---
// (unscaled immediate)
defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64z, "ldur",
[(set GPR64z:$Rt,
(load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32z, "ldur",
[(set GPR32z:$Rt,
(load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur",
[(set FPR8Op:$Rt,
(load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur",
[(set (f16 FPR16Op:$Rt),
(load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32Op, "ldur",
[(set (f32 FPR32Op:$Rt),
(load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64Op, "ldur",
[(set (f64 FPR64Op:$Rt),
(load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128Op, "ldur",
[(set (f128 FPR128Op:$Rt),
(load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURHH
: LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh",
[(set GPR32:$Rt,
(zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURBB
: LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb",
[(set GPR32:$Rt,
(zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
// Match all 64-bit-wide loads whose type is compatible with FPR64
let Predicates = [IsLE] in {
def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
(LDURDi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
(LDURDi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
(LDURDi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
(LDURDi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
(LDURDi GPR64sp:$Rn, simm9:$offset)>;
}
def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
(LDURDi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
(LDURDi GPR64sp:$Rn, simm9:$offset)>;
// Match all 128-bit-wide loads whose type is compatible with FPR128
let Predicates = [IsLE] in {
def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
(LDURQi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
(LDURQi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
(LDURQi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
(LDURQi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
(LDURQi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
(LDURQi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
(LDURQi GPR64sp:$Rn, simm9:$offset)>;
}
// anyext -> zext
def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
(LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
(LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
(LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
(SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
(SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
(SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
(SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
// unscaled zext
def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
(LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
(LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
(LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
(SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
(SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
(SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
(SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
//---
// LDR mnemonics fall back to LDUR for negative or unaligned offsets.
// Define new assembler match classes, as we want to match these only when
// they don't otherwise match the scaled addressing mode for LDR/STR. Don't
// associate a DiagnosticType either, as we want the diagnostic for the
// canonical form (the scaled operand) to take precedence.
class SImm9OffsetOperand<int Width> : AsmOperandClass {
let Name = "SImm9OffsetFB" # Width;
let PredicateMethod = "isSImm9OffsetFB<" # Width # ">";
let RenderMethod = "addImmOperands";
}
def SImm9OffsetFB8Operand : SImm9OffsetOperand<8>;
def SImm9OffsetFB16Operand : SImm9OffsetOperand<16>;
def SImm9OffsetFB32Operand : SImm9OffsetOperand<32>;
def SImm9OffsetFB64Operand : SImm9OffsetOperand<64>;
def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>;
def simm9_offset_fb8 : Operand<i64> {
let ParserMatchClass = SImm9OffsetFB8Operand;
}
def simm9_offset_fb16 : Operand<i64> {
let ParserMatchClass = SImm9OffsetFB16Operand;
}
def simm9_offset_fb32 : Operand<i64> {
let ParserMatchClass = SImm9OffsetFB32Operand;
}
def simm9_offset_fb64 : Operand<i64> {
let ParserMatchClass = SImm9OffsetFB64Operand;
}
def simm9_offset_fb128 : Operand<i64> {
let ParserMatchClass = SImm9OffsetFB128Operand;
}
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
(LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
(LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
(LDURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
(LDURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
(LDURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
(LDURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
(LDURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;
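// For example, "ldr x0, [x1, #1]" cannot use the scaled LDRXui form (the
// offset is not a multiple of 8), so the aliases above let it match the
// unscaled LDURXi encoding instead; the same applies to negative offsets.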
// zextload -> i64
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
(SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
(SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
// load sign-extended half-word
defm LDURSHW
: LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh",
[(set GPR32:$Rt,
(sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSHX
: LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh",
[(set GPR64:$Rt,
(sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
// load sign-extended byte
defm LDURSBW
: LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb",
[(set GPR32:$Rt,
(sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSBX
: LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb",
[(set GPR64:$Rt,
(sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
// load sign-extended word
defm LDURSW
: LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw",
[(set GPR64:$Rt,
(sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
// Zero- and sign-extending aliases from the generic LDR* mnemonics to LDUR*.
def : InstAlias<"ldrb $Rt, [$Rn, $offset]",
(LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrh $Rt, [$Rn, $offset]",
(LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
(LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
(LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
(LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
(LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsw $Rt, [$Rn, $offset]",
(LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
// Pre-fetch.
defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
[(AArch64Prefetch imm:$Rt,
(am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
//---
// (unscaled immediate, unprivileged)
defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">;
defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">;
defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">;
defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">;
// load sign-extended half-word
defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">;
defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">;
// load sign-extended byte
defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">;
defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">;
// load sign-extended word
defm LDTRSW : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">;
//---
// (immediate pre-indexed)
def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32z, "ldr">;
def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64z, "ldr">;
def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8Op, "ldr">;
def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128Op, "ldr">;
// load sign-extended half-word
def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;
// load sign-extended byte
def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;
// load zero-extended byte / half-word
def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32z, "ldrh">;
// load sign-extended word
def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;
//---
// (immediate post-indexed)
def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32z, "ldr">;
def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64z, "ldr">;
def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8Op, "ldr">;
def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128Op, "ldr">;
// load sign-extended half-word
def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;
// load sign-extended byte
def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;
// load zero-extended byte / half-word
def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32z, "ldrh">;
// load sign-extended word
def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;
//===----------------------------------------------------------------------===//
// Store instructions.
//===----------------------------------------------------------------------===//
// Pair (indexed, offset)
// FIXME: Use dedicated range-checked addressing mode operand here.
defm STPW : StorePairOffset<0b00, 0, GPR32z, simm7s4, "stp">;
defm STPX : StorePairOffset<0b10, 0, GPR64z, simm7s8, "stp">;
defm STPS : StorePairOffset<0b00, 1, FPR32Op, simm7s4, "stp">;
defm STPD : StorePairOffset<0b01, 1, FPR64Op, simm7s8, "stp">;
defm STPQ : StorePairOffset<0b10, 1, FPR128Op, simm7s16, "stp">;
// Pair (pre-indexed)
def STPWpre : StorePairPreIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpre : StorePairPreIdx<0b10, 0, GPR64z, simm7s8, "stp">;
def STPSpre : StorePairPreIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpre : StorePairPreIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpre : StorePairPreIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
// Pair (post-indexed)
def STPWpost : StorePairPostIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpost : StorePairPostIdx<0b10, 0, GPR64z, simm7s8, "stp">;
def STPSpost : StorePairPostIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpost : StorePairPostIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpost : StorePairPostIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
// Pair (no allocate)
defm STNPW : StorePairNoAlloc<0b00, 0, GPR32z, simm7s4, "stnp">;
defm STNPX : StorePairNoAlloc<0b10, 0, GPR64z, simm7s8, "stnp">;
defm STNPS : StorePairNoAlloc<0b00, 1, FPR32Op, simm7s4, "stnp">;
defm STNPD : StorePairNoAlloc<0b01, 1, FPR64Op, simm7s8, "stnp">;
defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">;
def : Pat<(AArch64stp GPR64z:$Rt, GPR64z:$Rt2, (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
(STPXi GPR64z:$Rt, GPR64z:$Rt2, GPR64sp:$Rn, simm7s8:$offset)>;
def : Pat<(AArch64stnp FPR128:$Rt, FPR128:$Rt2, (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
(STNPQi FPR128:$Rt, FPR128:$Rt2, GPR64sp:$Rn, simm7s16:$offset)>;
//---
// (Register offset)
// Integer
defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>;
defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>;
defm STRW : Store32RO<0b10, 0, 0b00, GPR32, "str", i32, store>;
defm STRX : Store64RO<0b11, 0, 0b00, GPR64, "str", i64, store>;
// Floating-point
defm STRB : Store8RO< 0b00, 1, 0b00, FPR8Op, "str", untyped, store>;
defm STRH : Store16RO<0b01, 1, 0b00, FPR16Op, "str", f16, store>;
defm STRS : Store32RO<0b10, 1, 0b00, FPR32Op, "str", f32, store>;
defm STRD : Store64RO<0b11, 1, 0b00, FPR64Op, "str", f64, store>;
defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str">;
let Predicates = [UseSTRQro], AddedComplexity = 10 in {
def : Pat<(store (f128 FPR128:$Rt),
(ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend128:$extend)),
(STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>;
def : Pat<(store (f128 FPR128:$Rt),
(ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend128:$extend)),
(STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend)>;
}
multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop,
Instruction STRW, Instruction STRX> {
def : Pat<(storeop GPR64:$Rt,
(ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
(STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32),
GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
def : Pat<(storeop GPR64:$Rt,
(ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
(STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32),
GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}
let AddedComplexity = 10 in {
// truncstore i64
defm : TruncStoreFrom64ROPat<ro8, truncstorei8, STRBBroW, STRBBroX>;
defm : TruncStoreFrom64ROPat<ro16, truncstorei16, STRHHroW, STRHHroX>;
defm : TruncStoreFrom64ROPat<ro32, truncstorei32, STRWroW, STRWroX>;
}
multiclass VecROStorePat<ROAddrMode ro, ValueType VecTy, RegisterClass FPR,
Instruction STRW, Instruction STRX> {
def : Pat<(store (VecTy FPR:$Rt),
(ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
(STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
def : Pat<(store (VecTy FPR:$Rt),
(ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
(STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}
let AddedComplexity = 10 in {
// Match all 64-bit-wide stores whose type is compatible with FPR64
let Predicates = [IsLE] in {
// We must use ST1 to store vectors in big-endian.
defm : VecROStorePat<ro64, v2i32, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v2f32, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v4i16, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v8i8, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v4f16, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v4bf16, FPR64, STRDroW, STRDroX>;
}
defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>;
// Match all 128-bit-wide stores whose type is compatible with FPR128
let Predicates = [IsLE, UseSTRQro] in {
// We must use ST1 to store vectors in big-endian.
defm : VecROStorePat<ro128, v2i64, FPR128, STRQroW, STRQroX>;
defm : VecROStorePat<ro128, v2f64, FPR128, STRQroW, STRQroX>;
defm : VecROStorePat<ro128, v4i32, FPR128, STRQroW, STRQroX>;
defm : VecROStorePat<ro128, v4f32, FPR128, STRQroW, STRQroX>;
defm : VecROStorePat<ro128, v8i16, FPR128, STRQroW, STRQroX>;
defm : VecROStorePat<ro128, v16i8, FPR128, STRQroW, STRQroX>;
defm : VecROStorePat<ro128, v8f16, FPR128, STRQroW, STRQroX>;
defm : VecROStorePat<ro128, v8bf16, FPR128, STRQroW, STRQroX>;
}
} // AddedComplexity = 10
// Match stores from lane 0 to the appropriate subreg's store.
multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop,
ValueType VecTy, ValueType STy,
SubRegIndex SubRegIdx,
Instruction STRW, Instruction STRX> {
def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
(ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
(STRW (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
(ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
(STRX (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}
let AddedComplexity = 19 in {
defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, hsub, STRHroW, STRHroX>;
defm : VecROStoreLane0Pat<ro16, store, v8f16, f16, hsub, STRHroW, STRHroX>;
defm : VecROStoreLane0Pat<ro32, store, v4i32, i32, ssub, STRSroW, STRSroX>;
defm : VecROStoreLane0Pat<ro32, store, v4f32, f32, ssub, STRSroW, STRSroX>;
defm : VecROStoreLane0Pat<ro64, store, v2i64, i64, dsub, STRDroW, STRDroX>;
defm : VecROStoreLane0Pat<ro64, store, v2f64, f64, dsub, STRDroW, STRDroX>;
}
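// For illustration: with these patterns, storing lane 0 of a v4f32 with a
// register offset becomes a plain single-precision store of the ssub
// subregister, roughly "str s0, [x0, x1, lsl #2]", rather than an explicit
// lane extract followed by a separate store.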
//---
// (unsigned immediate)
defm STRX : StoreUIz<0b11, 0, 0b00, GPR64z, uimm12s8, "str",
[(store GPR64z:$Rt,
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRW : StoreUIz<0b10, 0, 0b00, GPR32z, uimm12s4, "str",
[(store GPR32z:$Rt,
(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
defm STRB : StoreUI<0b00, 1, 0b00, FPR8Op, uimm12s1, "str",
[(store FPR8Op:$Rt,
(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>;
defm STRH : StoreUI<0b01, 1, 0b00, FPR16Op, uimm12s2, "str",
[(store (f16 FPR16Op:$Rt),
(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>;
defm STRS : StoreUI<0b10, 1, 0b00, FPR32Op, uimm12s4, "str",
[(store (f32 FPR32Op:$Rt),
(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
defm STRD : StoreUI<0b11, 1, 0b00, FPR64Op, uimm12s8, "str",
[(store (f64 FPR64Op:$Rt),
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRQ : StoreUI<0b00, 1, 0b10, FPR128Op, uimm12s16, "str", []>;
defm STRHH : StoreUIz<0b01, 0, 0b00, GPR32z, uimm12s2, "strh",
[(truncstorei16 GPR32z:$Rt,
(am_indexed16 GPR64sp:$Rn,
uimm12s2:$offset))]>;
defm STRBB : StoreUIz<0b00, 0, 0b00, GPR32z, uimm12s1, "strb",
[(truncstorei8 GPR32z:$Rt,
(am_indexed8 GPR64sp:$Rn,
uimm12s1:$offset))]>;
// bf16 store pattern
def : Pat<(store (bf16 FPR16Op:$Rt),
(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
(STRHui FPR16:$Rt, GPR64sp:$Rn, uimm12s2:$offset)>;
let AddedComplexity = 10 in {
// Match all 64-bit-wide stores whose type is compatible with FPR64
def : Pat<(store (v1i64 FPR64:$Rt),
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
(STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(store (v1f64 FPR64:$Rt),
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
(STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
let Predicates = [IsLE] in {
// We must use ST1 to store vectors in big-endian.
def : Pat<(store (v2f32 FPR64:$Rt),
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
(STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(store (v8i8 FPR64:$Rt),
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
(STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(store (v4i16 FPR64:$Rt),
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
(STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(store (v2i32 FPR64:$Rt),
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
(STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(store (v4f16 FPR64:$Rt),
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
(STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(store (v4bf16 FPR64:$Rt),
(am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
(STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
}
// Match all 128-bit-wide stores whose type is compatible with FPR128
def : Pat<(store (f128 FPR128:$Rt),
(am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
(STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
let Predicates = [IsLE] in {
// We must use ST1 to store vectors in big-endian.
def : Pat<(store (v4f32 FPR128:$Rt),
(am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
(STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
def : Pat<(store (v2f64 FPR128:$Rt),
(am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
(STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
def : Pat<(store (v16i8 FPR128:$Rt),
(am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
(STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
def : Pat<(store (v8i16 FPR128:$Rt),
(am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
(STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
def : Pat<(store (v4i32 FPR128:$Rt),
(am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
(STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
def : Pat<(store (v2i64 FPR128:$Rt),
(am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
(STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
def : Pat<(store (v8f16 FPR128:$Rt),
(am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
(STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
def : Pat<(store (v8bf16 FPR128:$Rt),
(am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
(STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
}
// truncstore i64
def : Pat<(truncstorei32 GPR64:$Rt,
(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
(STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt,
(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
(STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
(STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>;
} // AddedComplexity = 10
// Match stores from lane 0 to the appropriate subreg's store.
multiclass VecStoreLane0Pat<ComplexPattern UIAddrMode, SDPatternOperator storeop,
ValueType VTy, ValueType STy,
SubRegIndex SubRegIdx, Operand IndexType,
Instruction STR> {
def : Pat<(storeop (STy (vector_extract (VTy VecListOne128:$Vt), 0)),
(UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
(STR (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
GPR64sp:$Rn, IndexType:$offset)>;
}
let AddedComplexity = 19 in {
defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, hsub, uimm12s2, STRHui>;
defm : VecStoreLane0Pat<am_indexed16, store, v8f16, f16, hsub, uimm12s2, STRHui>;
defm : VecStoreLane0Pat<am_indexed32, store, v4i32, i32, ssub, uimm12s4, STRSui>;
defm : VecStoreLane0Pat<am_indexed32, store, v4f32, f32, ssub, uimm12s4, STRSui>;
defm : VecStoreLane0Pat<am_indexed64, store, v2i64, i64, dsub, uimm12s8, STRDui>;
defm : VecStoreLane0Pat<am_indexed64, store, v2f64, f64, dsub, uimm12s8, STRDui>;
}
//---
// (unscaled immediate)
defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64z, "stur",
[(store GPR64z:$Rt,
(am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32z, "stur",
[(store GPR32z:$Rt,
(am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8Op, "stur",
[(store FPR8Op:$Rt,
(am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16Op, "stur",
[(store (f16 FPR16Op:$Rt),
(am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32Op, "stur",
[(store (f32 FPR32Op:$Rt),
(am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64Op, "stur",
[(store (f64 FPR64Op:$Rt),
(am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128Op, "stur",
[(store (f128 FPR128Op:$Rt),
(am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>;
defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32z, "sturh",
[(truncstorei16 GPR32z:$Rt,
(am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32z, "sturb",
[(truncstorei8 GPR32z:$Rt,
(am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
// Armv8.4 Weaker Release Consistency enhancements
// LDAPR & STLR with Immediate Offset instructions
let Predicates = [HasRCPC_IMMO] in {
defm STLURB : BaseStoreUnscaleV84<"stlurb", 0b00, 0b00, GPR32>;
defm STLURH : BaseStoreUnscaleV84<"stlurh", 0b01, 0b00, GPR32>;
defm STLURW : BaseStoreUnscaleV84<"stlur", 0b10, 0b00, GPR32>;
defm STLURX : BaseStoreUnscaleV84<"stlur", 0b11, 0b00, GPR64>;
defm LDAPURB : BaseLoadUnscaleV84<"ldapurb", 0b00, 0b01, GPR32>;
defm LDAPURSBW : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b11, GPR32>;
defm LDAPURSBX : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b10, GPR64>;
defm LDAPURH : BaseLoadUnscaleV84<"ldapurh", 0b01, 0b01, GPR32>;
defm LDAPURSHW : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b11, GPR32>;
defm LDAPURSHX : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b10, GPR64>;
defm LDAPUR : BaseLoadUnscaleV84<"ldapur", 0b10, 0b01, GPR32>;
defm LDAPURSW : BaseLoadUnscaleV84<"ldapursw", 0b10, 0b10, GPR64>;
defm LDAPURX : BaseLoadUnscaleV84<"ldapur", 0b11, 0b01, GPR64>;
}
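// For illustration: these Armv8.4 forms add a signed 9-bit unscaled offset to
// the acquire/release accesses, e.g. "ldapur w0, [x1, #-4]" is an RCpc
// acquire load and "stlur w0, [x1, #-4]" the matching release store, whereas
// the plain LDAPR/STLR forms take only a base register.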
// Match all 64-bit-wide stores whose type is compatible with FPR64
def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
(STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
(STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
let AddedComplexity = 10 in {
let Predicates = [IsLE] in {
// We must use ST1 to store vectors in big-endian.
def : Pat<(store (v2f32 FPR64:$Rt),
(am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
(STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(store (v8i8 FPR64:$Rt),
(am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
(STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(store (v4i16 FPR64:$Rt),
(am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
(STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(store (v2i32 FPR64:$Rt),
(am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
(STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(store (v4f16 FPR64:$Rt),
(am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
(STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(store (v4bf16 FPR64:$Rt),
(am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
(STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}
// Match all 128-bit-wide stores whose type is compatible with FPR128
def : Pat<(store (f128 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
(STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
let Predicates = [IsLE] in {
// We must use ST1 to store vectors in big-endian.
def : Pat<(store (v4f32 FPR128:$Rt),
(am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
(STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(store (v2f64 FPR128:$Rt),
(am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
(STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(store (v16i8 FPR128:$Rt),
(am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
(STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(store (v8i16 FPR128:$Rt),
(am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
(STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(store (v4i32 FPR128:$Rt),
(am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
(STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(store (v2i64 FPR128:$Rt),
(am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
(STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(store (v8f16 FPR128:$Rt),
(am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
(STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(store (v8bf16 FPR128:$Rt),
(am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
(STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}
} // AddedComplexity = 10
// unscaled i64 truncating stores
def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
(STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
(STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
(STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
// Match stores from lane 0 to the appropriate subreg's store.
multiclass VecStoreULane0Pat<SDPatternOperator StoreOp,
ValueType VTy, ValueType STy,
SubRegIndex SubRegIdx, Instruction STR> {
defm : VecStoreLane0Pat<am_unscaled128, StoreOp, VTy, STy, SubRegIdx, simm9, STR>;
}
let AddedComplexity = 19 in {
defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, hsub, STURHi>;
defm : VecStoreULane0Pat<store, v8f16, f16, hsub, STURHi>;
defm : VecStoreULane0Pat<store, v4i32, i32, ssub, STURSi>;
defm : VecStoreULane0Pat<store, v4f32, f32, ssub, STURSi>;
defm : VecStoreULane0Pat<store, v2i64, i64, dsub, STURDi>;
defm : VecStoreULane0Pat<store, v2f64, f64, dsub, STURDi>;
}
//---
// STR mnemonics fall back to STUR for negative or unaligned offsets.
def : InstAlias<"str $Rt, [$Rn, $offset]",
(STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
(STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
(STURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
(STURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
(STURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
(STURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
(STURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;
def : InstAlias<"strb $Rt, [$Rn, $offset]",
(STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"strh $Rt, [$Rn, $offset]",
(STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
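As an illustrative aside (not part of the .td source being diffed): the effect of these aliases is that a hand-written "str"/"strb"/"strh" with a negative or otherwise unscalable immediate is still accepted and is encoded as the unscaled STUR form. A minimal C sketch with GNU inline assembly, assuming an AArch64 toolchain; the function name is hypothetical.

#include <stdint.h>

/* "#-8" cannot be encoded in the scaled, unsigned-offset STR form, so the
 * assembler falls back to the STUR encoding for this "str" mnemonic, which
 * is exactly what the InstAlias entries above model. */
void store_at_minus_8(int64_t *p, int64_t v) {
    __asm__ volatile("str %0, [%1, #-8]" : : "r"(v), "r"(p) : "memory");
}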
//---
// (unscaled immediate, unprivileged)
defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">;
defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">;
defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">;
defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">;
//---
// (immediate pre-indexed)
def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32z, "str", pre_store, i32>;
def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64z, "str", pre_store, i64>;
def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op, "str", pre_store, untyped>;
def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op, "str", pre_store, f16>;
def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op, "str", pre_store, f32>;
def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op, "str", pre_store, f64>;
def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128Op, "str", pre_store, f128>;
def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32z, "strb", pre_truncsti8, i32>;
def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32z, "strh", pre_truncsti16, i32>;
// truncstore i64
def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
(STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
simm9:$off)>;
def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
(STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
simm9:$off)>;
def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
(STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
simm9:$off)>;
def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
(STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
(STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
(STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
(STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
(STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
(STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
(STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
//---
// (immediate post-indexed)
def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32z, "str", post_store, i32>;
def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64z, "str", post_store, i64>;
def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op, "str", post_store, untyped>;
def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op, "str", post_store, f16>;
def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op, "str", post_store, f32>;
def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op, "str", post_store, f64>;
def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128Op, "str", post_store, f128>;
def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32z, "strb", post_truncsti8, i32>;
def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32z, "strh", post_truncsti16, i32>;
// truncstore i64
def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
(STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
simm9:$off)>;
def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
(STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
simm9:$off)>;
def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
(STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
simm9:$off)>;
def : Pat<(post_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off),
(STRHpost FPR16:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4bf16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
(STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_store (v8bf16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
(STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
//===----------------------------------------------------------------------===//
// Load/store exclusive instructions.
//===----------------------------------------------------------------------===//
def LDARW : LoadAcquire <0b10, 1, 1, 0, 1, GPR32, "ldar">;
def LDARX : LoadAcquire <0b11, 1, 1, 0, 1, GPR64, "ldar">;
def LDARB : LoadAcquire <0b00, 1, 1, 0, 1, GPR32, "ldarb">;
def LDARH : LoadAcquire <0b01, 1, 1, 0, 1, GPR32, "ldarh">;
def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">;
def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">;
def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">;
def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">;
def LDXRW : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">;
def LDXRX : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">;
def LDXRB : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">;
def LDXRH : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">;
def STLRW : StoreRelease <0b10, 1, 0, 0, 1, GPR32, "stlr">;
def STLRX : StoreRelease <0b11, 1, 0, 0, 1, GPR64, "stlr">;
def STLRB : StoreRelease <0b00, 1, 0, 0, 1, GPR32, "stlrb">;
def STLRH : StoreRelease <0b01, 1, 0, 0, 1, GPR32, "stlrh">;
def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">;
def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">;
def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">;
def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">;
def STXRW : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">;
def STXRX : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">;
def STXRB : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">;
def STXRH : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">;
def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">;
def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">;
def LDXPW : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">;
def LDXPX : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">;
def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">;
def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;
def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;
let Predicates = [HasLOR] in {
// v8.1a "Limited Order Region" extension load-acquire instructions
def LDLARW : LoadAcquire <0b10, 1, 1, 0, 0, GPR32, "ldlar">;
def LDLARX : LoadAcquire <0b11, 1, 1, 0, 0, GPR64, "ldlar">;
def LDLARB : LoadAcquire <0b00, 1, 1, 0, 0, GPR32, "ldlarb">;
def LDLARH : LoadAcquire <0b01, 1, 1, 0, 0, GPR32, "ldlarh">;
// v8.1a "Limited Order Region" extension store-release instructions
def STLLRW : StoreRelease <0b10, 1, 0, 0, 0, GPR32, "stllr">;
def STLLRX : StoreRelease <0b11, 1, 0, 0, 0, GPR64, "stllr">;
def STLLRB : StoreRelease <0b00, 1, 0, 0, 0, GPR32, "stllrb">;
def STLLRH : StoreRelease <0b01, 1, 0, 0, 0, GPR32, "stllrh">;
}
//===----------------------------------------------------------------------===//
// Scaled floating point to integer conversion instructions.
//===----------------------------------------------------------------------===//
defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>;
defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>;
defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>;
defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>;
defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>;
defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>;
defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>;
defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>;
defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
// AArch64's FCVT instructions saturate when out of range.
multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
let Predicates = [HasFullFP16] in {
def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
(!cast<Instruction>(INST # UWHr) f16:$Rn)>;
def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
(!cast<Instruction>(INST # UXHr) f16:$Rn)>;
}
def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
(!cast<Instruction>(INST # UWSr) f32:$Rn)>;
def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
(!cast<Instruction>(INST # UXSr) f32:$Rn)>;
def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
(!cast<Instruction>(INST # UWDr) f64:$Rn)>;
def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
(!cast<Instruction>(INST # UXDr) f64:$Rn)>;
let Predicates = [HasFullFP16] in {
def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
(!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
(!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
}
def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
(!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
(!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
(!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
(!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
}
defm : FPToIntegerSatPats<fp_to_sint_sat, "FCVTZS">;
defm : FPToIntegerSatPats<fp_to_uint_sat, "FCVTZU">;
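A minimal sketch of the saturating behaviour these patterns rely on, assuming an AArch64 toolchain with ACLE's arm_neon.h (vcvts_s32_f32 is the ACLE spelling of the scalar FCVTZS form); not part of the .td source.

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    /* Out-of-range inputs clamp to INT32_MAX / INT32_MIN instead of being
     * undefined, as a plain C cast would be. */
    printf("%d\n", vcvts_s32_f32(1e20f));   /* 2147483647 */
    printf("%d\n", vcvts_s32_f32(-1e20f));  /* -2147483648 */
    return 0;
}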
multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
let Predicates = [HasFullFP16] in {
def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>;
def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>;
}
def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>;
def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>;
def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;
let Predicates = [HasFullFP16] in {
def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
(!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
(!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
}
def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
(!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
(!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
(!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
(!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
}
defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;
multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> {
def : Pat<(i32 (to_int (round f32:$Rn))),
(!cast<Instruction>(INST # UWSr) f32:$Rn)>;
def : Pat<(i64 (to_int (round f32:$Rn))),
(!cast<Instruction>(INST # UXSr) f32:$Rn)>;
def : Pat<(i32 (to_int (round f64:$Rn))),
(!cast<Instruction>(INST # UWDr) f64:$Rn)>;
def : Pat<(i64 (to_int (round f64:$Rn))),
(!cast<Instruction>(INST # UXDr) f64:$Rn)>;
// These instructions saturate like fp_to_[su]int_sat.
let Predicates = [HasFullFP16] in {
def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
(!cast<Instruction>(INST # UWHr) f16:$Rn)>;
def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
(!cast<Instruction>(INST # UXHr) f16:$Rn)>;
}
def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
(!cast<Instruction>(INST # UWSr) f32:$Rn)>;
def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
(!cast<Instruction>(INST # UXSr) f32:$Rn)>;
def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)),
(!cast<Instruction>(INST # UWDr) f64:$Rn)>;
def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
(!cast<Instruction>(INST # UXDr) f64:$Rn)>;
}
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil, "FCVTPS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil, "FCVTPU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">;
let Predicates = [HasFullFP16] in {
def : Pat<(i32 (lround f16:$Rn)),
(!cast<Instruction>(FCVTASUWHr) f16:$Rn)>;
def : Pat<(i64 (lround f16:$Rn)),
(!cast<Instruction>(FCVTASUXHr) f16:$Rn)>;
def : Pat<(i64 (llround f16:$Rn)),
(!cast<Instruction>(FCVTASUXHr) f16:$Rn)>;
}
def : Pat<(i32 (lround f32:$Rn)),
(!cast<Instruction>(FCVTASUWSr) f32:$Rn)>;
def : Pat<(i32 (lround f64:$Rn)),
(!cast<Instruction>(FCVTASUWDr) f64:$Rn)>;
def : Pat<(i64 (lround f32:$Rn)),
(!cast<Instruction>(FCVTASUXSr) f32:$Rn)>;
def : Pat<(i64 (lround f64:$Rn)),
(!cast<Instruction>(FCVTASUXDr) f64:$Rn)>;
def : Pat<(i64 (llround f32:$Rn)),
(!cast<Instruction>(FCVTASUXSr) f32:$Rn)>;
def : Pat<(i64 (llround f64:$Rn)),
(!cast<Instruction>(FCVTASUXDr) f64:$Rn)>;
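A small sketch of the semantics behind the lround/llround patterns above (illustrative only, not part of the .td source): lroundf rounds to nearest with ties away from zero, which is the rounding FCVTAS implements, so the libm call can collapse to a single instruction.

#include <math.h>
#include <stdio.h>

int main(void) {
    printf("%ld\n", lroundf(2.5f));   /* 3: ties round away from zero */
    printf("%ld\n", lroundf(-2.5f));  /* -3 */
    return 0;
}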
//===----------------------------------------------------------------------===//
// Scaled integer to floating point conversion instructions.
//===----------------------------------------------------------------------===//
defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>;
defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>;
//===----------------------------------------------------------------------===//
// Unscaled integer to floating point conversion instruction.
//===----------------------------------------------------------------------===//
defm FMOV : UnscaledConversion<"fmov">;
// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in {
def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>,
Sched<[WriteF]>, Requires<[HasFullFP16]>;
def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
Sched<[WriteF]>;
def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
Sched<[WriteF]>;
}
// Similarly add aliases
def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>,
Requires<[HasFullFP16]>;
def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>;
def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>;
//===----------------------------------------------------------------------===//
// Floating point conversion instruction.
//===----------------------------------------------------------------------===//
defm FCVT : FPConversion<"fcvt">;
//===----------------------------------------------------------------------===//
// Floating point single operand instructions.
//===----------------------------------------------------------------------===//
defm FABS : SingleOperandFPData<0b0001, "fabs", fabs>;
defm FMOV : SingleOperandFPData<0b0000, "fmov">;
defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>;
defm FRINTA : SingleOperandFPData<0b1100, "frinta", fround>;
defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>;
defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>;
defm FRINTN : SingleOperandFPData<0b1000, "frintn", froundeven>;
defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>;
defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>;
defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>;
let SchedRW = [WriteFDiv] in {
defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", fsqrt>;
}
let Predicates = [HasFRInt3264] in {
defm FRINT32Z : FRIntNNT<0b00, "frint32z", int_aarch64_frint32z>;
defm FRINT64Z : FRIntNNT<0b10, "frint64z", int_aarch64_frint64z>;
defm FRINT32X : FRIntNNT<0b01, "frint32x", int_aarch64_frint32x>;
defm FRINT64X : FRIntNNT<0b11, "frint64x", int_aarch64_frint64x>;
} // HasFRInt3264
let Predicates = [HasFullFP16] in {
def : Pat<(i32 (lrint f16:$Rn)),
(FCVTZSUWHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>;
def : Pat<(i64 (lrint f16:$Rn)),
(FCVTZSUXHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>;
def : Pat<(i64 (llrint f16:$Rn)),
(FCVTZSUXHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>;
}
def : Pat<(i32 (lrint f32:$Rn)),
(FCVTZSUWSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>;
def : Pat<(i32 (lrint f64:$Rn)),
(FCVTZSUWDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>;
def : Pat<(i64 (lrint f32:$Rn)),
(FCVTZSUXSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>;
def : Pat<(i64 (lrint f64:$Rn)),
(FCVTZSUXDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>;
def : Pat<(i64 (llrint f32:$Rn)),
(FCVTZSUXSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>;
def : Pat<(i64 (llrint f64:$Rn)),
(FCVTZSUXDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>;
//===----------------------------------------------------------------------===//
// Floating point two operand instructions.
//===----------------------------------------------------------------------===//
defm FADD : TwoOperandFPData<0b0010, "fadd", fadd>;
let SchedRW = [WriteFDiv] in {
defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>;
}
defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", fmaxnum>;
defm FMAX : TwoOperandFPData<0b0100, "fmax", fmaximum>;
defm FMINNM : TwoOperandFPData<0b0111, "fminnm", fminnum>;
defm FMIN : TwoOperandFPData<0b0101, "fmin", fminimum>;
let SchedRW = [WriteFMul] in {
defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>;
defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>;
}
defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>;
def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fminimum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FMINDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;
//===----------------------------------------------------------------------===//
// Floating point three operand instructions.
//===----------------------------------------------------------------------===//
defm FMADD : ThreeOperandFPData<0, 0, "fmadd", fma>;
defm FMSUB : ThreeOperandFPData<0, 1, "fmsub",
TriOpFrag<(fma node:$LHS, (fneg node:$MHS), node:$RHS)> >;
defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd",
TriOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))> >;
defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
TriOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))> >;
// The following def pats catch the case where the LHS of an FMA is negated.
// The TriOpFrag above catches the case where the middle operand is negated.
// N.b. FMSUB etc. have the accumulator at the *end* of the operand list,
// unlike the NEON variants.
// First, handle fma(-a, b, c) = c - a*b, which maps to FMSUB:
let Predicates = [HasNEON, HasFullFP16] in
def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, FPR16:$Ra)),
(FMSUBHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)),
(FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)),
(FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
// Next, handle fma(-a, b, -c) = -(a*b) - c, which maps to FNMADD:
let Predicates = [HasNEON, HasFullFP16] in
def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, (fneg FPR16:$Ra))),
(FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
(FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
(FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
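A hedged sketch of the algebra these patterns encode, written with C's fma() from math.h (illustrative only; whether a compiler actually selects FMSUB/FNMADD here depends on the target and flags, the identities themselves are what the patterns match).

#include <math.h>
#include <stdio.h>

int main(void) {
    double a = 3.0, b = 4.0, c = 5.0;
    /* fma(-a, b, c)  == c - a*b    : the FMSUB patterns above  */
    printf("%f\n", fma(-a, b, c));   /* -7.000000  */
    /* fma(-a, b, -c) == -(a*b) - c : the FNMADD patterns above */
    printf("%f\n", fma(-a, b, -c));  /* -17.000000 */
    return 0;
}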
//===----------------------------------------------------------------------===//
// Floating point comparison instructions.
//===----------------------------------------------------------------------===//
defm FCMPE : FPComparison<1, "fcmpe", AArch64strict_fcmpe>;
defm FCMP : FPComparison<0, "fcmp", AArch64any_fcmp>;
//===----------------------------------------------------------------------===//
// Floating point conditional comparison instructions.
//===----------------------------------------------------------------------===//
defm FCCMPE : FPCondComparison<1, "fccmpe">;
defm FCCMP : FPCondComparison<0, "fccmp", AArch64fccmp>;
//===----------------------------------------------------------------------===//
// Floating point conditional select instruction.
//===----------------------------------------------------------------------===//
defm FCSEL : FPCondSelect<"fcsel">;
// CSEL instructions providing f128 types need to be handled by a
// pseudo-instruction since the eventual code will need to introduce basic
// blocks and control flow.
def F128CSEL : Pseudo<(outs FPR128:$Rd),
(ins FPR128:$Rn, FPR128:$Rm, ccode:$cond),
[(set (f128 FPR128:$Rd),
(AArch64csel FPR128:$Rn, FPR128:$Rm,
(i32 imm:$cond), NZCV))]> {
let Uses = [NZCV];
let usesCustomInserter = 1;
let hasNoSchedulingInfo = 1;
}
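Illustrative aside (not part of the .td source): on AArch64, long double is IEEE binary128, i.e. the f128 type above, and there is no 128-bit CSEL, so a source-level select like the one below is normally lowered through a branch, which is the control flow the F128CSEL custom inserter introduces. The function name is hypothetical.

#include <stdio.h>

long double pick(int cond, long double x, long double y) {
    return cond ? x : y;   /* no f128 CSEL; expect a compare and a branch */
}

int main(void) {
    printf("%Lf\n", pick(1, 1.5L, 2.5L));  /* 1.500000 */
    return 0;
}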
//===----------------------------------------------------------------------===//
// Instructions used for emitting unwind opcodes on ARM64 Windows.
//===----------------------------------------------------------------------===//
let isPseudo = 1 in {
def SEH_StackAlloc : Pseudo<(outs), (ins i32imm:$size), []>, Sched<[]>;
def SEH_SaveFPLR : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
def SEH_SaveFPLR_X : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
def SEH_SaveReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
def SEH_SaveReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
def SEH_SaveRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
def SEH_SaveRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
def SEH_SaveFReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
def SEH_SaveFReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
def SEH_SaveFRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
def SEH_SaveFRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
def SEH_SetFP : Pseudo<(outs), (ins), []>, Sched<[]>;
def SEH_AddFP : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
def SEH_Nop : Pseudo<(outs), (ins), []>, Sched<[]>;
def SEH_PrologEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
def SEH_EpilogStart : Pseudo<(outs), (ins), []>, Sched<[]>;
def SEH_EpilogEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
}
//===----------------------------------------------------------------------===//
// Pseudo instructions for Windows EH
//===----------------------------------------------------------------------===//
let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1, isPseudo = 1 in {
def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret)]>, Sched<[]>;
let usesCustomInserter = 1 in
def CATCHRET : Pseudo<(outs), (ins am_brcond:$dst, am_brcond:$src), [(catchret bb:$dst, bb:$src)]>,
Sched<[]>;
}
// Pseudo instructions for homogeneous prolog/epilog
let isPseudo = 1 in {
// Save CSRs in order, {FPOffset}
def HOM_Prolog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
// Restore CSRs in order
def HOM_Epilog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
}
//===----------------------------------------------------------------------===//
// Floating point immediate move.
//===----------------------------------------------------------------------===//
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm FMOV : FPMoveImmediate<"fmov">;
}
//===----------------------------------------------------------------------===//
// Advanced SIMD two vector instructions.
//===----------------------------------------------------------------------===//
defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
AArch64uabd>;
// Match UABDL in log2-shuffle patterns.
def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)),
(zext (v8i8 V64:$opB))))),
(UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
(v8i16 (add (sub (zext (v8i8 V64:$opA)),
(zext (v8i8 V64:$opB))),
(AArch64vashr v8i16:$src, (i32 15))))),
(UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 V128:$opA)),
(zext (extract_high_v16i8 V128:$opB))))),
(UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
(v8i16 (add (sub (zext (extract_high_v16i8 V128:$opA)),
(zext (extract_high_v16i8 V128:$opB))),
(AArch64vashr v8i16:$src, (i32 15))))),
(UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)),
(zext (v4i16 V64:$opB))))),
(UABDLv4i16_v4i32 V64:$opA, V64:$opB)>;
def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 V128:$opA)),
(zext (extract_high_v8i16 V128:$opB))))),
(UABDLv8i16_v4i32 V128:$opA, V128:$opB)>;
def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)),
(zext (v2i32 V64:$opB))))),
(UABDLv2i32_v2i64 V64:$opA, V64:$opB)>;
def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 V128:$opA)),
(zext (extract_high_v4i32 V128:$opB))))),
(UABDLv4i32_v2i64 V128:$opA, V128:$opB)>;
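A short sketch of the operation the UABDL patterns above recognise, assuming ACLE's arm_neon.h (vabdl_u8 is the intrinsic spelling of UABDL); not part of the .td source.

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    uint8_t a[8] = {0, 10, 20, 30, 40, 50, 60, 255};
    uint8_t b[8] = {5,  5, 25, 25, 45, 45, 65,   0};
    /* Widened absolute difference |a[i] - b[i]|, the abs(sub(zext, zext))
     * shape matched above. */
    uint16x8_t d = vabdl_u8(vld1_u8(a), vld1_u8(b));
    uint16_t out[8];
    vst1q_u16(out, d);
    for (int i = 0; i < 8; i++)
        printf("%u ", (unsigned)out[i]);   /* 5 5 5 5 5 5 5 255 */
    printf("\n");
    return 0;
}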
defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>;
defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>;
defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>;
defm CMGE : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>;
defm CMGT : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>;
defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>;
defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>;
defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>;
defm FABS : SIMDTwoVectorFP<0, 1, 0b01111, "fabs", fabs>;
def : Pat<(v8i8 (AArch64vashr (v8i8 V64:$Rn), (i32 7))),
(CMLTv8i8rz V64:$Rn)>;
def : Pat<(v4i16 (AArch64vashr (v4i16 V64:$Rn), (i32 15))),
(CMLTv4i16rz V64:$Rn)>;
def : Pat<(v2i32 (AArch64vashr (v2i32 V64:$Rn), (i32 31))),
(CMLTv2i32rz V64:$Rn)>;
def : Pat<(v16i8 (AArch64vashr (v16i8 V128:$Rn), (i32 7))),
(CMLTv16i8rz V128:$Rn)>;
def : Pat<(v8i16 (AArch64vashr (v8i16 V128:$Rn), (i32 15))),
(CMLTv8i16rz V128:$Rn)>;
def : Pat<(v4i32 (AArch64vashr (v4i32 V128:$Rn), (i32 31))),
(CMLTv4i32rz V128:$Rn)>;
def : Pat<(v2i64 (AArch64vashr (v2i64 V128:$Rn), (i32 63))),
(CMLTv2i64rz V128:$Rn)>;
defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>;
defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>;
defm FCVTL : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">;
def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))),
(FCVTLv4i16 V64:$Rn)>;
def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn),
(i64 4)))),
(FCVTLv8i16 V128:$Rn)>;
def : Pat<(v2f64 (fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>;
def : Pat<(v4f32 (fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>;
defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>;
defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;
defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>;
defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>;
defm FCVTN : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">;
def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
(FCVTNv4i16 V128:$Rn)>;
def : Pat<(concat_vectors V64:$Rd,
(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
(FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
def : Pat<(v2f32 (fpround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>;
def : Pat<(v4f16 (fpround (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>;
def : Pat<(concat_vectors V64:$Rd, (v2f32 (fpround (v2f64 V128:$Rn)))),
(FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>;
defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>;
defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",
int_aarch64_neon_fcvtxn>;
defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", fp_to_sint>;
defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", fp_to_uint>;
// AArch64's FCVT instructions saturate when out of range.
multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, string INST> {
def : Pat<(v4i16 (to_int_sat v4f16:$Rn, i16)),
(!cast<Instruction>(INST # v4f16) v4f16:$Rn)>;
def : Pat<(v8i16 (to_int_sat v8f16:$Rn, i16)),
(!cast<Instruction>(INST # v8f16) v8f16:$Rn)>;
def : Pat<(v2i32 (to_int_sat v2f32:$Rn, i32)),
(!cast<Instruction>(INST # v2f32) v2f32:$Rn)>;
def : Pat<(v4i32 (to_int_sat v4f32:$Rn, i32)),
(!cast<Instruction>(INST # v4f32) v4f32:$Rn)>;
def : Pat<(v2i64 (to_int_sat v2f64:$Rn, i64)),
(!cast<Instruction>(INST # v2f64) v2f64:$Rn)>;
}
defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, "FCVTZS">;
defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, "FCVTZU">;
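The same saturating behaviour, sketched for the vector patterns just above (assuming ACLE's arm_neon.h; vcvtq_s32_f32 is the intrinsic spelling of the vector FCVTZS); not part of the .td source.

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    float in[4] = {1.9f, -1.9f, 1e20f, -1e20f};
    int32x4_t v = vcvtq_s32_f32(vld1q_f32(in));  /* truncate, clamp per lane */
    int32_t out[4];
    vst1q_s32(out, v);
    printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);
    /* expected: 1 -1 2147483647 -2147483648 */
    return 0;
}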
def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>;
def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>;
def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>;
def : Pat<(v4i32 (int_aarch64_neon_fcvtzs v4f32:$Rn)), (FCVTZSv4f32 $Rn)>;
def : Pat<(v2i64 (int_aarch64_neon_fcvtzs v2f64:$Rn)), (FCVTZSv2f64 $Rn)>;
def : Pat<(v4i16 (int_aarch64_neon_fcvtzu v4f16:$Rn)), (FCVTZUv4f16 $Rn)>;
def : Pat<(v8i16 (int_aarch64_neon_fcvtzu v8f16:$Rn)), (FCVTZUv8f16 $Rn)>;
def : Pat<(v2i32 (int_aarch64_neon_fcvtzu v2f32:$Rn)), (FCVTZUv2f32 $Rn)>;
def : Pat<(v4i32 (int_aarch64_neon_fcvtzu v4f32:$Rn)), (FCVTZUv4f32 $Rn)>;
def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>;
defm FNEG : SIMDTwoVectorFP<1, 1, 0b01111, "fneg", fneg>;
defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>;
defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", fround>;
defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>;
defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>;
defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", froundeven>;
defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>;
defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>;
defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>;
let Predicates = [HasFRInt3264] in {
defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>;
defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z", int_aarch64_neon_frint64z>;
defm FRINT32X : FRIntNNTVector<1, 0, "frint32x", int_aarch64_neon_frint32x>;
defm FRINT64X : FRIntNNTVector<1, 1, "frint64x", int_aarch64_neon_frint64x>;
} // HasFRInt3264
defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>;
defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg",
UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm NOT : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>;
// Aliases for MVN -> NOT.
def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}",
(NOTv8i8 V64:$Vd, V64:$Vn)>;
def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}",
(NOTv16i8 V128:$Vd, V128:$Vn)>;
def : Pat<(vnot (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(vnot (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
def : Pat<(vnot (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", bitreverse>;
defm REV16 : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>;
defm REV32 : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>;
defm REV64 : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>;
defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp",
BinOpFrag<(add node:$LHS, (int_aarch64_neon_saddlp node:$RHS))> >;
defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", int_aarch64_neon_saddlp>;
defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", sint_to_fp>;
defm SHLL : SIMDVectorLShiftLongBySizeBHS;
defm SQABS : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
defm SQXTN : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>;
defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>;
defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>;
defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp",
BinOpFrag<(add node:$LHS, (AArch64uaddlp node:$RHS))> >;
defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", AArch64uaddlp>;
defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", uint_to_fp>;
defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>;
defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>;
defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>;
defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>;
defm XTN : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;
def : Pat<(v4f16 (AArch64rev32 V64:$Rn)), (REV32v4i16 V64:$Rn)>;
def : Pat<(v4f16 (AArch64rev64 V64:$Rn)), (REV64v4i16 V64:$Rn)>;
def : Pat<(v4bf16 (AArch64rev32 V64:$Rn)), (REV32v4i16 V64:$Rn)>;
def : Pat<(v4bf16 (AArch64rev64 V64:$Rn)), (REV64v4i16 V64:$Rn)>;
def : Pat<(v8f16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
def : Pat<(v8f16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
def : Pat<(v8bf16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
def : Pat<(v8bf16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
def : Pat<(v2f32 (AArch64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;
// Patterns for vector long shift (by element width). These need to match
// zext, sext and anyext, so they are pulled out of the instruction
// definition into a multiclass here.
multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> {
def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)),
(SHLLv8i8 V64:$Rn)>;
def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 V128:$Rn))), (i32 8)),
(SHLLv16i8 V128:$Rn)>;
def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)),
(SHLLv4i16 V64:$Rn)>;
def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 V128:$Rn))), (i32 16)),
(SHLLv8i16 V128:$Rn)>;
def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)),
(SHLLv2i32 V64:$Rn)>;
def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 V128:$Rn))), (i32 32)),
(SHLLv4i32 V128:$Rn)>;
}
defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;
// Constant vector values, used in the S/UQXTN patterns below.
def VImmFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 85))))>;
def VImmFFFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 51))))>;
def VImm7F: PatLeaf<(AArch64movi_shift (i32 127), (i32 0))>;
def VImm80: PatLeaf<(AArch64mvni_shift (i32 127), (i32 0))>;
def VImm7FFF: PatLeaf<(AArch64movi_msl (i32 127), (i32 264))>;
def VImm8000: PatLeaf<(AArch64mvni_msl (i32 127), (i32 264))>;
// trunc(umin(X, 255)) -> UQXTN v8i8
def : Pat<(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))),
(UQXTNv8i8 V128:$Vn)>;
// trunc(umin(X, 65535)) -> UQXTN v4i16
def : Pat<(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))),
(UQXTNv4i16 V128:$Vn)>;
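A sketch of the clamp-and-narrow idiom these patterns match, assuming ACLE's arm_neon.h (vqmovn_u16 is the intrinsic spelling of UQXTN); not part of the .td source.

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    uint16_t in[8] = {0, 1, 127, 128, 255, 256, 1000, 65535};
    /* min(x, 255) followed by truncation to u8, done in one operation. */
    uint8x8_t n = vqmovn_u16(vld1q_u16(in));
    uint8_t out[8];
    vst1_u8(out, n);
    for (int i = 0; i < 8; i++)
        printf("%u ", (unsigned)out[i]);   /* 0 1 127 128 255 255 255 255 */
    printf("\n");
    return 0;
}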
// trunc(smin(smax(X, -128), 127)) -> SQXTN
// with reversed min/max
def : Pat<(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
(v8i16 VImm7F)))),
(SQXTNv8i8 V128:$Vn)>;
def : Pat<(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
(v8i16 VImm80)))),
(SQXTNv8i8 V128:$Vn)>;
// trunc(smin(smax(X, -32768), 32767)) -> SQXTN
// with reversed min/max
def : Pat<(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
(v4i32 VImm7FFF)))),
(SQXTNv4i16 V128:$Vn)>;
def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
(v4i32 VImm8000)))),
(SQXTNv4i16 V128:$Vn)>;
// concat_vectors(Vd, trunc(smin(smax(Vm, -128), 127))) ~> SQXTN2(Vd, Vn)
// with reversed min/max
def : Pat<(v16i8 (concat_vectors
(v8i8 V64:$Vd),
(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
(v8i16 VImm7F)))))),
(SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
def : Pat<(v16i8 (concat_vectors
(v8i8 V64:$Vd),
(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
(v8i16 VImm80)))))),
(SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
// concat_vectors(Vd, trunc(smin(smax(Vm, -32768), 32767))) ~> SQXTN2(Vd, Vn)
// with reversed min/max
def : Pat<(v8i16 (concat_vectors
(v4i16 V64:$Vd),
(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
(v4i32 VImm7FFF)))))),
(SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
def : Pat<(v8i16 (concat_vectors
(v4i16 V64:$Vd),
(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
(v4i32 VImm8000)))))),
(SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
//===----------------------------------------------------------------------===//
// Advanced SIMD three vector instructions.
//===----------------------------------------------------------------------===//
defm ADD : SIMDThreeSameVector<0, 0b10000, "add", add>;
defm ADDP : SIMDThreeSameVector<0, 0b10111, "addp", int_aarch64_neon_addp>;
defm CMEQ : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>;
defm CMGE : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>;
defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>;
defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
defm CMTST : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>;
foreach VT = [ v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64 ] in {
def : Pat<(vnot (AArch64cmeqz VT:$Rn)), (!cast<Instruction>("CMTST"#VT) VT:$Rn, VT:$Rn)>;
}
defm FABD : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>;
let Predicates = [HasNEON] in {
foreach VT = [ v2f32, v4f32, v2f64 ] in
def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
}
let Predicates = [HasNEON, HasFullFP16] in {
foreach VT = [ v4f16, v8f16 ] in
def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
}
defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>;
defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>;
defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_faddp>;
defm FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", fadd>;
defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", fdiv>;
defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", fmaxnum>;
defm FMAXP : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>;
defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", fmaximum>;
defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>;
defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", fminnum>;
defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", fminimum>;
// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
// instruction expects the addend first, while the fma intrinsic puts it last.
defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla",
TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >;
defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls",
TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
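A sketch of the operand-order point in the NOTE above, assuming ACLE's arm_neon.h: vfmaq_f32 takes the addend as its first argument (a + b*c), mirroring FMLA's tied accumulator, whereas the generic fma operation takes the addend last, hence the reordered TriOpFrag. Not part of the .td source.

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    float32x4_t a = vdupq_n_f32(1.0f);    /* addend (first argument) */
    float32x4_t b = vdupq_n_f32(2.0f);
    float32x4_t c = vdupq_n_f32(3.0f);
    float32x4_t r = vfmaq_f32(a, b, c);   /* 1 + 2*3 = 7 in every lane */
    float out[4];
    vst1q_f32(out, r);
    printf("%f\n", out[0]);               /* 7.000000 */
    return 0;
}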
defm FMULX : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>;
defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", fmul>;
defm FRECPS : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>;
defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>;
defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", fsub>;
// MLA and MLS are generated by the MachineCombiner pass
defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>;
defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>;
defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))> >;
defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", AArch64sabd>;
defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", AArch64shadd>;
defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>;
defm SMINP : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>;
defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>;
defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
defm SQRSHL : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
defm SQSHL : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", AArch64srhadd>;
defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>;
defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >;
defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>;
defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", AArch64uhadd>;
defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>;
defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>;
defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>;
defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", AArch64urhadd>;
defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
int_aarch64_neon_sqrdmlah>;
defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
int_aarch64_neon_sqrdmlsh>;
// Extra saturation patterns, beyond the intrinsic matches above
defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>;
defm : SIMDThreeSameVectorExtraPatterns<"UQADD", uaddsat>;
defm : SIMDThreeSameVectorExtraPatterns<"SQSUB", ssubsat>;
defm : SIMDThreeSameVectorExtraPatterns<"UQSUB", usubsat>;
defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
// Pseudo bitwise select pattern BSP.
// It is expanded into BSL/BIT/BIF after register allocation.
defm BSP : SIMDLogicalThreeVectorPseudo<TriOpFrag<(or (and node:$LHS, node:$MHS),
(and (vnot node:$LHS), node:$RHS))>>;
defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl">;
defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>;
defm BIF : SIMDLogicalThreeVectorTied<1, 0b11, "bif">;
def : Pat<(AArch64bsp (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
(BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
(BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
(BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v1i64 V64:$Rd), V64:$Rn, V64:$Rm),
(BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v16i8 V128:$Rd), V128:$Rn, V128:$Rm),
(BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
(BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
(BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
(BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
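A sketch of the bitwise-select semantics BSP models, assuming ACLE's arm_neon.h (vbslq_u8 is the intrinsic spelling of BSL): the result is (mask & m) | (~mask & n), the TriOpFrag given for BSP above. Not part of the .td source.

#include <arm_neon.h>
#include <stdio.h>

int main(void) {
    uint8x16_t mask = vdupq_n_u8(0xF0);
    uint8x16_t m    = vdupq_n_u8(0xAA);
    uint8x16_t n    = vdupq_n_u8(0x55);
    uint8_t out[16];
    vst1q_u8(out, vbslq_u8(mask, m, n));
    /* (0xF0 & 0xAA) | (0x0F & 0x55) = 0xA0 | 0x05 = 0xA5 */
    printf("0x%02X\n", out[0]);
    return 0;
}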
def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
(ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}",
(ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}",
(ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}",
(ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}",
(ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>;
def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}",
(ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}",
(ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}",
(ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" #
"|cmls.8b\t$dst, $src1, $src2}",
(CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" #
"|cmls.16b\t$dst, $src1, $src2}",
(CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" #
"|cmls.4h\t$dst, $src1, $src2}",
(CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" #
"|cmls.8h\t$dst, $src1, $src2}",
(CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" #
"|cmls.2s\t$dst, $src1, $src2}",
(CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" #
"|cmls.4s\t$dst, $src1, $src2}",
(CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" #
"|cmls.2d\t$dst, $src1, $src2}",
(CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" #
"|cmlo.8b\t$dst, $src1, $src2}",
(CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" #
"|cmlo.16b\t$dst, $src1, $src2}",
(CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" #
"|cmlo.4h\t$dst, $src1, $src2}",
(CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" #
"|cmlo.8h\t$dst, $src1, $src2}",
(CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" #
"|cmlo.2s\t$dst, $src1, $src2}",
(CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" #
"|cmlo.4s\t$dst, $src1, $src2}",
(CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" #
"|cmlo.2d\t$dst, $src1, $src2}",
(CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" #
"|cmle.8b\t$dst, $src1, $src2}",
(CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" #
"|cmle.16b\t$dst, $src1, $src2}",
(CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" #
"|cmle.4h\t$dst, $src1, $src2}",
(CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" #
"|cmle.8h\t$dst, $src1, $src2}",
(CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" #
"|cmle.2s\t$dst, $src1, $src2}",
(CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" #
"|cmle.4s\t$dst, $src1, $src2}",
(CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" #
"|cmle.2d\t$dst, $src1, $src2}",
(CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" #
"|cmlt.8b\t$dst, $src1, $src2}",
(CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" #
"|cmlt.16b\t$dst, $src1, $src2}",
(CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" #
"|cmlt.4h\t$dst, $src1, $src2}",
(CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" #
"|cmlt.8h\t$dst, $src1, $src2}",
(CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" #
"|cmlt.2s\t$dst, $src1, $src2}",
(CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" #
"|cmlt.4s\t$dst, $src1, $src2}",
(CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" #
"|cmlt.2d\t$dst, $src1, $src2}",
(CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" #
"|fcmle.4h\t$dst, $src1, $src2}",
(FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" #
"|fcmle.8h\t$dst, $src1, $src2}",
(FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" #
"|fcmle.2s\t$dst, $src1, $src2}",
(FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" #
"|fcmle.4s\t$dst, $src1, $src2}",
(FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" #
"|fcmle.2d\t$dst, $src1, $src2}",
(FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" #
"|fcmlt.4h\t$dst, $src1, $src2}",
(FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" #
"|fcmlt.8h\t$dst, $src1, $src2}",
(FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" #
"|fcmlt.2s\t$dst, $src1, $src2}",
(FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" #
"|fcmlt.4s\t$dst, $src1, $src2}",
(FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" #
"|fcmlt.2d\t$dst, $src1, $src2}",
(FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" #
"|facle.4h\t$dst, $src1, $src2}",
(FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" #
"|facle.8h\t$dst, $src1, $src2}",
(FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" #
"|facle.2s\t$dst, $src1, $src2}",
(FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" #
"|facle.4s\t$dst, $src1, $src2}",
(FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" #
"|facle.2d\t$dst, $src1, $src2}",
(FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" #
"|faclt.4h\t$dst, $src1, $src2}",
(FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" #
"|faclt.8h\t$dst, $src1, $src2}",
(FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" #
"|faclt.2s\t$dst, $src1, $src2}",
(FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" #
"|faclt.4s\t$dst, $src1, $src2}",
(FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" #
"|faclt.2d\t$dst, $src1, $src2}",
(FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
//===----------------------------------------------------------------------===//
// Advanced SIMD three scalar instructions.
//===----------------------------------------------------------------------===//
defm ADD : SIMDThreeScalarD<0, 0b10000, "add", add>;
defm CMEQ : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>;
defm CMGE : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>;
defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>;
defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>;
defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>;
defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>;
defm FABD : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>;
def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(FABD64 FPR64:$Rn, FPR64:$Rm)>;
let Predicates = [HasFullFP16] in {
def : Pat<(fabs (fsub f16:$Rn, f16:$Rm)), (FABD16 f16:$Rn, f16:$Rm)>;
}
def : Pat<(fabs (fsub f32:$Rn, f32:$Rm)), (FABD32 f32:$Rn, f32:$Rm)>;
def : Pat<(fabs (fsub f64:$Rn, f64:$Rm)), (FABD64 f64:$Rn, f64:$Rm)>;
defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge",
int_aarch64_neon_facge>;
defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
int_aarch64_neon_facgt>;
defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorStreamingSVE>;
defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorStreamingSVE>;
defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorStreamingSVE>;
defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>;
defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>;
defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>;
defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_aarch64_neon_srshl>;
defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_aarch64_neon_sshl>;
defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>;
defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>;
defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>;
defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>;
defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>;
let Predicates = [HasRDM] in {
defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
def : Pat<(i32 (int_aarch64_neon_sqrdmlah (i32 FPR32:$Rd), (i32 FPR32:$Rn),
(i32 FPR32:$Rm))),
(SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(i32 (int_aarch64_neon_sqrdmlsh (i32 FPR32:$Rd), (i32 FPR32:$Rn),
(i32 FPR32:$Rm))),
(SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
}
def : InstAlias<"cmls $dst, $src1, $src2",
(CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmle $dst, $src1, $src2",
(CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmlo $dst, $src1, $src2",
(CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmlt $dst, $src1, $src2",
(CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"fcmle $dst, $src1, $src2",
(FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"fcmle $dst, $src1, $src2",
(FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"fcmlt $dst, $src1, $src2",
(FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"fcmlt $dst, $src1, $src2",
(FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"facle $dst, $src1, $src2",
(FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"facle $dst, $src1, $src2",
(FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"faclt $dst, $src1, $src2",
(FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"faclt $dst, $src1, $src2",
(FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
//===----------------------------------------------------------------------===//
// Advanced SIMD three scalar instructions (mixed operands).
//===----------------------------------------------------------------------===//
defm SQDMULL : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull",
int_aarch64_neon_sqdmulls_scalar>;
defm SQDMLAL : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">;
defm SQDMLSL : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">;
def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd),
(i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
(i32 FPR32:$Rm))))),
(SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd),
(i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
(i32 FPR32:$Rm))))),
(SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
//===----------------------------------------------------------------------===//
// Advanced SIMD two scalar instructions.
//===----------------------------------------------------------------------===//
defm ABS : SIMDTwoScalarD< 0, 0b01011, "abs", abs>;
defm CMEQ : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>;
defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>;
defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>;
defm CMLE : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>;
defm CMLT : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>;
defm FCMEQ : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
defm FCMGE : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
defm FCVTAS : SIMDFPTwoScalar< 0, 0, 0b11100, "fcvtas">;
defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau">;
defm FCVTMS : SIMDFPTwoScalar< 0, 0, 0b11011, "fcvtms">;
defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu">;
defm FCVTNS : SIMDFPTwoScalar< 0, 0, 0b11010, "fcvtns">;
defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu">;
defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps">;
defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">;
def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">;
defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">;
defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe", HasNEONorStreamingSVE>;
defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx", HasNEONorStreamingSVE>;
defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte", HasNEONorStreamingSVE>;
defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg",
UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm SCVTF : SIMDFPTwoScalarCVT< 0, 0, 0b11101, "scvtf", AArch64sitof>;
defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>;
defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>;
defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd",
int_aarch64_neon_suqadd>;
defm UCVTF : SIMDFPTwoScalarCVT< 1, 0, 0b11101, "ucvtf", AArch64uitof>;
defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>;
defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
int_aarch64_neon_usqadd>;
def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
(CMLTv1i64rz V64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
(FCVTASv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))),
(FCVTAUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))),
(FCVTMSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))),
(FCVTMUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))),
(FCVTNSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))),
(FCVTNUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))),
(FCVTPSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))),
(FCVTPUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtzs (v1f64 FPR64:$Rn))),
(FCVTZSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtzu (v1f64 FPR64:$Rn))),
(FCVTZUv1i64 FPR64:$Rn)>;
def : Pat<(f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))),
(FRECPEv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))),
(FRECPEv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))),
(FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))),
(FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(f32 (AArch64frecpe (f32 FPR32:$Rn))),
(FRECPEv1i32 FPR32:$Rn)>;
def : Pat<(v2f32 (AArch64frecpe (v2f32 V64:$Rn))),
(FRECPEv2f32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64frecpe (v4f32 FPR128:$Rn))),
(FRECPEv4f32 FPR128:$Rn)>;
def : Pat<(f64 (AArch64frecpe (f64 FPR64:$Rn))),
(FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))),
(FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))),
(FRECPEv2f64 FPR128:$Rn)>;
def : Pat<(f32 (AArch64frecps (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
(FRECPS32 FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(v2f32 (AArch64frecps (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
(FRECPSv2f32 V64:$Rn, V64:$Rm)>;
def : Pat<(v4f32 (AArch64frecps (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
(FRECPSv4f32 FPR128:$Rn, FPR128:$Rm)>;
def : Pat<(f64 (AArch64frecps (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
(FRECPS64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v2f64 (AArch64frecps (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
(FRECPSv2f64 FPR128:$Rn, FPR128:$Rm)>;
def : Pat<(f16 (int_aarch64_neon_frecpx (f16 FPR16:$Rn))),
(FRECPXv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))),
(FRECPXv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))),
(FRECPXv1i64 FPR64:$Rn)>;
def : Pat<(f16 (int_aarch64_neon_frsqrte (f16 FPR16:$Rn))),
(FRSQRTEv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))),
(FRSQRTEv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))),
(FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))),
(FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(f32 (AArch64frsqrte (f32 FPR32:$Rn))),
(FRSQRTEv1i32 FPR32:$Rn)>;
def : Pat<(v2f32 (AArch64frsqrte (v2f32 V64:$Rn))),
(FRSQRTEv2f32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64frsqrte (v4f32 FPR128:$Rn))),
(FRSQRTEv4f32 FPR128:$Rn)>;
def : Pat<(f64 (AArch64frsqrte (f64 FPR64:$Rn))),
(FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (AArch64frsqrte (v1f64 FPR64:$Rn))),
(FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))),
(FRSQRTEv2f64 FPR128:$Rn)>;
def : Pat<(f32 (AArch64frsqrts (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
(FRSQRTS32 FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(v2f32 (AArch64frsqrts (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
(FRSQRTSv2f32 V64:$Rn, V64:$Rm)>;
def : Pat<(v4f32 (AArch64frsqrts (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
(FRSQRTSv4f32 FPR128:$Rn, FPR128:$Rm)>;
def : Pat<(f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
(FRSQRTS64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
(FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>;
// Some float -> int -> float conversion patterns for which we want to keep the
// int values in FP registers using the corresponding NEON instructions to
// avoid more costly int <-> fp register transfers.
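// Editorial illustration (not part of the upstream file): a C-level sketch of
// the round trips the patterns below are meant to catch; the function names
// are made up and the exact lowering depends on the surrounding code.
//
//   #include <stdint.h>
//   /* The truncation and the conversion back can both stay on the SIMD/FP
//      side (fcvtzs + scvtf on d/s registers) instead of moving the integer
//      to a GPR and back. */
//   double trunc_d(double x) { return (double)(int64_t)x; }
//   float  trunc_s(float x)  { return (float)(int32_t)x; }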
let Predicates = [HasNEON] in {
def : Pat<(f64 (sint_to_fp (i64 (fp_to_sint f64:$Rn)))),
(SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
def : Pat<(f32 (sint_to_fp (i32 (fp_to_sint f32:$Rn)))),
(SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>;
def : Pat<(f64 (uint_to_fp (i64 (fp_to_uint f64:$Rn)))),
(UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>;
def : Pat<(f32 (uint_to_fp (i32 (fp_to_uint f32:$Rn)))),
(UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;
let Predicates = [HasFullFP16] in {
def : Pat<(f16 (sint_to_fp (i32 (fp_to_sint f16:$Rn)))),
(SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
def : Pat<(f16 (uint_to_fp (i32 (fp_to_uint f16:$Rn)))),
(UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
}
}
// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// Here are the patterns for 8-bit and 16-bit to float.
// 8-bits -> float.
multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,
SDPatternOperator loadop, Instruction UCVTF,
ROAddrMode ro, Instruction LDRW, Instruction LDRX,
SubRegIndex sub> {
def : Pat<(DstTy (uint_to_fp (SrcTy
(loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm,
ro.Wext:$extend))))),
(UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
(LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
sub))>;
def : Pat<(DstTy (uint_to_fp (SrcTy
(loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm,
ro.Wext:$extend))))),
(UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
(LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
sub))>;
}
defm : UIntToFPROLoadPat<f32, i32, zextloadi8,
UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f32 (uint_to_fp (i32
(zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
(UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
(LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat <(f32 (uint_to_fp (i32
(zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
(UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
(LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
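// Editorial illustration (not part of the upstream file): the kind of C code
// the zextloadi8 patterns above are meant to catch; the function name is
// made up.
//
//   #include <stdint.h>
//   /* The byte is loaded straight into a SIMD/FP register (ldr b0, [x0])
//      and converted there with ucvtf, avoiding a GPR round trip. */
//   float u8_to_float(const uint8_t *p) { return (float)*p; }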
// 16-bits -> float.
defm : UIntToFPROLoadPat<f32, i32, zextloadi16,
UCVTFv1i32, ro16, LDRHroW, LDRHroX, hsub>;
def : Pat <(f32 (uint_to_fp (i32
(zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
(UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
(LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat <(f32 (uint_to_fp (i32
(zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
(UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
(LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
// 32-bit integers are handled in the target-specific DAG combine
// performIntToFpCombine.
// A 64-bit integer to 32-bit floating point is not possible with UCVTF on
// floating-point registers (both source and destination must have the same
// size).
// Here are the patterns for 8-bit, 16-bit, 32-bit, and 64-bit to double.
// 8-bits -> double.
defm : UIntToFPROLoadPat<f64, i32, zextloadi8,
UCVTFv1i64, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f64 (uint_to_fp (i32
(zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
(UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
(LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat <(f64 (uint_to_fp (i32
(zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
(UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
(LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
// 16-bits -> double.
defm : UIntToFPROLoadPat<f64, i32, zextloadi16,
UCVTFv1i64, ro16, LDRHroW, LDRHroX, hsub>;
def : Pat <(f64 (uint_to_fp (i32
(zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
(UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
(LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat <(f64 (uint_to_fp (i32
(zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
(UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
(LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
// 32-bits -> double.
defm : UIntToFPROLoadPat<f64, i32, load,
UCVTFv1i64, ro32, LDRSroW, LDRSroX, ssub>;
def : Pat <(f64 (uint_to_fp (i32
(load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
(UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
(LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>;
def : Pat <(f64 (uint_to_fp (i32
(load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
(UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
(LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
// 64-bit -> double is handled in the target-specific DAG combine
// performIntToFpCombine.
//===----------------------------------------------------------------------===//
// Advanced SIMD three different-sized vector instructions.
//===----------------------------------------------------------------------===//
defm ADDHN : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>;
defm SUBHN : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>;
defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>;
defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>;
defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
AArch64sabd>;
defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
AArch64sabd>;
defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl",
BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw",
BinOpFrag<(add node:$LHS, (sext node:$RHS))>>;
defm SMLAL : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal",
TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
defm SMLSL : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl",
TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull", int_aarch64_neon_smull>;
defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl",
int_aarch64_neon_sqsub>;
defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
int_aarch64_neon_sqdmull>;
defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>;
defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
AArch64uabd>;
defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>;
defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_aarch64_neon_umull>;
defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw",
BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>;
// Additional patterns for [SU]ML[AS]L
multiclass Neon_mul_acc_widen_patterns<SDPatternOperator opnode, SDPatternOperator vecopnode,
Instruction INST8B, Instruction INST4H, Instruction INST2S> {
def : Pat<(v4i16 (opnode
V64:$Ra,
(v4i16 (extract_subvector
(vecopnode (v8i8 V64:$Rn),(v8i8 V64:$Rm)),
(i64 0))))),
(EXTRACT_SUBREG (v8i16 (INST8B
(INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Ra, dsub),
V64:$Rn, V64:$Rm)), dsub)>;
def : Pat<(v2i32 (opnode
V64:$Ra,
(v2i32 (extract_subvector
(vecopnode (v4i16 V64:$Rn),(v4i16 V64:$Rm)),
(i64 0))))),
(EXTRACT_SUBREG (v4i32 (INST4H
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Ra, dsub),
V64:$Rn, V64:$Rm)), dsub)>;
def : Pat<(v1i64 (opnode
V64:$Ra,
(v1i64 (extract_subvector
(vecopnode (v2i32 V64:$Rn),(v2i32 V64:$Rm)),
(i64 0))))),
(EXTRACT_SUBREG (v2i64 (INST2S
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Ra, dsub),
V64:$Rn, V64:$Rm)), dsub)>;
}
defm : Neon_mul_acc_widen_patterns<add, int_aarch64_neon_umull,
UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<add, int_aarch64_neon_smull,
SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<sub, int_aarch64_neon_umull,
UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<sub, int_aarch64_neon_smull,
SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;
// Additional patterns for SMULL and UMULL
multiclass Neon_mul_widen_patterns<SDPatternOperator opnode,
Instruction INST8B, Instruction INST4H, Instruction INST2S> {
def : Pat<(v8i16 (opnode (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
(INST8B V64:$Rn, V64:$Rm)>;
def : Pat<(v4i32 (opnode (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
(INST4H V64:$Rn, V64:$Rm)>;
def : Pat<(v2i64 (opnode (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
(INST2S V64:$Rn, V64:$Rm)>;
}
defm : Neon_mul_widen_patterns<AArch64smull, SMULLv8i8_v8i16,
SMULLv4i16_v4i32, SMULLv2i32_v2i64>;
defm : Neon_mul_widen_patterns<AArch64umull, UMULLv8i8_v8i16,
UMULLv4i16_v4i32, UMULLv2i32_v2i64>;
// Patterns for smull2/umull2.
multiclass Neon_mul_high_patterns<SDPatternOperator opnode,
Instruction INST8B, Instruction INST4H, Instruction INST2S> {
def : Pat<(v8i16 (opnode (extract_high_v16i8 V128:$Rn),
(extract_high_v16i8 V128:$Rm))),
(INST8B V128:$Rn, V128:$Rm)>;
def : Pat<(v4i32 (opnode (extract_high_v8i16 V128:$Rn),
(extract_high_v8i16 V128:$Rm))),
(INST4H V128:$Rn, V128:$Rm)>;
def : Pat<(v2i64 (opnode (extract_high_v4i32 V128:$Rn),
(extract_high_v4i32 V128:$Rm))),
(INST2S V128:$Rn, V128:$Rm)>;
}
defm : Neon_mul_high_patterns<AArch64smull, SMULLv16i8_v8i16,
SMULLv8i16_v4i32, SMULLv4i32_v2i64>;
defm : Neon_mul_high_patterns<AArch64umull, UMULLv16i8_v8i16,
UMULLv8i16_v4i32, UMULLv4i32_v2i64>;
// Additional patterns for SMLAL/SMLSL and UMLAL/UMLSL
multiclass Neon_mulacc_widen_patterns<SDPatternOperator opnode,
Instruction INST8B, Instruction INST4H, Instruction INST2S> {
def : Pat<(v8i16 (opnode (v8i16 V128:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
(INST8B V128:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(v4i32 (opnode (v4i32 V128:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
(INST4H V128:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(v2i64 (opnode (v2i64 V128:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
(INST2S V128:$Rd, V64:$Rn, V64:$Rm)>;
}
defm : Neon_mulacc_widen_patterns<
TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>,
SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
defm : Neon_mulacc_widen_patterns<
TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>,
UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
defm : Neon_mulacc_widen_patterns<
TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>,
SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;
defm : Neon_mulacc_widen_patterns<
TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>,
UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;
// Patterns for 64-bit pmull
def : Pat<(int_aarch64_neon_pmull64 V64:$Rn, V64:$Rm),
(PMULLv1i64 V64:$Rn, V64:$Rm)>;
def : Pat<(int_aarch64_neon_pmull64 (extractelt (v2i64 V128:$Rn), (i64 1)),
(extractelt (v2i64 V128:$Rm), (i64 1))),
(PMULLv2i64 V128:$Rn, V128:$Rm)>;
// CodeGen patterns for addhn and subhn instructions, which can actually be
// written in LLVM IR without too much difficulty.
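// Editorial illustration (not part of the upstream file): the IR for the
// patterns below comes out of ordinary C such as the following; the names
// are made up and vaddhn_u16 is the corresponding ACLE intrinsic.
//
//   #include <arm_neon.h>
//   #include <stdint.h>
//   /* Per lane: wrap-around add, then keep the high half. */
//   uint8_t addhn_lane(uint16_t a, uint16_t b) {
//     return (uint8_t)((uint16_t)(a + b) >> 8);
//   }
//   uint8x8_t addhn(uint16x8_t a, uint16x8_t b) { return vaddhn_u16(a, b); }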
// ADDHN
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
(ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
(i32 16))))),
(ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
(i32 32))))),
(ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v8i8 V64:$Rd),
(trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm),
(i32 8))))),
(ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v4i16 V64:$Rd),
(trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
(i32 16))))),
(ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v2i32 V64:$Rd),
(trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
(i32 32))))),
(ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
V128:$Rn, V128:$Rm)>;
// SUBHN
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
(SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
(i32 16))))),
(SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
(i32 32))))),
(SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v8i8 V64:$Rd),
(trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
(i32 8))))),
(SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v4i16 V64:$Rd),
(trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
(i32 16))))),
(SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v2i32 V64:$Rd),
(trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
(i32 32))))),
(SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
V128:$Rn, V128:$Rm)>;
//----------------------------------------------------------------------------
// AdvSIMD bitwise extract from vector instruction.
//----------------------------------------------------------------------------
defm EXT : SIMDBitwiseExtract<"ext">;
def AdjustExtImm : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(8 + N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
multiclass ExtPat<ValueType VT64, ValueType VT128, int N> {
def : Pat<(VT64 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
(EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
def : Pat<(VT128 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
(EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
// We use EXT to handle extract_subvector to copy the upper 64-bits of a
// 128-bit vector.
def : Pat<(VT64 (extract_subvector V128:$Rn, (i64 N))),
(EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
// A 64-bit EXT of two halves of the same 128-bit register can be done as a
// single 128-bit EXT.
def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 0)),
(extract_subvector V128:$Rn, (i64 N)),
(i32 imm:$imm))),
(EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, imm:$imm), dsub)>;
// A 64-bit EXT of the high half of a 128-bit register can be done using a
// 128-bit EXT of the whole register with an adjustment to the immediate. The
// top half of the other operand will be unset, but that doesn't matter as it
// will not be used. (A worked example of the immediate adjustment follows
// the ExtPat instantiations below.)
def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 N)),
V64:$Rm,
(i32 imm:$imm))),
(EXTRACT_SUBREG (EXTv16i8 V128:$Rn,
(SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
(AdjustExtImm imm:$imm)), dsub)>;
}
defm : ExtPat<v8i8, v16i8, 8>;
defm : ExtPat<v4i16, v8i16, 4>;
defm : ExtPat<v4f16, v8f16, 4>;
defm : ExtPat<v4bf16, v8bf16, 4>;
defm : ExtPat<v2i32, v4i32, 2>;
defm : ExtPat<v2f32, v4f32, 2>;
defm : ExtPat<v1i64, v2i64, 1>;
defm : ExtPat<v1f64, v2f64, 1>;
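// Editorial note (not part of the upstream file): a worked example of the
// AdjustExtImm case above.  For defm : ExtPat<v8i8, v16i8, 8>, an ext whose
// first operand is the high half of Vn, say a 64-bit ext at byte offset 3,
// is rewritten as a 128-bit EXT of the whole Vn starting at byte
// 8 + 3 = 11, with Vm supplying the following bytes; the result is then the
// low 64 bits (dsub) of that 128-bit EXT.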
//----------------------------------------------------------------------------
// AdvSIMD zip vector
//----------------------------------------------------------------------------
defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>;
defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>;
defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>;
defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;
def : Pat<(v16i8 (concat_vectors (v8i8 (trunc (v8i16 V128:$Vn))),
(v8i8 (trunc (v8i16 V128:$Vm))))),
(UZP1v16i8 V128:$Vn, V128:$Vm)>;
def : Pat<(v8i16 (concat_vectors (v4i16 (trunc (v4i32 V128:$Vn))),
(v4i16 (trunc (v4i32 V128:$Vm))))),
(UZP1v8i16 V128:$Vn, V128:$Vm)>;
def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))),
(v2i32 (trunc (v2i64 V128:$Vm))))),
(UZP1v4i32 V128:$Vn, V128:$Vm)>;
//----------------------------------------------------------------------------
// AdvSIMD TBL/TBX instructions
//----------------------------------------------------------------------------
defm TBL : SIMDTableLookup< 0, "tbl">;
defm TBX : SIMDTableLookupTied<1, "tbx">;
def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
(TBLv8i8One VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
(TBLv16i8One V128:$Ri, V128:$Rn)>;
def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd),
(v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
(TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
(v16i8 V128:$Ri), (v16i8 V128:$Rn))),
(TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>;
//----------------------------------------------------------------------------
// AdvSIMD scalar DUP instruction
//----------------------------------------------------------------------------
defm DUP : SIMDScalarDUP<"mov">;
//----------------------------------------------------------------------------
// AdvSIMD scalar pairwise instructions
//----------------------------------------------------------------------------
defm ADDP : SIMDPairwiseScalarD<0, 0b11011, "addp">;
defm FADDP : SIMDFPPairwiseScalar<0, 0b01101, "faddp">;
defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">;
defm FMAXP : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">;
defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">;
defm FMINP : SIMDFPPairwiseScalar<1, 0b01111, "fminp">;
let Predicates = [HasFullFP16] in {
def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))),
(FADDPv2i16p
(EXTRACT_SUBREG
(FADDPv8f16 (FADDPv8f16 V128:$Rn, (v8f16 (IMPLICIT_DEF))), (v8f16 (IMPLICIT_DEF))),
dsub))>;
def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))),
(FADDPv2i16p (FADDPv4f16 V64:$Rn, (v4f16 (IMPLICIT_DEF))))>;
}
def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))),
(FADDPv2i32p
(EXTRACT_SUBREG
(FADDPv4f32 V128:$Rn, (v4f32 (IMPLICIT_DEF))),
dsub))>;
def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))),
(FADDPv2i32p V64:$Rn)>;
def : Pat<(f64 (vecreduce_fadd (v2f64 V128:$Rn))),
(FADDPv2i64p V128:$Rn)>;
def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
(FADDPv2i32p V64:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
(FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))),
(FADDPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fmaxnmv (v2f32 V64:$Rn))),
(FMAXNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fmaxnmv (v2f64 V128:$Rn))),
(FMAXNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fmaxv (v2f32 V64:$Rn))),
(FMAXPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fmaxv (v2f64 V128:$Rn))),
(FMAXPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fminnmv (v2f32 V64:$Rn))),
(FMINNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fminnmv (v2f64 V128:$Rn))),
(FMINNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_fminv (v2f32 V64:$Rn))),
(FMINPv2i32p V64:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))),
(FMINPv2i64p V128:$Rn)>;
//----------------------------------------------------------------------------
// AdvSIMD INS/DUP instructions
//----------------------------------------------------------------------------
def DUPv8i8gpr : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>;
def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>;
def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>;
def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>;
def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>;
def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>;
def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>;
def DUPv2i64lane : SIMDDup64FromElement;
def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>;
def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>;
def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>;
def DUPv8i8lane : SIMDDup8FromElement <0, ".8b", v8i8, V64>;
def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>;
// DUP from a 64-bit register to a 64-bit register is just a copy
def : Pat<(v1i64 (AArch64dup (i64 GPR64:$Rn))),
(COPY_TO_REGCLASS GPR64:$Rn, FPR64)>;
def : Pat<(v1f64 (AArch64dup (f64 FPR64:$Rn))),
(COPY_TO_REGCLASS FPR64:$Rn, FPR64)>;
def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))),
(v2f32 (DUPv2i32lane
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
(i64 0)))>;
def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))),
(v4f32 (DUPv4i32lane
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
(i64 0)))>;
def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))),
(v2f64 (DUPv2i64lane
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub),
(i64 0)))>;
def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))),
(v4f16 (DUPv4i16lane
(INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
(i64 0)))>;
def : Pat<(v4bf16 (AArch64dup (bf16 FPR16:$Rn))),
(v4bf16 (DUPv4i16lane
(INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
(i64 0)))>;
def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))),
(v8f16 (DUPv8i16lane
(INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
(i64 0)))>;
def : Pat<(v8bf16 (AArch64dup (bf16 FPR16:$Rn))),
(v8bf16 (DUPv8i16lane
(INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
(i64 0)))>;
def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
(DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
(DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v4bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
(DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v8bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
(DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
(DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
(DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)),
(DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>;
// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane
// instruction even if the types don't match: we just have to remap the lane
// carefully. N.b. this trick only applies to truncations.
def VecIndex_x2 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x4 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x8 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
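// Editorial note (not part of the upstream file): a worked example of the
// index remapping performed by the XForms above.  Duplicating the
// truncated-to-i8 value of s-lane 1 of a .4s vector is the same as
// duplicating byte lane 4 * 1 = 4 (VecIndex_x4), since only the low byte of
// the extracted element is observed; this is why the trick is limited to
// truncations.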
multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT,
ValueType Src128VT, ValueType ScalVT,
Instruction DUP, SDNodeXForm IdxXFORM> {
def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn),
imm:$idx)))),
(DUP V128:$Rn, (IdxXFORM imm:$idx))>;
def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn),
imm:$idx)))),
(DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}
defm : DUPWithTruncPats<v8i8, v4i16, v8i16, i32, DUPv8i8lane, VecIndex_x2>;
defm : DUPWithTruncPats<v8i8, v2i32, v4i32, i32, DUPv8i8lane, VecIndex_x4>;
defm : DUPWithTruncPats<v4i16, v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>;
defm : DUPWithTruncPats<v16i8, v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>;
defm : DUPWithTruncPats<v16i8, v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
defm : DUPWithTruncPats<v8i16, v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>;
multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP,
SDNodeXForm IdxXFORM> {
def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v2i64 V128:$Rn),
imm:$idx))))),
(DUP V128:$Rn, (IdxXFORM imm:$idx))>;
def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v1i64 V64:$Rn),
imm:$idx))))),
(DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}
defm : DUPWithTrunci64Pats<v8i8, DUPv8i8lane, VecIndex_x8>;
defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane, VecIndex_x4>;
defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane, VecIndex_x2>;
defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>;
defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>;
defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>;
// SMOV and UMOV definitions, with some extra patterns for convenience
defm SMOV : SMov;
defm UMOV : UMov;
def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
(i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
(i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
(i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
(i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
(i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
(i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;
def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
VectorIndexB:$idx)))), i8),
(i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
VectorIndexH:$idx)))), i16),
(i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
// Extracting i8 or i16 elements will have the zero-extend transformed to
// an 'and' mask by type legalization since neither i8 nor i16 are legal types
// for AArch64. Match these patterns here since UMOV already zeroes out the high
// bits of the destination register.
def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx),
(i32 0xff)),
(i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),
(i32 0xffff)),
(i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
VectorIndexB:$idx)))), (i64 0xff))),
(SUBREG_TO_REG (i64 0), (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx)), sub_32)>;
def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
VectorIndexH:$idx)))), (i64 0xffff))),
(SUBREG_TO_REG (i64 0), (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx)), sub_32)>;
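// Editorial illustration (not part of the upstream file): the masked extract
// the UMOV patterns above fold away; the function name is made up.
//
//   #include <arm_neon.h>
//   #include <stdint.h>
//   /* Legalization turns the u8 extract into an i32 extract plus an 'and'
//      with 0xff; a single umov w0, v0.b[3] already zeroes the high bits. */
//   uint32_t lane3(uint8x16_t v) { return vgetq_lane_u8(v, 3); }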
defm INS : SIMDIns;
def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
(SUBREG_TO_REG (i32 0),
(f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
(SUBREG_TO_REG (i32 0),
(f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
(SUBREG_TO_REG (i32 0),
(f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
(SUBREG_TO_REG (i32 0),
(f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
(INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
(INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
(INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
(INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))),
(v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
(i32 FPR32:$Rn), ssub))>;
def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))),
(v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
(i32 FPR32:$Rn), ssub))>;
def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))),
(v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
(i64 FPR64:$Rn), dsub))>;
def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
(INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
(INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
(INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
(INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;
def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
(f16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
(EXTRACT_SUBREG
(INSvi16lane
(v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
VectorIndexS:$imm,
(v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
(i64 0)),
dsub)>;
def : Pat<(vector_insert (v8f16 v8f16:$Rn), (f16 fpimm0),
(i64 VectorIndexH:$imm)),
(INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>;
def : Pat<(vector_insert v4f32:$Rn, (f32 fpimm0),
(i64 VectorIndexS:$imm)),
(INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>;
def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0),
(i64 VectorIndexD:$imm)),
(INSvi64gpr V128:$Rn, VectorIndexS:$imm, XZR)>;
def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn),
(f16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
(INSvi16lane
V128:$Rn, VectorIndexH:$imm,
(v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
(i64 0))>;
def : Pat<(v4bf16 (vector_insert (v4bf16 V64:$Rn),
(bf16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
(EXTRACT_SUBREG
(INSvi16lane
(v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
VectorIndexS:$imm,
(v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
(i64 0)),
dsub)>;
def : Pat<(v8bf16 (vector_insert (v8bf16 V128:$Rn),
(bf16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
(INSvi16lane
V128:$Rn, VectorIndexH:$imm,
(v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
(i64 0))>;
def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn),
(f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
(EXTRACT_SUBREG
(INSvi32lane
(v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
VectorIndexS:$imm,
(v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
(i64 0)),
dsub)>;
def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn),
(f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
(INSvi32lane
V128:$Rn, VectorIndexS:$imm,
(v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
(i64 0))>;
def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn),
(f64 FPR64:$Rm), (i64 VectorIndexD:$imm))),
(INSvi64lane
V128:$Rn, VectorIndexD:$imm,
(v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)),
(i64 0))>;
// Copy an element at a constant index in one vector into a constant indexed
// element of another.
// FIXME: refactor to a shared class/def parameterized on vector type, vector
// index type and INS extension
def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane
(v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs),
VectorIndexB:$idx2)),
(v16i8 (INSvi8lane
V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2)
)>;
def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane
(v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs),
VectorIndexH:$idx2)),
(v8i16 (INSvi16lane
V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2)
)>;
def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane
(v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs),
VectorIndexS:$idx2)),
(v4i32 (INSvi32lane
V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2)
)>;
def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane
(v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs),
VectorIndexD:$idx2)),
(v2i64 (INSvi64lane
V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2)
)>;
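// Editorial illustration (not part of the upstream file): one way such a
// lane-to-lane copy can be written in C; the function name is made up and
// the exact route to the ins instruction depends on the front end.
//
//   #include <arm_neon.h>
//   /* Copy lane 5 of b into lane 3 of a: a single ins v0.b[3], v1.b[5]. */
//   uint8x16_t copy_lane(uint8x16_t a, uint8x16_t b) {
//     return vcopyq_laneq_u8(a, 3, b, 5);
//   }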
multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
ValueType VTScal, Instruction INS> {
def : Pat<(VT128 (vector_insert V128:$src,
(VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
imm:$Immd)),
(INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>;
def : Pat<(VT128 (vector_insert V128:$src,
(VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
imm:$Immd)),
(INS V128:$src, imm:$Immd,
(SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;
def : Pat<(VT64 (vector_insert V64:$src,
(VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
imm:$Immd)),
(EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
imm:$Immd, V128:$Rn, imm:$Immn),
dsub)>;
def : Pat<(VT64 (vector_insert V64:$src,
(VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
imm:$Immd)),
(EXTRACT_SUBREG
(INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd,
(SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn),
dsub)>;
}
defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v8bf16, v4bf16, bf16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;
// Floating point vector extractions are codegen'd as either a sequence of
// subregister extractions (for lane 0), or a MOV (aka DUP here) if the lane
// number is anything other than zero.
def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
(f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
(f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), 0),
(f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
def : Pat<(vector_extract (v8bf16 V128:$Rn), 0),
(bf16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
(f64 (DUPi64 V128:$Rn, VectorIndexD:$idx))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
(f32 (DUPi32 V128:$Rn, VectorIndexS:$idx))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
(f16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx),
(bf16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;
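// Editorial illustration (not part of the upstream file): lane 0 versus a
// non-zero lane for the patterns above; function names are made up and the
// exact assembly depends on register allocation.
//
//   #include <arm_neon.h>
//   float lane0(float32x4_t v) { return vgetq_lane_f32(v, 0); } /* ssub copy      */
//   float lane1(float32x4_t v) { return vgetq_lane_f32(v, 1); } /* mov s0, v0.s[1] */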
// All concat_vectors operations are canonicalised to act on i64 vectors for
// AArch64. In the general case we need an instruction, which might just as
// well be INS.
class ConcatPat<ValueType DstTy, ValueType SrcTy>
: Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)),
(INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>;
def : ConcatPat<v2i64, v1i64>;
def : ConcatPat<v2f64, v1f64>;
def : ConcatPat<v4i32, v2i32>;
def : ConcatPat<v4f32, v2f32>;
def : ConcatPat<v8i16, v4i16>;
def : ConcatPat<v8f16, v4f16>;
def : ConcatPat<v8bf16, v4bf16>;
def : ConcatPat<v16i8, v8i8>;
// If the high lanes are undef, though, we can just ignore them:
class ConcatUndefPat<ValueType DstTy, ValueType SrcTy>
: Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)),
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>;
def : ConcatUndefPat<v2i64, v1i64>;
def : ConcatUndefPat<v2f64, v1f64>;
def : ConcatUndefPat<v4i32, v2i32>;
def : ConcatUndefPat<v4f32, v2f32>;
def : ConcatUndefPat<v8i16, v4i16>;
def : ConcatUndefPat<v16i8, v8i8>;
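// Editorial illustration (not part of the upstream file): a concat of two
// 64-bit halves; the function name is made up.
//
//   #include <arm_neon.h>
//   /* In general this needs one ins (mov v0.d[1], v1.d[0]); if the high
//      half were undef, placing the low half in the d subregister would be
//      enough, as the ConcatUndefPat patterns above express. */
//   float32x4_t concat(float32x2_t lo, float32x2_t hi) {
//     return vcombine_f32(lo, hi);
//   }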
//----------------------------------------------------------------------------
// AdvSIMD across lanes instructions
//----------------------------------------------------------------------------
defm ADDV : SIMDAcrossLanesBHS<0, 0b11011, "addv">;
defm SMAXV : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">;
defm SMINV : SIMDAcrossLanesBHS<0, 0b11010, "sminv">;
defm UMAXV : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
defm UMINV : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
defm SADDLV : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>;
defm FMAXV : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>;
defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>;
defm FMINV : SIMDFPAcrossLanes<0b01111, 1, "fminv", int_aarch64_neon_fminv>;
// Patterns for uaddv(uaddlp(x)) ==> uaddlv
def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef,
(v4i16 (AArch64uaddv (v4i16 (AArch64uaddlp (v8i8 V64:$op))))),
(i64 0))), (i64 0))),
(EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
(UADDLVv8i8v V64:$op), hsub), ssub)>;
def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (AArch64uaddlp
(v16i8 V128:$op))))), (i64 0))),
(EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
(UADDLVv16i8v V128:$op), hsub), ssub)>;
def : Pat<(v4i32 (AArch64uaddv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (UADDLVv8i16v V128:$op), ssub)>;
// Patterns for addp(uaddlp(x)) ==> uaddlv
def : Pat<(v2i32 (AArch64uaddv (v2i32 (AArch64uaddlp (v4i16 V64:$op))))),
(INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (UADDLVv4i16v V64:$op), ssub)>;
def : Pat<(v2i64 (AArch64uaddv (v2i64 (AArch64uaddlp (v4i32 V128:$op))))),
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (UADDLVv4i32v V128:$op), dsub)>;
// Patterns for across-vector intrinsics that have a node equivalent which
// returns a vector (with only the low lane defined) instead of a scalar.
// In effect, opNode is the same as (scalar_to_vector (IntNode)).
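// Editorial illustration (not part of the upstream file): a C-level use of
// such a reduction; the function name is made up.
//
//   #include <arm_neon.h>
//   #include <stdint.h>
//   /* addv b0, v0.8b leaves the sum in lane 0 of a vector register; the
//      patterns below match either the vector form or the scalar extract. */
//   uint8_t sum_u8(uint8x8_t v) { return vaddv_u8(v); }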
multiclass SIMDAcrossLanesIntrinsic<string baseOpc,
SDPatternOperator opNode> {
// If a lane instruction caught the vector_extract around opNode, we can
// directly match the latter to the instruction.
def : Pat<(v8i8 (opNode V64:$Rn)),
(INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>;
def : Pat<(v16i8 (opNode V128:$Rn)),
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>;
def : Pat<(v4i16 (opNode V64:$Rn)),
(INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>;
def : Pat<(v8i16 (opNode V128:$Rn)),
(INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>;
def : Pat<(v4i32 (opNode V128:$Rn)),
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>;
// If none did, fall back to the explicit patterns, consuming the vector_extract.
def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)),
(i64 0)), (i64 0))),
(EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn),
bsub), ssub)>;
def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))),
(EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn),
bsub), ssub)>;
def : Pat<(i32 (vector_extract (insert_subvector undef,
(v4i16 (opNode V64:$Rn)), (i64 0)), (i64 0))),
(EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn),
hsub), ssub)>;
def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))),
(EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn),
hsub), ssub)>;
def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))),
(EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn),
ssub), ssub)>;
}
multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,
SDPatternOperator opNode>
: SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a sign extension after this intrinsic, consume it, as SMOV
// already performs it.
def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
(opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), i8)),
(i32 (SMOVvi8to32
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
(i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract
(opNode (v16i8 V128:$Rn)), (i64 0))), i8)),
(i32 (SMOVvi8to32
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
(i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
(opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), i16)),
(i32 (SMOVvi16to32
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
(i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract
(opNode (v8i16 V128:$Rn)), (i64 0))), i16)),
(i32 (SMOVvi16to32
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
(i64 0)))>;
}
multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc,
SDPatternOperator opNode>
: SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a masking operation keeping only what has been actually
// generated, consume it.
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
(opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), maski8_or_more)),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),
maski8_or_more)),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
(opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), maski16_or_more)),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
maski16_or_more)),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
ssub))>;
}
defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>;
// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))),
(ADDPv2i32 V64:$Rn, V64:$Rn)>;
defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>;
// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))),
(ADDPv2i32 V64:$Rn, V64:$Rn)>;
defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>;
def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))),
(SMAXPv2i32 V64:$Rn, V64:$Rn)>;
defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>;
def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))),
(SMINPv2i32 V64:$Rn, V64:$Rn)>;
defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>;
def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))),
(UMAXPv2i32 V64:$Rn, V64:$Rn)>;
defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>;
def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))),
(UMINPv2i32 V64:$Rn, V64:$Rn)>;
multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> {
def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
(i32 (SMOVvi16to32
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
(i64 0)))>;
def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
(i32 (SMOVvi16to32
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
(i64 0)))>;
def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
ssub))>;
def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
ssub))>;
def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
(i64 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
dsub))>;
}
multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc,
Intrinsic intOp> {
def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
ssub))>;
def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
ssub))>;
def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
ssub))>;
def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
(i32 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
ssub))>;
def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
(i64 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
dsub))>;
}
defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>;
defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>;
// The vaddlv_s32 intrinsic gets mapped to SADDLP.
def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))),
(i64 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(SADDLPv2i32_v1i64 V64:$Rn), dsub),
dsub))>;
// The vaddlv_u32 intrinsic gets mapped to UADDLP.
def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))),
(i64 (EXTRACT_SUBREG
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(UADDLPv2i32_v1i64 V64:$Rn), dsub),
dsub))>;
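// Editorial illustration (not part of the upstream file): with only two
// 32-bit lanes the long reduction above is a single pairwise long add; the
// function name is made up.
//
//   #include <arm_neon.h>
//   #include <stdint.h>
//   int64_t sum2(int32x2_t v) { return vaddlv_s32(v); } /* saddlp v0.1d, v0.2s */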
//------------------------------------------------------------------------------
// AdvSIMD modified immediate instructions
//------------------------------------------------------------------------------
// AdvSIMD BIC
defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>;
// AdvSIMD ORR
defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>;
def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>;
def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;
// AdvSIMD FMOV
def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8,
"fmov", ".2d",
[(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64, fpimm8,
"fmov", ".2s",
[(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8,
"fmov", ".4s",
[(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
let Predicates = [HasNEON, HasFullFP16] in {
def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64, fpimm8,
"fmov", ".4h",
[(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8,
"fmov", ".8h",
[(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
} // Predicates = [HasNEON, HasFullFP16]
// AdvSIMD MOVI
// EDIT byte mask: scalar
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",
[(set FPR64:$Rd, simdimmtype10:$imm8)]>;
// The movi_edit node has the immediate value already encoded, so we use
// a plain imm0_255 here.
def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)),
(MOVID imm0_255:$shift)>;
// EDIT byte mask: 2d
// The movi_edit node has the immediate value already encoded, so we use
// a plain imm0_255 in the pattern
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,
simdimmtype10,
"movi", ".2d",
[(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;
def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>;
// Set 64-bit vectors to all 0/1 by extracting from a 128-bit register as the
// extract is free and this gives better MachineCSE results.
def : Pat<(v1i64 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v2i32 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v4i16 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v8i8 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v1i64 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v2i32 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v4i16 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v8i8 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
// EDIT per word & halfword: 2s, 4h, 4s, & 8h
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;
def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
(MOVIv2i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
(MOVIv4i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
(MOVIv4i16 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
(MOVIv8i16 imm0_255:$imm8, imm:$shift)>;
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
// EDIT per word: 2s & 4s with MSL shifter
def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s",
[(set (v2i32 V64:$Rd),
(AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s",
[(set (v4i32 V128:$Rd),
(AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
// Per byte: 8b & 16b
def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64, imm0_255,
"movi", ".8b",
[(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>;
def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255,
"movi", ".16b",
[(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>;
}
// AdvSIMD MVNI
// EDIT per word & halfword: 2s, 4h, 4s, & 8h
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm MVNI : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">;
def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni $Vd.4s, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
(MVNIv2i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
(MVNIv4i32 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
(MVNIv4i16 imm0_255:$imm8, imm:$shift)>;
def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
(MVNIv8i16 imm0_255:$imm8, imm:$shift)>;
// EDIT per word: 2s & 4s with MSL shifter
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s",
[(set (v2i32 V64:$Rd),
(AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
[(set (v4i32 V128:$Rd),
(AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
}
//----------------------------------------------------------------------------
// AdvSIMD indexed element
//----------------------------------------------------------------------------
let hasSideEffects = 0 in {
defm FMLA : SIMDFPIndexedTied<0, 0b0001, "fmla">;
defm FMLS : SIMDFPIndexedTied<0, 0b0101, "fmls">;
}
// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the
// instruction expects the addend first, while the intrinsic expects it last.
// On the other hand, there are quite a few valid combinatorial options due to
// the commutativity of multiplication and the fact that (-x) * y = x * (-y).
defm : SIMDFPIndexedTiedPatterns<"FMLA",
TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>;
defm : SIMDFPIndexedTiedPatterns<"FMLA",
TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >;
defm : SIMDFPIndexedTiedPatterns<"FMLS",
TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >;
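// For illustration: a node such as (fma x, (fneg y), acc) can reach selection
// with the negation attached to either multiplicand and with the multiplicands
// in either order, which is why each FMLS pattern above is instantiated for
// (fneg $RHS) and (fneg $MHS) and for both operand orders.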
multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
// 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit
// and DUP scalar.
def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
(AArch64duplane32 (v4f32 (fneg V128:$Rm)),
VectorIndexS:$idx))),
(FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>;
def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
(v2f32 (AArch64duplane32
(v4f32 (insert_subvector undef,
(v2f32 (fneg V64:$Rm)),
(i64 0))),
VectorIndexS:$idx)))),
(FMLSv2i32_indexed V64:$Rd, V64:$Rn,
(SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
VectorIndexS:$idx)>;
def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
(AArch64dup (f32 (fneg FPR32Op:$Rm))))),
(FMLSv2i32_indexed V64:$Rd, V64:$Rn,
(SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;
// 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit
// and DUP scalar.
def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
(AArch64duplane32 (v4f32 (fneg V128:$Rm)),
VectorIndexS:$idx))),
(FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm,
VectorIndexS:$idx)>;
def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
(v4f32 (AArch64duplane32
(v4f32 (insert_subvector undef,
(v2f32 (fneg V64:$Rm)),
(i64 0))),
VectorIndexS:$idx)))),
(FMLSv4i32_indexed V128:$Rd, V128:$Rn,
(SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
VectorIndexS:$idx)>;
def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
(AArch64dup (f32 (fneg FPR32Op:$Rm))))),
(FMLSv4i32_indexed V128:$Rd, V128:$Rn,
(SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;
// 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar
// (DUPLANE from 64-bit would be trivial).
def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
(AArch64duplane64 (v2f64 (fneg V128:$Rm)),
VectorIndexD:$idx))),
(FMLSv2i64_indexed
V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>;
def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
(AArch64dup (f64 (fneg FPR64Op:$Rm))))),
(FMLSv2i64_indexed V128:$Rd, V128:$Rn,
(SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>;
// 2 variants for 32-bit scalar version: extract from .2s or from .4s
def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
(vector_extract (v4f32 (fneg V128:$Rm)),
VectorIndexS:$idx))),
(FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
V128:$Rm, VectorIndexS:$idx)>;
def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
(vector_extract (v4f32 (insert_subvector undef,
(v2f32 (fneg V64:$Rm)),
(i64 0))),
VectorIndexS:$idx))),
(FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
(SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;
// 1 variant for 64-bit scalar version: extract from .1d or from .2d
def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn),
(vector_extract (v2f64 (fneg V128:$Rm)),
VectorIndexS:$idx))),
(FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn,
V128:$Rm, VectorIndexS:$idx)>;
}
defm : FMLSIndexedAfterNegPatterns<
TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >;
defm : FMLSIndexedAfterNegPatterns<
TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >;
defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
defm FMUL : SIMDFPIndexed<0, 0b1001, "fmul", fmul>;
def : Pat<(v2f32 (fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
(FMULv2i32_indexed V64:$Rn,
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
(i64 0))>;
def : Pat<(v4f32 (fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
(FMULv4i32_indexed V128:$Rn,
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
(i64 0))>;
def : Pat<(v2f64 (fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
(FMULv2i64_indexed V128:$Rn,
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub),
(i64 0))>;
defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
defm SQDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqdmulh_lane,
int_aarch64_neon_sqdmulh_laneq>;
defm SQRDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqrdmulh_lane,
int_aarch64_neon_sqrdmulh_laneq>;
// Generated by MachineCombine
defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla", null_frag>;
defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", null_frag>;
defm MUL : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>;
defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal",
TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl",
TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull",
int_aarch64_neon_smull>;
defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
int_aarch64_neon_sqsub>;
defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
int_aarch64_neon_sqrdmlah>;
defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
int_aarch64_neon_sqrdmlsh>;
defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>;
defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
defm UMLSL : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl",
TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull",
int_aarch64_neon_umull>;
// A scalar sqdmull with the second operand being a vector lane can be
// handled directly with the indexed instruction encoding.
def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
(vector_extract (v4i32 V128:$Vm),
VectorIndexS:$idx)),
(SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>;
// Match an add node, and also treat an 'or' node as an 'add' if the or'ed
// operands have no common bits set.
def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
[(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{
if (N->getOpcode() == ISD::ADD)
return true;
return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
}]> {
let GISelPredicateCode = [{
    // Only handle G_ADD for now. FIXME: build the capability to compute whether
    // the operands of a G_OR have common bits set or not.
return MI.getOpcode() == TargetOpcode::G_ADD;
}];
}
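// For illustration: if the operands of an 'or' share no set bits, the 'or' has
// the same value as an 'add', e.g. (or (shl x, 8), (and y, 0xff)) equals
// (add (shl x, 8), (and y, 0xff)), so accumulate patterns written with
// add_and_or_is_add (such as SSRA/USRA below) also match that 'or' form.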
//----------------------------------------------------------------------------
// AdvSIMD scalar shift instructions
//----------------------------------------------------------------------------
defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">;
defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">;
defm SCVTF : SIMDFPScalarRShift<0, 0b11100, "scvtf">;
defm UCVTF : SIMDFPScalarRShift<1, 0b11100, "ucvtf">;
// Codegen patterns for the above. We don't put these directly on the
// instructions because TableGen's type inference can't handle the truth.
// Having the same base pattern for fp <--> int totally freaks it out.
def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm),
(FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm),
(FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)),
(FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)),
(FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn),
vecshiftR64:$imm)),
(FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn),
vecshiftR64:$imm)),
(FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),
(UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
(UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn),
vecshiftR64:$imm)),
(SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
(SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
vecshiftR64:$imm)),
(UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
(SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
// Patterns for FP16 intrinsics - requires a reg copy to/from the FPR as i16 is not supported.
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)),
(SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
(SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
(SCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp
(and FPR32:$Rn, (i32 65535)),
vecshiftR16:$imm)),
(UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
(UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
(UCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)),
(i32 (INSERT_SUBREG
(i32 (IMPLICIT_DEF)),
(FCVTZSh FPR16:$Rn, vecshiftR32:$imm),
hsub))>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)),
(i64 (INSERT_SUBREG
(i64 (IMPLICIT_DEF)),
(FCVTZSh FPR16:$Rn, vecshiftR64:$imm),
hsub))>;
def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR32:$imm)),
(i32 (INSERT_SUBREG
(i32 (IMPLICIT_DEF)),
(FCVTZUh FPR16:$Rn, vecshiftR32:$imm),
hsub))>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)),
(i64 (INSERT_SUBREG
(i64 (IMPLICIT_DEF)),
(FCVTZUh FPR16:$Rn, vecshiftR64:$imm),
hsub))>;
def : Pat<(i32 (int_aarch64_neon_facge (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
(i32 (INSERT_SUBREG
(i32 (IMPLICIT_DEF)),
(FACGE16 FPR16:$Rn, FPR16:$Rm),
hsub))>;
def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
(i32 (INSERT_SUBREG
(i32 (IMPLICIT_DEF)),
(FACGT16 FPR16:$Rn, FPR16:$Rm),
hsub))>;
defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", AArch64vshl>;
defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli">;
defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
int_aarch64_neon_sqrshrn>;
defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun",
int_aarch64_neon_sqrshrun>;
defm SQSHLU : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
defm SQSHL : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
defm SQSHRN : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn",
int_aarch64_neon_sqshrn>;
defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun",
int_aarch64_neon_sqshrun>;
defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri">;
defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", AArch64srshri>;
defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra",
TriOpFrag<(add node:$LHS,
(AArch64srshri node:$MHS, node:$RHS))>>;
defm SSHR : SIMDScalarRShiftD< 0, 0b00000, "sshr", AArch64vashr>;
defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra",
TriOpFrag<(add_and_or_is_add node:$LHS,
(AArch64vashr node:$MHS, node:$RHS))>>;
defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn",
int_aarch64_neon_uqrshrn>;
defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn",
int_aarch64_neon_uqshrn>;
defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", AArch64urshri>;
defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra",
TriOpFrag<(add node:$LHS,
(AArch64urshri node:$MHS, node:$RHS))>>;
defm USHR : SIMDScalarRShiftD< 1, 0b00000, "ushr", AArch64vlshr>;
defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra",
TriOpFrag<(add_and_or_is_add node:$LHS,
(AArch64vlshr node:$MHS, node:$RHS))>>;
//----------------------------------------------------------------------------
// AdvSIMD vector shift instructions
//----------------------------------------------------------------------------
defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>;
defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;
defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf",
int_aarch64_neon_vcvtfxs2fp>;
defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn",
int_aarch64_neon_rshrn>;
defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;
defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;
defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>;
def : Pat<(v1i64 (AArch64vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
(i32 vecshiftL64:$imm))),
(SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>;
defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn",
int_aarch64_neon_sqrshrn>;
defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun",
int_aarch64_neon_sqrshrun>;
defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
defm SQSHL : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
int_aarch64_neon_sqshrn>;
defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
int_aarch64_neon_sqshrun>;
defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>;
def : Pat<(v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
(i32 vecshiftR64:$imm))),
(SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>;
defm SRSHR : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>;
defm SRSRA : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra",
TriOpFrag<(add node:$LHS,
(AArch64srshri node:$MHS, node:$RHS))> >;
defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",
BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>;
defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>;
defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra",
TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;
defm UCVTF : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf",
int_aarch64_neon_vcvtfxu2fp>;
defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
int_aarch64_neon_uqrshrn>;
defm UQSHL : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
defm UQSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",
int_aarch64_neon_uqshrn>;
defm URSHR : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>;
defm URSRA : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",
TriOpFrag<(add node:$LHS,
(AArch64urshri node:$MHS, node:$RHS))> >;
defm USHLL : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll",
BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>;
defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;
// RADDHN patterns for when RSHRN shifts by half the size of the vector element
def : Pat<(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))),
(RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))),
(RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))),
(RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;
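// For illustration: with a zero second operand, RADDHN computes
// narrow((Vn + 0 + (1 << (esize/2 - 1))) >> (esize/2)), where esize is the
// source element size, which is the same value as RSHRN Vn, #(esize/2); the
// all-zero MOVI simply supplies the unused addend.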
// RADDHN2 patterns for when RSHRN shifts by half the size of the vector element
def : Pat<(v16i8 (concat_vectors
(v8i8 V64:$Vd),
(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))))),
(RADDHNv8i16_v16i8
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
(v8i16 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v8i16 (concat_vectors
(v4i16 V64:$Vd),
(v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))))),
(RADDHNv4i32_v8i16
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
(v4i32 (MOVIv2d_ns (i32 0))))>;
def : Pat<(v4i32 (concat_vectors
(v2i32 V64:$Vd),
(v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))))),
(RADDHNv2i64_v4i32
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
(v2i64 (MOVIv2d_ns (i32 0))))>;
// SHRN patterns for when a logical right shift was used instead of arithmetic
// (the immediate guarantees no sign bits actually end up in the result so it
// doesn't matter).
def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))),
(SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>;
def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))),
(SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>;
def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))),
(SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>;
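// For illustration: the narrowing shift amounts above are at most half the
// source element width, so the bits a logical shift fills in at the top can
// never reach the low half kept by the truncate; trunc(lshr x, imm) and
// trunc(ashr x, imm) are therefore the same value and both can use SHRN.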
def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd),
(trunc (AArch64vlshr (v8i16 V128:$Rn),
vecshiftR16Narrow:$imm)))),
(SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
V128:$Rn, vecshiftR16Narrow:$imm)>;
def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd),
(trunc (AArch64vlshr (v4i32 V128:$Rn),
vecshiftR32Narrow:$imm)))),
(SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
V128:$Rn, vecshiftR32Narrow:$imm)>;
def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd),
(trunc (AArch64vlshr (v2i64 V128:$Rn),
vecshiftR64Narrow:$imm)))),
(SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
V128:$Rn, vecshiftR32Narrow:$imm)>;
// Vector sign and zero extensions are implemented with SSHLL and USHLL.
// Anyexts are implemented as zexts.
def : Pat<(v8i16 (sext (v8i8 V64:$Rn))), (SSHLLv8i8_shift V64:$Rn, (i32 0))>;
def : Pat<(v8i16 (zext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>;
def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (sext (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (zext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (sext (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (zext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
// Also match an extend from the upper half of a 128 bit source register.
def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
(USHLLv16i8_shift V128:$Rn, (i32 0))>;
def : Pat<(v8i16 (zext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
(USHLLv16i8_shift V128:$Rn, (i32 0))>;
def : Pat<(v8i16 (sext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
(SSHLLv16i8_shift V128:$Rn, (i32 0))>;
def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
(USHLLv8i16_shift V128:$Rn, (i32 0))>;
def : Pat<(v4i32 (zext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
(USHLLv8i16_shift V128:$Rn, (i32 0))>;
def : Pat<(v4i32 (sext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
(SSHLLv8i16_shift V128:$Rn, (i32 0))>;
def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
(USHLLv4i32_shift V128:$Rn, (i32 0))>;
def : Pat<(v2i64 (zext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
(USHLLv4i32_shift V128:$Rn, (i32 0))>;
def : Pat<(v2i64 (sext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
(SSHLLv4i32_shift V128:$Rn, (i32 0))>;
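// For illustration: the "2" forms (e.g. ushll2 v0.8h, v0.16b, #0) widen the
// top half of the 128-bit source directly, so no separate extraction of the
// upper 64 bits is required for these patterns.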
// Vector shift sxtl aliases
def : InstAlias<"sxtl.8h $dst, $src1",
(SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl $dst.8h, $src1.8b",
(SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl.4s $dst, $src1",
(SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl $dst.4s, $src1.4h",
(SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl.2d $dst, $src1",
(SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"sxtl $dst.2d, $src1.2s",
(SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
// Vector shift sxtl2 aliases
def : InstAlias<"sxtl2.8h $dst, $src1",
(SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2 $dst.8h, $src1.16b",
(SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2.4s $dst, $src1",
(SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2 $dst.4s, $src1.8h",
(SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2.2d $dst, $src1",
(SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"sxtl2 $dst.2d, $src1.4s",
(SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
// Vector shift uxtl aliases
def : InstAlias<"uxtl.8h $dst, $src1",
(USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl $dst.8h, $src1.8b",
(USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl.4s $dst, $src1",
(USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl $dst.4s, $src1.4h",
(USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl.2d $dst, $src1",
(USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
def : InstAlias<"uxtl $dst.2d, $src1.2s",
(USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
// Vector shift uxtl2 aliases
def : InstAlias<"uxtl2.8h $dst, $src1",
(USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.8h, $src1.16b",
(USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2.4s $dst, $src1",
(USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.4s, $src1.8h",
(USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2.2d $dst, $src1",
(USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
def : InstAlias<"uxtl2 $dst.2d, $src1.4s",
(USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// These patterns are more complex because floating point loads do not
// support sign extension.
// The sign extension has to be explicitly added and is only supported for
// one step: byte-to-half, half-to-word, word-to-doubleword.
// SCVTF GPR -> FPR is 9 cycles.
// SCVTF FPR -> FPR is 4 cycles.
// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles.
// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
// and still be faster.
// However, this is not good for code size.
// 8-bits -> float. 2 sizes step-up.
class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
: Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))),
(SCVTFv1i32 (f32 (EXTRACT_SUBREG
(SSHLLv4i16_shift
(f64
(EXTRACT_SUBREG
(SSHLLv8i8_shift
(INSERT_SUBREG (f64 (IMPLICIT_DEF)),
INST,
bsub),
0),
dsub)),
0),
ssub)))>,
Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
(LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext),
(LDRBroX GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>;
def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset),
(LDRBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset),
(LDURBi GPR64sp:$Rn, simm9:$offset)>;
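// For illustration (register assignments are hypothetical): for
//   float f = (float)*(int8_t *)p;
// the patterns above select roughly
//   ldr   b0, [x0]
//   sshll v0.8h, v0.8b, #0
//   sshll v0.4s, v0.4h, #0
//   scvtf s0, s0
// instead of an ldrsb plus a GPR -> FPR SCVTF, trading code size for latency
// as described above.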
// 16-bits -> float. 1 size step-up.
class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
: Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))),
(SCVTFv1i32 (f32 (EXTRACT_SUBREG
(SSHLLv4i16_shift
(INSERT_SUBREG (f64 (IMPLICIT_DEF)),
INST,
hsub),
0),
ssub)))>, Requires<[NotForCodeSize]>;
def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
(LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
(LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
(LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
(LDURHi GPR64sp:$Rn, simm9:$offset)>;
// 32-bit to 32-bit conversions are handled in the target-specific dag
// combine, performIntToFpCombine.
// 64-bit integer to 32-bit floating point is not possible with SCVTF on
// floating point registers (both source and destination must have the same
// size).
// Here are the patterns for 8-, 16-, 32-, and 64-bit integers to double.
// 8-bits -> double. 3 size step-up: give up.
// 16-bits -> double. 2 size step.
class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
: Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))),
(SCVTFv1i64 (f64 (EXTRACT_SUBREG
(SSHLLv2i32_shift
(f64
(EXTRACT_SUBREG
(SSHLLv4i16_shift
(INSERT_SUBREG (f64 (IMPLICIT_DEF)),
INST,
hsub),
0),
dsub)),
0),
dsub)))>,
Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
(LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
(LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
(LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
(LDURHi GPR64sp:$Rn, simm9:$offset)>;
// 32-bits -> double. 1 size step-up.
class SExtLoadi32CVTf64Pat<dag addrmode, dag INST>
: Pat <(f64 (sint_to_fp (i32 (load addrmode)))),
(SCVTFv1i64 (f64 (EXTRACT_SUBREG
(SSHLLv2i32_shift
(INSERT_SUBREG (f64 (IMPLICIT_DEF)),
INST,
ssub),
0),
dsub)))>, Requires<[NotForCodeSize]>;
def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext),
(LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>;
def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext),
(LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>;
def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset),
(LDRSui GPR64sp:$Rn, uimm12s4:$offset)>;
def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset),
(LDURSi GPR64sp:$Rn, simm9:$offset)>;
// 64-bit integer to double conversions are handled in the target-specific dag
// combine, performIntToFpCombine.
//----------------------------------------------------------------------------
// AdvSIMD Load-Store Structure
//----------------------------------------------------------------------------
defm LD1 : SIMDLd1Multiple<"ld1">;
defm LD2 : SIMDLd2Multiple<"ld2">;
defm LD3 : SIMDLd3Multiple<"ld3">;
defm LD4 : SIMDLd4Multiple<"ld4">;
defm ST1 : SIMDSt1Multiple<"st1">;
defm ST2 : SIMDSt2Multiple<"st2">;
defm ST3 : SIMDSt3Multiple<"st3">;
defm ST4 : SIMDSt4Multiple<"st4">;
class Ld1Pat<ValueType ty, Instruction INST>
: Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>;
def : Ld1Pat<v16i8, LD1Onev16b>;
def : Ld1Pat<v8i16, LD1Onev8h>;
def : Ld1Pat<v4i32, LD1Onev4s>;
def : Ld1Pat<v2i64, LD1Onev2d>;
def : Ld1Pat<v8i8, LD1Onev8b>;
def : Ld1Pat<v4i16, LD1Onev4h>;
def : Ld1Pat<v2i32, LD1Onev2s>;
def : Ld1Pat<v1i64, LD1Onev1d>;
class St1Pat<ValueType ty, Instruction INST>
: Pat<(store ty:$Vt, GPR64sp:$Rn),
(INST ty:$Vt, GPR64sp:$Rn)>;
def : St1Pat<v16i8, ST1Onev16b>;
def : St1Pat<v8i16, ST1Onev8h>;
def : St1Pat<v4i32, ST1Onev4s>;
def : St1Pat<v2i64, ST1Onev2d>;
def : St1Pat<v8i8, ST1Onev8b>;
def : St1Pat<v4i16, ST1Onev4h>;
def : St1Pat<v2i32, ST1Onev2s>;
def : St1Pat<v1i64, ST1Onev1d>;
//---
// Single-element
//---
defm LD1R : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>;
defm LD2R : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>;
defm LD3R : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>;
defm LD4R : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>;
let mayLoad = 1, hasSideEffects = 0 in {
defm LD1 : SIMDLdSingleBTied<0, 0b000, "ld1", VecListOneb, GPR64pi1>;
defm LD1 : SIMDLdSingleHTied<0, 0b010, 0, "ld1", VecListOneh, GPR64pi2>;
defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes, GPR64pi4>;
defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned, GPR64pi8>;
defm LD2 : SIMDLdSingleBTied<1, 0b000, "ld2", VecListTwob, GPR64pi2>;
defm LD2 : SIMDLdSingleHTied<1, 0b010, 0, "ld2", VecListTwoh, GPR64pi4>;
defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos, GPR64pi8>;
defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod, GPR64pi16>;
defm LD3 : SIMDLdSingleBTied<0, 0b001, "ld3", VecListThreeb, GPR64pi3>;
defm LD3 : SIMDLdSingleHTied<0, 0b011, 0, "ld3", VecListThreeh, GPR64pi6>;
defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>;
defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>;
defm LD4 : SIMDLdSingleBTied<1, 0b001, "ld4", VecListFourb, GPR64pi4>;
defm LD4 : SIMDLdSingleHTied<1, 0b011, 0, "ld4", VecListFourh, GPR64pi8>;
defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours, GPR64pi16>;
defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd, GPR64pi32>;
}
def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
(LD1Rv8b GPR64sp:$Rn)>;
def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
(LD1Rv16b GPR64sp:$Rn)>;
def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
(LD1Rv4h GPR64sp:$Rn)>;
def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
(LD1Rv8h GPR64sp:$Rn)>;
def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
(LD1Rv2s GPR64sp:$Rn)>;
def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
(LD1Rv4s GPR64sp:$Rn)>;
def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
(LD1Rv2d GPR64sp:$Rn)>;
def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
(LD1Rv1d GPR64sp:$Rn)>;
// Grab the floating point version too
def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
(LD1Rv2s GPR64sp:$Rn)>;
def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
(LD1Rv4s GPR64sp:$Rn)>;
def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
(LD1Rv2d GPR64sp:$Rn)>;
def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
(LD1Rv1d GPR64sp:$Rn)>;
def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
(LD1Rv4h GPR64sp:$Rn)>;
def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
(LD1Rv8h GPR64sp:$Rn)>;
def : Pat<(v4bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))),
(LD1Rv4h GPR64sp:$Rn)>;
def : Pat<(v8bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))),
(LD1Rv8h GPR64sp:$Rn)>;
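// For illustration (register names are hypothetical): each of these selects a
// replicating load, e.g. (v4f32 (AArch64dup (f32 (load x0)))) becomes
// ld1r { v0.4s }, [x0], which loads one element and broadcasts it to every lane.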
class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction LD1>
: Pat<(vector_insert (VTy VecListOne128:$Rd),
(STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
(LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>;
def : Ld1Lane128Pat<extloadi8, VectorIndexB, v16i8, i32, LD1i8>;
def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>;
def : Ld1Lane128Pat<load, VectorIndexS, v4i32, i32, LD1i32>;
def : Ld1Lane128Pat<load, VectorIndexS, v4f32, f32, LD1i32>;
def : Ld1Lane128Pat<load, VectorIndexD, v2i64, i64, LD1i64>;
def : Ld1Lane128Pat<load, VectorIndexD, v2f64, f64, LD1i64>;
def : Ld1Lane128Pat<load, VectorIndexH, v8f16, f16, LD1i16>;
def : Ld1Lane128Pat<load, VectorIndexH, v8bf16, bf16, LD1i16>;
// Generate LD1 for extload if memory type does not match the
// destination type, for example:
//
// (v4i32 (insert_vector_elt (load anyext from i8) idx))
//
// In this case, the index must be adjusted to match LD1 type.
//
class Ld1Lane128IdxOpPat<SDPatternOperator scalar_load, Operand
VecIndex, ValueType VTy, ValueType STy,
Instruction LD1, SDNodeXForm IdxOp>
: Pat<(vector_insert (VTy VecListOne128:$Rd),
(STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
(LD1 VecListOne128:$Rd, (IdxOp VecIndex:$idx), GPR64sp:$Rn)>;
def VectorIndexStoH : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
}]>;
def VectorIndexStoB : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue() * 4, SDLoc(N), MVT::i64);
}]>;
def VectorIndexHtoB : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
}]>;
def : Ld1Lane128IdxOpPat<extloadi16, VectorIndexS, v4i32, i32, LD1i16, VectorIndexStoH>;
def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexS, v4i32, i32, LD1i8, VectorIndexStoB>;
def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexH, v8i16, i32, LD1i8, VectorIndexHtoB>;
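// For illustration: LD1i8 indexes byte lanes, so inserting an i8 extload into
// lane 1 of a v4i32 rescales the index with VectorIndexStoB to 1 * 4 = 4,
// i.e. roughly ld1 { v0.b }[4], [x0] (lane numbering shown for little-endian).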
// Same as above, but the first element is populated using
// scalar_to_vector + insert_subvector instead of insert_vector_elt.
class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy,
SDPatternOperator ExtLoad, Instruction LD1>
: Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))),
(ResultTy (EXTRACT_SUBREG
(LD1 (VecTy (IMPLICIT_DEF)), 0, GPR64sp:$Rn), dsub))>;
def : Ld1Lane128FirstElm<v2i32, v8i16, extloadi16, LD1i16>;
def : Ld1Lane128FirstElm<v2i32, v16i8, extloadi8, LD1i8>;
def : Ld1Lane128FirstElm<v4i16, v16i8, extloadi8, LD1i8>;
class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction LD1>
: Pat<(vector_insert (VTy VecListOne64:$Rd),
(STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
(EXTRACT_SUBREG
(LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
VecIndex:$idx, GPR64sp:$Rn),
dsub)>;
def : Ld1Lane64Pat<extloadi8, VectorIndexB, v8i8, i32, LD1i8>;
def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>;
def : Ld1Lane64Pat<load, VectorIndexS, v2i32, i32, LD1i32>;
def : Ld1Lane64Pat<load, VectorIndexS, v2f32, f32, LD1i32>;
def : Ld1Lane64Pat<load, VectorIndexH, v4f16, f16, LD1i16>;
def : Ld1Lane64Pat<load, VectorIndexH, v4bf16, bf16, LD1i16>;
defm LD1 : SIMDLdSt1SingleAliases<"ld1">;
defm LD2 : SIMDLdSt2SingleAliases<"ld2">;
defm LD3 : SIMDLdSt3SingleAliases<"ld3">;
defm LD4 : SIMDLdSt4SingleAliases<"ld4">;
// Stores
defm ST1 : SIMDStSingleB<0, 0b000, "st1", VecListOneb, GPR64pi1>;
defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh, GPR64pi2>;
defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>;
defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>;
let AddedComplexity = 19 in
class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction ST1>
: Pat<(scalar_store
(STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
GPR64sp:$Rn),
(ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>;
def : St1Lane128Pat<truncstorei8, VectorIndexB, v16i8, i32, ST1i8>;
def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>;
def : St1Lane128Pat<store, VectorIndexS, v4i32, i32, ST1i32>;
def : St1Lane128Pat<store, VectorIndexS, v4f32, f32, ST1i32>;
def : St1Lane128Pat<store, VectorIndexD, v2i64, i64, ST1i64>;
def : St1Lane128Pat<store, VectorIndexD, v2f64, f64, ST1i64>;
def : St1Lane128Pat<store, VectorIndexH, v8f16, f16, ST1i16>;
def : St1Lane128Pat<store, VectorIndexH, v8bf16, bf16, ST1i16>;
let AddedComplexity = 19 in
class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction ST1>
: Pat<(scalar_store
(STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
GPR64sp:$Rn),
(ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
VecIndex:$idx, GPR64sp:$Rn)>;
def : St1Lane64Pat<truncstorei8, VectorIndexB, v8i8, i32, ST1i8>;
def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>;
def : St1Lane64Pat<store, VectorIndexS, v2i32, i32, ST1i32>;
def : St1Lane64Pat<store, VectorIndexS, v2f32, f32, ST1i32>;
def : St1Lane64Pat<store, VectorIndexH, v4f16, f16, ST1i16>;
def : St1Lane64Pat<store, VectorIndexH, v4bf16, bf16, ST1i16>;
multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction ST1,
int offset> {
def : Pat<(scalar_store
(STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
GPR64sp:$Rn, offset),
(ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
VecIndex:$idx, GPR64sp:$Rn, XZR)>;
def : Pat<(scalar_store
(STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
GPR64sp:$Rn, GPR64:$Rm),
(ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
}
defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>;
defm : St1LanePost64Pat<post_truncsti16, VectorIndexH, v4i16, i32, ST1i16_POST,
2>;
defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>;
defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>;
defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>;
defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>;
defm : St1LanePost64Pat<post_store, VectorIndexH, v4f16, f16, ST1i16_POST, 2>;
defm : St1LanePost64Pat<post_store, VectorIndexH, v4bf16, bf16, ST1i16_POST, 2>;
multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction ST1,
int offset> {
def : Pat<(scalar_store
(STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
GPR64sp:$Rn, offset),
(ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>;
def : Pat<(scalar_store
(STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
GPR64sp:$Rn, GPR64:$Rm),
(ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
}
defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST,
1>;
defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST,
2>;
defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>;
defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>;
defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>;
defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>;
defm : St1LanePost128Pat<post_store, VectorIndexH, v8f16, f16, ST1i16_POST, 2>;
defm : St1LanePost128Pat<post_store, VectorIndexH, v8bf16, bf16, ST1i16_POST, 2>;
let mayStore = 1, hasSideEffects = 0 in {
defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>;
defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>;
defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, GPR64pi8>;
defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod, GPR64pi16>;
defm ST3 : SIMDStSingleB<0, 0b001, "st3", VecListThreeb, GPR64pi3>;
defm ST3 : SIMDStSingleH<0, 0b011, 0, "st3", VecListThreeh, GPR64pi6>;
defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>;
defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>;
defm ST4 : SIMDStSingleB<1, 0b001, "st4", VecListFourb, GPR64pi4>;
defm ST4 : SIMDStSingleH<1, 0b011, 0, "st4", VecListFourh, GPR64pi8>;
defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours, GPR64pi16>;
defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd, GPR64pi32>;
}
defm ST1 : SIMDLdSt1SingleAliases<"st1">;
defm ST2 : SIMDLdSt2SingleAliases<"st2">;
defm ST3 : SIMDLdSt3SingleAliases<"st3">;
defm ST4 : SIMDLdSt4SingleAliases<"st4">;
//----------------------------------------------------------------------------
// Crypto extensions
//----------------------------------------------------------------------------
let Predicates = [HasAES] in {
def AESErr : AESTiedInst<0b0100, "aese", int_aarch64_crypto_aese>;
def AESDrr : AESTiedInst<0b0101, "aesd", int_aarch64_crypto_aesd>;
def AESMCrr : AESInst< 0b0110, "aesmc", int_aarch64_crypto_aesmc>;
def AESIMCrr : AESInst< 0b0111, "aesimc", int_aarch64_crypto_aesimc>;
}
// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required
// for AES fusion on some CPUs.
let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
Sched<[WriteVq]>;
def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
Sched<[WriteVq]>;
}
// Only use constrained versions of AES(I)MC instructions if they are paired with
// AESE/AESD.
def : Pat<(v16i8 (int_aarch64_crypto_aesmc
(v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1),
(v16i8 V128:$src2))))),
(v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1),
(v16i8 V128:$src2)))))>,
Requires<[HasFuseAES]>;
def : Pat<(v16i8 (int_aarch64_crypto_aesimc
(v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1),
(v16i8 V128:$src2))))),
(v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1),
(v16i8 V128:$src2)))))>,
Requires<[HasFuseAES]>;
let Predicates = [HasSHA2] in {
def SHA1Crrr : SHATiedInstQSV<0b000, "sha1c", int_aarch64_crypto_sha1c>;
def SHA1Prrr : SHATiedInstQSV<0b001, "sha1p", int_aarch64_crypto_sha1p>;
def SHA1Mrrr : SHATiedInstQSV<0b010, "sha1m", int_aarch64_crypto_sha1m>;
def SHA1SU0rrr : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>;
def SHA256Hrrr : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>;
def SHA256H2rrr : SHATiedInstQQV<0b101, "sha256h2",int_aarch64_crypto_sha256h2>;
def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_aarch64_crypto_sha256su1>;
def SHA1Hrr : SHAInstSS< 0b0000, "sha1h", int_aarch64_crypto_sha1h>;
def SHA1SU1rr : SHATiedInstVV<0b0001, "sha1su1", int_aarch64_crypto_sha1su1>;
def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_aarch64_crypto_sha256su0>;
}
//----------------------------------------------------------------------------
// Compiler-pseudos
//----------------------------------------------------------------------------
// FIXME: Like for X86, these should go in their own separate .td file.
def def32 : PatLeaf<(i32 GPR32:$src), [{
return isDef32(*N);
}]>;
// In the case of a 32-bit def that is known to implicitly zero-extend,
// we can use a SUBREG_TO_REG.
def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>;
// For an anyext, we don't care what the high bits are, so we can perform an
// INSERT_SUBREG into an IMPLICIT_DEF.
def : Pat<(i64 (anyext GPR32:$src)),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;
// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and
// then assert the extension has happened.
def : Pat<(i64 (zext GPR32:$src)),
(SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
// To sign extend, we use a signed bitfield move instruction (SBFM) on the
// containing super-reg.
def : Pat<(i64 (sext GPR32:$src)),
(SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i8)), (SBFMXri GPR64:$src, 0, 7)>;
def : Pat<(i64 (sext_inreg GPR64:$src, i1)), (SBFMXri GPR64:$src, 0, 0)>;
def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>;
def : Pat<(i32 (sext_inreg GPR32:$src, i8)), (SBFMWri GPR32:$src, 0, 7)>;
def : Pat<(i32 (sext_inreg GPR32:$src, i1)), (SBFMWri GPR32:$src, 0, 0)>;
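// For illustration: these SBFM forms correspond to the usual sign-extend
// aliases, e.g. (i32 (sext_inreg GPR32:$src, i8)) selects SBFMWri $src, 0, 7,
// which disassembles as sxtb.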
def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)),
(SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
(i64 (i32shift_sext_i8 imm0_31:$imm)))>;
def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)),
(SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
(i64 (i64shift_sext_i8 imm0_63:$imm)))>;
def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)),
(SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
(i64 (i32shift_sext_i16 imm0_31:$imm)))>;
def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)),
(SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
(i64 (i64shift_sext_i16 imm0_63:$imm)))>;
def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)),
(SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
(i64 (i64shift_a imm0_63:$imm)),
(i64 (i64shift_sext_i32 imm0_63:$imm)))>;
// sra patterns have an AddedComplexity of 10, so make sure we have a higher
// AddedComplexity for the following patterns since we want to match sext + sra
// patterns before we attempt to match a single sra node.
let AddedComplexity = 20 in {
// We support all sext + sra combinations which preserve at least one bit of the
// original value which is to be sign extended. E.g. we support shifts up to
// bitwidth-1 bits.
def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)),
(SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>;
def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)),
(SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>;
def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)),
(SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>;
def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)),
(SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>;
def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)),
(SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
(i64 imm0_31:$imm), 31)>;
} // AddedComplexity = 20
// To truncate, we can simply extract from a subregister.
def : Pat<(i32 (trunc GPR64sp:$src)),
(i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>;
// __builtin_trap() uses the BRK instruction on AArch64.
def : Pat<(trap), (BRK 1)>;
def : Pat<(debugtrap), (BRK 0xF000)>;
def ubsan_trap_xform : SDNodeXForm<timm, [{
return CurDAG->getTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32);
}]>;
def ubsan_trap_imm : TImmLeaf<i32, [{
return isUInt<8>(Imm);
}], ubsan_trap_xform>;
def : Pat<(ubsantrap ubsan_trap_imm:$kind), (BRK ubsan_trap_imm:$kind)>;
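// For illustration: 'U' is 0x55, so a ubsantrap with check kind K (K < 256)
// becomes BRK #(0x5500 | K), e.g. kind 0x2a is emitted as brk #0x552a.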
// Multiply-high patterns which multiply the lower subvector using smull/umull
// and the upper subvector with smull2/umull2, then shuffle the high parts of
// both results together.
def : Pat<(v16i8 (mulhs V128:$Rn, V128:$Rm)),
(UZP2v16i8
(SMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
(EXTRACT_SUBREG V128:$Rm, dsub)),
(SMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
def : Pat<(v8i16 (mulhs V128:$Rn, V128:$Rm)),
(UZP2v8i16
(SMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
(EXTRACT_SUBREG V128:$Rm, dsub)),
(SMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
def : Pat<(v4i32 (mulhs V128:$Rn, V128:$Rm)),
(UZP2v4i32
(SMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
(EXTRACT_SUBREG V128:$Rm, dsub)),
(SMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
def : Pat<(v16i8 (mulhu V128:$Rn, V128:$Rm)),
(UZP2v16i8
(UMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
(EXTRACT_SUBREG V128:$Rm, dsub)),
(UMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
def : Pat<(v8i16 (mulhu V128:$Rn, V128:$Rm)),
(UZP2v8i16
(UMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
(EXTRACT_SUBREG V128:$Rm, dsub)),
(UMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)),
(UZP2v4i32
(UMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
(EXTRACT_SUBREG V128:$Rm, dsub)),
(UMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
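// For illustration (register assignments are hypothetical): a v8i16 mulhs
// selects roughly
//   smull  v2.4s, v0.4h, v1.4h   // products of the low halves
//   smull2 v3.4s, v0.8h, v1.8h   // products of the high halves
//   uzp2   v0.8h, v2.8h, v3.8h   // keep the upper 16 bits of each product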
// Conversions within AdvSIMD types in the same register size are free.
// But because we need a consistent lane ordering, in big endian many
// conversions require one or more REV instructions.
//
// Consider a simple memory load followed by a bitconvert then a store.
// v0 = load v2i32
// v1 = BITCAST v2i32 v0 to v4i16
// store v4i16 v2
//
// In big endian mode every memory access has an implicit byte swap. LDR and
// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that
// is, they treat the vector as a sequence of elements to be byte-swapped.
// The two pairs of instructions are fundamentally incompatible. We've decided
// to use LD1/ST1 only to simplify compiler implementation.
//
// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This makes
// the original code sequence:
// v0 = load v2i32
// v1 = REV v2i32 (implicit)
// v2 = BITCAST v2i32 v1 to v4i16
// v3 = REV v4i16 v2 (implicit)
// store v4i16 v3
//
// But this is now broken - the value stored is different to the value loaded
// due to lane reordering. To fix this, on every BITCAST we must perform two
// other REVs:
// v0 = load v2i32
// v1 = REV v2i32 (implicit)
// v2 = REV v2i32
// v3 = BITCAST v2i32 v2 to v4i16
// v4 = REV v4i16
// v5 = REV v4i16 v4 (implicit)
// store v4i16 v5
//
// This means an extra two instructions, but actually in most cases the two REV
// instructions can be combined into one. For example:
// (REV64_2s (REV64_4h X)) === (REV32_4h X)
//
// There is also no 128-bit REV instruction. This must be synthesized with an
// EXT instruction.
//
// Most bitconverts require some sort of conversion. The only exceptions are:
// a) Identity conversions - vNfX <-> vNiX
// b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX
//
// Natural vector casts (64 bit)
def : Pat<(v8i8 (AArch64NvCast (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4f16 (AArch64NvCast (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4bf16 (AArch64NvCast (v2i32 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v2i32 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32 (AArch64NvCast (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v8i8 (AArch64NvCast (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v4i16 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4f16 (AArch64NvCast (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4bf16 (AArch64NvCast (v4i16 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v8i8 (AArch64NvCast (v8i8 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4f16 (AArch64NvCast (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4bf16 (AArch64NvCast (v8i8 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32 (AArch64NvCast (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v8i8 (AArch64NvCast (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4f16 (AArch64NvCast (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4bf16 (AArch64NvCast (f64 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32 (AArch64NvCast (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1f64 (AArch64NvCast (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v8i8 (AArch64NvCast (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (AArch64NvCast (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v2i32 (AArch64NvCast (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2f32 (AArch64NvCast (v2f32 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1f64 (AArch64NvCast (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
// Natural vector casts (128 bit)
def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8bf16 (AArch64NvCast (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v4i32 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2f64 (AArch64NvCast (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v8i16 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8bf16 (AArch64NvCast (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2f64 (AArch64NvCast (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v16i8 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8bf16 (AArch64NvCast (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2f64 (AArch64NvCast (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8bf16 (AArch64NvCast (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v2i64 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2f64 (AArch64NvCast (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v4f32 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8bf16 (AArch64NvCast (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v2f64 (AArch64NvCast (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v16i8 (AArch64NvCast (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v8i16 (AArch64NvCast (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v4i32 (AArch64NvCast (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v2i64 (AArch64NvCast (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2f64 (AArch64NvCast (v2f64 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v8f16 (AArch64NvCast (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8bf16 (AArch64NvCast (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v4f32 (AArch64NvCast (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
let Predicates = [IsLE] in {
def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v4bf16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))),
(COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
(COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
(COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
(COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))),
(COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
(COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
(COPY_TO_REGCLASS V64:$Vn, GPR64)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i8 (bitconvert GPR64:$Xn)),
(REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v4i16 (bitconvert GPR64:$Xn)),
(REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v2i32 (bitconvert GPR64:$Xn)),
(REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v4f16 (bitconvert GPR64:$Xn)),
(REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v4bf16 (bitconvert GPR64:$Xn)),
(REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(v2f32 (bitconvert GPR64:$Xn)),
(REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))),
(REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
(REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
(REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
(REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))),
(REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
(REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
}
def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))),
(COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)),
(COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)),
(COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;
def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))),
(COPY_TO_REGCLASS GPR32:$Xn, FPR32)>;
def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))),
(COPY_TO_REGCLASS FPR32:$Xn, GPR32)>;
def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))),
(COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))),
(COPY_TO_REGCLASS FPR64:$Xn, GPR64)>;
def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
(COPY_TO_REGCLASS V64:$Vn, GPR64)>;
def : Pat<(f16 (bitconvert (bf16 FPR16:$src))), (f16 FPR16:$src)>;
def : Pat<(bf16 (bitconvert (f16 FPR16:$src))), (bf16 FPR16:$src)>;
let Predicates = [IsLE] in {
def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))),
(v1i64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))),
(v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))),
(v1i64 (REV64v8i8 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))),
(v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))),
(v1i64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))),
(v1i64 (REV64v2i32 FPR64:$src))>;
}
def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
let Predicates = [IsLE] in {
def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))), (v2i32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))),
(v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))),
(v2i32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))),
(v2i32 (REV32v8i8 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))),
(v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))),
(v2i32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))),
(v2i32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))),
(v2i32 (REV32v4i16 FPR64:$src))>;
}
def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
let Predicates = [IsLE] in {
def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))),
(v4i16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))),
(v4i16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))),
(v4i16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))),
(v4i16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))),
(v4i16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))),
(v4i16 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v4bf16 FPR64:$src))), (v4i16 FPR64:$src)>;
let Predicates = [IsLE] in {
def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v8i8 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (f64 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))), (v4bf16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))), (v4bf16 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))),
(v4f16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))),
(v4f16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v8i8 FPR64:$src))),
(v4f16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (f64 FPR64:$src))),
(v4f16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))),
(v4f16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))),
(v4f16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))),
(v4bf16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))),
(v4bf16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v8i8 FPR64:$src))),
(v4bf16 (REV16v8i8 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (f64 FPR64:$src))),
(v4bf16 (REV64v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))),
(v4bf16 (REV32v4i16 FPR64:$src))>;
def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))),
(v4bf16 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
def : Pat<(v4bf16 (bitconvert (v4i16 FPR64:$src))), (v4bf16 FPR64:$src)>;
let Predicates = [IsLE] in {
def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))), (v8i8 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))),
(v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))),
(v8i8 (REV32v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))),
(v8i8 (REV16v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))),
(v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))),
(v8i8 (REV32v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))),
(v8i8 (REV64v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v4f16 FPR64:$src))),
(v8i8 (REV16v8i8 FPR64:$src))>;
def : Pat<(v8i8 (bitconvert (v4bf16 FPR64:$src))),
(v8i8 (REV16v8i8 FPR64:$src))>;
}
let Predicates = [IsLE] in {
def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))), (f64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))),
(f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))),
(f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))),
(f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))),
(f64 (REV64v8i8 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v4f16 FPR64:$src))),
(f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(f64 (bitconvert (v4bf16 FPR64:$src))),
(f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;
let Predicates = [IsLE] in {
def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))), (v1f64 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))),
(v1f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))),
(v1f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))),
(v1f64 (REV64v8i8 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))),
(v1f64 (REV64v2i32 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))),
(v1f64 (REV64v4i16 FPR64:$src))>;
def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))),
(v1f64 (REV64v4i16 FPR64:$src))>;
}
def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>;
def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
let Predicates = [IsLE] in {
def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 FPR64:$src)>;
def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))), (v2f32 FPR64:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))),
(v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))),
(v2f32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))),
(v2f32 (REV32v8i8 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))),
(v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))),
(v2f32 (REV64v2i32 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))),
(v2f32 (REV32v4i16 FPR64:$src))>;
def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))),
(v2f32 (REV32v4i16 FPR64:$src))>;
}
def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
let Predicates = [IsLE] in {
def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))), (f128 FPR128:$src)>;
def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))),
(f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))),
(f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
(REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))),
(f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
(REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))),
(f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
(REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))),
(f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
(REV64v8i16 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))),
(f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))),
(f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
(REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))),
(f128 (EXTv16i8 (REV64v16i8 FPR128:$src),
(REV64v16i8 FPR128:$src), (i32 8)))>;
}
let Predicates = [IsLE] in {
def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))),
(v2f64 (EXTv16i8 FPR128:$src,
FPR128:$src, (i32 8)))>;
def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))),
(v2f64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))),
(v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))),
(v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))),
(v2f64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))),
(v2f64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))),
(v2f64 (REV64v4i32 FPR128:$src))>;
}
def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;
let Predicates = [IsLE] in {
def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))),
(v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src),
(REV64v4i32 FPR128:$src), (i32 8)))>;
def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))),
(v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))),
(v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))),
(v4f32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))),
(v4f32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))),
(v4f32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))),
(v4f32 (REV64v4i32 FPR128:$src))>;
}
def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
let Predicates = [IsLE] in {
def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))), (v2i64 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))),
(v2i64 (EXTv16i8 FPR128:$src,
FPR128:$src, (i32 8)))>;
def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))),
(v2i64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))),
(v2i64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))),
(v2i64 (REV64v16i8 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))),
(v2i64 (REV64v4i32 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))),
(v2i64 (REV64v8i16 FPR128:$src))>;
def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))),
(v2i64 (REV64v8i16 FPR128:$src))>;
}
def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;
let Predicates = [IsLE] in {
def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))), (v4i32 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))),
(v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src),
(REV64v4i32 FPR128:$src),
(i32 8)))>;
def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))),
(v4i32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))),
(v4i32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))),
(v4i32 (REV32v16i8 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))),
(v4i32 (REV64v4i32 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))),
(v4i32 (REV32v8i16 FPR128:$src))>;
def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))),
(v4i32 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;
let Predicates = [IsLE] in {
def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))),
(v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src),
(REV64v8i16 FPR128:$src),
(i32 8)))>;
def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))),
(v8i16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))),
(v8i16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))),
(v8i16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))),
(v8i16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))),
(v8i16 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>;
def : Pat<(v8i16 (bitconvert (v8bf16 FPR128:$src))), (v8i16 FPR128:$src)>;
let Predicates = [IsLE] in {
def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (f128 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v8f16 (bitconvert (f128 FPR128:$src))),
(v8f16 (EXTv16i8 (REV64v8i16 FPR128:$src),
(REV64v8i16 FPR128:$src),
(i32 8)))>;
def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))),
(v8f16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))),
(v8f16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))),
(v8f16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))),
(v8f16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))),
(v8f16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (f128 FPR128:$src))),
(v8bf16 (EXTv16i8 (REV64v8i16 FPR128:$src),
(REV64v8i16 FPR128:$src),
(i32 8)))>;
def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))),
(v8bf16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))),
(v8bf16 (REV32v8i16 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))),
(v8bf16 (REV16v16i8 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))),
(v8bf16 (REV64v8i16 FPR128:$src))>;
def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))),
(v8bf16 (REV32v8i16 FPR128:$src))>;
}
def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
def : Pat<(v8bf16 (bitconvert (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>;
let Predicates = [IsLE] in {
def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))), (v16i8 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))),
(v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src),
(REV64v16i8 FPR128:$src),
(i32 8)))>;
def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))),
(v16i8 (REV64v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))),
(v16i8 (REV32v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))),
(v16i8 (REV16v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))),
(v16i8 (REV64v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))),
(v16i8 (REV32v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))),
(v16i8 (REV16v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))),
(v16i8 (REV16v16i8 FPR128:$src))>;
}
def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))),
(EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 0))),
(EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))),
(EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))),
(EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v4bf16 (extract_subvector V128:$Rn, (i64 0))),
(EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))),
(EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))),
(EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 0))),
(EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))),
(EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))),
(EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))),
(EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))),
(EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
// A 64-bit subvector insert to the first 128-bit vector position
// is a subregister copy that needs no instruction.
multiclass InsertSubvectorUndef<ValueType Ty> {
def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (Ty 0)),
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (Ty 0)),
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (Ty 0)),
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (Ty 0)),
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (Ty 0)),
(INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (Ty 0)),
(INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
def : Pat<(insert_subvector undef, (v4bf16 FPR64:$src), (Ty 0)),
(INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (Ty 0)),
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
}
defm : InsertSubvectorUndef<i32>;
defm : InsertSubvectorUndef<i64>;
// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64
// or v2f32.
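// A sketch of what these match and emit (assembly forms assumed here):
//   add  (extractelt (v2i64 V), 0), (extractelt (v2i64 V), 1)  ->  ADDP  Dd, Vn.2D
//   fadd (extractelt (v2f64 V), 0), (extractelt (v2f64 V), 1)  ->  FADDP Dd, Vn.2D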
def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)),
(vector_extract (v2i64 FPR128:$Rn), (i64 1)))),
(i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>;
def : Pat<(f64 (fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)),
(vector_extract (v2f64 FPR128:$Rn), (i64 1)))),
(f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>;
// vector_extract on 64-bit vectors gets promoted to a 128 bit vector,
// so we match on v4f32 here, not v2f32. This will also catch adding
// the low two lanes of a true v4f32 vector.
def : Pat<(fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)),
(vector_extract (v4f32 FPR128:$Rn), (i64 1))),
(f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
def : Pat<(fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)),
(vector_extract (v8f16 FPR128:$Rn), (i64 1))),
(f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
// Scalar 64-bit shifts in FPR64 registers.
def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
(SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
(USHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
(SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
(URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
// Patterns for nontemporal/no-allocate stores.
// We have to resort to tricks to turn a single-input store into a store pair,
// because there is no single-input nontemporal store, only STNP.
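// As a sketch: a 128-bit nontemporal store of Qt is split into its two 64-bit
// halves (the high half extracted with DUP lane 1) and emitted as
// STNP Dlo, Dhi, [Xn, #offset]; the 64-bit and GPR cases below do the same at
// 32-bit granularity with STNP of S or W registers.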
let Predicates = [IsLE] in {
let AddedComplexity = 15 in {
class NTStore128Pat<ValueType VT> :
Pat<(nontemporalstore (VT FPR128:$Rt),
(am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
(STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub),
(DUPi64 FPR128:$Rt, (i64 1)),
GPR64sp:$Rn, simm7s8:$offset)>;
def : NTStore128Pat<v2i64>;
def : NTStore128Pat<v4i32>;
def : NTStore128Pat<v8i16>;
def : NTStore128Pat<v16i8>;
class NTStore64Pat<ValueType VT> :
Pat<(nontemporalstore (VT FPR64:$Rt),
(am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
(STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub),
(DUPi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
GPR64sp:$Rn, simm7s4:$offset)>;
// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
def : NTStore64Pat<v1f64>;
def : NTStore64Pat<v1i64>;
def : NTStore64Pat<v2i32>;
def : NTStore64Pat<v4i16>;
def : NTStore64Pat<v8i8>;
def : Pat<(nontemporalstore GPR64:$Rt,
(am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
(STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
(EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32),
GPR64sp:$Rn, simm7s4:$offset)>;
} // AddedComplexity=15
} // Predicates = [IsLE]
// Tail call return handling. These are all compiler pseudo-instructions,
// so no encoding information or anything like that.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff), []>,
Sched<[WriteBrReg]>;
def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>,
Sched<[WriteBrReg]>;
// Indirect tail-call with any register allowed, used by MachineOutliner when
// this is proven safe.
// FIXME: If we have to add any more hacks like this, we should instead relax
// some verifier checks for outlined functions.
def TCRETURNriALL : Pseudo<(outs), (ins GPR64:$dst, i32imm:$FPDiff), []>,
Sched<[WriteBrReg]>;
// Indirect tail-call limited to the registers (x16 and x17) which are allowed
// to tail-call to a "BTI c" instruction.
def TCRETURNriBTI : Pseudo<(outs), (ins rtcGPR64:$dst, i32imm:$FPDiff), []>,
Sched<[WriteBrReg]>;
}
def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
(TCRETURNri tcGPR64:$dst, imm:$FPDiff)>,
Requires<[NotUseBTI]>;
def : Pat<(AArch64tcret rtcGPR64:$dst, (i32 timm:$FPDiff)),
(TCRETURNriBTI rtcGPR64:$dst, imm:$FPDiff)>,
Requires<[UseBTI]>;
def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
(TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
(TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>;
def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>;
// Extracting lane zero is a special case where we can just use a plain
// EXTRACT_SUBREG instruction, which will become FMOV. This is easier for the
// rest of the compiler, especially the register allocator and copy propagation,
// to reason about, so is preferred when it's possible to use it.
let AddedComplexity = 10 in {
def : Pat<(i64 (extractelt (v2i64 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, dsub)>;
def : Pat<(i32 (extractelt (v4i32 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, ssub)>;
def : Pat<(i32 (extractelt (v2i32 V64:$V), (i64 0))), (EXTRACT_SUBREG V64:$V, ssub)>;
}
// dot_v4i8
class mul_v4i8<SDPatternOperator ldop> :
PatFrag<(ops node:$Rn, node:$Rm, node:$offset),
(mul (ldop (add node:$Rn, node:$offset)),
(ldop (add node:$Rm, node:$offset)))>;
class mulz_v4i8<SDPatternOperator ldop> :
PatFrag<(ops node:$Rn, node:$Rm),
(mul (ldop node:$Rn), (ldop node:$Rm))>;
def load_v4i8 :
OutPatFrag<(ops node:$R),
(INSERT_SUBREG
(v2i32 (IMPLICIT_DEF)),
(i32 (COPY_TO_REGCLASS (LDRWui node:$R, (i64 0)), FPR32)),
ssub)>;
class dot_v4i8<Instruction DOT, SDPatternOperator ldop> :
Pat<(i32 (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 3)),
(add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 2)),
(add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 1)),
(mulz_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm))))),
(EXTRACT_SUBREG (i64 (DOT (DUPv2i32gpr WZR),
(load_v4i8 GPR64sp:$Rn),
(load_v4i8 GPR64sp:$Rm))),
sub_32)>, Requires<[HasDotProd]>;
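// In other words (a sketch): the four byte pairs are loaded as one 32-bit word
// into lane 0 of each operand vector, a single [SU]DOT .2S against a zeroed
// accumulator computes the dot product into lane 0, and that 32-bit lane is
// the result.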
// dot_v8i8
class ee_v8i8<SDPatternOperator extend> :
PatFrag<(ops node:$V, node:$K),
(v4i16 (extract_subvector (v8i16 (extend node:$V)), node:$K))>;
class mul_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
PatFrag<(ops node:$M, node:$N, node:$K),
(mulop (v4i16 (ee_v8i8<extend> node:$M, node:$K)),
(v4i16 (ee_v8i8<extend> node:$N, node:$K)))>;
class idot_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
PatFrag<(ops node:$M, node:$N),
(i32 (extractelt
(v4i32 (AArch64uaddv
(add (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 0)),
(mul_v8i8<mulop, extend> node:$M, node:$N, (i64 4))))),
(i64 0)))>;
// vaddv_[su]32 is special: it lowers to ADDP Vd.2S, Vn.2S, Vm.2S with Vn == Vm,
// and the result is read from Vd.s[0].
def VADDV_32 : OutPatFrag<(ops node:$R), (ADDPv2i32 node:$R, node:$R)>;
class odot_v8i8<Instruction DOT> :
OutPatFrag<(ops node:$Vm, node:$Vn),
(EXTRACT_SUBREG
(VADDV_32
(i64 (DOT (DUPv2i32gpr WZR),
(v8i8 node:$Vm),
(v8i8 node:$Vn)))),
sub_32)>;
class dot_v8i8<Instruction DOT, SDPatternOperator mulop,
SDPatternOperator extend> :
Pat<(idot_v8i8<mulop, extend> V64:$Vm, V64:$Vn),
(odot_v8i8<DOT> V64:$Vm, V64:$Vn)>,
Requires<[HasDotProd]>;
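// A sketch of the v8i8 case: one [SU]DOT .2S over the eight bytes yields two
// 32-bit partial sums (one per 4-byte group); the ADDP in odot_v8i8 then folds
// the two lanes together, and lane 0 holds the full reduction.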
// dot_v16i8
class ee_v16i8<SDPatternOperator extend> :
PatFrag<(ops node:$V, node:$K1, node:$K2),
(v4i16 (extract_subvector
(v8i16 (extend
(v8i8 (extract_subvector node:$V, node:$K1)))), node:$K2))>;
class mul_v16i8<SDPatternOperator mulop, SDPatternOperator extend> :
PatFrag<(ops node:$M, node:$N, node:$K1, node:$K2),
(v4i32
(mulop (v4i16 (ee_v16i8<extend> node:$M, node:$K1, node:$K2)),
(v4i16 (ee_v16i8<extend> node:$N, node:$K1, node:$K2))))>;
class idot_v16i8<SDPatternOperator m, SDPatternOperator x> :
PatFrag<(ops node:$M, node:$N),
(i32 (extractelt
(v4i32 (AArch64uaddv
(add
(add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 0)),
(mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 0))),
(add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 4)),
(mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 4)))))),
(i64 0)))>;
class odot_v16i8<Instruction DOT> :
OutPatFrag<(ops node:$Vm, node:$Vn),
(i32 (ADDVv4i32v
(DOT (DUPv4i32gpr WZR), node:$Vm, node:$Vn)))>;
class dot_v16i8<Instruction DOT, SDPatternOperator mulop,
SDPatternOperator extend> :
Pat<(idot_v16i8<mulop, extend> V128:$Vm, V128:$Vn),
(odot_v16i8<DOT> V128:$Vm, V128:$Vn)>,
Requires<[HasDotProd]>;
let AddedComplexity = 10 in {
def : dot_v4i8<SDOTv8i8, sextloadi8>;
def : dot_v4i8<UDOTv8i8, zextloadi8>;
def : dot_v8i8<SDOTv8i8, AArch64smull, sext>;
def : dot_v8i8<UDOTv8i8, AArch64umull, zext>;
def : dot_v16i8<SDOTv16i8, AArch64smull, sext>;
def : dot_v16i8<UDOTv16i8, AArch64umull, zext>;
// FIXME: add patterns to generate vector by element dot product.
// FIXME: add SVE dot-product patterns.
}
// Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs,
// so that it can be used as input to inline asm, and vice versa.
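// For example (a sketch of the intended use): LD64B Xt, [Xn] fills the eight
// consecutive registers Xt..X(t+7), which this modeling exposes as a single
// i64x8 value in the GPR64x8Class register class.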
def LS64_BUILD : SDNode<"AArch64ISD::LS64_BUILD", SDTypeProfile<1, 8, []>>;
def LS64_EXTRACT : SDNode<"AArch64ISD::LS64_EXTRACT", SDTypeProfile<1, 2, []>>;
def : Pat<(i64x8 (LS64_BUILD GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3,
GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7)),
(REG_SEQUENCE GPR64x8Class,
$x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3,
$x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7)>;
foreach i = 0-7 in {
def : Pat<(i64 (LS64_EXTRACT (i64x8 GPR64x8:$val), (i32 i))),
(EXTRACT_SUBREG $val, !cast<SubRegIndex>("x8sub_"#i))>;
}
let Predicates = [HasLS64] in {
def LD64B: LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn),
(outs GPR64x8:$Rt)>;
def ST64B: LoadStore64B<0b001, "st64b", (ins GPR64x8:$Rt, GPR64sp:$Rn),
(outs)>;
def ST64BV: Store64BV<0b011, "st64bv">;
def ST64BV0: Store64BV<0b010, "st64bv0">;
class ST64BPattern<Intrinsic intrinsic, Instruction instruction>
: Pat<(intrinsic GPR64sp:$addr, GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7),
(instruction (REG_SEQUENCE GPR64x8Class, $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7), $addr)>;
def : ST64BPattern<int_aarch64_st64b, ST64B>;
def : ST64BPattern<int_aarch64_st64bv, ST64BV>;
def : ST64BPattern<int_aarch64_st64bv0, ST64BV0>;
}
let Predicates = [HasMOPS] in {
let Defs = [NZCV] in {
defm CPYFP : MOPSMemoryCopyInsns<0b00, "cpyfp">;
defm CPYP : MOPSMemoryMoveInsns<0b00, "cpyp">;
defm SETP : MOPSMemorySetInsns<0b00, "setp">;
}
let Uses = [NZCV] in {
defm CPYFM : MOPSMemoryCopyInsns<0b01, "cpyfm">;
defm CPYFE : MOPSMemoryCopyInsns<0b10, "cpyfe">;
defm CPYM : MOPSMemoryMoveInsns<0b01, "cpym">;
defm CPYE : MOPSMemoryMoveInsns<0b10, "cpye">;
defm SETM : MOPSMemorySetInsns<0b01, "setm">;
defm SETE : MOPSMemorySetInsns<0b10, "sete">;
}
}
let Predicates = [HasMOPS, HasMTE] in {
let Defs = [NZCV] in {
defm SETGP : MOPSMemorySetTaggingInsns<0b00, "setgp">;
}
let Uses = [NZCV] in {
defm SETGM : MOPSMemorySetTaggingInsns<0b01, "setgm">;
// Can't use SETGE because it's a reserved name in TargetSelectionDAG.td
defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">;
}
}
// MOPS Node operands: 0: Dst, 1: Src or Value, 2: Size, 3: Chain
// MOPS Node results: 0: Dst writeback, 1: Size writeback, 2: Chain
def SDT_AArch64mops : SDTypeProfile<2, 3, [ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2> ]>;
def AArch64mops_memset : SDNode<"AArch64ISD::MOPS_MEMSET", SDT_AArch64mops>;
def AArch64mops_memset_tagging : SDNode<"AArch64ISD::MOPS_MEMSET_TAGGING", SDT_AArch64mops>;
def AArch64mops_memcopy : SDNode<"AArch64ISD::MOPS_MEMCOPY", SDT_AArch64mops>;
def AArch64mops_memmove : SDNode<"AArch64ISD::MOPS_MEMMOVE", SDT_AArch64mops>;
// MOPS operations always expand to three 4-byte instructions
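// A sketch for memset: the pseudo below is expected to expand to the
// prologue/main/epilogue triple defined above, e.g.
//   SETP [X0]!, X1!, X2
//   SETM [X0]!, X1!, X2
//   SETE [X0]!, X1!, X2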
let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in {
let mayLoad = 1 in {
def MOPSMemoryCopyPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
(ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
[], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
def MOPSMemoryMovePseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
(ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
[], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
}
let mayLoad = 0 in {
def MOPSMemorySetPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
(ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
[], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
}
}
let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12, mayLoad = 0, mayStore = 1 in {
def MOPSMemorySetTaggingPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
(ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
[], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
}
// This gets lowered into an instruction sequence of 20 bytes
let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in
def StoreSwiftAsyncContext
: Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
[]>, Sched<[]>;
def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>;
def : Pat<(AArch64AssertZExtBool GPR32:$op),
(i32 GPR32:$op)>;
include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
include "AArch64SMEInstrInfo.td"
include "AArch64InstrGISel.td"
diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h
index eb52e4aa6273..b195b1f2556a 100644
--- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1,1459 +1,1470 @@
//===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that PPC uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
#define LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
#include "PPCInstrInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/MachineValueType.h"
#include <utility>
namespace llvm {
namespace PPCISD {
// When adding a NEW PPCISD node please add it to the correct position in
// the enum. The order of elements in this enum matters!
// Values that are added after this entry:
// STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE
// are considered memory opcodes and are treated differently than entries
// that come before it. For example, ADD or MUL should be placed before
// the ISD::FIRST_TARGET_MEMORY_OPCODE while a LOAD or STORE should come
// after it.
enum NodeType : unsigned {
// Start the numbering where the builtin ops and target ops leave off.
FIRST_NUMBER = ISD::BUILTIN_OP_END,
/// FSEL - Traditional three-operand fsel node.
///
FSEL,
/// XSMAXCDP, XSMINCDP - C-type min/max instructions.
XSMAXCDP,
XSMINCDP,
/// FCFID - The FCFID instruction, taking an f64 operand and producing
/// an f64 value containing the FP representation of the integer that
/// was temporarily in the f64 operand.
FCFID,
/// Newer FCFID[US] integer-to-floating-point conversion instructions for
/// unsigned integers and single-precision outputs.
FCFIDU,
FCFIDS,
FCFIDUS,
/// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
/// operand, producing an f64 value containing the integer representation
/// of that FP value.
FCTIDZ,
FCTIWZ,
/// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for
/// unsigned integers with round toward zero.
FCTIDUZ,
FCTIWUZ,
/// Floating-point-to-integer conversion instructions
FP_TO_UINT_IN_VSR,
FP_TO_SINT_IN_VSR,
/// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in
/// VSFRC that is sign-extended from ByteWidth to a 64-bit integer.
VEXTS,
/// Reciprocal estimate instructions (unary FP ops).
FRE,
FRSQRTE,
/// Test instruction for software square root.
FTSQRT,
/// Square root instruction.
FSQRT,
/// VPERM - The PPC VPERM Instruction.
///
VPERM,
/// XXSPLT - The PPC VSX splat instructions
///
XXSPLT,
/// XXSPLTI_SP_TO_DP - The PPC VSX splat instruction for immediates,
/// converting an immediate single-precision number to a double-precision
/// vector or scalar.
XXSPLTI_SP_TO_DP,
/// XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
///
XXSPLTI32DX,
/// VECINSERT - The PPC vector insert instruction
///
VECINSERT,
/// VECSHL - The PPC vector shift left instruction
///
VECSHL,
/// XXPERMDI - The PPC XXPERMDI instruction
///
XXPERMDI,
/// The CMPB instruction (takes two operands of i32 or i64).
CMPB,
/// Hi/Lo - These represent the high and low 16-bit parts of a global
/// address respectively. These nodes have two operands, the first of
/// which must be a TargetGlobalAddress, and the second of which must be a
/// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C',
/// though these are usually folded into other nodes.
Hi,
Lo,
/// The following two target-specific nodes are used for calls through
/// function pointers in the 64-bit SVR4 ABI.
/// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
/// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
/// compute an allocation on the stack.
DYNALLOC,
/// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
/// compute an offset from native SP to the address of the most recent
/// dynamic alloca.
DYNAREAOFFSET,
/// To avoid stack clash, allocation is performed by block and each block is
/// probed.
PROBED_ALLOCA,
/// The result of the mflr at function entry, used for PIC code.
GlobalBaseReg,
/// These nodes represent PPC shifts.
///
/// For scalar types, only the last `n + 1` bits of the shift amounts
/// are used, where n is log2(sizeof(element) * 8). See sld/slw, etc.
/// for exact behaviors.
///
/// For vector types, only the last n bits are used. See vsld.
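/// For example, with i32 elements n is log2(32) = 5, so only the low six bits
/// of the shift amount are significant (cf. slw/srw/sraw).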
SRL,
SRA,
SHL,
/// FNMSUB - Negated multiply-subtract instruction.
FNMSUB,
/// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign
/// word and shift left immediate.
EXTSWSLI,
/// The combination of sra[wd]i and addze used to implement signed
/// integer division by a power of 2. The first operand is the dividend,
/// and the second is the constant shift amount (representing the
/// divisor).
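/// As an illustrative sketch, x / 4 on a 32-bit value becomes:
///   srawi r3, r3, 2
///   addze r3, r3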
SRA_ADDZE,
/// CALL - A direct function call.
/// CALL_NOP is a call with the special NOP which follows 64-bit
/// SVR4 calls and 32-bit/64-bit AIX calls.
/// CALL_NOTOC is a call for which the caller does not use the TOC.
CALL,
CALL_NOP,
CALL_NOTOC,
/// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
/// MTCTR instruction.
MTCTR,
/// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
/// BCTRL instruction.
BCTRL,
/// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl
/// instruction and the TOC reload required on 64-bit ELF, 32-bit AIX
/// and 64-bit AIX.
BCTRL_LOAD_TOC,
/// The variants that implicitly define rounding mode for calls with
/// strictfp semantics.
CALL_RM,
CALL_NOP_RM,
CALL_NOTOC_RM,
BCTRL_RM,
BCTRL_LOAD_TOC_RM,
/// Return with a flag operand, matched by 'blr'
RET_FLAG,
/// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
/// This copies the bits corresponding to the specified CRREG into the
/// resultant GPR. Bits corresponding to other CR regs are undefined.
MFOCRF,
/// Direct move from a VSX register to a GPR
MFVSR,
/// Direct move from a GPR to a VSX register (algebraic)
MTVSRA,
/// Direct move from a GPR to a VSX register (zero)
MTVSRZ,
/// Direct move of 2 consecutive GPRs to a VSX register.
BUILD_FP128,
/// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and
/// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is
/// unsupported for this target.
/// Merge 2 GPRs to a single SPE register.
BUILD_SPE64,
/// Extract SPE register component, second argument is high or low.
EXTRACT_SPE,
/// Extract a subvector from signed integer vector and convert to FP.
/// It is primarily used to convert a (widened) illegal integer vector
/// type to a legal floating point vector type.
/// For example v2i32 -> widened to v4i32 -> v2f64
SINT_VEC_TO_FP,
/// Extract a subvector from unsigned integer vector and convert to FP.
/// As with SINT_VEC_TO_FP, used for converting illegal types.
UINT_VEC_TO_FP,
/// PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to
/// place the value into the least significant element of the most
/// significant doubleword in the vector. This is not element zero for
/// anything smaller than a doubleword on either endianness. This node has
/// the same semantics as SCALAR_TO_VECTOR except that the value remains in
/// the aforementioned location in the vector register.
SCALAR_TO_VECTOR_PERMUTED,
// FIXME: Remove these once the ANDI glue bug is fixed:
/// i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the
/// eq or gt bit of CR0 after executing andi. x, 1. This is used to
/// implement truncation of i32 or i64 to i1.
ANDI_rec_1_EQ_BIT,
ANDI_rec_1_GT_BIT,
// READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit
// target (returns (Lo, Hi)). It takes a chain operand.
READ_TIME_BASE,
// EH_SJLJ_SETJMP - SjLj exception handling setjmp.
EH_SJLJ_SETJMP,
// EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
EH_SJLJ_LONGJMP,
/// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
/// instructions. For lack of a better number, we use the opcode number
/// encoding for the OPC field to identify the compare. For example, 838
/// is VCMPGTSH.
VCMP,
/// RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the
/// altivec VCMP*_rec instructions. For lack of a better number, we use the
/// opcode number encoding for the OPC field to identify the compare. For
/// example, 838 is VCMPGTSH.
VCMP_rec,
/// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
/// corresponds to the COND_BRANCH pseudo instruction. CRRC is the
/// condition register to branch on, OPC is the branch opcode to use (e.g.
/// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
/// an optional input flag argument.
COND_BRANCH,
/// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based
/// loops.
BDNZ,
BDZ,
/// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
/// towards zero. Used only as part of the long double-to-int
/// conversion sequence.
FADDRTZ,
/// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
MFFS,
/// TC_RETURN - A tail call return.
/// operand #0 chain
/// operand #1 callee (register or absolute)
/// operand #2 stack adjustment
/// operand #3 optional in flag
TC_RETURN,
/// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
CR6SET,
CR6UNSET,
/// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS
/// for non-position independent code on PPC32.
PPC32_GOT,
/// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
/// local dynamic TLS and position independent code on PPC32.
PPC32_PICGOT,
/// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec
/// TLS model, produces an ADDIS8 instruction that adds the GOT
/// base to sym\@got\@tprel\@ha.
ADDIS_GOT_TPREL_HA,
/// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec
/// TLS model, produces a LD instruction with base register G8RReg
/// and offset sym\@got\@tprel\@l. This completes the addition that
/// finds the offset of "sym" relative to the thread pointer.
LD_GOT_TPREL_L,
/// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS
/// model, produces an ADD instruction that adds the contents of
/// G8RReg to the thread pointer. Symbol contains a relocation
/// sym\@tls which is to be replaced by the thread pointer and
/// identifies to the linker that the instruction is part of a
/// TLS sequence.
ADD_TLS,
/// G8RC = ADDIS_TLSGD_HA %x2, Symbol - For the general-dynamic TLS
/// model, produces an ADDIS8 instruction that adds the GOT base
/// register to sym\@got\@tlsgd\@ha.
ADDIS_TLSGD_HA,
/// %x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
/// model, produces an ADDI8 instruction that adds G8RReg to
/// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by
/// ADDIS_TLSGD_L_ADDR until after register assignment.
ADDI_TLSGD_L,
/// %x3 = GET_TLS_ADDR %x3, Symbol - For the general-dynamic TLS
/// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by
/// ADDIS_TLSGD_L_ADDR until after register assignment.
GET_TLS_ADDR,
/// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that
/// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following
/// register assignment.
ADDI_TLSGD_L_ADDR,
/// GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY
/// G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY
/// Op that combines two register copies of TOC entries
/// (region handle into R3 and variable offset into R4) followed by a
/// GET_TLS_ADDR node which will be expanded to a call to __tls_get_addr.
/// This node is used in 64-bit mode as well (in which case the result is
/// G8RC and inputs are X3/X4).
TLSGD_AIX,
/// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
/// model, produces an ADDIS8 instruction that adds the GOT base
/// register to sym\@got\@tlsld\@ha.
ADDIS_TLSLD_HA,
/// %x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
/// model, produces an ADDI8 instruction that adds G8RReg to
/// sym\@got\@tlsld\@l and stores the result in X3. Hidden by
/// ADDIS_TLSLD_L_ADDR until after register assignment.
ADDI_TLSLD_L,
/// %x3 = GET_TLSLD_ADDR %x3, Symbol - For the local-dynamic TLS
/// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by
/// ADDIS_TLSLD_L_ADDR until after register assignment.
GET_TLSLD_ADDR,
/// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that
/// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion
/// following register assignment.
ADDI_TLSLD_L_ADDR,
/// G8RC = ADDIS_DTPREL_HA %x3, Symbol - For the local-dynamic TLS
/// model, produces an ADDIS8 instruction that adds X3 to
/// sym\@dtprel\@ha.
ADDIS_DTPREL_HA,
/// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
/// model, produces an ADDI8 instruction that adds G8RReg to
/// sym\@got\@dtprel\@l.
ADDI_DTPREL_L,
/// G8RC = PADDI_DTPREL %x3, Symbol - For the pc-rel based local-dynamic TLS
/// model, produces a PADDI8 instruction that adds X3 to sym\@dtprel.
PADDI_DTPREL,
/// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
/// during instruction selection to optimize a BUILD_VECTOR into
/// operations on splats. This is necessary to avoid losing these
/// optimizations due to constant folding.
VADD_SPLAT,
/// CHAIN = SC CHAIN, Imm128 - System call. The 7-bit unsigned
/// operand identifies the operating system entry point.
SC,
/// CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
CLRBHRB,
/// GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch
/// history rolling buffer entry.
MFBHRBE,
/// CHAIN = RFEBB CHAIN, State - Return from event-based branch.
RFEBB,
/// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little
/// endian. Maps to an xxswapd instruction that corrects an lxvd2x
/// or stxvd2x instruction. The chain is necessary because the
/// sequence replaces a load and needs to provide the same number
/// of outputs.
XXSWAPD,
/// An SDNode for swaps that are not associated with any loads/stores
/// and thereby have no chain.
SWAP_NO_CHAIN,
/// An SDNode for Power9 vector absolute value difference.
/// operand #0 vector
/// operand #1 vector
/// operand #2 constant i32 0 or 1, to indicate whether it needs to patch
/// the most significant bit for signed i32
///
/// Power9 VABSD* instructions are designed to support unsigned integer
/// vectors (byte/halfword/word); to use them for signed integer vectors, we
/// have to flip their sign bits first. Flipping the sign bit of a
/// byte/halfword integer vector would be inefficient, but for a word
/// integer vector we can leverage XVNEGSP to do it efficiently (see the
/// standalone sketch following this enum), e.g.:
/// abs(sub(a,b)) => VABSDUW(a+0x80000000, b+0x80000000)
/// => VABSDUW((XVNEGSP a), (XVNEGSP b))
VABSD,
/// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or
/// lower (IDX=1) half of v4f32 to v2f64.
FP_EXTEND_HALF,
/// MAT_PCREL_ADDR = Materialize a PC Relative address. This can be done
/// either through an add like PADDI or through a PC Relative load like
/// PLD.
MAT_PCREL_ADDR,
/// TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for
/// TLS global address when using dynamic access models. This can be done
/// through an add like PADDI.
TLS_DYNAMIC_MAT_PCREL_ADDR,
/// TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address
/// when using local exec access models, and when prefixed instructions are
/// available. This is used with ADD_TLS to produce an add like PADDI.
TLS_LOCAL_EXEC_MAT_ADDR,
/// ACC_BUILD = Build an accumulator register from 4 VSX registers.
ACC_BUILD,
/// PAIR_BUILD = Build a vector pair register from 2 VSX registers.
PAIR_BUILD,
/// EXTRACT_VSX_REG = Extract one of the underlying vsx registers of
/// an accumulator or pair register. This node is needed because
/// EXTRACT_SUBVECTOR expects the input and output vectors to have the same
/// element type.
EXTRACT_VSX_REG,
/// XXMFACC = This corresponds to the xxmfacc instruction.
XXMFACC,
// Constrained conversion from floating point to int
STRICT_FCTIDZ = ISD::FIRST_TARGET_STRICTFP_OPCODE,
STRICT_FCTIWZ,
STRICT_FCTIDUZ,
STRICT_FCTIWUZ,
/// Constrained integer-to-floating-point conversion instructions.
STRICT_FCFID,
STRICT_FCFIDU,
STRICT_FCFIDS,
STRICT_FCFIDUS,
/// Constrained floating point add in round-to-zero mode.
STRICT_FADDRTZ,
// NOTE: The nodes below may require PC-Rel specific patterns if the
// address could be PC-Relative. When adding new nodes below, consider
// whether or not the address can be PC-Relative and add the corresponding
// PC-relative patterns and tests.
/// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
/// byte-swapping store instruction. It byte-swaps the low "Type" bits of
/// the GPRC input, then stores it through Ptr. Type can be either i16 or
/// i32.
STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE,
/// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
/// byte-swapping load instruction. It loads "Type" bits, byte-swaps them,
/// then puts them in the bottom bits of the GPRC. Type can be either i16
/// or i32.
LBRX,
/// STFIWX - The STFIWX instruction. The first operand is an input token
/// chain, then an f64 value to store, then an address to store it to.
STFIWX,
/// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point
/// load which sign-extends from a 32-bit integer value into the
/// destination 64-bit register.
LFIWAX,
/// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point
/// load which zero-extends from a 32-bit integer value into the
/// destination 64-bit register.
LFIWZX,
/// GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an
/// integer smaller than 64 bits into a VSR. The integer is zero-extended.
/// This can be used for converting loaded integers to floating point.
LXSIZX,
/// STXSIX - The STXSI[bh]X instruction. The first operand is an input
/// chain, then an f64 value to store, then an address to store it to,
/// followed by a byte-width for the store.
STXSIX,
/// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
/// Maps directly to an lxvd2x instruction that will be followed by
/// an xxswapd.
LXVD2X,
/// LXVRZX - Load VSX Vector Rightmost and Zero Extend
/// This node represents a v1i128 BUILD_VECTOR of a zero-extending load
/// instruction from <byte, halfword, word, or doubleword> to i128.
/// Allows utilization of the Load VSX Vector Rightmost Instructions.
LXVRZX,
/// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
/// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on
/// the vector type to load vector in big-endian element order.
LOAD_VEC_BE,
/// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a
/// v2f32 value into the lower half of a VSR register.
LD_VSX_LH,
/// VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting memory load
/// instruction such as LXVDSX or LXVWSX.
LD_SPLAT,
/// VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting memory load
/// that zero-extends.
ZEXT_LD_SPLAT,
/// VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting memory load
/// that sign-extends.
SEXT_LD_SPLAT,
/// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
/// Maps directly to an stxvd2x instruction that will be preceded by
/// an xxswapd.
STXVD2X,
/// CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
/// Maps directly to one of stxvd2x/stxvw4x/stxvh8x/stxvb16x depending on
/// the vector type to store vector in big-endian element order.
STORE_VEC_BE,
/// Store scalar integers from VSR.
ST_VSR_SCAL_INT,
/// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes
/// except they ensure that the compare input is zero-extended for
/// sub-word versions because the atomic loads zero-extend.
ATOMIC_CMP_SWAP_8,
ATOMIC_CMP_SWAP_16,
/// GPRC = TOC_ENTRY GA, TOC
/// Loads the entry for GA from the TOC, where the TOC base is given by
/// the last operand.
TOC_ENTRY
};
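// A minimal standalone sketch of the sign-bias trick mentioned in the VABSD
// comment above: adding 0x80000000 maps signed 32-bit ordering onto unsigned
// ordering while preserving differences mod 2^32, so an unsigned absolute
// difference of the biased inputs equals abs(a - b) of the signed inputs.
// The helper names below are hypothetical illustrations, not part of this
// header; uint32_t/int32_t are assumed to be available.
inline uint32_t AbsDiffU32Sketch(uint32_t X, uint32_t Y) {
  // Models one lane of an unsigned word absolute difference (VABSDUW).
  return X > Y ? X - Y : Y - X;
}
inline uint32_t AbsDiffS32Sketch(int32_t A, int32_t B) {
  // Signed absolute difference via the biased unsigned form, mirroring
  // abs(sub(a,b)) => VABSDUW(a+0x80000000, b+0x80000000).
  return AbsDiffU32Sketch(uint32_t(A) + 0x80000000u, uint32_t(B) + 0x80000000u);
}
// For example, AbsDiffS32Sketch(-5, 7) == 12.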
} // end namespace PPCISD
/// Define some predicates that are used for node matching.
namespace PPC {
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG);
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG);
/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction.
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG);
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned ShuffleKind, SelectionDAG &DAG);
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
unsigned ShuffleKind, SelectionDAG &DAG);
/// isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGEW or VMRGOW instruction
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
unsigned ShuffleKind, SelectionDAG &DAG);
/// isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable
/// for a XXSLDWI instruction.
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
bool &Swap, bool IsLE);
/// isXXBRHShuffleMask - Return true if this is a shuffle mask suitable
/// for a XXBRH instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N);
/// isXXBRWShuffleMask - Return true if this is a shuffle mask suitable
/// for a XXBRW instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N);
/// isXXBRDShuffleMask - Return true if this is a shuffle mask suitable
/// for a XXBRD instruction.
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N);
/// isXXBRQShuffleMask - Return true if this is a shuffle mask suitable
/// for a XXBRQ instruction.
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N);
/// isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable
/// for a XXPERMDI instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
bool &Swap, bool IsLE);
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
/// shift amount, otherwise return -1.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
SelectionDAG &DAG);
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);
/// isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by
/// the XXINSERTW instruction introduced in ISA 3.0. This is essentially any
/// shuffle of v4f32/v4i32 vectors that just inserts one element from one
/// vector into the other. This function will also set a couple of
/// output parameters for how much the source vector needs to be shifted and
/// what byte number needs to be specified for the instruction to put the
/// element in the desired location of the target vector.
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
unsigned &InsertAtByte, bool &Swap, bool IsLE);
/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
/// appropriate for PPC mnemonics (which have a big endian bias - namely
/// elements are counted from the left of the vector register).
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
SelectionDAG &DAG);
/// get_VSPLTI_elt - If this is a build_vector of constants which can be
/// formed by using a vspltis[bhw] instruction of the specified element
/// size, return the constant being splatted. The ByteSize field indicates
/// the number of bytes of each element [124] -> [bhw].
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
// Flags for computing the optimal addressing mode for loads and stores.
enum MemOpFlags {
MOF_None = 0,
// Extension mode for integer loads.
MOF_SExt = 1,
MOF_ZExt = 1 << 1,
MOF_NoExt = 1 << 2,
// Address computation flags.
MOF_NotAddNorCst = 1 << 5, // Not const. or sum of ptr and scalar.
MOF_RPlusSImm16 = 1 << 6, // Reg plus signed 16-bit constant.
MOF_RPlusLo = 1 << 7, // Reg plus signed 16-bit relocation
MOF_RPlusSImm16Mult4 = 1 << 8, // Reg plus 16-bit signed multiple of 4.
MOF_RPlusSImm16Mult16 = 1 << 9, // Reg plus 16-bit signed multiple of 16.
MOF_RPlusSImm34 = 1 << 10, // Reg plus 34-bit signed constant.
MOF_RPlusR = 1 << 11, // Sum of two variables.
MOF_PCRel = 1 << 12, // PC-Relative relocation.
MOF_AddrIsSImm32 = 1 << 13, // A simple 32-bit constant.
// The in-memory type.
MOF_SubWordInt = 1 << 15,
MOF_WordInt = 1 << 16,
MOF_DoubleWordInt = 1 << 17,
MOF_ScalarFloat = 1 << 18, // Scalar single or double precision.
MOF_Vector = 1 << 19, // Vector types and quad precision scalars.
MOF_Vector256 = 1 << 20,
// Subtarget features.
MOF_SubtargetBeforeP9 = 1 << 22,
MOF_SubtargetP9 = 1 << 23,
MOF_SubtargetP10 = 1 << 24,
MOF_SubtargetSPE = 1 << 25
};
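// A hedged sketch of how a flag word built from MemOpFlags might be tested
// against one candidate addressing mode. The real mapping is built by
// PPCTargetLowering::initializeAddrModeMap() and queried through
// getAddrModeForFlags() declared further down; the particular flag
// combination used here is an assumption chosen for illustration only.
inline bool flagsLookLikeDFormSketch(unsigned Flags) {
  // D-Form needs a register plus a signed 16-bit displacement; pair that
  // with a word-sized integer access as one plausible combination.
  unsigned Required = MOF_RPlusSImm16 | MOF_WordInt;
  return (Flags & Required) == Required;
}
// Usage sketch:
//   unsigned Flags = MOF_RPlusSImm16 | MOF_WordInt | MOF_SubtargetP9;
//   bool UseDForm = flagsLookLikeDFormSketch(Flags); // true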
// The addressing modes for loads and stores.
enum AddrMode {
AM_None,
AM_DForm,
AM_DSForm,
AM_DQForm,
AM_PrefixDForm,
AM_XForm,
AM_PCRel
};
} // end namespace PPC
class PPCTargetLowering : public TargetLowering {
const PPCSubtarget &Subtarget;
public:
explicit PPCTargetLowering(const PPCTargetMachine &TM,
const PPCSubtarget &STI);
/// getTargetNodeName() - This method returns the name of a target specific
/// DAG node.
const char *getTargetNodeName(unsigned Opcode) const override;
bool isSelectSupported(SelectSupportKind Kind) const override {
// PowerPC does not support scalar condition selects on vectors.
return (Kind != SelectSupportKind::ScalarCondVectorVal);
}
/// getPreferredVectorAction - The code we generate when vector types are
/// legalized by promoting the integer element type is often much worse
/// than code we generate if we widen the type for applicable vector types.
/// The issue with promoting is that the vector is scalarized, the individual
/// elements are promoted, and then the vector is rebuilt. So say we load a pair
/// of v4i8's and shuffle them. This will turn into a mess of 8 extending
/// loads, moves back into VSR's (or memory ops if we don't have moves) and
/// then the VPERM for the shuffle. All in all a very slow sequence.
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
const override {
- if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
- VT.getScalarSizeInBits() % 8 == 0)
+ // Default handling for scalable and single-element vectors.
+ if (VT.isScalableVector() || VT.getVectorNumElements() == 1)
+ return TargetLoweringBase::getPreferredVectorAction(VT);
+
+ // Split and promote vNi1 vectors so we don't produce v256i1/v512i1
+ // types as those are only for MMA instructions.
+ if (VT.getScalarSizeInBits() == 1 && VT.getSizeInBits() > 16)
+ return TypeSplitVector;
+ if (VT.getScalarSizeInBits() == 1)
+ return TypePromoteInteger;
+
+ // Widen vectors that have reasonably sized elements.
+ if (VT.getScalarSizeInBits() % 8 == 0)
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
bool useSoftFloat() const override;
bool hasSPE() const;
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
return MVT::i32;
}
bool isCheapToSpeculateCttz() const override {
return true;
}
bool isCheapToSpeculateCtlz() const override {
return true;
}
bool isCtlzFast() const override {
return true;
}
bool isEqualityCmpFoldedWithSignedCmp() const override {
return false;
}
bool hasAndNotCompare(SDValue) const override {
return true;
}
bool preferIncOfAddToSubOfNot(EVT VT) const override;
bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
return VT.isScalarInteger();
}
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps,
bool OptForSize, NegatibleCost &Cost,
unsigned Depth = 0) const override;
/// getSetCCResultType - Return the ISD::SETCC ValueType
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
/// Return true if target always benefits from combining into FMA for a
/// given value type. This must typically return false on targets where FMA
/// takes more cycles to execute than FADD.
bool enableAggressiveFMAFusion(EVT VT) const override;
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const override;
/// SelectAddressEVXRegReg - Given the specified address, check to see if
/// it can be more efficiently represented as [r+imm].
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index,
SelectionDAG &DAG) const;
/// SelectAddressRegReg - Given the specified address, check to see if it
/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment
/// is non-zero, only accept a displacement that is not suitable for [r+imm].
/// Returns false if it can be represented by [r+imm], which is preferred.
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
SelectionDAG &DAG,
MaybeAlign EncodingAlignment = None) const;
/// SelectAddressRegImm - Returns true if the address N can be represented
/// by a base register plus a signed 16-bit displacement [r+imm], and if it
/// is not better represented as reg+reg. If \p EncodingAlignment is
/// non-zero, only accept displacements suitable for the instruction encoding
/// requirements, i.e. multiples of 4 for DS form.
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
SelectionDAG &DAG,
MaybeAlign EncodingAlignment) const;
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base,
SelectionDAG &DAG) const;
/// SelectAddressRegRegOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index,
SelectionDAG &DAG) const;
/// SelectAddressPCRel - Return true if the specified address can be
/// represented as PC-relative, i.e. as [pc+imm].
bool SelectAddressPCRel(SDValue N, SDValue &Base) const;
Sched::Preference getSchedulingPreference(SDNode *N) const override;
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
/// ReplaceNodeResults - Replace the results of node with an illegal result
/// type with new values built out of custom code.
///
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const override;
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const override;
Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
void computeKnownBitsForTargetNode(const SDValue Op,
KnownBits &Known,
const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
Align getPrefLoopAlignment(MachineLoop *ML) const override;
bool shouldInsertFencesForAtomic(const Instruction *I) const override {
return true;
}
Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
AtomicOrdering Ord) const override;
Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
AtomicOrdering Ord) const override;
TargetLowering::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
TargetLowering::AtomicExpansionKind
shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder,
AtomicRMWInst *AI, Value *AlignedAddr,
Value *Incr, Value *Mask,
Value *ShiftAmt,
AtomicOrdering Ord) const override;
Value *emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder,
AtomicCmpXchgInst *CI,
Value *AlignedAddr, Value *CmpVal,
Value *NewVal, Value *Mask,
AtomicOrdering Ord) const override;
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const override;
MachineBasicBlock *EmitAtomicBinary(MachineInstr &MI,
MachineBasicBlock *MBB,
unsigned AtomicSize,
unsigned BinOpcode,
unsigned CmpOpcode = 0,
unsigned CmpPred = 0) const;
MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr &MI,
MachineBasicBlock *MBB,
bool is8bit,
unsigned Opcode,
unsigned CmpOpcode = 0,
unsigned CmpPred = 0) const;
MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const;
MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const;
MachineBasicBlock *emitProbedAlloca(MachineInstr &MI,
MachineBasicBlock *MBB) const;
bool hasInlineStackProbe(MachineFunction &MF) const override;
unsigned getStackProbeSize(MachineFunction &MF) const;
ConstraintType getConstraintType(StringRef Constraint) const override;
/// Examine constraint string and operand type and determine a weight value.
/// The operand object must already have been set up with the operand type.
ConstraintWeight getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const override;
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint, MVT VT) const override;
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. This is the actual
/// alignment, not its logarithm.
uint64_t getByValTypeAlignment(Type *Ty,
const DataLayout &DL) const override;
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
unsigned
getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
if (ConstraintCode == "es")
return InlineAsm::Constraint_es;
else if (ConstraintCode == "Q")
return InlineAsm::Constraint_Q;
else if (ConstraintCode == "Z")
return InlineAsm::Constraint_Z;
else if (ConstraintCode == "Zy")
return InlineAsm::Constraint_Zy;
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
Type *Ty, unsigned AS,
Instruction *I = nullptr) const override;
/// isLegalICmpImmediate - Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can
/// compare a register against the immediate without having to materialize
/// the immediate into a register.
bool isLegalICmpImmediate(int64_t Imm) const override;
/// isLegalAddImmediate - Return true if the specified immediate is legal
/// add immediate, that is the target has add instructions which can
/// add a register and the immediate without having to materialize
/// the immediate into a register.
bool isLegalAddImmediate(int64_t Imm) const override;
/// isTruncateFree - Return true if it's free to truncate a value of
/// type Ty1 to type Ty2, e.g. on PPC it's free to truncate an i64 value in
/// register X1 to i32 by referencing its sub-register R1.
bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
bool isTruncateFree(EVT VT1, EVT VT2) const override;
bool isZExtFree(SDValue Val, EVT VT2) const override;
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override;
/// Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
bool convertSelectOfConstantsToMath(EVT VT) const override {
return true;
}
bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
SDValue C) const override;
bool isDesirableToTransformToIntegerOp(unsigned Opc,
EVT VT) const override {
// Only handle float load/store pair because float(fpr) load/store
// instructions take more cycles than integer(gpr) load/store on PPC.
if (Opc != ISD::LOAD && Opc != ISD::STORE)
return false;
if (VT != MVT::f32 && VT != MVT::f64)
return false;
return true;
}
// Returns true if the address of the global is stored in TOC entry.
bool isAccessedAsGotIndirect(SDValue N) const;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
bool getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const override;
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT getOptimalMemOpType(const MemOp &Op,
const AttributeList &FuncAttributes) const override;
/// Is unaligned memory access allowed for the given type, and is it fast
/// relative to software emulation.
bool allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *Fast = nullptr) const override;
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
/// expanded to FMAs when this method returns true, otherwise fmuladd is
/// expanded to fmul + fadd.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const override;
bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
/// isProfitableToHoist - Check if it is profitable to hoist instruction
/// \p I to its dominator block.
/// For example, it is not profitable if \p I and its only user can form an
/// FMA instruction, because PowerPC prefers FMADD.
bool isProfitableToHoist(Instruction *I) const override;
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
// Should we expand the build vector with shuffles?
bool
shouldExpandBuildVectorWithShuffles(EVT VT,
unsigned DefinedValues) const override;
// Keep the zero-extensions for arguments to libcalls.
bool shouldKeepZExtForFP16Conv() const override { return true; }
/// createFastISel - This method returns a target-specific FastISel object,
/// or null if the target does not support "fast" instruction selection.
FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo) const override;
/// Returns true if an argument of type Ty needs to be passed in a
/// contiguous block of registers in calling convention CallConv.
bool functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg,
const DataLayout &DL) const override {
// We support any array type as "consecutive" block in the parameter
// save area. The element type defines the alignment requirement and
// whether the argument should go in GPRs, FPRs, or VRs if available.
//
// Note that clang uses this capability both to implement the ELFv2
// homogeneous float/vector aggregate ABI, and to avoid having to use
// "byval" when passing aggregates that might fully fit in registers.
return Ty->isArrayTy();
}
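// Illustrative note (an assumption about typical front-end lowering, not a
// guarantee made by this header): for an ELFv2 homogeneous aggregate such as
//   struct Vec4 { float x, y, z, w; };
// clang can pass the argument as an array type like [4 x float], so the check
// above reports it as a consecutive-register candidate and the elements can
// land in consecutive FPRs when available.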
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
Register
getExceptionPointerRegister(const Constant *PersonalityFn) const override;
/// If a physical register, this returns the register that receives the
/// exception typeid on entry to a landing pad.
Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
/// Override to support customized stack guard loading.
bool useLoadStackGuardNode() const override;
void insertSSPDeclarations(Module &M) const override;
Value *getSDagStackGuard(const Module &M) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
unsigned getJumpTableEncoding() const override;
bool isJumpTableRelative() const override;
SDValue getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const override;
const MCExpr *getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
unsigned JTI,
MCContext &Ctx) const override;
/// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
/// compute the address flags of the node, get the optimal address mode
/// based on the flags, and set the Base and Disp based on the address mode.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N,
SDValue &Disp, SDValue &Base,
SelectionDAG &DAG,
MaybeAlign Align) const;
/// SelectForceXFormMode - Given the specified address, force it to be
/// represented as an indexed [r+r] operation (an XForm instruction).
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base,
SelectionDAG &DAG) const;
bool
splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
SDValue *Parts, unsigned NumParts, MVT PartVT,
Optional<CallingConv::ID> CC) const override;
/// Structure that collects some common arguments that get passed around
/// between the functions for call lowering.
struct CallFlags {
const CallingConv::ID CallConv;
const bool IsTailCall : 1;
const bool IsVarArg : 1;
const bool IsPatchPoint : 1;
const bool IsIndirect : 1;
const bool HasNest : 1;
const bool NoMerge : 1;
CallFlags(CallingConv::ID CC, bool IsTailCall, bool IsVarArg,
bool IsPatchPoint, bool IsIndirect, bool HasNest, bool NoMerge)
: CallConv(CC), IsTailCall(IsTailCall), IsVarArg(IsVarArg),
IsPatchPoint(IsPatchPoint), IsIndirect(IsIndirect),
HasNest(HasNest), NoMerge(NoMerge) {}
};
CCAssignFn *ccAssignFnForCall(CallingConv::ID CC, bool Return,
bool IsVarArg) const;
private:
struct ReuseLoadInfo {
SDValue Ptr;
SDValue Chain;
SDValue ResChain;
MachinePointerInfo MPI;
bool IsDereferenceable = false;
bool IsInvariant = false;
Align Alignment;
AAMDNodes AAInfo;
const MDNode *Ranges = nullptr;
ReuseLoadInfo() = default;
MachineMemOperand::Flags MMOFlags() const {
MachineMemOperand::Flags F = MachineMemOperand::MONone;
if (IsDereferenceable)
F |= MachineMemOperand::MODereferenceable;
if (IsInvariant)
F |= MachineMemOperand::MOInvariant;
return F;
}
};
// Map that relates a set of common address flags to PPC addressing modes.
std::map<PPC::AddrMode, SmallVector<unsigned, 16>> AddrModesMap;
void initializeAddrModeMap();
bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
SelectionDAG &DAG,
ISD::LoadExtType ET = ISD::NON_EXTLOAD) const;
void spliceIntoChain(SDValue ResChain, SDValue NewResChain,
SelectionDAG &DAG) const;
void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
SelectionDAG &DAG, const SDLoc &dl) const;
SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG,
const SDLoc &dl) const;
bool directMoveIsProfitable(const SDValue &Op) const;
SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG,
const SDLoc &dl) const;
SDValue LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
const SDLoc &dl) const;
SDValue LowerTRUNCATEVector(SDValue Op, SelectionDAG &DAG) const;
SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
bool
IsEligibleForTailCallOptimization(SDValue Callee,
CallingConv::ID CalleeCC,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const;
bool IsEligibleForTailCallOptimization_64SVR4(
SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB,
bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG &DAG, int SPDiff,
SDValue Chain, SDValue &LROpOut,
SDValue &FPOpOut,
const SDLoc &dl) const;
SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, SDValue GA) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddressAIX(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddressLinux(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
const SDLoc &dl) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
SDValue FinishCall(CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
SDValue InFlag, SDValue Chain, SDValue CallSeqStart,
SDValue &Callee, int SPDiff, unsigned NumBytes,
const SmallVectorImpl<ISD::InputArg> &Ins,
SmallVectorImpl<SDValue> &InVals,
const CallBase *CB) const;
SDValue
LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const override;
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const override;
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const override;
SDValue extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
SelectionDAG &DAG, SDValue ArgVal,
const SDLoc &dl) const;
SDValue LowerFormalArguments_AIX(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const;
SDValue LowerFormalArguments_64SVR4(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const;
SDValue LowerFormalArguments_32SVR4(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const;
SDValue createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
SDValue CallSeqStart,
ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
const SDLoc &dl) const;
SDValue LowerCall_64SVR4(SDValue Chain, SDValue Callee, CallFlags CFlags,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals,
const CallBase *CB) const;
SDValue LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallFlags CFlags,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals,
const CallBase *CB) const;
SDValue LowerCall_AIX(SDValue Chain, SDValue Callee, CallFlags CFlags,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals,
const CallBase *CB) const;
SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineStoreFPToInt(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFMALike(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineVectorShuffle(ShuffleVectorSDNode *SVN,
SelectionDAG &DAG) const;
SDValue combineVReverseMemOP(ShuffleVectorSDNode *SVN, LSBaseSDNode *LSBase,
DAGCombinerInfo &DCI) const;
/// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces
/// SETCC with integer subtraction when (1) there is a legal way of doing it
/// and (2) keeping the result of the comparison in a GPR has a performance
/// benefit.
SDValue ConvertSETCCToSubtract(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &RefinementSteps, bool &UseOneConstNR,
bool Reciprocal) const override;
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &RefinementSteps) const override;
SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
const DenormalMode &Mode) const override;
SDValue getSqrtResultForDenormInput(SDValue Operand,
SelectionDAG &DAG) const override;
unsigned combineRepeatedFPDivisors() const override;
SDValue
combineElementTruncationToVectorTruncation(SDNode *N,
DAGCombinerInfo &DCI) const;
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be
/// handled by the VINSERTH instruction introduced in ISA 3.0. This is
/// essentially any shuffle of v8i16 vectors that just inserts one element
/// from one vector into the other.
SDValue lowerToVINSERTH(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be
/// handled by the VINSERTB instruction introduced in ISA 3.0. This is
/// essentially v16i8 vector version of VINSERTH.
SDValue lowerToVINSERTB(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1.
SDValue lowerToXXSPLTI32DX(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
// Return whether the call instruction can potentially be optimized to a
// tail call. This will cause the optimizers to attempt to move, or
// duplicate return instructions to help enable tail call optimizations.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
bool hasBitPreservingFPLogic(EVT VT) const override;
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
/// getAddrModeForFlags - Based on the set of address flags, select the most
/// optimal instruction format to match by.
PPC::AddrMode getAddrModeForFlags(unsigned Flags) const;
/// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
/// the address flags of the load/store instruction that is to be matched.
/// The address flags are stored in a map, which is then searched
/// through to determine the optimal load/store instruction format.
unsigned computeMOFlags(const SDNode *Parent, SDValue N,
SelectionDAG &DAG) const;
}; // end class PPCTargetLowering
namespace PPC {
FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo);
} // end namespace PPC
bool isIntS16Immediate(SDNode *N, int16_t &Imm);
bool isIntS16Immediate(SDValue Op, int16_t &Imm);
bool isIntS34Immediate(SDNode *N, int64_t &Imm);
bool isIntS34Immediate(SDValue Op, int64_t &Imm);
bool convertToNonDenormSingle(APInt &ArgAPInt);
bool convertToNonDenormSingle(APFloat &ArgAPFloat);
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat);
} // end namespace llvm
#endif // LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 95319d1b0b74..9a6ffb20615b 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -1,2794 +1,2796 @@
//===-- RISCVAsmParser.cpp - Parse RISCV assembly to MCInst instructions --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/RISCVAsmBackend.h"
#include "MCTargetDesc/RISCVBaseInfo.h"
#include "MCTargetDesc/RISCVInstPrinter.h"
#include "MCTargetDesc/RISCVMCExpr.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "MCTargetDesc/RISCVTargetStreamer.h"
#include "TargetInfo/RISCVTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/RISCVAttributes.h"
#include "llvm/Support/RISCVISAInfo.h"
#include <limits>
using namespace llvm;
#define DEBUG_TYPE "riscv-asm-parser"
// Include the auto-generated portion of the compress emitter.
#define GEN_COMPRESS_INSTR
#include "RISCVGenCompressInstEmitter.inc"
STATISTIC(RISCVNumInstrsCompressed,
"Number of RISC-V Compressed instructions emitted");
namespace llvm {
extern const SubtargetFeatureKV RISCVFeatureKV[RISCV::NumSubtargetFeatures];
} // namespace llvm
namespace {
struct RISCVOperand;
struct ParserOptionsSet {
bool IsPicEnabled;
};
class RISCVAsmParser : public MCTargetAsmParser {
SmallVector<FeatureBitset, 4> FeatureBitStack;
SmallVector<ParserOptionsSet, 4> ParserOptionsStack;
ParserOptionsSet ParserOptions;
SMLoc getLoc() const { return getParser().getTok().getLoc(); }
bool isRV64() const { return getSTI().hasFeature(RISCV::Feature64Bit); }
bool isRV32E() const { return getSTI().hasFeature(RISCV::FeatureRV32E); }
RISCVTargetStreamer &getTargetStreamer() {
MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
return static_cast<RISCVTargetStreamer &>(TS);
}
unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) override;
bool generateImmOutOfRangeError(OperandVector &Operands, uint64_t ErrorInfo,
int64_t Lower, int64_t Upper, Twine Msg);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
bool ParseDirective(AsmToken DirectiveID) override;
// Helper to actually emit an instruction to the MCStreamer. Also, when
// possible, compression of the instruction is performed.
void emitToStreamer(MCStreamer &S, const MCInst &Inst);
// Helper to emit a combination of LUI, ADDI(W), and SLLI instructions that
// synthesize the desired immediate value into the destination register.
void emitLoadImm(MCRegister DestReg, int64_t Value, MCStreamer &Out);
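// Illustrative sketch (not this parser's actual implementation) of the
// classic LUI+ADDI split such a helper builds on for 32-bit constants:
// ADDI sign-extends its 12-bit immediate, so the upper part is rounded up
// whenever bit 11 of the value is set.
//   Hi20 = (Value + 0x800) >> 12;    // upper 20 bits, rounded
//   Lo12 = SignExtend64<12>(Value);  // signed low 12 bits
// e.g. Value = 0x12345800 gives Hi20 = 0x12346 and Lo12 = -0x800, and
// (0x12346 << 12) + (-0x800) == 0x12345800.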
// Helper to emit a combination of AUIPC and SecondOpcode. Used to implement
// helpers such as emitLoadLocalAddress and emitLoadAddress.
void emitAuipcInstPair(MCOperand DestReg, MCOperand TmpReg,
const MCExpr *Symbol, RISCVMCExpr::VariantKind VKHi,
unsigned SecondOpcode, SMLoc IDLoc, MCStreamer &Out);
// Helper to emit pseudo instruction "lla" used in PC-rel addressing.
void emitLoadLocalAddress(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
// Helper to emit pseudo instruction "la" used in GOT/PC-rel addressing.
void emitLoadAddress(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
// Helper to emit pseudo instruction "la.tls.ie" used in initial-exec TLS
// addressing.
void emitLoadTLSIEAddress(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
// Helper to emit pseudo instruction "la.tls.gd" used in global-dynamic TLS
// addressing.
void emitLoadTLSGDAddress(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
// Helper to emit pseudo load/store instruction with a symbol.
void emitLoadStoreSymbol(MCInst &Inst, unsigned Opcode, SMLoc IDLoc,
MCStreamer &Out, bool HasTmpReg);
// Helper to emit pseudo sign/zero extend instruction.
void emitPseudoExtend(MCInst &Inst, bool SignExtend, int64_t Width,
SMLoc IDLoc, MCStreamer &Out);
// Helper to emit pseudo vmsge{u}.vx instruction.
void emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc, MCStreamer &Out);
// Checks that a PseudoAddTPRel is using x4/tp in its second input operand.
// Enforcing this using a restricted register class for the second input
// operand of PseudoAddTPRel results in a poor diagnostic due to the fact that
// 'add' is an overloaded mnemonic.
bool checkPseudoAddTPRel(MCInst &Inst, OperandVector &Operands);
// Check instruction constraints.
bool validateInstruction(MCInst &Inst, OperandVector &Operands);
/// Helper for processing MC instructions that have been successfully matched
/// by MatchAndEmitInstruction. Modifications to the emitted instructions,
/// like the expansion of pseudo instructions (e.g., "li"), can be performed
/// in this method.
bool processInstruction(MCInst &Inst, SMLoc IDLoc, OperandVector &Operands,
MCStreamer &Out);
// Auto-generated instruction matching functions
#define GET_ASSEMBLER_HEADER
#include "RISCVGenAsmMatcher.inc"
OperandMatchResultTy parseCSRSystemRegister(OperandVector &Operands);
OperandMatchResultTy parseImmediate(OperandVector &Operands);
OperandMatchResultTy parseRegister(OperandVector &Operands,
bool AllowParens = false);
OperandMatchResultTy parseMemOpBaseReg(OperandVector &Operands);
OperandMatchResultTy parseAtomicMemOp(OperandVector &Operands);
OperandMatchResultTy parseOperandWithModifier(OperandVector &Operands);
OperandMatchResultTy parseBareSymbol(OperandVector &Operands);
OperandMatchResultTy parseCallSymbol(OperandVector &Operands);
OperandMatchResultTy parsePseudoJumpSymbol(OperandVector &Operands);
OperandMatchResultTy parseJALOffset(OperandVector &Operands);
OperandMatchResultTy parseVTypeI(OperandVector &Operands);
OperandMatchResultTy parseMaskReg(OperandVector &Operands);
OperandMatchResultTy parseInsnDirectiveOpcode(OperandVector &Operands);
OperandMatchResultTy parseGPRAsFPR(OperandVector &Operands);
bool parseOperand(OperandVector &Operands, StringRef Mnemonic);
bool parseDirectiveOption();
bool parseDirectiveAttribute();
bool parseDirectiveInsn(SMLoc L);
void setFeatureBits(uint64_t Feature, StringRef FeatureString) {
if (!(getSTI().getFeatureBits()[Feature])) {
MCSubtargetInfo &STI = copySTI();
setAvailableFeatures(
ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
}
}
bool getFeatureBits(uint64_t Feature) {
return getSTI().getFeatureBits()[Feature];
}
void clearFeatureBits(uint64_t Feature, StringRef FeatureString) {
if (getSTI().getFeatureBits()[Feature]) {
MCSubtargetInfo &STI = copySTI();
setAvailableFeatures(
ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
}
}
void pushFeatureBits() {
assert(FeatureBitStack.size() == ParserOptionsStack.size() &&
"These two stacks must be kept synchronized");
FeatureBitStack.push_back(getSTI().getFeatureBits());
ParserOptionsStack.push_back(ParserOptions);
}
bool popFeatureBits() {
assert(FeatureBitStack.size() == ParserOptionsStack.size() &&
"These two stacks must be kept synchronized");
if (FeatureBitStack.empty())
return true;
FeatureBitset FeatureBits = FeatureBitStack.pop_back_val();
copySTI().setFeatureBits(FeatureBits);
setAvailableFeatures(ComputeAvailableFeatures(FeatureBits));
ParserOptions = ParserOptionsStack.pop_back_val();
return false;
}
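// Usage sketch: these two stacks back the ".option push" / ".option pop"
// assembler directives, e.g. in assembly input such as
//   .option push
//   .option norelax
//   la   gp, __global_pointer$
//   .option pop
// pushFeatureBits() snapshots the current features and parser options, and
// popFeatureBits() restores them, returning true if the stack was empty.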
std::unique_ptr<RISCVOperand> defaultMaskRegOp() const;
public:
enum RISCVMatchResultTy {
Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "RISCVGenAsmMatcher.inc"
#undef GET_OPERAND_DIAGNOSTIC_TYPES
};
static bool classifySymbolRef(const MCExpr *Expr,
RISCVMCExpr::VariantKind &Kind);
RISCVAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
: MCTargetAsmParser(Options, STI, MII) {
Parser.addAliasForDirective(".half", ".2byte");
Parser.addAliasForDirective(".hword", ".2byte");
Parser.addAliasForDirective(".word", ".4byte");
Parser.addAliasForDirective(".dword", ".8byte");
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
auto ABIName = StringRef(Options.ABIName);
if (ABIName.endswith("f") &&
!getSTI().getFeatureBits()[RISCV::FeatureStdExtF]) {
errs() << "Hard-float 'f' ABI can't be used for a target that "
"doesn't support the F instruction set extension (ignoring "
"target-abi)\n";
} else if (ABIName.endswith("d") &&
!getSTI().getFeatureBits()[RISCV::FeatureStdExtD]) {
errs() << "Hard-float 'd' ABI can't be used for a target that "
"doesn't support the D instruction set extension (ignoring "
"target-abi)\n";
}
const MCObjectFileInfo *MOFI = Parser.getContext().getObjectFileInfo();
ParserOptions.IsPicEnabled = MOFI->isPositionIndependent();
}
};
/// RISCVOperand - Instances of this class represent a parsed machine
/// instruction
struct RISCVOperand : public MCParsedAsmOperand {
enum class KindTy {
Token,
Register,
Immediate,
SystemRegister,
VType,
} Kind;
bool IsRV64;
bool IsGPRAsFPR;
struct RegOp {
MCRegister RegNum;
};
struct ImmOp {
const MCExpr *Val;
};
struct SysRegOp {
const char *Data;
unsigned Length;
unsigned Encoding;
// FIXME: Add the Encoding parsed fields as needed for checks,
// e.g.: read/write or user/supervisor/machine privileges.
};
struct VTypeOp {
unsigned Val;
};
SMLoc StartLoc, EndLoc;
union {
StringRef Tok;
RegOp Reg;
ImmOp Imm;
struct SysRegOp SysReg;
struct VTypeOp VType;
};
RISCVOperand(KindTy K) : Kind(K) {}
public:
RISCVOperand(const RISCVOperand &o) : MCParsedAsmOperand() {
Kind = o.Kind;
IsRV64 = o.IsRV64;
StartLoc = o.StartLoc;
EndLoc = o.EndLoc;
switch (Kind) {
case KindTy::Register:
Reg = o.Reg;
break;
case KindTy::Immediate:
Imm = o.Imm;
break;
case KindTy::Token:
Tok = o.Tok;
break;
case KindTy::SystemRegister:
SysReg = o.SysReg;
break;
case KindTy::VType:
VType = o.VType;
break;
}
}
bool isToken() const override { return Kind == KindTy::Token; }
bool isReg() const override { return Kind == KindTy::Register; }
bool isV0Reg() const {
return Kind == KindTy::Register && Reg.RegNum == RISCV::V0;
}
bool isImm() const override { return Kind == KindTy::Immediate; }
bool isMem() const override { return false; }
bool isSystemRegister() const { return Kind == KindTy::SystemRegister; }
bool isGPR() const {
return Kind == KindTy::Register &&
RISCVMCRegisterClasses[RISCV::GPRRegClassID].contains(Reg.RegNum);
}
bool isGPRAsFPR() const { return isGPR() && IsGPRAsFPR; }
bool isGPRF64AsFPR() const { return isGPR() && IsGPRAsFPR && IsRV64; }
bool isGPRPF64AsFPR() const {
return isGPR() && IsGPRAsFPR && !IsRV64 && !((Reg.RegNum - RISCV::X0) & 1);
}
static bool evaluateConstantImm(const MCExpr *Expr, int64_t &Imm,
RISCVMCExpr::VariantKind &VK) {
if (auto *RE = dyn_cast<RISCVMCExpr>(Expr)) {
VK = RE->getKind();
return RE->evaluateAsConstant(Imm);
}
if (auto CE = dyn_cast<MCConstantExpr>(Expr)) {
VK = RISCVMCExpr::VK_RISCV_None;
Imm = CE->getValue();
return true;
}
return false;
}
// True if operand is a symbol with no modifiers, or a constant with no
// modifiers and isShiftedInt<N-1, 1>(Op).
template <int N> bool isBareSimmNLsb0() const {
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
if (!isImm())
return false;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
bool IsValid;
if (!IsConstantImm)
IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK);
else
IsValid = isShiftedInt<N - 1, 1>(Imm);
return IsValid && VK == RISCVMCExpr::VK_RISCV_None;
}
// Predicate methods for AsmOperands defined in RISCVInstrInfo.td
bool isBareSymbol() const {
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
// Must be of 'immediate' type but not a constant.
if (!isImm() || evaluateConstantImm(getImm(), Imm, VK))
return false;
return RISCVAsmParser::classifySymbolRef(getImm(), VK) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
bool isCallSymbol() const {
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
// Must be of 'immediate' type but not a constant.
if (!isImm() || evaluateConstantImm(getImm(), Imm, VK))
return false;
return RISCVAsmParser::classifySymbolRef(getImm(), VK) &&
(VK == RISCVMCExpr::VK_RISCV_CALL ||
VK == RISCVMCExpr::VK_RISCV_CALL_PLT);
}
bool isPseudoJumpSymbol() const {
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
// Must be of 'immediate' type but not a constant.
if (!isImm() || evaluateConstantImm(getImm(), Imm, VK))
return false;
return RISCVAsmParser::classifySymbolRef(getImm(), VK) &&
VK == RISCVMCExpr::VK_RISCV_CALL;
}
bool isTPRelAddSymbol() const {
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
// Must be of 'immediate' type but not a constant.
if (!isImm() || evaluateConstantImm(getImm(), Imm, VK))
return false;
return RISCVAsmParser::classifySymbolRef(getImm(), VK) &&
VK == RISCVMCExpr::VK_RISCV_TPREL_ADD;
}
bool isCSRSystemRegister() const { return isSystemRegister(); }
bool isVTypeImm(unsigned N) const {
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
if (!isImm())
return false;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isUIntN(N, Imm) && VK == RISCVMCExpr::VK_RISCV_None;
}
// If the last operand of the vsetvli/vsetivli instruction is a constant
// expression, KindTy is Immediate.
bool isVTypeI10() const {
if (Kind == KindTy::Immediate)
return isVTypeImm(10);
return Kind == KindTy::VType;
}
bool isVTypeI11() const {
if (Kind == KindTy::Immediate)
return isVTypeImm(11);
return Kind == KindTy::VType;
}
/// Return true if the operand is valid for the fence instruction, e.g.
/// 'iorw'.
bool isFenceArg() const {
if (!isImm())
return false;
const MCExpr *Val = getImm();
auto *SVal = dyn_cast<MCSymbolRefExpr>(Val);
if (!SVal || SVal->getKind() != MCSymbolRefExpr::VK_None)
return false;
StringRef Str = SVal->getSymbol().getName();
// Letters must be unique, taken from 'iorw', and in ascending order. This
// holds as long as each individual character is one of 'iorw' and is
// greater than the previous character.
char Prev = '\0';
for (char c : Str) {
if (c != 'i' && c != 'o' && c != 'r' && c != 'w')
return false;
if (c <= Prev)
return false;
Prev = c;
}
return true;
}
/// Return true if the operand is a valid floating point rounding mode.
bool isFRMArg() const {
if (!isImm())
return false;
const MCExpr *Val = getImm();
auto *SVal = dyn_cast<MCSymbolRefExpr>(Val);
if (!SVal || SVal->getKind() != MCSymbolRefExpr::VK_None)
return false;
StringRef Str = SVal->getSymbol().getName();
return RISCVFPRndMode::stringToRoundingMode(Str) != RISCVFPRndMode::Invalid;
}
bool isImmXLenLI() const {
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
if (!isImm())
return false;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
if (VK == RISCVMCExpr::VK_RISCV_LO || VK == RISCVMCExpr::VK_RISCV_PCREL_LO)
return true;
// Given only Imm, it is unfortunately impossible to verify whether the
// originally specified constant was a signed or an unsigned 64-bit number.
return IsConstantImm && VK == RISCVMCExpr::VK_RISCV_None &&
(isRV64() || (isInt<32>(Imm) || isUInt<32>(Imm)));
}
bool isUImmLog2XLen() const {
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
if (!isImm())
return false;
if (!evaluateConstantImm(getImm(), Imm, VK) ||
VK != RISCVMCExpr::VK_RISCV_None)
return false;
return (isRV64() && isUInt<6>(Imm)) || isUInt<5>(Imm);
}
bool isUImmLog2XLenNonZero() const {
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
if (!isImm())
return false;
if (!evaluateConstantImm(getImm(), Imm, VK) ||
VK != RISCVMCExpr::VK_RISCV_None)
return false;
if (Imm == 0)
return false;
return (isRV64() && isUInt<6>(Imm)) || isUInt<5>(Imm);
}
bool isUImmLog2XLenHalf() const {
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
if (!isImm())
return false;
if (!evaluateConstantImm(getImm(), Imm, VK) ||
VK != RISCVMCExpr::VK_RISCV_None)
return false;
return (isRV64() && isUInt<5>(Imm)) || isUInt<4>(Imm);
}
bool isUImm2() const {
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
if (!isImm())
return false;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isUInt<2>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
}
bool isUImm3() const {
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
if (!isImm())
return false;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isUInt<3>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
}
bool isUImm5() const {
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
if (!isImm())
return false;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isUInt<5>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
}
bool isUImm7() const {
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
if (!isImm())
return false;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isUInt<7>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
}
bool isRnumArg() const {
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
if (!isImm())
return false;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && Imm >= INT64_C(0) && Imm <= INT64_C(10) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
bool isSImm5() const {
if (!isImm())
return false;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
int64_t Imm;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isInt<5>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
}
bool isSImm6() const {
if (!isImm())
return false;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
int64_t Imm;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isInt<6>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
}
bool isSImm6NonZero() const {
if (!isImm())
return false;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
int64_t Imm;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isInt<6>(Imm) && (Imm != 0) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
bool isCLUIImm() const {
if (!isImm())
return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && (Imm != 0) &&
(isUInt<5>(Imm) || (Imm >= 0xfffe0 && Imm <= 0xfffff)) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
bool isUImm7Lsb00() const {
if (!isImm())
return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isShiftedUInt<5, 2>(Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
bool isUImm8Lsb00() const {
if (!isImm())
return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isShiftedUInt<6, 2>(Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
bool isUImm8Lsb000() const {
if (!isImm())
return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isShiftedUInt<5, 3>(Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
bool isSImm9Lsb0() const { return isBareSimmNLsb0<9>(); }
bool isUImm9Lsb000() const {
if (!isImm())
return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isShiftedUInt<6, 3>(Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
bool isUImm10Lsb00NonZero() const {
if (!isImm())
return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isShiftedUInt<8, 2>(Imm) && (Imm != 0) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
bool isSImm12() const {
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
int64_t Imm;
bool IsValid;
if (!isImm())
return false;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
if (!IsConstantImm)
IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK);
else
IsValid = isInt<12>(Imm);
return IsValid && ((IsConstantImm && VK == RISCVMCExpr::VK_RISCV_None) ||
VK == RISCVMCExpr::VK_RISCV_LO ||
VK == RISCVMCExpr::VK_RISCV_PCREL_LO ||
VK == RISCVMCExpr::VK_RISCV_TPREL_LO);
}
bool isSImm12Lsb0() const { return isBareSimmNLsb0<12>(); }
bool isSImm13Lsb0() const { return isBareSimmNLsb0<13>(); }
bool isSImm10Lsb0000NonZero() const {
if (!isImm())
return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && (Imm != 0) && isShiftedInt<6, 4>(Imm) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
bool isUImm20LUI() const {
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
int64_t Imm;
bool IsValid;
if (!isImm())
return false;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
if (!IsConstantImm) {
IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK);
return IsValid && (VK == RISCVMCExpr::VK_RISCV_HI ||
VK == RISCVMCExpr::VK_RISCV_TPREL_HI);
} else {
return isUInt<20>(Imm) && (VK == RISCVMCExpr::VK_RISCV_None ||
VK == RISCVMCExpr::VK_RISCV_HI ||
VK == RISCVMCExpr::VK_RISCV_TPREL_HI);
}
}
bool isUImm20AUIPC() const {
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
int64_t Imm;
bool IsValid;
if (!isImm())
return false;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
if (!IsConstantImm) {
IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK);
return IsValid && (VK == RISCVMCExpr::VK_RISCV_PCREL_HI ||
VK == RISCVMCExpr::VK_RISCV_GOT_HI ||
VK == RISCVMCExpr::VK_RISCV_TLS_GOT_HI ||
VK == RISCVMCExpr::VK_RISCV_TLS_GD_HI);
} else {
return isUInt<20>(Imm) && (VK == RISCVMCExpr::VK_RISCV_None ||
VK == RISCVMCExpr::VK_RISCV_PCREL_HI ||
VK == RISCVMCExpr::VK_RISCV_GOT_HI ||
VK == RISCVMCExpr::VK_RISCV_TLS_GOT_HI ||
VK == RISCVMCExpr::VK_RISCV_TLS_GD_HI);
}
}
bool isSImm21Lsb0JAL() const { return isBareSimmNLsb0<21>(); }
bool isImmZero() const {
if (!isImm())
return false;
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && (Imm == 0) && VK == RISCVMCExpr::VK_RISCV_None;
}
bool isSImm5Plus1() const {
if (!isImm())
return false;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
int64_t Imm;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
return IsConstantImm && isInt<5>(Imm - 1) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
/// getStartLoc - Gets location of the first token of this operand
SMLoc getStartLoc() const override { return StartLoc; }
/// getEndLoc - Gets location of the last token of this operand
SMLoc getEndLoc() const override { return EndLoc; }
/// True if this operand is for an RV64 instruction
bool isRV64() const { return IsRV64; }
unsigned getReg() const override {
assert(Kind == KindTy::Register && "Invalid type access!");
return Reg.RegNum.id();
}
StringRef getSysReg() const {
assert(Kind == KindTy::SystemRegister && "Invalid type access!");
return StringRef(SysReg.Data, SysReg.Length);
}
const MCExpr *getImm() const {
assert(Kind == KindTy::Immediate && "Invalid type access!");
return Imm.Val;
}
StringRef getToken() const {
assert(Kind == KindTy::Token && "Invalid type access!");
return Tok;
}
unsigned getVType() const {
assert(Kind == KindTy::VType && "Invalid type access!");
return VType.Val;
}
void print(raw_ostream &OS) const override {
auto RegName = [](unsigned Reg) {
if (Reg)
return RISCVInstPrinter::getRegisterName(Reg);
else
return "noreg";
};
switch (Kind) {
case KindTy::Immediate:
OS << *getImm();
break;
case KindTy::Register:
OS << "<register " << RegName(getReg()) << ">";
break;
case KindTy::Token:
OS << "'" << getToken() << "'";
break;
case KindTy::SystemRegister:
OS << "<sysreg: " << getSysReg() << '>';
break;
case KindTy::VType:
OS << "<vtype: ";
RISCVVType::printVType(getVType(), OS);
OS << '>';
break;
}
}
static std::unique_ptr<RISCVOperand> createToken(StringRef Str, SMLoc S,
bool IsRV64) {
auto Op = std::make_unique<RISCVOperand>(KindTy::Token);
Op->Tok = Str;
Op->StartLoc = S;
Op->EndLoc = S;
Op->IsRV64 = IsRV64;
return Op;
}
static std::unique_ptr<RISCVOperand> createReg(unsigned RegNo, SMLoc S,
SMLoc E, bool IsRV64,
bool IsGPRAsFPR = false) {
auto Op = std::make_unique<RISCVOperand>(KindTy::Register);
Op->Reg.RegNum = RegNo;
Op->StartLoc = S;
Op->EndLoc = E;
Op->IsRV64 = IsRV64;
Op->IsGPRAsFPR = IsGPRAsFPR;
return Op;
}
static std::unique_ptr<RISCVOperand> createImm(const MCExpr *Val, SMLoc S,
SMLoc E, bool IsRV64) {
auto Op = std::make_unique<RISCVOperand>(KindTy::Immediate);
Op->Imm.Val = Val;
Op->StartLoc = S;
Op->EndLoc = E;
Op->IsRV64 = IsRV64;
return Op;
}
static std::unique_ptr<RISCVOperand>
createSysReg(StringRef Str, SMLoc S, unsigned Encoding, bool IsRV64) {
auto Op = std::make_unique<RISCVOperand>(KindTy::SystemRegister);
Op->SysReg.Data = Str.data();
Op->SysReg.Length = Str.size();
Op->SysReg.Encoding = Encoding;
Op->StartLoc = S;
Op->EndLoc = S;
Op->IsRV64 = IsRV64;
return Op;
}
static std::unique_ptr<RISCVOperand> createVType(unsigned VTypeI, SMLoc S,
bool IsRV64) {
auto Op = std::make_unique<RISCVOperand>(KindTy::VType);
Op->VType.Val = VTypeI;
Op->StartLoc = S;
Op->EndLoc = S;
Op->IsRV64 = IsRV64;
return Op;
}
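// Add Expr to Inst as an operand, folding it into a plain integer immediate
// when it evaluates to a constant.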
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
assert(Expr && "Expr shouldn't be null!");
int64_t Imm = 0;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
bool IsConstant = evaluateConstantImm(Expr, Imm, VK);
if (IsConstant)
Inst.addOperand(MCOperand::createImm(Imm));
else
Inst.addOperand(MCOperand::createExpr(Expr));
}
// Used by the TableGen-generated matcher code.
void addRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createReg(getReg()));
}
void addImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
addExpr(Inst, getImm());
}
void addFenceArgOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// isFenceArg has validated the operand, meaning this cast is safe
auto SE = cast<MCSymbolRefExpr>(getImm());
unsigned Imm = 0;
for (char c : SE->getSymbol().getName()) {
switch (c) {
default:
llvm_unreachable("FenceArg must contain only [iorw]");
case 'i':
Imm |= RISCVFenceField::I;
break;
case 'o':
Imm |= RISCVFenceField::O;
break;
case 'r':
Imm |= RISCVFenceField::R;
break;
case 'w':
Imm |= RISCVFenceField::W;
break;
}
}
Inst.addOperand(MCOperand::createImm(Imm));
}
void addCSRSystemRegisterOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createImm(SysReg.Encoding));
}
// Support non-canonical syntax:
// "vsetivli rd, uimm, 0xabc" or "vsetvli rd, rs1, 0xabc"
// "vsetivli rd, uimm, (0xc << N)" or "vsetvli rd, rs1, (0xc << N)"
void addVTypeIOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
int64_t Imm = 0;
if (Kind == KindTy::Immediate) {
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
(void)IsConstantImm;
assert(IsConstantImm && "Invalid VTypeI Operand!");
} else {
Imm = getVType();
}
Inst.addOperand(MCOperand::createImm(Imm));
}
// Returns the rounding mode represented by this RISCVOperand. Should only
// be called after checking isFRMArg.
RISCVFPRndMode::RoundingMode getRoundingMode() const {
// isFRMArg has validated the operand, meaning this cast is safe.
auto SE = cast<MCSymbolRefExpr>(getImm());
RISCVFPRndMode::RoundingMode FRM =
RISCVFPRndMode::stringToRoundingMode(SE->getSymbol().getName());
assert(FRM != RISCVFPRndMode::Invalid && "Invalid rounding mode");
return FRM;
}
void addFRMArgOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createImm(getRoundingMode()));
}
};
} // end anonymous namespace.
#define GET_REGISTER_MATCHER
#define GET_SUBTARGET_FEATURE_NAME
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "RISCVGenAsmMatcher.inc"
static MCRegister convertFPR64ToFPR16(MCRegister Reg) {
assert(Reg >= RISCV::F0_D && Reg <= RISCV::F31_D && "Invalid register");
return Reg - RISCV::F0_D + RISCV::F0_H;
}
static MCRegister convertFPR64ToFPR32(MCRegister Reg) {
assert(Reg >= RISCV::F0_D && Reg <= RISCV::F31_D && "Invalid register");
return Reg - RISCV::F0_D + RISCV::F0_F;
}
static MCRegister convertVRToVRMx(const MCRegisterInfo &RI, MCRegister Reg,
unsigned Kind) {
unsigned RegClassID;
if (Kind == MCK_VRM2)
RegClassID = RISCV::VRM2RegClassID;
else if (Kind == MCK_VRM4)
RegClassID = RISCV::VRM4RegClassID;
else if (Kind == MCK_VRM8)
RegClassID = RISCV::VRM8RegClassID;
else
return 0;
return RI.getMatchingSuperReg(Reg, RISCV::sub_vrm1_0,
&RISCVMCRegisterClasses[RegClassID]);
}
unsigned RISCVAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
unsigned Kind) {
RISCVOperand &Op = static_cast<RISCVOperand &>(AsmOp);
if (!Op.isReg())
return Match_InvalidOperand;
MCRegister Reg = Op.getReg();
bool IsRegFPR64 =
RISCVMCRegisterClasses[RISCV::FPR64RegClassID].contains(Reg);
bool IsRegFPR64C =
RISCVMCRegisterClasses[RISCV::FPR64CRegClassID].contains(Reg);
bool IsRegVR = RISCVMCRegisterClasses[RISCV::VRRegClassID].contains(Reg);
// As the parser couldn't differentiate an FPR32 from an FPR64, coerce the
// register from FPR64 to FPR32 or FPR64C to FPR32C if necessary.
if ((IsRegFPR64 && Kind == MCK_FPR32) ||
(IsRegFPR64C && Kind == MCK_FPR32C)) {
Op.Reg.RegNum = convertFPR64ToFPR32(Reg);
return Match_Success;
}
// As the parser couldn't differentiate an FPR16 from an FPR64, coerce the
// register from FPR64 to FPR16 if necessary.
if (IsRegFPR64 && Kind == MCK_FPR16) {
Op.Reg.RegNum = convertFPR64ToFPR16(Reg);
return Match_Success;
}
// As the parser couldn't differentiate a VRM2/VRM4/VRM8 from a VR, coerce
// the register from VR to VRM2/VRM4/VRM8 if necessary.
if (IsRegVR && (Kind == MCK_VRM2 || Kind == MCK_VRM4 || Kind == MCK_VRM8)) {
Op.Reg.RegNum = convertVRToVRMx(*getContext().getRegisterInfo(), Reg, Kind);
if (Op.Reg.RegNum == 0)
return Match_InvalidOperand;
return Match_Success;
}
return Match_InvalidOperand;
}
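// Report an out-of-range immediate error at the offending operand's location,
// appending the permitted range "[Lower, Upper]" to Msg.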
bool RISCVAsmParser::generateImmOutOfRangeError(
OperandVector &Operands, uint64_t ErrorInfo, int64_t Lower, int64_t Upper,
Twine Msg = "immediate must be an integer in the range") {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(ErrorLoc, Msg + " [" + Twine(Lower) + ", " + Twine(Upper) + "]");
}
bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
MCInst Inst;
FeatureBitset MissingFeatures;
auto Result = MatchInstructionImpl(Operands, Inst, ErrorInfo, MissingFeatures,
MatchingInlineAsm);
switch (Result) {
default:
break;
case Match_Success:
if (validateInstruction(Inst, Operands))
return true;
return processInstruction(Inst, IDLoc, Operands, Out);
case Match_MissingFeature: {
assert(MissingFeatures.any() && "Unknown missing features!");
bool FirstFeature = true;
std::string Msg = "instruction requires the following:";
for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) {
if (MissingFeatures[i]) {
Msg += FirstFeature ? " " : ", ";
Msg += getSubtargetFeatureName(i);
FirstFeature = false;
}
}
return Error(IDLoc, Msg);
}
case Match_MnemonicFail: {
FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
std::string Suggestion = RISCVMnemonicSpellCheck(
((RISCVOperand &)*Operands[0]).getToken(), FBS, 0);
return Error(IDLoc, "unrecognized instruction mnemonic" + Suggestion);
}
case Match_InvalidOperand: {
SMLoc ErrorLoc = IDLoc;
if (ErrorInfo != ~0ULL) {
if (ErrorInfo >= Operands.size())
return Error(ErrorLoc, "too few operands for instruction");
ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc())
ErrorLoc = IDLoc;
}
return Error(ErrorLoc, "invalid operand for instruction");
}
}
// Handle the case when the error message is of a specific type
// other than the generic Match_InvalidOperand, and the
// corresponding operand is missing.
if (Result > FIRST_TARGET_MATCH_RESULT_TY) {
SMLoc ErrorLoc = IDLoc;
if (ErrorInfo != ~0ULL && ErrorInfo >= Operands.size())
return Error(ErrorLoc, "too few operands for instruction");
}
switch (Result) {
default:
break;
case Match_InvalidImmXLenLI:
if (isRV64()) {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(ErrorLoc, "operand must be a constant 64-bit integer");
}
return generateImmOutOfRangeError(Operands, ErrorInfo,
std::numeric_limits<int32_t>::min(),
std::numeric_limits<uint32_t>::max());
case Match_InvalidImmZero: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(ErrorLoc, "immediate must be zero");
}
case Match_InvalidUImmLog2XLen:
if (isRV64())
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 6) - 1);
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 5) - 1);
case Match_InvalidUImmLog2XLenNonZero:
if (isRV64())
return generateImmOutOfRangeError(Operands, ErrorInfo, 1, (1 << 6) - 1);
return generateImmOutOfRangeError(Operands, ErrorInfo, 1, (1 << 5) - 1);
case Match_InvalidUImmLog2XLenHalf:
if (isRV64())
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 5) - 1);
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 4) - 1);
case Match_InvalidUImm2:
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 2) - 1);
case Match_InvalidUImm3:
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 3) - 1);
case Match_InvalidUImm5:
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 5) - 1);
case Match_InvalidUImm7:
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 7) - 1);
case Match_InvalidSImm5:
return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 4),
(1 << 4) - 1);
case Match_InvalidSImm6:
return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 5),
(1 << 5) - 1);
case Match_InvalidSImm6NonZero:
return generateImmOutOfRangeError(
Operands, ErrorInfo, -(1 << 5), (1 << 5) - 1,
"immediate must be non-zero in the range");
case Match_InvalidCLUIImm:
return generateImmOutOfRangeError(
Operands, ErrorInfo, 1, (1 << 5) - 1,
"immediate must be in [0xfffe0, 0xfffff] or");
case Match_InvalidUImm7Lsb00:
return generateImmOutOfRangeError(
Operands, ErrorInfo, 0, (1 << 7) - 4,
"immediate must be a multiple of 4 bytes in the range");
case Match_InvalidUImm8Lsb00:
return generateImmOutOfRangeError(
Operands, ErrorInfo, 0, (1 << 8) - 4,
"immediate must be a multiple of 4 bytes in the range");
case Match_InvalidUImm8Lsb000:
return generateImmOutOfRangeError(
Operands, ErrorInfo, 0, (1 << 8) - 8,
"immediate must be a multiple of 8 bytes in the range");
case Match_InvalidSImm9Lsb0:
return generateImmOutOfRangeError(
Operands, ErrorInfo, -(1 << 8), (1 << 8) - 2,
"immediate must be a multiple of 2 bytes in the range");
case Match_InvalidUImm9Lsb000:
return generateImmOutOfRangeError(
Operands, ErrorInfo, 0, (1 << 9) - 8,
"immediate must be a multiple of 8 bytes in the range");
case Match_InvalidUImm10Lsb00NonZero:
return generateImmOutOfRangeError(
Operands, ErrorInfo, 4, (1 << 10) - 4,
"immediate must be a multiple of 4 bytes in the range");
case Match_InvalidSImm10Lsb0000NonZero:
return generateImmOutOfRangeError(
Operands, ErrorInfo, -(1 << 9), (1 << 9) - 16,
"immediate must be a multiple of 16 bytes and non-zero in the range");
case Match_InvalidSImm12:
return generateImmOutOfRangeError(
Operands, ErrorInfo, -(1 << 11), (1 << 11) - 1,
"operand must be a symbol with %lo/%pcrel_lo/%tprel_lo modifier or an "
"integer in the range");
case Match_InvalidSImm12Lsb0:
return generateImmOutOfRangeError(
Operands, ErrorInfo, -(1 << 11), (1 << 11) - 2,
"immediate must be a multiple of 2 bytes in the range");
case Match_InvalidSImm13Lsb0:
return generateImmOutOfRangeError(
Operands, ErrorInfo, -(1 << 12), (1 << 12) - 2,
"immediate must be a multiple of 2 bytes in the range");
case Match_InvalidUImm20LUI:
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 20) - 1,
"operand must be a symbol with "
"%hi/%tprel_hi modifier or an integer in "
"the range");
case Match_InvalidUImm20AUIPC:
return generateImmOutOfRangeError(
Operands, ErrorInfo, 0, (1 << 20) - 1,
"operand must be a symbol with a "
"%pcrel_hi/%got_pcrel_hi/%tls_ie_pcrel_hi/%tls_gd_pcrel_hi modifier or "
"an integer in the range");
case Match_InvalidSImm21Lsb0JAL:
return generateImmOutOfRangeError(
Operands, ErrorInfo, -(1 << 20), (1 << 20) - 2,
"immediate must be a multiple of 2 bytes in the range");
case Match_InvalidCSRSystemRegister: {
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 12) - 1,
"operand must be a valid system register "
"name or an integer in the range");
}
case Match_InvalidFenceArg: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(
ErrorLoc,
"operand must be formed of letters selected in-order from 'iorw'");
}
case Match_InvalidFRMArg: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(
ErrorLoc,
"operand must be a valid floating point rounding mode mnemonic");
}
case Match_InvalidBareSymbol: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(ErrorLoc, "operand must be a bare symbol name");
}
case Match_InvalidPseudoJumpSymbol: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(ErrorLoc, "operand must be a valid jump target");
}
case Match_InvalidCallSymbol: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(ErrorLoc, "operand must be a bare symbol name");
}
case Match_InvalidTPRelAddSymbol: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(ErrorLoc, "operand must be a symbol with %tprel_add modifier");
}
case Match_InvalidVTypeI: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(
ErrorLoc,
"operand must be "
"e[8|16|32|64|128|256|512|1024],m[1|2|4|8|f2|f4|f8],[ta|tu],[ma|mu]");
}
case Match_InvalidVMaskRegister: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(ErrorLoc, "operand must be v0.t");
}
case Match_InvalidSImm5Plus1: {
return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 4) + 1,
(1 << 4),
"immediate must be in the range");
}
case Match_InvalidRnumArg: {
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, 10);
}
}
llvm_unreachable("Unknown match type detected!");
}
// Attempts to match Name as a register (either using the default name or
// alternative ABI names), setting RegNo to the matching register. Upon
// failure, returns true and sets RegNo to 0. If IsRV32E then registers
// x16-x31 will be rejected.
static bool matchRegisterNameHelper(bool IsRV32E, MCRegister &RegNo,
StringRef Name) {
RegNo = MatchRegisterName(Name);
// The 16-/32- and 64-bit FPRs have the same asm name. Check that the initial
// match always matches the 64-bit variant, and not the 16/32-bit one.
assert(!(RegNo >= RISCV::F0_H && RegNo <= RISCV::F31_H));
assert(!(RegNo >= RISCV::F0_F && RegNo <= RISCV::F31_F));
// The default FPR register class is based on the tablegen enum ordering.
static_assert(RISCV::F0_D < RISCV::F0_H, "FPR matching must be updated");
static_assert(RISCV::F0_D < RISCV::F0_F, "FPR matching must be updated");
if (RegNo == RISCV::NoRegister)
RegNo = MatchRegisterAltName(Name);
if (IsRV32E && RegNo >= RISCV::X16 && RegNo <= RISCV::X31)
RegNo = RISCV::NoRegister;
return RegNo == RISCV::NoRegister;
}
bool RISCVAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
if (tryParseRegister(RegNo, StartLoc, EndLoc) != MatchOperand_Success)
return Error(StartLoc, "invalid register name");
return false;
}
OperandMatchResultTy RISCVAsmParser::tryParseRegister(unsigned &RegNo,
SMLoc &StartLoc,
SMLoc &EndLoc) {
const AsmToken &Tok = getParser().getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
RegNo = 0;
StringRef Name = getLexer().getTok().getIdentifier();
if (matchRegisterNameHelper(isRV32E(), (MCRegister &)RegNo, Name))
return MatchOperand_NoMatch;
getParser().Lex(); // Eat identifier token.
return MatchOperand_Success;
}
OperandMatchResultTy RISCVAsmParser::parseRegister(OperandVector &Operands,
bool AllowParens) {
SMLoc FirstS = getLoc();
bool HadParens = false;
AsmToken LParen;
// If this is an LParen and a parenthesised register name is allowed, parse it
// atomically.
if (AllowParens && getLexer().is(AsmToken::LParen)) {
AsmToken Buf[2];
size_t ReadCount = getLexer().peekTokens(Buf);
if (ReadCount == 2 && Buf[1].getKind() == AsmToken::RParen) {
HadParens = true;
LParen = getParser().getTok();
getParser().Lex(); // Eat '('
}
}
switch (getLexer().getKind()) {
default:
if (HadParens)
getLexer().UnLex(LParen);
return MatchOperand_NoMatch;
case AsmToken::Identifier:
StringRef Name = getLexer().getTok().getIdentifier();
MCRegister RegNo;
matchRegisterNameHelper(isRV32E(), RegNo, Name);
if (RegNo == RISCV::NoRegister) {
if (HadParens)
getLexer().UnLex(LParen);
return MatchOperand_NoMatch;
}
if (HadParens)
Operands.push_back(RISCVOperand::createToken("(", FirstS, isRV64()));
SMLoc S = getLoc();
SMLoc E = SMLoc::getFromPointer(S.getPointer() + Name.size());
getLexer().Lex();
Operands.push_back(RISCVOperand::createReg(RegNo, S, E, isRV64()));
}
if (HadParens) {
getParser().Lex(); // Eat ')'
Operands.push_back(RISCVOperand::createToken(")", getLoc(), isRV64()));
}
return MatchOperand_Success;
}
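// Parse the opcode field of a '.insn' directive: either an integer in the
// range [0, 127] or a recognized opcode name that is translated to its
// numeric encoding.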
OperandMatchResultTy
RISCVAsmParser::parseInsnDirectiveOpcode(OperandVector &Operands) {
SMLoc S = getLoc();
SMLoc E;
const MCExpr *Res;
switch (getLexer().getKind()) {
default:
return MatchOperand_NoMatch;
case AsmToken::LParen:
case AsmToken::Minus:
case AsmToken::Plus:
case AsmToken::Exclaim:
case AsmToken::Tilde:
case AsmToken::Integer:
case AsmToken::String: {
if (getParser().parseExpression(Res, E))
return MatchOperand_ParseFail;
auto *CE = dyn_cast<MCConstantExpr>(Res);
if (CE) {
int64_t Imm = CE->getValue();
if (isUInt<7>(Imm)) {
Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
return MatchOperand_Success;
}
}
Twine Msg = "immediate must be an integer in the range";
Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 7) - 1) + "]");
return MatchOperand_ParseFail;
}
case AsmToken::Identifier: {
StringRef Identifier;
if (getParser().parseIdentifier(Identifier))
return MatchOperand_ParseFail;
auto Opcode = RISCVInsnOpcode::lookupRISCVOpcodeByName(Identifier);
if (Opcode) {
Res = MCConstantExpr::create(Opcode->Value, getContext());
E = SMLoc::getFromPointer(S.getPointer() + Identifier.size());
Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
return MatchOperand_Success;
}
Twine Msg = "operand must be a valid opcode name or an "
"integer in the range";
Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 7) - 1) + "]");
return MatchOperand_ParseFail;
}
case AsmToken::Percent: {
// Discard operand with modifier.
Twine Msg = "immediate must be an integer in the range";
Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 7) - 1) + "]");
return MatchOperand_ParseFail;
}
}
return MatchOperand_NoMatch;
}
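// Parse a CSR operand: a 12-bit integer encoding, or a system register name
// (including alternative and deprecated spellings) whose required features
// must be enabled.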
OperandMatchResultTy
RISCVAsmParser::parseCSRSystemRegister(OperandVector &Operands) {
SMLoc S = getLoc();
const MCExpr *Res;
switch (getLexer().getKind()) {
default:
return MatchOperand_NoMatch;
case AsmToken::LParen:
case AsmToken::Minus:
case AsmToken::Plus:
case AsmToken::Exclaim:
case AsmToken::Tilde:
case AsmToken::Integer:
case AsmToken::String: {
if (getParser().parseExpression(Res))
return MatchOperand_ParseFail;
auto *CE = dyn_cast<MCConstantExpr>(Res);
if (CE) {
int64_t Imm = CE->getValue();
if (isUInt<12>(Imm)) {
auto SysReg = RISCVSysReg::lookupSysRegByEncoding(Imm);
// Accept an immediate representing a named or un-named Sys Reg
// if the range is valid, regardless of the required features.
Operands.push_back(RISCVOperand::createSysReg(
SysReg ? SysReg->Name : "", S, Imm, isRV64()));
return MatchOperand_Success;
}
}
Twine Msg = "immediate must be an integer in the range";
Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 12) - 1) + "]");
return MatchOperand_ParseFail;
}
case AsmToken::Identifier: {
StringRef Identifier;
if (getParser().parseIdentifier(Identifier))
return MatchOperand_ParseFail;
auto SysReg = RISCVSysReg::lookupSysRegByName(Identifier);
if (!SysReg)
SysReg = RISCVSysReg::lookupSysRegByAltName(Identifier);
if (!SysReg)
if ((SysReg = RISCVSysReg::lookupSysRegByDeprecatedName(Identifier)))
Warning(S, "'" + Identifier + "' is a deprecated alias for '" +
SysReg->Name + "'");
// Accept a named Sys Reg if the required features are present.
if (SysReg) {
if (!SysReg->haveRequiredFeatures(getSTI().getFeatureBits())) {
Error(S, "system register use requires an option to be enabled");
return MatchOperand_ParseFail;
}
Operands.push_back(RISCVOperand::createSysReg(
Identifier, S, SysReg->Encoding, isRV64()));
return MatchOperand_Success;
}
Twine Msg = "operand must be a valid system register name "
"or an integer in the range";
Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 12) - 1) + "]");
return MatchOperand_ParseFail;
}
case AsmToken::Percent: {
// Discard operand with modifier.
Twine Msg = "immediate must be an integer in the range";
Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 12) - 1) + "]");
return MatchOperand_ParseFail;
}
}
return MatchOperand_NoMatch;
}
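// Parse a plain immediate expression; a leading '%' is handed off to
// parseOperandWithModifier.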
OperandMatchResultTy RISCVAsmParser::parseImmediate(OperandVector &Operands) {
SMLoc S = getLoc();
SMLoc E;
const MCExpr *Res;
switch (getLexer().getKind()) {
default:
return MatchOperand_NoMatch;
case AsmToken::LParen:
case AsmToken::Dot:
case AsmToken::Minus:
case AsmToken::Plus:
case AsmToken::Exclaim:
case AsmToken::Tilde:
case AsmToken::Integer:
case AsmToken::String:
case AsmToken::Identifier:
if (getParser().parseExpression(Res, E))
return MatchOperand_ParseFail;
break;
case AsmToken::Percent:
return parseOperandWithModifier(Operands);
}
Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
return MatchOperand_Success;
}
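// Parse an operand of the form '%modifier(expression)', wrapping the
// sub-expression in a RISCVMCExpr with the matching variant kind.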
OperandMatchResultTy
RISCVAsmParser::parseOperandWithModifier(OperandVector &Operands) {
SMLoc S = getLoc();
SMLoc E;
if (getLexer().getKind() != AsmToken::Percent) {
Error(getLoc(), "expected '%' for operand modifier");
return MatchOperand_ParseFail;
}
getParser().Lex(); // Eat '%'
if (getLexer().getKind() != AsmToken::Identifier) {
Error(getLoc(), "expected valid identifier for operand modifier");
return MatchOperand_ParseFail;
}
StringRef Identifier = getParser().getTok().getIdentifier();
RISCVMCExpr::VariantKind VK = RISCVMCExpr::getVariantKindForName(Identifier);
if (VK == RISCVMCExpr::VK_RISCV_Invalid) {
Error(getLoc(), "unrecognized operand modifier");
return MatchOperand_ParseFail;
}
getParser().Lex(); // Eat the identifier
if (getLexer().getKind() != AsmToken::LParen) {
Error(getLoc(), "expected '('");
return MatchOperand_ParseFail;
}
getParser().Lex(); // Eat '('
const MCExpr *SubExpr;
if (getParser().parseParenExpression(SubExpr, E)) {
return MatchOperand_ParseFail;
}
const MCExpr *ModExpr = RISCVMCExpr::create(SubExpr, VK, getContext());
Operands.push_back(RISCVOperand::createImm(ModExpr, S, E, isRV64()));
return MatchOperand_Success;
}
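// Parse a bare symbol reference, optionally followed by '+' or '-' and a
// constant expression; an '@plt' suffix is rejected for these operands.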
OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) {
SMLoc S = getLoc();
const MCExpr *Res;
if (getLexer().getKind() != AsmToken::Identifier)
return MatchOperand_NoMatch;
StringRef Identifier;
AsmToken Tok = getLexer().getTok();
if (getParser().parseIdentifier(Identifier))
return MatchOperand_ParseFail;
SMLoc E = SMLoc::getFromPointer(S.getPointer() + Identifier.size());
if (Identifier.consume_back("@plt")) {
Error(getLoc(), "'@plt' operand not valid for instruction");
return MatchOperand_ParseFail;
}
MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
if (Sym->isVariable()) {
const MCExpr *V = Sym->getVariableValue(/*SetUsed=*/false);
if (!isa<MCSymbolRefExpr>(V)) {
getLexer().UnLex(Tok); // Put back if it's not a bare symbol.
return MatchOperand_NoMatch;
}
Res = V;
} else
Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
MCBinaryExpr::Opcode Opcode;
switch (getLexer().getKind()) {
default:
Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
return MatchOperand_Success;
case AsmToken::Plus:
Opcode = MCBinaryExpr::Add;
getLexer().Lex();
break;
case AsmToken::Minus:
Opcode = MCBinaryExpr::Sub;
getLexer().Lex();
break;
}
const MCExpr *Expr;
if (getParser().parseExpression(Expr, E))
return MatchOperand_ParseFail;
Res = MCBinaryExpr::create(Opcode, Res, Expr, getContext());
Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
return MatchOperand_Success;
}
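// Parse a call target symbol; an '@plt' suffix selects VK_RISCV_CALL_PLT,
// otherwise VK_RISCV_CALL is used.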
OperandMatchResultTy RISCVAsmParser::parseCallSymbol(OperandVector &Operands) {
SMLoc S = getLoc();
const MCExpr *Res;
if (getLexer().getKind() != AsmToken::Identifier)
return MatchOperand_NoMatch;
// Avoid parsing the register in `call rd, foo` as a call symbol.
if (getLexer().peekTok().getKind() != AsmToken::EndOfStatement)
return MatchOperand_NoMatch;
StringRef Identifier;
if (getParser().parseIdentifier(Identifier))
return MatchOperand_ParseFail;
SMLoc E = SMLoc::getFromPointer(S.getPointer() + Identifier.size());
RISCVMCExpr::VariantKind Kind = RISCVMCExpr::VK_RISCV_CALL;
if (Identifier.consume_back("@plt"))
Kind = RISCVMCExpr::VK_RISCV_CALL_PLT;
MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
Res = RISCVMCExpr::create(Res, Kind, getContext());
Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
return MatchOperand_Success;
}
OperandMatchResultTy
RISCVAsmParser::parsePseudoJumpSymbol(OperandVector &Operands) {
SMLoc S = getLoc();
SMLoc E;
const MCExpr *Res;
if (getParser().parseExpression(Res, E))
return MatchOperand_ParseFail;
if (Res->getKind() != MCExpr::ExprKind::SymbolRef ||
cast<MCSymbolRefExpr>(Res)->getKind() ==
MCSymbolRefExpr::VariantKind::VK_PLT) {
Error(S, "operand must be a valid jump target");
return MatchOperand_ParseFail;
}
Res = RISCVMCExpr::create(Res, RISCVMCExpr::VK_RISCV_CALL, getContext());
Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
return MatchOperand_Success;
}
OperandMatchResultTy RISCVAsmParser::parseJALOffset(OperandVector &Operands) {
// Parsing jal operands is fiddly due to the `jal foo` and `jal ra, foo`
// both being acceptable forms. When parsing `jal ra, foo` this function
// will be called for the `ra` register operand in an attempt to match the
// single-operand alias. parseJALOffset must fail for this case. It would
// seem logical to try parsing the operand using parseImmediate and return
// NoMatch if the next token is a comma (meaning we must be parsing a jal in
// the second form rather than the first). We can't do this as there's no
// way of rewinding the lexer state. Instead, return NoMatch if this operand
// is an identifier and is followed by a comma.
if (getLexer().is(AsmToken::Identifier) &&
getLexer().peekTok().is(AsmToken::Comma))
return MatchOperand_NoMatch;
return parseImmediate(Operands);
}
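// Parse a vtype operand written as SEW, LMUL, tail policy and mask policy
// (e.g. 'e32,m1,ta,ma') and encode it with RISCVVType::encodeVTYPE.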
OperandMatchResultTy RISCVAsmParser::parseVTypeI(OperandVector &Operands) {
SMLoc S = getLoc();
if (getLexer().isNot(AsmToken::Identifier))
return MatchOperand_NoMatch;
SmallVector<AsmToken, 7> VTypeIElements;
// Put all the tokens for vtypei operand into VTypeIElements vector.
while (getLexer().isNot(AsmToken::EndOfStatement)) {
VTypeIElements.push_back(getLexer().getTok());
getLexer().Lex();
if (getLexer().is(AsmToken::EndOfStatement))
break;
if (getLexer().isNot(AsmToken::Comma))
goto MatchFail;
AsmToken Comma = getLexer().getTok();
VTypeIElements.push_back(Comma);
getLexer().Lex();
}
if (VTypeIElements.size() == 7) {
// The VTypeIElements layout is:
// SEW comma LMUL comma TA comma MA
// 0 1 2 3 4 5 6
StringRef Name = VTypeIElements[0].getIdentifier();
if (!Name.consume_front("e"))
goto MatchFail;
unsigned Sew;
if (Name.getAsInteger(10, Sew))
goto MatchFail;
if (!RISCVVType::isValidSEW(Sew))
goto MatchFail;
Name = VTypeIElements[2].getIdentifier();
if (!Name.consume_front("m"))
goto MatchFail;
// "m" or "mf"
bool Fractional = Name.consume_front("f");
unsigned Lmul;
if (Name.getAsInteger(10, Lmul))
goto MatchFail;
if (!RISCVVType::isValidLMUL(Lmul, Fractional))
goto MatchFail;
// ta or tu
Name = VTypeIElements[4].getIdentifier();
bool TailAgnostic;
if (Name == "ta")
TailAgnostic = true;
else if (Name == "tu")
TailAgnostic = false;
else
goto MatchFail;
// ma or mu
Name = VTypeIElements[6].getIdentifier();
bool MaskAgnostic;
if (Name == "ma")
MaskAgnostic = true;
else if (Name == "mu")
MaskAgnostic = false;
else
goto MatchFail;
unsigned LmulLog2 = Log2_32(Lmul);
RISCVII::VLMUL VLMUL =
static_cast<RISCVII::VLMUL>(Fractional ? 8 - LmulLog2 : LmulLog2);
unsigned VTypeI =
RISCVVType::encodeVTYPE(VLMUL, Sew, TailAgnostic, MaskAgnostic);
Operands.push_back(RISCVOperand::createVType(VTypeI, S, isRV64()));
return MatchOperand_Success;
}
// If NoMatch, unlex all the tokens that comprise a vtypei operand
MatchFail:
while (!VTypeIElements.empty())
getLexer().UnLex(VTypeIElements.pop_back_val());
return MatchOperand_NoMatch;
}
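// Parse the vector mask register operand 'v0.t'. A missing '.t' suffix is an
// error; any register other than v0 does not match.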
OperandMatchResultTy RISCVAsmParser::parseMaskReg(OperandVector &Operands) {
switch (getLexer().getKind()) {
default:
return MatchOperand_NoMatch;
case AsmToken::Identifier:
StringRef Name = getLexer().getTok().getIdentifier();
if (!Name.consume_back(".t")) {
Error(getLoc(), "expected '.t' suffix");
return MatchOperand_ParseFail;
}
MCRegister RegNo;
matchRegisterNameHelper(isRV32E(), RegNo, Name);
if (RegNo == RISCV::NoRegister)
return MatchOperand_NoMatch;
if (RegNo != RISCV::V0)
return MatchOperand_NoMatch;
SMLoc S = getLoc();
SMLoc E = SMLoc::getFromPointer(S.getPointer() + Name.size());
getLexer().Lex();
Operands.push_back(RISCVOperand::createReg(RegNo, S, E, isRV64()));
}
return MatchOperand_Success;
}
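// Parse a GPR used in place of an FPR; the operand is flagged as GPR-as-FPR
// when the F extension is not enabled.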
OperandMatchResultTy RISCVAsmParser::parseGPRAsFPR(OperandVector &Operands) {
switch (getLexer().getKind()) {
default:
return MatchOperand_NoMatch;
case AsmToken::Identifier:
StringRef Name = getLexer().getTok().getIdentifier();
MCRegister RegNo;
matchRegisterNameHelper(isRV32E(), RegNo, Name);
if (RegNo == RISCV::NoRegister)
return MatchOperand_NoMatch;
SMLoc S = getLoc();
SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
getLexer().Lex();
Operands.push_back(RISCVOperand::createReg(
RegNo, S, E, isRV64(), !getSTI().hasFeature(RISCV::FeatureStdExtF)));
}
return MatchOperand_Success;
}
OperandMatchResultTy
RISCVAsmParser::parseMemOpBaseReg(OperandVector &Operands) {
if (getLexer().isNot(AsmToken::LParen)) {
Error(getLoc(), "expected '('");
return MatchOperand_ParseFail;
}
getParser().Lex(); // Eat '('
Operands.push_back(RISCVOperand::createToken("(", getLoc(), isRV64()));
if (parseRegister(Operands) != MatchOperand_Success) {
Error(getLoc(), "expected register");
return MatchOperand_ParseFail;
}
if (getLexer().isNot(AsmToken::RParen)) {
Error(getLoc(), "expected ')'");
return MatchOperand_ParseFail;
}
getParser().Lex(); // Eat ')'
Operands.push_back(RISCVOperand::createToken(")", getLoc(), isRV64()));
return MatchOperand_Success;
}
OperandMatchResultTy RISCVAsmParser::parseAtomicMemOp(OperandVector &Operands) {
// Atomic operations such as lr.w, sc.w, and amo*.w accept a "memory operand"
// as one of their register operands, such as `(a0)`. This just denotes that
// the register (in this case `a0`) contains a memory address.
//
// Normally, we would be able to parse these by putting the parens into the
// instruction string. However, GNU as also accepts a zero-offset memory
// operand (such as `0(a0)`), and ignores the 0. Normally this would be parsed
// with parseImmediate followed by parseMemOpBaseReg, but these instructions
// do not accept an immediate operand, and we do not want to add a "dummy"
// operand that is silently dropped.
//
// Instead, we use this custom parser. This will: allow (and discard) an
// offset if it is zero; require (and discard) parentheses; and add only the
// parsed register operand to `Operands`.
//
// These operands are printed with RISCVInstPrinter::printAtomicMemOp, which
// will only print the register surrounded by parentheses (which GNU as also
// uses as its canonical representation for these operands).
std::unique_ptr<RISCVOperand> OptionalImmOp;
if (getLexer().isNot(AsmToken::LParen)) {
// Parse an integer token. We do not accept arbitrary constant expressions
// in the offset field (because they may include parens, which complicates
// parsing a lot).
int64_t ImmVal;
SMLoc ImmStart = getLoc();
if (getParser().parseIntToken(ImmVal,
"expected '(' or optional integer offset"))
return MatchOperand_ParseFail;
// Create a RISCVOperand for checking later (so the error messages are
// nicer), but we don't add it to Operands.
SMLoc ImmEnd = getLoc();
OptionalImmOp =
RISCVOperand::createImm(MCConstantExpr::create(ImmVal, getContext()),
ImmStart, ImmEnd, isRV64());
}
if (getLexer().isNot(AsmToken::LParen)) {
Error(getLoc(), OptionalImmOp ? "expected '(' after optional integer offset"
: "expected '(' or optional integer offset");
return MatchOperand_ParseFail;
}
getParser().Lex(); // Eat '('
if (parseRegister(Operands) != MatchOperand_Success) {
Error(getLoc(), "expected register");
return MatchOperand_ParseFail;
}
if (getLexer().isNot(AsmToken::RParen)) {
Error(getLoc(), "expected ')'");
return MatchOperand_ParseFail;
}
getParser().Lex(); // Eat ')'
// Deferred handling of non-zero offsets; this makes the error messages nicer.
if (OptionalImmOp && !OptionalImmOp->isImmZero()) {
Error(OptionalImmOp->getStartLoc(), "optional integer offset must be 0",
SMRange(OptionalImmOp->getStartLoc(), OptionalImmOp->getEndLoc()));
return MatchOperand_ParseFail;
}
return MatchOperand_Success;
}
/// Looks at a token type and creates the relevant operand from this
/// information, adding to Operands. If an operand was parsed, returns false;
/// otherwise returns true.
bool RISCVAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
// Check if the current operand has a custom associated parser, if so, try to
// custom parse the operand, or fallback to the general approach.
OperandMatchResultTy Result =
MatchOperandParserImpl(Operands, Mnemonic, /*ParseForAllFeatures=*/true);
if (Result == MatchOperand_Success)
return false;
if (Result == MatchOperand_ParseFail)
return true;
// Attempt to parse token as a register.
if (parseRegister(Operands, true) == MatchOperand_Success)
return false;
// Attempt to parse token as an immediate
if (parseImmediate(Operands) == MatchOperand_Success) {
// Parse memory base register if present
if (getLexer().is(AsmToken::LParen))
return parseMemOpBaseReg(Operands) != MatchOperand_Success;
return false;
}
// Finally we have exhausted all options and must declare defeat.
Error(getLoc(), "unknown operand");
return true;
}
bool RISCVAsmParser::ParseInstruction(ParseInstructionInfo &Info,
StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
// Ensure that if the instruction occurs when relaxation is enabled,
// relocations are forced for the file. Ideally this would be done when there
// is enough information to reliably determine if the instruction itself may
// cause relaxations. Unfortunately, the instruction processing stage occurs in the
// same pass as relocation emission, so it's too late to set a 'sticky bit'
// for the entire file.
if (getSTI().getFeatureBits()[RISCV::FeatureRelax]) {
auto *Assembler = getTargetStreamer().getStreamer().getAssemblerPtr();
if (Assembler != nullptr) {
RISCVAsmBackend &MAB =
static_cast<RISCVAsmBackend &>(Assembler->getBackend());
MAB.setForceRelocs();
}
}
// First operand is token for instruction
Operands.push_back(RISCVOperand::createToken(Name, NameLoc, isRV64()));
// If there are no more operands, then finish
if (getLexer().is(AsmToken::EndOfStatement)) {
getParser().Lex(); // Consume the EndOfStatement.
return false;
}
// Parse first operand
if (parseOperand(Operands, Name))
return true;
// Parse until end of statement, consuming commas between operands
unsigned OperandIdx = 1;
while (getLexer().is(AsmToken::Comma)) {
// Consume comma token
getLexer().Lex();
// Parse next operand
if (parseOperand(Operands, Name))
return true;
++OperandIdx;
}
if (getLexer().isNot(AsmToken::EndOfStatement)) {
SMLoc Loc = getLexer().getLoc();
getParser().eatToEndOfStatement();
return Error(Loc, "unexpected token");
}
getParser().Lex(); // Consume the EndOfStatement.
return false;
}
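// Extract the RISC-V variant kind from Expr (if any) and check that the
// remaining expression is relocatable with no additional reference kind.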
bool RISCVAsmParser::classifySymbolRef(const MCExpr *Expr,
RISCVMCExpr::VariantKind &Kind) {
Kind = RISCVMCExpr::VK_RISCV_None;
if (const RISCVMCExpr *RE = dyn_cast<RISCVMCExpr>(Expr)) {
Kind = RE->getKind();
Expr = RE->getSubExpr();
}
MCValue Res;
MCFixup Fixup;
if (Expr->evaluateAsRelocatable(Res, nullptr, &Fixup))
return Res.getRefKind() == RISCVMCExpr::VK_RISCV_None;
return false;
}
bool RISCVAsmParser::ParseDirective(AsmToken DirectiveID) {
// This returns false if this function recognizes the directive
// regardless of whether it is successfully handled or reports an
// error. Otherwise it returns true to give the generic parser a
// chance at recognizing it.
StringRef IDVal = DirectiveID.getString();
if (IDVal == ".option")
return parseDirectiveOption();
if (IDVal == ".attribute")
return parseDirectiveAttribute();
if (IDVal == ".insn")
return parseDirectiveInsn(DirectiveID.getLoc());
return true;
}
bool RISCVAsmParser::parseDirectiveOption() {
MCAsmParser &Parser = getParser();
// Get the option token.
AsmToken Tok = Parser.getTok();
// At the moment only identifiers are supported.
if (Tok.isNot(AsmToken::Identifier))
return Error(Parser.getTok().getLoc(),
"unexpected token, expected identifier");
StringRef Option = Tok.getIdentifier();
if (Option == "push") {
getTargetStreamer().emitDirectiveOptionPush();
Parser.Lex();
if (Parser.getTok().isNot(AsmToken::EndOfStatement))
return Error(Parser.getTok().getLoc(),
"unexpected token, expected end of statement");
pushFeatureBits();
return false;
}
if (Option == "pop") {
SMLoc StartLoc = Parser.getTok().getLoc();
getTargetStreamer().emitDirectiveOptionPop();
Parser.Lex();
if (Parser.getTok().isNot(AsmToken::EndOfStatement))
return Error(Parser.getTok().getLoc(),
"unexpected token, expected end of statement");
if (popFeatureBits())
return Error(StartLoc, ".option pop with no .option push");
return false;
}
if (Option == "rvc") {
getTargetStreamer().emitDirectiveOptionRVC();
Parser.Lex();
if (Parser.getTok().isNot(AsmToken::EndOfStatement))
return Error(Parser.getTok().getLoc(),
"unexpected token, expected end of statement");
setFeatureBits(RISCV::FeatureStdExtC, "c");
return false;
}
if (Option == "norvc") {
getTargetStreamer().emitDirectiveOptionNoRVC();
Parser.Lex();
if (Parser.getTok().isNot(AsmToken::EndOfStatement))
return Error(Parser.getTok().getLoc(),
"unexpected token, expected end of statement");
clearFeatureBits(RISCV::FeatureStdExtC, "c");
return false;
}
if (Option == "pic") {
getTargetStreamer().emitDirectiveOptionPIC();
Parser.Lex();
if (Parser.getTok().isNot(AsmToken::EndOfStatement))
return Error(Parser.getTok().getLoc(),
"unexpected token, expected end of statement");
ParserOptions.IsPicEnabled = true;
return false;
}
if (Option == "nopic") {
getTargetStreamer().emitDirectiveOptionNoPIC();
Parser.Lex();
if (Parser.getTok().isNot(AsmToken::EndOfStatement))
return Error(Parser.getTok().getLoc(),
"unexpected token, expected end of statement");
ParserOptions.IsPicEnabled = false;
return false;
}
if (Option == "relax") {
getTargetStreamer().emitDirectiveOptionRelax();
Parser.Lex();
if (Parser.getTok().isNot(AsmToken::EndOfStatement))
return Error(Parser.getTok().getLoc(),
"unexpected token, expected end of statement");
setFeatureBits(RISCV::FeatureRelax, "relax");
return false;
}
if (Option == "norelax") {
getTargetStreamer().emitDirectiveOptionNoRelax();
Parser.Lex();
if (Parser.getTok().isNot(AsmToken::EndOfStatement))
return Error(Parser.getTok().getLoc(),
"unexpected token, expected end of statement");
clearFeatureBits(RISCV::FeatureRelax, "relax");
return false;
}
// Unknown option.
Warning(Parser.getTok().getLoc(),
"unknown option, expected 'push', 'pop', 'rvc', 'norvc', 'relax' or "
"'norelax'");
Parser.eatToEndOfStatement();
return false;
}
/// parseDirectiveAttribute
/// ::= .attribute expression ',' ( expression | "string" )
/// ::= .attribute identifier ',' ( expression | "string" )
bool RISCVAsmParser::parseDirectiveAttribute() {
MCAsmParser &Parser = getParser();
int64_t Tag;
SMLoc TagLoc;
TagLoc = Parser.getTok().getLoc();
if (Parser.getTok().is(AsmToken::Identifier)) {
StringRef Name = Parser.getTok().getIdentifier();
Optional<unsigned> Ret =
ELFAttrs::attrTypeFromString(Name, RISCVAttrs::getRISCVAttributeTags());
if (!Ret.hasValue()) {
Error(TagLoc, "attribute name not recognised: " + Name);
return false;
}
Tag = Ret.getValue();
Parser.Lex();
} else {
const MCExpr *AttrExpr;
TagLoc = Parser.getTok().getLoc();
if (Parser.parseExpression(AttrExpr))
return true;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(AttrExpr);
if (check(!CE, TagLoc, "expected numeric constant"))
return true;
Tag = CE->getValue();
}
if (Parser.parseToken(AsmToken::Comma, "comma expected"))
return true;
StringRef StringValue;
int64_t IntegerValue = 0;
bool IsIntegerValue = true;
// RISC-V attributes have a string value if the tag number is odd
// and an integer value if the tag number is even.
if (Tag % 2)
IsIntegerValue = false;
SMLoc ValueExprLoc = Parser.getTok().getLoc();
if (IsIntegerValue) {
const MCExpr *ValueExpr;
if (Parser.parseExpression(ValueExpr))
return true;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ValueExpr);
if (!CE)
return Error(ValueExprLoc, "expected numeric constant");
IntegerValue = CE->getValue();
} else {
if (Parser.getTok().isNot(AsmToken::String))
return Error(Parser.getTok().getLoc(), "expected string constant");
StringValue = Parser.getTok().getStringContents();
Parser.Lex();
}
if (Parser.parseToken(AsmToken::EndOfStatement,
"unexpected token in '.attribute' directive"))
return true;
if (IsIntegerValue)
getTargetStreamer().emitAttribute(Tag, IntegerValue);
else if (Tag != RISCVAttrs::ARCH)
getTargetStreamer().emitTextAttribute(Tag, StringValue);
else {
StringRef Arch = StringValue;
for (auto Feature : RISCVFeatureKV)
if (llvm::RISCVISAInfo::isSupportedExtensionFeature(Feature.Key))
clearFeatureBits(Feature.Value, Feature.Key);
auto ParseResult = llvm::RISCVISAInfo::parseArchString(
StringValue, /*EnableExperimentalExtension=*/true,
/*ExperimentalExtensionVersionCheck=*/true);
if (!ParseResult) {
std::string Buffer;
raw_string_ostream OutputErrMsg(Buffer);
handleAllErrors(ParseResult.takeError(), [&](llvm::StringError &ErrMsg) {
OutputErrMsg << "invalid arch name '" << Arch << "', "
<< ErrMsg.getMessage();
});
return Error(ValueExprLoc, OutputErrMsg.str());
}
auto &ISAInfo = *ParseResult;
for (auto Feature : RISCVFeatureKV)
if (ISAInfo->hasExtension(Feature.Key))
setFeatureBits(Feature.Value, Feature.Key);
if (ISAInfo->getXLen() == 32)
clearFeatureBits(RISCV::Feature64Bit, "64bit");
else if (ISAInfo->getXLen() == 64)
setFeatureBits(RISCV::Feature64Bit, "64bit");
else
return Error(ValueExprLoc, "bad arch string " + Arch);
// Then emit the arch string.
getTargetStreamer().emitTextAttribute(Tag, ISAInfo->toString());
}
return false;
}
/// parseDirectiveInsn
/// ::= .insn [ format encoding, (operands (, operands)*) ]
bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) {
MCAsmParser &Parser = getParser();
// Expect instruction format as identifier.
StringRef Format;
SMLoc ErrorLoc = Parser.getTok().getLoc();
if (Parser.parseIdentifier(Format))
return Error(ErrorLoc, "expected instruction format");
if (Format != "r" && Format != "r4" && Format != "i" && Format != "b" &&
Format != "sb" && Format != "u" && Format != "j" && Format != "uj" &&
Format != "s")
return Error(ErrorLoc, "invalid instruction format");
std::string FormatName = (".insn_" + Format).str();
ParseInstructionInfo Info;
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> Operands;
if (ParseInstruction(Info, FormatName, L, Operands))
return true;
unsigned Opcode;
uint64_t ErrorInfo;
return MatchAndEmitInstruction(L, Opcode, Operands, Parser.getStreamer(),
ErrorInfo,
/*MatchingInlineAsm=*/false);
}
void RISCVAsmParser::emitToStreamer(MCStreamer &S, const MCInst &Inst) {
MCInst CInst;
bool Res = compressInst(CInst, Inst, getSTI(), S.getContext());
if (Res)
++RISCVNumInstrsCompressed;
S.emitInstruction((Res ? CInst : Inst), getSTI());
}
void RISCVAsmParser::emitLoadImm(MCRegister DestReg, int64_t Value,
MCStreamer &Out) {
RISCVMatInt::InstSeq Seq =
RISCVMatInt::generateInstSeq(Value, getSTI().getFeatureBits());
MCRegister SrcReg = RISCV::X0;
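// For example (illustrative), "li a0, 0x12345" is materialised as
// "lui a0, 0x12; addi a0, a0, 0x345".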
for (RISCVMatInt::Inst &Inst : Seq) {
if (Inst.Opc == RISCV::LUI) {
emitToStreamer(
Out, MCInstBuilder(RISCV::LUI).addReg(DestReg).addImm(Inst.Imm));
} else if (Inst.Opc == RISCV::ADD_UW) {
emitToStreamer(Out, MCInstBuilder(RISCV::ADD_UW)
.addReg(DestReg)
.addReg(SrcReg)
.addReg(RISCV::X0));
} else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD ||
Inst.Opc == RISCV::SH3ADD) {
emitToStreamer(
Out, MCInstBuilder(Inst.Opc).addReg(DestReg).addReg(SrcReg).addReg(
SrcReg));
} else {
emitToStreamer(
Out, MCInstBuilder(Inst.Opc).addReg(DestReg).addReg(SrcReg).addImm(
Inst.Imm));
}
// Only the first instruction has X0 as its source.
SrcReg = DestReg;
}
}
void RISCVAsmParser::emitAuipcInstPair(MCOperand DestReg, MCOperand TmpReg,
const MCExpr *Symbol,
RISCVMCExpr::VariantKind VKHi,
unsigned SecondOpcode, SMLoc IDLoc,
MCStreamer &Out) {
// A pair of instructions for PC-relative addressing; expands to
// TmpLabel: AUIPC TmpReg, VKHi(symbol)
// OP DestReg, TmpReg, %pcrel_lo(TmpLabel)
MCContext &Ctx = getContext();
MCSymbol *TmpLabel = Ctx.createNamedTempSymbol("pcrel_hi");
Out.emitLabel(TmpLabel);
const RISCVMCExpr *SymbolHi = RISCVMCExpr::create(Symbol, VKHi, Ctx);
emitToStreamer(
Out, MCInstBuilder(RISCV::AUIPC).addOperand(TmpReg).addExpr(SymbolHi));
const MCExpr *RefToLinkTmpLabel =
RISCVMCExpr::create(MCSymbolRefExpr::create(TmpLabel, Ctx),
RISCVMCExpr::VK_RISCV_PCREL_LO, Ctx);
emitToStreamer(Out, MCInstBuilder(SecondOpcode)
.addOperand(DestReg)
.addOperand(TmpReg)
.addExpr(RefToLinkTmpLabel));
}
void RISCVAsmParser::emitLoadLocalAddress(MCInst &Inst, SMLoc IDLoc,
MCStreamer &Out) {
// The load local address pseudo-instruction "lla" is used in PC-relative
// addressing of local symbols:
// lla rdest, symbol
// expands to
// TmpLabel: AUIPC rdest, %pcrel_hi(symbol)
// ADDI rdest, rdest, %pcrel_lo(TmpLabel)
MCOperand DestReg = Inst.getOperand(0);
const MCExpr *Symbol = Inst.getOperand(1).getExpr();
emitAuipcInstPair(DestReg, DestReg, Symbol, RISCVMCExpr::VK_RISCV_PCREL_HI,
RISCV::ADDI, IDLoc, Out);
}
void RISCVAsmParser::emitLoadAddress(MCInst &Inst, SMLoc IDLoc,
MCStreamer &Out) {
// The load address pseudo-instruction "la" is used in PC-relative and
// GOT-indirect addressing of global symbols:
// la rdest, symbol
// expands to either (for non-PIC)
// TmpLabel: AUIPC rdest, %pcrel_hi(symbol)
// ADDI rdest, rdest, %pcrel_lo(TmpLabel)
// or (for PIC)
// TmpLabel: AUIPC rdest, %got_pcrel_hi(symbol)
// Lx rdest, %pcrel_lo(TmpLabel)(rdest)
MCOperand DestReg = Inst.getOperand(0);
const MCExpr *Symbol = Inst.getOperand(1).getExpr();
unsigned SecondOpcode;
RISCVMCExpr::VariantKind VKHi;
if (ParserOptions.IsPicEnabled) {
SecondOpcode = isRV64() ? RISCV::LD : RISCV::LW;
VKHi = RISCVMCExpr::VK_RISCV_GOT_HI;
} else {
SecondOpcode = RISCV::ADDI;
VKHi = RISCVMCExpr::VK_RISCV_PCREL_HI;
}
emitAuipcInstPair(DestReg, DestReg, Symbol, VKHi, SecondOpcode, IDLoc, Out);
}
void RISCVAsmParser::emitLoadTLSIEAddress(MCInst &Inst, SMLoc IDLoc,
MCStreamer &Out) {
// The load TLS IE address pseudo-instruction "la.tls.ie" is used in
// initial-exec TLS model addressing of global symbols:
// la.tls.ie rdest, symbol
// expands to
// TmpLabel: AUIPC rdest, %tls_ie_pcrel_hi(symbol)
// Lx rdest, %pcrel_lo(TmpLabel)(rdest)
MCOperand DestReg = Inst.getOperand(0);
const MCExpr *Symbol = Inst.getOperand(1).getExpr();
unsigned SecondOpcode = isRV64() ? RISCV::LD : RISCV::LW;
emitAuipcInstPair(DestReg, DestReg, Symbol, RISCVMCExpr::VK_RISCV_TLS_GOT_HI,
SecondOpcode, IDLoc, Out);
}
void RISCVAsmParser::emitLoadTLSGDAddress(MCInst &Inst, SMLoc IDLoc,
MCStreamer &Out) {
// The load TLS GD address pseudo-instruction "la.tls.gd" is used in
// global-dynamic TLS model addressing of global symbols:
// la.tls.gd rdest, symbol
// expands to
// TmpLabel: AUIPC rdest, %tls_gd_pcrel_hi(symbol)
// ADDI rdest, rdest, %pcrel_lo(TmpLabel)
MCOperand DestReg = Inst.getOperand(0);
const MCExpr *Symbol = Inst.getOperand(1).getExpr();
emitAuipcInstPair(DestReg, DestReg, Symbol, RISCVMCExpr::VK_RISCV_TLS_GD_HI,
RISCV::ADDI, IDLoc, Out);
}
void RISCVAsmParser::emitLoadStoreSymbol(MCInst &Inst, unsigned Opcode,
SMLoc IDLoc, MCStreamer &Out,
bool HasTmpReg) {
// The load/store pseudo-instruction does a pc-relative load with
// a symbol.
//
// The expansion looks like this
//
// TmpLabel: AUIPC tmp, %pcrel_hi(symbol)
// [S|L]X rd, %pcrel_lo(TmpLabel)(tmp)
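// For example, "lw a0, sym" expands to roughly:
//   .Lpcrel_hi0: AUIPC a0, %pcrel_hi(sym)
//                LW    a0, %pcrel_lo(.Lpcrel_hi0)(a0)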
unsigned DestRegOpIdx = HasTmpReg ? 1 : 0;
MCOperand DestReg = Inst.getOperand(DestRegOpIdx);
unsigned SymbolOpIdx = HasTmpReg ? 2 : 1;
MCOperand TmpReg = Inst.getOperand(0);
const MCExpr *Symbol = Inst.getOperand(SymbolOpIdx).getExpr();
emitAuipcInstPair(DestReg, TmpReg, Symbol, RISCVMCExpr::VK_RISCV_PCREL_HI,
Opcode, IDLoc, Out);
}
void RISCVAsmParser::emitPseudoExtend(MCInst &Inst, bool SignExtend,
int64_t Width, SMLoc IDLoc,
MCStreamer &Out) {
// The sign/zero extend pseudo-instruction does two shifts, with the shift
// amounts dependent on the XLEN.
//
// The expansion looks like this
//
// SLLI rd, rs, XLEN - Width
// SR[A|R]I rd, rd, XLEN - Width
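// For example, on RV64 "sext.b rd, rs" becomes "slli rd, rs, 56" followed by
// "srai rd, rd, 56".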
MCOperand DestReg = Inst.getOperand(0);
MCOperand SourceReg = Inst.getOperand(1);
unsigned SecondOpcode = SignExtend ? RISCV::SRAI : RISCV::SRLI;
int64_t ShAmt = (isRV64() ? 64 : 32) - Width;
assert(ShAmt > 0 && "Shift amount must be non-zero.");
emitToStreamer(Out, MCInstBuilder(RISCV::SLLI)
.addOperand(DestReg)
.addOperand(SourceReg)
.addImm(ShAmt));
emitToStreamer(Out, MCInstBuilder(SecondOpcode)
.addOperand(DestReg)
.addOperand(DestReg)
.addImm(ShAmt));
}
void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc,
MCStreamer &Out) {
if (Inst.getNumOperands() == 3) {
// unmasked va >= x
//
// pseudoinstruction: vmsge{u}.vx vd, va, x
// expansion: vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd
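// (vmnand.mm vd, vd, vd computes ~vd, inverting the vmslt result into vmsge.)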
emitToStreamer(Out, MCInstBuilder(Opcode)
.addOperand(Inst.getOperand(0))
.addOperand(Inst.getOperand(1))
.addOperand(Inst.getOperand(2))
.addReg(RISCV::NoRegister));
emitToStreamer(Out, MCInstBuilder(RISCV::VMNAND_MM)
.addOperand(Inst.getOperand(0))
.addOperand(Inst.getOperand(0))
.addOperand(Inst.getOperand(0)));
} else if (Inst.getNumOperands() == 4) {
// masked va >= x, vd != v0
//
// pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t
// expansion: vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
assert(Inst.getOperand(0).getReg() != RISCV::V0 &&
"The destination register should not be V0.");
emitToStreamer(Out, MCInstBuilder(Opcode)
.addOperand(Inst.getOperand(0))
.addOperand(Inst.getOperand(1))
.addOperand(Inst.getOperand(2))
.addOperand(Inst.getOperand(3)));
emitToStreamer(Out, MCInstBuilder(RISCV::VMXOR_MM)
.addOperand(Inst.getOperand(0))
.addOperand(Inst.getOperand(0))
.addReg(RISCV::V0));
} else if (Inst.getNumOperands() == 5 &&
Inst.getOperand(0).getReg() == RISCV::V0) {
// masked va >= x, vd == v0
//
// pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t, vt
// expansion: vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
assert(Inst.getOperand(0).getReg() == RISCV::V0 &&
"The destination register should be V0.");
assert(Inst.getOperand(1).getReg() != RISCV::V0 &&
"The temporary vector register should not be V0.");
emitToStreamer(Out, MCInstBuilder(Opcode)
.addOperand(Inst.getOperand(1))
.addOperand(Inst.getOperand(2))
.addOperand(Inst.getOperand(3))
.addOperand(Inst.getOperand(4)));
emitToStreamer(Out, MCInstBuilder(RISCV::VMANDN_MM)
.addOperand(Inst.getOperand(0))
.addOperand(Inst.getOperand(0))
.addOperand(Inst.getOperand(1)));
} else if (Inst.getNumOperands() == 5) {
// masked va >= x, any vd
//
// pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t, vt
// expansion: vmslt{u}.vx vt, va, x; vmandn.mm vt, v0, vt; vmandn.mm vd,
// vd, v0; vmor.mm vd, vt, vd
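// That is, vt holds the active lanes where va >= x (v0 & ~(va < x)), vd keeps
// its previous value in the inactive lanes (vd & ~v0), and vmor merges the two.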
assert(Inst.getOperand(1).getReg() != RISCV::V0 &&
"The temporary vector register should not be V0.");
emitToStreamer(Out, MCInstBuilder(Opcode)
.addOperand(Inst.getOperand(1))
.addOperand(Inst.getOperand(2))
.addOperand(Inst.getOperand(3))
.addReg(RISCV::NoRegister));
emitToStreamer(Out, MCInstBuilder(RISCV::VMANDN_MM)
.addOperand(Inst.getOperand(1))
.addReg(RISCV::V0)
.addOperand(Inst.getOperand(1)));
emitToStreamer(Out, MCInstBuilder(RISCV::VMANDN_MM)
.addOperand(Inst.getOperand(0))
.addOperand(Inst.getOperand(0))
.addReg(RISCV::V0));
emitToStreamer(Out, MCInstBuilder(RISCV::VMOR_MM)
.addOperand(Inst.getOperand(0))
.addOperand(Inst.getOperand(1))
.addOperand(Inst.getOperand(0)));
}
}
bool RISCVAsmParser::checkPseudoAddTPRel(MCInst &Inst,
OperandVector &Operands) {
assert(Inst.getOpcode() == RISCV::PseudoAddTPRel && "Invalid instruction");
assert(Inst.getOperand(2).isReg() && "Unexpected second operand kind");
if (Inst.getOperand(2).getReg() != RISCV::X4) {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[3]).getStartLoc();
return Error(ErrorLoc, "the second input operand must be tp/x4 when using "
"%tprel_add modifier");
}
return false;
}
std::unique_ptr<RISCVOperand> RISCVAsmParser::defaultMaskRegOp() const {
return RISCVOperand::createReg(RISCV::NoRegister, llvm::SMLoc(),
llvm::SMLoc(), isRV64());
}
bool RISCVAsmParser::validateInstruction(MCInst &Inst,
OperandVector &Operands) {
if (Inst.getOpcode() == RISCV::PseudoVMSGEU_VX_M_T ||
Inst.getOpcode() == RISCV::PseudoVMSGE_VX_M_T) {
unsigned DestReg = Inst.getOperand(0).getReg();
unsigned TempReg = Inst.getOperand(1).getReg();
if (DestReg == TempReg) {
SMLoc Loc = Operands.back()->getStartLoc();
return Error(Loc, "The temporary vector register cannot be the same as "
"the destination register.");
}
}
const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
RISCVII::VConstraintType Constraints =
RISCVII::getConstraint(MCID.TSFlags);
if (Constraints == RISCVII::NoConstraint)
return false;
unsigned DestReg = Inst.getOperand(0).getReg();
// Operands[1] will be the first operand, DestReg.
SMLoc Loc = Operands[1]->getStartLoc();
if (Constraints & RISCVII::VS2Constraint) {
unsigned CheckReg = Inst.getOperand(1).getReg();
if (DestReg == CheckReg)
return Error(Loc, "The destination vector register group cannot overlap"
" the source vector register group.");
}
if ((Constraints & RISCVII::VS1Constraint) && (Inst.getOperand(2).isReg())) {
unsigned CheckReg = Inst.getOperand(2).getReg();
if (DestReg == CheckReg)
return Error(Loc, "The destination vector register group cannot overlap"
" the source vector register group.");
}
if ((Constraints & RISCVII::VMConstraint) && (DestReg == RISCV::V0)) {
// vadc and vsbc are special cases: these instructions have no mask register,
// so their destination register cannot be V0.
unsigned Opcode = Inst.getOpcode();
if (Opcode == RISCV::VADC_VVM || Opcode == RISCV::VADC_VXM ||
Opcode == RISCV::VADC_VIM || Opcode == RISCV::VSBC_VVM ||
Opcode == RISCV::VSBC_VXM || Opcode == RISCV::VFMERGE_VFM ||
Opcode == RISCV::VMERGE_VIM || Opcode == RISCV::VMERGE_VVM ||
Opcode == RISCV::VMERGE_VXM)
return Error(Loc, "The destination vector register group cannot be V0.");
// Regardless of whether the masked or unmasked form is used, the number of
// operands is the same. For example, "viota.m v0, v2" is really
// "viota.m v0, v2, NoRegister", so we check the last operand to determine
// whether the instruction is masked.
unsigned CheckReg = Inst.getOperand(Inst.getNumOperands() - 1).getReg();
assert((CheckReg == RISCV::V0 || CheckReg == RISCV::NoRegister) &&
"Unexpected register for mask operand");
if (DestReg == CheckReg)
return Error(Loc, "The destination vector register group cannot overlap"
" the mask register.");
}
return false;
}
bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
OperandVector &Operands,
MCStreamer &Out) {
Inst.setLoc(IDLoc);
switch (Inst.getOpcode()) {
default:
break;
case RISCV::PseudoLI: {
MCRegister Reg = Inst.getOperand(0).getReg();
const MCOperand &Op1 = Inst.getOperand(1);
if (Op1.isExpr()) {
// We must have li reg, %lo(sym) or li reg, %pcrel_lo(sym) or similar.
// Just convert to an addi. This allows compatibility with gas.
emitToStreamer(Out, MCInstBuilder(RISCV::ADDI)
.addReg(Reg)
.addReg(RISCV::X0)
.addExpr(Op1.getExpr()));
return false;
}
int64_t Imm = Inst.getOperand(1).getImm();
// On RV32 the immediate here can either be a signed or an unsigned
// 32-bit number. Sign extension has to be performed to ensure that Imm
// represents the expected signed 64-bit number.
if (!isRV64())
Imm = SignExtend64<32>(Imm);
emitLoadImm(Reg, Imm, Out);
return false;
}
case RISCV::PseudoLLA:
emitLoadLocalAddress(Inst, IDLoc, Out);
return false;
case RISCV::PseudoLA:
emitLoadAddress(Inst, IDLoc, Out);
return false;
case RISCV::PseudoLA_TLS_IE:
emitLoadTLSIEAddress(Inst, IDLoc, Out);
return false;
case RISCV::PseudoLA_TLS_GD:
emitLoadTLSGDAddress(Inst, IDLoc, Out);
return false;
case RISCV::PseudoLB:
emitLoadStoreSymbol(Inst, RISCV::LB, IDLoc, Out, /*HasTmpReg=*/false);
return false;
case RISCV::PseudoLBU:
emitLoadStoreSymbol(Inst, RISCV::LBU, IDLoc, Out, /*HasTmpReg=*/false);
return false;
case RISCV::PseudoLH:
emitLoadStoreSymbol(Inst, RISCV::LH, IDLoc, Out, /*HasTmpReg=*/false);
return false;
case RISCV::PseudoLHU:
emitLoadStoreSymbol(Inst, RISCV::LHU, IDLoc, Out, /*HasTmpReg=*/false);
return false;
case RISCV::PseudoLW:
emitLoadStoreSymbol(Inst, RISCV::LW, IDLoc, Out, /*HasTmpReg=*/false);
return false;
case RISCV::PseudoLWU:
emitLoadStoreSymbol(Inst, RISCV::LWU, IDLoc, Out, /*HasTmpReg=*/false);
return false;
case RISCV::PseudoLD:
emitLoadStoreSymbol(Inst, RISCV::LD, IDLoc, Out, /*HasTmpReg=*/false);
return false;
case RISCV::PseudoFLH:
emitLoadStoreSymbol(Inst, RISCV::FLH, IDLoc, Out, /*HasTmpReg=*/true);
return false;
case RISCV::PseudoFLW:
emitLoadStoreSymbol(Inst, RISCV::FLW, IDLoc, Out, /*HasTmpReg=*/true);
return false;
case RISCV::PseudoFLD:
emitLoadStoreSymbol(Inst, RISCV::FLD, IDLoc, Out, /*HasTmpReg=*/true);
return false;
case RISCV::PseudoSB:
emitLoadStoreSymbol(Inst, RISCV::SB, IDLoc, Out, /*HasTmpReg=*/true);
return false;
case RISCV::PseudoSH:
emitLoadStoreSymbol(Inst, RISCV::SH, IDLoc, Out, /*HasTmpReg=*/true);
return false;
case RISCV::PseudoSW:
emitLoadStoreSymbol(Inst, RISCV::SW, IDLoc, Out, /*HasTmpReg=*/true);
return false;
case RISCV::PseudoSD:
emitLoadStoreSymbol(Inst, RISCV::SD, IDLoc, Out, /*HasTmpReg=*/true);
return false;
case RISCV::PseudoFSH:
emitLoadStoreSymbol(Inst, RISCV::FSH, IDLoc, Out, /*HasTmpReg=*/true);
return false;
case RISCV::PseudoFSW:
emitLoadStoreSymbol(Inst, RISCV::FSW, IDLoc, Out, /*HasTmpReg=*/true);
return false;
case RISCV::PseudoFSD:
emitLoadStoreSymbol(Inst, RISCV::FSD, IDLoc, Out, /*HasTmpReg=*/true);
return false;
case RISCV::PseudoAddTPRel:
if (checkPseudoAddTPRel(Inst, Operands))
return true;
break;
case RISCV::PseudoSEXT_B:
emitPseudoExtend(Inst, /*SignExtend=*/true, /*Width=*/8, IDLoc, Out);
return false;
case RISCV::PseudoSEXT_H:
emitPseudoExtend(Inst, /*SignExtend=*/true, /*Width=*/16, IDLoc, Out);
return false;
case RISCV::PseudoZEXT_H:
emitPseudoExtend(Inst, /*SignExtend=*/false, /*Width=*/16, IDLoc, Out);
return false;
case RISCV::PseudoZEXT_W:
emitPseudoExtend(Inst, /*SignExtend=*/false, /*Width=*/32, IDLoc, Out);
return false;
case RISCV::PseudoVMSGEU_VX:
case RISCV::PseudoVMSGEU_VX_M:
case RISCV::PseudoVMSGEU_VX_M_T:
emitVMSGE(Inst, RISCV::VMSLTU_VX, IDLoc, Out);
return false;
case RISCV::PseudoVMSGE_VX:
case RISCV::PseudoVMSGE_VX_M:
case RISCV::PseudoVMSGE_VX_M_T:
emitVMSGE(Inst, RISCV::VMSLT_VX, IDLoc, Out);
return false;
case RISCV::PseudoVMSGE_VI:
case RISCV::PseudoVMSLT_VI: {
// Both these comparisons and their immediates are signed, so we can simply
// subtract one from the immediate and change the opcode.
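// For example, "vmsge.vi vd, va, 5" is emitted as "vmsgt.vi vd, va, 4" and
// "vmslt.vi vd, va, 5" as "vmsle.vi vd, va, 4".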
int64_t Imm = Inst.getOperand(2).getImm();
unsigned Opc = Inst.getOpcode() == RISCV::PseudoVMSGE_VI ? RISCV::VMSGT_VI
: RISCV::VMSLE_VI;
emitToStreamer(Out, MCInstBuilder(Opc)
.addOperand(Inst.getOperand(0))
.addOperand(Inst.getOperand(1))
.addImm(Imm - 1)
.addOperand(Inst.getOperand(3)));
return false;
}
case RISCV::PseudoVMSGEU_VI:
case RISCV::PseudoVMSLTU_VI: {
int64_t Imm = Inst.getOperand(2).getImm();
// Unsigned comparisons are tricky because the immediate is signed. If the
// immediate is 0 we can't just subtract one: vmsltu.vi v0, v1, 0 is always
// false, but vmsleu.vi v0, v1, -1 is always true. Instead we emit
// vmsne v0, v1, v1 (always false) for vmsltu, and vmseq v0, v1, v1 (always
// true) for vmsgeu.
if (Imm == 0) {
unsigned Opc = Inst.getOpcode() == RISCV::PseudoVMSGEU_VI
? RISCV::VMSEQ_VV
: RISCV::VMSNE_VV;
emitToStreamer(Out, MCInstBuilder(Opc)
.addOperand(Inst.getOperand(0))
.addOperand(Inst.getOperand(1))
.addOperand(Inst.getOperand(1))
.addOperand(Inst.getOperand(3)));
} else {
// For other immediate values we can subtract one, as in the signed case.
unsigned Opc = Inst.getOpcode() == RISCV::PseudoVMSGEU_VI
? RISCV::VMSGTU_VI
: RISCV::VMSLEU_VI;
emitToStreamer(Out, MCInstBuilder(Opc)
.addOperand(Inst.getOperand(0))
.addOperand(Inst.getOperand(1))
.addImm(Imm - 1)
.addOperand(Inst.getOperand(3)));
}
return false;
}
}
emitToStreamer(Out, Inst);
return false;
}
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVAsmParser() {
RegisterMCAsmParser<RISCVAsmParser> X(getTheRISCV32Target());
RegisterMCAsmParser<RISCVAsmParser> Y(getTheRISCV64Target());
}
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 97d24c8e9c0b..e7672a7652cd 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1,11027 +1,11045 @@
//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISCV uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "RISCVISelLowering.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "riscv-lower"
STATISTIC(NumTailCalls, "Number of tail calls");
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
if (Subtarget.isRV32E())
report_fatal_error("Codegen not yet implemented for RV32E");
RISCVABI::ABI ABI = Subtarget.getTargetABI();
assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
!Subtarget.hasStdExtF()) {
errs() << "Hard-float 'f' ABI can't be used for a target that "
"doesn't support the F instruction set extension (ignoring "
"target-abi)\n";
ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
} else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
!Subtarget.hasStdExtD()) {
errs() << "Hard-float 'd' ABI can't be used for a target that "
"doesn't support the D instruction set extension (ignoring "
"target-abi)\n";
ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
}
switch (ABI) {
default:
report_fatal_error("Don't know how to lower this ABI");
case RISCVABI::ABI_ILP32:
case RISCVABI::ABI_ILP32F:
case RISCVABI::ABI_ILP32D:
case RISCVABI::ABI_LP64:
case RISCVABI::ABI_LP64F:
case RISCVABI::ABI_LP64D:
break;
}
MVT XLenVT = Subtarget.getXLenVT();
// Set up the register classes.
addRegisterClass(XLenVT, &RISCV::GPRRegClass);
if (Subtarget.hasStdExtZfh())
addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
if (Subtarget.hasStdExtF())
addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
if (Subtarget.hasStdExtD())
addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
static const MVT::SimpleValueType BoolVecVTs[] = {
MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
static const MVT::SimpleValueType IntVecVTs[] = {
MVT::nxv1i8, MVT::nxv2i8, MVT::nxv4i8, MVT::nxv8i8, MVT::nxv16i8,
MVT::nxv32i8, MVT::nxv64i8, MVT::nxv1i16, MVT::nxv2i16, MVT::nxv4i16,
MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
MVT::nxv4i32, MVT::nxv8i32, MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
MVT::nxv4i64, MVT::nxv8i64};
static const MVT::SimpleValueType F16VecVTs[] = {
MVT::nxv1f16, MVT::nxv2f16, MVT::nxv4f16,
MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
static const MVT::SimpleValueType F32VecVTs[] = {
MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
static const MVT::SimpleValueType F64VecVTs[] = {
MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
if (Subtarget.hasVInstructions()) {
auto addRegClassForRVV = [this](MVT VT) {
unsigned Size = VT.getSizeInBits().getKnownMinValue();
assert(Size <= 512 && isPowerOf2_32(Size));
const TargetRegisterClass *RC;
if (Size <= 64)
RC = &RISCV::VRRegClass;
else if (Size == 128)
RC = &RISCV::VRM2RegClass;
else if (Size == 256)
RC = &RISCV::VRM4RegClass;
else
RC = &RISCV::VRM8RegClass;
addRegisterClass(VT, RC);
};
for (MVT VT : BoolVecVTs)
addRegClassForRVV(VT);
for (MVT VT : IntVecVTs) {
if (VT.getVectorElementType() == MVT::i64 &&
!Subtarget.hasVInstructionsI64())
continue;
addRegClassForRVV(VT);
}
if (Subtarget.hasVInstructionsF16())
for (MVT VT : F16VecVTs)
addRegClassForRVV(VT);
if (Subtarget.hasVInstructionsF32())
for (MVT VT : F32VecVTs)
addRegClassForRVV(VT);
if (Subtarget.hasVInstructionsF64())
for (MVT VT : F64VecVTs)
addRegClassForRVV(VT);
if (Subtarget.useRVVForFixedLengthVectors()) {
auto addRegClassForFixedVectors = [this](MVT VT) {
MVT ContainerVT = getContainerForFixedLengthVector(VT);
unsigned RCID = getRegClassIDForVecVT(ContainerVT);
const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
addRegisterClass(VT, TRI.getRegClass(RCID));
};
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
if (useRVVForFixedLengthVectorVT(VT))
addRegClassForFixedVectors(VT);
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
if (useRVVForFixedLengthVectorVT(VT))
addRegClassForFixedVectors(VT);
}
}
// Compute derived properties from the register classes.
computeRegisterProperties(STI.getRegisterInfo());
setStackPointerRegisterToSaveRestore(RISCV::X2);
for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
setLoadExtAction(N, XLenVT, MVT::i1, Promote);
// TODO: add all necessary setOperationAction calls.
setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, XLenVT, Expand);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::VAARG, MVT::Other, Expand);
setOperationAction(ISD::VACOPY, MVT::Other, Expand);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
if (!Subtarget.hasStdExtZbb()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
}
if (Subtarget.is64Bit()) {
setOperationAction(ISD::ADD, MVT::i32, Custom);
setOperationAction(ISD::SUB, MVT::i32, Custom);
setOperationAction(ISD::SHL, MVT::i32, Custom);
setOperationAction(ISD::SRA, MVT::i32, Custom);
setOperationAction(ISD::SRL, MVT::i32, Custom);
setOperationAction(ISD::UADDO, MVT::i32, Custom);
setOperationAction(ISD::USUBO, MVT::i32, Custom);
setOperationAction(ISD::UADDSAT, MVT::i32, Custom);
setOperationAction(ISD::USUBSAT, MVT::i32, Custom);
} else {
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
setLibcallName(RTLIB::MUL_I128, nullptr);
setLibcallName(RTLIB::MULO_I64, nullptr);
}
if (!Subtarget.hasStdExtM()) {
setOperationAction(ISD::MUL, XLenVT, Expand);
setOperationAction(ISD::MULHS, XLenVT, Expand);
setOperationAction(ISD::MULHU, XLenVT, Expand);
setOperationAction(ISD::SDIV, XLenVT, Expand);
setOperationAction(ISD::UDIV, XLenVT, Expand);
setOperationAction(ISD::SREM, XLenVT, Expand);
setOperationAction(ISD::UREM, XLenVT, Expand);
} else {
if (Subtarget.is64Bit()) {
setOperationAction(ISD::MUL, MVT::i32, Custom);
setOperationAction(ISD::MUL, MVT::i128, Custom);
setOperationAction(ISD::SDIV, MVT::i8, Custom);
setOperationAction(ISD::UDIV, MVT::i8, Custom);
setOperationAction(ISD::UREM, MVT::i8, Custom);
setOperationAction(ISD::SDIV, MVT::i16, Custom);
setOperationAction(ISD::UDIV, MVT::i16, Custom);
setOperationAction(ISD::UREM, MVT::i16, Custom);
setOperationAction(ISD::SDIV, MVT::i32, Custom);
setOperationAction(ISD::UDIV, MVT::i32, Custom);
setOperationAction(ISD::UREM, MVT::i32, Custom);
} else {
setOperationAction(ISD::MUL, MVT::i64, Custom);
}
}
setOperationAction(ISD::SDIVREM, XLenVT, Expand);
setOperationAction(ISD::UDIVREM, XLenVT, Expand);
setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp() ||
Subtarget.hasStdExtZbkb()) {
if (Subtarget.is64Bit()) {
setOperationAction(ISD::ROTL, MVT::i32, Custom);
setOperationAction(ISD::ROTR, MVT::i32, Custom);
}
} else {
setOperationAction(ISD::ROTL, XLenVT, Expand);
setOperationAction(ISD::ROTR, XLenVT, Expand);
}
if (Subtarget.hasStdExtZbp()) {
// Custom lower bswap/bitreverse so we can convert them to GREVI to enable
// more combining.
setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
setOperationAction(ISD::BSWAP, XLenVT, Custom);
setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
// BSWAP i8 doesn't exist.
setOperationAction(ISD::BITREVERSE, MVT::i16, Custom);
setOperationAction(ISD::BSWAP, MVT::i16, Custom);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
setOperationAction(ISD::BSWAP, MVT::i32, Custom);
}
} else {
// With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
// pattern match it directly in isel.
setOperationAction(ISD::BSWAP, XLenVT,
(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb())
? Legal
: Expand);
// Zbkb can use rev8+brev8 to implement bitreverse.
setOperationAction(ISD::BITREVERSE, XLenVT,
Subtarget.hasStdExtZbkb() ? Custom : Expand);
}
if (Subtarget.hasStdExtZbb()) {
setOperationAction(ISD::SMIN, XLenVT, Legal);
setOperationAction(ISD::SMAX, XLenVT, Legal);
setOperationAction(ISD::UMIN, XLenVT, Legal);
setOperationAction(ISD::UMAX, XLenVT, Legal);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::CTTZ, MVT::i32, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
setOperationAction(ISD::CTLZ, MVT::i32, Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
}
} else {
setOperationAction(ISD::CTTZ, XLenVT, Expand);
setOperationAction(ISD::CTLZ, XLenVT, Expand);
setOperationAction(ISD::CTPOP, XLenVT, Expand);
}
if (Subtarget.hasStdExtZbt()) {
setOperationAction(ISD::FSHL, XLenVT, Custom);
setOperationAction(ISD::FSHR, XLenVT, Custom);
setOperationAction(ISD::SELECT, XLenVT, Legal);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::FSHL, MVT::i32, Custom);
setOperationAction(ISD::FSHR, MVT::i32, Custom);
}
} else {
setOperationAction(ISD::SELECT, XLenVT, Custom);
}
static const ISD::CondCode FPCCToExpand[] = {
ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO};
static const ISD::NodeType FPOpToExpand[] = {
ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
ISD::FREM, ISD::FP16_TO_FP, ISD::FP_TO_FP16};
if (Subtarget.hasStdExtZfh())
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
if (Subtarget.hasStdExtZfh()) {
setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
setOperationAction(ISD::LRINT, MVT::f16, Legal);
setOperationAction(ISD::LLRINT, MVT::f16, Legal);
setOperationAction(ISD::LROUND, MVT::f16, Legal);
setOperationAction(ISD::LLROUND, MVT::f16, Legal);
setOperationAction(ISD::STRICT_LRINT, MVT::f16, Legal);
setOperationAction(ISD::STRICT_LLRINT, MVT::f16, Legal);
setOperationAction(ISD::STRICT_LROUND, MVT::f16, Legal);
setOperationAction(ISD::STRICT_LLROUND, MVT::f16, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::f16, Legal);
setOperationAction(ISD::STRICT_FMA, MVT::f16, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::f16, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::f16, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::f16, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::f16, Legal);
setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Legal);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Legal);
for (auto CC : FPCCToExpand)
setCondCodeAction(CC, MVT::f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
setOperationAction(ISD::SELECT, MVT::f16, Custom);
setOperationAction(ISD::BR_CC, MVT::f16, Expand);
setOperationAction(ISD::FREM, MVT::f16, Promote);
setOperationAction(ISD::FCEIL, MVT::f16, Promote);
setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
setOperationAction(ISD::FRINT, MVT::f16, Promote);
setOperationAction(ISD::FROUND, MVT::f16, Promote);
setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
setOperationAction(ISD::FPOW, MVT::f16, Promote);
setOperationAction(ISD::FPOWI, MVT::f16, Promote);
setOperationAction(ISD::FCOS, MVT::f16, Promote);
setOperationAction(ISD::FSIN, MVT::f16, Promote);
setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
setOperationAction(ISD::FEXP, MVT::f16, Promote);
setOperationAction(ISD::FEXP2, MVT::f16, Promote);
setOperationAction(ISD::FLOG, MVT::f16, Promote);
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
setOperationAction(ISD::FLOG10, MVT::f16, Promote);
// FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
// complete support for all operations in LegalizeDAG.
// We need to custom promote this.
if (Subtarget.is64Bit())
setOperationAction(ISD::FPOWI, MVT::i32, Custom);
}
if (Subtarget.hasStdExtF()) {
setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
setOperationAction(ISD::LRINT, MVT::f32, Legal);
setOperationAction(ISD::LLRINT, MVT::f32, Legal);
setOperationAction(ISD::LROUND, MVT::f32, Legal);
setOperationAction(ISD::LLROUND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_LRINT, MVT::f32, Legal);
setOperationAction(ISD::STRICT_LLRINT, MVT::f32, Legal);
setOperationAction(ISD::STRICT_LROUND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_LLROUND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
for (auto CC : FPCCToExpand)
setCondCodeAction(CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
for (auto Op : FPOpToExpand)
setOperationAction(Op, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
}
if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
setOperationAction(ISD::BITCAST, MVT::i32, Custom);
if (Subtarget.hasStdExtD()) {
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
setOperationAction(ISD::LRINT, MVT::f64, Legal);
setOperationAction(ISD::LLRINT, MVT::f64, Legal);
setOperationAction(ISD::LROUND, MVT::f64, Legal);
setOperationAction(ISD::LLROUND, MVT::f64, Legal);
setOperationAction(ISD::STRICT_LRINT, MVT::f64, Legal);
setOperationAction(ISD::STRICT_LLRINT, MVT::f64, Legal);
setOperationAction(ISD::STRICT_LROUND, MVT::f64, Legal);
setOperationAction(ISD::STRICT_LLROUND, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
for (auto CC : FPCCToExpand)
setCondCodeAction(CC, MVT::f64, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
for (auto Op : FPOpToExpand)
setOperationAction(Op, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
}
if (Subtarget.is64Bit()) {
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
}
if (Subtarget.hasStdExtF()) {
setOperationAction(ISD::FP_TO_UINT_SAT, XLenVT, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, XLenVT, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, XLenVT, Legal);
setOperationAction(ISD::STRICT_FP_TO_SINT, XLenVT, Legal);
setOperationAction(ISD::STRICT_UINT_TO_FP, XLenVT, Legal);
setOperationAction(ISD::STRICT_SINT_TO_FP, XLenVT, Legal);
setOperationAction(ISD::FLT_ROUNDS_, XLenVT, Custom);
setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
}
setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
setOperationAction(ISD::BlockAddress, XLenVT, Custom);
setOperationAction(ISD::ConstantPool, XLenVT, Custom);
setOperationAction(ISD::JumpTable, XLenVT, Custom);
setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
// TODO: On M-mode only targets, the cycle[h] CSR may not be present.
// Unfortunately this can't be determined just from the ISA naming string.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
Subtarget.is64Bit() ? Legal : Custom);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
if (Subtarget.is64Bit())
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
if (Subtarget.hasStdExtA()) {
setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
setMinCmpXchgSizeInBits(32);
} else {
setMaxAtomicSizeInBitsSupported(0);
}
setBooleanContents(ZeroOrOneBooleanContent);
if (Subtarget.hasVInstructions()) {
setBooleanVectorContents(ZeroOrOneBooleanContent);
setOperationAction(ISD::VSCALE, XLenVT, Custom);
// RVV intrinsics may have illegal operands.
// We also need to custom legalize vmv.x.s.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
} else {
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
}
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
static const unsigned IntegerVPOps[] = {
ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
ISD::VP_MERGE, ISD::VP_SELECT};
static const unsigned FloatingPointVPOps[] = {
ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
ISD::VP_FDIV, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
ISD::VP_SELECT};
if (!Subtarget.is64Bit()) {
// We must custom-lower certain vXi64 operations on RV32 due to the vector
// element type being illegal.
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
setOperationAction(ISD::VP_REDUCE_ADD, MVT::i64, Custom);
setOperationAction(ISD::VP_REDUCE_AND, MVT::i64, Custom);
setOperationAction(ISD::VP_REDUCE_OR, MVT::i64, Custom);
setOperationAction(ISD::VP_REDUCE_XOR, MVT::i64, Custom);
setOperationAction(ISD::VP_REDUCE_SMAX, MVT::i64, Custom);
setOperationAction(ISD::VP_REDUCE_SMIN, MVT::i64, Custom);
setOperationAction(ISD::VP_REDUCE_UMAX, MVT::i64, Custom);
setOperationAction(ISD::VP_REDUCE_UMIN, MVT::i64, Custom);
}
for (MVT VT : BoolVecVTs) {
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
// Mask VTs are custom-expanded into a series of standard nodes
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::VP_MERGE, VT, Expand);
setOperationAction(ISD::VP_SELECT, VT, Expand);
setOperationAction(ISD::VP_AND, VT, Custom);
setOperationAction(ISD::VP_OR, VT, Custom);
setOperationAction(ISD::VP_XOR, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::VP_REDUCE_AND, VT, Custom);
setOperationAction(ISD::VP_REDUCE_OR, VT, Custom);
setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom);
// RVV has native int->float & float->int conversions where the
// element type sizes are within one power-of-two of each other. Any
// wider distances between type sizes have to be lowered as sequences
// which progressively narrow the gap in stages.
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
// Expand all extending loads to types larger than this, and truncating
// stores from types larger than this.
for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
setTruncStoreAction(OtherVT, VT, Expand);
setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
}
}
for (MVT VT : IntVecVTs) {
if (VT.getVectorElementType() == MVT::i64 &&
!Subtarget.hasVInstructionsI64())
continue;
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
// Vectors implement MULHS/MULHU.
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
// nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV()) {
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::MULHS, VT, Expand);
}
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::CTLZ, VT, Expand);
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
// Custom-lower extensions and truncations from/to mask types.
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
// RVV has native int->float & float->int conversions where the
// element type sizes are within one power-of-two of each other. Any
// wider distances between type sizes have to be lowered as sequences
// which progressively narrow the gap in stages.
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
// Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
// nodes which truncate by one power of two at a time.
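// For example, truncating nxv1i64 to nxv1i8 is lowered in stages:
// i64 -> i32 -> i16 -> i8.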
setOperationAction(ISD::TRUNCATE, VT, Custom);
// Custom-lower insert/extract operations to simplify patterns.
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
// Custom-lower reduction operations to set up the corresponding custom
// nodes' operands.
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
for (unsigned VPOpc : IntegerVPOps)
setOperationAction(VPOpc, VT, Custom);
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::VP_LOAD, VT, Custom);
setOperationAction(ISD::VP_STORE, VT, Custom);
setOperationAction(ISD::VP_GATHER, VT, Custom);
setOperationAction(ISD::VP_SCATTER, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::STEP_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
setTruncStoreAction(VT, OtherVT, Expand);
setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
}
// Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
// type that can represent the value exactly.
if (VT.getVectorElementType() != MVT::i64) {
MVT FloatEltVT =
VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32;
EVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
if (isTypeLegal(FloatVT)) {
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
}
}
}
// Expand various CCs to best match the RVV ISA, which natively supports UNE
// but no other unordered comparisons, and supports all ordered comparisons
// except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
// purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
// and we pattern-match those back to the "original", swapping operands once
// more. This way we catch both operations and both "vf" and "fv" forms with
// fewer patterns.
static const ISD::CondCode VFPCCToExpand[] = {
ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE,
};
// Sets common operation actions on RVV floating-point vector types.
const auto SetCommonVFPActions = [&](MVT VT) {
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
// RVV has native FP_ROUND & FP_EXTEND conversions where the element type
// sizes are within one power-of-two of each other. Therefore conversions
// between vXf16 and vXf64 must be lowered as sequences which convert via
// vXf32.
setOperationAction(ISD::FP_ROUND, VT, Custom);
setOperationAction(ISD::FP_EXTEND, VT, Custom);
// Custom-lower insert/extract operations to simplify patterns.
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
// Expand various condition codes (explained above).
for (auto CC : VFPCCToExpand)
setCondCodeAction(CC, VT, Expand);
setOperationAction(ISD::FMINNUM, VT, Legal);
setOperationAction(ISD::FMAXNUM, VT, Legal);
setOperationAction(ISD::FTRUNC, VT, Custom);
setOperationAction(ISD::FCEIL, VT, Custom);
setOperationAction(ISD::FFLOOR, VT, Custom);
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::FCOPYSIGN, VT, Legal);
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::VP_LOAD, VT, Custom);
setOperationAction(ISD::VP_STORE, VT, Custom);
setOperationAction(ISD::VP_GATHER, VT, Custom);
setOperationAction(ISD::VP_SCATTER, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
for (unsigned VPOpc : FloatingPointVPOps)
setOperationAction(VPOpc, VT, Custom);
};
// Sets common extload/truncstore actions on RVV floating-point vector
// types.
const auto SetCommonVFPExtLoadTruncStoreActions =
[&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
for (auto SmallVT : SmallerVTs) {
setTruncStoreAction(VT, SmallVT, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
}
};
if (Subtarget.hasVInstructionsF16())
for (MVT VT : F16VecVTs)
SetCommonVFPActions(VT);
for (MVT VT : F32VecVTs) {
if (Subtarget.hasVInstructionsF32())
SetCommonVFPActions(VT);
SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
}
for (MVT VT : F64VecVTs) {
if (Subtarget.hasVInstructionsF64())
SetCommonVFPActions(VT);
SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
}
if (Subtarget.useRVVForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
if (!useRVVForFixedLengthVectorVT(VT))
continue;
// By default everything must be expanded.
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
setOperationAction(Op, VT, Expand);
for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT, OtherVT, Expand);
setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
}
// We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::BITCAST, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::VP_REDUCE_AND, VT, Custom);
setOperationAction(ISD::VP_REDUCE_OR, VT, Custom);
setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom);
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
// The operations below differ between mask vectors and other vectors.
if (VT.getVectorElementType() == MVT::i1) {
setOperationAction(ISD::VP_AND, VT, Custom);
setOperationAction(ISD::VP_OR, VT, Custom);
setOperationAction(ISD::VP_XOR, VT, Custom);
setOperationAction(ISD::AND, VT, Custom);
setOperationAction(ISD::OR, VT, Custom);
setOperationAction(ISD::XOR, VT, Custom);
continue;
}
// Use SPLAT_VECTOR to prevent type legalization from destroying the
// splats when type legalizing i64 scalar on RV32.
// FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
// improvements first.
if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
}
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::VP_LOAD, VT, Custom);
setOperationAction(ISD::VP_STORE, VT, Custom);
setOperationAction(ISD::VP_GATHER, VT, Custom);
setOperationAction(ISD::VP_SCATTER, VT, Custom);
setOperationAction(ISD::ADD, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::SUB, VT, Custom);
setOperationAction(ISD::AND, VT, Custom);
setOperationAction(ISD::OR, VT, Custom);
setOperationAction(ISD::XOR, VT, Custom);
setOperationAction(ISD::SDIV, VT, Custom);
setOperationAction(ISD::SREM, VT, Custom);
setOperationAction(ISD::UDIV, VT, Custom);
setOperationAction(ISD::UREM, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SMIN, VT, Custom);
setOperationAction(ISD::SMAX, VT, Custom);
setOperationAction(ISD::UMIN, VT, Custom);
setOperationAction(ISD::UMAX, VT, Custom);
setOperationAction(ISD::ABS, VT, Custom);
// vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV()) {
setOperationAction(ISD::MULHS, VT, Custom);
setOperationAction(ISD::MULHU, VT, Custom);
}
setOperationAction(ISD::SADDSAT, VT, Custom);
setOperationAction(ISD::UADDSAT, VT, Custom);
setOperationAction(ISD::SSUBSAT, VT, Custom);
setOperationAction(ISD::USUBSAT, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
// Custom-lower reduction operations to set up the corresponding custom
// nodes' operands.
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
for (unsigned VPOpc : IntegerVPOps)
setOperationAction(VPOpc, VT, Custom);
// Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
// type that can represent the value exactly.
if (VT.getVectorElementType() != MVT::i64) {
MVT FloatEltVT =
VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32;
EVT FloatVT =
MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
if (isTypeLegal(FloatVT)) {
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
}
}
}
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
if (!useRVVForFixedLengthVectorVT(VT))
continue;
// By default everything must be expanded.
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
setOperationAction(Op, VT, Expand);
for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
setTruncStoreAction(VT, OtherVT, Expand);
}
// We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::VP_LOAD, VT, Custom);
setOperationAction(ISD::VP_STORE, VT, Custom);
setOperationAction(ISD::VP_GATHER, VT, Custom);
setOperationAction(ISD::VP_SCATTER, VT, Custom);
setOperationAction(ISD::FADD, VT, Custom);
setOperationAction(ISD::FSUB, VT, Custom);
setOperationAction(ISD::FMUL, VT, Custom);
setOperationAction(ISD::FDIV, VT, Custom);
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
setOperationAction(ISD::FSQRT, VT, Custom);
setOperationAction(ISD::FMA, VT, Custom);
setOperationAction(ISD::FMINNUM, VT, Custom);
setOperationAction(ISD::FMAXNUM, VT, Custom);
setOperationAction(ISD::FP_ROUND, VT, Custom);
setOperationAction(ISD::FP_EXTEND, VT, Custom);
setOperationAction(ISD::FTRUNC, VT, Custom);
setOperationAction(ISD::FCEIL, VT, Custom);
setOperationAction(ISD::FFLOOR, VT, Custom);
for (auto CC : VFPCCToExpand)
setCondCodeAction(CC, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::BITCAST, VT, Custom);
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
for (unsigned VPOpc : FloatingPointVPOps)
setOperationAction(VPOpc, VT, Custom);
}
// Custom-legalize bitcasts from fixed-length vectors to scalar types.
setOperationAction(ISD::BITCAST, MVT::i8, Custom);
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
setOperationAction(ISD::BITCAST, MVT::i32, Custom);
setOperationAction(ISD::BITCAST, MVT::i64, Custom);
if (Subtarget.hasStdExtZfh())
setOperationAction(ISD::BITCAST, MVT::f16, Custom);
if (Subtarget.hasStdExtF())
setOperationAction(ISD::BITCAST, MVT::f32, Custom);
if (Subtarget.hasStdExtD())
setOperationAction(ISD::BITCAST, MVT::f64, Custom);
}
}
// Function alignments.
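// With the compressed (C) extension, instructions may start on any 2-byte
// boundary; otherwise require the usual 4-byte alignment.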
const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
setMinFunctionAlignment(FunctionAlignment);
setPrefFunctionAlignment(FunctionAlignment);
setMinimumJumpTableEntries(5);
// Jumps are expensive compared to logic operations.
setJumpIsExpensive();
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::XOR);
setTargetDAGCombine(ISD::ANY_EXTEND);
if (Subtarget.hasStdExtF()) {
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::FP_TO_UINT);
setTargetDAGCombine(ISD::FP_TO_SINT_SAT);
setTargetDAGCombine(ISD::FP_TO_UINT_SAT);
}
if (Subtarget.hasVInstructions()) {
setTargetDAGCombine(ISD::FCOPYSIGN);
setTargetDAGCombine(ISD::MGATHER);
setTargetDAGCombine(ISD::MSCATTER);
setTargetDAGCombine(ISD::VP_GATHER);
setTargetDAGCombine(ISD::VP_SCATTER);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::STORE);
}
setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
}
EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
LLVMContext &Context,
EVT VT) const {
if (!VT.isVector())
return getPointerTy(DL);
if (Subtarget.hasVInstructions() &&
(VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
return VT.changeVectorElementTypeToInteger();
}
MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
return Subtarget.getXLenVT();
}
bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const {
auto &DL = I.getModule()->getDataLayout();
switch (Intrinsic) {
default:
return false;
case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
case Intrinsic::riscv_masked_atomicrmw_add_i32:
case Intrinsic::riscv_masked_atomicrmw_sub_i32:
case Intrinsic::riscv_masked_atomicrmw_nand_i32:
case Intrinsic::riscv_masked_atomicrmw_max_i32:
case Intrinsic::riscv_masked_atomicrmw_min_i32:
case Intrinsic::riscv_masked_atomicrmw_umax_i32:
case Intrinsic::riscv_masked_atomicrmw_umin_i32:
case Intrinsic::riscv_masked_cmpxchg_i32:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i32;
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = Align(4);
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile;
return true;
case Intrinsic::riscv_masked_strided_load:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.ptrVal = I.getArgOperand(1);
Info.memVT = getValueType(DL, I.getType()->getScalarType());
Info.align = Align(DL.getTypeSizeInBits(I.getType()->getScalarType()) / 8);
Info.size = MemoryLocation::UnknownSize;
Info.flags |= MachineMemOperand::MOLoad;
return true;
case Intrinsic::riscv_masked_strided_store:
Info.opc = ISD::INTRINSIC_VOID;
Info.ptrVal = I.getArgOperand(1);
Info.memVT =
getValueType(DL, I.getArgOperand(0)->getType()->getScalarType());
Info.align = Align(
DL.getTypeSizeInBits(I.getArgOperand(0)->getType()->getScalarType()) /
8);
Info.size = MemoryLocation::UnknownSize;
Info.flags |= MachineMemOperand::MOStore;
return true;
}
}
bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS,
Instruction *I) const {
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
// Require a 12-bit signed offset.
if (!isInt<12>(AM.BaseOffs))
return false;
switch (AM.Scale) {
case 0: // "r+i" or just "i", depending on HasBaseReg.
break;
case 1:
if (!AM.HasBaseReg) // allow "r+i".
break;
return false; // disallow "r+r" or "r+r+i".
default:
return false;
}
return true;
}
bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
return isInt<12>(Imm);
}
bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
return isInt<12>(Imm);
}
// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
return false;
unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
unsigned DestBits = DstTy->getPrimitiveSizeInBits();
return (SrcBits == 64 && DestBits == 32);
}
bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
!SrcVT.isInteger() || !DstVT.isInteger())
return false;
unsigned SrcBits = SrcVT.getSizeInBits();
unsigned DestBits = DstVT.getSizeInBits();
return (SrcBits == 64 && DestBits == 32);
}
bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
// Zexts are free if they can be combined with a load.
// Don't advertise i32->i64 zextload as being free for RV64. It interacts
// poorly with type legalization of compares preferring sext.
if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
EVT MemVT = LD->getMemoryVT();
if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
(LD->getExtensionType() == ISD::NON_EXTLOAD ||
LD->getExtensionType() == ISD::ZEXTLOAD))
return true;
}
return TargetLowering::isZExtFree(Val, VT2);
}
bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}
bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
return Subtarget.hasStdExtZbb();
}
bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
return Subtarget.hasStdExtZbb();
}
bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
EVT VT = Y.getValueType();
// FIXME: Support vectors once we have tests.
if (VT.isVector())
return false;
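// The Zbb/Zbp/Zbkb extensions provide ANDN, which folds the NOT of a
// non-constant operand into the AND.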
return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp() ||
Subtarget.hasStdExtZbkb()) &&
!isa<ConstantSDNode>(Y);
}
/// Check if sinking \p I's operands to I's basic block is profitable, because
/// the operands can be folded into a target instruction, e.g.
/// splats of scalars can fold into vector instructions.
bool RISCVTargetLowering::shouldSinkOperands(
Instruction *I, SmallVectorImpl<Use *> &Ops) const {
using namespace llvm::PatternMatch;
if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
return false;
auto IsSinker = [&](Instruction *I, int Operand) {
switch (I->getOpcode()) {
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::FMul:
case Instruction::FDiv:
case Instruction::ICmp:
case Instruction::FCmp:
return true;
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::URem:
case Instruction::SRem:
return Operand == 1;
case Instruction::Call:
if (auto *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
case Intrinsic::fma:
return Operand == 0 || Operand == 1;
// FIXME: Our patterns can only match vx/vf instructions when the splat
// is on the RHS, because TableGen doesn't recognize our VP operations
// as commutative.
case Intrinsic::vp_add:
case Intrinsic::vp_mul:
case Intrinsic::vp_and:
case Intrinsic::vp_or:
case Intrinsic::vp_xor:
case Intrinsic::vp_fadd:
case Intrinsic::vp_fmul:
case Intrinsic::vp_shl:
case Intrinsic::vp_lshr:
case Intrinsic::vp_ashr:
case Intrinsic::vp_udiv:
case Intrinsic::vp_sdiv:
case Intrinsic::vp_urem:
case Intrinsic::vp_srem:
return Operand == 1;
// ... with the exception of vp.sub/vp.fsub/vp.fdiv, which have
// explicit patterns for both LHS and RHS (as 'vr' versions).
case Intrinsic::vp_sub:
case Intrinsic::vp_fsub:
case Intrinsic::vp_fdiv:
return Operand == 0 || Operand == 1;
default:
return false;
}
}
return false;
default:
return false;
}
};
for (auto OpIdx : enumerate(I->operands())) {
if (!IsSinker(I, OpIdx.index()))
continue;
Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
// Make sure we are not already sinking this operand
if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
continue;
// We are looking for a splat that can be sunk.
if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
m_Undef(), m_ZeroMask())))
continue;
// All uses of the shuffle should be sunk to avoid duplicating it across GPR
// and vector registers.
for (Use &U : Op->uses()) {
Instruction *Insn = cast<Instruction>(U.getUser());
if (!IsSinker(Insn, U.getOperandNo()))
return false;
}
Ops.push_back(&Op->getOperandUse(0));
Ops.push_back(&OpIdx.value());
}
return true;
}
bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const {
// FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
return false;
if (VT == MVT::f32 && !Subtarget.hasStdExtF())
return false;
if (VT == MVT::f64 && !Subtarget.hasStdExtD())
return false;
return Imm.isZero();
}
bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
(VT == MVT::f32 && Subtarget.hasStdExtF()) ||
(VT == MVT::f64 && Subtarget.hasStdExtD());
}
MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
// Use f32 to pass f16 if it is legal and Zfh is not enabled.
// We might still end up using a GPR but that will be decided based on ABI.
// FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
return MVT::f32;
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}
unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
// Use f32 to pass f16 if it is legal and Zfh is not enabled.
// We might still end up using a GPR but that will be decided based on ABI.
// FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
return 1;
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}
// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly by branches
// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
// with 1/-1.
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
ISD::CondCode &CC, SelectionDAG &DAG) {
// Convert X > -1 to X >= 0.
if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
RHS = DAG.getConstant(0, DL, RHS.getValueType());
CC = ISD::SETGE;
return;
}
// Convert X < 1 to 0 >= X.
if (CC == ISD::SETLT && isOneConstant(RHS)) {
RHS = LHS;
LHS = DAG.getConstant(0, DL, RHS.getValueType());
CC = ISD::SETGE;
return;
}
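// RISC-V branches directly support eq/ne and signed/unsigned lt/ge; the
// remaining gt/le forms are handled by swapping the comparison's operands.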
switch (CC) {
default:
break;
case ISD::SETGT:
case ISD::SETLE:
case ISD::SETUGT:
case ISD::SETULE:
CC = ISD::getSetCCSwappedOperands(CC);
std::swap(LHS, RHS);
break;
}
}
RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
assert(VT.isScalableVector() && "Expecting a scalable vector type");
unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
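// i1 vectors are sized by element count; scale by 8 so they map to the same
// LMUL as an i8 vector with the same number of elements.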
if (VT.getVectorElementType() == MVT::i1)
KnownSize *= 8;
switch (KnownSize) {
default:
llvm_unreachable("Invalid LMUL.");
case 8:
return RISCVII::VLMUL::LMUL_F8;
case 16:
return RISCVII::VLMUL::LMUL_F4;
case 32:
return RISCVII::VLMUL::LMUL_F2;
case 64:
return RISCVII::VLMUL::LMUL_1;
case 128:
return RISCVII::VLMUL::LMUL_2;
case 256:
return RISCVII::VLMUL::LMUL_4;
case 512:
return RISCVII::VLMUL::LMUL_8;
}
}
unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
switch (LMul) {
default:
llvm_unreachable("Invalid LMUL.");
case RISCVII::VLMUL::LMUL_F8:
case RISCVII::VLMUL::LMUL_F4:
case RISCVII::VLMUL::LMUL_F2:
case RISCVII::VLMUL::LMUL_1:
return RISCV::VRRegClassID;
case RISCVII::VLMUL::LMUL_2:
return RISCV::VRM2RegClassID;
case RISCVII::VLMUL::LMUL_4:
return RISCV::VRM4RegClassID;
case RISCVII::VLMUL::LMUL_8:
return RISCV::VRM8RegClassID;
}
}
unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
RISCVII::VLMUL LMUL = getLMUL(VT);
if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
LMUL == RISCVII::VLMUL::LMUL_F4 ||
LMUL == RISCVII::VLMUL::LMUL_F2 ||
LMUL == RISCVII::VLMUL::LMUL_1) {
static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
"Unexpected subreg numbering");
return RISCV::sub_vrm1_0 + Index;
}
if (LMUL == RISCVII::VLMUL::LMUL_2) {
static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
"Unexpected subreg numbering");
return RISCV::sub_vrm2_0 + Index;
}
if (LMUL == RISCVII::VLMUL::LMUL_4) {
static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
"Unexpected subreg numbering");
return RISCV::sub_vrm4_0 + Index;
}
llvm_unreachable("Invalid vector type.");
}
unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
if (VT.getVectorElementType() == MVT::i1)
return RISCV::VRRegClassID;
return getRegClassIDForLMUL(getLMUL(VT));
}
// Attempt to decompose a subvector insert/extract between VecVT and
// SubVecVT via subregister indices. Returns the subregister index that
// can perform the subvector insert/extract with the given element index, as
// well as the index corresponding to any leftover subvectors that must be
// further inserted/extracted within the register class for SubVecVT.
std::pair<unsigned, unsigned>
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
const RISCVRegisterInfo *TRI) {
static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
RISCV::VRM2RegClassID > RISCV::VRRegClassID),
"Register classes not ordered");
unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
// Try to compose a subregister index that takes us from the incoming
// LMUL>1 register class down to the outgoing one. At each step we halve
// the LMUL:
// nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
// Note that this is not guaranteed to find a subregister index, such as
// when we are extracting from one VR type to another.
unsigned SubRegIdx = RISCV::NoSubRegister;
for (const unsigned RCID :
{RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
if (VecRegClassID > RCID && SubRegClassID <= RCID) {
VecVT = VecVT.getHalfNumVectorElementsVT();
bool IsHi =
InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
getSubregIndexByMVT(VecVT, IsHi));
if (IsHi)
InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
}
return {SubRegIdx, InsertExtractIdx};
}
// Permit combining of mask vectors as BUILD_VECTOR never expands to scalar
// stores for those types.
bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
return !Subtarget.useRVVForFixedLengthVectors() ||
(VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
}
bool RISCVTargetLowering::isLegalElementTypeForRVV(Type *ScalarTy) const {
if (ScalarTy->isPointerTy())
return true;
if (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
ScalarTy->isIntegerTy(32))
return true;
if (ScalarTy->isIntegerTy(64))
return Subtarget.hasVInstructionsI64();
if (ScalarTy->isHalfTy())
return Subtarget.hasVInstructionsF16();
if (ScalarTy->isFloatTy())
return Subtarget.hasVInstructionsF32();
if (ScalarTy->isDoubleTy())
return Subtarget.hasVInstructionsF64();
return false;
}
static SDValue getVLOperand(SDValue Op) {
assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
"Unexpected opcode");
bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
if (!II)
return SDValue();
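// The table records the VL operand's position among the intrinsic's
// arguments; skip past the intrinsic ID (and the chain, if present) to
// reach the corresponding SDNode operand.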
return Op.getOperand(II->VLOperand + 1 + HasChain);
}
static bool useRVVForFixedLengthVectorVT(MVT VT,
const RISCVSubtarget &Subtarget) {
assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
if (!Subtarget.useRVVForFixedLengthVectors())
return false;
// We only support a set of vector types with a consistent maximum fixed size
// across all supported vector element types to avoid legalization issues.
// Therefore -- since the largest is v1024i8/v512i16/etc -- the largest
// fixed-length vector type we support is 1024 bytes.
if (VT.getFixedSizeInBits() > 1024 * 8)
return false;
unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
MVT EltVT = VT.getVectorElementType();
// Don't use RVV for vectors we cannot scalarize if required.
switch (EltVT.SimpleTy) {
// i1 is supported but has different rules.
default:
return false;
case MVT::i1:
// Masks can only use a single register.
if (VT.getVectorNumElements() > MinVLen)
return false;
MinVLen /= 8;
break;
case MVT::i8:
case MVT::i16:
case MVT::i32:
break;
case MVT::i64:
if (!Subtarget.hasVInstructionsI64())
return false;
break;
case MVT::f16:
if (!Subtarget.hasVInstructionsF16())
return false;
break;
case MVT::f32:
if (!Subtarget.hasVInstructionsF32())
return false;
break;
case MVT::f64:
if (!Subtarget.hasVInstructionsF64())
return false;
break;
}
// Reject elements larger than ELEN.
if (EltVT.getSizeInBits() > Subtarget.getMaxELENForFixedLengthVectors())
return false;
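// Estimate the register group size (LMUL) this type needs relative to
// MinVLen, which was adjusted above for i1 masks.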
unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
// Don't use RVV for types that don't fit.
if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
return false;
// TODO: Perhaps an artificial restriction, but worth having whilst getting
// the base fixed length RVV support in place.
if (!VT.isPow2VectorType())
return false;
return true;
}
bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
}
// Return the largest legal scalable vector type that matches VT's element type.
static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
const RISCVSubtarget &Subtarget) {
// This may be called before legal types are setup.
assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
"Expected legal fixed length vector!");
unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
unsigned MaxELen = Subtarget.getMaxELENForFixedLengthVectors();
MVT EltVT = VT.getVectorElementType();
switch (EltVT.SimpleTy) {
default:
llvm_unreachable("unexpected element type for RVV container");
case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
case MVT::i64:
case MVT::f16:
case MVT::f32:
case MVT::f64: {
// We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
// narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
// each fractional LMUL we support SEW between 8 and LMUL*ELEN.
unsigned NumElts =
(VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
return MVT::getScalableVectorVT(EltVT, NumElts);
}
}
}
static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
const RISCVSubtarget &Subtarget) {
return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
Subtarget);
}
MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
}
// Grow V to consume an entire RVV register.
static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
assert(VT.isScalableVector() &&
"Expected to convert into a scalable vector!");
assert(V.getValueType().isFixedLengthVector() &&
"Expected a fixed length vector operand!");
SDLoc DL(V);
SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
}
// Shrink V so it's just big enough to maintain a VT's worth of data.
static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
assert(VT.isFixedLengthVector() &&
"Expected to convert into a fixed length vector!");
assert(V.getValueType().isScalableVector() &&
"Expected a scalable vector operand!");
SDLoc DL(V);
SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
}
// Gets the two common "VL" operands: an all-ones mask and the vector length.
// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
// the vector type that it is contained in.
static std::pair<SDValue, SDValue>
getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
MVT XLenVT = Subtarget.getXLenVT();
SDValue VL = VecVT.isFixedLengthVector()
? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
: DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT);
MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
return {Mask, VL};
}
// As above but assuming the given type is a scalable vector type.
static std::pair<SDValue, SDValue>
getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
assert(VecVT.isScalableVector() && "Expecting a scalable vector");
return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
}
// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
// of either is (currently) supported. This can get us into an infinite loop
// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
// as a ..., etc.
// Until either (or both) of these can reliably lower any node, reporting that
// we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
// the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
// which is not desirable.
bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
EVT VT, unsigned DefinedValues) const {
return false;
}
bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
// Only splats are currently supported.
if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
return true;
return false;
}
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
// RISCV FP-to-int conversions saturate to the destination register size, but
// don't produce 0 for nan. We can use a conversion instruction and fix the
// nan case with a compare and a select.
SDValue Src = Op.getOperand(0);
EVT DstVT = Op.getValueType();
EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
unsigned Opc;
if (SatVT == DstVT)
Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
else if (DstVT == MVT::i64 && SatVT == MVT::i32)
Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
else
return SDValue();
// FIXME: Support other SatVTs by clamping before or after the conversion.
SDLoc DL(Op);
SDValue FpToInt = DAG.getNode(
Opc, DL, DstVT, Src,
DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT()));
SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
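// (Src, Src) compares unordered only when Src is nan, so select 0 in that
// case and the converted value otherwise.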
return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
}
// Expand vector FTRUNC, FCEIL, and FFLOOR by converting to the integer domain
// and back, taking care to avoid converting values that are nan or already
// correct.
// TODO: Floor and ceil could be shorter by changing rounding mode, but we don't
// have FRM dependencies modeled yet.
static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
assert(VT.isVector() && "Unexpected type");
SDLoc DL(Op);
// Freeze the source since we are increasing the number of uses.
SDValue Src = DAG.getNode(ISD::FREEZE, DL, VT, Op.getOperand(0));
// Truncate to integer and convert back to FP.
MVT IntVT = VT.changeVectorElementTypeToInteger();
SDValue Truncated = DAG.getNode(ISD::FP_TO_SINT, DL, IntVT, Src);
Truncated = DAG.getNode(ISD::SINT_TO_FP, DL, VT, Truncated);
MVT SetccVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
if (Op.getOpcode() == ISD::FCEIL) {
// If the truncated value is greater than or equal to the original
// value, we've computed the ceil. Otherwise, we went the wrong way and
// need to increase by 1.
// FIXME: This should use a masked operation. Handle here or in isel?
SDValue Adjust = DAG.getNode(ISD::FADD, DL, VT, Truncated,
DAG.getConstantFP(1.0, DL, VT));
SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOLT);
Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated);
} else if (Op.getOpcode() == ISD::FFLOOR) {
// If the truncated value is less than or equal to the original value,
// we've computed the floor. Otherwise, we went the wrong way and need to
// decrease by 1.
// FIXME: This should use a masked operation. Handle here or in isel?
SDValue Adjust = DAG.getNode(ISD::FSUB, DL, VT, Truncated,
DAG.getConstantFP(1.0, DL, VT));
SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOGT);
Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated);
}
// Restore the original sign so that -0.0 is preserved.
Truncated = DAG.getNode(ISD::FCOPYSIGN, DL, VT, Truncated, Src);
// Determine the largest integer that can be represented exactly. This and
// values larger than it don't have any fractional bits so don't need to
// be converted.
const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
unsigned Precision = APFloat::semanticsPrecision(FltSem);
APFloat MaxVal = APFloat(FltSem);
MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
/*IsSigned*/ false, APFloat::rmNearestTiesToEven);
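// MaxVal is 2^(precision-1), the point from which the spacing between
// representable values is at least 1, so everything at or above it is
// already integral.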
SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
// If abs(Src) was larger than MaxVal or nan, keep it.
SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, Src);
SDValue Setcc = DAG.getSetCC(DL, SetccVT, Abs, MaxValNode, ISD::SETOLT);
return DAG.getSelect(DL, VT, Setcc, Truncated, Src);
}
static SDValue lowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
assert(VT.isFixedLengthVector() && "Unexpected vector!");
MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
SDLoc DL(Op);
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
unsigned Opc =
VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, Op.getOperand(0), VL);
return convertFromScalableVector(VT, Splat, DAG, Subtarget);
}
struct VIDSequence {
int64_t StepNumerator;
unsigned StepDenominator;
int64_t Addend;
};
// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
// to the (non-zero) step S and start value X. This can then be lowered as the
// RVV sequence (VID * S) + X, for example.
// The step S is represented as an integer numerator divided by a positive
// denominator. Note that the implementation currently only identifies
// sequences in which either the numerator is +/- 1 or the denominator is 1. It
// cannot detect 2/3, for example.
// Note that this method will also match potentially unappealing index
// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
// determine whether this is worth generating code for.
static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
unsigned NumElts = Op.getNumOperands();
assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unexpected BUILD_VECTOR");
if (!Op.getValueType().isInteger())
return None;
Optional<unsigned> SeqStepDenom;
Optional<int64_t> SeqStepNum, SeqAddend;
Optional<std::pair<uint64_t, unsigned>> PrevElt;
unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
for (unsigned Idx = 0; Idx < NumElts; Idx++) {
// Assume undef elements match the sequence; we just have to be careful
// when interpolating across them.
if (Op.getOperand(Idx).isUndef())
continue;
// The BUILD_VECTOR must be all constants.
if (!isa<ConstantSDNode>(Op.getOperand(Idx)))
return None;
uint64_t Val = Op.getConstantOperandVal(Idx) &
maskTrailingOnes<uint64_t>(EltSizeInBits);
if (PrevElt) {
// Calculate the step since the last non-undef element, and ensure
// it's consistent across the entire sequence.
unsigned IdxDiff = Idx - PrevElt->second;
int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
// A zero value difference means that we're somewhere in the middle
// of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
// step change before evaluating the sequence.
if (ValDiff != 0) {
int64_t Remainder = ValDiff % IdxDiff;
// Normalize the step if it's greater than 1.
if (Remainder != ValDiff) {
// The difference must cleanly divide the element span.
if (Remainder != 0)
return None;
ValDiff /= IdxDiff;
IdxDiff = 1;
}
if (!SeqStepNum)
SeqStepNum = ValDiff;
else if (ValDiff != SeqStepNum)
return None;
if (!SeqStepDenom)
SeqStepDenom = IdxDiff;
else if (IdxDiff != *SeqStepDenom)
return None;
}
}
// Record and/or check any addend.
if (SeqStepNum && SeqStepDenom) {
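// The addend is the difference between the actual element and the value the
// pure (VID * step) sequence would produce at this index.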
uint64_t ExpectedVal =
(int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
if (!SeqAddend)
SeqAddend = Addend;
else if (SeqAddend != Addend)
return None;
}
// Record this non-undef element for later.
if (!PrevElt || PrevElt->first != Val)
PrevElt = std::make_pair(Val, Idx);
}
// We need to have logged both a step and an addend for this to count as
// a legal index sequence.
if (!SeqStepNum || !SeqStepDenom || !SeqAddend)
return None;
return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
}
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
assert(VT.isFixedLengthVector() && "Unexpected vector!");
MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
SDLoc DL(Op);
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
MVT XLenVT = Subtarget.getXLenVT();
unsigned NumElts = Op.getNumOperands();
if (VT.getVectorElementType() == MVT::i1) {
if (ISD::isBuildVectorAllZeros(Op.getNode())) {
SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
}
if (ISD::isBuildVectorAllOnes(Op.getNode())) {
SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
}
// Lower constant mask BUILD_VECTORs via an integer vector type, in
// scalar integer chunks whose bit-width depends on the number of mask
// bits and XLEN.
// First, determine the most appropriate scalar integer type to use. This
// is at most XLenVT, but may be shrunk to a smaller vector element type
// according to the size of the final vector - use i8 chunks rather than
// XLenVT if we're producing a v8i1. This results in more consistent
// codegen across RV32 and RV64.
unsigned NumViaIntegerBits =
std::min(std::max(NumElts, 8u), Subtarget.getXLen());
NumViaIntegerBits = std::min(NumViaIntegerBits,
Subtarget.getMaxELENForFixedLengthVectors());
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
// If we have to use more than one INSERT_VECTOR_ELT then this
// optimization is likely to increase code size; avoid performing it in
// such a case. We can use a load from a constant pool in this case.
if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
return SDValue();
// Now we can create our integer vector type. Note that it may be larger
// than the resulting mask type: v4i1 would use v1i8 as its integer type.
MVT IntegerViaVecVT =
MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
divideCeil(NumElts, NumViaIntegerBits));
uint64_t Bits = 0;
unsigned BitPos = 0, IntegerEltIdx = 0;
SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);
for (unsigned I = 0; I < NumElts; I++, BitPos++) {
// Once we accumulate enough bits to fill our scalar type, insert into
// our vector and clear our accumulated data.
if (I != 0 && I % NumViaIntegerBits == 0) {
if (NumViaIntegerBits <= 32)
Bits = SignExtend64(Bits, 32);
SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
Bits = 0;
BitPos = 0;
IntegerEltIdx++;
}
SDValue V = Op.getOperand(I);
bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
Bits |= ((uint64_t)BitValue << BitPos);
}
// Insert the (remaining) scalar value into position in our integer
// vector type.
if (NumViaIntegerBits <= 32)
Bits = SignExtend64(Bits, 32);
SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
DAG.getConstant(IntegerEltIdx, DL, XLenVT));
if (NumElts < NumViaIntegerBits) {
// If we're producing a smaller vector than our minimum legal integer
// type, bitcast to the equivalent (known-legal) mask type, and extract
// our final mask.
assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
Vec = DAG.getBitcast(MVT::v8i1, Vec);
Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
DAG.getConstant(0, DL, XLenVT));
} else {
// Else we must have produced an integer type with the same size as the
// mask type; bitcast for the final result.
assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
Vec = DAG.getBitcast(VT, Vec);
}
return Vec;
}
// A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
// vector type, we have a legal equivalently-sized i8 type, so we can use
// that.
MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
SDValue WideVec;
if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
// For a splat, perform a scalar truncate before creating the wider
// vector.
assert(Splat.getValueType() == XLenVT &&
"Unexpected type for i1 splat value");
Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
DAG.getConstant(1, DL, XLenVT));
WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
} else {
SmallVector<SDValue, 8> Ops(Op->op_values());
WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
}
return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
}
if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
: RISCVISD::VMV_V_X_VL;
Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
return convertFromScalableVector(VT, Splat, DAG, Subtarget);
}
// Try and match index sequences, which we can lower to the vid instruction
// with optional modifications. An all-undef vector is matched by
// getSplatValue, above.
if (auto SimpleVID = isSimpleVIDSequence(Op)) {
int64_t StepNumerator = SimpleVID->StepNumerator;
unsigned StepDenominator = SimpleVID->StepDenominator;
int64_t Addend = SimpleVID->Addend;
assert(StepNumerator != 0 && "Invalid step");
bool Negate = false;
int64_t SplatStepVal = StepNumerator;
unsigned StepOpcode = ISD::MUL;
if (StepNumerator != 1) {
if (isPowerOf2_64(std::abs(StepNumerator))) {
Negate = StepNumerator < 0;
StepOpcode = ISD::SHL;
SplatStepVal = Log2_64(std::abs(StepNumerator));
}
}
// Only emit VIDs with suitably-small steps/addends. We use imm5 as a
// threshold since it's the immediate value many RVV instructions accept.
// There is no vmul.vi instruction, so ensure the multiply constant can fit
// in a single addi instruction.
if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
(StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
isPowerOf2_32(StepDenominator) && isInt<5>(Addend)) {
SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
// Convert right out of the scalable type so we can use standard ISD
// nodes for the rest of the computation. If we used scalable types with
// these, we'd lose the fixed-length vector info and generate worse
// vsetvli code.
VID = convertFromScalableVector(VT, VID, DAG, Subtarget);
if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
(StepOpcode == ISD::SHL && SplatStepVal != 0)) {
SDValue SplatStep = DAG.getSplatVector(
VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
VID = DAG.getNode(StepOpcode, DL, VT, VID, SplatStep);
}
if (StepDenominator != 1) {
SDValue SplatStep = DAG.getSplatVector(
VT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
VID = DAG.getNode(ISD::SRL, DL, VT, VID, SplatStep);
}
if (Addend != 0 || Negate) {
SDValue SplatAddend =
DAG.getSplatVector(VT, DL, DAG.getConstant(Addend, DL, XLenVT));
VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VT, SplatAddend, VID);
}
return VID;
}
}
// Attempt to detect "hidden" splats, which only reveal themselves as splats
// when re-interpreted as a vector with a larger element type. For example,
// v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
// could be instead splat as
// v2i32 = build_vector i32 0x00010000, i32 0x00010000
// TODO: This optimization could also work on non-constant splats, but it
// would require bit-manipulation instructions to construct the splat value.
SmallVector<SDValue> Sequence;
unsigned EltBitSize = VT.getScalarSizeInBits();
const auto *BV = cast<BuildVectorSDNode>(Op);
if (VT.isInteger() && EltBitSize < 64 &&
ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
BV->getRepeatedSequence(Sequence) &&
(Sequence.size() * EltBitSize) <= 64) {
unsigned SeqLen = Sequence.size();
MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
ViaIntVT == MVT::i64) &&
"Unexpected sequence type");
unsigned EltIdx = 0;
uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
uint64_t SplatValue = 0;
// Construct the amalgamated value which can be splatted as this larger
// vector type.
for (const auto &SeqV : Sequence) {
if (!SeqV.isUndef())
SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
<< (EltIdx * EltBitSize));
EltIdx++;
}
// On RV64, sign-extend from 32 to 64 bits where possible in order to
// achieve better constant materialization.
if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
SplatValue = SignExtend64(SplatValue, 32);
// Since we can't introduce illegal i64 types at this stage, we can only
// perform an i64 splat on RV32 if it is its own sign-extended value. That
// way we can use RVV instructions to splat.
assert((ViaIntVT.bitsLE(XLenVT) ||
(!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
"Unexpected bitcast sequence");
if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
SDValue ViaVL =
DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
MVT ViaContainerVT =
getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
SDValue Splat =
DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
return DAG.getBitcast(VT, Splat);
}
}
// Try and optimize BUILD_VECTORs with "dominant values" - these are values
// which constitute a large proportion of the elements. In such cases we can
// splat a vector with the dominant element and make up the shortfall with
// INSERT_VECTOR_ELTs.
// Note that this includes vectors of 2 elements by association. The
// upper-most element is the "dominant" one, allowing us to use a splat to
// "insert" the upper element, and an insert of the lower element at position
// 0, which improves codegen.
SDValue DominantValue;
unsigned MostCommonCount = 0;
DenseMap<SDValue, unsigned> ValueCounts;
unsigned NumUndefElts =
count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
// Track the number of scalar loads we know we'd be inserting, estimated as
// any non-zero floating-point constant. Other kinds of element are either
// already in registers or are materialized on demand. The threshold at which
// a vector load is more desirable than several scalar materialization and
// vector-insertion instructions is not known.
unsigned NumScalarLoads = 0;
for (SDValue V : Op->op_values()) {
if (V.isUndef())
continue;
ValueCounts.insert(std::make_pair(V, 0));
unsigned &Count = ValueCounts[V];
if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
NumScalarLoads += !CFP->isExactlyValue(+0.0);
// Is this value dominant? In case of a tie, prefer the highest element as
// it's cheaper to insert near the beginning of a vector than it is at the
// end.
if (++Count >= MostCommonCount) {
DominantValue = V;
MostCommonCount = Count;
}
}
assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
unsigned NumDefElts = NumElts - NumUndefElts;
unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
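// Exceeding this threshold means at most one defined element differs from
// the dominant value.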
// Don't perform this optimization when optimizing for size, since
// materializing elements and inserting them tends to cause code bloat.
if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
((MostCommonCount > DominantValueCountThreshold) ||
(ValueCounts.size() <= Log2_32(NumDefElts)))) {
// Start by splatting the most common element.
SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
DenseSet<SDValue> Processed{DominantValue};
MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
for (const auto &OpIdx : enumerate(Op->ops())) {
const SDValue &V = OpIdx.value();
if (V.isUndef() || !Processed.insert(V).second)
continue;
if (ValueCounts[V] == 1) {
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
DAG.getConstant(OpIdx.index(), DL, XLenVT));
} else {
// Blend in all instances of this value using a VSELECT, using a
// mask where each bit signals whether that element is the one
// we're after.
SmallVector<SDValue> Ops;
transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
return DAG.getConstant(V == V1, DL, XLenVT);
});
Vec = DAG.getNode(ISD::VSELECT, DL, VT,
DAG.getBuildVector(SelMaskTy, DL, Ops),
DAG.getSplatBuildVector(VT, DL, V), Vec);
}
}
return Vec;
}
return SDValue();
}
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Lo,
SDValue Hi, SDValue VL, SelectionDAG &DAG) {
if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
// If every bit of the Hi constant matches the sign bit of Lo (i.e. the pair
// is the sign extension of Lo), lower this as a custom node in order to try
// and match RVV vector/scalar instructions.
if ((LoC >> 31) == HiC)
return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL);
// If vl equals VLMax and the Hi constant equals Lo, we can use a vmv.v.x
// with EEW=32 to lower it.
auto *Const = dyn_cast<ConstantSDNode>(VL);
if (LoC == HiC && Const && Const->getSExtValue() == RISCV::VLMaxSentinel) {
MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
// TODO: We could also do this when vl <= min(VLMAX), but we cannot access
// the subtarget here to check that.
auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT, Lo, VL);
return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
}
}
// Fall back to a stack store and stride x0 vector load.
return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Lo, Hi, VL);
}
// Called by type legalization to handle splat of i64 on RV32.
// FIXME: We can optimize this when the type has sign or zero bits in one
// of the halves.
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
SDValue VL, SelectionDAG &DAG) {
assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
DAG.getConstant(0, DL, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
DAG.getConstant(1, DL, MVT::i32));
return splatPartsI64WithVL(DL, VT, Lo, Hi, VL, DAG);
}
// This function lowers a splat of a scalar operand Splat with the vector
// length VL. It ensures the final sequence is type legal, which is useful when
// lowering a splat after type legalization.
static SDValue lowerScalarSplat(SDValue Scalar, SDValue VL, MVT VT, SDLoc DL,
SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
if (VT.isFloatingPoint()) {
// If VL is 1, we could use vfmv.s.f.
if (isOneConstant(VL))
return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, DAG.getUNDEF(VT),
Scalar, VL);
return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Scalar, VL);
}
MVT XLenVT = Subtarget.getXLenVT();
// Simplest case is that the operand needs to be promoted to XLenVT.
if (Scalar.getValueType().bitsLE(XLenVT)) {
// If the operand is a constant, sign extend to increase our chances
// of being able to use a .vi instruction. ANY_EXTEND would become a
// zero extend and the simm5 check in isel would fail.
// FIXME: Should we ignore the upper bits in isel instead?
unsigned ExtOpc =
isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
// If VL is 1 and the scalar value won't benefit from immediate, we could
// use vmv.s.x.
if (isOneConstant(VL) &&
(!Const || isNullConstant(Scalar) || !isInt<5>(Const->getSExtValue())))
return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
VL);
return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Scalar, VL);
}
assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
"Unexpected scalar for splat lowering!");
if (isOneConstant(VL) && isNullConstant(Scalar))
return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT),
DAG.getConstant(0, DL, XLenVT), VL);
// Otherwise use the more complicated splatting algorithm.
return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
}
// Is the mask a slidedown that shifts in undefs?
static int matchShuffleAsSlideDown(ArrayRef<int> Mask) {
int Size = Mask.size();
// Elements shifted in should be undef.
auto CheckUndefs = [&](int Shift) {
for (int i = Size - Shift; i != Size; ++i)
if (Mask[i] >= 0)
return false;
return true;
};
// Elements should be shifted or undef.
auto MatchShift = [&](int Shift) {
for (int i = 0; i != Size - Shift; ++i)
if (Mask[i] >= 0 && Mask[i] != Shift + i)
return false;
return true;
};
// Try all possible shifts.
for (int Shift = 1; Shift != Size; ++Shift)
if (CheckUndefs(Shift) && MatchShift(Shift))
return Shift;
// No match.
return -1;
}
static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, bool &SwapSources,
const RISCVSubtarget &Subtarget) {
// We need to be able to widen elements to the next larger integer type.
if (VT.getScalarSizeInBits() >= Subtarget.getMaxELENForFixedLengthVectors())
return false;
int Size = Mask.size();
assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
int Srcs[] = {-1, -1};
for (int i = 0; i != Size; ++i) {
// Ignore undef elements.
if (Mask[i] < 0)
continue;
// Is this an even or odd element?
int Pol = i % 2;
// Ensure we consistently use the same source for this element polarity.
int Src = Mask[i] / Size;
if (Srcs[Pol] < 0)
Srcs[Pol] = Src;
if (Srcs[Pol] != Src)
return false;
// Make sure the element within the source is appropriate for this element
// in the destination.
int Elt = Mask[i] % Size;
if (Elt != i / 2)
return false;
}
// We need to find a source for each polarity and they can't be the same.
if (Srcs[0] < 0 || Srcs[1] < 0 || Srcs[0] == Srcs[1])
return false;
// Swap the sources if the second source was in the even polarity.
SwapSources = Srcs[0] > Srcs[1];
return true;
}
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
SDLoc DL(Op);
MVT XLenVT = Subtarget.getXLenVT();
MVT VT = Op.getSimpleValueType();
unsigned NumElts = VT.getVectorNumElements();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
SDValue TrueMask, VL;
std::tie(TrueMask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
if (SVN->isSplat()) {
const int Lane = SVN->getSplatIndex();
if (Lane >= 0) {
MVT SVT = VT.getVectorElementType();
// Turn splatted vector load into a strided load with an X0 stride.
SDValue V = V1;
// Peek through CONCAT_VECTORS as VectorCombine can concat a vector
// with undef.
// FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
int Offset = Lane;
if (V.getOpcode() == ISD::CONCAT_VECTORS) {
int OpElements =
V.getOperand(0).getSimpleValueType().getVectorNumElements();
V = V.getOperand(Offset / OpElements);
Offset %= OpElements;
}
// We need to ensure the load isn't atomic or volatile.
if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
auto *Ld = cast<LoadSDNode>(V);
Offset *= SVT.getStoreSize();
SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
TypeSize::Fixed(Offset), DL);
// If this is SEW=64 on RV32, use a strided load with a stride of x0.
if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
SDValue IntID =
DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
SDValue Ops[] = {Ld->getChain(),
IntID,
DAG.getUNDEF(ContainerVT),
NewAddr,
DAG.getRegister(RISCV::X0, XLenVT),
VL};
SDValue NewLoad = DAG.getMemIntrinsicNode(
ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
DAG.getMachineFunction().getMachineMemOperand(
Ld->getMemOperand(), Offset, SVT.getStoreSize()));
DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
}
// Otherwise use a scalar load and splat. This will give the best
// opportunity to fold a splat into the operation. ISel can turn it into
// the x0 strided load if we aren't able to fold away the select.
if (SVT.isFloatingPoint())
V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
Ld->getPointerInfo().getWithOffset(Offset),
Ld->getOriginalAlign(),
Ld->getMemOperand()->getFlags());
else
V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
Ld->getPointerInfo().getWithOffset(Offset), SVT,
Ld->getOriginalAlign(),
Ld->getMemOperand()->getFlags());
DAG.makeEquivalentMemoryOrdering(Ld, V);
unsigned Opc =
VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, V, VL);
return convertFromScalableVector(VT, Splat, DAG, Subtarget);
}
V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
assert(Lane < (int)NumElts && "Unexpected lane!");
SDValue Gather =
DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
DAG.getConstant(Lane, DL, XLenVT), TrueMask, VL);
return convertFromScalableVector(VT, Gather, DAG, Subtarget);
}
}
ArrayRef<int> Mask = SVN->getMask();
// Try to match as a slidedown.
int SlideAmt = matchShuffleAsSlideDown(Mask);
if (SlideAmt >= 0) {
// TODO: Should we reduce the VL to account for the upper undef elements?
// Requires additional vsetvlis, but might be faster to execute.
V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
SDValue SlideDown =
DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
DAG.getUNDEF(ContainerVT), V1,
DAG.getConstant(SlideAmt, DL, XLenVT),
TrueMask, VL);
return convertFromScalableVector(VT, SlideDown, DAG, Subtarget);
}
// Detect an interleave shuffle and lower to
// (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
bool SwapSources;
if (isInterleaveShuffle(Mask, VT, SwapSources, Subtarget)) {
// Swap sources if needed.
if (SwapSources)
std::swap(V1, V2);
// Extract the lower half of the vectors.
MVT HalfVT = VT.getHalfNumVectorElementsVT();
V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
DAG.getConstant(0, DL, XLenVT));
V2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V2,
DAG.getConstant(0, DL, XLenVT));
// Double the element width and halve the number of elements in an int type.
unsigned EltBits = VT.getScalarSizeInBits();
MVT WideIntEltVT = MVT::getIntegerVT(EltBits * 2);
MVT WideIntVT =
MVT::getVectorVT(WideIntEltVT, VT.getVectorNumElements() / 2);
// Convert this to a scalable vector. We need to base this on the
// destination size to ensure there's always a type with a smaller LMUL.
MVT WideIntContainerVT =
getContainerForFixedLengthVector(DAG, WideIntVT, Subtarget);
// Convert sources to scalable vectors with the same element count as the
// larger type.
MVT HalfContainerVT = MVT::getVectorVT(
VT.getVectorElementType(), WideIntContainerVT.getVectorElementCount());
V1 = convertToScalableVector(HalfContainerVT, V1, DAG, Subtarget);
V2 = convertToScalableVector(HalfContainerVT, V2, DAG, Subtarget);
// Cast sources to integer.
MVT IntEltVT = MVT::getIntegerVT(EltBits);
MVT IntHalfVT =
MVT::getVectorVT(IntEltVT, HalfContainerVT.getVectorElementCount());
V1 = DAG.getBitcast(IntHalfVT, V1);
V2 = DAG.getBitcast(IntHalfVT, V2);
// Freeze V2 since we use it twice and we need to be sure that the add and
// multiply see the same value.
V2 = DAG.getNode(ISD::FREEZE, DL, IntHalfVT, V2);
// Recreate TrueMask using the widened type's element count.
MVT MaskVT =
MVT::getVectorVT(MVT::i1, HalfContainerVT.getVectorElementCount());
TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
// Widen V1 and V2 with 0s and add one copy of V2 to V1.
SDValue Add = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideIntContainerVT, V1,
V2, TrueMask, VL);
// Create 2^eltbits - 1 copies of V2 by multiplying by the largest integer.
SDValue Multiplier = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntHalfVT,
DAG.getAllOnesConstant(DL, XLenVT));
SDValue WidenMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideIntContainerVT,
V2, Multiplier, TrueMask, VL);
// Add the new copies to our previous addition giving us 2^eltbits copies of
// V2. This is equivalent to shifting V2 left by eltbits. This should
// combine with the vwmulu.vv above to form vwmaccu.vv.
Add = DAG.getNode(RISCVISD::ADD_VL, DL, WideIntContainerVT, Add, WidenMul,
TrueMask, VL);
// Cast back to ContainerVT. We need to re-create a new ContainerVT in case
// WideIntContainerVT is a larger fractional LMUL than implied by the fixed
// vector VT.
ContainerVT =
MVT::getVectorVT(VT.getVectorElementType(),
WideIntContainerVT.getVectorElementCount() * 2);
Add = DAG.getBitcast(ContainerVT, Add);
return convertFromScalableVector(VT, Add, DAG, Subtarget);
}
// Detect shuffles which can be re-expressed as vector selects; these are
// shuffles in which each element in the destination is taken from an element
// at the corresponding index in either source vector.
bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
int MaskIndex = MaskIdx.value();
return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
});
assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
SmallVector<SDValue> MaskVals;
// As a backup, shuffles can be lowered via a vrgather instruction, possibly
// merged with a second vrgather.
SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
// By default we preserve the original operand order, and use a mask to
// select LHS as true and RHS as false. However, since RVV vector selects may
// feature splats but only on the LHS, we may choose to invert our mask and
// instead select between RHS and LHS.
bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
bool InvertMask = IsSelect == SwapOps;
// Keep track of which non-undef indices are used by each LHS/RHS shuffle
// half.
DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;
// Now construct the mask that will be used by the vselect or blended
// vrgather operation. For vrgathers, construct the appropriate indices into
// each vector.
for (int MaskIndex : Mask) {
bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
if (!IsSelect) {
bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
? DAG.getConstant(MaskIndex, DL, XLenVT)
: DAG.getUNDEF(XLenVT));
GatherIndicesRHS.push_back(
IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
: DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
if (IsLHSOrUndefIndex && MaskIndex >= 0)
++LHSIndexCounts[MaskIndex];
if (!IsLHSOrUndefIndex)
++RHSIndexCounts[MaskIndex - NumElts];
}
}
if (SwapOps) {
std::swap(V1, V2);
std::swap(GatherIndicesLHS, GatherIndicesRHS);
}
assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
if (IsSelect)
return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
// On such a large vector we're unable to use i8 as the index type.
// FIXME: We could promote the index to i16 and use vrgatherei16, but that
// may involve vector splitting if we're already at LMUL=8, or our
// user-supplied maximum fixed-length LMUL.
return SDValue();
}
unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
MVT IndexVT = VT.changeTypeToInteger();
// Since we can't introduce illegal index types at this stage, use i16 and
// vrgatherei16 if the corresponding index type for plain vrgather is greater
// than XLenVT.
if (IndexVT.getScalarType().bitsGT(XLenVT)) {
GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
IndexVT = IndexVT.changeVectorElementType(MVT::i16);
}
MVT IndexContainerVT =
ContainerVT.changeVectorElementType(IndexVT.getScalarType());
SDValue Gather;
// TODO: This doesn't trigger for i64 vectors on RV32, since there we
// encounter a bitcasted BUILD_VECTOR with low/high i32 values.
if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
Gather = lowerScalarSplat(SplatValue, VL, ContainerVT, DL, DAG, Subtarget);
} else {
V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
// If only one index is used, we can use a "splat" vrgather.
// TODO: We can splat the most-common index and fix-up any stragglers, if
// that's beneficial.
if (LHSIndexCounts.size() == 1) {
int SplatIndex = LHSIndexCounts.begin()->getFirst();
Gather =
DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
} else {
SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
LHSIndices =
convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
TrueMask, VL);
}
}
// If a second vector operand is used by this shuffle, blend it in with an
// additional vrgather.
if (!V2.isUndef()) {
V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
// If only one index is used, we can use a "splat" vrgather.
// TODO: We can splat the most-common index and fix-up any stragglers, if
// that's beneficial.
if (RHSIndexCounts.size() == 1) {
int SplatIndex = RHSIndexCounts.begin()->getFirst();
V2 = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
} else {
SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
RHSIndices =
convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
V2 = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, TrueMask,
VL);
}
MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
SelectMask =
convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
Gather = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, SelectMask, V2,
Gather, VL);
}
return convertFromScalableVector(VT, Gather, DAG, Subtarget);
}
static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
SDLoc DL, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
if (VT.isScalableVector())
return DAG.getFPExtendOrRound(Op, DL, VT);
assert(VT.isFixedLengthVector() &&
"Unexpected value type for RVV FP extend/round lowering");
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
? RISCVISD::FP_EXTEND_VL
: RISCVISD::FP_ROUND_VL;
return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
}
// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
// the exponent.
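// For example, for a 32-bit element x == 16, uint_to_fp gives 16.0, whose
// biased f64 exponent is 1023 + 4; CTLZ then computes (1023 + 31) - 1027 ==
// 27 leading zeros. For CTTZ, x == 40 is first reduced to x & -x == 8, whose
// exponent 1026 minus the bias 1023 yields 3 trailing zeros.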
static SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
unsigned EltSize = VT.getScalarSizeInBits();
SDValue Src = Op.getOperand(0);
SDLoc DL(Op);
// We need a FP type that can represent the value.
// TODO: Use f16 for i8 when possible?
MVT FloatEltVT = EltSize == 32 ? MVT::f64 : MVT::f32;
MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
// Legal types should have been checked in the RISCVTargetLowering
// constructor.
// TODO: Splitting may make sense in some cases.
assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
"Expected legal float type!");
// For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
// The trailing zero count is equal to log2 of this single bit value.
if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
SDValue Neg =
DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
}
// We have a legal FP type, convert to it.
SDValue FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
// Bitcast to integer and shift the exponent to the LSB.
EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
SDValue Shift = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
DAG.getConstant(ShiftAmt, DL, IntVT));
// Truncate back to original type to allow vnsrl.
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, Shift);
// The exponent contains log2 of the value in biased form.
unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
// For trailing zeros, we just need to subtract the bias.
if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
return DAG.getNode(ISD::SUB, DL, VT, Trunc,
DAG.getConstant(ExponentBias, DL, VT));
// For leading zeros, we need to remove the bias and convert from log2 to
// leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
unsigned Adjust = ExponentBias + (EltSize - 1);
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Trunc);
}
// While RVV has alignment restrictions, we should always be able to load as a
// legal equivalently-sized byte-typed vector instead. This method is
// responsible for re-expressing an ISD::LOAD via a correctly-aligned type. If
// the load is already correctly-aligned, it returns SDValue().
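// For example, a v4i32 load with only byte alignment is re-expressed as a
// v16i8 load of the same address and the result is bitcast back to v4i32.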
SDValue RISCVTargetLowering::expandUnalignedRVVLoad(SDValue Op,
SelectionDAG &DAG) const {
auto *Load = cast<LoadSDNode>(Op);
assert(Load && Load->getMemoryVT().isVector() && "Expected vector load");
if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
Load->getMemoryVT(),
*Load->getMemOperand()))
return SDValue();
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
unsigned EltSizeBits = VT.getScalarSizeInBits();
assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
"Unexpected unaligned RVV load type");
MVT NewVT =
MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
assert(NewVT.isValid() &&
"Expecting equally-sized RVV vector types to be legal");
SDValue L = DAG.getLoad(NewVT, DL, Load->getChain(), Load->getBasePtr(),
Load->getPointerInfo(), Load->getOriginalAlign(),
Load->getMemOperand()->getFlags());
return DAG.getMergeValues({DAG.getBitcast(VT, L), L.getValue(1)}, DL);
}
// While RVV has alignment restrictions, we should always be able to store as a
// legal equivalently-sized byte-typed vector instead. This method is
// responsible for re-expressing an ISD::STORE via a correctly-aligned type. It
// returns SDValue() if the store is already correctly aligned.
SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
SelectionDAG &DAG) const {
auto *Store = cast<StoreSDNode>(Op);
assert(Store && Store->getValue().getValueType().isVector() &&
"Expected vector store");
if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
Store->getMemoryVT(),
*Store->getMemOperand()))
return SDValue();
SDLoc DL(Op);
SDValue StoredVal = Store->getValue();
MVT VT = StoredVal.getSimpleValueType();
unsigned EltSizeBits = VT.getScalarSizeInBits();
assert((EltSizeBits == 16 || EltSizeBits == 32 || EltSizeBits == 64) &&
"Unexpected unaligned RVV store type");
MVT NewVT =
MVT::getVectorVT(MVT::i8, VT.getVectorElementCount() * (EltSizeBits / 8));
assert(NewVT.isValid() &&
"Expecting equally-sized RVV vector types to be legal");
StoredVal = DAG.getBitcast(NewVT, StoredVal);
return DAG.getStore(Store->getChain(), DL, StoredVal, Store->getBasePtr(),
Store->getPointerInfo(), Store->getOriginalAlign(),
Store->getMemOperand()->getFlags());
}
SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default:
report_fatal_error("unimplemented operand");
case ISD::GlobalAddress:
return lowerGlobalAddress(Op, DAG);
case ISD::BlockAddress:
return lowerBlockAddress(Op, DAG);
case ISD::ConstantPool:
return lowerConstantPool(Op, DAG);
case ISD::JumpTable:
return lowerJumpTable(Op, DAG);
case ISD::GlobalTLSAddress:
return lowerGlobalTLSAddress(Op, DAG);
case ISD::SELECT:
return lowerSELECT(Op, DAG);
case ISD::BRCOND:
return lowerBRCOND(Op, DAG);
case ISD::VASTART:
return lowerVASTART(Op, DAG);
case ISD::FRAMEADDR:
return lowerFRAMEADDR(Op, DAG);
case ISD::RETURNADDR:
return lowerRETURNADDR(Op, DAG);
case ISD::SHL_PARTS:
return lowerShiftLeftParts(Op, DAG);
case ISD::SRA_PARTS:
return lowerShiftRightParts(Op, DAG, true);
case ISD::SRL_PARTS:
return lowerShiftRightParts(Op, DAG, false);
case ISD::BITCAST: {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue Op0 = Op.getOperand(0);
EVT Op0VT = Op0.getValueType();
MVT XLenVT = Subtarget.getXLenVT();
if (VT.isFixedLengthVector()) {
// We can handle fixed length vector bitcasts with a simple replacement
// in isel.
if (Op0VT.isFixedLengthVector())
return Op;
// When bitcasting from scalar to fixed-length vector, insert the scalar
// into a one-element vector of the result type, and perform a vector
// bitcast.
if (!Op0VT.isVector()) {
EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
if (!isTypeLegal(BVT))
return SDValue();
return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
DAG.getUNDEF(BVT), Op0,
DAG.getConstant(0, DL, XLenVT)));
}
return SDValue();
}
// Custom-legalize bitcasts from fixed-length vector types to scalar types
// thus: bitcast the vector to a one-element vector type whose element type
// is the same as the result type, and extract the first element.
if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
if (!isTypeLegal(BVT))
return SDValue();
SDValue BVec = DAG.getBitcast(BVT, Op0);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
DAG.getConstant(0, DL, XLenVT));
}
if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) {
SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
return FPConv;
}
if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
Subtarget.hasStdExtF()) {
SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
SDValue FPConv =
DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
return FPConv;
}
return SDValue();
}
case ISD::INTRINSIC_WO_CHAIN:
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::INTRINSIC_W_CHAIN:
return LowerINTRINSIC_W_CHAIN(Op, DAG);
case ISD::INTRINSIC_VOID:
return LowerINTRINSIC_VOID(Op, DAG);
case ISD::BSWAP:
case ISD::BITREVERSE: {
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
if (Subtarget.hasStdExtZbp()) {
// Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
// Start with the maximum immediate value which is the bitwidth - 1.
unsigned Imm = VT.getSizeInBits() - 1;
// If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
if (Op.getOpcode() == ISD::BSWAP)
Imm &= ~0x7U;
return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0),
DAG.getConstant(Imm, DL, VT));
}
assert(Subtarget.hasStdExtZbkb() && "Unexpected custom legalization");
assert(Op.getOpcode() == ISD::BITREVERSE && "Unexpected opcode");
// Expand bitreverse to a bswap(rev8) followed by brev8.
SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
// We use the Zbp grevi encoding for rev.b/brev8 which will be recognized
// as brev8 by an isel pattern.
return DAG.getNode(RISCVISD::GREV, DL, VT, BSwap,
DAG.getConstant(7, DL, VT));
}
case ISD::FSHL:
case ISD::FSHR: {
MVT VT = Op.getSimpleValueType();
assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
SDLoc DL(Op);
// FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
// use log2(XLen) bits. Mask the shift amount accordingly to prevent
// accidentally setting the extra bit.
unsigned ShAmtWidth = Subtarget.getXLen() - 1;
SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
DAG.getConstant(ShAmtWidth, DL, VT));
// fshl and fshr concatenate their operands in the same order. fsr and fsl
// instructions use different orders. fshl will return its first operand for
// shift of zero, fshr will return its second operand. fsl and fsr both
// return rs1 so the ISD nodes need to have different operand orders.
// Shift amount is in rs2.
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
unsigned Opc = RISCVISD::FSL;
if (Op.getOpcode() == ISD::FSHR) {
std::swap(Op0, Op1);
Opc = RISCVISD::FSR;
}
return DAG.getNode(Opc, DL, VT, Op0, Op1, ShAmt);
}
case ISD::TRUNCATE: {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
// Only custom-lower vector truncates
if (!VT.isVector())
return Op;
// Truncates to mask types are handled differently
if (VT.getVectorElementType() == MVT::i1)
return lowerVectorMaskTrunc(Op, DAG);
// RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
// truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
// truncate by one power of two at a time.
MVT DstEltVT = VT.getVectorElementType();
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
MVT SrcEltVT = SrcVT.getVectorElementType();
assert(DstEltVT.bitsLT(SrcEltVT) &&
isPowerOf2_64(DstEltVT.getSizeInBits()) &&
isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
"Unexpected vector truncate lowering");
MVT ContainerVT = SrcVT;
if (SrcVT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(SrcVT);
Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
}
SDValue Result = Src;
SDValue Mask, VL;
std::tie(Mask, VL) =
getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
LLVMContext &Context = *DAG.getContext();
const ElementCount Count = ContainerVT.getVectorElementCount();
do {
SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
Mask, VL);
} while (SrcEltVT != DstEltVT);
if (SrcVT.isFixedLengthVector())
Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
return Result;
}
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
if (Op.getOperand(0).getValueType().isVector() &&
Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
case ISD::SIGN_EXTEND:
if (Op.getOperand(0).getValueType().isVector() &&
Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
case ISD::SPLAT_VECTOR_PARTS:
return lowerSPLAT_VECTOR_PARTS(Op, DAG);
case ISD::INSERT_VECTOR_ELT:
return lowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT:
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::VSCALE: {
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
// We define our scalable vector types for lmul=1 to use a 64 bit known
// minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
// vscale as VLENB / 8.
static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
if (Subtarget.getMinVLen() < RISCV::RVVBitsPerBlock)
report_fatal_error("Support for VLEN==32 is incomplete.");
if (isa<ConstantSDNode>(Op.getOperand(0))) {
// We assume VLENB is a multiple of 8. We manually choose the best shift
// here because SimplifyDemandedBits isn't always able to simplify it.
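// For example, vscale * 4 becomes VLENB >> 1 (since 4/8 == 1/2), vscale * 8
// is VLENB itself, and vscale * 16 becomes VLENB << 1.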
uint64_t Val = Op.getConstantOperandVal(0);
if (isPowerOf2_64(Val)) {
uint64_t Log2 = Log2_64(Val);
if (Log2 < 3)
return DAG.getNode(ISD::SRL, DL, VT, VLENB,
DAG.getConstant(3 - Log2, DL, VT));
if (Log2 > 3)
return DAG.getNode(ISD::SHL, DL, VT, VLENB,
DAG.getConstant(Log2 - 3, DL, VT));
return VLENB;
}
// If the multiplier is a multiple of 8, scale it down to avoid needing
// to shift the VLENB value.
if ((Val % 8) == 0)
return DAG.getNode(ISD::MUL, DL, VT, VLENB,
DAG.getConstant(Val / 8, DL, VT));
}
SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
DAG.getConstant(3, DL, VT));
return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
}
case ISD::FPOWI: {
// Custom promote f16 powi with illegal i32 integer type on RV64. Once
// promoted, this will be legalized into a libcall by LegalizeIntegerTypes.
if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
Op.getOperand(1).getValueType() == MVT::i32) {
SDLoc DL(Op);
SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
SDValue Powi =
DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
DAG.getIntPtrConstant(0, DL));
}
return SDValue();
}
case ISD::FP_EXTEND: {
// RVV can only do fp_extend to types double the size of the source. We
// custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
// via f32.
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
// Prepare any fixed-length vector operands.
MVT ContainerVT = VT;
if (SrcVT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VT);
MVT SrcContainerVT =
ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
}
if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
SrcVT.getVectorElementType() != MVT::f16) {
// For scalable vectors, we only need to close the gap between
// vXf16->vXf64.
if (!VT.isFixedLengthVector())
return Op;
// For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version.
Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
return convertFromScalableVector(VT, Src, DAG, Subtarget);
}
MVT InterVT = VT.changeVectorElementType(MVT::f32);
MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32);
SDValue IntermediateExtend = getRVVFPExtendOrRound(
Src, InterVT, InterContainerVT, DL, DAG, Subtarget);
SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT,
DL, DAG, Subtarget);
if (VT.isFixedLengthVector())
return convertFromScalableVector(VT, Extend, DAG, Subtarget);
return Extend;
}
case ISD::FP_ROUND: {
// RVV can only do fp_round to types half the size of the source. We
// custom-lower f64->f16 rounds via RVV's round-to-odd float
// conversion instruction.
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
// Prepare any fixed-length vector operands.
MVT ContainerVT = VT;
if (VT.isFixedLengthVector()) {
MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
ContainerVT =
SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
}
if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
SrcVT.getVectorElementType() != MVT::f64) {
// For scalable vectors, we only need to close the gap between
// vXf64<->vXf16.
if (!VT.isFixedLengthVector())
return Op;
// For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
return convertFromScalableVector(VT, Src, DAG, Subtarget);
}
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
SDValue IntermediateRound =
DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT,
DL, DAG, Subtarget);
if (VT.isFixedLengthVector())
return convertFromScalableVector(VT, Round, DAG, Subtarget);
return Round;
}
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: {
// RVV can only do fp<->int conversions to types half/double the size of
// the source. We custom-lower any conversions that do two hops into
// sequences.
MVT VT = Op.getSimpleValueType();
if (!VT.isVector())
return Op;
SDLoc DL(Op);
SDValue Src = Op.getOperand(0);
MVT EltVT = VT.getVectorElementType();
MVT SrcVT = Src.getSimpleValueType();
MVT SrcEltVT = SrcVT.getVectorElementType();
unsigned EltSize = EltVT.getSizeInBits();
unsigned SrcEltSize = SrcEltVT.getSizeInBits();
assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
"Unexpected vector element types");
bool IsInt2FP = SrcEltVT.isInteger();
// Widening conversions
if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
if (IsInt2FP) {
// Do a regular integer sign/zero extension then convert to float.
MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()),
VT.getVectorElementCount());
unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
? ISD::ZERO_EXTEND
: ISD::SIGN_EXTEND;
SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
}
// FP2Int
assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
// Do one doubling fp_extend then complete the operation by converting
// to int.
MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
}
// Narrowing conversions
if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
if (IsInt2FP) {
// One narrowing int_to_fp, then an fp_round.
assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
return DAG.getFPExtendOrRound(Int2FP, DL, VT);
}
// FP2Int
// One narrowing fp_to_int, then truncate the integer. If the float isn't
// representable by the integer, the result is poison.
MVT IVecVT =
MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2),
VT.getVectorElementCount());
SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
}
// Scalable vectors can exit here. Patterns will handle equally-sized
// conversions and ones that halve/double the element size.
if (!VT.isFixedLengthVector())
return Op;
// For fixed-length vectors we lower to a custom "VL" node.
unsigned RVVOpc = 0;
switch (Op.getOpcode()) {
default:
llvm_unreachable("Impossible opcode");
case ISD::FP_TO_SINT:
RVVOpc = RISCVISD::FP_TO_SINT_VL;
break;
case ISD::FP_TO_UINT:
RVVOpc = RISCVISD::FP_TO_UINT_VL;
break;
case ISD::SINT_TO_FP:
RVVOpc = RISCVISD::SINT_TO_FP_VL;
break;
case ISD::UINT_TO_FP:
RVVOpc = RISCVISD::UINT_TO_FP_VL;
break;
}
MVT ContainerVT, SrcContainerVT;
// Derive the reference container type from the larger vector type.
if (SrcEltSize > EltSize) {
SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
ContainerVT =
SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
} else {
ContainerVT = getContainerForFixedLengthVector(VT);
SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT);
}
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
return convertFromScalableVector(VT, Src, DAG, Subtarget);
}
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
case ISD::FTRUNC:
case ISD::FCEIL:
case ISD::FFLOOR:
return lowerFTRUNC_FCEIL_FFLOOR(Op, DAG);
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_SMIN:
return lowerVECREDUCE(Op, DAG);
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
return lowerVECREDUCE(Op, DAG);
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_SEQ_FADD:
case ISD::VECREDUCE_FMIN:
case ISD::VECREDUCE_FMAX:
return lowerFPVECREDUCE(Op, DAG);
case ISD::VP_REDUCE_ADD:
case ISD::VP_REDUCE_UMAX:
case ISD::VP_REDUCE_SMAX:
case ISD::VP_REDUCE_UMIN:
case ISD::VP_REDUCE_SMIN:
case ISD::VP_REDUCE_FADD:
case ISD::VP_REDUCE_SEQ_FADD:
case ISD::VP_REDUCE_FMIN:
case ISD::VP_REDUCE_FMAX:
return lowerVPREDUCE(Op, DAG);
case ISD::VP_REDUCE_AND:
case ISD::VP_REDUCE_OR:
case ISD::VP_REDUCE_XOR:
if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
return lowerVPREDUCE(Op, DAG);
case ISD::INSERT_SUBVECTOR:
return lowerINSERT_SUBVECTOR(Op, DAG);
case ISD::EXTRACT_SUBVECTOR:
return lowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::STEP_VECTOR:
return lowerSTEP_VECTOR(Op, DAG);
case ISD::VECTOR_REVERSE:
return lowerVECTOR_REVERSE(Op, DAG);
case ISD::BUILD_VECTOR:
return lowerBUILD_VECTOR(Op, DAG, Subtarget);
case ISD::SPLAT_VECTOR:
if (Op.getValueType().getVectorElementType() == MVT::i1)
return lowerVectorMaskSplat(Op, DAG);
return lowerSPLAT_VECTOR(Op, DAG, Subtarget);
case ISD::VECTOR_SHUFFLE:
return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
case ISD::CONCAT_VECTORS: {
// Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
// better than going through the stack, as the default expansion does.
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
unsigned NumOpElts =
Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
SDValue Vec = DAG.getUNDEF(VT);
for (const auto &OpIdx : enumerate(Op->ops())) {
SDValue SubVec = OpIdx.value();
// Don't insert undef subvectors.
if (SubVec.isUndef())
continue;
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, SubVec,
DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
}
return Vec;
}
case ISD::LOAD:
if (auto V = expandUnalignedRVVLoad(Op, DAG))
return V;
if (Op.getValueType().isFixedLengthVector())
return lowerFixedLengthVectorLoadToRVV(Op, DAG);
return Op;
case ISD::STORE:
if (auto V = expandUnalignedRVVStore(Op, DAG))
return V;
if (Op.getOperand(1).getValueType().isFixedLengthVector())
return lowerFixedLengthVectorStoreToRVV(Op, DAG);
return Op;
case ISD::MLOAD:
case ISD::VP_LOAD:
return lowerMaskedLoad(Op, DAG);
case ISD::MSTORE:
case ISD::VP_STORE:
return lowerMaskedStore(Op, DAG);
case ISD::SETCC:
return lowerFixedLengthVectorSetccToRVV(Op, DAG);
case ISD::ADD:
return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL);
case ISD::SUB:
return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
case ISD::MUL:
return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
case ISD::MULHS:
return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL);
case ISD::MULHU:
return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL);
case ISD::AND:
return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
RISCVISD::AND_VL);
case ISD::OR:
return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
RISCVISD::OR_VL);
case ISD::XOR:
return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
RISCVISD::XOR_VL);
case ISD::SDIV:
return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
case ISD::SREM:
return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL);
case ISD::UDIV:
return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL);
case ISD::UREM:
return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL);
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
if (Op.getSimpleValueType().isFixedLengthVector())
return lowerFixedLengthVectorShiftToRVV(Op, DAG);
// This can be called for an i32 shift amount that needs to be promoted.
assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
return SDValue();
case ISD::SADDSAT:
return lowerToScalableOp(Op, DAG, RISCVISD::SADDSAT_VL);
case ISD::UADDSAT:
return lowerToScalableOp(Op, DAG, RISCVISD::UADDSAT_VL);
case ISD::SSUBSAT:
return lowerToScalableOp(Op, DAG, RISCVISD::SSUBSAT_VL);
case ISD::USUBSAT:
return lowerToScalableOp(Op, DAG, RISCVISD::USUBSAT_VL);
case ISD::FADD:
return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
case ISD::FSUB:
return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL);
case ISD::FMUL:
return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL);
case ISD::FDIV:
return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL);
case ISD::FNEG:
return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
case ISD::FABS:
return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL);
case ISD::FSQRT:
return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
case ISD::FMA:
return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
case ISD::SMIN:
return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
case ISD::SMAX:
return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL);
case ISD::UMIN:
return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL);
case ISD::UMAX:
return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL);
case ISD::FMINNUM:
return lowerToScalableOp(Op, DAG, RISCVISD::FMINNUM_VL);
case ISD::FMAXNUM:
return lowerToScalableOp(Op, DAG, RISCVISD::FMAXNUM_VL);
case ISD::ABS:
return lowerABS(Op, DAG);
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTTZ_ZERO_UNDEF:
return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
case ISD::VSELECT:
return lowerFixedLengthVectorSelectToRVV(Op, DAG);
case ISD::FCOPYSIGN:
return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
case ISD::MGATHER:
case ISD::VP_GATHER:
return lowerMaskedGather(Op, DAG);
case ISD::MSCATTER:
case ISD::VP_SCATTER:
return lowerMaskedScatter(Op, DAG);
case ISD::FLT_ROUNDS_:
return lowerGET_ROUNDING(Op, DAG);
case ISD::SET_ROUNDING:
return lowerSET_ROUNDING(Op, DAG);
case ISD::VP_SELECT:
return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL);
case ISD::VP_MERGE:
return lowerVPOp(Op, DAG, RISCVISD::VP_MERGE_VL);
case ISD::VP_ADD:
return lowerVPOp(Op, DAG, RISCVISD::ADD_VL);
case ISD::VP_SUB:
return lowerVPOp(Op, DAG, RISCVISD::SUB_VL);
case ISD::VP_MUL:
return lowerVPOp(Op, DAG, RISCVISD::MUL_VL);
case ISD::VP_SDIV:
return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL);
case ISD::VP_UDIV:
return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL);
case ISD::VP_SREM:
return lowerVPOp(Op, DAG, RISCVISD::SREM_VL);
case ISD::VP_UREM:
return lowerVPOp(Op, DAG, RISCVISD::UREM_VL);
case ISD::VP_AND:
return lowerLogicVPOp(Op, DAG, RISCVISD::VMAND_VL, RISCVISD::AND_VL);
case ISD::VP_OR:
return lowerLogicVPOp(Op, DAG, RISCVISD::VMOR_VL, RISCVISD::OR_VL);
case ISD::VP_XOR:
return lowerLogicVPOp(Op, DAG, RISCVISD::VMXOR_VL, RISCVISD::XOR_VL);
case ISD::VP_ASHR:
return lowerVPOp(Op, DAG, RISCVISD::SRA_VL);
case ISD::VP_LSHR:
return lowerVPOp(Op, DAG, RISCVISD::SRL_VL);
case ISD::VP_SHL:
return lowerVPOp(Op, DAG, RISCVISD::SHL_VL);
case ISD::VP_FADD:
return lowerVPOp(Op, DAG, RISCVISD::FADD_VL);
case ISD::VP_FSUB:
return lowerVPOp(Op, DAG, RISCVISD::FSUB_VL);
case ISD::VP_FMUL:
return lowerVPOp(Op, DAG, RISCVISD::FMUL_VL);
case ISD::VP_FDIV:
return lowerVPOp(Op, DAG, RISCVISD::FDIV_VL);
}
}
static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
SelectionDAG &DAG, unsigned Flags) {
return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}
static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
SelectionDAG &DAG, unsigned Flags) {
return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
Flags);
}
static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
SelectionDAG &DAG, unsigned Flags) {
return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
N->getOffset(), Flags);
}
static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
SelectionDAG &DAG, unsigned Flags) {
return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}
template <class NodeTy>
SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
bool IsLocal) const {
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
if (isPositionIndependent()) {
SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
if (IsLocal)
// Use PC-relative addressing to access the symbol. This generates the
// pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
// %pcrel_lo(auipc)).
return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
// Use PC-relative addressing to access the GOT for this symbol, then load
// the address from the GOT. This generates the pattern (PseudoLA sym),
// which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
}
switch (getTargetMachine().getCodeModel()) {
default:
report_fatal_error("Unsupported code model for lowering");
case CodeModel::Small: {
// Generate a sequence for accessing addresses within the first 2 GiB of
// address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
}
case CodeModel::Medium: {
// Generate a sequence for accessing addresses within any 2 GiB range within
// the address space. This generates the pattern (PseudoLLA sym), which
// expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
}
}
}
SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT Ty = Op.getValueType();
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
int64_t Offset = N->getOffset();
MVT XLenVT = Subtarget.getXLenVT();
const GlobalValue *GV = N->getGlobal();
bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
SDValue Addr = getAddr(N, DAG, IsLocal);
// In order to maximise the opportunity for common subexpression elimination,
// emit a separate ADD node for the global address offset instead of folding
// it in the global address node. Later peephole optimisations may choose to
// fold it back in when profitable.
if (Offset != 0)
return DAG.getNode(ISD::ADD, DL, Ty, Addr,
DAG.getConstant(Offset, DL, XLenVT));
return Addr;
}
SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
return getAddr(N, DAG);
}
SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
return getAddr(N, DAG);
}
SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
SelectionDAG &DAG) const {
JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
return getAddr(N, DAG);
}
SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
SelectionDAG &DAG,
bool UseGOT) const {
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
const GlobalValue *GV = N->getGlobal();
MVT XLenVT = Subtarget.getXLenVT();
if (UseGOT) {
// Use PC-relative addressing to access the GOT for this TLS symbol, then
// load the address from the GOT and add the thread pointer. This generates
// the pattern (PseudoLA_TLS_IE sym), which expands to
// (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
SDValue Load =
SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
// Add the thread pointer.
SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
}
// Generate a sequence for accessing the address relative to the thread
// pointer, with the appropriate adjustment for the thread pointer offset.
// This generates the pattern
// (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
SDValue AddrHi =
DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
SDValue AddrAdd =
DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
SDValue AddrLo =
DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
SDValue MNAdd = SDValue(
DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
0);
return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
}
SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
SelectionDAG &DAG) const {
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
const GlobalValue *GV = N->getGlobal();
// Use a PC-relative addressing mode to access the global dynamic GOT address.
// This generates the pattern (PseudoLA_TLS_GD sym), which expands to
// (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
SDValue Load =
SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
// Prepare argument list to generate call.
ArgListTy Args;
ArgListEntry Entry;
Entry.Node = Load;
Entry.Ty = CallTy;
Args.push_back(Entry);
// Setup call to __tls_get_addr.
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(DL)
.setChain(DAG.getEntryNode())
.setLibCallee(CallingConv::C, CallTy,
DAG.getExternalSymbol("__tls_get_addr", Ty),
std::move(Args));
return LowerCallTo(CLI).first;
}
SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT Ty = Op.getValueType();
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
int64_t Offset = N->getOffset();
MVT XLenVT = Subtarget.getXLenVT();
TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
if (DAG.getMachineFunction().getFunction().getCallingConv() ==
CallingConv::GHC)
report_fatal_error("In GHC calling convention TLS is not supported");
SDValue Addr;
switch (Model) {
case TLSModel::LocalExec:
Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
break;
case TLSModel::InitialExec:
Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
break;
case TLSModel::LocalDynamic:
case TLSModel::GeneralDynamic:
Addr = getDynamicTLSAddr(N, DAG);
break;
}
// In order to maximise the opportunity for common subexpression elimination,
// emit a separate ADD node for the global address offset instead of folding
// it in the global address node. Later peephole optimisations may choose to
// fold it back in when profitable.
if (Offset != 0)
return DAG.getNode(ISD::ADD, DL, Ty, Addr,
DAG.getConstant(Offset, DL, XLenVT));
return Addr;
}
SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue CondV = Op.getOperand(0);
SDValue TrueV = Op.getOperand(1);
SDValue FalseV = Op.getOperand(2);
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
MVT XLenVT = Subtarget.getXLenVT();
// Lower vector SELECTs to VSELECTs by splatting the condition.
if (VT.isVector()) {
MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
SDValue CondSplat = VT.isScalableVector()
? DAG.getSplatVector(SplatCondVT, DL, CondV)
: DAG.getSplatBuildVector(SplatCondVT, DL, CondV);
return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
}
// If the result type is XLenVT and CondV is the output of a SETCC node
// which also operated on XLenVT inputs, then merge the SETCC node into the
// lowered RISCVISD::SELECT_CC to take advantage of the integer
// compare+branch instructions. i.e.:
// (select (setcc lhs, rhs, cc), truev, falsev)
// -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
if (VT == XLenVT && CondV.getOpcode() == ISD::SETCC &&
CondV.getOperand(0).getSimpleValueType() == XLenVT) {
SDValue LHS = CondV.getOperand(0);
SDValue RHS = CondV.getOperand(1);
const auto *CC = cast<CondCodeSDNode>(CondV.getOperand(2));
ISD::CondCode CCVal = CC->get();
// Special case for a select of 2 constants that have a difference of 1.
// Normally this is done by DAGCombine, but if the select is introduced by
// type legalization or op legalization, we miss it. Restricting to the SETLT
// case for now because that is what signed saturating add/sub need.
// FIXME: We don't need the condition to be SETLT or even a SETCC,
// but we would probably want to swap the true/false values if the condition
// is SETGE/SETLE to avoid an XORI.
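// This relies on the setcc producing 0 or 1: if TrueV == FalseV + 1 the
// select is FalseV + CondV, and if TrueV == FalseV - 1 it is FalseV - CondV.
// For example, (select (setlt x, y), 2, 1) becomes (add (setlt x, y), 1).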
if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
CCVal == ISD::SETLT) {
const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
if (TrueVal - 1 == FalseVal)
return DAG.getNode(ISD::ADD, DL, Op.getValueType(), CondV, FalseV);
if (TrueVal + 1 == FalseVal)
return DAG.getNode(ISD::SUB, DL, Op.getValueType(), FalseV, CondV);
}
translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
SDValue TargetCC = DAG.getCondCode(CCVal);
SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
}
// Otherwise:
// (select condv, truev, falsev)
// -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
SDValue Zero = DAG.getConstant(0, DL, XLenVT);
SDValue SetNE = DAG.getCondCode(ISD::SETNE);
SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
}
SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue CondV = Op.getOperand(1);
SDLoc DL(Op);
MVT XLenVT = Subtarget.getXLenVT();
if (CondV.getOpcode() == ISD::SETCC &&
CondV.getOperand(0).getValueType() == XLenVT) {
SDValue LHS = CondV.getOperand(0);
SDValue RHS = CondV.getOperand(1);
ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
SDValue TargetCC = DAG.getCondCode(CCVal);
return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
LHS, RHS, TargetCC, Op.getOperand(2));
}
return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
CondV, DAG.getConstant(0, DL, XLenVT),
DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
}
SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
SDLoc DL(Op);
SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
getPointerTy(MF.getDataLayout()));
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
MachinePointerInfo(SV));
}
SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setFrameAddressIsTaken(true);
Register FrameReg = RI.getFrameRegister(MF);
int XLenInBytes = Subtarget.getXLen() / 8;
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
while (Depth--) {
int Offset = -(XLenInBytes * 2);
SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
DAG.getIntPtrConstant(Offset, DL));
FrameAddr =
DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
}
return FrameAddr;
}
SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setReturnAddressIsTaken(true);
MVT XLenVT = Subtarget.getXLenVT();
int XLenInBytes = Subtarget.getXLen() / 8;
if (verifyReturnAddressArgumentIsConstant(Op, DAG))
return SDValue();
EVT VT = Op.getValueType();
SDLoc DL(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
if (Depth) {
int Off = -XLenInBytes;
SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
SDValue Offset = DAG.getConstant(Off, DL, VT);
return DAG.getLoad(VT, DL, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
MachinePointerInfo());
}
// Return the value of the return address register, marking it an implicit
// live-in.
Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
}
SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
SDValue Shamt = Op.getOperand(2);
EVT VT = Lo.getValueType();
// if Shamt-XLEN < 0: // Shamt < XLEN
// Lo = Lo << Shamt
// Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
// else:
// Lo = 0
// Hi = Lo << (Shamt-XLEN)
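// Note that (Lo >>u 1) >>u (XLEN-1 - Shamt) is used rather than
// Lo >>u (XLEN - Shamt) so that the shift amount stays in [0, XLEN-1] even
// when Shamt is 0, avoiding an out-of-range shift.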
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue One = DAG.getConstant(1, DL, VT);
SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
SDValue ShiftRightLo =
DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
SDValue Parts[2] = {Lo, Hi};
return DAG.getMergeValues(Parts, DL);
}
SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
bool IsSRA) const {
SDLoc DL(Op);
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
SDValue Shamt = Op.getOperand(2);
EVT VT = Lo.getValueType();
// SRA expansion:
// if Shamt-XLEN < 0: // Shamt < XLEN
// Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
// Hi = Hi >>s Shamt
// else:
// Lo = Hi >>s (Shamt-XLEN);
// Hi = Hi >>s (XLEN-1)
//
// SRL expansion:
// if Shamt-XLEN < 0: // Shamt < XLEN
// Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
// Hi = Hi >>u Shamt
// else:
// Lo = Hi >>u (Shamt-XLEN);
// Hi = 0;
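// As in lowerShiftLeftParts, (Hi << 1) << (XLEN-1 - Shamt) is used instead
// of Hi << (XLEN - Shamt) so the shift amount stays in range when Shamt is 0.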
unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue One = DAG.getConstant(1, DL, VT);
SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
SDValue ShiftLeftHi =
DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
SDValue HiFalse =
IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
SDValue Parts[2] = {Lo, Hi};
return DAG.getMergeValues(Parts, DL);
}
// Lower splats of i1 types to SETCC. For each mask vector type, we have a
// legal equivalently-sized i8 type, so we can use that as a go-between.
SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SDValue SplatVal = Op.getOperand(0);
// All-zeros or all-ones splats are handled specially.
if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
}
if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
}
MVT XLenVT = Subtarget.getXLenVT();
assert(SplatVal.getValueType() == XLenVT &&
"Unexpected type for i1 splat value");
MVT InterVT = VT.changeVectorElementType(MVT::i8);
SplatVal = DAG.getNode(ISD::AND, DL, XLenVT, SplatVal,
DAG.getConstant(1, DL, XLenVT));
SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
SDValue Zero = DAG.getConstant(0, DL, InterVT);
return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
}
// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
// illegal (currently only vXi64 RV32).
// FIXME: We could also catch non-constant sign-extended i32 values and lower
// them to SPLAT_VECTOR_I64
SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VecVT = Op.getSimpleValueType();
assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
"Unexpected SPLAT_VECTOR_PARTS lowering");
assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
if (VecVT.isFixedLengthVector()) {
MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
SDLoc DL(Op);
SDValue Mask, VL;
std::tie(Mask, VL) =
getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
SDValue Res = splatPartsI64WithVL(DL, ContainerVT, Lo, Hi, VL, DAG);
return convertFromScalableVector(VecVT, Res, DAG, Subtarget);
}
if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
// If Hi constant is all the same sign bit as Lo, lower this as a custom
// node in order to try and match RVV vector/scalar instructions.
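// For example, Lo == -5 (0xFFFFFFFB) with Hi == -1 describes the i64 value
// 0xFFFFFFFFFFFFFFFB, which is just Lo sign-extended, so a single 32-bit
// splat of Lo suffices.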
if ((LoC >> 31) == HiC)
return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
}
// Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
isa<ConstantSDNode>(Hi.getOperand(1)) &&
Hi.getConstantOperandVal(1) == 31)
return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
// Fall back to use a stack store and stride x0 vector load. Use X0 as VL.
return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT, Lo, Hi,
DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, MVT::i64));
}
// Custom-lower extensions from mask vectors by using a vselect either with 1
// for zero/any-extension or -1 for sign-extension:
// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
// Note that any-extension is lowered identically to zero-extension.
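// For example, sign-extending the mask <1, 0, 1> to vXi32 yields
// <-1, 0, -1>, while zero- or any-extension yields <1, 0, 1>.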
SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
int64_t ExtTrueVal) const {
SDLoc DL(Op);
MVT VecVT = Op.getSimpleValueType();
SDValue Src = Op.getOperand(0);
// Only custom-lower extensions from mask types
assert(Src.getValueType().isVector() &&
Src.getValueType().getVectorElementType() == MVT::i1);
MVT XLenVT = Subtarget.getXLenVT();
SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
if (VecVT.isScalableVector()) {
// Be careful not to introduce illegal scalar types at this stage, and be
// careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
// illegal and must be expanded. Since we know that the constants are
// sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
bool IsRV32E64 =
!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
if (!IsRV32E64) {
SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
} else {
SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
SplatTrueVal =
DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
}
return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
}
MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
MVT I1ContainerVT =
MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL);
SplatTrueVal =
DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL);
SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
SplatTrueVal, SplatZero, VL);
return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
}
SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
MVT ExtVT = Op.getSimpleValueType();
// Only custom-lower extensions from fixed-length vector types.
if (!ExtVT.isFixedLengthVector())
return Op;
MVT VT = Op.getOperand(0).getSimpleValueType();
// Grab the canonical container type for the extended type. Infer the smaller
// type from that to ensure the same number of vector elements, as we know
// the LMUL will be sufficient to hold the smaller type.
MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
// Get the source container type manually to ensure the same number of
// vector elements between source and dest.
MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
ContainerExtVT.getVectorElementCount());
SDValue Op1 =
convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
SDLoc DL(Op);
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);
return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
}
// Custom-lower truncations from vectors to mask vectors by using a mask and a
// setcc operation:
// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
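// For example, truncating the i32 elements 6 and 3 to i1 yields 0 and 1
// respectively, since only the low bit of each element is kept.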
SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT MaskVT = Op.getValueType();
// Only expect to custom-lower truncations to mask types
assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
"Unexpected type for vector mask lowering");
SDValue Src = Op.getOperand(0);
MVT VecVT = Src.getSimpleValueType();
// If this is a fixed vector, we need to convert it to a scalable vector.
MVT ContainerVT = VecVT;
if (VecVT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VecVT);
Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
}
SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne);
SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero);
if (VecVT.isScalableVector()) {
SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
}
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
SDValue Trunc =
DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL);
Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
DAG.getCondCode(ISD::SETNE), Mask, VL);
return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
}
// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
// first position of a vector, and that vector is slid up to the insert index.
// By limiting the active vector length to index+1 and merging with the
// original vector (with an undisturbed tail policy for elements >= VL), we
// achieve the desired result of leaving all elements untouched except the one
// at VL-1, which is replaced with the desired value.
SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VecVT = Op.getSimpleValueType();
SDValue Vec = Op.getOperand(0);
SDValue Val = Op.getOperand(1);
SDValue Idx = Op.getOperand(2);
if (VecVT.getVectorElementType() == MVT::i1) {
// FIXME: For now we just promote to an i8 vector and insert into that,
// but this is probably not optimal.
MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
}
MVT ContainerVT = VecVT;
// If the operand is a fixed-length vector, convert to a scalable one.
if (VecVT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VecVT);
Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
}
MVT XLenVT = Subtarget.getXLenVT();
SDValue Zero = DAG.getConstant(0, DL, XLenVT);
bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
// Even i64-element vectors on RV32 can be lowered without scalar
// legalization if the most-significant 32 bits of the value are not affected
// by the sign-extension of the lower 32 bits.
// TODO: We could also catch sign extensions of a 32-bit value.
if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
const auto *CVal = cast<ConstantSDNode>(Val);
if (isInt<32>(CVal->getSExtValue())) {
IsLegalInsert = true;
Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
}
}
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
SDValue ValInVec;
if (IsLegalInsert) {
unsigned Opc =
VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
if (isNullConstant(Idx)) {
Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
if (!VecVT.isFixedLengthVector())
return Vec;
return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
}
ValInVec =
DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Val, VL);
} else {
// On RV32, i64-element vectors must be specially handled to place the
// value at element 0, by using two vslide1up instructions in sequence on
// the i32 split lo/hi value. Use an equivalently-sized i32 vector for
// this.
SDValue One = DAG.getConstant(1, DL, XLenVT);
SDValue ValLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, Zero);
SDValue ValHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, One);
MVT I32ContainerVT =
MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
SDValue I32Mask =
getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
// Limit the active VL to two.
SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
// Note: We can't pass an UNDEF to the first VSLIDE1UP_VL since an untied
// undef doesn't obey the earlyclobber constraint. Just splat a zero value.
ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT, Zero,
InsertI64VL);
// First slide in the hi value, then slide the lo value in underneath it.
ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
ValHi, I32Mask, InsertI64VL);
ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
ValLo, I32Mask, InsertI64VL);
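// After the two slides the i32 vector holds (lo, hi, 0, ...), so once it is
// bitcast back to the i64 container type the full value sits in element 0.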
// Bitcast back to the right container type.
ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
}
// Now that the value is in a vector, slide it into position.
SDValue InsertVL =
DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
ValInVec, Idx, Mask, InsertVL);
if (!VecVT.isFixedLengthVector())
return Slideup;
return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
}
// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
// types this is done using VMV_X_S to allow us to glean information about the
// sign bits of the result.
SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Idx = Op.getOperand(1);
SDValue Vec = Op.getOperand(0);
EVT EltVT = Op.getValueType();
MVT VecVT = Vec.getSimpleValueType();
MVT XLenVT = Subtarget.getXLenVT();
if (VecVT.getVectorElementType() == MVT::i1) {
if (VecVT.isFixedLengthVector()) {
unsigned NumElts = VecVT.getVectorNumElements();
if (NumElts >= 8) {
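// Strategy: bitcast the mask to a vector of wider integer elements, extract
// the element containing the requested bit, then shift and mask the bit out
// in a GPR. Illustrative example (assuming ELEN >= 64 on RV64): extracting
// bit 12 of a v64i1 mask bitcasts to v1i64, extracts element 0 and computes
// (elt >> 12) & 1.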
MVT WideEltVT;
unsigned WidenVecLen;
SDValue ExtractElementIdx;
SDValue ExtractBitIdx;
unsigned MaxEEW = Subtarget.getMaxELENForFixedLengthVectors();
MVT LargestEltVT = MVT::getIntegerVT(
std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
if (NumElts <= LargestEltVT.getSizeInBits()) {
assert(isPowerOf2_32(NumElts) &&
"the number of elements should be power of 2");
WideEltVT = MVT::getIntegerVT(NumElts);
WidenVecLen = 1;
ExtractElementIdx = DAG.getConstant(0, DL, XLenVT);
ExtractBitIdx = Idx;
} else {
WideEltVT = LargestEltVT;
WidenVecLen = NumElts / WideEltVT.getSizeInBits();
// extract element index = index / element width
ExtractElementIdx = DAG.getNode(
ISD::SRL, DL, XLenVT, Idx,
DAG.getConstant(Log2_64(WideEltVT.getSizeInBits()), DL, XLenVT));
// mask bit index = index % element width
ExtractBitIdx = DAG.getNode(
ISD::AND, DL, XLenVT, Idx,
DAG.getConstant(WideEltVT.getSizeInBits() - 1, DL, XLenVT));
}
MVT WideVT = MVT::getVectorVT(WideEltVT, WidenVecLen);
Vec = DAG.getNode(ISD::BITCAST, DL, WideVT, Vec);
SDValue ExtractElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, XLenVT,
Vec, ExtractElementIdx);
// Extract the bit from GPR.
SDValue ShiftRight =
DAG.getNode(ISD::SRL, DL, XLenVT, ExtractElt, ExtractBitIdx);
return DAG.getNode(ISD::AND, DL, XLenVT, ShiftRight,
DAG.getConstant(1, DL, XLenVT));
}
}
// Otherwise, promote to an i8 vector and extract from that.
MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
}
// If this is a fixed vector, we need to convert it to a scalable vector.
MVT ContainerVT = VecVT;
if (VecVT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VecVT);
Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
}
// If the index is 0, the vector is already in the right position.
if (!isNullConstant(Idx)) {
// Use a VL of 1 to avoid processing more elements than we need.
SDValue VL = DAG.getConstant(1, DL, XLenVT);
MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
}
if (!EltVT.isInteger()) {
// Floating-point extracts are handled in TableGen.
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
DAG.getConstant(0, DL, XLenVT));
}
SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
}
// Some RVV intrinsics may claim that they want an integer operand to be
// promoted or expanded.
static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
"Unexpected opcode");
if (!Subtarget.hasVInstructions())
return SDValue();
bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
SDLoc DL(Op);
const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
if (!II || !II->hasSplatOperand())
return SDValue();
unsigned SplatOp = II->SplatOperand + 1 + HasChain;
assert(SplatOp < Op.getNumOperands());
SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
SDValue &ScalarOp = Operands[SplatOp];
MVT OpVT = ScalarOp.getSimpleValueType();
MVT XLenVT = Subtarget.getXLenVT();
// If this isn't a scalar, or its type is already XLenVT, we're done.
if (!OpVT.isScalarInteger() || OpVT == XLenVT)
return SDValue();
// Simplest case is that the operand needs to be promoted to XLenVT.
if (OpVT.bitsLT(XLenVT)) {
// If the operand is a constant, sign extend to increase our chances
// of being able to use a .vi instruction. ANY_EXTEND would become a
// zero extend and the simm5 check in isel would fail.
// FIXME: Should we ignore the upper bits in isel instead?
unsigned ExtOpc =
isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
}
// Use the previous operand to get the vXi64 VT. The result might be a mask
// VT for compares. Using the previous operand assumes that the previous
// operand will never have a smaller element size than a scalar operand and
// that a widening operation never uses SEW=64.
// NOTE: If this fails the below assert, we can probably just find the
// element count from any operand or result and use it to construct the VT.
assert(II->SplatOperand > 0 && "Unexpected splat operand!");
MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
// The more complex case is when the scalar is larger than XLenVT.
assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
// If this is a sign-extended 32-bit constant, we can truncate it and rely
// on the instruction to sign-extend since SEW>XLEN.
if (auto *CVal = dyn_cast<ConstantSDNode>(ScalarOp)) {
if (isInt<32>(CVal->getSExtValue())) {
ScalarOp = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
}
}
// We need to convert the scalar to a splat vector.
// FIXME: Can we implicitly truncate the scalar if it is known to
// be sign extended?
SDValue VL = getVLOperand(Op);
assert(VL.getValueType() == XLenVT);
ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG);
return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
}
SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = Op.getConstantOperandVal(0);
SDLoc DL(Op);
MVT XLenVT = Subtarget.getXLenVT();
switch (IntNo) {
default:
break; // Don't custom lower most intrinsics.
case Intrinsic::thread_pointer: {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getRegister(RISCV::X4, PtrVT);
}
case Intrinsic::riscv_orc_b:
case Intrinsic::riscv_brev8: {
// Lower to the GORCI encoding for orc.b or the GREVI encoding for brev8.
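// Both use a control value of 7, which keeps the permutation within each
// byte.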
unsigned Opc =
IntNo == Intrinsic::riscv_brev8 ? RISCVISD::GREV : RISCVISD::GORC;
return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1),
DAG.getConstant(7, DL, XLenVT));
}
case Intrinsic::riscv_grev:
case Intrinsic::riscv_gorc: {
unsigned Opc =
IntNo == Intrinsic::riscv_grev ? RISCVISD::GREV : RISCVISD::GORC;
return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
}
case Intrinsic::riscv_zip:
case Intrinsic::riscv_unzip: {
// Lower to the SHFLI encoding for zip or the UNSHFLI encoding for unzip.
// For i32 the immediate is 15. For i64 the immediate is 31.
unsigned Opc =
IntNo == Intrinsic::riscv_zip ? RISCVISD::SHFL : RISCVISD::UNSHFL;
unsigned BitWidth = Op.getValueSizeInBits();
assert(isPowerOf2_32(BitWidth) && BitWidth >= 2 && "Unexpected bit width");
return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1),
DAG.getConstant((BitWidth / 2) - 1, DL, XLenVT));
}
case Intrinsic::riscv_shfl:
case Intrinsic::riscv_unshfl: {
unsigned Opc =
IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFL : RISCVISD::UNSHFL;
return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
}
case Intrinsic::riscv_bcompress:
case Intrinsic::riscv_bdecompress: {
unsigned Opc = IntNo == Intrinsic::riscv_bcompress ? RISCVISD::BCOMPRESS
: RISCVISD::BDECOMPRESS;
return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
}
case Intrinsic::riscv_bfp:
return DAG.getNode(RISCVISD::BFP, DL, XLenVT, Op.getOperand(1),
Op.getOperand(2));
case Intrinsic::riscv_fsl:
return DAG.getNode(RISCVISD::FSL, DL, XLenVT, Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
case Intrinsic::riscv_fsr:
return DAG.getNode(RISCVISD::FSR, DL, XLenVT, Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
case Intrinsic::riscv_vmv_x_s:
assert(Op.getValueType() == XLenVT && "Unexpected VT!");
return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::riscv_vmv_v_x:
return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
Op.getSimpleValueType(), DL, DAG, Subtarget);
case Intrinsic::riscv_vfmv_v_f:
return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::riscv_vmv_s_x: {
SDValue Scalar = Op.getOperand(2);
if (Scalar.getValueType().bitsLE(XLenVT)) {
Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
Op.getOperand(1), Scalar, Op.getOperand(3));
}
assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");
// This is an i64 value that lives in two scalar registers. We have to
// insert this in a convoluted way. First we build a vXi64 splat containing
// the two values that we assemble using some bit math. Next we'll use
// vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
// to merge element 0 from our splat into the source vector.
// FIXME: This is probably not the best way to do this, but it is
// consistent with INSERT_VECTOR_ELT lowering so it is a good starting
// point.
// sw lo, (a0)
// sw hi, 4(a0)
// vlse vX, (a0)
//
// vid.v vVid
// vmseq.vx mMask, vVid, 0
// vmerge.vvm vDest, vSrc, vVal, mMask
MVT VT = Op.getSimpleValueType();
SDValue Vec = Op.getOperand(1);
SDValue VL = getVLOperand(Op);
SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
DAG.getConstant(0, DL, MVT::i32), VL);
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
SDValue SelectCond =
DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx,
DAG.getCondCode(ISD::SETEQ), Mask, VL);
return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
Vec, VL);
}
case Intrinsic::riscv_vslide1up:
case Intrinsic::riscv_vslide1down:
case Intrinsic::riscv_vslide1up_mask:
case Intrinsic::riscv_vslide1down_mask: {
// We need to special case these when the scalar is larger than XLen.
unsigned NumOps = Op.getNumOperands();
bool IsMasked = NumOps == 7;
unsigned OpOffset = IsMasked ? 1 : 0;
SDValue Scalar = Op.getOperand(2 + OpOffset);
if (Scalar.getValueType().bitsLE(XLenVT))
break;
// Splatting a sign extended constant is fine.
if (auto *CVal = dyn_cast<ConstantSDNode>(Scalar))
if (isInt<32>(CVal->getSExtValue()))
break;
MVT VT = Op.getSimpleValueType();
assert(VT.getVectorElementType() == MVT::i64 &&
Scalar.getValueType() == MVT::i64 && "Unexpected VTs");
// Convert the vector source to the equivalent nxvXi32 vector.
MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
SDValue Vec = DAG.getBitcast(I32VT, Op.getOperand(1 + OpOffset));
SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
DAG.getConstant(0, DL, XLenVT));
SDValue ScalarHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
DAG.getConstant(1, DL, XLenVT));
// Double the VL since we halved SEW.
SDValue VL = getVLOperand(Op);
SDValue I32VL =
DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
MVT I32MaskVT = MVT::getVectorVT(MVT::i1, I32VT.getVectorElementCount());
SDValue I32Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, I32MaskVT, VL);
// Shift the two scalar parts in using SEW=32 slide1up/slide1down
// instructions.
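// Note the insertion order: slide1up inserts the hi half first so the lo
// half lands at the even (lower) i32 index of the pair, while slide1down
// inserts the lo half first to produce the same lo/hi order at the tail.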
if (IntNo == Intrinsic::riscv_vslide1up ||
IntNo == Intrinsic::riscv_vslide1up_mask) {
Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarHi,
I32Mask, I32VL);
Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarLo,
I32Mask, I32VL);
} else {
Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarLo,
I32Mask, I32VL);
Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarHi,
I32Mask, I32VL);
}
// Convert back to nxvXi64.
Vec = DAG.getBitcast(VT, Vec);
if (!IsMasked)
return Vec;
// Apply mask after the operation.
SDValue Mask = Op.getOperand(NumOps - 3);
SDValue MaskedOff = Op.getOperand(1);
return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, VL);
}
}
return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
}
SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = Op.getConstantOperandVal(1);
switch (IntNo) {
default:
break;
case Intrinsic::riscv_masked_strided_load: {
SDLoc DL(Op);
MVT XLenVT = Subtarget.getXLenVT();
// If the mask is known to be all ones, optimize to an unmasked intrinsic;
// the selection of the masked intrinsics doesn't do this for us.
SDValue Mask = Op.getOperand(5);
bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
MVT VT = Op->getSimpleValueType(0);
MVT ContainerVT = getContainerForFixedLengthVector(VT);
SDValue PassThru = Op.getOperand(2);
if (!IsUnmasked) {
MVT MaskVT =
MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
}
SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
SDValue IntID = DAG.getTargetConstant(
IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
XLenVT);
auto *Load = cast<MemIntrinsicSDNode>(Op);
SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
if (IsUnmasked)
Ops.push_back(DAG.getUNDEF(ContainerVT));
else
Ops.push_back(PassThru);
Ops.push_back(Op.getOperand(3)); // Ptr
Ops.push_back(Op.getOperand(4)); // Stride
if (!IsUnmasked)
Ops.push_back(Mask);
Ops.push_back(VL);
if (!IsUnmasked) {
SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
Ops.push_back(Policy);
}
SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
SDValue Result =
DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
Load->getMemoryVT(), Load->getMemOperand());
SDValue Chain = Result.getValue(1);
Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
return DAG.getMergeValues({Result, Chain}, DL);
}
}
return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
}
SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = Op.getConstantOperandVal(1);
switch (IntNo) {
default:
break;
case Intrinsic::riscv_masked_strided_store: {
SDLoc DL(Op);
MVT XLenVT = Subtarget.getXLenVT();
// If the mask is known to be all ones, optimize to an unmasked intrinsic;
// the selection of the masked intrinsics doesn't do this for us.
SDValue Mask = Op.getOperand(5);
bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
SDValue Val = Op.getOperand(2);
MVT VT = Val.getSimpleValueType();
MVT ContainerVT = getContainerForFixedLengthVector(VT);
Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
if (!IsUnmasked) {
MVT MaskVT =
MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
}
SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
SDValue IntID = DAG.getTargetConstant(
IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
XLenVT);
auto *Store = cast<MemIntrinsicSDNode>(Op);
SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
Ops.push_back(Val);
Ops.push_back(Op.getOperand(3)); // Ptr
Ops.push_back(Op.getOperand(4)); // Stride
if (!IsUnmasked)
Ops.push_back(Mask);
Ops.push_back(VL);
return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
Ops, Store->getMemoryVT(),
Store->getMemOperand());
}
}
return SDValue();
}
static MVT getLMUL1VT(MVT VT) {
assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
"Unexpected vector MVT");
return MVT::getScalableVectorVT(
VT.getVectorElementType(),
RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
}
static unsigned getRVVReductionOp(unsigned ISDOpcode) {
switch (ISDOpcode) {
default:
llvm_unreachable("Unhandled reduction");
case ISD::VECREDUCE_ADD:
return RISCVISD::VECREDUCE_ADD_VL;
case ISD::VECREDUCE_UMAX:
return RISCVISD::VECREDUCE_UMAX_VL;
case ISD::VECREDUCE_SMAX:
return RISCVISD::VECREDUCE_SMAX_VL;
case ISD::VECREDUCE_UMIN:
return RISCVISD::VECREDUCE_UMIN_VL;
case ISD::VECREDUCE_SMIN:
return RISCVISD::VECREDUCE_SMIN_VL;
case ISD::VECREDUCE_AND:
return RISCVISD::VECREDUCE_AND_VL;
case ISD::VECREDUCE_OR:
return RISCVISD::VECREDUCE_OR_VL;
case ISD::VECREDUCE_XOR:
return RISCVISD::VECREDUCE_XOR_VL;
}
}
SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
SelectionDAG &DAG,
bool IsVP) const {
SDLoc DL(Op);
SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
MVT VecVT = Vec.getSimpleValueType();
assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
Op.getOpcode() == ISD::VECREDUCE_OR ||
Op.getOpcode() == ISD::VECREDUCE_XOR ||
Op.getOpcode() == ISD::VP_REDUCE_AND ||
Op.getOpcode() == ISD::VP_REDUCE_OR ||
Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
"Unexpected reduction lowering");
MVT XLenVT = Subtarget.getXLenVT();
assert(Op.getValueType() == XLenVT &&
"Expected reduction output to be legalized to XLenVT");
MVT ContainerVT = VecVT;
if (VecVT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VecVT);
Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
}
SDValue Mask, VL;
if (IsVP) {
Mask = Op.getOperand(2);
VL = Op.getOperand(3);
} else {
std::tie(Mask, VL) =
getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
}
unsigned BaseOpc;
ISD::CondCode CC;
SDValue Zero = DAG.getConstant(0, DL, XLenVT);
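// Mask reductions are implemented with vcpop: AND reduces to "no zero bits"
// (the popcount of the complemented mask is zero), OR reduces to "any bit
// set" (popcount != 0), and XOR reduces to "odd popcount".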
switch (Op.getOpcode()) {
default:
llvm_unreachable("Unhandled reduction");
case ISD::VECREDUCE_AND:
case ISD::VP_REDUCE_AND: {
// vcpop ~x == 0
SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
CC = ISD::SETEQ;
BaseOpc = ISD::AND;
break;
}
case ISD::VECREDUCE_OR:
case ISD::VP_REDUCE_OR:
// vcpop x != 0
Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
CC = ISD::SETNE;
BaseOpc = ISD::OR;
break;
case ISD::VECREDUCE_XOR:
case ISD::VP_REDUCE_XOR: {
// ((vcpop x) & 1) != 0
SDValue One = DAG.getConstant(1, DL, XLenVT);
Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
CC = ISD::SETNE;
BaseOpc = ISD::XOR;
break;
}
}
SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
if (!IsVP)
return SetCC;
// Now include the start value in the operation.
// Note that we must return the start value when no elements are operated
// upon. The vcpop instructions we've emitted in each case above will return
// 0 for an inactive vector, and so we've already received the neutral value:
// AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
// can simply include the start value.
return DAG.getNode(BaseOpc, DL, XLenVT, SetCC, Op.getOperand(0));
}
SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Vec = Op.getOperand(0);
EVT VecEVT = Vec.getValueType();
unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
// Due to ordering in legalize types we may have a vector type that needs to
// be split. Do that manually so we can get down to a legal type.
while (getTypeAction(*DAG.getContext(), VecEVT) ==
TargetLowering::TypeSplitVector) {
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitVector(Vec, DL);
VecEVT = Lo.getValueType();
Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
}
// TODO: The type may need to be widened rather than split. Or widened before
// it can be split.
if (!isTypeLegal(VecEVT))
return SDValue();
MVT VecVT = VecEVT.getSimpleVT();
MVT VecEltVT = VecVT.getVectorElementType();
unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
MVT ContainerVT = VecVT;
if (VecVT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VecVT);
Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
}
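// RVV reductions take the start value in element 0 of an LMUL=1 vector
// operand and produce their result in element 0 of an LMUL=1 vector, so
// splat the neutral element into an M1 vector and extract element 0 below.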
MVT M1VT = getLMUL1VT(ContainerVT);
MVT XLenVT = Subtarget.getXLenVT();
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
SDValue NeutralElem =
DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
SDValue IdentitySplat = lowerScalarSplat(
NeutralElem, DAG.getConstant(1, DL, XLenVT), M1VT, DL, DAG, Subtarget);
SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT), Vec,
IdentitySplat, Mask, VL);
SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
DAG.getConstant(0, DL, XLenVT));
return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
}
// Given a reduction op, this function returns the matching reduction opcode,
// the vector SDValue and the scalar SDValue required to lower this to a
// RISCVISD node.
static std::tuple<unsigned, SDValue, SDValue>
getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
SDLoc DL(Op);
auto Flags = Op->getFlags();
unsigned Opcode = Op.getOpcode();
unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Opcode);
switch (Opcode) {
default:
llvm_unreachable("Unhandled reduction");
case ISD::VECREDUCE_FADD: {
// Use positive zero if we can. It is cheaper to materialize.
SDValue Zero =
DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT);
return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero);
}
case ISD::VECREDUCE_SEQ_FADD:
return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
Op.getOperand(0));
case ISD::VECREDUCE_FMIN:
return std::make_tuple(RISCVISD::VECREDUCE_FMIN_VL, Op.getOperand(0),
DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
case ISD::VECREDUCE_FMAX:
return std::make_tuple(RISCVISD::VECREDUCE_FMAX_VL, Op.getOperand(0),
DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
}
}
SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VecEltVT = Op.getSimpleValueType();
unsigned RVVOpcode;
SDValue VectorVal, ScalarVal;
std::tie(RVVOpcode, VectorVal, ScalarVal) =
getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT);
MVT VecVT = VectorVal.getSimpleValueType();
MVT ContainerVT = VecVT;
if (VecVT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VecVT);
VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
}
MVT M1VT = getLMUL1VT(VectorVal.getSimpleValueType());
MVT XLenVT = Subtarget.getXLenVT();
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
SDValue ScalarSplat = lowerScalarSplat(
ScalarVal, DAG.getConstant(1, DL, XLenVT), M1VT, DL, DAG, Subtarget);
SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT),
VectorVal, ScalarSplat, Mask, VL);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
DAG.getConstant(0, DL, XLenVT));
}
static unsigned getRVVVPReductionOp(unsigned ISDOpcode) {
switch (ISDOpcode) {
default:
llvm_unreachable("Unhandled reduction");
case ISD::VP_REDUCE_ADD:
return RISCVISD::VECREDUCE_ADD_VL;
case ISD::VP_REDUCE_UMAX:
return RISCVISD::VECREDUCE_UMAX_VL;
case ISD::VP_REDUCE_SMAX:
return RISCVISD::VECREDUCE_SMAX_VL;
case ISD::VP_REDUCE_UMIN:
return RISCVISD::VECREDUCE_UMIN_VL;
case ISD::VP_REDUCE_SMIN:
return RISCVISD::VECREDUCE_SMIN_VL;
case ISD::VP_REDUCE_AND:
return RISCVISD::VECREDUCE_AND_VL;
case ISD::VP_REDUCE_OR:
return RISCVISD::VECREDUCE_OR_VL;
case ISD::VP_REDUCE_XOR:
return RISCVISD::VECREDUCE_XOR_VL;
case ISD::VP_REDUCE_FADD:
return RISCVISD::VECREDUCE_FADD_VL;
case ISD::VP_REDUCE_SEQ_FADD:
return RISCVISD::VECREDUCE_SEQ_FADD_VL;
case ISD::VP_REDUCE_FMAX:
return RISCVISD::VECREDUCE_FMAX_VL;
case ISD::VP_REDUCE_FMIN:
return RISCVISD::VECREDUCE_FMIN_VL;
}
}
SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Vec = Op.getOperand(1);
EVT VecEVT = Vec.getValueType();
// TODO: The type may need to be widened rather than split. Or widened before
// it can be split.
if (!isTypeLegal(VecEVT))
return SDValue();
MVT VecVT = VecEVT.getSimpleVT();
MVT VecEltVT = VecVT.getVectorElementType();
unsigned RVVOpcode = getRVVVPReductionOp(Op.getOpcode());
MVT ContainerVT = VecVT;
if (VecVT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VecVT);
Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
}
SDValue VL = Op.getOperand(3);
SDValue Mask = Op.getOperand(2);
MVT M1VT = getLMUL1VT(ContainerVT);
MVT XLenVT = Subtarget.getXLenVT();
MVT ResVT = !VecVT.isInteger() || VecEltVT.bitsGE(XLenVT) ? VecEltVT : XLenVT;
SDValue StartSplat =
lowerScalarSplat(Op.getOperand(0), DAG.getConstant(1, DL, XLenVT), M1VT,
DL, DAG, Subtarget);
SDValue Reduction =
DAG.getNode(RVVOpcode, DL, M1VT, StartSplat, Vec, StartSplat, Mask, VL);
SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
DAG.getConstant(0, DL, XLenVT));
if (!VecVT.isInteger())
return Elt0;
return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
}
SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDValue Vec = Op.getOperand(0);
SDValue SubVec = Op.getOperand(1);
MVT VecVT = Vec.getSimpleValueType();
MVT SubVecVT = SubVec.getSimpleValueType();
SDLoc DL(Op);
MVT XLenVT = Subtarget.getXLenVT();
unsigned OrigIdx = Op.getConstantOperandVal(2);
const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
// We don't have the ability to slide mask vectors up indexed by their i1
// elements; the smallest we can do is i8. Often we are able to bitcast to
// equivalent i8 vectors. Note that when inserting a fixed-length vector
// into a scalable one, we might not necessarily have enough scalable
// elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
if (SubVecVT.getVectorElementType() == MVT::i1 &&
(OrigIdx != 0 || !Vec.isUndef())) {
if (VecVT.getVectorMinNumElements() >= 8 &&
SubVecVT.getVectorMinNumElements() >= 8) {
assert(OrigIdx % 8 == 0 && "Invalid index");
assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
SubVecVT.getVectorMinNumElements() % 8 == 0 &&
"Unexpected mask vector lowering");
OrigIdx /= 8;
SubVecVT =
MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
SubVecVT.isScalableVector());
VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
VecVT.isScalableVector());
Vec = DAG.getBitcast(VecVT, Vec);
SubVec = DAG.getBitcast(SubVecVT, SubVec);
} else {
// We can't slide this mask vector up indexed by its i1 elements.
// This poses a problem when we wish to insert a scalable vector which
// can't be re-expressed as a larger type. Just choose the slow path and
// extend to a larger type, then truncate back down.
MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
Op.getOperand(2));
SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
}
}
// If the subvector is a fixed-length type, we cannot use subregister
// manipulation to simplify the codegen; we don't know which register of a
// LMUL group contains the specific subvector as we only know the minimum
// register size. Therefore we must slide the vector group up the full
// amount.
if (SubVecVT.isFixedLengthVector()) {
if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
return Op;
MVT ContainerVT = VecVT;
if (VecVT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VecVT);
Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
}
SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
DAG.getUNDEF(ContainerVT), SubVec,
DAG.getConstant(0, DL, XLenVT));
if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
return DAG.getBitcast(Op.getValueType(), SubVec);
}
SDValue Mask =
getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
// Set the vector length to only the number of elements we care about. Note
// that for slideup this includes the offset.
SDValue VL =
DAG.getConstant(OrigIdx + SubVecVT.getVectorNumElements(), DL, XLenVT);
SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
SubVec, SlideupAmt, Mask, VL);
if (VecVT.isFixedLengthVector())
Slideup = convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
return DAG.getBitcast(Op.getValueType(), Slideup);
}
unsigned SubRegIdx, RemIdx;
std::tie(SubRegIdx, RemIdx) =
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
VecVT, SubVecVT, OrigIdx, TRI);
RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
// 1. If the Idx has been completely eliminated and this subvector's size is
// a vector register or a multiple thereof, or the surrounding elements are
// undef, then this is a subvector insert which naturally aligns to a vector
// register. These can easily be handled using subregister manipulation.
// 2. If the subvector is smaller than a vector register, then the insertion
// must preserve the undisturbed elements of the register. We do this by
// lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
// (which resolves to a subregister copy), performing a VSLIDEUP to place the
// subvector within the vector register, and an INSERT_SUBVECTOR of that
// LMUL=1 type back into the larger vector (resolving to another subregister
// operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
// to avoid allocating a large register group to hold our subvector.
if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
return Op;
// VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
// OFFSET<=i<VL set to the "subvector" and VL<=i<VLMAX set to the tail policy
// (in our case undisturbed). This means we can set up a subvector insertion
// where OFFSET is the insertion offset, and the VL is the OFFSET plus the
// size of the subvector.
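// Illustrative example: with RemIdx == 2 and a subvector with a minimum of
// two elements, the slide amount is 2*vscale and VL becomes 4*vscale, so
// only the lanes the subvector lands in are written.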
MVT InterSubVT = VecVT;
SDValue AlignedExtract = Vec;
unsigned AlignedIdx = OrigIdx - RemIdx;
if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
InterSubVT = getLMUL1VT(VecVT);
// Extract a subvector equal to the nearest full vector register type. This
// should resolve to an EXTRACT_SUBREG instruction.
AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
DAG.getConstant(AlignedIdx, DL, XLenVT));
}
SDValue SlideupAmt = DAG.getConstant(RemIdx, DL, XLenVT);
// For scalable vectors this must be further multiplied by vscale.
SlideupAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlideupAmt);
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
// Construct the vector length corresponding to RemIdx + length(SubVecVT).
VL = DAG.getConstant(SubVecVT.getVectorMinNumElements(), DL, XLenVT);
VL = DAG.getNode(ISD::VSCALE, DL, XLenVT, VL);
VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
DAG.getUNDEF(InterSubVT), SubVec,
DAG.getConstant(0, DL, XLenVT));
SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, InterSubVT,
AlignedExtract, SubVec, SlideupAmt, Mask, VL);
// If required, insert this subvector back into the correct vector register.
// This should resolve to an INSERT_SUBREG instruction.
if (VecVT.bitsGT(InterSubVT))
Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, Slideup,
DAG.getConstant(AlignedIdx, DL, XLenVT));
// We might have bitcast from a mask type: cast back to the original type if
// required.
return DAG.getBitcast(Op.getSimpleValueType(), Slideup);
}
SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDValue Vec = Op.getOperand(0);
MVT SubVecVT = Op.getSimpleValueType();
MVT VecVT = Vec.getSimpleValueType();
SDLoc DL(Op);
MVT XLenVT = Subtarget.getXLenVT();
unsigned OrigIdx = Op.getConstantOperandVal(1);
const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
// We don't have the ability to slide mask vectors down indexed by their i1
// elements; the smallest we can do is i8. Often we are able to bitcast to
// equivalent i8 vectors. Note that when extracting a fixed-length vector
// from a scalable one, we might not necessarily have enough scalable
// elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
if (VecVT.getVectorMinNumElements() >= 8 &&
SubVecVT.getVectorMinNumElements() >= 8) {
assert(OrigIdx % 8 == 0 && "Invalid index");
assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
SubVecVT.getVectorMinNumElements() % 8 == 0 &&
"Unexpected mask vector lowering");
OrigIdx /= 8;
SubVecVT =
MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
SubVecVT.isScalableVector());
VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
VecVT.isScalableVector());
Vec = DAG.getBitcast(VecVT, Vec);
} else {
// We can't slide this mask vector down indexed by its i1 elements.
// This poses a problem when we wish to extract a scalable vector which
// can't be re-expressed as a larger type. Just choose the slow path and
// extend to a larger type, then truncate back down.
// TODO: We could probably improve this when extracting certain fixed-length
// subvectors from fixed-length vectors, where we can extract as i8 and shift
// the correct element right to reach the desired subvector.
MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
Op.getOperand(1));
SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
}
}
// If the subvector is a fixed-length type, we cannot use subregister
// manipulation to simplify the codegen; we don't know which register of a
// LMUL group contains the specific subvector as we only know the minimum
// register size. Therefore we must slide the vector group down the full
// amount.
if (SubVecVT.isFixedLengthVector()) {
// With an index of 0 this is a cast-like subvector, which can be performed
// with subregister operations.
if (OrigIdx == 0)
return Op;
MVT ContainerVT = VecVT;
if (VecVT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VecVT);
Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
}
SDValue Mask =
getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
// Set the vector length to only the number of elements we care about. This
// avoids sliding down elements we're going to discard straight away.
SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
SDValue Slidedown =
DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
// Now we can use a cast-like subvector extract to get the result.
Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
DAG.getConstant(0, DL, XLenVT));
return DAG.getBitcast(Op.getValueType(), Slidedown);
}
unsigned SubRegIdx, RemIdx;
std::tie(SubRegIdx, RemIdx) =
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
VecVT, SubVecVT, OrigIdx, TRI);
// If the Idx has been completely eliminated then this is a subvector extract
// which naturally aligns to a vector register. These can easily be handled
// using subregister manipulation.
if (RemIdx == 0)
return Op;
// Else we must shift our vector register directly to extract the subvector.
// Do this using VSLIDEDOWN.
// If the vector type is an LMUL-group type, extract a subvector equal to the
// nearest full vector register type. This should resolve to an EXTRACT_SUBREG
// instruction.
MVT InterSubVT = VecVT;
if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
InterSubVT = getLMUL1VT(VecVT);
Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT));
}
// Slide this vector register down by the desired number of elements in order
// to place the desired subvector starting at element 0.
SDValue SlidedownAmt = DAG.getConstant(RemIdx, DL, XLenVT);
// For scalable vectors this must be further multiplied by vscale.
SlidedownAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlidedownAmt);
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
SDValue Slidedown =
DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, InterSubVT,
DAG.getUNDEF(InterSubVT), Vec, SlidedownAmt, Mask, VL);
// Now the vector is in the right position, extract our final subvector. This
// should resolve to a COPY.
Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
DAG.getConstant(0, DL, XLenVT));
// We might have bitcast from a mask type: cast back to the original type if
// required.
return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
}
// Lower step_vector to the vid instruction. Any non-identity step value must
// be accounted for by manual expansion.
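// For example (illustrative): a constant step of 4 lowers to vid.v followed
// by a shift left by 2, while a non-power-of-two step such as 3 lowers to
// vid.v followed by a multiply by a splat of 3.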
SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
MVT XLenVT = Subtarget.getXLenVT();
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
uint64_t StepValImm = Op.getConstantOperandVal(0);
if (StepValImm != 1) {
if (isPowerOf2_64(StepValImm)) {
SDValue StepVal =
DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
DAG.getConstant(Log2_64(StepValImm), DL, XLenVT));
StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
} else {
SDValue StepVal = lowerScalarSplat(
DAG.getConstant(StepValImm, DL, VT.getVectorElementType()), VL, VT,
DL, DAG, Subtarget);
StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
}
}
return StepVec;
}
// Implement vector_reverse using vrgather.vv with indices determined by
// subtracting the id of each element from (VLMAX-1). This will convert
// the indices like so:
// (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
// TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VecVT = Op.getSimpleValueType();
unsigned EltSize = VecVT.getScalarSizeInBits();
unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
unsigned MaxVLMAX = 0;
unsigned VectorBitsMax = Subtarget.getMaxRVVVectorSizeInBits();
if (VectorBitsMax != 0)
MaxVLMAX = ((VectorBitsMax / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;
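// VLMAX = (VLEN / SEW) * LMUL, with LMUL = MinSize / RVVBitsPerBlock, so the
// bound follows from the maximum configured VLEN.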
unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
MVT IntVT = VecVT.changeVectorElementTypeToInteger();
// If this is SEW=8 and VLMAX is unknown or more than 256, we need
// to use vrgatherei16.vv.
// TODO: It's also possible to use vrgatherei16.vv for other types to
// decrease register width for the index calculation.
if ((MaxVLMAX == 0 || MaxVLMAX > 256) && EltSize == 8) {
// If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
// Reverse each half, then reassemble them in reverse order.
// NOTE: It's also possible that, after splitting, VLMAX no longer requires
// vrgatherei16.vv.
if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
// Reassemble the low and high pieces reversed.
// FIXME: This is a CONCAT_VECTORS.
SDValue Res =
DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
DAG.getIntPtrConstant(0, DL));
return DAG.getNode(
ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
}
// Just promote the int type to i16 which will double the LMUL.
IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
}
MVT XLenVT = Subtarget.getXLenVT();
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
// Calculate VLMAX-1 for the desired SEW.
unsigned MinElts = VecVT.getVectorMinNumElements();
SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
DAG.getConstant(MinElts, DL, XLenVT));
SDValue VLMinus1 =
DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DAG.getConstant(1, DL, XLenVT));
// Splat VLMAX-1 taking care to handle SEW==64 on RV32.
bool IsRV32E64 =
!Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
SDValue SplatVL;
if (!IsRV32E64)
SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
else
SplatVL = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, IntVT, VLMinus1);
SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
SDValue Indices =
DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, Mask, VL);
return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask, VL);
}
SDValue
RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
auto *Load = cast<LoadSDNode>(Op);
assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
Load->getMemoryVT(),
*Load->getMemOperand()) &&
"Expecting a correctly-aligned load");
MVT VT = Op.getSimpleValueType();
MVT ContainerVT = getContainerForFixedLengthVector(VT);
SDValue VL =
DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
SDValue NewLoad = DAG.getMemIntrinsicNode(
RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL},
Load->getMemoryVT(), Load->getMemOperand());
SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
return DAG.getMergeValues({Result, Load->getChain()}, DL);
}
SDValue
RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
auto *Store = cast<StoreSDNode>(Op);
assert(allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
Store->getMemoryVT(),
*Store->getMemOperand()) &&
"Expecting a correctly-aligned store");
SDValue StoreVal = Store->getValue();
MVT VT = StoreVal.getSimpleValueType();
// If the size is less than a byte, we need to pad with zeros to make a full
// byte.
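// For example (illustrative), a v4i1 store is widened to a v8i1 store whose
// upper four elements are zero.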
if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
VT = MVT::v8i1;
StoreVal = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
DAG.getConstant(0, DL, VT), StoreVal,
DAG.getIntPtrConstant(0, DL));
}
MVT ContainerVT = getContainerForFixedLengthVector(VT);
SDValue VL =
DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
SDValue NewValue =
convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
return DAG.getMemIntrinsicNode(
RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other),
{Store->getChain(), NewValue, Store->getBasePtr(), VL},
Store->getMemoryVT(), Store->getMemOperand());
}
SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
const auto *MemSD = cast<MemSDNode>(Op);
EVT MemVT = MemSD->getMemoryVT();
MachineMemOperand *MMO = MemSD->getMemOperand();
SDValue Chain = MemSD->getChain();
SDValue BasePtr = MemSD->getBasePtr();
SDValue Mask, PassThru, VL;
if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
Mask = VPLoad->getMask();
PassThru = DAG.getUNDEF(VT);
VL = VPLoad->getVectorLength();
} else {
const auto *MLoad = cast<MaskedLoadSDNode>(Op);
Mask = MLoad->getMask();
PassThru = MLoad->getPassThru();
}
bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
MVT XLenVT = Subtarget.getXLenVT();
MVT ContainerVT = VT;
if (VT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VT);
PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
if (!IsUnmasked) {
MVT MaskVT =
MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
}
}
if (!VL)
VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
unsigned IntID =
IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
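// Operand order for the vle/vle_mask intrinsic node built below: chain,
// intrinsic id, passthru (undef for the unmasked form), base pointer, mask
// (masked form only), VL, and a tail policy (masked form only).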
SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
if (IsUnmasked)
Ops.push_back(DAG.getUNDEF(ContainerVT));
else
Ops.push_back(PassThru);
Ops.push_back(BasePtr);
if (!IsUnmasked)
Ops.push_back(Mask);
Ops.push_back(VL);
if (!IsUnmasked)
Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
SDValue Result =
DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
Chain = Result.getValue(1);
if (VT.isFixedLengthVector())
Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
return DAG.getMergeValues({Result, Chain}, DL);
}
SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
const auto *MemSD = cast<MemSDNode>(Op);
EVT MemVT = MemSD->getMemoryVT();
MachineMemOperand *MMO = MemSD->getMemOperand();
SDValue Chain = MemSD->getChain();
SDValue BasePtr = MemSD->getBasePtr();
SDValue Val, Mask, VL;
if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
Val = VPStore->getValue();
Mask = VPStore->getMask();
VL = VPStore->getVectorLength();
} else {
const auto *MStore = cast<MaskedStoreSDNode>(Op);
Val = MStore->getValue();
Mask = MStore->getMask();
}
bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
MVT VT = Val.getSimpleValueType();
MVT XLenVT = Subtarget.getXLenVT();
MVT ContainerVT = VT;
if (VT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VT);
Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
if (!IsUnmasked) {
MVT MaskVT =
MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
}
}
if (!VL)
VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
unsigned IntID =
IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
Ops.push_back(Val);
Ops.push_back(BasePtr);
if (!IsUnmasked)
Ops.push_back(Mask);
Ops.push_back(VL);
return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
}
SDValue
RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
SelectionDAG &DAG) const {
MVT InVT = Op.getOperand(0).getSimpleValueType();
MVT ContainerVT = getContainerForFixedLengthVector(InVT);
MVT VT = Op.getSimpleValueType();
SDValue Op1 =
convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
SDValue Op2 =
convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
SDLoc DL(Op);
SDValue VL =
DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
SDValue Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2,
Op.getOperand(2), Mask, VL);
return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
}
SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV(
SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const {
MVT VT = Op.getSimpleValueType();
if (VT.getVectorElementType() == MVT::i1)
return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMask*/ false);
return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true);
}
SDValue
RISCVTargetLowering::lowerFixedLengthVectorShiftToRVV(SDValue Op,
SelectionDAG &DAG) const {
unsigned Opc;
switch (Op.getOpcode()) {
default: llvm_unreachable("Unexpected opcode!");
case ISD::SHL: Opc = RISCVISD::SHL_VL; break;
case ISD::SRA: Opc = RISCVISD::SRA_VL; break;
case ISD::SRL: Opc = RISCVISD::SRL_VL; break;
}
return lowerToScalableOp(Op, DAG, Opc);
}
// Lower vector ABS to smax(X, sub(0, X)).
SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SDValue X = Op.getOperand(0);
assert(VT.isFixedLengthVector() && "Unexpected type");
MVT ContainerVT = getContainerForFixedLengthVector(VT);
X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
SDValue SplatZero =
DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
DAG.getConstant(0, DL, Subtarget.getXLenVT()));
SDValue NegX =
DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, Mask, VL);
SDValue Max =
DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, Mask, VL);
return convertFromScalableVector(VT, Max, DAG, Subtarget);
}
SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SDValue Mag = Op.getOperand(0);
SDValue Sign = Op.getOperand(1);
assert(Mag.getValueType() == Sign.getValueType() &&
"Can only handle COPYSIGN with matching types.");
MVT ContainerVT = getContainerForFixedLengthVector(VT);
Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
SDValue CopySign =
DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag, Sign, Mask, VL);
return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
}
SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
MVT ContainerVT = getContainerForFixedLengthVector(VT);
MVT I1ContainerVT =
MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
SDValue CC =
convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
SDValue Op1 =
convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
SDValue Op2 =
convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
SDLoc DL(Op);
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
SDValue Select =
DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);
return convertFromScalableVector(VT, Select, DAG, Subtarget);
}
SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
unsigned NewOpc,
bool HasMask) const {
MVT VT = Op.getSimpleValueType();
MVT ContainerVT = getContainerForFixedLengthVector(VT);
// Create list of operands by converting existing ones to scalable types.
SmallVector<SDValue, 6> Ops;
for (const SDValue &V : Op->op_values()) {
assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
// Pass through non-vector operands.
if (!V.getValueType().isVector()) {
Ops.push_back(V);
continue;
}
// "cast" fixed length vector to a scalable vector.
assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
"Only fixed length vectors are supported!");
Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
}
SDLoc DL(Op);
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
if (HasMask)
Ops.push_back(Mask);
Ops.push_back(VL);
SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
}
// Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
// * Operands of each node are assumed to be in the same order.
// * The EVL operand is promoted from i32 to i64 on RV64.
// * Fixed-length vectors are converted to their scalable-vector container
// types.
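// For example (illustrative), a VP add on a fixed-length v4i32 is lowered to
// the corresponding *_VL node on its scalable container type and the result
// is converted back to v4i32.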
SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG,
unsigned RISCVISDOpc) const {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SmallVector<SDValue, 4> Ops;
for (const auto &OpIdx : enumerate(Op->ops())) {
SDValue V = OpIdx.value();
assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
// Pass through operands which aren't fixed-length vectors.
if (!V.getValueType().isFixedLengthVector()) {
Ops.push_back(V);
continue;
}
// "cast" fixed length vector to a scalable vector.
MVT OpVT = V.getSimpleValueType();
MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
assert(useRVVForFixedLengthVectorVT(OpVT) &&
"Only fixed length vectors are supported!");
Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
}
if (!VT.isFixedLengthVector())
return DAG.getNode(RISCVISDOpc, DL, VT, Ops);
MVT ContainerVT = getContainerForFixedLengthVector(VT);
SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops);
return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
}
SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, SelectionDAG &DAG,
unsigned MaskOpc,
unsigned VecOpc) const {
MVT VT = Op.getSimpleValueType();
if (VT.getVectorElementType() != MVT::i1)
return lowerVPOp(Op, DAG, VecOpc);
// It is safe to drop the mask parameter as masked-off elements are undef.
SDValue Op1 = Op->getOperand(0);
SDValue Op2 = Op->getOperand(1);
SDValue VL = Op->getOperand(3);
MVT ContainerVT = VT;
const bool IsFixed = VT.isFixedLengthVector();
if (IsFixed) {
ContainerVT = getContainerForFixedLengthVector(VT);
Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
}
SDLoc DL(Op);
SDValue Val = DAG.getNode(MaskOpc, DL, ContainerVT, Op1, Op2, VL);
if (!IsFixed)
return Val;
return convertFromScalableVector(VT, Val, DAG, Subtarget);
}
// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
// matched to an RVV indexed load. The RVV indexed load instructions only
// support the "unsigned unscaled" addressing mode; indices are implicitly
// zero-extended or truncated to XLEN and are treated as byte offsets. Any
// signed or scaled indexing is extended to the XLEN value type and scaled
// accordingly.
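// For example, on RV32 a gather whose index vector has i64 elements is
// narrowed below with RISCVISD::TRUNCATE_VECTOR_VL, since only the low XLEN
// bits of each index are consumed.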
SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
const auto *MemSD = cast<MemSDNode>(Op.getNode());
EVT MemVT = MemSD->getMemoryVT();
MachineMemOperand *MMO = MemSD->getMemOperand();
SDValue Chain = MemSD->getChain();
SDValue BasePtr = MemSD->getBasePtr();
ISD::LoadExtType LoadExtType;
SDValue Index, Mask, PassThru, VL;
if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
Index = VPGN->getIndex();
Mask = VPGN->getMask();
PassThru = DAG.getUNDEF(VT);
VL = VPGN->getVectorLength();
// VP doesn't support extending loads.
LoadExtType = ISD::NON_EXTLOAD;
} else {
// Else it must be a MGATHER.
auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
Index = MGN->getIndex();
Mask = MGN->getMask();
PassThru = MGN->getPassThru();
LoadExtType = MGN->getExtensionType();
}
MVT IndexVT = Index.getSimpleValueType();
MVT XLenVT = Subtarget.getXLenVT();
assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
"Unexpected VTs!");
assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
// Targets have to explicitly opt-in for extending vector loads.
assert(LoadExtType == ISD::NON_EXTLOAD &&
"Unexpected extending MGATHER/VP_GATHER");
(void)LoadExtType;
// If the mask is known to be all ones, optimize to an unmasked intrinsic;
// the selection of the masked intrinsics doesn't do this for us.
bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
MVT ContainerVT = VT;
if (VT.isFixedLengthVector()) {
// We need to use the larger of the result and index type to determine the
// scalable type to use so we don't increase LMUL for any operand/result.
if (VT.bitsGE(IndexVT)) {
ContainerVT = getContainerForFixedLengthVector(VT);
IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
ContainerVT.getVectorElementCount());
} else {
IndexVT = getContainerForFixedLengthVector(IndexVT);
ContainerVT = MVT::getVectorVT(ContainerVT.getVectorElementType(),
IndexVT.getVectorElementCount());
}
Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
if (!IsUnmasked) {
MVT MaskVT =
MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
}
}
if (!VL)
VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
IndexVT = IndexVT.changeVectorElementType(XLenVT);
SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(),
VL);
Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index,
TrueMask, VL);
}
unsigned IntID =
IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
if (IsUnmasked)
Ops.push_back(DAG.getUNDEF(ContainerVT));
else
Ops.push_back(PassThru);
Ops.push_back(BasePtr);
Ops.push_back(Index);
if (!IsUnmasked)
Ops.push_back(Mask);
Ops.push_back(VL);
if (!IsUnmasked)
Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
SDValue Result =
DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
Chain = Result.getValue(1);
if (VT.isFixedLengthVector())
Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
return DAG.getMergeValues({Result, Chain}, DL);
}
// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
// matched to an RVV indexed store. The RVV indexed store instructions only
// support the "unsigned unscaled" addressing mode; indices are implicitly
// zero-extended or truncated to XLEN and are treated as byte offsets. Any
// signed or scaled indexing is extended to the XLEN value type and scaled
// accordingly.
SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
const auto *MemSD = cast<MemSDNode>(Op.getNode());
EVT MemVT = MemSD->getMemoryVT();
MachineMemOperand *MMO = MemSD->getMemOperand();
SDValue Chain = MemSD->getChain();
SDValue BasePtr = MemSD->getBasePtr();
bool IsTruncatingStore = false;
SDValue Index, Mask, Val, VL;
if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
Index = VPSN->getIndex();
Mask = VPSN->getMask();
Val = VPSN->getValue();
VL = VPSN->getVectorLength();
// VP doesn't support truncating stores.
IsTruncatingStore = false;
} else {
// Else it must be a MSCATTER.
auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
Index = MSN->getIndex();
Mask = MSN->getMask();
Val = MSN->getValue();
IsTruncatingStore = MSN->isTruncatingStore();
}
MVT VT = Val.getSimpleValueType();
MVT IndexVT = Index.getSimpleValueType();
MVT XLenVT = Subtarget.getXLenVT();
assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
"Unexpected VTs!");
assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
// Targets have to explicitly opt-in for extending vector loads and
// truncating vector stores.
assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
(void)IsTruncatingStore;
// If the mask is known to be all ones, optimize to an unmasked intrinsic;
// the selection of the masked intrinsics doesn't do this for us.
bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
MVT ContainerVT = VT;
if (VT.isFixedLengthVector()) {
// We need to use the larger of the value and index type to determine the
// scalable type to use so we don't increase LMUL for any operand/result.
if (VT.bitsGE(IndexVT)) {
ContainerVT = getContainerForFixedLengthVector(VT);
IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
ContainerVT.getVectorElementCount());
} else {
IndexVT = getContainerForFixedLengthVector(IndexVT);
ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
IndexVT.getVectorElementCount());
}
Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
if (!IsUnmasked) {
MVT MaskVT =
MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
}
}
if (!VL)
VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
if (XLenVT == MVT::i32 && IndexVT.getVectorElementType().bitsGT(XLenVT)) {
IndexVT = IndexVT.changeVectorElementType(XLenVT);
SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, Mask.getValueType(),
VL);
Index = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, IndexVT, Index,
TrueMask, VL);
}
unsigned IntID =
IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
Ops.push_back(Val);
Ops.push_back(BasePtr);
Ops.push_back(Index);
if (!IsUnmasked)
Ops.push_back(Mask);
Ops.push_back(VL);
return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
}
SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
SelectionDAG &DAG) const {
const MVT XLenVT = Subtarget.getXLenVT();
SDLoc DL(Op);
SDValue Chain = Op->getOperand(0);
SDValue SysRegNo = DAG.getTargetConstant(
RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
// The rounding mode encoding used by RISCV differs from that used by
// FLT_ROUNDS. To convert between them, the RISCV rounding mode is used as an
// index into a table consisting of a sequence of 4-bit fields, each holding
// the corresponding FLT_ROUNDS mode.
static const int Table =
(int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
(int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
(int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
(int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
(int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
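// For example, when the FRM field reads RISCVFPRndMode::RTZ, the shift below
// selects the 4-bit field at position 4 * RTZ, which holds
// int(RoundingMode::TowardZero), the FLT_ROUNDS encoding for round-toward-zero.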
SDValue Shift =
DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
DAG.getConstant(Table, DL, XLenVT), Shift);
SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
DAG.getConstant(7, DL, XLenVT));
return DAG.getMergeValues({Masked, Chain}, DL);
}
SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
SelectionDAG &DAG) const {
const MVT XLenVT = Subtarget.getXLenVT();
SDLoc DL(Op);
SDValue Chain = Op->getOperand(0);
SDValue RMValue = Op->getOperand(1);
SDValue SysRegNo = DAG.getTargetConstant(
RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
// The rounding mode encoding used by RISCV differs from that used by
// FLT_ROUNDS. To convert between them, the C rounding mode is used as an
// index into a table consisting of a sequence of 4-bit fields, each holding
// the corresponding RISCV mode.
static const unsigned Table =
(RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
(RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
(RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
(RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
(RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
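// For example, an incoming rounding-mode value of
// int(RoundingMode::TowardNegative) selects the 4-bit field holding
// RISCVFPRndMode::RDN, which is then written to the FRM register below.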
SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
DAG.getConstant(2, DL, XLenVT));
SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
DAG.getConstant(Table, DL, XLenVT), Shift);
RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
DAG.getConstant(0x7, DL, XLenVT));
return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
RMValue);
}
static RISCVISD::NodeType getRISCVWOpcodeByIntr(unsigned IntNo) {
switch (IntNo) {
default:
llvm_unreachable("Unexpected Intrinsic");
case Intrinsic::riscv_grev:
return RISCVISD::GREVW;
case Intrinsic::riscv_gorc:
return RISCVISD::GORCW;
case Intrinsic::riscv_bcompress:
return RISCVISD::BCOMPRESSW;
case Intrinsic::riscv_bdecompress:
return RISCVISD::BDECOMPRESSW;
case Intrinsic::riscv_bfp:
return RISCVISD::BFPW;
case Intrinsic::riscv_fsl:
return RISCVISD::FSLW;
case Intrinsic::riscv_fsr:
return RISCVISD::FSRW;
}
}
// Converts the given intrinsic to an i64 operation with any extension.
static SDValue customLegalizeToWOpByIntr(SDNode *N, SelectionDAG &DAG,
unsigned IntNo) {
SDLoc DL(N);
RISCVISD::NodeType WOpcode = getRISCVWOpcodeByIntr(IntNo);
SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
SDValue NewOp2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp1, NewOp2);
// ReplaceNodeResults requires we maintain the same type for the return value.
return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
}
// Returns the opcode of the target-specific SDNode that implements the 32-bit
// form of the given Opcode.
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
switch (Opcode) {
default:
llvm_unreachable("Unexpected opcode");
case ISD::SHL:
return RISCVISD::SLLW;
case ISD::SRA:
return RISCVISD::SRAW;
case ISD::SRL:
return RISCVISD::SRLW;
case ISD::SDIV:
return RISCVISD::DIVW;
case ISD::UDIV:
return RISCVISD::DIVUW;
case ISD::UREM:
return RISCVISD::REMUW;
case ISD::ROTL:
return RISCVISD::ROLW;
case ISD::ROTR:
return RISCVISD::RORW;
case RISCVISD::GREV:
return RISCVISD::GREVW;
case RISCVISD::GORC:
return RISCVISD::GORCW;
}
}
// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
// node. Because i8/i16/i32 isn't a legal type for RV64, these operations would
// otherwise be promoted to i64, making it difficult to select the
// SLLW/DIVUW/.../*W later on because the fact that the operation was
// originally of type i8/i16/i32 is lost.
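// For example, an i32 SRA with a non-constant shift amount is any-extended to
// i64, emitted as RISCVISD::SRAW, and the i64 result is truncated back to i32,
// so that SRAW can be selected.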
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
unsigned ExtOpc = ISD::ANY_EXTEND) {
SDLoc DL(N);
RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
// ReplaceNodeResults requires we maintain the same type for the return value.
return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
}
// Converts the given 32-bit operation to an i64 operation with sign extension
// semantics to reduce the number of sign extension instructions.
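// For example, an i32 ADD becomes
// (trunc (sext_inreg (add (any_extend a), (any_extend b)), i32)),
// which can then be matched to ADDW.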
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
DAG.getValueType(MVT::i32));
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
}
void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
SDLoc DL(N);
switch (N->getOpcode()) {
default:
llvm_unreachable("Don't know how to custom type legalize this operation!");
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: {
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
bool IsStrict = N->isStrictFPOpcode();
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT ||
N->getOpcode() == ISD::STRICT_FP_TO_SINT;
SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
TargetLowering::TypeSoftenFloat) {
if (!isTypeLegal(Op0.getValueType()))
return;
if (IsStrict) {
unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RV64
: RISCVISD::STRICT_FCVT_WU_RV64;
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
SDValue Res = DAG.getNode(
Opc, DL, VTs, N->getOperand(0), Op0,
DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
Results.push_back(Res.getValue(1));
return;
}
unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
SDValue Res =
DAG.getNode(Opc, DL, MVT::i64, Op0,
DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, MVT::i64));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
return;
}
// If the FP type needs to be softened, emit a library call using the 'si'
// version. If we left it to default legalization we'd end up with 'di'. If
// the FP type doesn't need to be softened just let generic type
// legalization promote the result type.
RTLIB::Libcall LC;
if (IsSigned)
LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
else
LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
MakeLibCallOptions CallOptions;
EVT OpVT = Op0.getValueType();
CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
SDValue Result;
std::tie(Result, Chain) =
makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
Results.push_back(Result);
if (IsStrict)
Results.push_back(Chain);
break;
}
case ISD::READCYCLECOUNTER: {
assert(!Subtarget.is64Bit() &&
"READCYCLECOUNTER only has custom type legalization on riscv32");
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
SDValue RCW =
DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
Results.push_back(
DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
Results.push_back(RCW.getValue(2));
break;
}
case ISD::MUL: {
unsigned Size = N->getSimpleValueType(0).getSizeInBits();
unsigned XLen = Subtarget.getXLen();
// This multiply needs to be expanded; try to use MULHSU+MUL if possible.
if (Size > XLen) {
assert(Size == (XLen * 2) && "Unexpected custom legalisation");
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
APInt HighMask = APInt::getHighBitsSet(Size, XLen);
bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
// We need exactly one side to be unsigned.
if (LHSIsU == RHSIsU)
return;
auto MakeMULPair = [&](SDValue S, SDValue U) {
MVT XLenVT = Subtarget.getXLenVT();
S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
};
bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;
// The other operand should be signed, but still prefer MULH when
// possible.
if (RHSIsU && LHSIsS && !RHSIsS)
Results.push_back(MakeMULPair(LHS, RHS));
else if (LHSIsU && RHSIsS && !LHSIsS)
Results.push_back(MakeMULPair(RHS, LHS));
return;
}
LLVM_FALLTHROUGH;
}
case ISD::ADD:
case ISD::SUB:
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
break;
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
if (N->getOperand(1).getOpcode() != ISD::Constant) {
Results.push_back(customLegalizeToWOp(N, DAG));
break;
}
// Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
// similar to customLegalizeToWOpWithSExt, but we must zero_extend the
// shift amount.
if (N->getOpcode() == ISD::SHL) {
SDLoc DL(N);
SDValue NewOp0 =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
SDValue NewOp1 =
DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
DAG.getValueType(MVT::i32));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
}
break;
case ISD::ROTL:
case ISD::ROTR:
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
Results.push_back(customLegalizeToWOp(N, DAG));
break;
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF: {
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
SDValue NewOp0 =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
bool IsCTZ =
N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
return;
}
case ISD::SDIV:
case ISD::UDIV:
case ISD::UREM: {
MVT VT = N->getSimpleValueType(0);
assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
"Unexpected custom legalisation");
// Don't promote division/remainder by a constant, since we should expand those
// to a multiply by a magic constant.
// FIXME: What if the expansion is disabled for minsize?
if (N->getOperand(1).getOpcode() == ISD::Constant)
return;
// If the input is i32, use ANY_EXTEND since the W instructions don't read
// the upper 32 bits. For other types we need to sign or zero extend
// based on the opcode.
unsigned ExtOpc = ISD::ANY_EXTEND;
if (VT != MVT::i32)
ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
: ISD::ZERO_EXTEND;
Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
break;
}
case ISD::UADDO:
case ISD::USUBO: {
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
bool IsAdd = N->getOpcode() == ISD::UADDO;
// Create an ADDW or SUBW.
SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
SDValue Res =
DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
DAG.getValueType(MVT::i32));
// Sign extend the LHS and perform an unsigned compare with the ADDW result.
// Since the inputs are sign extended from i32, this is equivalent to
// comparing the lower 32 bits.
LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
SDValue Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
IsAdd ? ISD::SETULT : ISD::SETUGT);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
Results.push_back(Overflow);
return;
}
case ISD::UADDSAT:
case ISD::USUBSAT: {
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
if (Subtarget.hasStdExtZbb()) {
// With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
// sign extend allows overflow of the lower 32 bits to be detected on
// the promoted size.
SDValue LHS =
DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
SDValue RHS =
DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
return;
}
// Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
// promotion for UADDO/USUBO.
Results.push_back(expandAddSubSat(N, DAG));
return;
}
case ISD::BITCAST: {
EVT VT = N->getValueType(0);
assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
SDValue Op0 = N->getOperand(0);
EVT Op0VT = Op0.getValueType();
MVT XLenVT = Subtarget.getXLenVT();
if (VT == MVT::i16 && Op0VT == MVT::f16 && Subtarget.hasStdExtZfh()) {
SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
} else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
Subtarget.hasStdExtF()) {
SDValue FPConv =
DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
} else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
isTypeLegal(Op0VT)) {
// Custom-legalize bitcasts from fixed-length vector types to illegal
// scalar types in order to improve codegen. Bitcast the vector to a
// one-element vector type whose element type is the same as the result
// type, and extract the first element.
EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
if (isTypeLegal(BVT)) {
SDValue BVec = DAG.getBitcast(BVT, Op0);
Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
DAG.getConstant(0, DL, XLenVT)));
}
}
break;
}
case RISCVISD::GREV:
case RISCVISD::GORC: {
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
assert(isa<ConstantSDNode>(N->getOperand(1)) && "Expected constant");
// This is similar to customLegalizeToWOp: both operands, including the
// constant control value in the second operand, are any-extended to i64
// before building the *W node.
RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
SDValue NewOp0 =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
SDValue NewOp1 =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
// ReplaceNodeResults requires we maintain the same type for the return
// value.
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
break;
}
case RISCVISD::SHFL: {
// There is no SHFLIW instruction, but we can just promote the operation.
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
assert(isa<ConstantSDNode>(N->getOperand(1)) && "Expected constant");
SDValue NewOp0 =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
SDValue NewOp1 =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
SDValue NewRes = DAG.getNode(RISCVISD::SHFL, DL, MVT::i64, NewOp0, NewOp1);
// ReplaceNodeResults requires we maintain the same type for the return
// value.
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
break;
}
case ISD::BSWAP:
case ISD::BITREVERSE: {
MVT VT = N->getSimpleValueType(0);
MVT XLenVT = Subtarget.getXLenVT();
assert((VT == MVT::i8 || VT == MVT::i16 ||
(VT == MVT::i32 && Subtarget.is64Bit())) &&
Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
unsigned Imm = VT.getSizeInBits() - 1;
// If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
if (N->getOpcode() == ISD::BSWAP)
Imm &= ~0x7U;
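// For an i32 input, for example, Imm stays 31 for BITREVERSE and becomes
// 31 & ~7 == 24 for BSWAP, i.e. a byte swap expressed as grev with shamt 24.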
unsigned Opc = Subtarget.is64Bit() ? RISCVISD::GREVW : RISCVISD::GREV;
SDValue GREVI =
DAG.getNode(Opc, DL, XLenVT, NewOp0, DAG.getConstant(Imm, DL, XLenVT));
// ReplaceNodeResults requires we maintain the same type for the return
// value.
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, GREVI));
break;
}
case ISD::FSHL:
case ISD::FSHR: {
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
Subtarget.hasStdExtZbt() && "Unexpected custom legalisation");
SDValue NewOp0 =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
SDValue NewOp1 =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
SDValue NewShAmt =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
// FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits.
// Mask the shift amount to 5 bits to prevent accidentally setting bit 5.
NewShAmt = DAG.getNode(ISD::AND, DL, MVT::i64, NewShAmt,
DAG.getConstant(0x1f, DL, MVT::i64));
// fshl and fshr concatenate their operands in the same order. fsrw and fslw
// instructions use different orders. fshl will return its first operand for a
// shift of zero, fshr will return its second operand. fsl and fsr both return
// rs1 so the ISD nodes need to have different operand orders. The shift amount
// is in rs2.
unsigned Opc = RISCVISD::FSLW;
if (N->getOpcode() == ISD::FSHR) {
std::swap(NewOp0, NewOp1);
Opc = RISCVISD::FSRW;
}
SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewShAmt);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
break;
}
case ISD::EXTRACT_VECTOR_ELT: {
// Custom-legalize an EXTRACT_VECTOR_ELT where XLEN < SEW, as the SEW element
// type is illegal (currently only vXi64 on RV32).
// With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
// transferred to the destination register. We issue two of these from the
// upper and lower halves of the SEW-bit vector element, slid down to the
// first element.
SDValue Vec = N->getOperand(0);
SDValue Idx = N->getOperand(1);
// The vector type hasn't been legalized yet so we can't issue target
// specific nodes if it needs legalization.
// FIXME: We would manually legalize if it's important.
if (!isTypeLegal(Vec.getValueType()))
return;
MVT VecVT = Vec.getSimpleValueType();
assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
VecVT.getVectorElementType() == MVT::i64 &&
"Unexpected EXTRACT_VECTOR_ELT legalization");
// If this is a fixed vector, we need to convert it to a scalable vector.
MVT ContainerVT = VecVT;
if (VecVT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VecVT);
Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
}
MVT XLenVT = Subtarget.getXLenVT();
// Use a VL of 1 to avoid processing more elements than we need.
MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
SDValue VL = DAG.getConstant(1, DL, XLenVT);
SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
// Unless the index is known to be 0, we must slide the vector down to get
// the desired element into index 0.
if (!isNullConstant(Idx)) {
Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
}
// Extract the lower XLEN bits of the correct vector element.
SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
// To extract the upper XLEN bits of the vector element, shift the first
// element right by 32 bits and re-extract the lower XLEN bits.
SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
DAG.getConstant(32, DL, XLenVT), VL);
SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec,
ThirtyTwoV, Mask, VL);
SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
break;
}
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
switch (IntNo) {
default:
llvm_unreachable(
"Don't know how to custom type legalize this intrinsic!");
case Intrinsic::riscv_grev:
case Intrinsic::riscv_gorc:
case Intrinsic::riscv_bcompress:
case Intrinsic::riscv_bdecompress:
case Intrinsic::riscv_bfp: {
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
Results.push_back(customLegalizeToWOpByIntr(N, DAG, IntNo));
break;
}
case Intrinsic::riscv_fsl:
case Intrinsic::riscv_fsr: {
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
SDValue NewOp1 =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
SDValue NewOp2 =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
SDValue NewOp3 =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3));
unsigned Opc = getRISCVWOpcodeByIntr(IntNo);
SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2, NewOp3);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
break;
}
case Intrinsic::riscv_orc_b: {
// Lower to the GORCI encoding for orc.b with the operand extended.
SDValue NewOp =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
// If Zbp is enabled, use GORCIW which will sign extend the result.
unsigned Opc =
Subtarget.hasStdExtZbp() ? RISCVISD::GORCW : RISCVISD::GORC;
SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp,
DAG.getConstant(7, DL, MVT::i64));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
return;
}
case Intrinsic::riscv_shfl:
case Intrinsic::riscv_unshfl: {
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
SDValue NewOp1 =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
SDValue NewOp2 =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
unsigned Opc =
IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFLW : RISCVISD::UNSHFLW;
// There is no (UN)SHFLIW. If the control word is a constant, we can use
// (UN)SHFLI with bit 4 of the control word cleared. The upper 32-bit half
// will be shuffled the same way as the lower 32-bit half, but the two
// halves won't cross.
if (isa<ConstantSDNode>(NewOp2)) {
NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
DAG.getConstant(0xf, DL, MVT::i64));
Opc =
IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFL : RISCVISD::UNSHFL;
}
SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
break;
}
case Intrinsic::riscv_vmv_x_s: {
EVT VT = N->getValueType(0);
MVT XLenVT = Subtarget.getXLenVT();
if (VT.bitsLT(XLenVT)) {
// Simple case: just extract using vmv.x.s and truncate.
SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
Subtarget.getXLenVT(), N->getOperand(1));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
return;
}
assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
"Unexpected custom legalization");
// We need to do the move in two steps.
SDValue Vec = N->getOperand(1);
MVT VecVT = Vec.getSimpleValueType();
// First extract the lower XLEN bits of the element.
SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
// To extract the upper XLEN bits of the vector element, shift the first
// element right by 32 bits and re-extract the lower XLEN bits.
SDValue VL = DAG.getConstant(1, DL, XLenVT);
MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT,
DAG.getConstant(32, DL, XLenVT), VL);
SDValue LShr32 =
DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV, Mask, VL);
SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
Results.push_back(
DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
break;
}
}
break;
}
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMIN:
if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
Results.push_back(V);
break;
case ISD::VP_REDUCE_ADD:
case ISD::VP_REDUCE_AND:
case ISD::VP_REDUCE_OR:
case ISD::VP_REDUCE_XOR:
case ISD::VP_REDUCE_SMAX:
case ISD::VP_REDUCE_UMAX:
case ISD::VP_REDUCE_SMIN:
case ISD::VP_REDUCE_UMIN:
if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
Results.push_back(V);
break;
case ISD::FLT_ROUNDS_: {
SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
SDValue Res = DAG.getNode(ISD::FLT_ROUNDS_, DL, VTs, N->getOperand(0));
Results.push_back(Res.getValue(0));
Results.push_back(Res.getValue(1));
break;
}
}
}
// A structure to hold one of the bit-manipulation patterns below. Together, a
// SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
// (or (and (shl x, 1), 0xAAAAAAAA),
// (and (srl x, 1), 0x55555555))
struct RISCVBitmanipPat {
SDValue Op;
unsigned ShAmt;
bool IsSHL;
bool formsPairWith(const RISCVBitmanipPat &Other) const {
return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
}
};
// Matches patterns of the form
// (and (shl x, C2), (C1 << C2))
// (and (srl x, C2), C1)
// (shl (and x, C1), C2)
// (srl (and x, (C1 << C2)), C2)
// Where C2 is a power of 2 and C1 has at least that many leading zeroes.
// The expected masks for each shift amount are specified in BitmanipMasks where
// BitmanipMasks[log2(C2)] specifies the expected C1 value.
// The max allowed shift amount is either XLen/2 or XLen/4, determined by
// whether BitmanipMasks contains 6 or 5 entries, assuming that the maximum
// possible XLen is 64.
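// For example, with the GREVI masks defined below, (and (srl x, 4), 0x0F0F0F0F)
// on an i32 value matches with ShAmt = 4 and MaskIdx = Log2_32(4) = 2, i.e.
// BitmanipMasks[2].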
static Optional<RISCVBitmanipPat>
matchRISCVBitmanipPat(SDValue Op, ArrayRef<uint64_t> BitmanipMasks) {
assert((BitmanipMasks.size() == 5 || BitmanipMasks.size() == 6) &&
"Unexpected number of masks");
Optional<uint64_t> Mask;
// Optionally consume a mask around the shift operation.
if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
Mask = Op.getConstantOperandVal(1);
Op = Op.getOperand(0);
}
if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
return None;
bool IsSHL = Op.getOpcode() == ISD::SHL;
if (!isa<ConstantSDNode>(Op.getOperand(1)))
return None;
uint64_t ShAmt = Op.getConstantOperandVal(1);
unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
if (ShAmt >= Width || !isPowerOf2_64(ShAmt))
return None;
// If we don't have enough masks for 64 bit, then we must be trying to
// match SHFL so we're only allowed to shift 1/4 of the width.
if (BitmanipMasks.size() == 5 && ShAmt >= (Width / 2))
return None;
SDValue Src = Op.getOperand(0);
// The expected mask is shifted left when the AND is found around SHL
// patterns.
// ((x >> 1) & 0x55555555)
// ((x << 1) & 0xAAAAAAAA)
bool SHLExpMask = IsSHL;
if (!Mask) {
// Sometimes LLVM keeps the mask as an operand of the shift, typically when
// the mask is all ones: consume that now.
if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
Mask = Src.getConstantOperandVal(1);
Src = Src.getOperand(0);
// The expected mask is now in fact shifted left for SRL, so reverse the
// decision.
// ((x & 0xAAAAAAAA) >> 1)
// ((x & 0x55555555) << 1)
SHLExpMask = !SHLExpMask;
} else {
// Use a default shifted mask of all-ones if there's no AND, truncated
// down to the expected width. This simplifies the logic later on.
Mask = maskTrailingOnes<uint64_t>(Width);
*Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
}
}
unsigned MaskIdx = Log2_32(ShAmt);
uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
if (SHLExpMask)
ExpMask <<= ShAmt;
if (Mask != ExpMask)
return None;
return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
}
// Matches any of the following bit-manipulation patterns:
// (and (shl x, 1), (0x55555555 << 1))
// (and (srl x, 1), 0x55555555)
// (shl (and x, 0x55555555), 1)
// (srl (and x, (0x55555555 << 1)), 1)
// where the shift amount and mask may vary thus:
// [1] = 0x55555555 / 0xAAAAAAAA
// [2] = 0x33333333 / 0xCCCCCCCC
// [4] = 0x0F0F0F0F / 0xF0F0F0F0
// [8] = 0x00FF00FF / 0xFF00FF00
// [16] = 0x0000FFFF / 0xFFFF0000
// [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
static Optional<RISCVBitmanipPat> matchGREVIPat(SDValue Op) {
// These are the unshifted masks which we use to match bit-manipulation
// patterns. They may be shifted left in certain circumstances.
static const uint64_t BitmanipMasks[] = {
0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
return matchRISCVBitmanipPat(Op, BitmanipMasks);
}
// Match the following pattern as a GREVI(W) operation
// (or (BITMANIP_SHL x), (BITMANIP_SRL x))
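// For example, the i32 nibble swap
// (or (and (shl x, 4), 0xF0F0F0F0), (and (srl x, 4), 0x0F0F0F0F))
// is combined to (GREV x, 4).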
static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
EVT VT = Op.getValueType();
if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
auto LHS = matchGREVIPat(Op.getOperand(0));
auto RHS = matchGREVIPat(Op.getOperand(1));
if (LHS && RHS && LHS->formsPairWith(*RHS)) {
SDLoc DL(Op);
return DAG.getNode(RISCVISD::GREV, DL, VT, LHS->Op,
DAG.getConstant(LHS->ShAmt, DL, VT));
}
}
return SDValue();
}
// Matches any of the following patterns as a GORCI(W) operation
// 1. (or (GREVI x, shamt), x) if shamt is a power of 2
// 2. (or x, (GREVI x, shamt)) if shamt is a power of 2
// 3. (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
// Note that with the variant of 3.,
// (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
// the inner pattern will first be matched as GREVI and then the outer
// pattern will be matched to GORC via the first rule above.
// 4. (or (rotl/rotr x, bitwidth/2), x)
static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
EVT VT = Op.getValueType();
if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
SDLoc DL(Op);
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
if (Reverse.getOpcode() == RISCVISD::GREV && Reverse.getOperand(0) == X &&
isa<ConstantSDNode>(Reverse.getOperand(1)) &&
isPowerOf2_32(Reverse.getConstantOperandVal(1)))
return DAG.getNode(RISCVISD::GORC, DL, VT, X, Reverse.getOperand(1));
// We can also form GORCI from ROTL/ROTR by half the bitwidth.
if ((Reverse.getOpcode() == ISD::ROTL ||
Reverse.getOpcode() == ISD::ROTR) &&
Reverse.getOperand(0) == X &&
isa<ConstantSDNode>(Reverse.getOperand(1))) {
uint64_t RotAmt = Reverse.getConstantOperandVal(1);
if (RotAmt == (VT.getSizeInBits() / 2))
return DAG.getNode(RISCVISD::GORC, DL, VT, X,
DAG.getConstant(RotAmt, DL, VT));
}
return SDValue();
};
// Check for either commutable permutation of (or (GREVI x, shamt), x)
if (SDValue V = MatchOROfReverse(Op0, Op1))
return V;
if (SDValue V = MatchOROfReverse(Op1, Op0))
return V;
// OR is commutable so canonicalize its OR operand to the left
if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
std::swap(Op0, Op1);
if (Op0.getOpcode() != ISD::OR)
return SDValue();
SDValue OrOp0 = Op0.getOperand(0);
SDValue OrOp1 = Op0.getOperand(1);
auto LHS = matchGREVIPat(OrOp0);
// OR is commutable so swap the operands and try again: x might have been
// on the left
if (!LHS) {
std::swap(OrOp0, OrOp1);
LHS = matchGREVIPat(OrOp0);
}
auto RHS = matchGREVIPat(Op1);
if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
return DAG.getNode(RISCVISD::GORC, DL, VT, LHS->Op,
DAG.getConstant(LHS->ShAmt, DL, VT));
}
}
return SDValue();
}
// Matches any of the following bit-manipulation patterns:
// (and (shl x, 1), (0x22222222 << 1))
// (and (srl x, 1), 0x22222222)
// (shl (and x, 0x22222222), 1)
// (srl (and x, (0x22222222 << 1)), 1)
// where the shift amount and mask may vary thus:
// [1] = 0x22222222 / 0x44444444
// [2] = 0x0C0C0C0C / 0x3C3C3C3C
// [4] = 0x00F000F0 / 0x0F000F00
// [8] = 0x0000FF00 / 0x00FF0000
// [16] = 0x00000000FFFF0000 / 0x0000FFFF00000000 (for RV64)
static Optional<RISCVBitmanipPat> matchSHFLPat(SDValue Op) {
// These are the unshifted masks which we use to match bit-manipulation
// patterns. They may be shifted left in certain circumstances.
static const uint64_t BitmanipMasks[] = {
0x2222222222222222ULL, 0x0C0C0C0C0C0C0C0CULL, 0x00F000F000F000F0ULL,
0x0000FF000000FF00ULL, 0x00000000FFFF0000ULL};
return matchRISCVBitmanipPat(Op, BitmanipMasks);
}
// Match (or (or (SHFL_SHL x), (SHFL_SHR x)), (SHFL_AND x))
static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
assert(Subtarget.hasStdExtZbp() && "Expected Zbp extenson");
EVT VT = Op.getValueType();
if (VT != MVT::i32 && VT != Subtarget.getXLenVT())
return SDValue();
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
// Or is commutable so canonicalize the second OR to the LHS.
if (Op0.getOpcode() != ISD::OR)
std::swap(Op0, Op1);
if (Op0.getOpcode() != ISD::OR)
return SDValue();
// We found an inner OR, so our operands are the operands of the inner OR
// and the other operand of the outer OR.
SDValue A = Op0.getOperand(0);
SDValue B = Op0.getOperand(1);
SDValue C = Op1;
auto Match1 = matchSHFLPat(A);
auto Match2 = matchSHFLPat(B);
// If neither matched, we failed.
if (!Match1 && !Match2)
return SDValue();
// We had at least one match. If one failed, try the remaining C operand.
if (!Match1) {
std::swap(A, C);
Match1 = matchSHFLPat(A);
if (!Match1)
return SDValue();
} else if (!Match2) {
std::swap(B, C);
Match2 = matchSHFLPat(B);
if (!Match2)
return SDValue();
}
assert(Match1 && Match2);
// Make sure our matches pair up.
if (!Match1->formsPairWith(*Match2))
return SDValue();
// All that remains is to make sure C is an AND with the same input that masks
// out the bits that are being shuffled.
if (C.getOpcode() != ISD::AND || !isa<ConstantSDNode>(C.getOperand(1)) ||
C.getOperand(0) != Match1->Op)
return SDValue();
uint64_t Mask = C.getConstantOperandVal(1);
static const uint64_t BitmanipMasks[] = {
0x9999999999999999ULL, 0xC3C3C3C3C3C3C3C3ULL, 0xF00FF00FF00FF00FULL,
0xFF0000FFFF0000FFULL, 0xFFFF00000000FFFFULL,
};
unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
unsigned MaskIdx = Log2_32(Match1->ShAmt);
uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
if (Mask != ExpMask)
return SDValue();
SDLoc DL(Op);
return DAG.getNode(RISCVISD::SHFL, DL, VT, Match1->Op,
DAG.getConstant(Match1->ShAmt, DL, VT));
}
// Optimize (add (shl x, c0), (shl y, c1)) ->
// (SLLI (SH*ADD x, y), c0), if c1-c0 is 1, 2 or 3.
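// For example, (add (shl x, 5), (shl y, 6)) becomes
// (shl (add (shl y, 1), x), 5), which can be selected as SH1ADD y, x followed
// by an SLLI of 5.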
static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
// Perform this optimization only when the Zba extension is enabled.
if (!Subtarget.hasStdExtZba())
return SDValue();
// Skip for vector types and larger types.
EVT VT = N->getValueType(0);
if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
return SDValue();
// The two operand nodes must be SHL and have no other use.
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
!N0->hasOneUse() || !N1->hasOneUse())
return SDValue();
// Check c0 and c1.
auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
if (!N0C || !N1C)
return SDValue();
int64_t C0 = N0C->getSExtValue();
int64_t C1 = N1C->getSExtValue();
if (C0 <= 0 || C1 <= 0)
return SDValue();
// Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
int64_t Bits = std::min(C0, C1);
int64_t Diff = std::abs(C0 - C1);
if (Diff != 1 && Diff != 2 && Diff != 3)
return SDValue();
// Build nodes.
SDLoc DL(N);
SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
SDValue NA0 =
DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT));
SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS);
return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
}
// Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
// non-zero, and to x when it is zero. Any repeated GREVI stage undoes itself.
// Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). A repeated GORCI
// stage does not undo itself, but it is redundant.
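// For example, (GREVI (GREVI x, 1), 2) folds to (GREVI x, 3),
// (GREVI (GREVI x, 3), 3) folds to x, and (GORCI (GORCI x, 1), 3) folds to
// (GORCI x, 3).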
static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
SDValue Src = N->getOperand(0);
if (Src.getOpcode() != N->getOpcode())
return SDValue();
if (!isa<ConstantSDNode>(N->getOperand(1)) ||
!isa<ConstantSDNode>(Src.getOperand(1)))
return SDValue();
unsigned ShAmt1 = N->getConstantOperandVal(1);
unsigned ShAmt2 = Src.getConstantOperandVal(1);
Src = Src.getOperand(0);
unsigned CombinedShAmt;
if (N->getOpcode() == RISCVISD::GORC || N->getOpcode() == RISCVISD::GORCW)
CombinedShAmt = ShAmt1 | ShAmt2;
else
CombinedShAmt = ShAmt1 ^ ShAmt2;
if (CombinedShAmt == 0)
return Src;
SDLoc DL(N);
return DAG.getNode(
N->getOpcode(), DL, N->getValueType(0), Src,
DAG.getConstant(CombinedShAmt, DL, N->getOperand(1).getValueType()));
}
// Combine a constant select operand into its use:
//
// (and (select cond, -1, c), x)
// -> (select cond, x, (and x, c)) [AllOnes=1]
// (or (select cond, 0, c), x)
// -> (select cond, x, (or x, c)) [AllOnes=0]
// (xor (select cond, 0, c), x)
// -> (select cond, x, (xor x, c)) [AllOnes=0]
// (add (select cond, 0, c), x)
// -> (select cond, x, (add x, c)) [AllOnes=0]
// (sub x, (select cond, 0, c))
// -> (select cond, x, (sub x, c)) [AllOnes=0]
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
SelectionDAG &DAG, bool AllOnes) {
EVT VT = N->getValueType(0);
// Skip vectors.
if (VT.isVector())
return SDValue();
if ((Slct.getOpcode() != ISD::SELECT &&
Slct.getOpcode() != RISCVISD::SELECT_CC) ||
!Slct.hasOneUse())
return SDValue();
auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
};
bool SwapSelectOps;
unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
SDValue TrueVal = Slct.getOperand(1 + OpOffset);
SDValue FalseVal = Slct.getOperand(2 + OpOffset);
SDValue NonConstantVal;
if (isZeroOrAllOnes(TrueVal, AllOnes)) {
SwapSelectOps = false;
NonConstantVal = FalseVal;
} else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
SwapSelectOps = true;
NonConstantVal = TrueVal;
} else
return SDValue();
// Slct is now known to be the desired identity constant when CC is true.
TrueVal = OtherOp;
FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
// Unless SwapSelectOps says the condition should be false.
if (SwapSelectOps)
std::swap(TrueVal, FalseVal);
if (Slct.getOpcode() == RISCVISD::SELECT_CC)
return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
{Slct.getOperand(0), Slct.getOperand(1),
Slct.getOperand(2), TrueVal, FalseVal});
return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
{Slct.getOperand(0), TrueVal, FalseVal});
}
// Attempt combineSelectAndUse on each operand of a commutative operator N.
static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
bool AllOnes) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes))
return Result;
if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes))
return Result;
return SDValue();
}
// Transform (add (mul x, c0), c1) ->
// (add (mul (add x, c1/c0), c0), c1%c0),
// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
// that should be excluded is when c0*(c1/c0) is simm12, which will lead
// to an infinite loop in DAGCombine if transformed.
// Or transform (add (mul x, c0), c1) ->
// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
// lead to an infinite loop in DAGCombine if transformed.
// Or transform (add (mul x, c0), c1) ->
// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
// lead to an infinite loop in DAGCombine if transformed.
// Or transform (add (mul x, c0), c1) ->
// (mul (add x, c1/c0), c0),
// if c1%c0 is zero and c1/c0 is simm12 while c1 is not.
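// For example, with c0 = 100 and c1 = 4099 (not simm12): c1/c0 = 40 and
// c1%c0 = 99 are both simm12 and c0*(c1/c0) = 4000 is not, so the node becomes
// (add (mul (add x, 40), 100), 99).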
static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
// Skip for vector types and larger types.
EVT VT = N->getValueType(0);
if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
return SDValue();
// The first operand node must be a MUL and have no other use.
SDValue N0 = N->getOperand(0);
if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
return SDValue();
// Check if c0 and c1 match above conditions.
auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!N0C || !N1C)
return SDValue();
+ // If N0C has multiple uses it's possible one of the cases in
+ // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
+ // in an infinite loop.
+ if (!N0C->hasOneUse())
+ return SDValue();
int64_t C0 = N0C->getSExtValue();
int64_t C1 = N1C->getSExtValue();
int64_t CA, CB;
if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
return SDValue();
// Search for a proper CA (non-zero) and CB such that both are simm12.
if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
!isInt<12>(C0 * (C1 / C0))) {
CA = C1 / C0;
CB = C1 % C0;
} else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
CA = C1 / C0 + 1;
CB = C1 % C0 - C0;
} else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
CA = C1 / C0 - 1;
CB = C1 % C0 + C0;
} else
return SDValue();
// Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
SDLoc DL(N);
SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
DAG.getConstant(CA, DL, VT));
SDValue New1 =
DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
}
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
return V;
if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
return V;
// fold (add (select lhs, rhs, cc, 0, y), x) ->
// (select lhs, rhs, cc, x, (add x, y))
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
}
static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG) {
// fold (sub x, (select lhs, rhs, cc, 0, y)) ->
// (select lhs, rhs, cc, x, (sub x, y))
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false);
}
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG) {
// fold (and (select lhs, rhs, cc, -1, y), x) ->
// (select lhs, rhs, cc, x, (and x, y))
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true);
}
static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
if (Subtarget.hasStdExtZbp()) {
if (auto GREV = combineORToGREV(SDValue(N, 0), DAG, Subtarget))
return GREV;
if (auto GORC = combineORToGORC(SDValue(N, 0), DAG, Subtarget))
return GORC;
if (auto SHFL = combineORToSHFL(SDValue(N, 0), DAG, Subtarget))
return SHFL;
}
// fold (or (select cond, 0, y), x) ->
// (select cond, x, (or x, y))
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
}
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG) {
// fold (xor (select cond, 0, y), x) ->
// (select cond, x, (xor x, y))
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
}
// Attempt to turn ANY_EXTEND into SIGN_EXTEND if the input to the ANY_EXTEND
// has users that require SIGN_EXTEND and the SIGN_EXTEND can be done for free
// by an instruction like ADDW/SUBW/MULW. Without this the ANY_EXTEND would be
// removed during type legalization leaving an ADD/SUB/MUL use that won't use
// ADDW/SUBW/MULW.
static SDValue performANY_EXTENDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
if (!Subtarget.is64Bit())
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDValue Src = N->getOperand(0);
EVT VT = N->getValueType(0);
if (VT != MVT::i64 || Src.getValueType() != MVT::i32)
return SDValue();
// The opcode must be one that can implicitly sign_extend.
// FIXME: Additional opcodes.
switch (Src.getOpcode()) {
default:
return SDValue();
case ISD::MUL:
if (!Subtarget.hasStdExtM())
return SDValue();
LLVM_FALLTHROUGH;
case ISD::ADD:
case ISD::SUB:
break;
}
// Only handle cases where the result is used by a CopyToReg. That likely
// means the value is a liveout of the basic block. This helps prevent
// infinite combine loops like PR51206.
if (none_of(N->uses(),
[](SDNode *User) { return User->getOpcode() == ISD::CopyToReg; }))
return SDValue();
SmallVector<SDNode *, 4> SetCCs;
for (SDNode::use_iterator UI = Src.getNode()->use_begin(),
UE = Src.getNode()->use_end();
UI != UE; ++UI) {
SDNode *User = *UI;
if (User == N)
continue;
if (UI.getUse().getResNo() != Src.getResNo())
continue;
// All i32 setccs are legalized by sign extending operands.
if (User->getOpcode() == ISD::SETCC) {
SetCCs.push_back(User);
continue;
}
// We don't know if we can extend this user.
break;
}
// If we don't have any SetCCs, this isn't worthwhile.
if (SetCCs.empty())
return SDValue();
SDLoc DL(N);
SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src);
DCI.CombineTo(N, SExt);
// Promote all the setccs.
for (SDNode *SetCC : SetCCs) {
SmallVector<SDValue, 4> Ops;
for (unsigned j = 0; j != 2; ++j) {
SDValue SOp = SetCC->getOperand(j);
if (SOp == Src)
Ops.push_back(SExt);
else
Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, SOp));
}
Ops.push_back(SetCC->getOperand(2));
DCI.CombineTo(SetCC,
DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
}
return SDValue(N, 0);
}
// Try to form VWMUL, VWMULU or VWMULSU.
// TODO: Support VWMULSU.vx with a sign extend Op and a splat of scalar Op.
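// For example, (mul_vl (vsext_vl a, m, vl), (vsext_vl b, m, vl)), where a and
// b have elements half the width of the result, is rewritten as
// (vwmul_vl a, b, m, vl).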
static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG,
bool Commute) {
assert(N->getOpcode() == RISCVISD::MUL_VL && "Unexpected opcode");
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
if (Commute)
std::swap(Op0, Op1);
bool IsSignExt = Op0.getOpcode() == RISCVISD::VSEXT_VL;
bool IsZeroExt = Op0.getOpcode() == RISCVISD::VZEXT_VL;
bool IsVWMULSU = IsSignExt && Op1.getOpcode() == RISCVISD::VZEXT_VL;
if ((!IsSignExt && !IsZeroExt) || !Op0.hasOneUse())
return SDValue();
SDValue Mask = N->getOperand(2);
SDValue VL = N->getOperand(3);
// Make sure the mask and VL match.
if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL)
return SDValue();
MVT VT = N->getSimpleValueType(0);
// Determine the narrow size for a widening multiply.
unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize),
VT.getVectorElementCount());
SDLoc DL(N);
// See if the other operand is the same opcode.
if (IsVWMULSU || Op0.getOpcode() == Op1.getOpcode()) {
if (!Op1.hasOneUse())
return SDValue();
// Make sure the mask and VL match.
if (Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
return SDValue();
Op1 = Op1.getOperand(0);
} else if (Op1.getOpcode() == RISCVISD::VMV_V_X_VL) {
// The operand is a splat of a scalar.
// The VL must be the same.
if (Op1.getOperand(1) != VL)
return SDValue();
// Get the scalar value.
Op1 = Op1.getOperand(0);
// See if we have enough sign bits or zero bits in the scalar to use a
// widening multiply by splatting to a smaller element size.
unsigned EltBits = VT.getScalarSizeInBits();
unsigned ScalarBits = Op1.getValueSizeInBits();
// Make sure we're getting all element bits from the scalar register.
// FIXME: Support implicit sign extension of vmv.v.x?
if (ScalarBits < EltBits)
return SDValue();
if (IsSignExt) {
if (DAG.ComputeNumSignBits(Op1) <= (ScalarBits - NarrowSize))
return SDValue();
} else {
APInt Mask = APInt::getBitsSetFrom(ScalarBits, NarrowSize);
if (!DAG.MaskedValueIsZero(Op1, Mask))
return SDValue();
}
Op1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT, Op1, VL);
} else
return SDValue();
Op0 = Op0.getOperand(0);
// Re-introduce narrower extends if needed.
unsigned ExtOpc = IsSignExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL;
if (Op0.getValueType() != NarrowVT)
Op0 = DAG.getNode(ExtOpc, DL, NarrowVT, Op0, Mask, VL);
// vwmulsu requires the second operand to be zero extended.
ExtOpc = IsVWMULSU ? RISCVISD::VZEXT_VL : ExtOpc;
if (Op1.getValueType() != NarrowVT)
Op1 = DAG.getNode(ExtOpc, DL, NarrowVT, Op1, Mask, VL);
unsigned WMulOpc = RISCVISD::VWMULSU_VL;
if (!IsVWMULSU)
WMulOpc = IsSignExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL;
return DAG.getNode(WMulOpc, DL, VT, Op0, Op1, Mask, VL);
}
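// Map a floating-point rounding operation to the static rounding mode
// encoding used as the FRM operand of the FCVT_* nodes below, or Invalid if
// the opcode is not one of the recognized rounding operations.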
static RISCVFPRndMode::RoundingMode matchRoundingOp(SDValue Op) {
switch (Op.getOpcode()) {
case ISD::FROUNDEVEN: return RISCVFPRndMode::RNE;
case ISD::FTRUNC: return RISCVFPRndMode::RTZ;
case ISD::FFLOOR: return RISCVFPRndMode::RDN;
case ISD::FCEIL: return RISCVFPRndMode::RUP;
case ISD::FROUND: return RISCVFPRndMode::RMM;
}
return RISCVFPRndMode::Invalid;
}
// Fold
// (fp_to_int (froundeven X)) -> fcvt X, rne
// (fp_to_int (ftrunc X)) -> fcvt X, rtz
// (fp_to_int (ffloor X)) -> fcvt X, rdn
// (fp_to_int (fceil X)) -> fcvt X, rup
// (fp_to_int (fround X)) -> fcvt X, rmm
static SDValue performFP_TO_INTCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
SelectionDAG &DAG = DCI.DAG;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MVT XLenVT = Subtarget.getXLenVT();
// Only handle XLen or i32 types. Other types narrower than XLen will
// eventually be legalized to XLenVT.
EVT VT = N->getValueType(0);
if (VT != MVT::i32 && VT != XLenVT)
return SDValue();
SDValue Src = N->getOperand(0);
// Ensure the FP type is also legal.
if (!TLI.isTypeLegal(Src.getValueType()))
return SDValue();
// Don't do this for f16 with Zfhmin and not Zfh.
if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
return SDValue();
RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src);
if (FRM == RISCVFPRndMode::Invalid)
return SDValue();
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
unsigned Opc;
if (VT == XLenVT)
Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
else
Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
SDLoc DL(N);
SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src.getOperand(0),
DAG.getTargetConstant(FRM, DL, XLenVT));
return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
}
// Fold
// (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
// (fp_to_int_sat (ftrunc X)) -> (select X == nan, 0, (fcvt X, rtz))
// (fp_to_int_sat (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))
// (fp_to_int_sat (fceil X)) -> (select X == nan, 0, (fcvt X, rup))
// (fp_to_int_sat (fround X)) -> (select X == nan, 0, (fcvt X, rmm))
static SDValue performFP_TO_INT_SATCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
SelectionDAG &DAG = DCI.DAG;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MVT XLenVT = Subtarget.getXLenVT();
// Only handle XLen types. Other types narrower than XLen will eventually be
// legalized to XLenVT.
EVT DstVT = N->getValueType(0);
if (DstVT != XLenVT)
return SDValue();
SDValue Src = N->getOperand(0);
// Ensure the FP type is also legal.
if (!TLI.isTypeLegal(Src.getValueType()))
return SDValue();
// Don't do this for f16 with Zfhmin and not Zfh.
if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
return SDValue();
EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src);
if (FRM == RISCVFPRndMode::Invalid)
return SDValue();
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
unsigned Opc;
if (SatVT == DstVT)
Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
else if (DstVT == MVT::i64 && SatVT == MVT::i32)
Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
else
return SDValue();
// FIXME: Support other SatVTs by clamping before or after the conversion.
Src = Src.getOperand(0);
SDLoc DL(N);
SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
DAG.getTargetConstant(FRM, DL, XLenVT));
// RISCV FP-to-int conversions saturate to the destination register size, but
// don't produce 0 for nan.
SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
}
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
// Helper to call SimplifyDemandedBits on an operand of N where only some low
// bits are demanded. N will be added to the Worklist if it was not deleted.
// Caller should return SDValue(N, 0) if this returns true.
auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
SDValue Op = N->getOperand(OpNo);
APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
if (!SimplifyDemandedBits(Op, Mask, DCI))
return false;
if (N->getOpcode() != ISD::DELETED_NODE)
DCI.AddToWorklist(N);
return true;
};
switch (N->getOpcode()) {
default:
break;
case RISCVISD::SplitF64: {
SDValue Op0 = N->getOperand(0);
// If the input to SplitF64 is just BuildPairF64 then the operation is
// redundant. Instead, use BuildPairF64's operands directly.
if (Op0->getOpcode() == RISCVISD::BuildPairF64)
return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
SDLoc DL(N);
// It's cheaper to materialise two 32-bit integers than to load a double
// from the constant pool and transfer it to integer registers through the
// stack.
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
APInt V = C->getValueAPF().bitcastToAPInt();
SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
return DCI.CombineTo(N, Lo, Hi);
}
// This is a target-specific version of a DAGCombine performed in
// DAGCombiner::visitBITCAST. It performs the equivalent of:
// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
!Op0.getNode()->hasOneUse())
break;
SDValue NewSplitF64 =
DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
Op0.getOperand(0));
SDValue Lo = NewSplitF64.getValue(0);
SDValue Hi = NewSplitF64.getValue(1);
APInt SignBit = APInt::getSignMask(32);
if (Op0.getOpcode() == ISD::FNEG) {
SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
DAG.getConstant(SignBit, DL, MVT::i32));
return DCI.CombineTo(N, Lo, NewHi);
}
assert(Op0.getOpcode() == ISD::FABS);
SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
DAG.getConstant(~SignBit, DL, MVT::i32));
return DCI.CombineTo(N, Lo, NewHi);
}
case RISCVISD::SLLW:
case RISCVISD::SRAW:
case RISCVISD::SRLW:
case RISCVISD::ROLW:
case RISCVISD::RORW: {
// Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
if (SimplifyDemandedLowBitsHelper(0, 32) ||
SimplifyDemandedLowBitsHelper(1, 5))
return SDValue(N, 0);
break;
}
case RISCVISD::CLZW:
case RISCVISD::CTZW: {
// Only the lower 32 bits of the first operand are read
if (SimplifyDemandedLowBitsHelper(0, 32))
return SDValue(N, 0);
break;
}
case RISCVISD::GREV:
case RISCVISD::GORC: {
// Only the lower log2(Bitwidth) bits of the shift amount are read.
unsigned BitWidth = N->getOperand(1).getValueSizeInBits();
assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth)))
return SDValue(N, 0);
return combineGREVI_GORCI(N, DAG);
}
case RISCVISD::GREVW:
case RISCVISD::GORCW: {
// Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
if (SimplifyDemandedLowBitsHelper(0, 32) ||
SimplifyDemandedLowBitsHelper(1, 5))
return SDValue(N, 0);
return combineGREVI_GORCI(N, DAG);
}
case RISCVISD::SHFL:
case RISCVISD::UNSHFL: {
// Only the lower log2(Bitwidth)-1 bits of the shift amount are read.
unsigned BitWidth = N->getOperand(1).getValueSizeInBits();
assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth) - 1))
return SDValue(N, 0);
break;
}
case RISCVISD::SHFLW:
case RISCVISD::UNSHFLW: {
// Only the lower 32 bits of LHS and lower 4 bits of RHS are read.
if (SimplifyDemandedLowBitsHelper(0, 32) ||
SimplifyDemandedLowBitsHelper(1, 4))
return SDValue(N, 0);
break;
}
case RISCVISD::BCOMPRESSW:
case RISCVISD::BDECOMPRESSW: {
// Only the lower 32 bits of LHS and RHS are read.
if (SimplifyDemandedLowBitsHelper(0, 32) ||
SimplifyDemandedLowBitsHelper(1, 32))
return SDValue(N, 0);
break;
}
case RISCVISD::FMV_X_ANYEXTH:
case RISCVISD::FMV_X_ANYEXTW_RV64: {
SDLoc DL(N);
SDValue Op0 = N->getOperand(0);
MVT VT = N->getSimpleValueType(0);
// If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
// conversion is unnecessary and can be replaced with the FMV_W_X_RV64
// operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
(N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
Op0->getOpcode() == RISCVISD::FMV_H_X)) {
assert(Op0.getOperand(0).getValueType() == VT &&
"Unexpected value type!");
return Op0.getOperand(0);
}
// This is a target-specific version of a DAGCombine performed in
// DAGCombiner::visitBITCAST. It performs the equivalent of:
// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
!Op0.getNode()->hasOneUse())
break;
SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
APInt SignBit = APInt::getSignMask(FPBits).sextOrSelf(VT.getSizeInBits());
if (Op0.getOpcode() == ISD::FNEG)
return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
DAG.getConstant(SignBit, DL, VT));
assert(Op0.getOpcode() == ISD::FABS);
return DAG.getNode(ISD::AND, DL, VT, NewFMV,
DAG.getConstant(~SignBit, DL, VT));
}
case ISD::ADD:
return performADDCombine(N, DAG, Subtarget);
case ISD::SUB:
return performSUBCombine(N, DAG);
case ISD::AND:
return performANDCombine(N, DAG);
case ISD::OR:
return performORCombine(N, DAG, Subtarget);
case ISD::XOR:
return performXORCombine(N, DAG);
case ISD::ANY_EXTEND:
return performANY_EXTENDCombine(N, DCI, Subtarget);
case ISD::ZERO_EXTEND:
// Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
// type legalization. This is safe because fp_to_uint produces poison if
// it overflows.
if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) {
SDValue Src = N->getOperand(0);
if (Src.getOpcode() == ISD::FP_TO_UINT &&
isTypeLegal(Src.getOperand(0).getValueType()))
return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64,
Src.getOperand(0));
if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() &&
isTypeLegal(Src.getOperand(1).getValueType())) {
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs,
Src.getOperand(0), Src.getOperand(1));
DCI.CombineTo(N, Res);
DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1));
DCI.recursivelyDeleteUnusedNodes(Src.getNode());
return SDValue(N, 0); // Return N so it doesn't get rechecked.
}
}
return SDValue();
case RISCVISD::SELECT_CC: {
// Transform select_cc into simpler forms where possible; see the folds below.
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue TrueV = N->getOperand(3);
SDValue FalseV = N->getOperand(4);
// If the True and False values are the same, we don't need a select_cc.
if (TrueV == FalseV)
return TrueV;
ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
if (!ISD::isIntEqualitySetCC(CCVal))
break;
// Fold (select_cc (setlt X, Y), 0, ne, trueV, falseV) ->
// (select_cc X, Y, lt, trueV, falseV)
// Sometimes the setcc is introduced after select_cc has been formed.
if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
// If we're looking for eq 0 instead of ne 0, we need to invert the
// condition.
bool Invert = CCVal == ISD::SETEQ;
CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
if (Invert)
CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
SDLoc DL(N);
RHS = LHS.getOperand(1);
LHS = LHS.getOperand(0);
translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
SDValue TargetCC = DAG.getCondCode(CCVal);
return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
{LHS, RHS, TargetCC, TrueV, FalseV});
}
// Fold (select_cc (xor X, Y), 0, eq/ne, trueV, falseV) ->
// (select_cc X, Y, eq/ne, trueV, falseV)
if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), N->getValueType(0),
{LHS.getOperand(0), LHS.getOperand(1),
N->getOperand(2), TrueV, FalseV});
// (select_cc X, 1, setne, trueV, falseV) ->
// (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1.
// This can occur when legalizing some floating point comparisons.
APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
SDLoc DL(N);
CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
SDValue TargetCC = DAG.getCondCode(CCVal);
RHS = DAG.getConstant(0, DL, LHS.getValueType());
return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
{LHS, RHS, TargetCC, TrueV, FalseV});
}
break;
}
case RISCVISD::BR_CC: {
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(3))->get();
if (!ISD::isIntEqualitySetCC(CCVal))
break;
// Fold (br_cc (setlt X, Y), 0, ne, dest) ->
// (br_cc X, Y, lt, dest)
// Sometimes the setcc is introduced after br_cc has been formed.
if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
// If we're looking for eq 0 instead of ne 0, we need to invert the
// condition.
bool Invert = CCVal == ISD::SETEQ;
CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
if (Invert)
CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
SDLoc DL(N);
RHS = LHS.getOperand(1);
LHS = LHS.getOperand(0);
translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
N->getOperand(0), LHS, RHS, DAG.getCondCode(CCVal),
N->getOperand(4));
}
// Fold (br_cc (xor X, Y), 0, eq/ne, dest) ->
// (br_cc X, Y, eq/ne, dest)
if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
return DAG.getNode(RISCVISD::BR_CC, SDLoc(N), N->getValueType(0),
N->getOperand(0), LHS.getOperand(0), LHS.getOperand(1),
N->getOperand(3), N->getOperand(4));
// (br_cc X, 1, setne, br_cc) ->
// (br_cc X, 0, seteq, br_cc) if we can prove X is 0/1.
// This can occur when legalizing some floating point comparisons.
APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
SDLoc DL(N);
CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
SDValue TargetCC = DAG.getCondCode(CCVal);
RHS = DAG.getConstant(0, DL, LHS.getValueType());
return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
N->getOperand(0), LHS, RHS, TargetCC,
N->getOperand(4));
}
break;
}
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
return performFP_TO_INTCombine(N, DCI, Subtarget);
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
case ISD::FCOPYSIGN: {
EVT VT = N->getValueType(0);
if (!VT.isVector())
break;
// There is a form of VFSGNJ which injects the negated sign of its second
// operand. Try to bubble any FNEG up after the extend/round to produce
// this optimized pattern. Avoid modifying cases where the second operand is
// an FP_ROUND with TRUNC=1.
SDValue In2 = N->getOperand(1);
// Avoid cases where the extend/round has multiple uses, as duplicating
// those is typically more expensive than removing a fneg.
if (!In2.hasOneUse())
break;
if (In2.getOpcode() != ISD::FP_EXTEND &&
(In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
break;
In2 = In2.getOperand(0);
if (In2.getOpcode() != ISD::FNEG)
break;
SDLoc DL(N);
SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
}
case ISD::MGATHER:
case ISD::MSCATTER:
case ISD::VP_GATHER:
case ISD::VP_SCATTER: {
if (!DCI.isBeforeLegalize())
break;
SDValue Index, ScaleOp;
bool IsIndexScaled = false;
bool IsIndexSigned = false;
if (const auto *VPGSN = dyn_cast<VPGatherScatterSDNode>(N)) {
Index = VPGSN->getIndex();
ScaleOp = VPGSN->getScale();
IsIndexScaled = VPGSN->isIndexScaled();
IsIndexSigned = VPGSN->isIndexSigned();
} else {
const auto *MGSN = cast<MaskedGatherScatterSDNode>(N);
Index = MGSN->getIndex();
ScaleOp = MGSN->getScale();
IsIndexScaled = MGSN->isIndexScaled();
IsIndexSigned = MGSN->isIndexSigned();
}
EVT IndexVT = Index.getValueType();
MVT XLenVT = Subtarget.getXLenVT();
// RISCV indexed loads and stores only support the "unsigned unscaled"
// addressing mode, so anything else must be manually legalized.
bool NeedsIdxLegalization =
IsIndexScaled ||
(IsIndexSigned && IndexVT.getVectorElementType().bitsLT(XLenVT));
if (!NeedsIdxLegalization)
break;
SDLoc DL(N);
// Any index legalization should first promote to XLenVT, so we don't lose
// bits when scaling. This may create an illegal index type so we let
// LLVM's legalization take care of the splitting.
// FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
IndexVT = IndexVT.changeVectorElementType(XLenVT);
Index = DAG.getNode(IsIndexSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
DL, IndexVT, Index);
}
unsigned Scale = cast<ConstantSDNode>(ScaleOp)->getZExtValue();
if (IsIndexScaled && Scale != 1) {
// Manually scale the indices by the element size.
// TODO: Sanitize the scale operand here?
// TODO: For VP nodes, should we use VP_SHL here?
assert(isPowerOf2_32(Scale) && "Expecting power-of-two types");
SDValue SplatScale = DAG.getConstant(Log2_32(Scale), DL, IndexVT);
Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index, SplatScale);
}
ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_UNSCALED;
if (const auto *VPGN = dyn_cast<VPGatherSDNode>(N))
return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
{VPGN->getChain(), VPGN->getBasePtr(), Index,
VPGN->getScale(), VPGN->getMask(),
VPGN->getVectorLength()},
VPGN->getMemOperand(), NewIndexTy);
if (const auto *VPSN = dyn_cast<VPScatterSDNode>(N))
return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
{VPSN->getChain(), VPSN->getValue(),
VPSN->getBasePtr(), Index, VPSN->getScale(),
VPSN->getMask(), VPSN->getVectorLength()},
VPSN->getMemOperand(), NewIndexTy);
if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N))
return DAG.getMaskedGather(
N->getVTList(), MGN->getMemoryVT(), DL,
{MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
MGN->getBasePtr(), Index, MGN->getScale()},
MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType());
const auto *MSN = cast<MaskedScatterSDNode>(N);
return DAG.getMaskedScatter(
N->getVTList(), MSN->getMemoryVT(), DL,
{MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
Index, MSN->getScale()},
MSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore());
}
case RISCVISD::SRA_VL:
case RISCVISD::SRL_VL:
case RISCVISD::SHL_VL: {
SDValue ShAmt = N->getOperand(1);
if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
// We don't need the upper 32 bits of a 64-bit element for a shift amount.
SDLoc DL(N);
SDValue VL = N->getOperand(3);
EVT VT = N->getValueType(0);
ShAmt =
DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, ShAmt.getOperand(0), VL);
return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
N->getOperand(2), N->getOperand(3));
}
break;
}
case ISD::SRA:
case ISD::SRL:
case ISD::SHL: {
SDValue ShAmt = N->getOperand(1);
if (ShAmt.getOpcode() == RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL) {
// We don't need the upper 32 bits of a 64-bit element for a shift amount.
SDLoc DL(N);
EVT VT = N->getValueType(0);
ShAmt =
DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VT, ShAmt.getOperand(0));
return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
}
break;
}
case RISCVISD::MUL_VL:
if (SDValue V = combineMUL_VLToVWMUL_VL(N, DAG, /*Commute*/ false))
return V;
// Mul is commutative.
return combineMUL_VLToVWMUL_VL(N, DAG, /*Commute*/ true);
case ISD::STORE: {
auto *Store = cast<StoreSDNode>(N);
SDValue Val = Store->getValue();
// Combine store of vmv.x.s to vse with VL of 1.
// FIXME: Support FP.
if (Val.getOpcode() == RISCVISD::VMV_X_S) {
SDValue Src = Val.getOperand(0);
EVT VecVT = Src.getValueType();
EVT MemVT = Store->getMemoryVT();
// The memory VT and the element type must match.
if (VecVT.getVectorElementType() == MemVT) {
SDLoc DL(N);
MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
return DAG.getStoreVP(
Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
DAG.getConstant(1, DL, MaskVT),
DAG.getConstant(1, DL, Subtarget.getXLenVT()), MemVT,
Store->getMemOperand(), Store->getAddressingMode(),
Store->isTruncatingStore(), /*IsCompress*/ false);
}
}
break;
}
}
return SDValue();
}
bool RISCVTargetLowering::isDesirableToCommuteWithShift(
const SDNode *N, CombineLevel Level) const {
// The following folds are only desirable if `(OP _, c1 << c2)` can be
// materialised in fewer instructions than `(OP _, c1)`:
//
// (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
// (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
SDValue N0 = N->getOperand(0);
EVT Ty = N0.getValueType();
if (Ty.isScalarInteger() &&
(N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (C1 && C2) {
const APInt &C1Int = C1->getAPIntValue();
APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
// We can materialise `c1 << c2` into an add immediate, so it's "free",
// and the combine should happen, to potentially allow further combines
// later.
if (ShiftedC1Int.getMinSignedBits() <= 64 &&
isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
return true;
// We can materialise `c1` in an add immediate, so it's "free", and the
// combine should be prevented.
if (C1Int.getMinSignedBits() <= 64 &&
isLegalAddImmediate(C1Int.getSExtValue()))
return false;
// Neither constant will fit into an immediate, so find materialisation
// costs.
int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
Subtarget.getFeatureBits(),
/*CompressionCost*/true);
int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
ShiftedC1Int, Ty.getSizeInBits(), Subtarget.getFeatureBits(),
/*CompressionCost*/true);
// Materialising `c1` is cheaper than materialising `c1 << c2`, so the
// combine should be prevented.
if (C1Cost < ShiftedC1Cost)
return false;
}
}
return true;
}
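// Try to replace the constant operand of an AND with a mask that is cheaper
// to materialise while still covering all demanded bits: a 12-bit signed
// immediate, the 0xffff or 0xffffffff zero-extension patterns, or a negative
// constant that needs fewer bits.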
bool RISCVTargetLowering::targetShrinkDemandedConstant(
SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
TargetLoweringOpt &TLO) const {
// Delay this optimization as late as possible.
if (!TLO.LegalOps)
return false;
EVT VT = Op.getValueType();
if (VT.isVector())
return false;
// Only handle AND for now.
if (Op.getOpcode() != ISD::AND)
return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
if (!C)
return false;
const APInt &Mask = C->getAPIntValue();
// Clear all non-demanded bits initially.
APInt ShrunkMask = Mask & DemandedBits;
// Try to make a smaller immediate by setting undemanded bits.
APInt ExpandedMask = Mask | ~DemandedBits;
auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
};
auto UseMask = [Mask, Op, VT, &TLO](const APInt &NewMask) -> bool {
if (NewMask == Mask)
return true;
SDLoc DL(Op);
SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
return TLO.CombineTo(Op, NewOp);
};
// If the shrunk mask fits in sign extended 12 bits, let the target
// independent code apply it.
if (ShrunkMask.isSignedIntN(12))
return false;
// Preserve (and X, 0xffff) when zext.h is supported.
if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
if (IsLegalMask(NewMask))
return UseMask(NewMask);
}
// Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
if (VT == MVT::i64) {
APInt NewMask = APInt(64, 0xffffffff);
if (IsLegalMask(NewMask))
return UseMask(NewMask);
}
// For the remaining optimizations, we need to be able to make a negative
// number through a combination of mask and undemanded bits.
if (!ExpandedMask.isNegative())
return false;
// Compute the minimum number of bits needed to represent the negative number.
unsigned MinSignedBits = ExpandedMask.getMinSignedBits();
// Try to make a 12 bit negative immediate. If that fails try to make a 32
// bit negative immediate unless the shrunk immediate already fits in 32 bits.
APInt NewMask = ShrunkMask;
if (MinSignedBits <= 12)
NewMask.setBitsFrom(11);
else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
NewMask.setBitsFrom(31);
else
return false;
// Check that our new mask is a subset of the demanded mask.
assert(IsLegalMask(NewMask));
return UseMask(NewMask);
}
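// Apply a GREV (generalised bit reverse) with the given shift amount to a
// 64-bit value: each set bit in ShAmt swaps adjacent groups of 1, 2, 4, 8, 16
// or 32 bits. Used to propagate known bits through GREV/GREVW nodes.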
static void computeGREV(APInt &Src, unsigned ShAmt) {
ShAmt &= Src.getBitWidth() - 1;
uint64_t x = Src.getZExtValue();
if (ShAmt & 1)
x = ((x & 0x5555555555555555LL) << 1) | ((x & 0xAAAAAAAAAAAAAAAALL) >> 1);
if (ShAmt & 2)
x = ((x & 0x3333333333333333LL) << 2) | ((x & 0xCCCCCCCCCCCCCCCCLL) >> 2);
if (ShAmt & 4)
x = ((x & 0x0F0F0F0F0F0F0F0FLL) << 4) | ((x & 0xF0F0F0F0F0F0F0F0LL) >> 4);
if (ShAmt & 8)
x = ((x & 0x00FF00FF00FF00FFLL) << 8) | ((x & 0xFF00FF00FF00FF00LL) >> 8);
if (ShAmt & 16)
x = ((x & 0x0000FFFF0000FFFFLL) << 16) | ((x & 0xFFFF0000FFFF0000LL) >> 16);
if (ShAmt & 32)
x = ((x & 0x00000000FFFFFFFFLL) << 32) | ((x & 0xFFFFFFFF00000000LL) >> 32);
Src = x;
}
void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
KnownBits &Known,
const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
unsigned BitWidth = Known.getBitWidth();
unsigned Opc = Op.getOpcode();
assert((Opc >= ISD::BUILTIN_OP_END ||
Opc == ISD::INTRINSIC_WO_CHAIN ||
Opc == ISD::INTRINSIC_W_CHAIN ||
Opc == ISD::INTRINSIC_VOID) &&
"Should use MaskedValueIsZero if you don't know whether Op"
" is a target node!");
Known.resetAll();
switch (Opc) {
default: break;
case RISCVISD::SELECT_CC: {
Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
// If we don't know any bits, early out.
if (Known.isUnknown())
break;
KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
// Only known if known in both the LHS and RHS.
Known = KnownBits::commonBits(Known, Known2);
break;
}
case RISCVISD::REMUW: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// We only care about the lower 32 bits.
Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
// Restore the original width by sign extending.
Known = Known.sext(BitWidth);
break;
}
case RISCVISD::DIVUW: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// We only care about the lower 32 bits.
Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
// Restore the original width by sign extending.
Known = Known.sext(BitWidth);
break;
}
case RISCVISD::CTZW: {
KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
unsigned LowBits = Log2_32(PossibleTZ) + 1;
Known.Zero.setBitsFrom(LowBits);
break;
}
case RISCVISD::CLZW: {
KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
unsigned LowBits = Log2_32(PossibleLZ) + 1;
Known.Zero.setBitsFrom(LowBits);
break;
}
case RISCVISD::GREV:
case RISCVISD::GREVW: {
if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
if (Opc == RISCVISD::GREVW)
Known = Known.trunc(32);
unsigned ShAmt = C->getZExtValue();
computeGREV(Known.Zero, ShAmt);
computeGREV(Known.One, ShAmt);
if (Opc == RISCVISD::GREVW)
Known = Known.sext(BitWidth);
}
break;
}
case RISCVISD::READ_VLENB: {
// If we know the minimum VLen from Zvl extensions, we can use that to
// determine the trailing zeros of VLENB.
// FIXME: Limit to 128 bit vectors until we have more testing.
unsigned MinVLenB = std::min(128U, Subtarget.getMinVLen()) / 8;
if (MinVLenB > 0)
Known.Zero.setLowBits(Log2_32(MinVLenB));
// We assume VLENB is no more than 65536 / 8 bytes.
Known.Zero.setBitsFrom(14);
break;
}
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntNo =
Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
switch (IntNo) {
default:
// We can't do anything for most intrinsics.
break;
case Intrinsic::riscv_vsetvli:
case Intrinsic::riscv_vsetvlimax:
case Intrinsic::riscv_vsetvli_opt:
case Intrinsic::riscv_vsetvlimax_opt:
// Assume that VL output is positive and would fit in an int32_t.
// TODO: VLEN might be capped at 16 bits in a future V spec update.
if (BitWidth >= 32)
Known.Zero.setBitsFrom(31);
break;
}
break;
}
}
}
unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
unsigned Depth) const {
switch (Op.getOpcode()) {
default:
break;
case RISCVISD::SELECT_CC: {
unsigned Tmp =
DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
if (Tmp == 1) return 1; // Early out.
unsigned Tmp2 =
DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
return std::min(Tmp, Tmp2);
}
case RISCVISD::SLLW:
case RISCVISD::SRAW:
case RISCVISD::SRLW:
case RISCVISD::DIVW:
case RISCVISD::DIVUW:
case RISCVISD::REMUW:
case RISCVISD::ROLW:
case RISCVISD::RORW:
case RISCVISD::GREVW:
case RISCVISD::GORCW:
case RISCVISD::FSLW:
case RISCVISD::FSRW:
case RISCVISD::SHFLW:
case RISCVISD::UNSHFLW:
case RISCVISD::BCOMPRESSW:
case RISCVISD::BDECOMPRESSW:
case RISCVISD::BFPW:
case RISCVISD::FCVT_W_RV64:
case RISCVISD::FCVT_WU_RV64:
case RISCVISD::STRICT_FCVT_W_RV64:
case RISCVISD::STRICT_FCVT_WU_RV64:
// TODO: As the result is sign-extended, this is conservatively correct. A
// more precise answer could be calculated for SRAW depending on known
// bits in the shift amount.
return 33;
case RISCVISD::SHFL:
case RISCVISD::UNSHFL: {
// There is no SHFLIW, but an i64 SHFLI with bit 4 of the control word
// cleared doesn't affect bit 31. The upper 32 bits will be shuffled, but
// will stay within the upper 32 bits. If there were more than 32 sign bits
// before, there will be at least 33 sign bits after.
if (Op.getValueType() == MVT::i64 &&
isa<ConstantSDNode>(Op.getOperand(1)) &&
(Op.getConstantOperandVal(1) & 0x10) == 0) {
unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
if (Tmp > 32)
return 33;
}
break;
}
case RISCVISD::VMV_X_S: {
// The number of sign bits of the scalar result is computed by obtaining the
// element type of the input vector operand, subtracting its width from the
// XLEN, and then adding one (sign bit within the element type). If the
// element type is wider than XLen, the least-significant XLEN bits are
// taken.
unsigned XLen = Subtarget.getXLen();
unsigned EltBits = Op.getOperand(0).getScalarValueSizeInBits();
if (EltBits <= XLen)
return XLen - EltBits + 1;
break;
}
}
return 1;
}
static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
MachineBasicBlock *BB) {
assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
// To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
// Should the count have wrapped while it was being read, we need to try
// again.
// ...
// read:
// rdcycleh x3 # load high word of cycle
// rdcycle x2 # load low word of cycle
// rdcycleh x4 # load high word of cycle
// bne x3, x4, read # check if high word reads match, otherwise try again
// ...
MachineFunction &MF = *BB->getParent();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = ++BB->getIterator();
MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
MF.insert(It, LoopMBB);
MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
MF.insert(It, DoneMBB);
// Transfer the remainder of BB and its successor edges to DoneMBB.
DoneMBB->splice(DoneMBB->begin(), BB,
std::next(MachineBasicBlock::iterator(MI)), BB->end());
DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(LoopMBB);
MachineRegisterInfo &RegInfo = MF.getRegInfo();
Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
Register LoReg = MI.getOperand(0).getReg();
Register HiReg = MI.getOperand(1).getReg();
DebugLoc DL = MI.getDebugLoc();
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
.addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
.addReg(RISCV::X0);
BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
.addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
.addReg(RISCV::X0);
BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
.addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
.addReg(RISCV::X0);
BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
.addReg(HiReg)
.addReg(ReadAgainReg)
.addMBB(LoopMBB);
LoopMBB->addSuccessor(LoopMBB);
LoopMBB->addSuccessor(DoneMBB);
MI.eraseFromParent();
return DoneMBB;
}
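// Expand SplitF64Pseudo: spill the FPR64 source to a stack slot and reload
// its low and high 32-bit halves into the two GPR results.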
static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
MachineBasicBlock *BB) {
assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
MachineFunction &MF = *BB->getParent();
DebugLoc DL = MI.getDebugLoc();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
Register LoReg = MI.getOperand(0).getReg();
Register HiReg = MI.getOperand(1).getReg();
Register SrcReg = MI.getOperand(2).getReg();
const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
RI);
MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
MachineMemOperand *MMOLo =
MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
MachineMemOperand *MMOHi = MF.getMachineMemOperand(
MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMOLo);
BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
.addFrameIndex(FI)
.addImm(4)
.addMemOperand(MMOHi);
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
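// Expand BuildPairF64Pseudo: store the two 32-bit GPR halves to a stack slot
// and reload them as a single FPR64 value.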
static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
MachineBasicBlock *BB) {
assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
"Unexpected instruction");
MachineFunction &MF = *BB->getParent();
DebugLoc DL = MI.getDebugLoc();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
Register DstReg = MI.getOperand(0).getReg();
Register LoReg = MI.getOperand(1).getReg();
Register HiReg = MI.getOperand(2).getReg();
const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
MachineMemOperand *MMOLo =
MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
MachineMemOperand *MMOHi = MF.getMachineMemOperand(
MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
.addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMOLo);
BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
.addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
.addFrameIndex(FI)
.addImm(4)
.addMemOperand(MMOHi);
TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
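// Return true if MI is one of the Select_*_Using_CC_GPR pseudo instructions
// expanded by emitSelectPseudo below.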
static bool isSelectPseudo(MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
return false;
case RISCV::Select_GPR_Using_CC_GPR:
case RISCV::Select_FPR16_Using_CC_GPR:
case RISCV::Select_FPR32_Using_CC_GPR:
case RISCV::Select_FPR64_Using_CC_GPR:
return true;
}
}
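// Expand a quiet floating-point compare pseudo: save FFLAGS, perform the
// relational compare, restore FFLAGS so any flags the compare raised are
// discarded, then issue a dummy FEQ against x0 so that signaling NaN operands
// still raise the invalid exception.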
static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
unsigned RelOpcode, unsigned EqOpcode,
const RISCVSubtarget &Subtarget) {
DebugLoc DL = MI.getDebugLoc();
Register DstReg = MI.getOperand(0).getReg();
Register Src1Reg = MI.getOperand(1).getReg();
Register Src2Reg = MI.getOperand(2).getReg();
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
Register SavedFFlags = MRI.createVirtualRegister(&RISCV::GPRRegClass);
const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
// Save the current FFLAGS.
BuildMI(*BB, MI, DL, TII.get(RISCV::ReadFFLAGS), SavedFFlags);
auto MIB = BuildMI(*BB, MI, DL, TII.get(RelOpcode), DstReg)
.addReg(Src1Reg)
.addReg(Src2Reg);
if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
// Restore the FFLAGS.
BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
.addReg(SavedFFlags, RegState::Kill);
// Issue a dummy FEQ opcode to raise exception for signaling NaNs.
auto MIB2 = BuildMI(*BB, MI, DL, TII.get(EqOpcode), RISCV::X0)
.addReg(Src1Reg, getKillRegState(MI.getOperand(1).isKill()))
.addReg(Src2Reg, getKillRegState(MI.getOperand(2).isKill()));
if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
MIB2->setFlag(MachineInstr::MIFlag::NoFPExcept);
// Erase the pseudoinstruction.
MI.eraseFromParent();
return BB;
}
static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
MachineBasicBlock *BB,
const RISCVSubtarget &Subtarget) {
// To "insert" Select_* instructions, we actually have to insert the triangle
// control-flow pattern. The incoming instructions know the destination vreg
// to set, the condition code register to branch on, the true/false values to
// select between, and the condcode to use to select the appropriate branch.
//
// We produce the following control flow:
// HeadMBB
// | \
// | IfFalseMBB
// | /
// TailMBB
//
// When we find a sequence of selects we attempt to optimize their emission
// by sharing the control flow. Currently we only handle cases where we have
// multiple selects with the exact same condition (same LHS, RHS and CC).
// The selects may be interleaved with other instructions if the other
// instructions meet some requirements we deem safe:
// - They are debug instructions. Otherwise,
// - They do not have side-effects, do not access memory and their inputs do
// not depend on the results of the select pseudo-instructions.
// The TrueV/FalseV operands of the selects cannot depend on the result of
// previous selects in the sequence.
// These conditions could be further relaxed. See the X86 target for a
// related approach and more information.
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
SmallVector<MachineInstr *, 4> SelectDebugValues;
SmallSet<Register, 4> SelectDests;
SelectDests.insert(MI.getOperand(0).getReg());
MachineInstr *LastSelectPseudo = &MI;
for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
SequenceMBBI != E; ++SequenceMBBI) {
if (SequenceMBBI->isDebugInstr())
continue;
else if (isSelectPseudo(*SequenceMBBI)) {
if (SequenceMBBI->getOperand(1).getReg() != LHS ||
SequenceMBBI->getOperand(2).getReg() != RHS ||
SequenceMBBI->getOperand(3).getImm() != CC ||
SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
break;
LastSelectPseudo = &*SequenceMBBI;
SequenceMBBI->collectDebugValues(SelectDebugValues);
SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
} else {
if (SequenceMBBI->hasUnmodeledSideEffects() ||
SequenceMBBI->mayLoadOrStore())
break;
if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
}))
break;
}
}
const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
DebugLoc DL = MI.getDebugLoc();
MachineFunction::iterator I = ++BB->getIterator();
MachineBasicBlock *HeadMBB = BB;
MachineFunction *F = BB->getParent();
MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(I, IfFalseMBB);
F->insert(I, TailMBB);
// Transfer debug instructions associated with the selects to TailMBB.
for (MachineInstr *DebugInstr : SelectDebugValues) {
TailMBB->push_back(DebugInstr->removeFromParent());
}
// Move all instructions after the sequence to TailMBB.
TailMBB->splice(TailMBB->end(), HeadMBB,
std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
// Update machine-CFG edges by transferring all successors of the current
// block to the new block which will contain the Phi nodes for the selects.
TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
// Set the successors for HeadMBB.
HeadMBB->addSuccessor(IfFalseMBB);
HeadMBB->addSuccessor(TailMBB);
// Insert appropriate branch.
BuildMI(HeadMBB, DL, TII.getBrCond(CC))
.addReg(LHS)
.addReg(RHS)
.addMBB(TailMBB);
// IfFalseMBB just falls through to TailMBB.
IfFalseMBB->addSuccessor(TailMBB);
// Create PHIs for all of the select pseudo-instructions.
auto SelectMBBI = MI.getIterator();
auto SelectEnd = std::next(LastSelectPseudo->getIterator());
auto InsertionPoint = TailMBB->begin();
while (SelectMBBI != SelectEnd) {
auto Next = std::next(SelectMBBI);
if (isSelectPseudo(*SelectMBBI)) {
// %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
.addReg(SelectMBBI->getOperand(4).getReg())
.addMBB(HeadMBB)
.addReg(SelectMBBI->getOperand(5).getReg())
.addMBB(IfFalseMBB);
SelectMBBI->eraseFromParent();
}
SelectMBBI = Next;
}
F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
return TailMBB;
}
MachineBasicBlock *
RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected instr type to insert");
case RISCV::ReadCycleWide:
assert(!Subtarget.is64Bit() &&
"ReadCycleWrite is only to be used on riscv32");
return emitReadCycleWidePseudo(MI, BB);
case RISCV::Select_GPR_Using_CC_GPR:
case RISCV::Select_FPR16_Using_CC_GPR:
case RISCV::Select_FPR32_Using_CC_GPR:
case RISCV::Select_FPR64_Using_CC_GPR:
return emitSelectPseudo(MI, BB, Subtarget);
case RISCV::BuildPairF64Pseudo:
return emitBuildPairF64Pseudo(MI, BB);
case RISCV::SplitF64Pseudo:
return emitSplitF64Pseudo(MI, BB);
case RISCV::PseudoQuietFLE_H:
return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
case RISCV::PseudoQuietFLT_H:
return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
case RISCV::PseudoQuietFLE_S:
return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
case RISCV::PseudoQuietFLT_S:
return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
case RISCV::PseudoQuietFLE_D:
return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
case RISCV::PseudoQuietFLT_D:
return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
}
}
void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
SDNode *Node) const {
// Add FRM dependency to any instructions with dynamic rounding mode.
unsigned Opc = MI.getOpcode();
auto Idx = RISCV::getNamedOperandIdx(Opc, RISCV::OpName::frm);
if (Idx < 0)
return;
if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN)
return;
// If the instruction already reads FRM, don't add another read.
if (MI.readsRegister(RISCV::FRM))
return;
MI.addOperand(
MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true));
}
// Calling Convention Implementation.
// The expectations for frontend ABI lowering vary from target to target.
// Ideally, an LLVM frontend would be able to avoid worrying about many ABI
// details, but this is a longer term goal. For now, we simply try to keep the
// role of the frontend as simple and well-defined as possible. The rules can
// be summarised as:
// * Never split up large scalar arguments. We handle them here.
// * If a hardfloat calling convention is being used, and the struct may be
// passed in a pair of registers (fp+fp, int+fp), and both registers are
// available, then pass as two separate arguments. If either the GPRs or FPRs
// are exhausted, then pass according to the rule below.
// * If a struct could never be passed in registers or directly in a stack
// slot (as it is larger than 2*XLEN and the floating point rules don't
// apply), then pass it using a pointer with the byval attribute.
// * If a struct is less than 2*XLEN, then coerce to either a two-element
// word-sized array or a 2*XLEN scalar (depending on alignment).
// * The frontend can determine whether a struct is returned by reference or
// not based on its size and fields. If it will be returned by reference, the
// frontend must modify the prototype so a pointer with the sret annotation is
// passed as the first argument. This is not necessary for large scalar
// returns.
// * Struct return values and varargs should be coerced to structs containing
// register-size fields in the same situations they would be for fixed
// arguments.
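// As an example of the hardfloat rule above, a struct containing one double
// and one int is passed as two separate arguments (one FPR and one GPR) as
// long as both register classes still have free argument registers.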
static const MCPhysReg ArgGPRs[] = {
RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
};
static const MCPhysReg ArgFPR16s[] = {
RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
};
static const MCPhysReg ArgFPR32s[] = {
RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
};
static const MCPhysReg ArgFPR64s[] = {
RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
};
// This is an interim calling convention and it may be changed in the future.
static const MCPhysReg ArgVRs[] = {
RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2,
RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
RISCV::V20M2, RISCV::V22M2};
static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
RISCV::V20M4};
static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
// Pass a 2*XLEN argument that has been split into two XLEN values through
// registers or the stack as necessary.
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
MVT ValVT2, MVT LocVT2,
ISD::ArgFlagsTy ArgFlags2) {
unsigned XLenInBytes = XLen / 8;
if (Register Reg = State.AllocateReg(ArgGPRs)) {
// At least one half can be passed via register.
State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
VA1.getLocVT(), CCValAssign::Full));
} else {
// Both halves must be passed on the stack, with proper alignment.
Align StackAlign =
std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
State.addLoc(
CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
State.AllocateStack(XLenInBytes, StackAlign),
VA1.getLocVT(), CCValAssign::Full));
State.addLoc(CCValAssign::getMem(
ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
LocVT2, CCValAssign::Full));
return false;
}
if (Register Reg = State.AllocateReg(ArgGPRs)) {
// The second half can also be passed via register.
State.addLoc(
CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
} else {
// The second half is passed via the stack, without additional alignment.
State.addLoc(CCValAssign::getMem(
ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
LocVT2, CCValAssign::Full));
}
return false;
}
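// Allocate a vector register for an RVV value of type ValVT. The first mask
// argument is assigned to V0; other values use the argument register list
// matching their register class. Returns 0 if the list is exhausted.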
static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo,
Optional<unsigned> FirstMaskArgument,
CCState &State, const RISCVTargetLowering &TLI) {
const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
if (RC == &RISCV::VRRegClass) {
// Assign the first mask argument to V0.
// This is an interim calling convention and it may be changed in the
// future.
if (FirstMaskArgument.hasValue() && ValNo == FirstMaskArgument.getValue())
return State.AllocateReg(RISCV::V0);
return State.AllocateReg(ArgVRs);
}
if (RC == &RISCV::VRM2RegClass)
return State.AllocateReg(ArgVRM2s);
if (RC == &RISCV::VRM4RegClass)
return State.AllocateReg(ArgVRM4s);
if (RC == &RISCV::VRM8RegClass)
return State.AllocateReg(ArgVRM8s);
llvm_unreachable("Unhandled register class for ValueType");
}
// Implements the RISC-V calling convention. Returns true upon failure.
static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
Optional<unsigned> FirstMaskArgument) {
unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
assert(XLen == 32 || XLen == 64);
MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
// Any return value split into more than two values can't be returned
// directly. Vectors are returned via the available vector registers.
if (!LocVT.isVector() && IsRet && ValNo > 1)
return true;
// UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
// variadic argument, or if no F16/F32 argument registers are available.
bool UseGPRForF16_F32 = true;
// UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
// variadic argument, or if no F64 argument registers are available.
bool UseGPRForF64 = true;
switch (ABI) {
default:
llvm_unreachable("Unexpected ABI");
case RISCVABI::ABI_ILP32:
case RISCVABI::ABI_LP64:
break;
case RISCVABI::ABI_ILP32F:
case RISCVABI::ABI_LP64F:
UseGPRForF16_F32 = !IsFixed;
break;
case RISCVABI::ABI_ILP32D:
case RISCVABI::ABI_LP64D:
UseGPRForF16_F32 = !IsFixed;
UseGPRForF64 = !IsFixed;
break;
}
// FPR16, FPR32, and FPR64 alias each other.
if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) {
UseGPRForF16_F32 = true;
UseGPRForF64 = true;
}
// From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
// similar local variables rather than directly checking against the target
// ABI.
if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) {
LocVT = XLenVT;
LocInfo = CCValAssign::BCvt;
} else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
LocVT = MVT::i64;
LocInfo = CCValAssign::BCvt;
}
// If this is a variadic argument, the RISC-V calling convention requires
// that it is assigned an 'even' or 'aligned' register if it has 8-byte
// alignment (RV32) or 16-byte alignment (RV64). An aligned register should
// be used regardless of whether the original argument was split during
// legalisation or not. The argument will not be passed by registers if the
// original type is larger than 2*XLEN, so the register alignment rule does
// not apply.
unsigned TwoXLenInBytes = (2 * XLen) / 8;
if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
// Skip 'odd' register if necessary.
if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
State.AllocateReg(ArgGPRs);
}
SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
State.getPendingArgFlags();
assert(PendingLocs.size() == PendingArgFlags.size() &&
"PendingLocs and PendingArgFlags out of sync");
// Handle passing f64 on RV32D with a soft float ABI or when floating point
// registers are exhausted.
if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
"Can't lower f64 if it is split");
// Depending on available argument GPRS, f64 may be passed in a pair of
// GPRs, split between a GPR and the stack, or passed completely on the
// stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
// cases.
Register Reg = State.AllocateReg(ArgGPRs);
LocVT = MVT::i32;
if (!Reg) {
unsigned StackOffset = State.AllocateStack(8, Align(8));
State.addLoc(
CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
return false;
}
if (!State.AllocateReg(ArgGPRs))
State.AllocateStack(4, Align(4));
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
// Fixed-length vectors are located in the corresponding scalable-vector
// container types.
if (ValVT.isFixedLengthVector())
LocVT = TLI.getContainerForFixedLengthVector(LocVT);
// Split arguments might be passed indirectly, so keep track of the pending
// values. Split vectors are passed via a mix of registers and indirectly, so
// treat them as we would any other argument.
if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
LocVT = XLenVT;
LocInfo = CCValAssign::Indirect;
PendingLocs.push_back(
CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
PendingArgFlags.push_back(ArgFlags);
if (!ArgFlags.isSplitEnd()) {
return false;
}
}
// If the split argument only had two elements, it should be passed directly
// in registers or on the stack.
if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
PendingLocs.size() <= 2) {
assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
// Apply the normal calling convention rules to the first half of the
// split argument.
CCValAssign VA = PendingLocs[0];
ISD::ArgFlagsTy AF = PendingArgFlags[0];
PendingLocs.clear();
PendingArgFlags.clear();
return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
ArgFlags);
}
// Allocate to a register if possible, or else a stack slot.
Register Reg;
unsigned StoreSizeBytes = XLen / 8;
Align StackAlign = Align(XLen / 8);
if (ValVT == MVT::f16 && !UseGPRForF16_F32)
Reg = State.AllocateReg(ArgFPR16s);
else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
Reg = State.AllocateReg(ArgFPR32s);
else if (ValVT == MVT::f64 && !UseGPRForF64)
Reg = State.AllocateReg(ArgFPR64s);
else if (ValVT.isVector()) {
Reg = allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI);
if (!Reg) {
// For return values, the vector must be passed fully via registers or
// via the stack.
// FIXME: The proposed vector ABI only mandates v8-v15 for return values,
// but we're using all of them.
if (IsRet)
return true;
// Try using a GPR to pass the address
if ((Reg = State.AllocateReg(ArgGPRs))) {
LocVT = XLenVT;
LocInfo = CCValAssign::Indirect;
} else if (ValVT.isScalableVector()) {
LocVT = XLenVT;
LocInfo = CCValAssign::Indirect;
} else {
// Pass fixed-length vectors on the stack.
LocVT = ValVT;
StoreSizeBytes = ValVT.getStoreSize();
// Align vectors to their element sizes, being careful for vXi1
// vectors.
StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
}
}
} else {
Reg = State.AllocateReg(ArgGPRs);
}
unsigned StackOffset =
Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
// If we reach this point and PendingLocs is non-empty, we must be at the
// end of a split argument that must be passed indirectly.
if (!PendingLocs.empty()) {
assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
for (auto &It : PendingLocs) {
if (Reg)
It.convertToReg(Reg);
else
It.convertToMem(StackOffset);
State.addLoc(It);
}
PendingLocs.clear();
PendingArgFlags.clear();
return false;
}
assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
(TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
"Expected an XLenVT or vector types at this stage");
if (Reg) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
// When a floating-point value is passed on the stack, no bit-conversion is
// needed.
if (ValVT.isFloatingPoint()) {
LocVT = ValVT;
LocInfo = CCValAssign::Full;
}
State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
return false;
}
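// Return the index of the first vector mask argument (an i1-element vector),
// if any, so the calling-convention code can handle it specially.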
template <typename ArgTy>
static Optional<unsigned> preAssignMask(const ArgTy &Args) {
for (const auto &ArgIdx : enumerate(Args)) {
MVT ArgVT = ArgIdx.value().VT;
if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
return ArgIdx.index();
}
return None;
}
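// Assign a location to every incoming argument (or return value when IsRet
// is true) using the given calling-convention function; an unhandled type
// hits llvm_unreachable.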
void RISCVTargetLowering::analyzeInputArgs(
MachineFunction &MF, CCState &CCInfo,
const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
RISCVCCAssignFn Fn) const {
unsigned NumArgs = Ins.size();
FunctionType *FType = MF.getFunction().getFunctionType();
Optional<unsigned> FirstMaskArgument;
if (Subtarget.hasVInstructions())
FirstMaskArgument = preAssignMask(Ins);
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ArgVT = Ins[i].VT;
ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
Type *ArgTy = nullptr;
if (IsRet)
ArgTy = FType->getReturnType();
else if (Ins[i].isOrigArg())
ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
FirstMaskArgument)) {
LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
<< EVT(ArgVT).getEVTString() << '\n');
llvm_unreachable(nullptr);
}
}
}
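// Assign a location to every outgoing argument (or return value when IsRet
// is true) using the given calling-convention function.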
void RISCVTargetLowering::analyzeOutputArgs(
MachineFunction &MF, CCState &CCInfo,
const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
CallLoweringInfo *CLI, RISCVCCAssignFn Fn) const {
unsigned NumArgs = Outs.size();
Optional<unsigned> FirstMaskArgument;
if (Subtarget.hasVInstructions())
FirstMaskArgument = preAssignMask(Outs);
for (unsigned i = 0; i != NumArgs; i++) {
MVT ArgVT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
if (Fn(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
FirstMaskArgument)) {
LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
<< EVT(ArgVT).getEVTString() << "\n");
llvm_unreachable(nullptr);
}
}
}
// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
// values.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
const CCValAssign &VA, const SDLoc &DL,
const RISCVSubtarget &Subtarget) {
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unexpected CCValAssign::LocInfo");
case CCValAssign::Full:
if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
break;
case CCValAssign::BCvt:
if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val);
else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
else
Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
break;
}
return Val;
}
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
const CCValAssign &VA, const SDLoc &DL,
const RISCVTargetLowering &TLI) {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
EVT LocVT = VA.getLocVT();
SDValue Val;
const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
Register VReg = RegInfo.createVirtualRegister(RC);
RegInfo.addLiveIn(VA.getLocReg(), VReg);
Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
if (VA.getLocInfo() == CCValAssign::Indirect)
return Val;
return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
}
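// Convert Val from its ValVT to the LocVT required by the calling convention
// (the inverse of convertLocVTToValVT).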
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
const CCValAssign &VA, const SDLoc &DL,
const RISCVSubtarget &Subtarget) {
EVT LocVT = VA.getLocVT();
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unexpected CCValAssign::LocInfo");
case CCValAssign::Full:
if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
break;
case CCValAssign::BCvt:
if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
else
Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
break;
}
return Val;
}
// The caller is responsible for loading the full value if the argument is
// passed with CCValAssign::Indirect.
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
const CCValAssign &VA, const SDLoc &DL) {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
EVT LocVT = VA.getLocVT();
EVT ValVT = VA.getValVT();
EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
if (ValVT.isScalableVector()) {
// When the value is a scalable vector, we save the pointer to the scalable
// vector value on the stack. The ValVT will be the pointer type instead of
// the scalable vector type.
ValVT = LocVT;
}
int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
/*IsImmutable=*/true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val;
ISD::LoadExtType ExtType;
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unexpected CCValAssign::LocInfo");
case CCValAssign::Full:
case CCValAssign::Indirect:
case CCValAssign::BCvt:
ExtType = ISD::NON_EXTLOAD;
break;
}
Val = DAG.getExtLoad(
ExtType, DL, LocVT, Chain, FIN,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
return Val;
}
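// Reassemble an f64 argument for RV32 soft-float ABIs: the value may arrive
// entirely on the stack, in a pair of GPRs, or split between a GPR and the
// stack.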
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
const CCValAssign &VA, const SDLoc &DL) {
assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
"Unexpected VA");
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
if (VA.isMemLoc()) {
// f64 is passed on the stack.
int FI =
MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*IsImmutable=*/true);
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
return DAG.getLoad(MVT::f64, DL, Chain, FIN,
MachinePointerInfo::getFixedStack(MF, FI));
}
assert(VA.isRegLoc() && "Expected register VA assignment");
Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
SDValue Hi;
if (VA.getLocReg() == RISCV::X17) {
// Second half of f64 is passed on the stack.
int FI = MFI.CreateFixedObject(4, 0, /*IsImmutable=*/true);
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
MachinePointerInfo::getFixedStack(MF, FI));
} else {
// Second half of f64 is passed in another GPR.
Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
}
return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}
// FastCC has less than 1% performance improvement for some particular
// benchmarks. Theoretically, though, it may benefit some cases.
static bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State,
bool IsFixed, bool IsRet, Type *OrigTy,
const RISCVTargetLowering &TLI,
Optional<unsigned> FirstMaskArgument) {
// X5 and X6 might be used for the save-restore libcalls.
static const MCPhysReg GPRList[] = {
RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
RISCV::X29, RISCV::X30, RISCV::X31};
if (LocVT == MVT::i32 || LocVT == MVT::i64) {
if (unsigned Reg = State.AllocateReg(GPRList)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
}
if (LocVT == MVT::f16) {
static const MCPhysReg FPR16List[] = {
RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H,
RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
if (unsigned Reg = State.AllocateReg(FPR16List)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
}
if (LocVT == MVT::f32) {
static const MCPhysReg FPR32List[] = {
RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,
RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
if (unsigned Reg = State.AllocateReg(FPR32List)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
}
if (LocVT == MVT::f64) {
static const MCPhysReg FPR64List[] = {
RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,
RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
if (unsigned Reg = State.AllocateReg(FPR64List)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
}
if (LocVT == MVT::i32 || LocVT == MVT::f32) {
unsigned Offset4 = State.AllocateStack(4, Align(4));
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
return false;
}
if (LocVT == MVT::i64 || LocVT == MVT::f64) {
unsigned Offset5 = State.AllocateStack(8, Align(8));
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
return false;
}
if (LocVT.isVector()) {
if (unsigned Reg =
allocateRVVReg(ValVT, ValNo, FirstMaskArgument, State, TLI)) {
// Fixed-length vectors are located in the corresponding scalable-vector
// container types.
if (ValVT.isFixedLengthVector())
LocVT = TLI.getContainerForFixedLengthVector(LocVT);
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
} else {
// Try to pass the address via a "fast" GPR.
if (unsigned GPRReg = State.AllocateReg(GPRList)) {
LocInfo = CCValAssign::Indirect;
LocVT = TLI.getSubtarget().getXLenVT();
State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
} else if (ValVT.isFixedLengthVector()) {
auto StackAlign =
MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
unsigned StackOffset =
State.AllocateStack(ValVT.getStoreSize(), StackAlign);
State.addLoc(
CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
} else {
// Can't pass scalable vectors on the stack.
return true;
}
}
return false;
}
return true; // CC didn't match.
}
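// Calling convention used for GHC (the Glasgow Haskell Compiler): the STG
// virtual registers are pinned to specific callee-saved GPRs and FPRs, and
// running out of registers is a fatal error.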
static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
if (LocVT == MVT::i32 || LocVT == MVT::i64) {
// Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
// s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11
static const MCPhysReg GPRList[] = {
RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
if (unsigned Reg = State.AllocateReg(GPRList)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
}
if (LocVT == MVT::f32) {
// Pass in STG registers: F1, ..., F6
// fs0 ... fs5
static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
RISCV::F18_F, RISCV::F19_F,
RISCV::F20_F, RISCV::F21_F};
if (unsigned Reg = State.AllocateReg(FPR32List)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
}
if (LocVT == MVT::f64) {
// Pass in STG registers: D1, ..., D6
// fs6 ... fs11
static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
RISCV::F24_D, RISCV::F25_D,
RISCV::F26_D, RISCV::F27_D};
if (unsigned Reg = State.AllocateReg(FPR64List)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
}
report_fatal_error("No registers left in GHC calling convention");
return true;
}
// Transform physical registers into virtual registers.
SDValue RISCVTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
switch (CallConv) {
default:
report_fatal_error("Unsupported calling convention");
case CallingConv::C:
case CallingConv::Fast:
break;
case CallingConv::GHC:
if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
report_fatal_error(
"GHC calling convention requires the F and D instruction set extensions");
}
const Function &Func = MF.getFunction();
if (Func.hasFnAttribute("interrupt")) {
if (!Func.arg_empty())
report_fatal_error(
"Functions with the interrupt attribute cannot have arguments!");
StringRef Kind =
MF.getFunction().getFnAttribute("interrupt").getValueAsString();
if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
report_fatal_error(
"Function interrupt attribute argument not supported!");
}
EVT PtrVT = getPointerTy(DAG.getDataLayout());
MVT XLenVT = Subtarget.getXLenVT();
unsigned XLenInBytes = Subtarget.getXLen() / 8;
// Used with varargs to accumulate store chains.
std::vector<SDValue> OutChains;
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
if (CallConv == CallingConv::GHC)
CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
else
analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
CallConv == CallingConv::Fast ? CC_RISCV_FastCC
: CC_RISCV);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue ArgValue;
// Passing f64 on RV32D with a soft float ABI must be handled as a special
// case.
if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
else if (VA.isRegLoc())
ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
else
ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
if (VA.getLocInfo() == CCValAssign::Indirect) {
// If the original argument was split and passed by reference (e.g. i128
// on RV32), we need to load all parts of it here (using the same
// address). Vectors may be partly split to registers and partly to the
// stack, in which case the base address is partly offset and subsequent
// stores are relative to that.
InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
MachinePointerInfo()));
unsigned ArgIndex = Ins[i].OrigArgIndex;
unsigned ArgPartOffset = Ins[i].PartOffset;
assert(VA.getValVT().isVector() || ArgPartOffset == 0);
while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
CCValAssign &PartVA = ArgLocs[i + 1];
unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
if (PartVA.getValVT().isScalableVector())
Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
MachinePointerInfo()));
++i;
}
continue;
}
InVals.push_back(ArgValue);
}
if (IsVarArg) {
ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
const TargetRegisterClass *RC = &RISCV::GPRRegClass;
MachineFrameInfo &MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
// Offset of the first variable argument from stack pointer, and size of
// the vararg save area. For now, the varargs save area is either zero or
// large enough to hold a0-a7.
int VaArgOffset, VarArgsSaveSize;
// If all registers are allocated, then all varargs must be passed on the
// stack and we don't need to save any argregs.
if (ArgRegs.size() == Idx) {
VaArgOffset = CCInfo.getNextStackOffset();
VarArgsSaveSize = 0;
} else {
VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
VaArgOffset = -VarArgsSaveSize;
}
// Record the frame index of the first variable argument, which is needed by
// VASTART.
int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
RVFI->setVarArgsFrameIndex(FI);
// If saving an odd number of registers, create an extra stack slot to
// ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
// offsets to even-numbered registers remain 2*XLEN-aligned.
if (Idx % 2) {
MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
VarArgsSaveSize += XLenInBytes;
}
// Copy the integer registers that may have been used for passing varargs
// to the vararg save area.
for (unsigned I = Idx; I < ArgRegs.size();
++I, VaArgOffset += XLenInBytes) {
const Register Reg = RegInfo.createVirtualRegister(RC);
RegInfo.addLiveIn(ArgRegs[I], Reg);
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
MachinePointerInfo::getFixedStack(MF, FI));
cast<StoreSDNode>(Store.getNode())
->getMemOperand()
->setValue((Value *)nullptr);
OutChains.push_back(Store);
}
RVFI->setVarArgsSaveSize(VarArgsSaveSize);
}
// All stores are grouped into one node to allow matching between the sizes
// of Ins and InVals. This only happens for vararg functions.
if (!OutChains.empty()) {
OutChains.push_back(Chain);
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
}
return Chain;
}
/// isEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
/// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
bool RISCVTargetLowering::isEligibleForTailCallOptimization(
CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
const SmallVector<CCValAssign, 16> &ArgLocs) const {
auto &Callee = CLI.Callee;
auto CalleeCC = CLI.CallConv;
auto &Outs = CLI.Outs;
auto &Caller = MF.getFunction();
auto CallerCC = Caller.getCallingConv();
// Exception-handling functions need a special set of instructions to
// indicate a return to the hardware. Tail-calling another function would
// probably break this.
// TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
// should be expanded as new function attributes are introduced.
if (Caller.hasFnAttribute("interrupt"))
return false;
// Do not tail call opt if the stack is used to pass parameters.
if (CCInfo.getNextStackOffset() != 0)
return false;
// Do not tail call opt if any parameters need to be passed indirectly.
// Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
// passed indirectly, so the address of the value is passed in a register,
// or on the stack if no register is available. Passing indirectly often
// requires allocating stack space to store the value, so the
// CCInfo.getNextStackOffset() != 0 check alone is not enough; we also need
// to check whether any CCValAssign in ArgLocs is passed
// CCValAssign::Indirect.
for (auto &VA : ArgLocs)
if (VA.getLocInfo() == CCValAssign::Indirect)
return false;
// Do not tail call opt if either caller or callee uses struct return
// semantics.
auto IsCallerStructRet = Caller.hasStructRetAttr();
auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
if (IsCallerStructRet || IsCalleeStructRet)
return false;
// Externally-defined functions with weak linkage should not be
// tail-called. The behaviour of branch instructions in this situation (as
// used for tail calls) is implementation-defined, so we cannot rely on the
// linker replacing the tail call with a return.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
if (GV->hasExternalWeakLinkage())
return false;
}
// The callee has to preserve all registers the caller needs to preserve.
const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
if (CalleeCC != CallerCC) {
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
return false;
}
// Byval parameters hand the function a pointer directly into the stack area
// we want to reuse during a tail call. Working around this *is* possible
// but less efficient and uglier in LowerCall.
for (auto &Arg : Outs)
if (Arg.Flags.isByVal())
return false;
return true;
}
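// Return the preferred alignment of the IR type corresponding to VT.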
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
return DAG.getDataLayout().getPrefTypeAlign(
VT.getTypeForEVT(*DAG.getContext()));
}
// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
SDLoc &DL = CLI.DL;
SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &IsTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
EVT PtrVT = getPointerTy(DAG.getDataLayout());
MVT XLenVT = Subtarget.getXLenVT();
MachineFunction &MF = DAG.getMachineFunction();
// Analyze the operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
if (CallConv == CallingConv::GHC)
ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
else
analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
CallConv == CallingConv::Fast ? CC_RISCV_FastCC
: CC_RISCV);
// Check if it's really possible to do a tail call.
if (IsTailCall)
IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
if (IsTailCall)
++NumTailCalls;
else if (CLI.CB && CLI.CB->isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = ArgCCInfo.getNextStackOffset();
// Create local copies for byval args
SmallVector<SDValue, 8> ByValArgs;
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
ISD::ArgFlagsTy Flags = Outs[i].Flags;
if (!Flags.isByVal())
continue;
SDValue Arg = OutVals[i];
unsigned Size = Flags.getByValSize();
Align Alignment = Flags.getNonZeroByValAlign();
int FI =
MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
/*IsVolatile=*/false,
/*AlwaysInline=*/false, IsTailCall,
MachinePointerInfo(), MachinePointerInfo());
ByValArgs.push_back(FIPtr);
}
if (!IsTailCall)
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
// Copy argument values to their designated locations.
SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
SDValue StackPtr;
for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue ArgValue = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
// Handle passing f64 on RV32D with a soft float ABI as a special case.
bool IsF64OnRV32DSoftABI =
VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
SDValue SplitF64 = DAG.getNode(
RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
SDValue Lo = SplitF64.getValue(0);
SDValue Hi = SplitF64.getValue(1);
Register RegLo = VA.getLocReg();
RegsToPass.push_back(std::make_pair(RegLo, Lo));
if (RegLo == RISCV::X17) {
// Second half of f64 is passed on the stack.
// Work out the address of the stack slot.
if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
// Emit the store.
MemOpChains.push_back(
DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
} else {
// Second half of f64 is passed in another GPR.
assert(RegLo < RISCV::X31 && "Invalid register pair");
Register RegHigh = RegLo + 1;
RegsToPass.push_back(std::make_pair(RegHigh, Hi));
}
continue;
}
// IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
// as any other MemLoc.
// Promote the value if needed.
// For now, only handle fully promoted and indirect arguments.
if (VA.getLocInfo() == CCValAssign::Indirect) {
// Store the argument in a stack slot and pass its address.
Align StackAlign =
std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
getPrefTypeAlign(ArgValue.getValueType(), DAG));
TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
// If the original argument was split (e.g. i128), we need
// to store the required parts of it here (and pass just one address).
// Vectors may be partly split to registers and partly to the stack, in
// which case the base address is partly offset and subsequent stores are
// relative to that.
unsigned ArgIndex = Outs[i].OrigArgIndex;
unsigned ArgPartOffset = Outs[i].PartOffset;
assert(VA.getValVT().isVector() || ArgPartOffset == 0);
// Calculate the total size to store. We don't know what we're actually
// storing until we perform the loop and collect the info.
SmallVector<std::pair<SDValue, SDValue>> Parts;
while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
SDValue PartValue = OutVals[i + 1];
unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
EVT PartVT = PartValue.getValueType();
if (PartVT.isScalableVector())
Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
StoredSize += PartVT.getStoreSize();
StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
Parts.push_back(std::make_pair(PartValue, Offset));
++i;
}
SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
MemOpChains.push_back(
DAG.getStore(Chain, DL, ArgValue, SpillSlot,
MachinePointerInfo::getFixedStack(MF, FI)));
for (const auto &Part : Parts) {
SDValue PartValue = Part.first;
SDValue PartOffset = Part.second;
SDValue Address =
DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
MemOpChains.push_back(
DAG.getStore(Chain, DL, PartValue, Address,
MachinePointerInfo::getFixedStack(MF, FI)));
}
ArgValue = SpillSlot;
} else {
ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
}
// Use local copy if it is a byval arg.
if (Flags.isByVal())
ArgValue = ByValArgs[j++];
if (VA.isRegLoc()) {
// Queue up the argument copies and emit them at the end.
RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
} else {
assert(VA.isMemLoc() && "Argument not register or memory");
assert(!IsTailCall && "Tail call not allowed if stack is used "
"for passing parameters");
// Work out the address of the stack slot.
if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
SDValue Address =
DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
// Emit the store.
MemOpChains.push_back(
DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
}
}
// Join the stores, which are independent of one another.
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
SDValue Glue;
// Build a sequence of copy-to-reg nodes, chained and glued together.
for (auto &Reg : RegsToPass) {
Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
Glue = Chain.getValue(1);
}
// Validate that none of the argument registers have been marked as
// reserved; if so, report an error. Do the same for the return address if
// this is not a tail call.
validateCCReservedRegs(RegsToPass, MF);
if (!IsTailCall &&
MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
MF.getFunction(),
"Return address register required, but has been reserved."});
// If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
// TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
// split it, and the direct call can then be matched by PseudoCALL.
if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = S->getGlobal();
unsigned OpFlags = RISCVII::MO_CALL;
if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
OpFlags = RISCVII::MO_PLT;
Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
unsigned OpFlags = RISCVII::MO_CALL;
if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
nullptr))
OpFlags = RISCVII::MO_PLT;
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
}
// The first call operand is the chain and the second is the target address.
SmallVector<SDValue, 8> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
// Add argument registers to the end of the list so that they are
// known live into the call.
for (auto &Reg : RegsToPass)
Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
if (!IsTailCall) {
// Add a register mask operand representing the call-preserved registers.
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
}
// Glue the call to the argument copies, if any.
if (Glue.getNode())
Ops.push_back(Glue);
// Emit the call.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
if (IsTailCall) {
MF.getFrameInfo().setHasTailCall();
return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
}
Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
Glue = Chain.getValue(1);
// Mark the end of the call, which is glued to the call itself.
Chain = DAG.getCALLSEQ_END(Chain,
DAG.getConstant(NumBytes, DL, PtrVT, true),
DAG.getConstant(0, DL, PtrVT, true),
Glue, DL);
Glue = Chain.getValue(1);
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
// Copy all of the result registers out of their specified physreg.
for (auto &VA : RVLocs) {
// Copy the value out
SDValue RetValue =
DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
// Glue the RetValue to the end of the call sequence
Chain = RetValue.getValue(1);
Glue = RetValue.getValue(2);
if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
SDValue RetValue2 =
DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
Chain = RetValue2.getValue(1);
Glue = RetValue2.getValue(2);
RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
RetValue2);
}
RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
InVals.push_back(RetValue);
}
return Chain;
}
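// Return true if CC_RISCV can assign locations to all of the return values.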
bool RISCVTargetLowering::CanLowerReturn(
CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
Optional<unsigned> FirstMaskArgument;
if (Subtarget.hasVInstructions())
FirstMaskArgument = preAssignMask(Outs);
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
MVT VT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
*this, FirstMaskArgument))
return false;
}
return true;
}
SDValue
RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &DL, SelectionDAG &DAG) const {
const MachineFunction &MF = DAG.getMachineFunction();
const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
// Stores the assignment of the return value to a location.
SmallVector<CCValAssign, 16> RVLocs;
// Info about the registers and stack slot.
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
nullptr, CC_RISCV);
if (CallConv == CallingConv::GHC && !RVLocs.empty())
report_fatal_error("GHC functions return void only");
SDValue Glue;
SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
SDValue Val = OutVals[i];
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
// Handle returning f64 on RV32D with a soft float ABI.
assert(VA.isRegLoc() && "Expected return via registers");
SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
DAG.getVTList(MVT::i32, MVT::i32), Val);
SDValue Lo = SplitF64.getValue(0);
SDValue Hi = SplitF64.getValue(1);
Register RegLo = VA.getLocReg();
assert(RegLo < RISCV::X31 && "Invalid register pair");
Register RegHi = RegLo + 1;
if (STI.isRegisterReservedByUser(RegLo) ||
STI.isRegisterReservedByUser(RegHi))
MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
MF.getFunction(),
"Return value register required, but has been reserved."});
Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
} else {
// Handle a 'normal' return.
Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
if (STI.isRegisterReservedByUser(VA.getLocReg()))
MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
MF.getFunction(),
"Return value register required, but has been reserved."});
// Guarantee that all emitted copies are stuck together.
Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
}
RetOps[0] = Chain; // Update chain.
// Add the glue node if we have it.
if (Glue.getNode()) {
RetOps.push_back(Glue);
}
unsigned RetOpc = RISCVISD::RET_FLAG;
// Interrupt service routines use different return instructions.
const Function &Func = DAG.getMachineFunction().getFunction();
if (Func.hasFnAttribute("interrupt")) {
if (!Func.getReturnType()->isVoidTy())
report_fatal_error(
"Functions with the interrupt attribute must have void return type!");
MachineFunction &MF = DAG.getMachineFunction();
StringRef Kind =
MF.getFunction().getFnAttribute("interrupt").getValueAsString();
if (Kind == "user")
RetOpc = RISCVISD::URET_FLAG;
else if (Kind == "supervisor")
RetOpc = RISCVISD::SRET_FLAG;
else
RetOpc = RISCVISD::MRET_FLAG;
}
return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
}
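// Diagnose any argument register in Regs that has been reserved by the user,
// since it cannot be used for argument passing.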
void RISCVTargetLowering::validateCCReservedRegs(
const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
MachineFunction &MF) const {
const Function &F = MF.getFunction();
const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
if (llvm::any_of(Regs, [&STI](auto Reg) {
return STI.isRegisterReservedByUser(Reg.first);
}))
F.getContext().diagnose(DiagnosticInfoUnsupported{
F, "Argument register required, but has been reserved."});
}
bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return CI->isTailCall();
}
const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define NODE_NAME_CASE(NODE) \
case RISCVISD::NODE: \
return "RISCVISD::" #NODE;
// clang-format off
switch ((RISCVISD::NodeType)Opcode) {
case RISCVISD::FIRST_NUMBER:
break;
NODE_NAME_CASE(RET_FLAG)
NODE_NAME_CASE(URET_FLAG)
NODE_NAME_CASE(SRET_FLAG)
NODE_NAME_CASE(MRET_FLAG)
NODE_NAME_CASE(CALL)
NODE_NAME_CASE(SELECT_CC)
NODE_NAME_CASE(BR_CC)
NODE_NAME_CASE(BuildPairF64)
NODE_NAME_CASE(SplitF64)
NODE_NAME_CASE(TAIL)
NODE_NAME_CASE(MULHSU)
NODE_NAME_CASE(SLLW)
NODE_NAME_CASE(SRAW)
NODE_NAME_CASE(SRLW)
NODE_NAME_CASE(DIVW)
NODE_NAME_CASE(DIVUW)
NODE_NAME_CASE(REMUW)
NODE_NAME_CASE(ROLW)
NODE_NAME_CASE(RORW)
NODE_NAME_CASE(CLZW)
NODE_NAME_CASE(CTZW)
NODE_NAME_CASE(FSLW)
NODE_NAME_CASE(FSRW)
NODE_NAME_CASE(FSL)
NODE_NAME_CASE(FSR)
NODE_NAME_CASE(FMV_H_X)
NODE_NAME_CASE(FMV_X_ANYEXTH)
NODE_NAME_CASE(FMV_W_X_RV64)
NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
NODE_NAME_CASE(FCVT_X)
NODE_NAME_CASE(FCVT_XU)
NODE_NAME_CASE(FCVT_W_RV64)
NODE_NAME_CASE(FCVT_WU_RV64)
NODE_NAME_CASE(STRICT_FCVT_W_RV64)
NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
NODE_NAME_CASE(READ_CYCLE_WIDE)
NODE_NAME_CASE(GREV)
NODE_NAME_CASE(GREVW)
NODE_NAME_CASE(GORC)
NODE_NAME_CASE(GORCW)
NODE_NAME_CASE(SHFL)
NODE_NAME_CASE(SHFLW)
NODE_NAME_CASE(UNSHFL)
NODE_NAME_CASE(UNSHFLW)
NODE_NAME_CASE(BFP)
NODE_NAME_CASE(BFPW)
NODE_NAME_CASE(BCOMPRESS)
NODE_NAME_CASE(BCOMPRESSW)
NODE_NAME_CASE(BDECOMPRESS)
NODE_NAME_CASE(BDECOMPRESSW)
NODE_NAME_CASE(VMV_V_X_VL)
NODE_NAME_CASE(VFMV_V_F_VL)
NODE_NAME_CASE(VMV_X_S)
NODE_NAME_CASE(VMV_S_X_VL)
NODE_NAME_CASE(VFMV_S_F_VL)
NODE_NAME_CASE(SPLAT_VECTOR_I64)
NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
NODE_NAME_CASE(READ_VLENB)
NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
NODE_NAME_CASE(VSLIDEUP_VL)
NODE_NAME_CASE(VSLIDE1UP_VL)
NODE_NAME_CASE(VSLIDEDOWN_VL)
NODE_NAME_CASE(VSLIDE1DOWN_VL)
NODE_NAME_CASE(VID_VL)
NODE_NAME_CASE(VFNCVT_ROD_VL)
NODE_NAME_CASE(VECREDUCE_ADD_VL)
NODE_NAME_CASE(VECREDUCE_UMAX_VL)
NODE_NAME_CASE(VECREDUCE_SMAX_VL)
NODE_NAME_CASE(VECREDUCE_UMIN_VL)
NODE_NAME_CASE(VECREDUCE_SMIN_VL)
NODE_NAME_CASE(VECREDUCE_AND_VL)
NODE_NAME_CASE(VECREDUCE_OR_VL)
NODE_NAME_CASE(VECREDUCE_XOR_VL)
NODE_NAME_CASE(VECREDUCE_FADD_VL)
NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
NODE_NAME_CASE(VECREDUCE_FMIN_VL)
NODE_NAME_CASE(VECREDUCE_FMAX_VL)
NODE_NAME_CASE(ADD_VL)
NODE_NAME_CASE(AND_VL)
NODE_NAME_CASE(MUL_VL)
NODE_NAME_CASE(OR_VL)
NODE_NAME_CASE(SDIV_VL)
NODE_NAME_CASE(SHL_VL)
NODE_NAME_CASE(SREM_VL)
NODE_NAME_CASE(SRA_VL)
NODE_NAME_CASE(SRL_VL)
NODE_NAME_CASE(SUB_VL)
NODE_NAME_CASE(UDIV_VL)
NODE_NAME_CASE(UREM_VL)
NODE_NAME_CASE(XOR_VL)
NODE_NAME_CASE(SADDSAT_VL)
NODE_NAME_CASE(UADDSAT_VL)
NODE_NAME_CASE(SSUBSAT_VL)
NODE_NAME_CASE(USUBSAT_VL)
NODE_NAME_CASE(FADD_VL)
NODE_NAME_CASE(FSUB_VL)
NODE_NAME_CASE(FMUL_VL)
NODE_NAME_CASE(FDIV_VL)
NODE_NAME_CASE(FNEG_VL)
NODE_NAME_CASE(FABS_VL)
NODE_NAME_CASE(FSQRT_VL)
NODE_NAME_CASE(FMA_VL)
NODE_NAME_CASE(FCOPYSIGN_VL)
NODE_NAME_CASE(SMIN_VL)
NODE_NAME_CASE(SMAX_VL)
NODE_NAME_CASE(UMIN_VL)
NODE_NAME_CASE(UMAX_VL)
NODE_NAME_CASE(FMINNUM_VL)
NODE_NAME_CASE(FMAXNUM_VL)
NODE_NAME_CASE(MULHS_VL)
NODE_NAME_CASE(MULHU_VL)
NODE_NAME_CASE(FP_TO_SINT_VL)
NODE_NAME_CASE(FP_TO_UINT_VL)
NODE_NAME_CASE(SINT_TO_FP_VL)
NODE_NAME_CASE(UINT_TO_FP_VL)
NODE_NAME_CASE(FP_EXTEND_VL)
NODE_NAME_CASE(FP_ROUND_VL)
NODE_NAME_CASE(VWMUL_VL)
NODE_NAME_CASE(VWMULU_VL)
NODE_NAME_CASE(VWMULSU_VL)
NODE_NAME_CASE(VWADDU_VL)
NODE_NAME_CASE(SETCC_VL)
NODE_NAME_CASE(VSELECT_VL)
NODE_NAME_CASE(VP_MERGE_VL)
NODE_NAME_CASE(VMAND_VL)
NODE_NAME_CASE(VMOR_VL)
NODE_NAME_CASE(VMXOR_VL)
NODE_NAME_CASE(VMCLR_VL)
NODE_NAME_CASE(VMSET_VL)
NODE_NAME_CASE(VRGATHER_VX_VL)
NODE_NAME_CASE(VRGATHER_VV_VL)
NODE_NAME_CASE(VRGATHEREI16_VV_VL)
NODE_NAME_CASE(VSEXT_VL)
NODE_NAME_CASE(VZEXT_VL)
NODE_NAME_CASE(VCPOP_VL)
NODE_NAME_CASE(VLE_VL)
NODE_NAME_CASE(VSE_VL)
NODE_NAME_CASE(READ_CSR)
NODE_NAME_CASE(WRITE_CSR)
NODE_NAME_CASE(SWAP_CSR)
}
// clang-format on
return nullptr;
#undef NODE_NAME_CASE
}
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
RISCVTargetLowering::ConstraintType
RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default:
break;
case 'f':
return C_RegisterClass;
case 'I':
case 'J':
case 'K':
return C_Immediate;
case 'A':
return C_Memory;
case 'S': // A symbolic address
return C_Other;
}
} else {
if (Constraint == "vr" || Constraint == "vm")
return C_RegisterClass;
}
return TargetLowering::getConstraintType(Constraint);
}
std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint,
MVT VT) const {
// First, see if this is a constraint that directly corresponds to a
// RISCV register class.
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
// TODO: Support fixed vectors up to XLen for P extension?
if (VT.isVector())
break;
return std::make_pair(0U, &RISCV::GPRRegClass);
case 'f':
if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
return std::make_pair(0U, &RISCV::FPR16RegClass);
if (Subtarget.hasStdExtF() && VT == MVT::f32)
return std::make_pair(0U, &RISCV::FPR32RegClass);
if (Subtarget.hasStdExtD() && VT == MVT::f64)
return std::make_pair(0U, &RISCV::FPR64RegClass);
break;
default:
break;
}
} else if (Constraint == "vr") {
for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
&RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
return std::make_pair(0U, RC);
}
} else if (Constraint == "vm") {
if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy))
return std::make_pair(0U, &RISCV::VMV0RegClass);
}
// Clang will correctly decode the usage of register name aliases into their
// official names. However, other frontends like `rustc` do not. This allows
// users of these frontends to use the ABI names for registers in LLVM-style
// register constraints.
unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
.Case("{zero}", RISCV::X0)
.Case("{ra}", RISCV::X1)
.Case("{sp}", RISCV::X2)
.Case("{gp}", RISCV::X3)
.Case("{tp}", RISCV::X4)
.Case("{t0}", RISCV::X5)
.Case("{t1}", RISCV::X6)
.Case("{t2}", RISCV::X7)
.Cases("{s0}", "{fp}", RISCV::X8)
.Case("{s1}", RISCV::X9)
.Case("{a0}", RISCV::X10)
.Case("{a1}", RISCV::X11)
.Case("{a2}", RISCV::X12)
.Case("{a3}", RISCV::X13)
.Case("{a4}", RISCV::X14)
.Case("{a5}", RISCV::X15)
.Case("{a6}", RISCV::X16)
.Case("{a7}", RISCV::X17)
.Case("{s2}", RISCV::X18)
.Case("{s3}", RISCV::X19)
.Case("{s4}", RISCV::X20)
.Case("{s5}", RISCV::X21)
.Case("{s6}", RISCV::X22)
.Case("{s7}", RISCV::X23)
.Case("{s8}", RISCV::X24)
.Case("{s9}", RISCV::X25)
.Case("{s10}", RISCV::X26)
.Case("{s11}", RISCV::X27)
.Case("{t3}", RISCV::X28)
.Case("{t4}", RISCV::X29)
.Case("{t5}", RISCV::X30)
.Case("{t6}", RISCV::X31)
.Default(RISCV::NoRegister);
if (XRegFromAlias != RISCV::NoRegister)
return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
// Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
// TableGen record rather than the AsmName to choose registers for InlineAsm
// constraints, and because we want to match those names to the widest
// floating-point register type available, manually select floating-point
// registers here.
//
// The second case is the ABI name of the register, so that frontends can also
// use the ABI names in register constraint lists.
if (Subtarget.hasStdExtF()) {
unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
.Cases("{f0}", "{ft0}", RISCV::F0_F)
.Cases("{f1}", "{ft1}", RISCV::F1_F)
.Cases("{f2}", "{ft2}", RISCV::F2_F)
.Cases("{f3}", "{ft3}", RISCV::F3_F)
.Cases("{f4}", "{ft4}", RISCV::F4_F)
.Cases("{f5}", "{ft5}", RISCV::F5_F)
.Cases("{f6}", "{ft6}", RISCV::F6_F)
.Cases("{f7}", "{ft7}", RISCV::F7_F)
.Cases("{f8}", "{fs0}", RISCV::F8_F)
.Cases("{f9}", "{fs1}", RISCV::F9_F)
.Cases("{f10}", "{fa0}", RISCV::F10_F)
.Cases("{f11}", "{fa1}", RISCV::F11_F)
.Cases("{f12}", "{fa2}", RISCV::F12_F)
.Cases("{f13}", "{fa3}", RISCV::F13_F)
.Cases("{f14}", "{fa4}", RISCV::F14_F)
.Cases("{f15}", "{fa5}", RISCV::F15_F)
.Cases("{f16}", "{fa6}", RISCV::F16_F)
.Cases("{f17}", "{fa7}", RISCV::F17_F)
.Cases("{f18}", "{fs2}", RISCV::F18_F)
.Cases("{f19}", "{fs3}", RISCV::F19_F)
.Cases("{f20}", "{fs4}", RISCV::F20_F)
.Cases("{f21}", "{fs5}", RISCV::F21_F)
.Cases("{f22}", "{fs6}", RISCV::F22_F)
.Cases("{f23}", "{fs7}", RISCV::F23_F)
.Cases("{f24}", "{fs8}", RISCV::F24_F)
.Cases("{f25}", "{fs9}", RISCV::F25_F)
.Cases("{f26}", "{fs10}", RISCV::F26_F)
.Cases("{f27}", "{fs11}", RISCV::F27_F)
.Cases("{f28}", "{ft8}", RISCV::F28_F)
.Cases("{f29}", "{ft9}", RISCV::F29_F)
.Cases("{f30}", "{ft10}", RISCV::F30_F)
.Cases("{f31}", "{ft11}", RISCV::F31_F)
.Default(RISCV::NoRegister);
if (FReg != RISCV::NoRegister) {
assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
unsigned RegNo = FReg - RISCV::F0_F;
unsigned DReg = RISCV::F0_D + RegNo;
return std::make_pair(DReg, &RISCV::FPR64RegClass);
}
if (VT == MVT::f32 || VT == MVT::Other)
return std::make_pair(FReg, &RISCV::FPR32RegClass);
if (Subtarget.hasStdExtZfh() && VT == MVT::f16) {
unsigned RegNo = FReg - RISCV::F0_F;
unsigned HReg = RISCV::F0_H + RegNo;
return std::make_pair(HReg, &RISCV::FPR16RegClass);
}
}
}
if (Subtarget.hasVInstructions()) {
Register VReg = StringSwitch<Register>(Constraint.lower())
.Case("{v0}", RISCV::V0)
.Case("{v1}", RISCV::V1)
.Case("{v2}", RISCV::V2)
.Case("{v3}", RISCV::V3)
.Case("{v4}", RISCV::V4)
.Case("{v5}", RISCV::V5)
.Case("{v6}", RISCV::V6)
.Case("{v7}", RISCV::V7)
.Case("{v8}", RISCV::V8)
.Case("{v9}", RISCV::V9)
.Case("{v10}", RISCV::V10)
.Case("{v11}", RISCV::V11)
.Case("{v12}", RISCV::V12)
.Case("{v13}", RISCV::V13)
.Case("{v14}", RISCV::V14)
.Case("{v15}", RISCV::V15)
.Case("{v16}", RISCV::V16)
.Case("{v17}", RISCV::V17)
.Case("{v18}", RISCV::V18)
.Case("{v19}", RISCV::V19)
.Case("{v20}", RISCV::V20)
.Case("{v21}", RISCV::V21)
.Case("{v22}", RISCV::V22)
.Case("{v23}", RISCV::V23)
.Case("{v24}", RISCV::V24)
.Case("{v25}", RISCV::V25)
.Case("{v26}", RISCV::V26)
.Case("{v27}", RISCV::V27)
.Case("{v28}", RISCV::V28)
.Case("{v29}", RISCV::V29)
.Case("{v30}", RISCV::V30)
.Case("{v31}", RISCV::V31)
.Default(RISCV::NoRegister);
if (VReg != RISCV::NoRegister) {
if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
return std::make_pair(VReg, &RISCV::VMRegClass);
if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
return std::make_pair(VReg, &RISCV::VRRegClass);
for (const auto *RC :
{&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
return std::make_pair(VReg, RC);
}
}
}
}
- return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+ std::pair<Register, const TargetRegisterClass *> Res =
+ TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+
+ // If we picked one of the Zfinx register classes, remap it to the GPR class.
+ // FIXME: When Zfinx is supported in CodeGen this will need to take the
+ // Subtarget into account.
+ if (Res.second == &RISCV::GPRF16RegClass ||
+ Res.second == &RISCV::GPRF32RegClass ||
+ Res.second == &RISCV::GPRF64RegClass)
+ return std::make_pair(Res.first, &RISCV::GPRRegClass);
+
+ return Res;
}
unsigned
RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
// Currently only support length 1 constraints.
if (ConstraintCode.size() == 1) {
switch (ConstraintCode[0]) {
case 'A':
return InlineAsm::Constraint_A;
default:
break;
}
}
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
void RISCVTargetLowering::LowerAsmOperandForConstraint(
SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
// Currently only support length 1 constraints.
if (Constraint.length() == 1) {
switch (Constraint[0]) {
case 'I':
// Validate & create a 12-bit signed immediate operand.
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
uint64_t CVal = C->getSExtValue();
if (isInt<12>(CVal))
Ops.push_back(
DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
}
return;
case 'J':
// Validate & create an integer zero operand.
if (auto *C = dyn_cast<ConstantSDNode>(Op))
if (C->getZExtValue() == 0)
Ops.push_back(
DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
return;
case 'K':
// Validate & create a 5-bit unsigned immediate operand.
if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
uint64_t CVal = C->getZExtValue();
if (isUInt<5>(CVal))
Ops.push_back(
DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
}
return;
case 'S':
if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
GA->getValueType(0)));
} else if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
Ops.push_back(DAG.getTargetBlockAddress(BA->getBlockAddress(),
BA->getValueType(0)));
}
return;
default:
break;
}
}
TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
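// Emit the leading fence for an atomic operation: a seq_cst fence before
// sequentially consistent loads, and a release fence before release (or
// stronger) stores.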
Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
Instruction *Inst,
AtomicOrdering Ord) const {
if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
return Builder.CreateFence(Ord);
if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
return Builder.CreateFence(AtomicOrdering::Release);
return nullptr;
}
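// Emit the trailing fence for an atomic operation: an acquire fence after
// acquire (or stronger) loads.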
Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
Instruction *Inst,
AtomicOrdering Ord) const {
if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
return Builder.CreateFence(AtomicOrdering::Acquire);
return nullptr;
}
TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
// atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
// point operations can't be used in an lr/sc sequence without breaking the
// forward-progress guarantee.
if (AI->isFloatingPointOperation())
return AtomicExpansionKind::CmpXChg;
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
if (Size == 8 || Size == 16)
return AtomicExpansionKind::MaskedIntrinsic;
return AtomicExpansionKind::None;
}
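// Map an AtomicRMW binary operation to the corresponding masked atomicrmw
// intrinsic for the given XLen.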
static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
if (XLen == 32) {
switch (BinOp) {
default:
llvm_unreachable("Unexpected AtomicRMW BinOp");
case AtomicRMWInst::Xchg:
return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
case AtomicRMWInst::Add:
return Intrinsic::riscv_masked_atomicrmw_add_i32;
case AtomicRMWInst::Sub:
return Intrinsic::riscv_masked_atomicrmw_sub_i32;
case AtomicRMWInst::Nand:
return Intrinsic::riscv_masked_atomicrmw_nand_i32;
case AtomicRMWInst::Max:
return Intrinsic::riscv_masked_atomicrmw_max_i32;
case AtomicRMWInst::Min:
return Intrinsic::riscv_masked_atomicrmw_min_i32;
case AtomicRMWInst::UMax:
return Intrinsic::riscv_masked_atomicrmw_umax_i32;
case AtomicRMWInst::UMin:
return Intrinsic::riscv_masked_atomicrmw_umin_i32;
}
}
if (XLen == 64) {
switch (BinOp) {
default:
llvm_unreachable("Unexpected AtomicRMW BinOp");
case AtomicRMWInst::Xchg:
return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
case AtomicRMWInst::Add:
return Intrinsic::riscv_masked_atomicrmw_add_i64;
case AtomicRMWInst::Sub:
return Intrinsic::riscv_masked_atomicrmw_sub_i64;
case AtomicRMWInst::Nand:
return Intrinsic::riscv_masked_atomicrmw_nand_i64;
case AtomicRMWInst::Max:
return Intrinsic::riscv_masked_atomicrmw_max_i64;
case AtomicRMWInst::Min:
return Intrinsic::riscv_masked_atomicrmw_min_i64;
case AtomicRMWInst::UMax:
return Intrinsic::riscv_masked_atomicrmw_umax_i64;
case AtomicRMWInst::UMin:
return Intrinsic::riscv_masked_atomicrmw_umin_i64;
}
}
llvm_unreachable("Unexpected XLen\n");
}
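// Build the call to the masked atomicrmw intrinsic. On RV64 the i32 operands
// are sign-extended to i64 before the call and the result is truncated back
// to i32 afterwards.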
Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
unsigned XLen = Subtarget.getXLen();
Value *Ordering =
Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
Type *Tys[] = {AlignedAddr->getType()};
Function *LrwOpScwLoop = Intrinsic::getDeclaration(
AI->getModule(),
getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);
if (XLen == 64) {
Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
}
Value *Result;
// Must pass the shift amount needed to sign extend the loaded value prior
// to performing a signed comparison for min/max. ShiftAmt is the number of
// bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
// is the number of bits to left+right shift the value in order to
// sign-extend.
if (AI->getOperation() == AtomicRMWInst::Min ||
AI->getOperation() == AtomicRMWInst::Max) {
const DataLayout &DL = AI->getModule()->getDataLayout();
unsigned ValWidth =
DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
Value *SextShamt =
Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
Result = Builder.CreateCall(LrwOpScwLoop,
{AlignedAddr, Incr, Mask, SextShamt, Ordering});
} else {
Result =
Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
}
if (XLen == 64)
Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
return Result;
}
TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
AtomicCmpXchgInst *CI) const {
unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
if (Size == 8 || Size == 16)
return AtomicExpansionKind::MaskedIntrinsic;
return AtomicExpansionKind::None;
}
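// Build the call to the masked cmpxchg intrinsic. On RV64 the i32 operands
// are sign-extended to i64 before the call and the result is truncated back
// to i32 afterwards.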
Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
unsigned XLen = Subtarget.getXLen();
Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
if (XLen == 64) {
CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
}
Type *Tys[] = {AlignedAddr->getType()};
Function *MaskedCmpXchg =
Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
Value *Result = Builder.CreateCall(
MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
if (XLen == 64)
Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
return Result;
}
bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
return false;
}
bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
EVT VT) const {
if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
return false;
switch (FPVT.getSimpleVT().SimpleTy) {
case MVT::f16:
return Subtarget.hasStdExtZfh();
case MVT::f32:
return Subtarget.hasStdExtF();
case MVT::f64:
return Subtarget.hasStdExtD();
default:
return false;
}
}
unsigned RISCVTargetLowering::getJumpTableEncoding() const {
// If we are using the small code model, we can reduce the size of each jump
// table entry to 4 bytes.
if (Subtarget.is64Bit() && !isPositionIndependent() &&
getTargetMachine().getCodeModel() == CodeModel::Small) {
return MachineJumpTableInfo::EK_Custom32;
}
return TargetLowering::getJumpTableEncoding();
}
const MCExpr *RISCVTargetLowering::LowerCustomJumpTableEntry(
const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
unsigned uid, MCContext &Ctx) const {
assert(Subtarget.is64Bit() && !isPositionIndependent() &&
getTargetMachine().getCodeModel() == CodeModel::Small);
return MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
}
bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const {
VT = VT.getScalarType();
if (!VT.isSimple())
return false;
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f16:
return Subtarget.hasStdExtZfh();
case MVT::f32:
return Subtarget.hasStdExtF();
case MVT::f64:
return Subtarget.hasStdExtD();
default:
break;
}
return false;
}
Register RISCVTargetLowering::getExceptionPointerRegister(
const Constant *PersonalityFn) const {
return RISCV::X10;
}
Register RISCVTargetLowering::getExceptionSelectorRegister(
const Constant *PersonalityFn) const {
return RISCV::X11;
}
bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
// Return false to suppress unnecessary extensions if a libcall argument or
// return value is of f32 type for the LP64 ABI.
RISCVABI::ABI ABI = Subtarget.getTargetABI();
if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
return false;
return true;
}
bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
if (Subtarget.is64Bit() && Type == MVT::i32)
return true;
return IsSigned;
}
bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
SDValue C) const {
// Check integral scalar types.
if (VT.isScalarInteger()) {
// Omit the optimization if the subtarget has the M extension and the data
// size exceeds XLen.
if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
return false;
if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
// Break the MUL into a SLLI and an ADD/SUB.
const APInt &Imm = ConstNode->getAPIntValue();
if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
(1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
return true;
// Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
(Imm - 8).isPowerOf2()))
return true;
// Omit the following optimization if the subtarget has the M extension
// and the data size >= XLen.
if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
return false;
// Break the MUL into two SLLI instructions and an ADD/SUB if Imm would
// otherwise need a LUI/ADDI pair.
if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
(1 - ImmS).isPowerOf2())
return true;
}
}
}
return false;
}
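// A few concrete decompositions accepted above (illustrative, subject to the
// M/Zba guards):
//   x * 9    ->  (x << 3) + x          ; Imm - 1 is a power of two
//   x * 7    ->  (x << 3) - x          ; Imm + 1 is a power of two
//   x * 4098 ->  sh1add x, (x << 12)   ; Zba, Imm - 2 == 1 << 12
//   x * 4608 ->  ((x << 3) + x) << 9   ; Imm = 9 << 9, avoids a LUI/ADDI pair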
bool RISCVTargetLowering::isMulAddWithConstProfitable(
const SDValue &AddNode, const SDValue &ConstNode) const {
// Let the DAGCombiner decide for vectors.
EVT VT = AddNode.getValueType();
if (VT.isVector())
return true;
// Let the DAGCombiner decide for larger types.
if (VT.getScalarSizeInBits() > Subtarget.getXLen())
return true;
// It is worse if c1 is simm12 while c1*c2 is not.
ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
const APInt &C1 = C1Node->getAPIntValue();
const APInt &C2 = C2Node->getAPIntValue();
if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
return false;
// Default to true and let the DAGCombiner decide.
return true;
}
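// A concrete case for the check above (illustrative): for (x + 4) * 1024,
// C1 = 4 fits an ADDI immediate but C1 * C2 = 4096 does not, so distributing
// the multiply into (x * 1024) + 4096 would force a separate materialization
// of 4096; returning false keeps the add-then-multiply form.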
bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *Fast) const {
if (!VT.isVector())
return false;
EVT ElemVT = VT.getVectorElementType();
if (Alignment >= ElemVT.getStoreSize()) {
if (Fast)
*Fast = true;
return true;
}
return false;
}
bool RISCVTargetLowering::splitValueIntoRegisterParts(
SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
bool IsABIRegCopy = CC.hasValue();
EVT ValueVT = Val.getValueType();
if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
// Cast the f16 to i16, extend to i32, pad with ones to make a float NaN,
// and cast to f32.
Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
DAG.getConstant(0xFFFF0000, DL, MVT::i32));
Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
Parts[0] = Val;
return true;
}
if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
LLVMContext &Context = *DAG.getContext();
EVT ValueEltVT = ValueVT.getVectorElementType();
EVT PartEltVT = PartVT.getVectorElementType();
unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
if (PartVTBitSize % ValueVTBitSize == 0) {
assert(PartVTBitSize >= ValueVTBitSize);
// If the element types are different, first widen the value to a vector
// with the same element type but the total size of PartVT, then bitcast
// to PartVT.
// For example, to copy a <vscale x 1 x i8> value into <vscale x 4 x i16>,
// we widen <vscale x 1 x i8> to <vscale x 8 x i8> with an insert_subvector
// and then bitcast the result to <vscale x 4 x i16>.
if (ValueEltVT != PartEltVT) {
if (PartVTBitSize > ValueVTBitSize) {
unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
assert(Count != 0 && "The number of elements should not be zero.");
EVT SameEltTypeVT =
EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT,
DAG.getUNDEF(SameEltTypeVT), Val,
DAG.getVectorIdxConstant(0, DL));
}
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
} else {
Val =
DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
Val, DAG.getVectorIdxConstant(0, DL));
}
Parts[0] = Val;
return true;
}
}
return false;
}
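// Illustrative f16 case for the NaN-boxing above: f16 1.0 has bit pattern
// 0x3C00; after the any-extend and the OR it becomes 0xFFFF3C00, i.e. a
// properly NaN-boxed value when reinterpreted as f32, as the FP register
// ABI expects for narrower floating-point values.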
SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
bool IsABIRegCopy = CC.hasValue();
if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
SDValue Val = Parts[0];
// Cast the f32 to i32, truncate to i16, and cast back to f16.
Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
Val = DAG.getNode(ISD::BITCAST, DL, MVT::f16, Val);
return Val;
}
if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
LLVMContext &Context = *DAG.getContext();
SDValue Val = Parts[0];
EVT ValueEltVT = ValueVT.getVectorElementType();
EVT PartEltVT = PartVT.getVectorElementType();
unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
if (PartVTBitSize % ValueVTBitSize == 0) {
assert(PartVTBitSize >= ValueVTBitSize);
EVT SameEltTypeVT = ValueVT;
// If the element types are different, bitcast the part to a vector type
// with the same element type as ValueVT first.
// For example, to copy a <vscale x 1 x i8> value out of <vscale x 4 x i16>,
// we first bitcast <vscale x 4 x i16> to <vscale x 8 x i8>, and then
// extract the <vscale x 1 x i8> subvector.
if (ValueEltVT != PartEltVT) {
unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits();
assert(Count != 0 && "The number of elements should not be zero.");
SameEltTypeVT =
EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true);
Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
}
Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
DAG.getVectorIdxConstant(0, DL));
return Val;
}
}
return SDValue();
}
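// Illustrative inverse of the split above: a NaN-boxed f32 with bit pattern
// 0xFFFF3C00 is truncated to the i16 0x3C00 and bitcast back to the f16
// value 1.0.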
SDValue
RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N, 0); // Lower SDIV as SDIV
assert((Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()) &&
"Unexpected divisor!");
// Conditional move is needed, so do the transformation iff Zbt is enabled.
if (!Subtarget.hasStdExtZbt())
return SDValue();
// When |Divisor| >= 2^12, the transformation is not profitable. Dividing by
// 2 also lengthens the critical path, so we keep using the original DAGs for
// these cases.
unsigned Lg2 = Divisor.countTrailingZeros();
if (Lg2 == 1 || Lg2 >= 12)
return SDValue();
// fold (sdiv X, pow2)
EVT VT = N->getValueType(0);
if (VT != MVT::i32 && !(Subtarget.is64Bit() && VT == MVT::i64))
return SDValue();
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
// Add (N0 < 0) ? Pow2 - 1 : 0;
SDValue Cmp = DAG.getSetCC(DL, VT, N0, Zero, ISD::SETLT);
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
SDValue Sel = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
Created.push_back(Cmp.getNode());
Created.push_back(Add.getNode());
Created.push_back(Sel.getNode());
// Divide by pow2.
SDValue SRA =
DAG.getNode(ISD::SRA, DL, VT, Sel, DAG.getConstant(Lg2, DL, VT));
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
if (Divisor.isNonNegative())
return SRA;
Created.push_back(SRA.getNode());
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
}
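// A concrete instance of the expansion above (illustrative, assuming Zbt is
// available so the SELECT maps to a conditional move):
//   sdiv x, 8   ->  t = select (x < 0), (x + 7), x;  sra(t, 3)
//   sdiv x, -8  ->  0 - sra(t, 3)
// Lg2 == 1 and Lg2 >= 12 are deliberately left to the generic lowering.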
#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"
Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
const MachineFunction &MF) const {
Register Reg = MatchRegisterAltName(RegName);
if (Reg == RISCV::NoRegister)
Reg = MatchRegisterName(RegName);
if (Reg == RISCV::NoRegister)
report_fatal_error(
Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
StringRef(RegName) + "\"."));
return Reg;
}
namespace llvm {
namespace RISCVVIntrinsicsTable {
#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"
} // namespace RISCVVIntrinsicsTable
} // namespace llvm
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp
index d66140a726f6..7bca2084c448 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -1,3327 +1,3321 @@
//===- Attributor.cpp - Module-wide attribute deduction -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements an interprocedural pass that deduces and/or propagates
// attributes. This is done in an abstract interpretation style fixpoint
// iteration. See the Attributor.h file comment and the class descriptions in
// that file for more information.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <string>
using namespace llvm;
#define DEBUG_TYPE "attributor"
DEBUG_COUNTER(ManifestDBGCounter, "attributor-manifest",
"Determine what attributes are manifested in the IR");
STATISTIC(NumFnDeleted, "Number of functions deleted");
STATISTIC(NumFnWithExactDefinition,
"Number of functions with exact definitions");
STATISTIC(NumFnWithoutExactDefinition,
"Number of functions without exact definitions");
STATISTIC(NumFnShallowWrappersCreated, "Number of shallow wrappers created");
STATISTIC(NumAttributesTimedOut,
"Number of abstract attributes timed out before fixpoint");
STATISTIC(NumAttributesValidFixpoint,
"Number of abstract attributes in a valid fixpoint state");
STATISTIC(NumAttributesManifested,
"Number of abstract attributes manifested in IR");
// TODO: Determine a good default value.
//
// In the LLVM test suite and SPEC2006, 32 iterations do not seem to induce
// compile-time overhead (when run with the first 5 abstract attributes). The
// results also indicate that we never reach 32 iterations but always find a
// fixpoint sooner.
//
// This will evolve further once we perform two interleaved fixpoint
// iterations: bottom-up and top-down.
static cl::opt<unsigned>
SetFixpointIterations("attributor-max-iterations", cl::Hidden,
cl::desc("Maximal number of fixpoint iterations."),
cl::init(32));
static cl::opt<unsigned, true> MaxInitializationChainLengthX(
"attributor-max-initialization-chain-length", cl::Hidden,
cl::desc(
"Maximal number of chained initializations (to avoid stack overflows)"),
cl::location(MaxInitializationChainLength), cl::init(1024));
unsigned llvm::MaxInitializationChainLength;
static cl::opt<bool> VerifyMaxFixpointIterations(
"attributor-max-iterations-verify", cl::Hidden,
cl::desc("Verify that max-iterations is a tight bound for a fixpoint"),
cl::init(false));
static cl::opt<bool> AnnotateDeclarationCallSites(
"attributor-annotate-decl-cs", cl::Hidden,
cl::desc("Annotate call sites of function declarations."), cl::init(false));
static cl::opt<bool> EnableHeapToStack("enable-heap-to-stack-conversion",
cl::init(true), cl::Hidden);
static cl::opt<bool>
AllowShallowWrappers("attributor-allow-shallow-wrappers", cl::Hidden,
cl::desc("Allow the Attributor to create shallow "
"wrappers for non-exact definitions."),
cl::init(false));
static cl::opt<bool>
AllowDeepWrapper("attributor-allow-deep-wrappers", cl::Hidden,
cl::desc("Allow the Attributor to use IP information "
"derived from non-exact functions via cloning"),
cl::init(false));
// These options can only be used in debug builds.
#ifndef NDEBUG
static cl::list<std::string>
SeedAllowList("attributor-seed-allow-list", cl::Hidden,
cl::desc("Comma seperated list of attribute names that are "
"allowed to be seeded."),
cl::ZeroOrMore, cl::CommaSeparated);
static cl::list<std::string> FunctionSeedAllowList(
"attributor-function-seed-allow-list", cl::Hidden,
cl::desc("Comma seperated list of function names that are "
"allowed to be seeded."),
cl::ZeroOrMore, cl::CommaSeparated);
#endif
static cl::opt<bool>
DumpDepGraph("attributor-dump-dep-graph", cl::Hidden,
cl::desc("Dump the dependency graph to dot files."),
cl::init(false));
static cl::opt<std::string> DepGraphDotFileNamePrefix(
"attributor-depgraph-dot-filename-prefix", cl::Hidden,
cl::desc("The prefix used for the CallGraph dot file names."));
static cl::opt<bool> ViewDepGraph("attributor-view-dep-graph", cl::Hidden,
cl::desc("View the dependency graph."),
cl::init(false));
static cl::opt<bool> PrintDependencies("attributor-print-dep", cl::Hidden,
cl::desc("Print attribute dependencies"),
cl::init(false));
static cl::opt<bool> EnableCallSiteSpecific(
"attributor-enable-call-site-specific-deduction", cl::Hidden,
cl::desc("Allow the Attributor to do call site specific analysis"),
cl::init(false));
static cl::opt<bool>
PrintCallGraph("attributor-print-call-graph", cl::Hidden,
cl::desc("Print Attributor's internal call graph"),
cl::init(false));
static cl::opt<bool> SimplifyAllLoads("attributor-simplify-all-loads",
cl::Hidden,
cl::desc("Try to simplify all loads."),
cl::init(true));
/// Logic operators for the change status enum class.
///
///{
ChangeStatus llvm::operator|(ChangeStatus L, ChangeStatus R) {
return L == ChangeStatus::CHANGED ? L : R;
}
ChangeStatus &llvm::operator|=(ChangeStatus &L, ChangeStatus R) {
L = L | R;
return L;
}
ChangeStatus llvm::operator&(ChangeStatus L, ChangeStatus R) {
return L == ChangeStatus::UNCHANGED ? L : R;
}
ChangeStatus &llvm::operator&=(ChangeStatus &L, ChangeStatus R) {
L = L & R;
return L;
}
///}
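// In other words, '|' lets CHANGED win and '&' lets UNCHANGED win, e.g.
//   (ChangeStatus::CHANGED | ChangeStatus::UNCHANGED) == ChangeStatus::CHANGED
//   (ChangeStatus::CHANGED & ChangeStatus::UNCHANGED) == ChangeStatus::UNCHANGED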
bool AA::isNoSyncInst(Attributor &A, const Instruction &I,
const AbstractAttribute &QueryingAA) {
// We are looking for volatile instructions or non-relaxed atomics.
if (const auto *CB = dyn_cast<CallBase>(&I)) {
if (CB->hasFnAttr(Attribute::NoSync))
return true;
// Non-convergent and readnone imply nosync.
if (!CB->isConvergent() && !CB->mayReadOrWriteMemory())
return true;
if (AANoSync::isNoSyncIntrinsic(&I))
return true;
const auto &NoSyncAA = A.getAAFor<AANoSync>(
QueryingAA, IRPosition::callsite_function(*CB), DepClassTy::OPTIONAL);
return NoSyncAA.isAssumedNoSync();
}
if (!I.mayReadOrWriteMemory())
return true;
return !I.isVolatile() && !AANoSync::isNonRelaxedAtomic(&I);
}
bool AA::isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA,
const Value &V) {
if (auto *C = dyn_cast<Constant>(&V))
return !C->isThreadDependent();
// TODO: Inspect and cache more complex instructions.
if (auto *CB = dyn_cast<CallBase>(&V))
return CB->getNumOperands() == 0 && !CB->mayHaveSideEffects() &&
!CB->mayReadFromMemory();
const Function *Scope = nullptr;
if (auto *I = dyn_cast<Instruction>(&V))
Scope = I->getFunction();
if (auto *A = dyn_cast<Argument>(&V))
Scope = A->getParent();
if (!Scope)
return false;
auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
QueryingAA, IRPosition::function(*Scope), DepClassTy::OPTIONAL);
return NoRecurseAA.isAssumedNoRecurse();
}
Constant *AA::getInitialValueForObj(Value &Obj, Type &Ty,
const TargetLibraryInfo *TLI) {
if (isa<AllocaInst>(Obj))
return UndefValue::get(&Ty);
if (isAllocationFn(&Obj, TLI))
return getInitialValueOfAllocation(&cast<CallBase>(Obj), TLI, &Ty);
auto *GV = dyn_cast<GlobalVariable>(&Obj);
if (!GV || !GV->hasLocalLinkage())
return nullptr;
if (!GV->hasInitializer())
return UndefValue::get(&Ty);
return dyn_cast_or_null<Constant>(getWithType(*GV->getInitializer(), Ty));
}
bool AA::isValidInScope(const Value &V, const Function *Scope) {
if (isa<Constant>(V))
return true;
if (auto *I = dyn_cast<Instruction>(&V))
return I->getFunction() == Scope;
if (auto *A = dyn_cast<Argument>(&V))
return A->getParent() == Scope;
return false;
}
bool AA::isValidAtPosition(const Value &V, const Instruction &CtxI,
InformationCache &InfoCache) {
if (isa<Constant>(V))
return true;
const Function *Scope = CtxI.getFunction();
if (auto *A = dyn_cast<Argument>(&V))
return A->getParent() == Scope;
if (auto *I = dyn_cast<Instruction>(&V))
if (I->getFunction() == Scope) {
const DominatorTree *DT =
InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*Scope);
return DT && DT->dominates(I, &CtxI);
}
return false;
}
Value *AA::getWithType(Value &V, Type &Ty) {
if (V.getType() == &Ty)
return &V;
if (isa<PoisonValue>(V))
return PoisonValue::get(&Ty);
if (isa<UndefValue>(V))
return UndefValue::get(&Ty);
if (auto *C = dyn_cast<Constant>(&V)) {
if (C->isNullValue())
return Constant::getNullValue(&Ty);
if (C->getType()->isPointerTy() && Ty.isPointerTy())
return ConstantExpr::getPointerCast(C, &Ty);
if (C->getType()->getPrimitiveSizeInBits() >= Ty.getPrimitiveSizeInBits()) {
if (C->getType()->isIntegerTy() && Ty.isIntegerTy())
return ConstantExpr::getTrunc(C, &Ty, /* OnlyIfReduced */ true);
if (C->getType()->isFloatingPointTy() && Ty.isFloatingPointTy())
return ConstantExpr::getFPTrunc(C, &Ty, /* OnlyIfReduced */ true);
}
}
return nullptr;
}
Optional<Value *>
AA::combineOptionalValuesInAAValueLatice(const Optional<Value *> &A,
const Optional<Value *> &B, Type *Ty) {
if (A == B)
return A;
if (!B.hasValue())
return A;
if (*B == nullptr)
return nullptr;
if (!A.hasValue())
return Ty ? getWithType(**B, *Ty) : nullptr;
if (*A == nullptr)
return nullptr;
if (!Ty)
Ty = (*A)->getType();
if (isa_and_nonnull<UndefValue>(*A))
return getWithType(**B, *Ty);
if (isa<UndefValue>(*B))
return A;
if (*A && *B && *A == getWithType(**B, *Ty))
return A;
return nullptr;
}
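// Roughly, the lattice combination above treats None as "no value seen yet",
// a null Value* as "no single simplified value", and undef as compatible with
// anything, e.g. (with a type provided):
//   combine(None, V)           == V adjusted to Ty
//   combine(undef, V)          == V adjusted to Ty
//   combine(V, W) with V != W  == nullptr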
bool AA::getPotentialCopiesOfStoredValue(
Attributor &A, StoreInst &SI, SmallSetVector<Value *, 4> &PotentialCopies,
const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation) {
Value &Ptr = *SI.getPointerOperand();
SmallVector<Value *, 8> Objects;
- if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, QueryingAA, &SI)) {
+ if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, QueryingAA, &SI,
+ UsedAssumedInformation)) {
LLVM_DEBUG(
dbgs() << "Underlying objects stored into could not be determined\n";);
return false;
}
SmallVector<const AAPointerInfo *> PIs;
SmallVector<Value *> NewCopies;
for (Value *Obj : Objects) {
LLVM_DEBUG(dbgs() << "Visit underlying object " << *Obj << "\n");
if (isa<UndefValue>(Obj))
continue;
if (isa<ConstantPointerNull>(Obj)) {
// A null pointer access can be undefined but any offset from null may
// be OK. We do not try to optimize the latter.
if (!NullPointerIsDefined(SI.getFunction(),
Ptr.getType()->getPointerAddressSpace()) &&
A.getAssumedSimplified(Ptr, QueryingAA, UsedAssumedInformation) ==
Obj)
continue;
LLVM_DEBUG(
dbgs() << "Underlying object is a valid nullptr, giving up.\n";);
return false;
}
if (!isa<AllocaInst>(Obj) && !isa<GlobalVariable>(Obj) &&
!isNoAliasCall(Obj)) {
LLVM_DEBUG(dbgs() << "Underlying object is not supported yet: " << *Obj
<< "\n";);
return false;
}
if (auto *GV = dyn_cast<GlobalVariable>(Obj))
if (!GV->hasLocalLinkage()) {
LLVM_DEBUG(dbgs() << "Underlying object is global with external "
"linkage, not supported yet: "
<< *Obj << "\n";);
return false;
}
auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) {
if (!Acc.isRead())
return true;
auto *LI = dyn_cast<LoadInst>(Acc.getRemoteInst());
if (!LI) {
LLVM_DEBUG(dbgs() << "Underlying object read through a non-load "
"instruction not supported yet: "
<< *Acc.getRemoteInst() << "\n";);
return false;
}
NewCopies.push_back(LI);
return true;
};
auto &PI = A.getAAFor<AAPointerInfo>(QueryingAA, IRPosition::value(*Obj),
DepClassTy::NONE);
if (!PI.forallInterferingAccesses(SI, CheckAccess)) {
LLVM_DEBUG(
dbgs()
<< "Failed to verify all interfering accesses for underlying object: "
<< *Obj << "\n");
return false;
}
PIs.push_back(&PI);
}
for (auto *PI : PIs) {
if (!PI->getState().isAtFixpoint())
UsedAssumedInformation = true;
A.recordDependence(*PI, QueryingAA, DepClassTy::OPTIONAL);
}
PotentialCopies.insert(NewCopies.begin(), NewCopies.end());
return true;
}
static bool isAssumedReadOnlyOrReadNone(Attributor &A, const IRPosition &IRP,
const AbstractAttribute &QueryingAA,
bool RequireReadNone, bool &IsKnown) {
IRPosition::Kind Kind = IRP.getPositionKind();
if (Kind == IRPosition::IRP_FUNCTION || Kind == IRPosition::IRP_CALL_SITE) {
const auto &MemLocAA =
A.getAAFor<AAMemoryLocation>(QueryingAA, IRP, DepClassTy::NONE);
if (MemLocAA.isAssumedReadNone()) {
IsKnown = MemLocAA.isKnownReadNone();
if (!IsKnown)
A.recordDependence(MemLocAA, QueryingAA, DepClassTy::OPTIONAL);
return true;
}
}
const auto &MemBehaviorAA =
A.getAAFor<AAMemoryBehavior>(QueryingAA, IRP, DepClassTy::NONE);
if (MemBehaviorAA.isAssumedReadNone() ||
(!RequireReadNone && MemBehaviorAA.isAssumedReadOnly())) {
IsKnown = RequireReadNone ? MemBehaviorAA.isKnownReadNone()
: MemBehaviorAA.isKnownReadOnly();
if (!IsKnown)
A.recordDependence(MemBehaviorAA, QueryingAA, DepClassTy::OPTIONAL);
return true;
}
return false;
}
bool AA::isAssumedReadOnly(Attributor &A, const IRPosition &IRP,
const AbstractAttribute &QueryingAA, bool &IsKnown) {
return isAssumedReadOnlyOrReadNone(A, IRP, QueryingAA,
/* RequireReadNone */ false, IsKnown);
}
bool AA::isAssumedReadNone(Attributor &A, const IRPosition &IRP,
const AbstractAttribute &QueryingAA, bool &IsKnown) {
return isAssumedReadOnlyOrReadNone(A, IRP, QueryingAA,
/* RequireReadNone */ true, IsKnown);
}
static bool
isPotentiallyReachable(Attributor &A, const Instruction &FromI,
const Instruction *ToI, const Function &ToFn,
const AbstractAttribute &QueryingAA,
std::function<bool(const Function &F)> GoBackwardsCB) {
LLVM_DEBUG(dbgs() << "[AA] isPotentiallyReachable @" << ToFn.getName()
<< " from " << FromI << " [GBCB: " << bool(GoBackwardsCB)
<< "]\n");
SmallPtrSet<const Instruction *, 8> Visited;
SmallVector<const Instruction *> Worklist;
Worklist.push_back(&FromI);
while (!Worklist.empty()) {
const Instruction *CurFromI = Worklist.pop_back_val();
if (!Visited.insert(CurFromI).second)
continue;
const Function *FromFn = CurFromI->getFunction();
if (FromFn == &ToFn) {
if (!ToI)
return true;
LLVM_DEBUG(dbgs() << "[AA] check " << *ToI << " from " << *CurFromI
<< " intraprocedurally\n");
const auto &ReachabilityAA = A.getAAFor<AAReachability>(
QueryingAA, IRPosition::function(ToFn), DepClassTy::OPTIONAL);
bool Result = ReachabilityAA.isAssumedReachable(A, *CurFromI, *ToI);
LLVM_DEBUG(dbgs() << "[AA] " << *CurFromI << " "
<< (Result ? "can potentially " : "cannot ") << "reach "
<< *ToI << " [Intra]\n");
if (Result)
return true;
continue;
}
// TODO: If we can go arbitrarily backwards we will eventually reach an
// entry point that can reach ToI. Only once this takes a set of blocks
// through which we cannot go, or once we track internal functions not
// accessible from the outside, does it make sense to perform backwards
// analysis in the absence of a GoBackwardsCB.
if (!GoBackwardsCB) {
LLVM_DEBUG(dbgs() << "[AA] check @" << ToFn.getName() << " from "
<< *CurFromI << " is not checked backwards, abort\n");
return true;
}
// Check if the current instruction is already known to reach the ToFn.
const auto &FnReachabilityAA = A.getAAFor<AAFunctionReachability>(
QueryingAA, IRPosition::function(*FromFn), DepClassTy::OPTIONAL);
bool Result = FnReachabilityAA.instructionCanReach(
A, *CurFromI, ToFn, /* UseBackwards */ false);
LLVM_DEBUG(dbgs() << "[AA] " << *CurFromI << " in @" << FromFn->getName()
<< " " << (Result ? "can potentially " : "cannot ")
<< "reach @" << ToFn.getName() << " [FromFn]\n");
if (Result)
return true;
// If we do not go backwards from the FromFn we are done here and so far we
// could not find a way to reach ToFn/ToI.
if (!GoBackwardsCB(*FromFn))
continue;
LLVM_DEBUG(dbgs() << "Stepping backwards to the call sites of @"
<< FromFn->getName() << "\n");
auto CheckCallSite = [&](AbstractCallSite ACS) {
CallBase *CB = ACS.getInstruction();
if (!CB)
return false;
if (isa<InvokeInst>(CB))
return false;
Instruction *Inst = CB->getNextNonDebugInstruction();
Worklist.push_back(Inst);
return true;
};
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
Result = !A.checkForAllCallSites(CheckCallSite, *FromFn,
/* RequireAllCallSites */ true,
- &QueryingAA, AllCallSitesKnown);
+ &QueryingAA, UsedAssumedInformation);
if (Result) {
LLVM_DEBUG(dbgs() << "[AA] stepping back to call sites from " << *CurFromI
<< " in @" << FromFn->getName()
<< " failed, give up\n");
return true;
}
LLVM_DEBUG(dbgs() << "[AA] stepped back to call sites from " << *CurFromI
<< " in @" << FromFn->getName()
<< " worklist size is: " << Worklist.size() << "\n");
}
return false;
}
bool AA::isPotentiallyReachable(
Attributor &A, const Instruction &FromI, const Instruction &ToI,
const AbstractAttribute &QueryingAA,
std::function<bool(const Function &F)> GoBackwardsCB) {
LLVM_DEBUG(dbgs() << "[AA] isPotentiallyReachable " << ToI << " from "
<< FromI << " [GBCB: " << bool(GoBackwardsCB) << "]\n");
const Function *ToFn = ToI.getFunction();
return ::isPotentiallyReachable(A, FromI, &ToI, *ToFn, QueryingAA,
GoBackwardsCB);
}
bool AA::isPotentiallyReachable(
Attributor &A, const Instruction &FromI, const Function &ToFn,
const AbstractAttribute &QueryingAA,
std::function<bool(const Function &F)> GoBackwardsCB) {
return ::isPotentiallyReachable(A, FromI, /* ToI */ nullptr, ToFn, QueryingAA,
GoBackwardsCB);
}
/// Return true if \p New is equal or worse than \p Old.
static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) {
if (!Old.isIntAttribute())
return true;
return Old.getValueAsInt() >= New.getValueAsInt();
}
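// For integer attributes this is a plain magnitude check: with
// Old = align(16) and New = align(8), New is "equal or worse" (8 <= 16), so
// the caller below will not bother replacing the existing attribute.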
/// Return true if the information provided by \p Attr was added to the
/// attribute list \p Attrs. This is only the case if it was not already
/// present in \p Attrs at the position described by \p PK and \p AttrIdx.
static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr,
AttributeList &Attrs, int AttrIdx,
bool ForceReplace = false) {
if (Attr.isEnumAttribute()) {
Attribute::AttrKind Kind = Attr.getKindAsEnum();
if (Attrs.hasAttributeAtIndex(AttrIdx, Kind))
if (!ForceReplace &&
isEqualOrWorse(Attr, Attrs.getAttributeAtIndex(AttrIdx, Kind)))
return false;
Attrs = Attrs.addAttributeAtIndex(Ctx, AttrIdx, Attr);
return true;
}
if (Attr.isStringAttribute()) {
StringRef Kind = Attr.getKindAsString();
if (Attrs.hasAttributeAtIndex(AttrIdx, Kind))
if (!ForceReplace &&
isEqualOrWorse(Attr, Attrs.getAttributeAtIndex(AttrIdx, Kind)))
return false;
Attrs = Attrs.addAttributeAtIndex(Ctx, AttrIdx, Attr);
return true;
}
if (Attr.isIntAttribute()) {
Attribute::AttrKind Kind = Attr.getKindAsEnum();
if (Attrs.hasAttributeAtIndex(AttrIdx, Kind))
if (!ForceReplace &&
isEqualOrWorse(Attr, Attrs.getAttributeAtIndex(AttrIdx, Kind)))
return false;
Attrs = Attrs.removeAttributeAtIndex(Ctx, AttrIdx, Kind);
Attrs = Attrs.addAttributeAtIndex(Ctx, AttrIdx, Attr);
return true;
}
llvm_unreachable("Expected enum or string attribute!");
}
Argument *IRPosition::getAssociatedArgument() const {
if (getPositionKind() == IRP_ARGUMENT)
return cast<Argument>(&getAnchorValue());
// Not an Argument and no argument number means this is not a call site
// argument, thus we cannot find a callback argument to return.
int ArgNo = getCallSiteArgNo();
if (ArgNo < 0)
return nullptr;
// Use abstract call sites to make the connection between the call site
// values and the ones in callbacks. If a callback was found that makes use
// of the underlying call site operand, we want the corresponding callback
// callee argument and not the direct callee argument.
Optional<Argument *> CBCandidateArg;
SmallVector<const Use *, 4> CallbackUses;
const auto &CB = cast<CallBase>(getAnchorValue());
AbstractCallSite::getCallbackUses(CB, CallbackUses);
for (const Use *U : CallbackUses) {
AbstractCallSite ACS(U);
assert(ACS && ACS.isCallbackCall());
if (!ACS.getCalledFunction())
continue;
for (unsigned u = 0, e = ACS.getNumArgOperands(); u < e; u++) {
// Test if the underlying call site operand is argument number u of the
// callback callee.
if (ACS.getCallArgOperandNo(u) != ArgNo)
continue;
assert(ACS.getCalledFunction()->arg_size() > u &&
"ACS mapped into var-args arguments!");
if (CBCandidateArg.hasValue()) {
CBCandidateArg = nullptr;
break;
}
CBCandidateArg = ACS.getCalledFunction()->getArg(u);
}
}
// If we found a unique callback candidate argument, return it.
if (CBCandidateArg.hasValue() && CBCandidateArg.getValue())
return CBCandidateArg.getValue();
// If no callbacks were found, or none used the underlying call site operand
// exclusively, use the direct callee argument if available.
const Function *Callee = CB.getCalledFunction();
if (Callee && Callee->arg_size() > unsigned(ArgNo))
return Callee->getArg(ArgNo);
return nullptr;
}
ChangeStatus AbstractAttribute::update(Attributor &A) {
ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
if (getState().isAtFixpoint())
return HasChanged;
LLVM_DEBUG(dbgs() << "[Attributor] Update: " << *this << "\n");
HasChanged = updateImpl(A);
LLVM_DEBUG(dbgs() << "[Attributor] Update " << HasChanged << " " << *this
<< "\n");
return HasChanged;
}
ChangeStatus
IRAttributeManifest::manifestAttrs(Attributor &A, const IRPosition &IRP,
const ArrayRef<Attribute> &DeducedAttrs,
bool ForceReplace) {
Function *ScopeFn = IRP.getAnchorScope();
IRPosition::Kind PK = IRP.getPositionKind();
// The following generic code manifests the attributes in DeducedAttrs if
// they improve the current IR. Due to the different annotation positions we
// use the underlying AttributeList interface.
AttributeList Attrs;
switch (PK) {
case IRPosition::IRP_INVALID:
case IRPosition::IRP_FLOAT:
return ChangeStatus::UNCHANGED;
case IRPosition::IRP_ARGUMENT:
case IRPosition::IRP_FUNCTION:
case IRPosition::IRP_RETURNED:
Attrs = ScopeFn->getAttributes();
break;
case IRPosition::IRP_CALL_SITE:
case IRPosition::IRP_CALL_SITE_RETURNED:
case IRPosition::IRP_CALL_SITE_ARGUMENT:
Attrs = cast<CallBase>(IRP.getAnchorValue()).getAttributes();
break;
}
ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
LLVMContext &Ctx = IRP.getAnchorValue().getContext();
for (const Attribute &Attr : DeducedAttrs) {
if (!addIfNotExistent(Ctx, Attr, Attrs, IRP.getAttrIdx(), ForceReplace))
continue;
HasChanged = ChangeStatus::CHANGED;
}
if (HasChanged == ChangeStatus::UNCHANGED)
return HasChanged;
switch (PK) {
case IRPosition::IRP_ARGUMENT:
case IRPosition::IRP_FUNCTION:
case IRPosition::IRP_RETURNED:
ScopeFn->setAttributes(Attrs);
break;
case IRPosition::IRP_CALL_SITE:
case IRPosition::IRP_CALL_SITE_RETURNED:
case IRPosition::IRP_CALL_SITE_ARGUMENT:
cast<CallBase>(IRP.getAnchorValue()).setAttributes(Attrs);
break;
case IRPosition::IRP_INVALID:
case IRPosition::IRP_FLOAT:
break;
}
return HasChanged;
}
const IRPosition IRPosition::EmptyKey(DenseMapInfo<void *>::getEmptyKey());
const IRPosition
IRPosition::TombstoneKey(DenseMapInfo<void *>::getTombstoneKey());
SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) {
IRPositions.emplace_back(IRP);
// Helper to determine if operand bundles on a call site are benign or
// potentially problematic. We handle only llvm.assume for now.
auto CanIgnoreOperandBundles = [](const CallBase &CB) {
return (isa<IntrinsicInst>(CB) &&
cast<IntrinsicInst>(CB).getIntrinsicID() == Intrinsic::assume);
};
const auto *CB = dyn_cast<CallBase>(&IRP.getAnchorValue());
switch (IRP.getPositionKind()) {
case IRPosition::IRP_INVALID:
case IRPosition::IRP_FLOAT:
case IRPosition::IRP_FUNCTION:
return;
case IRPosition::IRP_ARGUMENT:
case IRPosition::IRP_RETURNED:
IRPositions.emplace_back(IRPosition::function(*IRP.getAnchorScope()));
return;
case IRPosition::IRP_CALL_SITE:
assert(CB && "Expected call site!");
// TODO: We need to look at the operand bundles similar to the redirection
// in CallBase.
if (!CB->hasOperandBundles() || CanIgnoreOperandBundles(*CB))
if (const Function *Callee = CB->getCalledFunction())
IRPositions.emplace_back(IRPosition::function(*Callee));
return;
case IRPosition::IRP_CALL_SITE_RETURNED:
assert(CB && "Expected call site!");
// TODO: We need to look at the operand bundles similar to the redirection
// in CallBase.
if (!CB->hasOperandBundles() || CanIgnoreOperandBundles(*CB)) {
if (const Function *Callee = CB->getCalledFunction()) {
IRPositions.emplace_back(IRPosition::returned(*Callee));
IRPositions.emplace_back(IRPosition::function(*Callee));
for (const Argument &Arg : Callee->args())
if (Arg.hasReturnedAttr()) {
IRPositions.emplace_back(
IRPosition::callsite_argument(*CB, Arg.getArgNo()));
IRPositions.emplace_back(
IRPosition::value(*CB->getArgOperand(Arg.getArgNo())));
IRPositions.emplace_back(IRPosition::argument(Arg));
}
}
}
IRPositions.emplace_back(IRPosition::callsite_function(*CB));
return;
case IRPosition::IRP_CALL_SITE_ARGUMENT: {
assert(CB && "Expected call site!");
// TODO: We need to look at the operand bundles similar to the redirection
// in CallBase.
if (!CB->hasOperandBundles() || CanIgnoreOperandBundles(*CB)) {
const Function *Callee = CB->getCalledFunction();
if (Callee) {
if (Argument *Arg = IRP.getAssociatedArgument())
IRPositions.emplace_back(IRPosition::argument(*Arg));
IRPositions.emplace_back(IRPosition::function(*Callee));
}
}
IRPositions.emplace_back(IRPosition::value(IRP.getAssociatedValue()));
return;
}
}
}
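// For example (roughly), for a 'call site argument' position on
// `call void @f(i32 %x)` the iterator yields the position itself, the
// matching argument of @f, the function @f, and finally the IR value %x.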
bool IRPosition::hasAttr(ArrayRef<Attribute::AttrKind> AKs,
bool IgnoreSubsumingPositions, Attributor *A) const {
SmallVector<Attribute, 4> Attrs;
for (const IRPosition &EquivIRP : SubsumingPositionIterator(*this)) {
for (Attribute::AttrKind AK : AKs)
if (EquivIRP.getAttrsFromIRAttr(AK, Attrs))
return true;
// The first position returned by the SubsumingPositionIterator is
// always the position itself. If we ignore subsuming positions we
// are done after the first iteration.
if (IgnoreSubsumingPositions)
break;
}
if (A)
for (Attribute::AttrKind AK : AKs)
if (getAttrsFromAssumes(AK, Attrs, *A))
return true;
return false;
}
void IRPosition::getAttrs(ArrayRef<Attribute::AttrKind> AKs,
SmallVectorImpl<Attribute> &Attrs,
bool IgnoreSubsumingPositions, Attributor *A) const {
for (const IRPosition &EquivIRP : SubsumingPositionIterator(*this)) {
for (Attribute::AttrKind AK : AKs)
EquivIRP.getAttrsFromIRAttr(AK, Attrs);
// The first position returned by the SubsumingPositionIterator is
// always the position itself. If we ignore subsuming positions we
// are done after the first iteration.
if (IgnoreSubsumingPositions)
break;
}
if (A)
for (Attribute::AttrKind AK : AKs)
getAttrsFromAssumes(AK, Attrs, *A);
}
bool IRPosition::getAttrsFromIRAttr(Attribute::AttrKind AK,
SmallVectorImpl<Attribute> &Attrs) const {
if (getPositionKind() == IRP_INVALID || getPositionKind() == IRP_FLOAT)
return false;
AttributeList AttrList;
if (const auto *CB = dyn_cast<CallBase>(&getAnchorValue()))
AttrList = CB->getAttributes();
else
AttrList = getAssociatedFunction()->getAttributes();
bool HasAttr = AttrList.hasAttributeAtIndex(getAttrIdx(), AK);
if (HasAttr)
Attrs.push_back(AttrList.getAttributeAtIndex(getAttrIdx(), AK));
return HasAttr;
}
bool IRPosition::getAttrsFromAssumes(Attribute::AttrKind AK,
SmallVectorImpl<Attribute> &Attrs,
Attributor &A) const {
assert(getPositionKind() != IRP_INVALID && "Expected a valid position!");
Value &AssociatedValue = getAssociatedValue();
const Assume2KnowledgeMap &A2K =
A.getInfoCache().getKnowledgeMap().lookup({&AssociatedValue, AK});
// Check if we found any potential assume uses; if not, we don't need to
// create explorer iterators.
if (A2K.empty())
return false;
LLVMContext &Ctx = AssociatedValue.getContext();
unsigned AttrsSize = Attrs.size();
MustBeExecutedContextExplorer &Explorer =
A.getInfoCache().getMustBeExecutedContextExplorer();
auto EIt = Explorer.begin(getCtxI()), EEnd = Explorer.end(getCtxI());
for (auto &It : A2K)
if (Explorer.findInContextOf(It.first, EIt, EEnd))
Attrs.push_back(Attribute::get(Ctx, AK, It.second.Max));
return AttrsSize != Attrs.size();
}
void IRPosition::verify() {
#ifdef EXPENSIVE_CHECKS
switch (getPositionKind()) {
case IRP_INVALID:
assert((CBContext == nullptr) &&
"Invalid position must not have CallBaseContext!");
assert(!Enc.getOpaqueValue() &&
"Expected a nullptr for an invalid position!");
return;
case IRP_FLOAT:
assert((!isa<Argument>(&getAssociatedValue())) &&
"Expected specialized kind for argument values!");
return;
case IRP_RETURNED:
assert(isa<Function>(getAsValuePtr()) &&
"Expected function for a 'returned' position!");
assert(getAsValuePtr() == &getAssociatedValue() &&
"Associated value mismatch!");
return;
case IRP_CALL_SITE_RETURNED:
assert((CBContext == nullptr) &&
"'call site returned' position must not have CallBaseContext!");
assert((isa<CallBase>(getAsValuePtr())) &&
"Expected call base for 'call site returned' position!");
assert(getAsValuePtr() == &getAssociatedValue() &&
"Associated value mismatch!");
return;
case IRP_CALL_SITE:
assert((CBContext == nullptr) &&
"'call site function' position must not have CallBaseContext!");
assert((isa<CallBase>(getAsValuePtr())) &&
"Expected call base for 'call site function' position!");
assert(getAsValuePtr() == &getAssociatedValue() &&
"Associated value mismatch!");
return;
case IRP_FUNCTION:
assert(isa<Function>(getAsValuePtr()) &&
"Expected function for a 'function' position!");
assert(getAsValuePtr() == &getAssociatedValue() &&
"Associated value mismatch!");
return;
case IRP_ARGUMENT:
assert(isa<Argument>(getAsValuePtr()) &&
"Expected argument for a 'argument' position!");
assert(getAsValuePtr() == &getAssociatedValue() &&
"Associated value mismatch!");
return;
case IRP_CALL_SITE_ARGUMENT: {
assert((CBContext == nullptr) &&
"'call site argument' position must not have CallBaseContext!");
Use *U = getAsUsePtr();
(void)U; // Silence unused variable warning.
assert(U && "Expected use for a 'call site argument' position!");
assert(isa<CallBase>(U->getUser()) &&
"Expected call base user for a 'call site argument' position!");
assert(cast<CallBase>(U->getUser())->isArgOperand(U) &&
"Expected call base argument operand for a 'call site argument' "
"position");
assert(cast<CallBase>(U->getUser())->getArgOperandNo(U) ==
unsigned(getCallSiteArgNo()) &&
"Argument number mismatch!");
assert(U->get() == &getAssociatedValue() && "Associated value mismatch!");
return;
}
}
#endif
}
Optional<Constant *>
Attributor::getAssumedConstant(const IRPosition &IRP,
const AbstractAttribute &AA,
bool &UsedAssumedInformation) {
// First check all callbacks provided by outside AAs. If any of them returns
// a non-null value that is different from the associated value, or None, we
// assume it is simplified.
for (auto &CB : SimplificationCallbacks.lookup(IRP)) {
Optional<Value *> SimplifiedV = CB(IRP, &AA, UsedAssumedInformation);
if (!SimplifiedV.hasValue())
return llvm::None;
if (isa_and_nonnull<Constant>(*SimplifiedV))
return cast<Constant>(*SimplifiedV);
return nullptr;
}
const auto &ValueSimplifyAA =
getAAFor<AAValueSimplify>(AA, IRP, DepClassTy::NONE);
Optional<Value *> SimplifiedV =
ValueSimplifyAA.getAssumedSimplifiedValue(*this);
bool IsKnown = ValueSimplifyAA.isAtFixpoint();
UsedAssumedInformation |= !IsKnown;
if (!SimplifiedV.hasValue()) {
recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL);
return llvm::None;
}
if (isa_and_nonnull<UndefValue>(SimplifiedV.getValue())) {
recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL);
return UndefValue::get(IRP.getAssociatedType());
}
Constant *CI = dyn_cast_or_null<Constant>(SimplifiedV.getValue());
if (CI)
CI = dyn_cast_or_null<Constant>(
AA::getWithType(*CI, *IRP.getAssociatedType()));
if (CI)
recordDependence(ValueSimplifyAA, AA, DepClassTy::OPTIONAL);
return CI;
}
Optional<Value *>
Attributor::getAssumedSimplified(const IRPosition &IRP,
const AbstractAttribute *AA,
bool &UsedAssumedInformation) {
// First check all callbacks provided by outside AAs. If any of them returns
// a non-null value that is different from the associated value, or None, we
// assume it is simplified.
for (auto &CB : SimplificationCallbacks.lookup(IRP))
return CB(IRP, AA, UsedAssumedInformation);
// If no high-level/outside simplification occurred, use AAValueSimplify.
const auto &ValueSimplifyAA =
getOrCreateAAFor<AAValueSimplify>(IRP, AA, DepClassTy::NONE);
Optional<Value *> SimplifiedV =
ValueSimplifyAA.getAssumedSimplifiedValue(*this);
bool IsKnown = ValueSimplifyAA.isAtFixpoint();
UsedAssumedInformation |= !IsKnown;
if (!SimplifiedV.hasValue()) {
if (AA)
recordDependence(ValueSimplifyAA, *AA, DepClassTy::OPTIONAL);
return llvm::None;
}
if (*SimplifiedV == nullptr)
return const_cast<Value *>(&IRP.getAssociatedValue());
if (Value *SimpleV =
AA::getWithType(**SimplifiedV, *IRP.getAssociatedType())) {
if (AA)
recordDependence(ValueSimplifyAA, *AA, DepClassTy::OPTIONAL);
return SimpleV;
}
return const_cast<Value *>(&IRP.getAssociatedValue());
}
Optional<Value *> Attributor::translateArgumentToCallSiteContent(
Optional<Value *> V, CallBase &CB, const AbstractAttribute &AA,
bool &UsedAssumedInformation) {
if (!V.hasValue())
return V;
if (*V == nullptr || isa<Constant>(*V))
return V;
if (auto *Arg = dyn_cast<Argument>(*V))
if (CB.getCalledFunction() == Arg->getParent())
if (!Arg->hasPointeeInMemoryValueAttr())
return getAssumedSimplified(
IRPosition::callsite_argument(CB, Arg->getArgNo()), AA,
UsedAssumedInformation);
return nullptr;
}
Attributor::~Attributor() {
// The abstract attributes are allocated via the BumpPtrAllocator Allocator,
// thus we cannot delete them. We can, and want to, destruct them though.
for (auto &DepAA : DG.SyntheticRoot.Deps) {
AbstractAttribute *AA = cast<AbstractAttribute>(DepAA.getPointer());
AA->~AbstractAttribute();
}
}
bool Attributor::isAssumedDead(const AbstractAttribute &AA,
const AAIsDead *FnLivenessAA,
bool &UsedAssumedInformation,
bool CheckBBLivenessOnly, DepClassTy DepClass) {
const IRPosition &IRP = AA.getIRPosition();
if (!Functions.count(IRP.getAnchorScope()))
return false;
return isAssumedDead(IRP, &AA, FnLivenessAA, UsedAssumedInformation,
CheckBBLivenessOnly, DepClass);
}
bool Attributor::isAssumedDead(const Use &U,
const AbstractAttribute *QueryingAA,
const AAIsDead *FnLivenessAA,
bool &UsedAssumedInformation,
bool CheckBBLivenessOnly, DepClassTy DepClass) {
Instruction *UserI = dyn_cast<Instruction>(U.getUser());
if (!UserI)
return isAssumedDead(IRPosition::value(*U.get()), QueryingAA, FnLivenessAA,
UsedAssumedInformation, CheckBBLivenessOnly, DepClass);
if (auto *CB = dyn_cast<CallBase>(UserI)) {
// For call site argument uses we can check if the argument is
// unused/dead.
if (CB->isArgOperand(&U)) {
const IRPosition &CSArgPos =
IRPosition::callsite_argument(*CB, CB->getArgOperandNo(&U));
return isAssumedDead(CSArgPos, QueryingAA, FnLivenessAA,
UsedAssumedInformation, CheckBBLivenessOnly,
DepClass);
}
} else if (ReturnInst *RI = dyn_cast<ReturnInst>(UserI)) {
const IRPosition &RetPos = IRPosition::returned(*RI->getFunction());
return isAssumedDead(RetPos, QueryingAA, FnLivenessAA,
UsedAssumedInformation, CheckBBLivenessOnly, DepClass);
} else if (PHINode *PHI = dyn_cast<PHINode>(UserI)) {
BasicBlock *IncomingBB = PHI->getIncomingBlock(U);
return isAssumedDead(*IncomingBB->getTerminator(), QueryingAA, FnLivenessAA,
UsedAssumedInformation, CheckBBLivenessOnly, DepClass);
}
return isAssumedDead(IRPosition::inst(*UserI), QueryingAA, FnLivenessAA,
UsedAssumedInformation, CheckBBLivenessOnly, DepClass);
}
bool Attributor::isAssumedDead(const Instruction &I,
const AbstractAttribute *QueryingAA,
const AAIsDead *FnLivenessAA,
bool &UsedAssumedInformation,
bool CheckBBLivenessOnly, DepClassTy DepClass) {
const IRPosition::CallBaseContext *CBCtx =
QueryingAA ? QueryingAA->getCallBaseContext() : nullptr;
if (ManifestAddedBlocks.contains(I.getParent()))
return false;
if (!FnLivenessAA)
FnLivenessAA =
lookupAAFor<AAIsDead>(IRPosition::function(*I.getFunction(), CBCtx),
QueryingAA, DepClassTy::NONE);
// If we have a context instruction and a liveness AA we use it.
if (FnLivenessAA &&
FnLivenessAA->getIRPosition().getAnchorScope() == I.getFunction() &&
(CheckBBLivenessOnly ? FnLivenessAA->isAssumedDead(I.getParent())
: FnLivenessAA->isAssumedDead(&I))) {
if (QueryingAA)
recordDependence(*FnLivenessAA, *QueryingAA, DepClass);
if (!FnLivenessAA->isKnownDead(&I))
UsedAssumedInformation = true;
return true;
}
if (CheckBBLivenessOnly)
return false;
const IRPosition IRP = IRPosition::inst(I, CBCtx);
const AAIsDead &IsDeadAA =
getOrCreateAAFor<AAIsDead>(IRP, QueryingAA, DepClassTy::NONE);
// Don't check liveness for AAIsDead.
if (QueryingAA == &IsDeadAA)
return false;
if (IsDeadAA.isAssumedDead()) {
if (QueryingAA)
recordDependence(IsDeadAA, *QueryingAA, DepClass);
if (!IsDeadAA.isKnownDead())
UsedAssumedInformation = true;
return true;
}
return false;
}
bool Attributor::isAssumedDead(const IRPosition &IRP,
const AbstractAttribute *QueryingAA,
const AAIsDead *FnLivenessAA,
bool &UsedAssumedInformation,
bool CheckBBLivenessOnly, DepClassTy DepClass) {
Instruction *CtxI = IRP.getCtxI();
if (CtxI &&
isAssumedDead(*CtxI, QueryingAA, FnLivenessAA, UsedAssumedInformation,
/* CheckBBLivenessOnly */ true,
CheckBBLivenessOnly ? DepClass : DepClassTy::OPTIONAL))
return true;
if (CheckBBLivenessOnly)
return false;
// If we haven't succeeded we query the specific liveness info for the IRP.
const AAIsDead *IsDeadAA;
if (IRP.getPositionKind() == IRPosition::IRP_CALL_SITE)
IsDeadAA = &getOrCreateAAFor<AAIsDead>(
IRPosition::callsite_returned(cast<CallBase>(IRP.getAssociatedValue())),
QueryingAA, DepClassTy::NONE);
else
IsDeadAA = &getOrCreateAAFor<AAIsDead>(IRP, QueryingAA, DepClassTy::NONE);
// Don't check liveness for AAIsDead.
if (QueryingAA == IsDeadAA)
return false;
if (IsDeadAA->isAssumedDead()) {
if (QueryingAA)
recordDependence(*IsDeadAA, *QueryingAA, DepClass);
if (!IsDeadAA->isKnownDead())
UsedAssumedInformation = true;
return true;
}
return false;
}
bool Attributor::isAssumedDead(const BasicBlock &BB,
const AbstractAttribute *QueryingAA,
const AAIsDead *FnLivenessAA,
DepClassTy DepClass) {
if (!FnLivenessAA)
FnLivenessAA = lookupAAFor<AAIsDead>(IRPosition::function(*BB.getParent()),
QueryingAA, DepClassTy::NONE);
if (FnLivenessAA->isAssumedDead(&BB)) {
if (QueryingAA)
recordDependence(*FnLivenessAA, *QueryingAA, DepClass);
return true;
}
return false;
}
bool Attributor::checkForAllUses(
function_ref<bool(const Use &, bool &)> Pred,
const AbstractAttribute &QueryingAA, const Value &V,
bool CheckBBLivenessOnly, DepClassTy LivenessDepClass,
function_ref<bool(const Use &OldU, const Use &NewU)> EquivalentUseCB) {
// Check the trivial case first as it catches void values.
if (V.use_empty())
return true;
const IRPosition &IRP = QueryingAA.getIRPosition();
SmallVector<const Use *, 16> Worklist;
SmallPtrSet<const Use *, 16> Visited;
for (const Use &U : V.uses())
Worklist.push_back(&U);
LLVM_DEBUG(dbgs() << "[Attributor] Got " << Worklist.size()
<< " initial uses to check\n");
const Function *ScopeFn = IRP.getAnchorScope();
const auto *LivenessAA =
ScopeFn ? &getAAFor<AAIsDead>(QueryingAA, IRPosition::function(*ScopeFn),
DepClassTy::NONE)
: nullptr;
while (!Worklist.empty()) {
const Use *U = Worklist.pop_back_val();
if (isa<PHINode>(U->getUser()) && !Visited.insert(U).second)
continue;
LLVM_DEBUG({
if (auto *Fn = dyn_cast<Function>(U->getUser()))
dbgs() << "[Attributor] Check use: " << **U << " in " << Fn->getName()
<< "\n";
else
dbgs() << "[Attributor] Check use: " << **U << " in " << *U->getUser()
<< "\n";
});
bool UsedAssumedInformation = false;
if (isAssumedDead(*U, &QueryingAA, LivenessAA, UsedAssumedInformation,
CheckBBLivenessOnly, LivenessDepClass)) {
LLVM_DEBUG(dbgs() << "[Attributor] Dead use, skip!\n");
continue;
}
if (U->getUser()->isDroppable()) {
LLVM_DEBUG(dbgs() << "[Attributor] Droppable user, skip!\n");
continue;
}
if (auto *SI = dyn_cast<StoreInst>(U->getUser())) {
if (&SI->getOperandUse(0) == U) {
if (!Visited.insert(U).second)
continue;
SmallSetVector<Value *, 4> PotentialCopies;
if (AA::getPotentialCopiesOfStoredValue(*this, *SI, PotentialCopies,
QueryingAA,
UsedAssumedInformation)) {
LLVM_DEBUG(dbgs() << "[Attributor] Value is stored, continue with "
<< PotentialCopies.size()
<< " potential copies instead!\n");
for (Value *PotentialCopy : PotentialCopies)
for (const Use &CopyUse : PotentialCopy->uses()) {
if (EquivalentUseCB && !EquivalentUseCB(*U, CopyUse)) {
LLVM_DEBUG(dbgs() << "[Attributor] Potential copy was "
"rejected by the equivalence call back: "
<< *CopyUse << "!\n");
return false;
}
Worklist.push_back(&CopyUse);
}
continue;
}
}
}
bool Follow = false;
if (!Pred(*U, Follow))
return false;
if (!Follow)
continue;
for (const Use &UU : U->getUser()->uses())
Worklist.push_back(&UU);
}
return true;
}
bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
const AbstractAttribute &QueryingAA,
bool RequireAllCallSites,
- bool &AllCallSitesKnown) {
+ bool &UsedAssumedInformation) {
// We can try to determine information from the call sites. However, this is
// only possible if all call sites are known, hence the function must have
// internal linkage.
const IRPosition &IRP = QueryingAA.getIRPosition();
const Function *AssociatedFunction = IRP.getAssociatedFunction();
if (!AssociatedFunction) {
LLVM_DEBUG(dbgs() << "[Attributor] No function associated with " << IRP
<< "\n");
- AllCallSitesKnown = false;
return false;
}
return checkForAllCallSites(Pred, *AssociatedFunction, RequireAllCallSites,
- &QueryingAA, AllCallSitesKnown);
+ &QueryingAA, UsedAssumedInformation);
}
bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
const Function &Fn,
bool RequireAllCallSites,
const AbstractAttribute *QueryingAA,
- bool &AllCallSitesKnown) {
+ bool &UsedAssumedInformation) {
if (RequireAllCallSites && !Fn.hasLocalLinkage()) {
LLVM_DEBUG(
dbgs()
<< "[Attributor] Function " << Fn.getName()
<< " has no internal linkage, hence not all call sites are known\n");
- AllCallSitesKnown = false;
return false;
}
- // If we do not require all call sites we might not see all.
- AllCallSitesKnown = RequireAllCallSites;
-
SmallVector<const Use *, 8> Uses(make_pointer_range(Fn.uses()));
for (unsigned u = 0; u < Uses.size(); ++u) {
const Use &U = *Uses[u];
LLVM_DEBUG({
if (auto *Fn = dyn_cast<Function>(U))
dbgs() << "[Attributor] Check use: " << Fn->getName() << " in "
<< *U.getUser() << "\n";
else
dbgs() << "[Attributor] Check use: " << *U << " in " << *U.getUser()
<< "\n";
});
- bool UsedAssumedInformation = false;
if (isAssumedDead(U, QueryingAA, nullptr, UsedAssumedInformation,
/* CheckBBLivenessOnly */ true)) {
LLVM_DEBUG(dbgs() << "[Attributor] Dead use, skip!\n");
continue;
}
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) {
if (CE->isCast() && CE->getType()->isPointerTy() &&
CE->getType()->getPointerElementType()->isFunctionTy()) {
LLVM_DEBUG(
dbgs() << "[Attributor] Use, is constant cast expression, add "
<< CE->getNumUses()
<< " uses of that expression instead!\n");
for (const Use &CEU : CE->uses())
Uses.push_back(&CEU);
continue;
}
}
AbstractCallSite ACS(&U);
if (!ACS) {
LLVM_DEBUG(dbgs() << "[Attributor] Function " << Fn.getName()
<< " has non call site use " << *U.get() << " in "
<< *U.getUser() << "\n");
// BlockAddress users are allowed.
if (isa<BlockAddress>(U.getUser()))
continue;
return false;
}
const Use *EffectiveUse =
ACS.isCallbackCall() ? &ACS.getCalleeUseForCallback() : &U;
if (!ACS.isCallee(EffectiveUse)) {
if (!RequireAllCallSites) {
LLVM_DEBUG(dbgs() << "[Attributor] User " << *EffectiveUse->getUser()
<< " is not a call of " << Fn.getName()
<< ", skip use\n");
continue;
}
LLVM_DEBUG(dbgs() << "[Attributor] User " << *EffectiveUse->getUser()
<< " is an invalid use of " << Fn.getName() << "\n");
return false;
}
// Make sure the arguments that can be matched between the call site and the
// callee agree on their types. It is unlikely that they do not, and it
// doesn't make sense for all attributes to know/care about this.
assert(&Fn == ACS.getCalledFunction() && "Expected known callee");
unsigned MinArgsParams =
std::min(size_t(ACS.getNumArgOperands()), Fn.arg_size());
for (unsigned u = 0; u < MinArgsParams; ++u) {
Value *CSArgOp = ACS.getCallArgOperand(u);
if (CSArgOp && Fn.getArg(u)->getType() != CSArgOp->getType()) {
LLVM_DEBUG(
dbgs() << "[Attributor] Call site / callee argument type mismatch ["
<< u << "@" << Fn.getName() << ": "
<< *Fn.getArg(u)->getType() << " vs. "
<< *ACS.getCallArgOperand(u)->getType() << "\n");
return false;
}
}
if (Pred(ACS))
continue;
LLVM_DEBUG(dbgs() << "[Attributor] Call site callback failed for "
<< *ACS.getInstruction() << "\n");
return false;
}
return true;
}
bool Attributor::shouldPropagateCallBaseContext(const IRPosition &IRP) {
// TODO: Maintain a cache of Values that are on the path from an Argument to
// an Instruction that would affect the liveness/return state, etc.
return EnableCallSiteSpecific;
}
bool Attributor::checkForAllReturnedValuesAndReturnInsts(
function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred,
const AbstractAttribute &QueryingAA) {
const IRPosition &IRP = QueryingAA.getIRPosition();
// Since we need to provide return instructions we have to have an exact
// definition.
const Function *AssociatedFunction = IRP.getAssociatedFunction();
if (!AssociatedFunction)
return false;
// If this is a call site query we use the call site specific return values
// and liveness information.
// TODO: use the function scope once we have call site AAReturnedValues.
const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
const auto &AARetVal =
getAAFor<AAReturnedValues>(QueryingAA, QueryIRP, DepClassTy::REQUIRED);
if (!AARetVal.getState().isValidState())
return false;
return AARetVal.checkForAllReturnedValuesAndReturnInsts(Pred);
}
bool Attributor::checkForAllReturnedValues(
function_ref<bool(Value &)> Pred, const AbstractAttribute &QueryingAA) {
const IRPosition &IRP = QueryingAA.getIRPosition();
const Function *AssociatedFunction = IRP.getAssociatedFunction();
if (!AssociatedFunction)
return false;
// TODO: use the function scope once we have call site AAReturnedValues.
const IRPosition &QueryIRP = IRPosition::function(
*AssociatedFunction, QueryingAA.getCallBaseContext());
const auto &AARetVal =
getAAFor<AAReturnedValues>(QueryingAA, QueryIRP, DepClassTy::REQUIRED);
if (!AARetVal.getState().isValidState())
return false;
return AARetVal.checkForAllReturnedValuesAndReturnInsts(
[&](Value &RV, const SmallSetVector<ReturnInst *, 4> &) {
return Pred(RV);
});
}
static bool checkForAllInstructionsImpl(
Attributor *A, InformationCache::OpcodeInstMapTy &OpcodeInstMap,
function_ref<bool(Instruction &)> Pred, const AbstractAttribute *QueryingAA,
const AAIsDead *LivenessAA, const ArrayRef<unsigned> &Opcodes,
bool &UsedAssumedInformation, bool CheckBBLivenessOnly = false,
bool CheckPotentiallyDead = false) {
for (unsigned Opcode : Opcodes) {
// Check if we have instructions with this opcode at all first.
auto *Insts = OpcodeInstMap.lookup(Opcode);
if (!Insts)
continue;
for (Instruction *I : *Insts) {
// Skip dead instructions.
if (A && !CheckPotentiallyDead &&
A->isAssumedDead(IRPosition::inst(*I), QueryingAA, LivenessAA,
UsedAssumedInformation, CheckBBLivenessOnly)) {
LLVM_DEBUG(dbgs() << "[Attributor] Instruction " << *I
<< " is potentially dead, skip!\n";);
continue;
}
if (!Pred(*I))
return false;
}
}
return true;
}
bool Attributor::checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
const AbstractAttribute &QueryingAA,
const ArrayRef<unsigned> &Opcodes,
bool &UsedAssumedInformation,
bool CheckBBLivenessOnly,
bool CheckPotentiallyDead) {
const IRPosition &IRP = QueryingAA.getIRPosition();
// Since we need to provide instructions we have to have an exact definition.
const Function *AssociatedFunction = IRP.getAssociatedFunction();
if (!AssociatedFunction)
return false;
if (AssociatedFunction->isDeclaration())
return false;
// TODO: use the function scope once we have call site AAReturnedValues.
const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
const auto *LivenessAA =
(CheckBBLivenessOnly || CheckPotentiallyDead)
? nullptr
: &(getAAFor<AAIsDead>(QueryingAA, QueryIRP, DepClassTy::NONE));
auto &OpcodeInstMap =
InfoCache.getOpcodeInstMapForFunction(*AssociatedFunction);
if (!checkForAllInstructionsImpl(this, OpcodeInstMap, Pred, &QueryingAA,
LivenessAA, Opcodes, UsedAssumedInformation,
CheckBBLivenessOnly, CheckPotentiallyDead))
return false;
return true;
}
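// Editorial sketch, not part of the upstream sources: callers restrict the
// walk to an opcode list, mirroring the must-tail check further down in
// isValidFunctionSignatureRewrite. Assuming an Attributor `A` and a querying
// abstract attribute `QueryingAA`:
//
//   bool UsedAssumedInformation = false;
//   bool NoMustTail = A.checkForAllInstructions(
//       [](Instruction &I) { return !cast<CallInst>(I).isMustTailCall(); },
//       QueryingAA, {(unsigned)Instruction::Call}, UsedAssumedInformation);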
bool Attributor::checkForAllReadWriteInstructions(
function_ref<bool(Instruction &)> Pred, AbstractAttribute &QueryingAA,
bool &UsedAssumedInformation) {
const Function *AssociatedFunction =
QueryingAA.getIRPosition().getAssociatedFunction();
if (!AssociatedFunction)
return false;
// TODO: use the function scope once we have call site AAReturnedValues.
const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
const auto &LivenessAA =
getAAFor<AAIsDead>(QueryingAA, QueryIRP, DepClassTy::NONE);
for (Instruction *I :
InfoCache.getReadOrWriteInstsForFunction(*AssociatedFunction)) {
// Skip dead instructions.
if (isAssumedDead(IRPosition::inst(*I), &QueryingAA, &LivenessAA,
UsedAssumedInformation))
continue;
if (!Pred(*I))
return false;
}
return true;
}
void Attributor::runTillFixpoint() {
TimeTraceScope TimeScope("Attributor::runTillFixpoint");
LLVM_DEBUG(dbgs() << "[Attributor] Identified and initialized "
<< DG.SyntheticRoot.Deps.size()
<< " abstract attributes.\n");
// Now that all abstract attributes are collected and initialized we start
// the abstract analysis.
unsigned IterationCounter = 1;
unsigned MaxFixedPointIterations;
if (MaxFixpointIterations)
MaxFixedPointIterations = MaxFixpointIterations.getValue();
else
MaxFixedPointIterations = SetFixpointIterations;
SmallVector<AbstractAttribute *, 32> ChangedAAs;
SetVector<AbstractAttribute *> Worklist, InvalidAAs;
Worklist.insert(DG.SyntheticRoot.begin(), DG.SyntheticRoot.end());
do {
// Remember the size to determine new attributes.
size_t NumAAs = DG.SyntheticRoot.Deps.size();
LLVM_DEBUG(dbgs() << "\n\n[Attributor] #Iteration: " << IterationCounter
<< ", Worklist size: " << Worklist.size() << "\n");
// For invalid AAs we can fix dependent AAs that have a required dependence,
// thereby folding long dependence chains in a single step without the need
// to run updates.
for (unsigned u = 0; u < InvalidAAs.size(); ++u) {
AbstractAttribute *InvalidAA = InvalidAAs[u];
// Check the dependences to fast track invalidation.
LLVM_DEBUG(dbgs() << "[Attributor] InvalidAA: " << *InvalidAA << " has "
<< InvalidAA->Deps.size()
<< " required & optional dependences\n");
while (!InvalidAA->Deps.empty()) {
const auto &Dep = InvalidAA->Deps.back();
InvalidAA->Deps.pop_back();
AbstractAttribute *DepAA = cast<AbstractAttribute>(Dep.getPointer());
if (Dep.getInt() == unsigned(DepClassTy::OPTIONAL)) {
LLVM_DEBUG(dbgs() << " - recompute: " << *DepAA);
Worklist.insert(DepAA);
continue;
}
LLVM_DEBUG(dbgs() << " - invalidate: " << *DepAA);
DepAA->getState().indicatePessimisticFixpoint();
assert(DepAA->getState().isAtFixpoint() && "Expected fixpoint state!");
if (!DepAA->getState().isValidState())
InvalidAAs.insert(DepAA);
else
ChangedAAs.push_back(DepAA);
}
}
// Add all abstract attributes that are potentially dependent on one that
// changed to the work list.
for (AbstractAttribute *ChangedAA : ChangedAAs)
while (!ChangedAA->Deps.empty()) {
Worklist.insert(
cast<AbstractAttribute>(ChangedAA->Deps.back().getPointer()));
ChangedAA->Deps.pop_back();
}
LLVM_DEBUG(dbgs() << "[Attributor] #Iteration: " << IterationCounter
<< ", Worklist+Dependent size: " << Worklist.size()
<< "\n");
// Reset the changed and invalid set.
ChangedAAs.clear();
InvalidAAs.clear();
// Update all abstract attributes in the work list and record the ones that
// changed.
for (AbstractAttribute *AA : Worklist) {
const auto &AAState = AA->getState();
if (!AAState.isAtFixpoint())
if (updateAA(*AA) == ChangeStatus::CHANGED)
ChangedAAs.push_back(AA);
// Use the InvalidAAs vector to fast-track the transitive propagation of
// invalid states without requiring updates.
if (!AAState.isValidState())
InvalidAAs.insert(AA);
}
// Add attributes to the changed set if they have been created in the last
// iteration.
ChangedAAs.append(DG.SyntheticRoot.begin() + NumAAs,
DG.SyntheticRoot.end());
// Reset the work list and repopulate with the changed abstract attributes.
// Note that dependent ones are added above.
Worklist.clear();
Worklist.insert(ChangedAAs.begin(), ChangedAAs.end());
Worklist.insert(QueryAAsAwaitingUpdate.begin(),
QueryAAsAwaitingUpdate.end());
QueryAAsAwaitingUpdate.clear();
} while (!Worklist.empty() && (IterationCounter++ < MaxFixedPointIterations ||
VerifyMaxFixpointIterations));
if (IterationCounter > MaxFixedPointIterations && !Worklist.empty()) {
auto Remark = [&](OptimizationRemarkMissed ORM) {
return ORM << "Attributor did not reach a fixpoint after "
<< ore::NV("Iterations", MaxFixedPointIterations)
<< " iterations.";
};
Function *F = Worklist.front()->getIRPosition().getAssociatedFunction();
emitRemark<OptimizationRemarkMissed>(F, "FixedPoint", Remark);
}
LLVM_DEBUG(dbgs() << "\n[Attributor] Fixpoint iteration done after: "
<< IterationCounter << "/" << MaxFixpointIterations
<< " iterations\n");
// Reset abstract arguments not settled in a sound fixpoint by now. This
// happens when we stopped the fixpoint iteration early. Note that only the
// ones marked as "changed" *and* the ones transitively depending on them
// need to be reverted to a pessimistic state. Others might not be in a
// fixpoint state but we can use the optimistic results for them anyway.
SmallPtrSet<AbstractAttribute *, 32> Visited;
for (unsigned u = 0; u < ChangedAAs.size(); u++) {
AbstractAttribute *ChangedAA = ChangedAAs[u];
if (!Visited.insert(ChangedAA).second)
continue;
AbstractState &State = ChangedAA->getState();
if (!State.isAtFixpoint()) {
State.indicatePessimisticFixpoint();
NumAttributesTimedOut++;
}
while (!ChangedAA->Deps.empty()) {
ChangedAAs.push_back(
cast<AbstractAttribute>(ChangedAA->Deps.back().getPointer()));
ChangedAA->Deps.pop_back();
}
}
LLVM_DEBUG({
if (!Visited.empty())
dbgs() << "\n[Attributor] Finalized " << Visited.size()
<< " abstract attributes.\n";
});
if (VerifyMaxFixpointIterations &&
IterationCounter != MaxFixedPointIterations) {
errs() << "\n[Attributor] Fixpoint iteration done after: "
<< IterationCounter << "/" << MaxFixedPointIterations
<< " iterations\n";
llvm_unreachable("The fixpoint was not reached with exactly the number of "
"specified iterations!");
}
}
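// Editorial note, not part of the upstream sources: the loop above treats the
// two dependence classes differently. A REQUIRED dependence lets an invalid
// dependee invalidate its dependers directly (the InvalidAAs fast path), while
// an OPTIONAL dependence only re-schedules the depender for another update.
// Dependences are recorded when one AA queries another, e.g.:
//
//   const auto &RetValAA =
//       A.getAAFor<AAReturnedValues>(*this, FnPos, DepClassTy::REQUIRED);
//
// where `FnPos` is an IRPosition (name illustrative only).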
void Attributor::registerForUpdate(AbstractAttribute &AA) {
assert(AA.isQueryAA() &&
"Non-query AAs should not be required to register for updates!");
QueryAAsAwaitingUpdate.insert(&AA);
}
ChangeStatus Attributor::manifestAttributes() {
TimeTraceScope TimeScope("Attributor::manifestAttributes");
size_t NumFinalAAs = DG.SyntheticRoot.Deps.size();
unsigned NumManifested = 0;
unsigned NumAtFixpoint = 0;
ChangeStatus ManifestChange = ChangeStatus::UNCHANGED;
for (auto &DepAA : DG.SyntheticRoot.Deps) {
AbstractAttribute *AA = cast<AbstractAttribute>(DepAA.getPointer());
AbstractState &State = AA->getState();
// If a fixpoint was not reached already, we can now take the optimistic
// state. This is correct because we enforced a pessimistic one on abstract
// attributes that were transitively dependent on a changed one already
// above.
if (!State.isAtFixpoint())
State.indicateOptimisticFixpoint();
// We must not manifest Attributes that use CallBase info.
if (AA->hasCallBaseContext())
continue;
// If the state is invalid, we do not try to manifest it.
if (!State.isValidState())
continue;
// Skip dead code.
bool UsedAssumedInformation = false;
if (isAssumedDead(*AA, nullptr, UsedAssumedInformation,
/* CheckBBLivenessOnly */ true))
continue;
// Check the manifest debug counter that allows skipping the manifestation
// of AAs.
if (!DebugCounter::shouldExecute(ManifestDBGCounter))
continue;
// Manifest the state and record if we changed the IR.
ChangeStatus LocalChange = AA->manifest(*this);
if (LocalChange == ChangeStatus::CHANGED && AreStatisticsEnabled())
AA->trackStatistics();
LLVM_DEBUG(dbgs() << "[Attributor] Manifest " << LocalChange << " : " << *AA
<< "\n");
ManifestChange = ManifestChange | LocalChange;
NumAtFixpoint++;
NumManifested += (LocalChange == ChangeStatus::CHANGED);
}
(void)NumManifested;
(void)NumAtFixpoint;
LLVM_DEBUG(dbgs() << "\n[Attributor] Manifested " << NumManifested
<< " arguments while " << NumAtFixpoint
<< " were in a valid fixpoint state\n");
NumAttributesManifested += NumManifested;
NumAttributesValidFixpoint += NumAtFixpoint;
(void)NumFinalAAs;
if (NumFinalAAs != DG.SyntheticRoot.Deps.size()) {
for (unsigned u = NumFinalAAs; u < DG.SyntheticRoot.Deps.size(); ++u)
errs() << "Unexpected abstract attribute: "
<< cast<AbstractAttribute>(DG.SyntheticRoot.Deps[u].getPointer())
<< " :: "
<< cast<AbstractAttribute>(DG.SyntheticRoot.Deps[u].getPointer())
->getIRPosition()
.getAssociatedValue()
<< "\n";
llvm_unreachable("Expected the final number of abstract attributes to "
"remain unchanged!");
}
return ManifestChange;
}
void Attributor::identifyDeadInternalFunctions() {
// Early exit if we don't intend to delete functions.
if (!DeleteFns)
return;
// Identify dead internal functions and delete them. This happens outside
// the other fixpoint analysis as we might treat potentially dead functions
// as live to lower the number of iterations. If they happen to be dead, the
// below fixpoint loop will identify and eliminate them.
SmallVector<Function *, 8> InternalFns;
for (Function *F : Functions)
if (F->hasLocalLinkage())
InternalFns.push_back(F);
SmallPtrSet<Function *, 8> LiveInternalFns;
bool FoundLiveInternal = true;
while (FoundLiveInternal) {
FoundLiveInternal = false;
for (unsigned u = 0, e = InternalFns.size(); u < e; ++u) {
Function *F = InternalFns[u];
if (!F)
continue;
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
if (checkForAllCallSites(
[&](AbstractCallSite ACS) {
Function *Callee = ACS.getInstruction()->getFunction();
return ToBeDeletedFunctions.count(Callee) ||
(Functions.count(Callee) && Callee->hasLocalLinkage() &&
!LiveInternalFns.count(Callee));
},
- *F, true, nullptr, AllCallSitesKnown)) {
+ *F, true, nullptr, UsedAssumedInformation)) {
continue;
}
LiveInternalFns.insert(F);
InternalFns[u] = nullptr;
FoundLiveInternal = true;
}
}
for (unsigned u = 0, e = InternalFns.size(); u < e; ++u)
if (Function *F = InternalFns[u])
ToBeDeletedFunctions.insert(F);
}
ChangeStatus Attributor::cleanupIR() {
TimeTraceScope TimeScope("Attributor::cleanupIR");
// Delete stuff at the end to avoid invalid references and to keep a nice order.
LLVM_DEBUG(dbgs() << "\n[Attributor] Delete/replace at least "
<< ToBeDeletedFunctions.size() << " functions and "
<< ToBeDeletedBlocks.size() << " blocks and "
<< ToBeDeletedInsts.size() << " instructions and "
<< ToBeChangedValues.size() << " values and "
<< ToBeChangedUses.size() << " uses. "
<< "Preserve manifest added " << ManifestAddedBlocks.size()
<< " blocks\n");
SmallVector<WeakTrackingVH, 32> DeadInsts;
SmallVector<Instruction *, 32> TerminatorsToFold;
auto ReplaceUse = [&](Use *U, Value *NewV) {
Value *OldV = U->get();
// If we plan to replace NewV we need to update it at this point.
do {
const auto &Entry = ToBeChangedValues.lookup(NewV);
if (!Entry.first)
break;
NewV = Entry.first;
} while (true);
// Do not replace uses in returns if the value is a must-tail call we will
// not delete.
if (auto *RI = dyn_cast<ReturnInst>(U->getUser())) {
if (auto *CI = dyn_cast<CallInst>(OldV->stripPointerCasts()))
if (CI->isMustTailCall() &&
(!ToBeDeletedInsts.count(CI) || !isRunOn(*CI->getCaller())))
return;
// If we rewrite a return and the new value is not an argument, strip the
// `returned` attribute as it is wrong now.
if (!isa<Argument>(NewV))
for (auto &Arg : RI->getFunction()->args())
Arg.removeAttr(Attribute::Returned);
}
// Do not perform call graph altering changes outside the SCC.
if (auto *CB = dyn_cast<CallBase>(U->getUser()))
if (CB->isCallee(U) && !isRunOn(*CB->getCaller()))
return;
LLVM_DEBUG(dbgs() << "Use " << *NewV << " in " << *U->getUser()
<< " instead of " << *OldV << "\n");
U->set(NewV);
if (Instruction *I = dyn_cast<Instruction>(OldV)) {
CGModifiedFunctions.insert(I->getFunction());
if (!isa<PHINode>(I) && !ToBeDeletedInsts.count(I) &&
isInstructionTriviallyDead(I))
DeadInsts.push_back(I);
}
if (isa<UndefValue>(NewV) && isa<CallBase>(U->getUser())) {
auto *CB = cast<CallBase>(U->getUser());
if (CB->isArgOperand(U)) {
unsigned Idx = CB->getArgOperandNo(U);
CB->removeParamAttr(Idx, Attribute::NoUndef);
Function *Fn = CB->getCalledFunction();
if (Fn && Fn->arg_size() > Idx)
Fn->removeParamAttr(Idx, Attribute::NoUndef);
}
}
if (isa<Constant>(NewV) && isa<BranchInst>(U->getUser())) {
Instruction *UserI = cast<Instruction>(U->getUser());
if (isa<UndefValue>(NewV)) {
ToBeChangedToUnreachableInsts.insert(UserI);
} else {
TerminatorsToFold.push_back(UserI);
}
}
};
for (auto &It : ToBeChangedUses) {
Use *U = It.first;
Value *NewV = It.second;
ReplaceUse(U, NewV);
}
SmallVector<Use *, 4> Uses;
for (auto &It : ToBeChangedValues) {
Value *OldV = It.first;
auto &Entry = It.second;
Value *NewV = Entry.first;
Uses.clear();
for (auto &U : OldV->uses())
if (Entry.second || !U.getUser()->isDroppable())
Uses.push_back(&U);
for (Use *U : Uses)
ReplaceUse(U, NewV);
}
for (auto &V : InvokeWithDeadSuccessor)
if (InvokeInst *II = dyn_cast_or_null<InvokeInst>(V)) {
assert(isRunOn(*II->getFunction()) &&
"Cannot replace an invoke outside the current SCC!");
bool UnwindBBIsDead = II->hasFnAttr(Attribute::NoUnwind);
bool NormalBBIsDead = II->hasFnAttr(Attribute::NoReturn);
bool Invoke2CallAllowed =
!AAIsDead::mayCatchAsynchronousExceptions(*II->getFunction());
assert((UnwindBBIsDead || NormalBBIsDead) &&
"Invoke does not have dead successors!");
BasicBlock *BB = II->getParent();
BasicBlock *NormalDestBB = II->getNormalDest();
if (UnwindBBIsDead) {
Instruction *NormalNextIP = &NormalDestBB->front();
if (Invoke2CallAllowed) {
changeToCall(II);
NormalNextIP = BB->getTerminator();
}
if (NormalBBIsDead)
ToBeChangedToUnreachableInsts.insert(NormalNextIP);
} else {
assert(NormalBBIsDead && "Broken invariant!");
if (!NormalDestBB->getUniquePredecessor())
NormalDestBB = SplitBlockPredecessors(NormalDestBB, {BB}, ".dead");
ToBeChangedToUnreachableInsts.insert(&NormalDestBB->front());
}
}
for (Instruction *I : TerminatorsToFold) {
if (!isRunOn(*I->getFunction()))
continue;
CGModifiedFunctions.insert(I->getFunction());
ConstantFoldTerminator(I->getParent());
}
for (auto &V : ToBeChangedToUnreachableInsts)
if (Instruction *I = dyn_cast_or_null<Instruction>(V)) {
if (!isRunOn(*I->getFunction()))
continue;
CGModifiedFunctions.insert(I->getFunction());
changeToUnreachable(I);
}
for (auto &V : ToBeDeletedInsts) {
if (Instruction *I = dyn_cast_or_null<Instruction>(V)) {
if (auto *CB = dyn_cast<CallBase>(I)) {
if (!isRunOn(*I->getFunction()))
continue;
if (!isa<IntrinsicInst>(CB))
CGUpdater.removeCallSite(*CB);
}
I->dropDroppableUses();
CGModifiedFunctions.insert(I->getFunction());
if (!I->getType()->isVoidTy())
I->replaceAllUsesWith(UndefValue::get(I->getType()));
if (!isa<PHINode>(I) && isInstructionTriviallyDead(I))
DeadInsts.push_back(I);
else
I->eraseFromParent();
}
}
llvm::erase_if(DeadInsts, [&](WeakTrackingVH I) {
return !I || !isRunOn(*cast<Instruction>(I)->getFunction());
});
LLVM_DEBUG({
dbgs() << "[Attributor] DeadInsts size: " << DeadInsts.size() << "\n";
for (auto &I : DeadInsts)
if (I)
dbgs() << " - " << *I << "\n";
});
RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
if (unsigned NumDeadBlocks = ToBeDeletedBlocks.size()) {
SmallVector<BasicBlock *, 8> ToBeDeletedBBs;
ToBeDeletedBBs.reserve(NumDeadBlocks);
for (BasicBlock *BB : ToBeDeletedBlocks) {
assert(isRunOn(*BB->getParent()) &&
"Cannot delete a block outside the current SCC!");
CGModifiedFunctions.insert(BB->getParent());
// Do not delete BBs added during manifests of AAs.
if (ManifestAddedBlocks.contains(BB))
continue;
ToBeDeletedBBs.push_back(BB);
}
// Actually we do not delete the blocks but squash them into a single
// unreachable instruction; untangling branches that jump here is something
// we need to do in a more generic way.
detachDeadBlocks(ToBeDeletedBBs, nullptr);
}
identifyDeadInternalFunctions();
// Rewrite the functions as requested during manifest.
ChangeStatus ManifestChange = rewriteFunctionSignatures(CGModifiedFunctions);
for (Function *Fn : CGModifiedFunctions)
if (!ToBeDeletedFunctions.count(Fn) && Functions.count(Fn))
CGUpdater.reanalyzeFunction(*Fn);
for (Function *Fn : ToBeDeletedFunctions) {
if (!Functions.count(Fn))
continue;
CGUpdater.removeFunction(*Fn);
}
if (!ToBeChangedUses.empty())
ManifestChange = ChangeStatus::CHANGED;
if (!ToBeChangedToUnreachableInsts.empty())
ManifestChange = ChangeStatus::CHANGED;
if (!ToBeDeletedFunctions.empty())
ManifestChange = ChangeStatus::CHANGED;
if (!ToBeDeletedBlocks.empty())
ManifestChange = ChangeStatus::CHANGED;
if (!ToBeDeletedInsts.empty())
ManifestChange = ChangeStatus::CHANGED;
if (!InvokeWithDeadSuccessor.empty())
ManifestChange = ChangeStatus::CHANGED;
if (!DeadInsts.empty())
ManifestChange = ChangeStatus::CHANGED;
NumFnDeleted += ToBeDeletedFunctions.size();
LLVM_DEBUG(dbgs() << "[Attributor] Deleted " << ToBeDeletedFunctions.size()
<< " functions after manifest.\n");
#ifdef EXPENSIVE_CHECKS
for (Function *F : Functions) {
if (ToBeDeletedFunctions.count(F))
continue;
assert(!verifyFunction(*F, &errs()) && "Module verification failed!");
}
#endif
return ManifestChange;
}
ChangeStatus Attributor::run() {
TimeTraceScope TimeScope("Attributor::run");
AttributorCallGraph ACallGraph(*this);
if (PrintCallGraph)
ACallGraph.populateAll();
Phase = AttributorPhase::UPDATE;
runTillFixpoint();
// dump graphs on demand
if (DumpDepGraph)
DG.dumpGraph();
if (ViewDepGraph)
DG.viewGraph();
if (PrintDependencies)
DG.print();
Phase = AttributorPhase::MANIFEST;
ChangeStatus ManifestChange = manifestAttributes();
Phase = AttributorPhase::CLEANUP;
ChangeStatus CleanupChange = cleanupIR();
if (PrintCallGraph)
ACallGraph.print();
return ManifestChange | CleanupChange;
}
ChangeStatus Attributor::updateAA(AbstractAttribute &AA) {
TimeTraceScope TimeScope(
AA.getName() + std::to_string(AA.getIRPosition().getPositionKind()) +
"::updateAA");
assert(Phase == AttributorPhase::UPDATE &&
"We can update AA only in the update stage!");
// Use a new dependence vector for this update.
DependenceVector DV;
DependenceStack.push_back(&DV);
auto &AAState = AA.getState();
ChangeStatus CS = ChangeStatus::UNCHANGED;
bool UsedAssumedInformation = false;
if (!isAssumedDead(AA, nullptr, UsedAssumedInformation,
/* CheckBBLivenessOnly */ true))
CS = AA.update(*this);
if (!AA.isQueryAA() && DV.empty()) {
// If the attribute did not query any non-fix information, the state
// will not change and we can indicate that right away.
AAState.indicateOptimisticFixpoint();
}
if (!AAState.isAtFixpoint())
rememberDependences();
// Verify the stack was used properly, that is, we pop the dependence vector
// we put there earlier.
DependenceVector *PoppedDV = DependenceStack.pop_back_val();
(void)PoppedDV;
assert(PoppedDV == &DV && "Inconsistent usage of the dependence stack!");
return CS;
}
void Attributor::createShallowWrapper(Function &F) {
assert(!F.isDeclaration() && "Cannot create a wrapper around a declaration!");
Module &M = *F.getParent();
LLVMContext &Ctx = M.getContext();
FunctionType *FnTy = F.getFunctionType();
Function *Wrapper =
Function::Create(FnTy, F.getLinkage(), F.getAddressSpace(), F.getName());
F.setName(""); // set the inside function anonymous
M.getFunctionList().insert(F.getIterator(), Wrapper);
F.setLinkage(GlobalValue::InternalLinkage);
F.replaceAllUsesWith(Wrapper);
assert(F.use_empty() && "Uses remained after wrapper was created!");
// Move the COMDAT section to the wrapper.
// TODO: Check if we need to keep it for F as well.
Wrapper->setComdat(F.getComdat());
F.setComdat(nullptr);
// Copy all metadata and attributes but keep them on F as well.
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
F.getAllMetadata(MDs);
for (auto MDIt : MDs)
Wrapper->addMetadata(MDIt.first, *MDIt.second);
Wrapper->setAttributes(F.getAttributes());
// Create the call in the wrapper.
BasicBlock *EntryBB = BasicBlock::Create(Ctx, "entry", Wrapper);
SmallVector<Value *, 8> Args;
Argument *FArgIt = F.arg_begin();
for (Argument &Arg : Wrapper->args()) {
Args.push_back(&Arg);
Arg.setName((FArgIt++)->getName());
}
CallInst *CI = CallInst::Create(&F, Args, "", EntryBB);
CI->setTailCall(true);
CI->addFnAttr(Attribute::NoInline);
ReturnInst::Create(Ctx, CI->getType()->isVoidTy() ? nullptr : CI, EntryBB);
NumFnShallowWrappersCreated++;
}
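// Editorial sketch, not part of the upstream sources: at the C level the
// transformation above is roughly equivalent to the following (names are
// illustrative only):
//
//   // before: one externally visible definition
//   int F(int x) { /* original body */ }
//   // after: the body becomes internal, the original name is a thin wrapper
//   static int F_impl(int x) { /* original body */ }
//   int F(int x) { return F_impl(x); }   // tail call, marked noinline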
bool Attributor::isInternalizable(Function &F) {
if (F.isDeclaration() || F.hasLocalLinkage() ||
GlobalValue::isInterposableLinkage(F.getLinkage()))
return false;
return true;
}
Function *Attributor::internalizeFunction(Function &F, bool Force) {
if (!AllowDeepWrapper && !Force)
return nullptr;
if (!isInternalizable(F))
return nullptr;
SmallPtrSet<Function *, 2> FnSet = {&F};
DenseMap<Function *, Function *> InternalizedFns;
internalizeFunctions(FnSet, InternalizedFns);
return InternalizedFns[&F];
}
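// Editorial sketch, not part of the upstream sources: runAttributorOnFunctions
// below uses this single-function wrapper; forcing internalization regardless
// of the AllowDeepWrapper option looks like:
//
//   if (Function *Copy = Attributor::internalizeFunction(F, /* Force */ true))
//     Functions.insert(Copy);  // analyze the private copy as well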
bool Attributor::internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet,
DenseMap<Function *, Function *> &FnMap) {
for (Function *F : FnSet)
if (!Attributor::isInternalizable(*F))
return false;
FnMap.clear();
// Generate the internalized version of each function.
for (Function *F : FnSet) {
Module &M = *F->getParent();
FunctionType *FnTy = F->getFunctionType();
// Create a copy of the current function
Function *Copied =
Function::Create(FnTy, F->getLinkage(), F->getAddressSpace(),
F->getName() + ".internalized");
ValueToValueMapTy VMap;
auto *NewFArgIt = Copied->arg_begin();
for (auto &Arg : F->args()) {
auto ArgName = Arg.getName();
NewFArgIt->setName(ArgName);
VMap[&Arg] = &(*NewFArgIt++);
}
SmallVector<ReturnInst *, 8> Returns;
// Copy the body of the original function to the new one
CloneFunctionInto(Copied, F, VMap,
CloneFunctionChangeType::LocalChangesOnly, Returns);
// Set the linkage and visibility late as CloneFunctionInto has some
// implicit requirements.
Copied->setVisibility(GlobalValue::DefaultVisibility);
Copied->setLinkage(GlobalValue::PrivateLinkage);
// Copy metadata
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
F->getAllMetadata(MDs);
for (auto MDIt : MDs)
if (!Copied->hasMetadata())
Copied->addMetadata(MDIt.first, *MDIt.second);
M.getFunctionList().insert(F->getIterator(), Copied);
Copied->setDSOLocal(true);
FnMap[F] = Copied;
}
// Replace all uses of the old function with the new internalized function
// unless the caller is a function that was just internalized.
for (Function *F : FnSet) {
auto &InternalizedFn = FnMap[F];
auto IsNotInternalized = [&](Use &U) -> bool {
if (auto *CB = dyn_cast<CallBase>(U.getUser()))
return !FnMap.lookup(CB->getCaller());
return false;
};
F->replaceUsesWithIf(InternalizedFn, IsNotInternalized);
}
return true;
}
bool Attributor::isValidFunctionSignatureRewrite(
Argument &Arg, ArrayRef<Type *> ReplacementTypes) {
if (!RewriteSignatures)
return false;
Function *Fn = Arg.getParent();
auto CallSiteCanBeChanged = [Fn](AbstractCallSite ACS) {
// Forbid call sites that cast the function return type. If we need to
// rewrite these functions we need to re-create a cast for the new call site
// (if the old one had uses).
if (!ACS.getCalledFunction() ||
ACS.getInstruction()->getType() !=
ACS.getCalledFunction()->getReturnType())
return false;
if (ACS.getCalledOperand()->getType() != Fn->getType())
return false;
// Forbid must-tail calls for now.
return !ACS.isCallbackCall() && !ACS.getInstruction()->isMustTailCall();
};
// Avoid var-arg functions for now.
if (Fn->isVarArg()) {
LLVM_DEBUG(dbgs() << "[Attributor] Cannot rewrite var-args functions\n");
return false;
}
// Avoid functions with complicated argument passing semantics.
AttributeList FnAttributeList = Fn->getAttributes();
if (FnAttributeList.hasAttrSomewhere(Attribute::Nest) ||
FnAttributeList.hasAttrSomewhere(Attribute::StructRet) ||
FnAttributeList.hasAttrSomewhere(Attribute::InAlloca) ||
FnAttributeList.hasAttrSomewhere(Attribute::Preallocated)) {
LLVM_DEBUG(
dbgs() << "[Attributor] Cannot rewrite due to complex attribute\n");
return false;
}
// Avoid callbacks for now.
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
if (!checkForAllCallSites(CallSiteCanBeChanged, *Fn, true, nullptr,
- AllCallSitesKnown)) {
+ UsedAssumedInformation)) {
LLVM_DEBUG(dbgs() << "[Attributor] Cannot rewrite all call sites\n");
return false;
}
auto InstPred = [](Instruction &I) {
if (auto *CI = dyn_cast<CallInst>(&I))
return !CI->isMustTailCall();
return true;
};
// Forbid must-tail calls for now.
// TODO:
- bool UsedAssumedInformation = false;
auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(*Fn);
if (!checkForAllInstructionsImpl(nullptr, OpcodeInstMap, InstPred, nullptr,
nullptr, {Instruction::Call},
UsedAssumedInformation)) {
LLVM_DEBUG(dbgs() << "[Attributor] Cannot rewrite due to instructions\n");
return false;
}
return true;
}
bool Attributor::registerFunctionSignatureRewrite(
Argument &Arg, ArrayRef<Type *> ReplacementTypes,
ArgumentReplacementInfo::CalleeRepairCBTy &&CalleeRepairCB,
ArgumentReplacementInfo::ACSRepairCBTy &&ACSRepairCB) {
LLVM_DEBUG(dbgs() << "[Attributor] Register new rewrite of " << Arg << " in "
<< Arg.getParent()->getName() << " with "
<< ReplacementTypes.size() << " replacements\n");
assert(isValidFunctionSignatureRewrite(Arg, ReplacementTypes) &&
"Cannot register an invalid rewrite");
Function *Fn = Arg.getParent();
SmallVectorImpl<std::unique_ptr<ArgumentReplacementInfo>> &ARIs =
ArgumentReplacementMap[Fn];
if (ARIs.empty())
ARIs.resize(Fn->arg_size());
// If we already have a replacement with at most as many new arguments,
// ignore this request.
std::unique_ptr<ArgumentReplacementInfo> &ARI = ARIs[Arg.getArgNo()];
if (ARI && ARI->getNumReplacementArgs() <= ReplacementTypes.size()) {
LLVM_DEBUG(dbgs() << "[Attributor] Existing rewrite is preferred\n");
return false;
}
// If we have a replacement already but we like the new one better, delete
// the old.
ARI.reset();
LLVM_DEBUG(dbgs() << "[Attributor] Register new rewrite of " << Arg << " in "
<< Arg.getParent()->getName() << " with "
<< ReplacementTypes.size() << " replacements\n");
// Remember the replacement.
ARI.reset(new ArgumentReplacementInfo(*this, Arg, ReplacementTypes,
std::move(CalleeRepairCB),
std::move(ACSRepairCB)));
return true;
}
bool Attributor::shouldSeedAttribute(AbstractAttribute &AA) {
bool Result = true;
#ifndef NDEBUG
if (SeedAllowList.size() != 0)
Result = llvm::is_contained(SeedAllowList, AA.getName());
Function *Fn = AA.getAnchorScope();
if (FunctionSeedAllowList.size() != 0 && Fn)
Result &= llvm::is_contained(FunctionSeedAllowList, Fn->getName());
#endif
return Result;
}
ChangeStatus Attributor::rewriteFunctionSignatures(
SmallPtrSetImpl<Function *> &ModifiedFns) {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
for (auto &It : ArgumentReplacementMap) {
Function *OldFn = It.getFirst();
// Deleted functions do not require rewrites.
if (!Functions.count(OldFn) || ToBeDeletedFunctions.count(OldFn))
continue;
const SmallVectorImpl<std::unique_ptr<ArgumentReplacementInfo>> &ARIs =
It.getSecond();
assert(ARIs.size() == OldFn->arg_size() && "Inconsistent state!");
SmallVector<Type *, 16> NewArgumentTypes;
SmallVector<AttributeSet, 16> NewArgumentAttributes;
// Collect replacement argument types and copy over existing attributes.
AttributeList OldFnAttributeList = OldFn->getAttributes();
for (Argument &Arg : OldFn->args()) {
if (const std::unique_ptr<ArgumentReplacementInfo> &ARI =
ARIs[Arg.getArgNo()]) {
NewArgumentTypes.append(ARI->ReplacementTypes.begin(),
ARI->ReplacementTypes.end());
NewArgumentAttributes.append(ARI->getNumReplacementArgs(),
AttributeSet());
} else {
NewArgumentTypes.push_back(Arg.getType());
NewArgumentAttributes.push_back(
OldFnAttributeList.getParamAttrs(Arg.getArgNo()));
}
}
FunctionType *OldFnTy = OldFn->getFunctionType();
Type *RetTy = OldFnTy->getReturnType();
// Construct the new function type using the new arguments types.
FunctionType *NewFnTy =
FunctionType::get(RetTy, NewArgumentTypes, OldFnTy->isVarArg());
LLVM_DEBUG(dbgs() << "[Attributor] Function rewrite '" << OldFn->getName()
<< "' from " << *OldFn->getFunctionType() << " to "
<< *NewFnTy << "\n");
// Create the new function body and insert it into the module.
Function *NewFn = Function::Create(NewFnTy, OldFn->getLinkage(),
OldFn->getAddressSpace(), "");
Functions.insert(NewFn);
OldFn->getParent()->getFunctionList().insert(OldFn->getIterator(), NewFn);
NewFn->takeName(OldFn);
NewFn->copyAttributesFrom(OldFn);
// Patch the pointer to LLVM function in debug info descriptor.
NewFn->setSubprogram(OldFn->getSubprogram());
OldFn->setSubprogram(nullptr);
// Recompute the parameter attributes list based on the new arguments for
// the function.
LLVMContext &Ctx = OldFn->getContext();
NewFn->setAttributes(AttributeList::get(
Ctx, OldFnAttributeList.getFnAttrs(), OldFnAttributeList.getRetAttrs(),
NewArgumentAttributes));
// Since we have now created the new function, splice the body of the old
// function right into the new function, leaving the old rotting hulk of the
// function empty.
NewFn->getBasicBlockList().splice(NewFn->begin(),
OldFn->getBasicBlockList());
// Fixup block addresses to reference new function.
SmallVector<BlockAddress *, 8u> BlockAddresses;
for (User *U : OldFn->users())
if (auto *BA = dyn_cast<BlockAddress>(U))
BlockAddresses.push_back(BA);
for (auto *BA : BlockAddresses)
BA->replaceAllUsesWith(BlockAddress::get(NewFn, BA->getBasicBlock()));
// Set of all "call-like" instructions that invoke the old function mapped
// to their new replacements.
SmallVector<std::pair<CallBase *, CallBase *>, 8> CallSitePairs;
// Callback to create a new "call-like" instruction for a given one.
auto CallSiteReplacementCreator = [&](AbstractCallSite ACS) {
CallBase *OldCB = cast<CallBase>(ACS.getInstruction());
const AttributeList &OldCallAttributeList = OldCB->getAttributes();
// Collect the new argument operands for the replacement call site.
SmallVector<Value *, 16> NewArgOperands;
SmallVector<AttributeSet, 16> NewArgOperandAttributes;
for (unsigned OldArgNum = 0; OldArgNum < ARIs.size(); ++OldArgNum) {
unsigned NewFirstArgNum = NewArgOperands.size();
(void)NewFirstArgNum; // only used inside assert.
if (const std::unique_ptr<ArgumentReplacementInfo> &ARI =
ARIs[OldArgNum]) {
if (ARI->ACSRepairCB)
ARI->ACSRepairCB(*ARI, ACS, NewArgOperands);
assert(ARI->getNumReplacementArgs() + NewFirstArgNum ==
NewArgOperands.size() &&
"ACS repair callback did not provide as many operand as new "
"types were registered!");
// TODO: Expose the attribute set to the ACS repair callback.
NewArgOperandAttributes.append(ARI->ReplacementTypes.size(),
AttributeSet());
} else {
NewArgOperands.push_back(ACS.getCallArgOperand(OldArgNum));
NewArgOperandAttributes.push_back(
OldCallAttributeList.getParamAttrs(OldArgNum));
}
}
assert(NewArgOperands.size() == NewArgOperandAttributes.size() &&
"Mismatch # argument operands vs. # argument operand attributes!");
assert(NewArgOperands.size() == NewFn->arg_size() &&
"Mismatch # argument operands vs. # function arguments!");
SmallVector<OperandBundleDef, 4> OperandBundleDefs;
OldCB->getOperandBundlesAsDefs(OperandBundleDefs);
// Create a new call or invoke instruction to replace the old one.
CallBase *NewCB;
if (InvokeInst *II = dyn_cast<InvokeInst>(OldCB)) {
NewCB =
InvokeInst::Create(NewFn, II->getNormalDest(), II->getUnwindDest(),
NewArgOperands, OperandBundleDefs, "", OldCB);
} else {
auto *NewCI = CallInst::Create(NewFn, NewArgOperands, OperandBundleDefs,
"", OldCB);
NewCI->setTailCallKind(cast<CallInst>(OldCB)->getTailCallKind());
NewCB = NewCI;
}
// Copy over various properties and the new attributes.
NewCB->copyMetadata(*OldCB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
NewCB->setCallingConv(OldCB->getCallingConv());
NewCB->takeName(OldCB);
NewCB->setAttributes(AttributeList::get(
Ctx, OldCallAttributeList.getFnAttrs(),
OldCallAttributeList.getRetAttrs(), NewArgOperandAttributes));
CallSitePairs.push_back({OldCB, NewCB});
return true;
};
// Use the CallSiteReplacementCreator to create replacement call sites.
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
bool Success = checkForAllCallSites(CallSiteReplacementCreator, *OldFn,
- true, nullptr, AllCallSitesKnown);
+ true, nullptr, UsedAssumedInformation);
(void)Success;
assert(Success && "Assumed call site replacement to succeed!");
// Rewire the arguments.
Argument *OldFnArgIt = OldFn->arg_begin();
Argument *NewFnArgIt = NewFn->arg_begin();
for (unsigned OldArgNum = 0; OldArgNum < ARIs.size();
++OldArgNum, ++OldFnArgIt) {
if (const std::unique_ptr<ArgumentReplacementInfo> &ARI =
ARIs[OldArgNum]) {
if (ARI->CalleeRepairCB)
ARI->CalleeRepairCB(*ARI, *NewFn, NewFnArgIt);
NewFnArgIt += ARI->ReplacementTypes.size();
} else {
NewFnArgIt->takeName(&*OldFnArgIt);
OldFnArgIt->replaceAllUsesWith(&*NewFnArgIt);
++NewFnArgIt;
}
}
// Eliminate the instructions *after* we visited all of them.
for (auto &CallSitePair : CallSitePairs) {
CallBase &OldCB = *CallSitePair.first;
CallBase &NewCB = *CallSitePair.second;
assert(OldCB.getType() == NewCB.getType() &&
"Cannot handle call sites with different types!");
ModifiedFns.insert(OldCB.getFunction());
CGUpdater.replaceCallSite(OldCB, NewCB);
OldCB.replaceAllUsesWith(&NewCB);
OldCB.eraseFromParent();
}
// Replace the function in the call graph (if any).
CGUpdater.replaceFunctionWith(*OldFn, *NewFn);
// If the old function was modified and needed to be reanalyzed, the new one
// does now.
if (ModifiedFns.erase(OldFn))
ModifiedFns.insert(NewFn);
Changed = ChangeStatus::CHANGED;
}
return Changed;
}
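// Editorial sketch, not part of the upstream sources: a client registers a
// signature rewrite by supplying the replacement types and two repair
// callbacks whose shapes are implied by the call sites above (the exact
// callback typedefs are assumed here):
//
//   SmallVector<Type *, 2> NewTypes = {Int32Ty, Int32Ty};  // split one arg
//   A.registerFunctionSignatureRewrite(
//       Arg, NewTypes,
//       /* CalleeRepairCB */
//       [](const Attributor::ArgumentReplacementInfo &ARI, Function &NewFn,
//          Function::arg_iterator NewArgIt) { /* recreate old Arg's value */ },
//       /* ACSRepairCB */
//       [](const Attributor::ArgumentReplacementInfo &ARI, AbstractCallSite ACS,
//          SmallVectorImpl<Value *> &NewArgOps) { /* push two operands */ });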
void InformationCache::initializeInformationCache(const Function &CF,
FunctionInfo &FI) {
// As we do not modify the function here we can remove the const
// without breaking implicit assumptions. At the end of the day, we could
// initialize the cache eagerly which would look the same to the users.
Function &F = const_cast<Function &>(CF);
// Walk all instructions to find interesting instructions that might be
// queried by abstract attributes during their initialization or update.
// This has to happen before we create attributes.
for (Instruction &I : instructions(&F)) {
bool IsInterestingOpcode = false;
// To allow easy access to all instructions in a function with a given
// opcode we store them in the InfoCache. As not all opcodes are interesting
// to concrete attributes we only cache the ones that are as identified in
// the following switch.
// Note: There are no concrete attributes now so this is initially empty.
switch (I.getOpcode()) {
default:
assert(!isa<CallBase>(&I) &&
"New call base instruction type needs to be known in the "
"Attributor.");
break;
case Instruction::Call:
// Calls are interesting on their own, additionally:
// For `llvm.assume` calls we also fill the KnowledgeMap as we find them.
// For `must-tail` calls we remember the caller and callee.
if (auto *Assume = dyn_cast<AssumeInst>(&I)) {
fillMapFromAssume(*Assume, KnowledgeMap);
} else if (cast<CallInst>(I).isMustTailCall()) {
FI.ContainsMustTailCall = true;
if (const Function *Callee = cast<CallInst>(I).getCalledFunction())
getFunctionInfo(*Callee).CalledViaMustTail = true;
}
LLVM_FALLTHROUGH;
case Instruction::CallBr:
case Instruction::Invoke:
case Instruction::CleanupRet:
case Instruction::CatchSwitch:
case Instruction::AtomicRMW:
case Instruction::AtomicCmpXchg:
case Instruction::Br:
case Instruction::Resume:
case Instruction::Ret:
case Instruction::Load:
// The alignment of a pointer is interesting for loads.
case Instruction::Store:
// The alignment of a pointer is interesting for stores.
case Instruction::Alloca:
case Instruction::AddrSpaceCast:
IsInterestingOpcode = true;
}
if (IsInterestingOpcode) {
auto *&Insts = FI.OpcodeInstMap[I.getOpcode()];
if (!Insts)
Insts = new (Allocator) InstructionVectorTy();
Insts->push_back(&I);
}
if (I.mayReadOrWriteMemory())
FI.RWInsts.push_back(&I);
}
if (F.hasFnAttribute(Attribute::AlwaysInline) &&
isInlineViable(F).isSuccess())
InlineableFunctions.insert(&F);
}
AAResults *InformationCache::getAAResultsForFunction(const Function &F) {
return AG.getAnalysis<AAManager>(F);
}
InformationCache::FunctionInfo::~FunctionInfo() {
// The instruction vectors are allocated using a BumpPtrAllocator, we need to
// manually destroy them.
for (auto &It : OpcodeInstMap)
It.getSecond()->~InstructionVectorTy();
}
void Attributor::recordDependence(const AbstractAttribute &FromAA,
const AbstractAttribute &ToAA,
DepClassTy DepClass) {
if (DepClass == DepClassTy::NONE)
return;
// If we are outside of an update, thus before the actual fixpoint iteration
// started (= when we create AAs), we do not track dependences because we will
// put all AAs into the initial worklist anyway.
if (DependenceStack.empty())
return;
if (FromAA.getState().isAtFixpoint())
return;
DependenceStack.back()->push_back({&FromAA, &ToAA, DepClass});
}
void Attributor::rememberDependences() {
assert(!DependenceStack.empty() && "No dependences to remember!");
for (DepInfo &DI : *DependenceStack.back()) {
assert((DI.DepClass == DepClassTy::REQUIRED ||
DI.DepClass == DepClassTy::OPTIONAL) &&
"Expected required or optional dependence (1 bit)!");
auto &DepAAs = const_cast<AbstractAttribute &>(*DI.FromAA).Deps;
DepAAs.push_back(AbstractAttribute::DepTy(
const_cast<AbstractAttribute *>(DI.ToAA), unsigned(DI.DepClass)));
}
}
void Attributor::identifyDefaultAbstractAttributes(Function &F) {
if (!VisitedFunctions.insert(&F).second)
return;
if (F.isDeclaration())
return;
// In non-module runs we need to look at the call sites of a function to
// determine if it is part of a must-tail call edge. This will influence what
// attributes we can derive.
InformationCache::FunctionInfo &FI = InfoCache.getFunctionInfo(F);
if (!isModulePass() && !FI.CalledViaMustTail) {
for (const Use &U : F.uses())
if (const auto *CB = dyn_cast<CallBase>(U.getUser()))
if (CB->isCallee(&U) && CB->isMustTailCall())
FI.CalledViaMustTail = true;
}
IRPosition FPos = IRPosition::function(F);
// Check for dead BasicBlocks in every function.
// We need dead instruction detection because we do not want to deal with
// broken IR in which SSA rules do not apply.
getOrCreateAAFor<AAIsDead>(FPos);
// Every function might be "will-return".
getOrCreateAAFor<AAWillReturn>(FPos);
// Every function might contain instructions that cause "undefined behavior".
getOrCreateAAFor<AAUndefinedBehavior>(FPos);
// Every function can be nounwind.
getOrCreateAAFor<AANoUnwind>(FPos);
// Every function might be marked "nosync"
getOrCreateAAFor<AANoSync>(FPos);
// Every function might be "no-free".
getOrCreateAAFor<AANoFree>(FPos);
// Every function might be "no-return".
getOrCreateAAFor<AANoReturn>(FPos);
// Every function might be "no-recurse".
getOrCreateAAFor<AANoRecurse>(FPos);
// Every function might be "readnone/readonly/writeonly/...".
getOrCreateAAFor<AAMemoryBehavior>(FPos);
// Every function can be "readnone/argmemonly/inaccessiblememonly/...".
getOrCreateAAFor<AAMemoryLocation>(FPos);
// Every function can track active assumptions.
getOrCreateAAFor<AAAssumptionInfo>(FPos);
// Every function might be applicable for Heap-To-Stack conversion.
if (EnableHeapToStack)
getOrCreateAAFor<AAHeapToStack>(FPos);
// Return attributes are only appropriate if the return type is non void.
Type *ReturnType = F.getReturnType();
if (!ReturnType->isVoidTy()) {
// Argument attribute "returned" --- Create only one per function even
// though it is an argument attribute.
getOrCreateAAFor<AAReturnedValues>(FPos);
IRPosition RetPos = IRPosition::returned(F);
// Every returned value might be dead.
getOrCreateAAFor<AAIsDead>(RetPos);
// Every function might be simplified.
getOrCreateAAFor<AAValueSimplify>(RetPos);
// Every returned value might be marked noundef.
getOrCreateAAFor<AANoUndef>(RetPos);
if (ReturnType->isPointerTy()) {
// Every function with pointer return type might be marked align.
getOrCreateAAFor<AAAlign>(RetPos);
// Every function with pointer return type might be marked nonnull.
getOrCreateAAFor<AANonNull>(RetPos);
// Every function with pointer return type might be marked noalias.
getOrCreateAAFor<AANoAlias>(RetPos);
// Every function with pointer return type might be marked
// dereferenceable.
getOrCreateAAFor<AADereferenceable>(RetPos);
}
}
for (Argument &Arg : F.args()) {
IRPosition ArgPos = IRPosition::argument(Arg);
// Every argument might be simplified. We have to go through the Attributor
// interface though as outside AAs can register custom simplification
// callbacks.
bool UsedAssumedInformation = false;
getAssumedSimplified(ArgPos, /* AA */ nullptr, UsedAssumedInformation);
// Every argument might be dead.
getOrCreateAAFor<AAIsDead>(ArgPos);
// Every argument might be marked noundef.
getOrCreateAAFor<AANoUndef>(ArgPos);
if (Arg.getType()->isPointerTy()) {
// Every argument with pointer type might be marked nonnull.
getOrCreateAAFor<AANonNull>(ArgPos);
// Every argument with pointer type might be marked noalias.
getOrCreateAAFor<AANoAlias>(ArgPos);
// Every argument with pointer type might be marked dereferenceable.
getOrCreateAAFor<AADereferenceable>(ArgPos);
// Every argument with pointer type might be marked align.
getOrCreateAAFor<AAAlign>(ArgPos);
// Every argument with pointer type might be marked nocapture.
getOrCreateAAFor<AANoCapture>(ArgPos);
// Every argument with pointer type might be marked
// "readnone/readonly/writeonly/..."
getOrCreateAAFor<AAMemoryBehavior>(ArgPos);
// Every argument with pointer type might be marked nofree.
getOrCreateAAFor<AANoFree>(ArgPos);
// Every argument with pointer type might be privatizable (or promotable)
getOrCreateAAFor<AAPrivatizablePtr>(ArgPos);
}
}
auto CallSitePred = [&](Instruction &I) -> bool {
auto &CB = cast<CallBase>(I);
IRPosition CBInstPos = IRPosition::inst(CB);
IRPosition CBFnPos = IRPosition::callsite_function(CB);
// Call sites might be dead if they do not have side effects and no live
// users. The return value might be dead if there are no live users.
getOrCreateAAFor<AAIsDead>(CBInstPos);
Function *Callee = CB.getCalledFunction();
// TODO: Even if the callee is not known now we might be able to simplify
// the call/callee.
if (!Callee)
return true;
// Every call site can track active assumptions.
getOrCreateAAFor<AAAssumptionInfo>(CBFnPos);
// Skip declarations except if annotations on their call sites were
// explicitly requested.
if (!AnnotateDeclarationCallSites && Callee->isDeclaration() &&
!Callee->hasMetadata(LLVMContext::MD_callback))
return true;
if (!Callee->getReturnType()->isVoidTy() && !CB.use_empty()) {
IRPosition CBRetPos = IRPosition::callsite_returned(CB);
getOrCreateAAFor<AAValueSimplify>(CBRetPos);
}
for (int I = 0, E = CB.arg_size(); I < E; ++I) {
IRPosition CBArgPos = IRPosition::callsite_argument(CB, I);
// Every call site argument might be dead.
getOrCreateAAFor<AAIsDead>(CBArgPos);
// Call site argument might be simplified. We have to go through the
// Attributor interface though as outside AAs can register custom
// simplification callbacks.
bool UsedAssumedInformation = false;
getAssumedSimplified(CBArgPos, /* AA */ nullptr, UsedAssumedInformation);
// Every call site argument might be marked "noundef".
getOrCreateAAFor<AANoUndef>(CBArgPos);
if (!CB.getArgOperand(I)->getType()->isPointerTy())
continue;
// Call site argument attribute "non-null".
getOrCreateAAFor<AANonNull>(CBArgPos);
// Call site argument attribute "nocapture".
getOrCreateAAFor<AANoCapture>(CBArgPos);
// Call site argument attribute "no-alias".
getOrCreateAAFor<AANoAlias>(CBArgPos);
// Call site argument attribute "dereferenceable".
getOrCreateAAFor<AADereferenceable>(CBArgPos);
// Call site argument attribute "align".
getOrCreateAAFor<AAAlign>(CBArgPos);
// Call site argument attribute
// "readnone/readonly/writeonly/..."
getOrCreateAAFor<AAMemoryBehavior>(CBArgPos);
// Call site argument attribute "nofree".
getOrCreateAAFor<AANoFree>(CBArgPos);
}
return true;
};
auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
bool Success;
bool UsedAssumedInformation = false;
Success = checkForAllInstructionsImpl(
nullptr, OpcodeInstMap, CallSitePred, nullptr, nullptr,
{(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
(unsigned)Instruction::Call},
UsedAssumedInformation);
(void)Success;
assert(Success && "Expected the check call to be successful!");
auto LoadStorePred = [&](Instruction &I) -> bool {
if (isa<LoadInst>(I)) {
getOrCreateAAFor<AAAlign>(
IRPosition::value(*cast<LoadInst>(I).getPointerOperand()));
if (SimplifyAllLoads)
getOrCreateAAFor<AAValueSimplify>(IRPosition::value(I));
} else
getOrCreateAAFor<AAAlign>(
IRPosition::value(*cast<StoreInst>(I).getPointerOperand()));
return true;
};
Success = checkForAllInstructionsImpl(
nullptr, OpcodeInstMap, LoadStorePred, nullptr, nullptr,
{(unsigned)Instruction::Load, (unsigned)Instruction::Store},
UsedAssumedInformation);
(void)Success;
assert(Success && "Expected the check call to be successful!");
}
/// Helpers to ease debugging through output streams and print calls.
///
///{
raw_ostream &llvm::operator<<(raw_ostream &OS, ChangeStatus S) {
return OS << (S == ChangeStatus::CHANGED ? "changed" : "unchanged");
}
raw_ostream &llvm::operator<<(raw_ostream &OS, IRPosition::Kind AP) {
switch (AP) {
case IRPosition::IRP_INVALID:
return OS << "inv";
case IRPosition::IRP_FLOAT:
return OS << "flt";
case IRPosition::IRP_RETURNED:
return OS << "fn_ret";
case IRPosition::IRP_CALL_SITE_RETURNED:
return OS << "cs_ret";
case IRPosition::IRP_FUNCTION:
return OS << "fn";
case IRPosition::IRP_CALL_SITE:
return OS << "cs";
case IRPosition::IRP_ARGUMENT:
return OS << "arg";
case IRPosition::IRP_CALL_SITE_ARGUMENT:
return OS << "cs_arg";
}
llvm_unreachable("Unknown attribute position!");
}
raw_ostream &llvm::operator<<(raw_ostream &OS, const IRPosition &Pos) {
const Value &AV = Pos.getAssociatedValue();
OS << "{" << Pos.getPositionKind() << ":" << AV.getName() << " ["
<< Pos.getAnchorValue().getName() << "@" << Pos.getCallSiteArgNo() << "]";
if (Pos.hasCallBaseContext())
OS << "[cb_context:" << *Pos.getCallBaseContext() << "]";
return OS << "}";
}
raw_ostream &llvm::operator<<(raw_ostream &OS, const IntegerRangeState &S) {
OS << "range-state(" << S.getBitWidth() << ")<";
S.getKnown().print(OS);
OS << " / ";
S.getAssumed().print(OS);
OS << ">";
return OS << static_cast<const AbstractState &>(S);
}
raw_ostream &llvm::operator<<(raw_ostream &OS, const AbstractState &S) {
return OS << (!S.isValidState() ? "top" : (S.isAtFixpoint() ? "fix" : ""));
}
raw_ostream &llvm::operator<<(raw_ostream &OS, const AbstractAttribute &AA) {
AA.print(OS);
return OS;
}
raw_ostream &llvm::operator<<(raw_ostream &OS,
const PotentialConstantIntValuesState &S) {
OS << "set-state(< {";
if (!S.isValidState())
OS << "full-set";
else {
for (auto &it : S.getAssumedSet())
OS << it << ", ";
if (S.undefIsContained())
OS << "undef ";
}
OS << "} >)";
return OS;
}
void AbstractAttribute::print(raw_ostream &OS) const {
OS << "[";
OS << getName();
OS << "] for CtxI ";
if (auto *I = getCtxI()) {
OS << "'";
I->print(OS);
OS << "'";
} else
OS << "<<null inst>>";
OS << " at position " << getIRPosition() << " with state " << getAsStr()
<< '\n';
}
void AbstractAttribute::printWithDeps(raw_ostream &OS) const {
print(OS);
for (const auto &DepAA : Deps) {
auto *AA = DepAA.getPointer();
OS << " updates ";
AA->print(OS);
}
OS << '\n';
}
raw_ostream &llvm::operator<<(raw_ostream &OS,
const AAPointerInfo::Access &Acc) {
OS << " [" << Acc.getKind() << "] " << *Acc.getRemoteInst();
if (Acc.getLocalInst() != Acc.getRemoteInst())
OS << " via " << *Acc.getLocalInst();
if (Acc.getContent().hasValue())
OS << " [" << *Acc.getContent() << "]";
return OS;
}
///}
/// ----------------------------------------------------------------------------
/// Pass (Manager) Boilerplate
/// ----------------------------------------------------------------------------
static bool runAttributorOnFunctions(InformationCache &InfoCache,
SetVector<Function *> &Functions,
AnalysisGetter &AG,
CallGraphUpdater &CGUpdater,
bool DeleteFns) {
if (Functions.empty())
return false;
LLVM_DEBUG({
dbgs() << "[Attributor] Run on module with " << Functions.size()
<< " functions:\n";
for (Function *Fn : Functions)
dbgs() << " - " << Fn->getName() << "\n";
});
// Create an Attributor and an initially empty information cache that is
// filled while we identify default attribute opportunities.
Attributor A(Functions, InfoCache, CGUpdater, /* Allowed */ nullptr,
DeleteFns);
// Create shallow wrappers for all functions that are not IPO amendable
if (AllowShallowWrappers)
for (Function *F : Functions)
if (!A.isFunctionIPOAmendable(*F))
Attributor::createShallowWrapper(*F);
// Internalize non-exact functions
// TODO: for now we eagerly internalize functions without calculating the
// cost; we need a cost interface to determine whether internalizing
// a function is "beneficial".
if (AllowDeepWrapper) {
unsigned FunSize = Functions.size();
for (unsigned u = 0; u < FunSize; u++) {
Function *F = Functions[u];
if (!F->isDeclaration() && !F->isDefinitionExact() && F->getNumUses() &&
!GlobalValue::isInterposableLinkage(F->getLinkage())) {
Function *NewF = Attributor::internalizeFunction(*F);
assert(NewF && "Could not internalize function.");
Functions.insert(NewF);
// Update call graph
CGUpdater.replaceFunctionWith(*F, *NewF);
for (const Use &U : NewF->uses())
if (CallBase *CB = dyn_cast<CallBase>(U.getUser())) {
auto *CallerF = CB->getCaller();
CGUpdater.reanalyzeFunction(*CallerF);
}
}
}
}
for (Function *F : Functions) {
if (F->hasExactDefinition())
NumFnWithExactDefinition++;
else
NumFnWithoutExactDefinition++;
// We look at internal functions only on-demand but if any use is not a
// direct call or outside the current set of analyzed functions, we have
// to do it eagerly.
if (F->hasLocalLinkage()) {
if (llvm::all_of(F->uses(), [&Functions](const Use &U) {
const auto *CB = dyn_cast<CallBase>(U.getUser());
return CB && CB->isCallee(&U) &&
Functions.count(const_cast<Function *>(CB->getCaller()));
}))
continue;
}
// Populate the Attributor with abstract attribute opportunities in the
// function and the information cache with IR information.
A.identifyDefaultAbstractAttributes(*F);
}
ChangeStatus Changed = A.run();
LLVM_DEBUG(dbgs() << "[Attributor] Done with " << Functions.size()
<< " functions, result: " << Changed << ".\n");
return Changed == ChangeStatus::CHANGED;
}
void AADepGraph::viewGraph() { llvm::ViewGraph(this, "Dependency Graph"); }
void AADepGraph::dumpGraph() {
static std::atomic<int> CallTimes;
std::string Prefix;
if (!DepGraphDotFileNamePrefix.empty())
Prefix = DepGraphDotFileNamePrefix;
else
Prefix = "dep_graph";
std::string Filename =
Prefix + "_" + std::to_string(CallTimes.load()) + ".dot";
outs() << "Dependency graph dump to " << Filename << ".\n";
std::error_code EC;
raw_fd_ostream File(Filename, EC, sys::fs::OF_TextWithCRLF);
if (!EC)
llvm::WriteGraph(File, this);
CallTimes++;
}
void AADepGraph::print() {
for (auto DepAA : SyntheticRoot.Deps)
cast<AbstractAttribute>(DepAA.getPointer())->printWithDeps(outs());
}
PreservedAnalyses AttributorPass::run(Module &M, ModuleAnalysisManager &AM) {
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
AnalysisGetter AG(FAM);
SetVector<Function *> Functions;
for (Function &F : M)
Functions.insert(&F);
CallGraphUpdater CGUpdater;
BumpPtrAllocator Allocator;
InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ nullptr);
if (runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater,
/* DeleteFns */ true)) {
// FIXME: Think about passes we will preserve and add them here.
return PreservedAnalyses::none();
}
return PreservedAnalyses::all();
}
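// Editorial note, not part of the upstream sources: with the new pass manager
// the module pass above is usually exercised via `opt -passes=attributor`,
// and the CGSCC variant below via `opt -passes=attributor-cgscc` (pass names
// as registered in LLVM's pass registry; mentioned here for orientation only).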
PreservedAnalyses AttributorCGSCCPass::run(LazyCallGraph::SCC &C,
CGSCCAnalysisManager &AM,
LazyCallGraph &CG,
CGSCCUpdateResult &UR) {
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
AnalysisGetter AG(FAM);
SetVector<Function *> Functions;
for (LazyCallGraph::Node &N : C)
Functions.insert(&N.getFunction());
if (Functions.empty())
return PreservedAnalyses::all();
Module &M = *Functions.back()->getParent();
CallGraphUpdater CGUpdater;
CGUpdater.initialize(CG, C, AM, UR);
BumpPtrAllocator Allocator;
InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ &Functions);
if (runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater,
/* DeleteFns */ false)) {
// FIXME: Think about passes we will preserve and add them here.
PreservedAnalyses PA;
PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
return PA;
}
return PreservedAnalyses::all();
}
namespace llvm {
template <> struct GraphTraits<AADepGraphNode *> {
using NodeRef = AADepGraphNode *;
using DepTy = PointerIntPair<AADepGraphNode *, 1>;
using EdgeRef = PointerIntPair<AADepGraphNode *, 1>;
static NodeRef getEntryNode(AADepGraphNode *DGN) { return DGN; }
static NodeRef DepGetVal(DepTy &DT) { return DT.getPointer(); }
using ChildIteratorType =
mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>;
using ChildEdgeIteratorType = TinyPtrVector<DepTy>::iterator;
static ChildIteratorType child_begin(NodeRef N) { return N->child_begin(); }
static ChildIteratorType child_end(NodeRef N) { return N->child_end(); }
};
template <>
struct GraphTraits<AADepGraph *> : public GraphTraits<AADepGraphNode *> {
static NodeRef getEntryNode(AADepGraph *DG) { return DG->GetEntryNode(); }
using nodes_iterator =
mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>;
static nodes_iterator nodes_begin(AADepGraph *DG) { return DG->begin(); }
static nodes_iterator nodes_end(AADepGraph *DG) { return DG->end(); }
};
template <> struct DOTGraphTraits<AADepGraph *> : public DefaultDOTGraphTraits {
DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
static std::string getNodeLabel(const AADepGraphNode *Node,
const AADepGraph *DG) {
std::string AAString;
raw_string_ostream O(AAString);
Node->print(O);
return AAString;
}
};
} // end namespace llvm
namespace {
struct AttributorLegacyPass : public ModulePass {
static char ID;
AttributorLegacyPass() : ModulePass(ID) {
initializeAttributorLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnModule(Module &M) override {
if (skipModule(M))
return false;
AnalysisGetter AG;
SetVector<Function *> Functions;
for (Function &F : M)
Functions.insert(&F);
CallGraphUpdater CGUpdater;
BumpPtrAllocator Allocator;
InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ nullptr);
return runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater,
/* DeleteFns*/ true);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
// FIXME: Think about passes we will preserve and add them here.
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
struct AttributorCGSCCLegacyPass : public CallGraphSCCPass {
static char ID;
AttributorCGSCCLegacyPass() : CallGraphSCCPass(ID) {
initializeAttributorCGSCCLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnSCC(CallGraphSCC &SCC) override {
if (skipSCC(SCC))
return false;
SetVector<Function *> Functions;
for (CallGraphNode *CGN : SCC)
if (Function *Fn = CGN->getFunction())
if (!Fn->isDeclaration())
Functions.insert(Fn);
if (Functions.empty())
return false;
AnalysisGetter AG;
CallGraph &CG = const_cast<CallGraph &>(SCC.getCallGraph());
CallGraphUpdater CGUpdater;
CGUpdater.initialize(CG, SCC);
Module &M = *Functions.back()->getParent();
BumpPtrAllocator Allocator;
InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ &Functions);
return runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater,
/* DeleteFns */ false);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
// FIXME: Think about passes we will preserve and add them here.
AU.addRequired<TargetLibraryInfoWrapperPass>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
};
} // end anonymous namespace
Pass *llvm::createAttributorLegacyPass() { return new AttributorLegacyPass(); }
Pass *llvm::createAttributorCGSCCLegacyPass() {
return new AttributorCGSCCLegacyPass();
}
char AttributorLegacyPass::ID = 0;
char AttributorCGSCCLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(AttributorLegacyPass, "attributor",
"Deduce and propagate attributes", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(AttributorLegacyPass, "attributor",
"Deduce and propagate attributes", false, false)
INITIALIZE_PASS_BEGIN(AttributorCGSCCLegacyPass, "attributor-cgscc",
"Deduce and propagate attributes (CGSCC pass)", false,
false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(AttributorCGSCCLegacyPass, "attributor-cgscc",
"Deduce and propagate attributes (CGSCC pass)", false,
false)
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 4e4f768ed2cb..61a973f869d4 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -1,10173 +1,10207 @@
//===- AttributorAttributes.cpp - Attributes for Attributor deduction -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// See the Attributor.h file comment and the class descriptions in that file for
// more information.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Assumptions.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO/ArgumentPromotion.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
using namespace llvm;
#define DEBUG_TYPE "attributor"
static cl::opt<bool> ManifestInternal(
"attributor-manifest-internal", cl::Hidden,
cl::desc("Manifest Attributor internal string attributes."),
cl::init(false));
static cl::opt<int> MaxHeapToStackSize("max-heap-to-stack-size", cl::init(128),
cl::Hidden);
template <>
unsigned llvm::PotentialConstantIntValuesState::MaxPotentialValues = 0;
static cl::opt<unsigned, true> MaxPotentialValues(
"attributor-max-potential-values", cl::Hidden,
cl::desc("Maximum number of potential values to be "
"tracked for each position."),
cl::location(llvm::PotentialConstantIntValuesState::MaxPotentialValues),
cl::init(7));
static cl::opt<unsigned>
MaxInterferingWrites("attributor-max-interfering-writes", cl::Hidden,
cl::desc("Maximum number of interfering writes to "
"check before assuming all might interfere."),
cl::init(6));
STATISTIC(NumAAs, "Number of abstract attributes created");
// Some helper macros to deal with statistics tracking.
//
// Usage:
// For simple IR attribute tracking, overload trackStatistics in the abstract
// attribute and choose the right STATS_DECLTRACK_********* macro,
// e.g.,:
// void trackStatistics() const override {
// STATS_DECLTRACK_ARG_ATTR(returned)
// }
// If there is a single increment site, one can use the macro
// STATS_DECLTRACK with a custom message. If there are multiple increment
// sites, STATS_DECL and STATS_TRACK can also be used separately.
//
#define BUILD_STAT_MSG_IR_ATTR(TYPE, NAME) \
("Number of " #TYPE " marked '" #NAME "'")
#define BUILD_STAT_NAME(NAME, TYPE) NumIR##TYPE##_##NAME
#define STATS_DECL_(NAME, MSG) STATISTIC(NAME, MSG);
#define STATS_DECL(NAME, TYPE, MSG) \
STATS_DECL_(BUILD_STAT_NAME(NAME, TYPE), MSG);
#define STATS_TRACK(NAME, TYPE) ++(BUILD_STAT_NAME(NAME, TYPE));
#define STATS_DECLTRACK(NAME, TYPE, MSG) \
{ \
STATS_DECL(NAME, TYPE, MSG) \
STATS_TRACK(NAME, TYPE) \
}
#define STATS_DECLTRACK_ARG_ATTR(NAME) \
STATS_DECLTRACK(NAME, Arguments, BUILD_STAT_MSG_IR_ATTR(arguments, NAME))
#define STATS_DECLTRACK_CSARG_ATTR(NAME) \
STATS_DECLTRACK(NAME, CSArguments, \
BUILD_STAT_MSG_IR_ATTR(call site arguments, NAME))
#define STATS_DECLTRACK_FN_ATTR(NAME) \
STATS_DECLTRACK(NAME, Function, BUILD_STAT_MSG_IR_ATTR(functions, NAME))
#define STATS_DECLTRACK_CS_ATTR(NAME) \
STATS_DECLTRACK(NAME, CS, BUILD_STAT_MSG_IR_ATTR(call site, NAME))
#define STATS_DECLTRACK_FNRET_ATTR(NAME) \
STATS_DECLTRACK(NAME, FunctionReturn, \
BUILD_STAT_MSG_IR_ATTR(function returns, NAME))
#define STATS_DECLTRACK_CSRET_ATTR(NAME) \
STATS_DECLTRACK(NAME, CSReturn, \
BUILD_STAT_MSG_IR_ATTR(call site returns, NAME))
#define STATS_DECLTRACK_FLOATING_ATTR(NAME) \
STATS_DECLTRACK(NAME, Floating, \
("Number of floating values known to be '" #NAME "'"))
// Specialization of the operator<< for abstract attribute subclasses. This
// disambiguates situations where multiple operators are applicable.
namespace llvm {
#define PIPE_OPERATOR(CLASS) \
raw_ostream &operator<<(raw_ostream &OS, const CLASS &AA) { \
return OS << static_cast<const AbstractAttribute &>(AA); \
}
PIPE_OPERATOR(AAIsDead)
PIPE_OPERATOR(AANoUnwind)
PIPE_OPERATOR(AANoSync)
PIPE_OPERATOR(AANoRecurse)
PIPE_OPERATOR(AAWillReturn)
PIPE_OPERATOR(AANoReturn)
PIPE_OPERATOR(AAReturnedValues)
PIPE_OPERATOR(AANonNull)
PIPE_OPERATOR(AANoAlias)
PIPE_OPERATOR(AADereferenceable)
PIPE_OPERATOR(AAAlign)
PIPE_OPERATOR(AANoCapture)
PIPE_OPERATOR(AAValueSimplify)
PIPE_OPERATOR(AANoFree)
PIPE_OPERATOR(AAHeapToStack)
PIPE_OPERATOR(AAReachability)
PIPE_OPERATOR(AAMemoryBehavior)
PIPE_OPERATOR(AAMemoryLocation)
PIPE_OPERATOR(AAValueConstantRange)
PIPE_OPERATOR(AAPrivatizablePtr)
PIPE_OPERATOR(AAUndefinedBehavior)
PIPE_OPERATOR(AAPotentialValues)
PIPE_OPERATOR(AANoUndef)
PIPE_OPERATOR(AACallEdges)
PIPE_OPERATOR(AAFunctionReachability)
PIPE_OPERATOR(AAPointerInfo)
PIPE_OPERATOR(AAAssumptionInfo)
#undef PIPE_OPERATOR
template <>
ChangeStatus clampStateAndIndicateChange<DerefState>(DerefState &S,
const DerefState &R) {
ChangeStatus CS0 =
clampStateAndIndicateChange(S.DerefBytesState, R.DerefBytesState);
ChangeStatus CS1 = clampStateAndIndicateChange(S.GlobalState, R.GlobalState);
return CS0 | CS1;
}
} // namespace llvm
/// Get the pointer operand of a memory accessing instruction. If \p I is
/// not a memory accessing instruction, return nullptr. If \p AllowVolatile
/// is set to false and the instruction is volatile, return nullptr.
static const Value *getPointerOperand(const Instruction *I,
bool AllowVolatile) {
if (!AllowVolatile && I->isVolatile())
return nullptr;
if (auto *LI = dyn_cast<LoadInst>(I)) {
return LI->getPointerOperand();
}
if (auto *SI = dyn_cast<StoreInst>(I)) {
return SI->getPointerOperand();
}
if (auto *CXI = dyn_cast<AtomicCmpXchgInst>(I)) {
return CXI->getPointerOperand();
}
if (auto *RMWI = dyn_cast<AtomicRMWInst>(I)) {
return RMWI->getPointerOperand();
}
return nullptr;
}
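// Illustrative use (hypothetical caller, not part of this change):
//
//   if (const Value *Ptr = getPointerOperand(&I, /* AllowVolatile */ false))
//     ...; // Ptr is the address operand of a non-volatile load, store,
//          // cmpxchg, or atomicrmw; for any other instruction (or a
//          // volatile access) nullptr is returned instead.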
/// Helper function to create a pointer of type \p ResTy, based on \p Ptr, and
/// advanced by \p Offset bytes. To aid later analysis the method tries to build
/// getelementptr instructions that traverse the natural type of \p Ptr if
/// possible. If that fails, the remaining offset is adjusted byte-wise, hence
/// through a cast to i8*.
///
/// TODO: This could probably live somewhere more prominently if it doesn't
/// already exist.
static Value *constructPointer(Type *ResTy, Type *PtrElemTy, Value *Ptr,
int64_t Offset, IRBuilder<NoFolder> &IRB,
const DataLayout &DL) {
assert(Offset >= 0 && "Negative offset not supported yet!");
LLVM_DEBUG(dbgs() << "Construct pointer: " << *Ptr << " + " << Offset
<< "-bytes as " << *ResTy << "\n");
if (Offset) {
Type *Ty = PtrElemTy;
APInt IntOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset);
SmallVector<APInt> IntIndices = DL.getGEPIndicesForOffset(Ty, IntOffset);
SmallVector<Value *, 4> ValIndices;
std::string GEPName = Ptr->getName().str();
for (const APInt &Index : IntIndices) {
ValIndices.push_back(IRB.getInt(Index));
GEPName += "." + std::to_string(Index.getZExtValue());
}
// Create a GEP for the indices collected above.
Ptr = IRB.CreateGEP(PtrElemTy, Ptr, ValIndices, GEPName);
// If an offset is left we use byte-wise adjustment.
if (IntOffset != 0) {
Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy());
Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(IntOffset),
GEPName + ".b" + Twine(IntOffset.getZExtValue()));
}
}
// Ensure the result has the requested type.
Ptr = IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, ResTy,
Ptr->getName() + ".cast");
LLVM_DEBUG(dbgs() << "Constructed pointer: " << *Ptr << "\n");
return Ptr;
}
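// As a sketch of the intended output (illustrative only): for a pointer %p
// whose natural type is { i32, i32, i32 } and Offset == 8,
// getGEPIndicesForOffset yields the indices [0, 2], so the helper emits
// roughly
//
//   %p.0.2 = getelementptr { i32, i32, i32 }, { i32, i32, i32 }* %p,
//            i64 0, i64 2
//
// and only falls back to an i8-based GEP for a byte remainder that cannot be
// expressed through the natural type layout.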
/// Recursively visit all values that might become \p IRP at some point. This
/// will be done by looking through cast instructions, selects, phis, and calls
/// with the "returned" attribute. Once we cannot look through the value any
/// further, the callback \p VisitValueCB is invoked and passed the current
/// value, the \p State, and a flag to indicate if we stripped anything.
/// Stripped means that we unpacked the value associated with \p IRP at least
/// once. Note that the value used for the callback may still be the value
/// associated with \p IRP (due to PHIs). To limit how much effort is invested,
/// we will never visit more values than specified by \p MaxValues.
/// If \p Intraprocedural is set to true, only values valid in the scope of
/// \p CtxI will be visited and simplification into other scopes is prevented.
template <typename StateTy>
static bool genericValueTraversal(
Attributor &A, IRPosition IRP, const AbstractAttribute &QueryingAA,
StateTy &State,
function_ref<bool(Value &, const Instruction *, StateTy &, bool)>
VisitValueCB,
- const Instruction *CtxI, bool UseValueSimplify = true, int MaxValues = 16,
+ const Instruction *CtxI, bool &UsedAssumedInformation,
+ bool UseValueSimplify = true, int MaxValues = 16,
function_ref<Value *(Value *)> StripCB = nullptr,
bool Intraprocedural = false) {
- const AAIsDead *LivenessAA = nullptr;
- if (IRP.getAnchorScope())
- LivenessAA = &A.getAAFor<AAIsDead>(
- QueryingAA,
- IRPosition::function(*IRP.getAnchorScope(), IRP.getCallBaseContext()),
- DepClassTy::NONE);
- bool AnyDead = false;
+ struct LivenessInfo {
+ const AAIsDead *LivenessAA = nullptr;
+ bool AnyDead = false;
+ };
+ DenseMap<const Function *, LivenessInfo> LivenessAAs;
+ auto GetLivenessInfo = [&](const Function &F) -> LivenessInfo & {
+ LivenessInfo &LI = LivenessAAs[&F];
+ if (!LI.LivenessAA)
+ LI.LivenessAA = &A.getAAFor<AAIsDead>(QueryingAA, IRPosition::function(F),
+ DepClassTy::NONE);
+ return LI;
+ };
Value *InitialV = &IRP.getAssociatedValue();
using Item = std::pair<Value *, const Instruction *>;
SmallSet<Item, 16> Visited;
SmallVector<Item, 16> Worklist;
Worklist.push_back({InitialV, CtxI});
int Iteration = 0;
do {
Item I = Worklist.pop_back_val();
Value *V = I.first;
CtxI = I.second;
if (StripCB)
V = StripCB(V);
// Check if we should process the current value. To prevent endless
// recursion, keep a record of the values we followed!
if (!Visited.insert(I).second)
continue;
// Make sure we limit the compile time for complex expressions.
if (Iteration++ >= MaxValues) {
LLVM_DEBUG(dbgs() << "Generic value traversal reached iteration limit: "
<< Iteration << "!\n");
return false;
}
// Explicitly look through calls with a "returned" attribute if we do
// not have a pointer, as stripPointerCasts only works on pointers.
Value *NewV = nullptr;
if (V->getType()->isPointerTy()) {
NewV = V->stripPointerCasts();
} else {
auto *CB = dyn_cast<CallBase>(V);
if (CB && CB->getCalledFunction()) {
for (Argument &Arg : CB->getCalledFunction()->args())
if (Arg.hasReturnedAttr()) {
NewV = CB->getArgOperand(Arg.getArgNo());
break;
}
}
}
if (NewV && NewV != V) {
Worklist.push_back({NewV, CtxI});
continue;
}
// Look through select instructions, visit assumed potential values.
if (auto *SI = dyn_cast<SelectInst>(V)) {
- bool UsedAssumedInformation = false;
Optional<Constant *> C = A.getAssumedConstant(
*SI->getCondition(), QueryingAA, UsedAssumedInformation);
bool NoValueYet = !C.hasValue();
if (NoValueYet || isa_and_nonnull<UndefValue>(*C))
continue;
if (auto *CI = dyn_cast_or_null<ConstantInt>(*C)) {
if (CI->isZero())
Worklist.push_back({SI->getFalseValue(), CtxI});
else
Worklist.push_back({SI->getTrueValue(), CtxI});
continue;
}
// We could not simplify the condition, assume both values.
Worklist.push_back({SI->getTrueValue(), CtxI});
Worklist.push_back({SI->getFalseValue(), CtxI});
continue;
}
// Look through phi nodes, visit all live operands.
if (auto *PHI = dyn_cast<PHINode>(V)) {
- assert(LivenessAA &&
- "Expected liveness in the presence of instructions!");
+ LivenessInfo &LI = GetLivenessInfo(*PHI->getFunction());
for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) {
BasicBlock *IncomingBB = PHI->getIncomingBlock(u);
- if (LivenessAA->isEdgeDead(IncomingBB, PHI->getParent())) {
- AnyDead = true;
+ if (LI.LivenessAA->isEdgeDead(IncomingBB, PHI->getParent())) {
+ LI.AnyDead = true;
+ UsedAssumedInformation |= !LI.LivenessAA->isAtFixpoint();
continue;
}
Worklist.push_back(
{PHI->getIncomingValue(u), IncomingBB->getTerminator()});
}
continue;
}
if (auto *Arg = dyn_cast<Argument>(V)) {
if (!Intraprocedural && !Arg->hasPassPointeeByValueCopyAttr()) {
SmallVector<Item> CallSiteValues;
- bool AllCallSitesKnown = true;
+ bool UsedAssumedInformation = false;
if (A.checkForAllCallSites(
[&](AbstractCallSite ACS) {
// Callbacks might not have a corresponding call site operand,
// stick with the argument in that case.
Value *CSOp = ACS.getCallArgOperand(*Arg);
if (!CSOp)
return false;
CallSiteValues.push_back({CSOp, ACS.getInstruction()});
return true;
},
- *Arg->getParent(), true, &QueryingAA, AllCallSitesKnown)) {
+ *Arg->getParent(), true, &QueryingAA, UsedAssumedInformation)) {
Worklist.append(CallSiteValues);
continue;
}
}
}
if (UseValueSimplify && !isa<Constant>(V)) {
- bool UsedAssumedInformation = false;
Optional<Value *> SimpleV =
A.getAssumedSimplified(*V, QueryingAA, UsedAssumedInformation);
if (!SimpleV.hasValue())
continue;
Value *NewV = SimpleV.getValue();
if (NewV && NewV != V) {
if (!Intraprocedural || !CtxI ||
AA::isValidInScope(*NewV, CtxI->getFunction())) {
Worklist.push_back({NewV, CtxI});
continue;
}
}
}
// Once a leaf is reached we inform the user through the callback.
if (!VisitValueCB(*V, CtxI, State, Iteration > 1)) {
LLVM_DEBUG(dbgs() << "Generic value traversal visit callback failed for: "
<< *V << "!\n");
return false;
}
} while (!Worklist.empty());
// If we actually used liveness information, we have to record a dependence.
- if (AnyDead)
- A.recordDependence(*LivenessAA, QueryingAA, DepClassTy::OPTIONAL);
+ for (auto &It : LivenessAAs)
+ if (It.second.AnyDead)
+ A.recordDependence(*It.second.LivenessAA, QueryingAA,
+ DepClassTy::OPTIONAL);
// All values have been visited.
return true;
}
bool AA::getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
SmallVectorImpl<Value *> &Objects,
const AbstractAttribute &QueryingAA,
const Instruction *CtxI,
+ bool &UsedAssumedInformation,
bool Intraprocedural) {
auto StripCB = [&](Value *V) { return getUnderlyingObject(V); };
SmallPtrSet<Value *, 8> SeenObjects;
auto VisitValueCB = [&SeenObjects](Value &Val, const Instruction *,
SmallVectorImpl<Value *> &Objects,
bool) -> bool {
if (SeenObjects.insert(&Val).second)
Objects.push_back(&Val);
return true;
};
if (!genericValueTraversal<decltype(Objects)>(
A, IRPosition::value(Ptr), QueryingAA, Objects, VisitValueCB, CtxI,
- true, 32, StripCB, Intraprocedural))
+ UsedAssumedInformation, true, 32, StripCB, Intraprocedural))
return false;
return true;
}
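// Illustrative use from a querying attribute (hypothetical snippet, not part
// of this change):
//
//   SmallVector<Value *> Objects;
//   bool UsedAssumedInformation = false;
//   if (AA::getAssumedUnderlyingObjects(A, Ptr, Objects, *this, CtxI,
//                                       UsedAssumedInformation,
//                                       /* Intraprocedural */ false))
//     for (Value *Obj : Objects) {
//       // Reason about each assumed underlying object (e.g., an alloca or a
//       // global) and record that assumed information was used if so.
//     }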
const Value *stripAndAccumulateMinimalOffsets(
Attributor &A, const AbstractAttribute &QueryingAA, const Value *Val,
const DataLayout &DL, APInt &Offset, bool AllowNonInbounds,
bool UseAssumed = false) {
auto AttributorAnalysis = [&](Value &V, APInt &ROffset) -> bool {
const IRPosition &Pos = IRPosition::value(V);
// Only track dependence if we are going to use the assumed info.
const AAValueConstantRange &ValueConstantRangeAA =
A.getAAFor<AAValueConstantRange>(QueryingAA, Pos,
UseAssumed ? DepClassTy::OPTIONAL
: DepClassTy::NONE);
ConstantRange Range = UseAssumed ? ValueConstantRangeAA.getAssumed()
: ValueConstantRangeAA.getKnown();
// We can only use the lower part of the range because the upper part can
// be higher than what the value can really be.
ROffset = Range.getSignedMin();
return true;
};
return Val->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds,
/* AllowInvariant */ false,
AttributorAnalysis);
}
static const Value *
getMinimalBaseOfPointer(Attributor &A, const AbstractAttribute &QueryingAA,
const Value *Ptr, int64_t &BytesOffset,
const DataLayout &DL, bool AllowNonInbounds = false) {
APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
const Value *Base = stripAndAccumulateMinimalOffsets(
A, QueryingAA, Ptr, DL, OffsetAPInt, AllowNonInbounds);
BytesOffset = OffsetAPInt.getSExtValue();
return Base;
}
/// Clamp the information known for all returned values of a function
/// (identified by \p QueryingAA) into \p S.
template <typename AAType, typename StateType = typename AAType::StateType>
static void clampReturnedValueStates(
Attributor &A, const AAType &QueryingAA, StateType &S,
const IRPosition::CallBaseContext *CBContext = nullptr) {
LLVM_DEBUG(dbgs() << "[Attributor] Clamp return value states for "
<< QueryingAA << " into " << S << "\n");
assert((QueryingAA.getIRPosition().getPositionKind() ==
IRPosition::IRP_RETURNED ||
QueryingAA.getIRPosition().getPositionKind() ==
IRPosition::IRP_CALL_SITE_RETURNED) &&
"Can only clamp returned value states for a function returned or call "
"site returned position!");
// Use an optional state as there might not be any return values and we want
// to join (IntegerState::operator&) the states of all those there are.
Optional<StateType> T;
// Callback for each possibly returned value.
auto CheckReturnValue = [&](Value &RV) -> bool {
const IRPosition &RVPos = IRPosition::value(RV, CBContext);
const AAType &AA =
A.getAAFor<AAType>(QueryingAA, RVPos, DepClassTy::REQUIRED);
LLVM_DEBUG(dbgs() << "[Attributor] RV: " << RV << " AA: " << AA.getAsStr()
<< " @ " << RVPos << "\n");
const StateType &AAS = AA.getState();
if (T.hasValue())
*T &= AAS;
else
T = AAS;
LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " RV State: " << T
<< "\n");
return T->isValidState();
};
if (!A.checkForAllReturnedValues(CheckReturnValue, QueryingAA))
S.indicatePessimisticFixpoint();
else if (T.hasValue())
S ^= *T;
}
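// Worked example (illustrative): for a hypothetical AANonNull at a
// function-returned position, every possibly returned value contributes its
// own state. The optional T starts unset, takes the first state, and is then
// joined (&=) with each further one, so S only keeps "nonnull" if all
// returned values agree; a single unknown return drops it, and a failed
// checkForAllReturnedValues query pessimistically fixes S.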
namespace {
/// Helper class for generic deduction: return value -> returned position.
template <typename AAType, typename BaseType,
typename StateType = typename BaseType::StateType,
bool PropagateCallBaseContext = false>
struct AAReturnedFromReturnedValues : public BaseType {
AAReturnedFromReturnedValues(const IRPosition &IRP, Attributor &A)
: BaseType(IRP, A) {}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
StateType S(StateType::getBestState(this->getState()));
clampReturnedValueStates<AAType, StateType>(
A, *this, S,
PropagateCallBaseContext ? this->getCallBaseContext() : nullptr);
// TODO: If we know we visited all returned values, thus none are assumed
// dead, we can take the known information from the state T.
return clampStateAndIndicateChange<StateType>(this->getState(), S);
}
};
/// Clamp the information known at all call sites for a given argument
/// (identified by \p QueryingAA) into \p S.
template <typename AAType, typename StateType = typename AAType::StateType>
static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA,
StateType &S) {
LLVM_DEBUG(dbgs() << "[Attributor] Clamp call site argument states for "
<< QueryingAA << " into " << S << "\n");
assert(QueryingAA.getIRPosition().getPositionKind() ==
IRPosition::IRP_ARGUMENT &&
"Can only clamp call site argument states for an argument position!");
// Use an optional state as there might not be any call sites and we want
// to join (IntegerState::operator&) the states of all those there are.
Optional<StateType> T;
// The argument number which is also the call site argument number.
unsigned ArgNo = QueryingAA.getIRPosition().getCallSiteArgNo();
auto CallSiteCheck = [&](AbstractCallSite ACS) {
const IRPosition &ACSArgPos = IRPosition::callsite_argument(ACS, ArgNo);
// Check if a corresponding argument was found or if it is not associated
// (which can happen for callback calls).
if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID)
return false;
const AAType &AA =
A.getAAFor<AAType>(QueryingAA, ACSArgPos, DepClassTy::REQUIRED);
LLVM_DEBUG(dbgs() << "[Attributor] ACS: " << *ACS.getInstruction()
<< " AA: " << AA.getAsStr() << " @" << ACSArgPos << "\n");
const StateType &AAS = AA.getState();
if (T.hasValue())
*T &= AAS;
else
T = AAS;
LLVM_DEBUG(dbgs() << "[Attributor] AA State: " << AAS << " CSA State: " << T
<< "\n");
return T->isValidState();
};
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
if (!A.checkForAllCallSites(CallSiteCheck, QueryingAA, true,
- AllCallSitesKnown))
+ UsedAssumedInformation))
S.indicatePessimisticFixpoint();
else if (T.hasValue())
S ^= *T;
}
/// This function is the bridge between argument position and the call base
/// context.
template <typename AAType, typename BaseType,
typename StateType = typename AAType::StateType>
bool getArgumentStateFromCallBaseContext(Attributor &A,
BaseType &QueryingAttribute,
IRPosition &Pos, StateType &State) {
assert((Pos.getPositionKind() == IRPosition::IRP_ARGUMENT) &&
"Expected an 'argument' position !");
const CallBase *CBContext = Pos.getCallBaseContext();
if (!CBContext)
return false;
int ArgNo = Pos.getCallSiteArgNo();
assert(ArgNo >= 0 && "Invalid Arg No!");
const auto &AA = A.getAAFor<AAType>(
QueryingAttribute, IRPosition::callsite_argument(*CBContext, ArgNo),
DepClassTy::REQUIRED);
const StateType &CBArgumentState =
static_cast<const StateType &>(AA.getState());
LLVM_DEBUG(dbgs() << "[Attributor] Briding Call site context to argument"
<< "Position:" << Pos << "CB Arg state:" << CBArgumentState
<< "\n");
// NOTE: If we want to do call site grouping it should happen here.
State ^= CBArgumentState;
return true;
}
/// Helper class for generic deduction: call site argument -> argument position.
template <typename AAType, typename BaseType,
typename StateType = typename AAType::StateType,
bool BridgeCallBaseContext = false>
struct AAArgumentFromCallSiteArguments : public BaseType {
AAArgumentFromCallSiteArguments(const IRPosition &IRP, Attributor &A)
: BaseType(IRP, A) {}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
StateType S = StateType::getBestState(this->getState());
if (BridgeCallBaseContext) {
bool Success =
getArgumentStateFromCallBaseContext<AAType, BaseType, StateType>(
A, *this, this->getIRPosition(), S);
if (Success)
return clampStateAndIndicateChange<StateType>(this->getState(), S);
}
clampCallSiteArgumentStates<AAType, StateType>(A, *this, S);
// TODO: If we know we visited all incoming values, thus none are assumed
// dead, we can take the known information from the state T.
return clampStateAndIndicateChange<StateType>(this->getState(), S);
}
};
/// Helper class for generic replication: function returned -> cs returned.
template <typename AAType, typename BaseType,
typename StateType = typename BaseType::StateType,
bool IntroduceCallBaseContext = false>
struct AACallSiteReturnedFromReturned : public BaseType {
AACallSiteReturnedFromReturned(const IRPosition &IRP, Attributor &A)
: BaseType(IRP, A) {}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
assert(this->getIRPosition().getPositionKind() ==
IRPosition::IRP_CALL_SITE_RETURNED &&
"Can only wrap function returned positions for call site returned "
"positions!");
auto &S = this->getState();
const Function *AssociatedFunction =
this->getIRPosition().getAssociatedFunction();
if (!AssociatedFunction)
return S.indicatePessimisticFixpoint();
CallBase &CBContext = cast<CallBase>(this->getAnchorValue());
if (IntroduceCallBaseContext)
LLVM_DEBUG(dbgs() << "[Attributor] Introducing call base context:"
<< CBContext << "\n");
IRPosition FnPos = IRPosition::returned(
*AssociatedFunction, IntroduceCallBaseContext ? &CBContext : nullptr);
const AAType &AA = A.getAAFor<AAType>(*this, FnPos, DepClassTy::REQUIRED);
return clampStateAndIndicateChange(S, AA.getState());
}
};
} // namespace
/// Helper function to accumulate uses.
template <class AAType, typename StateType = typename AAType::StateType>
static void followUsesInContext(AAType &AA, Attributor &A,
MustBeExecutedContextExplorer &Explorer,
const Instruction *CtxI,
SetVector<const Use *> &Uses,
StateType &State) {
auto EIt = Explorer.begin(CtxI), EEnd = Explorer.end(CtxI);
for (unsigned u = 0; u < Uses.size(); ++u) {
const Use *U = Uses[u];
if (const Instruction *UserI = dyn_cast<Instruction>(U->getUser())) {
bool Found = Explorer.findInContextOf(UserI, EIt, EEnd);
if (Found && AA.followUseInMBEC(A, U, UserI, State))
for (const Use &Us : UserI->uses())
Uses.insert(&Us);
}
}
}
/// Use the must-be-executed-context around \p I to add information into \p S.
/// The AAType class is required to have `followUseInMBEC` method with the
/// following signature and behaviour:
///
/// bool followUseInMBEC(Attributor &A, const Use *U, const Instruction *I,
///                      StateType &S)
/// U - Underlying use.
/// I - The user of \p U.
/// S - The state to be updated based on the use.
/// Returns true if the value should be tracked transitively.
///
template <class AAType, typename StateType = typename AAType::StateType>
static void followUsesInMBEC(AAType &AA, Attributor &A, StateType &S,
Instruction &CtxI) {
// Container for (transitive) uses of the associated value.
SetVector<const Use *> Uses;
for (const Use &U : AA.getIRPosition().getAssociatedValue().uses())
Uses.insert(&U);
MustBeExecutedContextExplorer &Explorer =
A.getInfoCache().getMustBeExecutedContextExplorer();
followUsesInContext<AAType>(AA, A, Explorer, &CtxI, Uses, S);
if (S.isAtFixpoint())
return;
SmallVector<const BranchInst *, 4> BrInsts;
auto Pred = [&](const Instruction *I) {
if (const BranchInst *Br = dyn_cast<BranchInst>(I))
if (Br->isConditional())
BrInsts.push_back(Br);
return true;
};
// Here, accumulate conditional branch instructions in the context. We
// explore the child paths and collect the known states. The disjunction of
// those states is then merged into the known state. Let ParentState_i be the
// state that captures the known information for the i-th branch instruction
// in the context. ChildStates are created for its successors, respectively.
//
// ParentS_1 = ChildS_{1, 1} /\ ChildS_{1, 2} /\ ... /\ ChildS_{1, n_1}
// ParentS_2 = ChildS_{2, 1} /\ ChildS_{2, 2} /\ ... /\ ChildS_{2, n_2}
// ...
// ParentS_m = ChildS_{m, 1} /\ ChildS_{m, 2} /\ ... /\ ChildS_{m, n_m}
//
// Known State |= ParentS_1 \/ ParentS_2 \/... \/ ParentS_m
//
// FIXME: Currently, recursive branches are not handled. For example, we
// can't deduce that ptr must be dereferenced in the function below.
//
// void f(int a, int b, int *ptr) {
// if (a)
// if (b) {
// *ptr = 0;
// } else {
// *ptr = 1;
// }
// else {
// if (b) {
// *ptr = 0;
// } else {
// *ptr = 1;
// }
// }
// }
Explorer.checkForAllContext(&CtxI, Pred);
for (const BranchInst *Br : BrInsts) {
StateType ParentState;
// The known state of the parent is the conjunction of the children's
// known states, so it is initialized with the best state.
ParentState.indicateOptimisticFixpoint();
for (const BasicBlock *BB : Br->successors()) {
StateType ChildState;
size_t BeforeSize = Uses.size();
followUsesInContext(AA, A, Explorer, &BB->front(), Uses, ChildState);
// Erase uses which only appear in the child.
for (auto It = Uses.begin() + BeforeSize; It != Uses.end();)
It = Uses.erase(It);
ParentState &= ChildState;
}
// Use only known state.
S += ParentState;
}
}
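// Worked example (illustrative, using a dereferenceable-style state): if the
// two successors of a conditional branch are known to imply deref(8) and
// deref(4) bytes respectively, the loop above computes
//
//   ParentS = ChildS_1 /\ ChildS_2   // known: deref(min(8, 4)) = deref(4)
//   S += ParentS                     // only the known part is merged
//
// so after the branch the state soundly records the 4 bytes guaranteed on
// every path rather than the optimistic 8.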
/// ------------------------ PointerInfo ---------------------------------------
namespace llvm {
namespace AA {
namespace PointerInfo {
/// An access kind description as used by AAPointerInfo.
struct OffsetAndSize;
struct State;
} // namespace PointerInfo
} // namespace AA
/// Helper for AA::PointerInfo::Access DenseMap/Set usage.
template <>
struct DenseMapInfo<AAPointerInfo::Access> : DenseMapInfo<Instruction *> {
using Access = AAPointerInfo::Access;
static inline Access getEmptyKey();
static inline Access getTombstoneKey();
static unsigned getHashValue(const Access &A);
static bool isEqual(const Access &LHS, const Access &RHS);
};
/// Helper that allows OffsetAndSize as a key in a DenseMap.
template <>
struct DenseMapInfo<AA::PointerInfo::OffsetAndSize>
: DenseMapInfo<std::pair<int64_t, int64_t>> {};
/// Helper for AA::PointerInfo::Access DenseMap/Set usage ignoring everything
/// but the instruction.
struct AccessAsInstructionInfo : DenseMapInfo<Instruction *> {
using Base = DenseMapInfo<Instruction *>;
using Access = AAPointerInfo::Access;
static inline Access getEmptyKey();
static inline Access getTombstoneKey();
static unsigned getHashValue(const Access &A);
static bool isEqual(const Access &LHS, const Access &RHS);
};
} // namespace llvm
/// Helper to represent an access offset and size, with logic to deal with
/// uncertainty and check for overlapping accesses.
struct AA::PointerInfo::OffsetAndSize : public std::pair<int64_t, int64_t> {
using BaseTy = std::pair<int64_t, int64_t>;
OffsetAndSize(int64_t Offset, int64_t Size) : BaseTy(Offset, Size) {}
OffsetAndSize(const BaseTy &P) : BaseTy(P) {}
int64_t getOffset() const { return first; }
int64_t getSize() const { return second; }
static OffsetAndSize getUnknown() { return OffsetAndSize(Unknown, Unknown); }
/// Return true if offset or size are unknown.
bool offsetOrSizeAreUnknown() const {
return getOffset() == OffsetAndSize::Unknown ||
getSize() == OffsetAndSize::Unknown;
}
/// Return true if this offset and size pair might describe an address that
/// overlaps with \p OAS.
bool mayOverlap(const OffsetAndSize &OAS) const {
// If any value is unknown we conservatively give up -> overlap.
if (offsetOrSizeAreUnknown() || OAS.offsetOrSizeAreUnknown())
return true;
// Check if the intervals [offset, offset+size) overlap.
return OAS.getOffset() + OAS.getSize() > getOffset() &&
OAS.getOffset() < getOffset() + getSize();
}
/// Constant used to represent unknown offset or sizes.
static constexpr int64_t Unknown = 1 << 31;
};
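// Worked example (illustrative): with this encoding an access at offset 8 of
// size 4 occupies [8, 12). mayOverlap(OffsetAndSize(10, 4)) is true because
// 10 + 4 > 8 and 10 < 8 + 4, while mayOverlap(OffsetAndSize(12, 4)) is false
// because 12 < 12 does not hold. Any Unknown offset or size makes the query
// conservatively return true.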
/// Implementation of the DenseMapInfo.
///
///{
inline llvm::AccessAsInstructionInfo::Access
llvm::AccessAsInstructionInfo::getEmptyKey() {
return Access(Base::getEmptyKey(), nullptr, AAPointerInfo::AK_READ, nullptr);
}
inline llvm::AccessAsInstructionInfo::Access
llvm::AccessAsInstructionInfo::getTombstoneKey() {
return Access(Base::getTombstoneKey(), nullptr, AAPointerInfo::AK_READ,
nullptr);
}
unsigned llvm::AccessAsInstructionInfo::getHashValue(
const llvm::AccessAsInstructionInfo::Access &A) {
return Base::getHashValue(A.getRemoteInst());
}
bool llvm::AccessAsInstructionInfo::isEqual(
const llvm::AccessAsInstructionInfo::Access &LHS,
const llvm::AccessAsInstructionInfo::Access &RHS) {
return LHS.getRemoteInst() == RHS.getRemoteInst();
}
inline llvm::DenseMapInfo<AAPointerInfo::Access>::Access
llvm::DenseMapInfo<AAPointerInfo::Access>::getEmptyKey() {
return AAPointerInfo::Access(nullptr, nullptr, AAPointerInfo::AK_READ,
nullptr);
}
inline llvm::DenseMapInfo<AAPointerInfo::Access>::Access
llvm::DenseMapInfo<AAPointerInfo::Access>::getTombstoneKey() {
return AAPointerInfo::Access(nullptr, nullptr, AAPointerInfo::AK_WRITE,
nullptr);
}
unsigned llvm::DenseMapInfo<AAPointerInfo::Access>::getHashValue(
const llvm::DenseMapInfo<AAPointerInfo::Access>::Access &A) {
return detail::combineHashValue(
DenseMapInfo<Instruction *>::getHashValue(A.getRemoteInst()),
(A.isWrittenValueYetUndetermined()
? ~0
: DenseMapInfo<Value *>::getHashValue(A.getWrittenValue()))) +
A.getKind();
}
bool llvm::DenseMapInfo<AAPointerInfo::Access>::isEqual(
const llvm::DenseMapInfo<AAPointerInfo::Access>::Access &LHS,
const llvm::DenseMapInfo<AAPointerInfo::Access>::Access &RHS) {
return LHS == RHS;
}
///}
/// A type to track pointer/struct usage and accesses for AAPointerInfo.
struct AA::PointerInfo::State : public AbstractState {
/// Return the best possible representable state.
static State getBestState(const State &SIS) { return State(); }
/// Return the worst possible representable state.
static State getWorstState(const State &SIS) {
State R;
R.indicatePessimisticFixpoint();
return R;
}
State() {}
State(const State &SIS) : AccessBins(SIS.AccessBins) {}
State(State &&SIS) : AccessBins(std::move(SIS.AccessBins)) {}
const State &getAssumed() const { return *this; }
/// See AbstractState::isValidState().
bool isValidState() const override { return BS.isValidState(); }
/// See AbstractState::isAtFixpoint().
bool isAtFixpoint() const override { return BS.isAtFixpoint(); }
/// See AbstractState::indicateOptimisticFixpoint().
ChangeStatus indicateOptimisticFixpoint() override {
BS.indicateOptimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
/// See AbstractState::indicatePessimisticFixpoint().
ChangeStatus indicatePessimisticFixpoint() override {
BS.indicatePessimisticFixpoint();
return ChangeStatus::CHANGED;
}
State &operator=(const State &R) {
if (this == &R)
return *this;
BS = R.BS;
AccessBins = R.AccessBins;
return *this;
}
State &operator=(State &&R) {
if (this == &R)
return *this;
std::swap(BS, R.BS);
std::swap(AccessBins, R.AccessBins);
return *this;
}
bool operator==(const State &R) const {
if (BS != R.BS)
return false;
if (AccessBins.size() != R.AccessBins.size())
return false;
auto It = begin(), RIt = R.begin(), E = end();
while (It != E) {
if (It->getFirst() != RIt->getFirst())
return false;
auto &Accs = It->getSecond();
auto &RAccs = RIt->getSecond();
if (Accs.size() != RAccs.size())
return false;
auto AccIt = Accs.begin(), RAccIt = RAccs.begin(), AccE = Accs.end();
while (AccIt != AccE) {
if (*AccIt != *RAccIt)
return false;
++AccIt;
++RAccIt;
}
++It;
++RIt;
}
return true;
}
bool operator!=(const State &R) const { return !(*this == R); }
/// We store accesses in a set with the instruction as key.
using Accesses = DenseSet<AAPointerInfo::Access, AccessAsInstructionInfo>;
/// We store all accesses in bins denoted by their offset and size.
using AccessBinsTy = DenseMap<OffsetAndSize, Accesses>;
AccessBinsTy::const_iterator begin() const { return AccessBins.begin(); }
AccessBinsTy::const_iterator end() const { return AccessBins.end(); }
protected:
/// The bins with all the accesses for the associated pointer.
DenseMap<OffsetAndSize, Accesses> AccessBins;
/// Add a new access to the state at offset \p Offset and with size \p Size.
/// The access is associated with \p I, writes \p Content (if anything), and
/// is of kind \p Kind.
/// \returns CHANGED if the state changed, UNCHANGED otherwise.
ChangeStatus addAccess(int64_t Offset, int64_t Size, Instruction &I,
Optional<Value *> Content,
AAPointerInfo::AccessKind Kind, Type *Ty,
Instruction *RemoteI = nullptr,
Accesses *BinPtr = nullptr) {
OffsetAndSize Key{Offset, Size};
Accesses &Bin = BinPtr ? *BinPtr : AccessBins[Key];
AAPointerInfo::Access Acc(&I, RemoteI ? RemoteI : &I, Content, Kind, Ty);
// Check if we have an access for this instruction in this bin; if not,
// simply add it.
auto It = Bin.find(Acc);
if (It == Bin.end()) {
Bin.insert(Acc);
return ChangeStatus::CHANGED;
}
// If the existing access is the same as the new one, nothing changed.
AAPointerInfo::Access Before = *It;
// The new one will be combined with the existing one.
*It &= Acc;
return *It == Before ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
}
/// See AAPointerInfo::forallInterferingAccesses.
bool forallInterferingAccesses(
Instruction &I,
function_ref<bool(const AAPointerInfo::Access &, bool)> CB) const {
if (!isValidState())
return false;
// First find the offset and size of I.
OffsetAndSize OAS(-1, -1);
for (auto &It : AccessBins) {
for (auto &Access : It.getSecond()) {
if (Access.getRemoteInst() == &I) {
OAS = It.getFirst();
break;
}
}
if (OAS.getSize() != -1)
break;
}
if (OAS.getSize() == -1)
return true;
// Now that we have an offset and size, find all overlapping ones and use
// the callback on the accesses.
for (auto &It : AccessBins) {
OffsetAndSize ItOAS = It.getFirst();
if (!OAS.mayOverlap(ItOAS))
continue;
bool IsExact = OAS == ItOAS && !OAS.offsetOrSizeAreUnknown();
for (auto &Access : It.getSecond())
if (!CB(Access, IsExact))
return false;
}
return true;
}
private:
/// State to track fixpoint and validity.
BooleanState BS;
};
struct AAPointerInfoImpl
: public StateWrapper<AA::PointerInfo::State, AAPointerInfo> {
using BaseTy = StateWrapper<AA::PointerInfo::State, AAPointerInfo>;
AAPointerInfoImpl(const IRPosition &IRP, Attributor &A) : BaseTy(IRP) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override { AAPointerInfo::initialize(A); }
/// See AbstractAttribute::getAsStr().
const std::string getAsStr() const override {
return std::string("PointerInfo ") +
(isValidState() ? (std::string("#") +
std::to_string(AccessBins.size()) + " bins")
: "<invalid>");
}
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
return AAPointerInfo::manifest(A);
}
bool forallInterferingAccesses(
LoadInst &LI, function_ref<bool(const AAPointerInfo::Access &, bool)> CB)
const override {
return State::forallInterferingAccesses(LI, CB);
}
bool forallInterferingAccesses(
StoreInst &SI, function_ref<bool(const AAPointerInfo::Access &, bool)> CB)
const override {
return State::forallInterferingAccesses(SI, CB);
}
bool forallInterferingWrites(
Attributor &A, const AbstractAttribute &QueryingAA, LoadInst &LI,
function_ref<bool(const Access &, bool)> UserCB) const override {
SmallPtrSet<const Access *, 8> DominatingWrites;
SmallVector<std::pair<const Access *, bool>, 8> InterferingWrites;
Function &Scope = *LI.getFunction();
const auto &NoSyncAA = A.getAAFor<AANoSync>(
QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL);
const auto *ExecDomainAA = A.lookupAAFor<AAExecutionDomain>(
IRPosition::function(Scope), &QueryingAA, DepClassTy::OPTIONAL);
const bool NoSync = NoSyncAA.isAssumedNoSync();
// Helper to determine if we need to consider threading, which we cannot
// handle right now. However, if the function is (assumed) nosync or all
// instructions are executed by the initial thread only, we can ignore
// threading.
auto CanIgnoreThreading = [&](const Instruction &I) -> bool {
if (NoSync)
return true;
if (ExecDomainAA && ExecDomainAA->isExecutedByInitialThreadOnly(I))
return true;
return false;
};
// Helper to determine if the access is executed by the same thread as the
// load. For now it is sufficient to avoid any potential threading effects
// as we cannot deal with them anyway.
auto IsSameThreadAsLoad = [&](const Access &Acc) -> bool {
return CanIgnoreThreading(*Acc.getLocalInst());
};
// TODO: Use inter-procedural reachability and dominance.
const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
QueryingAA, IRPosition::function(*LI.getFunction()),
DepClassTy::OPTIONAL);
const bool CanUseCFGReasoning = CanIgnoreThreading(LI);
InformationCache &InfoCache = A.getInfoCache();
const DominatorTree *DT =
NoRecurseAA.isKnownNoRecurse()
? InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(
Scope)
: nullptr;
enum GPUAddressSpace : unsigned {
Generic = 0,
Global = 1,
Shared = 3,
Constant = 4,
Local = 5,
};
// Helper to check if a value has "kernel lifetime", that is, it will not
// outlive a GPU kernel. This is true for shared, constant, and local
// globals on AMD and NVIDIA GPUs.
auto HasKernelLifetime = [&](Value *V, Module &M) {
Triple T(M.getTargetTriple());
if (!(T.isAMDGPU() || T.isNVPTX()))
return false;
switch (V->getType()->getPointerAddressSpace()) {
case GPUAddressSpace::Shared:
case GPUAddressSpace::Constant:
case GPUAddressSpace::Local:
return true;
default:
return false;
};
};
// The IsLiveInCalleeCB will be used by the AA::isPotentiallyReachable query
// to determine if we should look at reachability from the callee. For
// certain pointers we know the lifetime and we do not have to step into the
// callee to determine reachability as the pointer would be dead in the
// callee. See the conditional initialization below.
std::function<bool(const Function &)> IsLiveInCalleeCB;
if (auto *AI = dyn_cast<AllocaInst>(&getAssociatedValue())) {
// If the function containing the alloca is not recursive, the alloca
// must be dead in the callee.
const Function *AIFn = AI->getFunction();
const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
*this, IRPosition::function(*AIFn), DepClassTy::OPTIONAL);
if (NoRecurseAA.isAssumedNoRecurse()) {
IsLiveInCalleeCB = [AIFn](const Function &Fn) { return AIFn != &Fn; };
}
} else if (auto *GV = dyn_cast<GlobalValue>(&getAssociatedValue())) {
// If the global has kernel lifetime we can stop if we reach a kernel
// as it is "dead" in the (unknown) callees.
if (HasKernelLifetime(GV, *GV->getParent()))
IsLiveInCalleeCB = [](const Function &Fn) {
return !Fn.hasFnAttribute("kernel");
};
}
auto AccessCB = [&](const Access &Acc, bool Exact) {
if (!Acc.isWrite())
return true;
// For now we only filter accesses based on CFG reasoning which does not
// work yet if we have threading effects, or the access is complicated.
if (CanUseCFGReasoning) {
if (!AA::isPotentiallyReachable(A, *Acc.getLocalInst(), LI, QueryingAA,
IsLiveInCalleeCB))
return true;
if (DT && Exact &&
(Acc.getLocalInst()->getFunction() == LI.getFunction()) &&
IsSameThreadAsLoad(Acc)) {
if (DT->dominates(Acc.getLocalInst(), &LI))
DominatingWrites.insert(&Acc);
}
}
InterferingWrites.push_back({&Acc, Exact});
return true;
};
if (!State::forallInterferingAccesses(LI, AccessCB))
return false;
// If we cannot use CFG reasoning we only filter the non-write accesses
// and are done here.
if (!CanUseCFGReasoning) {
for (auto &It : InterferingWrites)
if (!UserCB(*It.first, It.second))
return false;
return true;
}
// Helper to determine if we can skip a specific write access. This is, in
// the worst case, quadratic as we are looking for another write that will
// hide the effect of this one.
auto CanSkipAccess = [&](const Access &Acc, bool Exact) {
if (!IsSameThreadAsLoad(Acc))
return false;
if (!DominatingWrites.count(&Acc))
return false;
for (const Access *DomAcc : DominatingWrites) {
assert(Acc.getLocalInst()->getFunction() ==
DomAcc->getLocalInst()->getFunction() &&
"Expected dominating writes to be in the same function!");
if (DomAcc != &Acc &&
DT->dominates(Acc.getLocalInst(), DomAcc->getLocalInst())) {
return true;
}
}
return false;
};
// Run the user callback on all writes we cannot skip and return if that
// succeeded for all or not.
unsigned NumInterferingWrites = InterferingWrites.size();
- for (auto &It : InterferingWrites)
+ for (auto &It : InterferingWrites) {
if (!DT || NumInterferingWrites > MaxInterferingWrites ||
- !CanSkipAccess(*It.first, It.second))
+ !CanSkipAccess(*It.first, It.second)) {
if (!UserCB(*It.first, It.second))
return false;
+ }
+ }
return true;
}
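// Worked example (illustrative): in a non-recursive, nosync function with
//
//   store i32 0, i32* %p   ; A, dominates the load
//   store i32 1, i32* %p   ; B, dominates the load
//   %v = load i32, i32* %p
//
// both A and B end up in DominatingWrites. A dominates B, so CanSkipAccess
// skips A, while B is handed to the user callback because no other
// dominating write hides it (assuming exact accesses and a small enough
// number of interfering writes).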
ChangeStatus translateAndAddCalleeState(Attributor &A,
const AAPointerInfo &CalleeAA,
int64_t CallArgOffset, CallBase &CB) {
using namespace AA::PointerInfo;
if (!CalleeAA.getState().isValidState() || !isValidState())
return indicatePessimisticFixpoint();
const auto &CalleeImplAA = static_cast<const AAPointerInfoImpl &>(CalleeAA);
bool IsByval = CalleeImplAA.getAssociatedArgument()->hasByValAttr();
// Combine the accesses bin by bin.
ChangeStatus Changed = ChangeStatus::UNCHANGED;
for (auto &It : CalleeImplAA.getState()) {
OffsetAndSize OAS = OffsetAndSize::getUnknown();
if (CallArgOffset != OffsetAndSize::Unknown)
OAS = OffsetAndSize(It.first.getOffset() + CallArgOffset,
It.first.getSize());
Accesses &Bin = AccessBins[OAS];
for (const AAPointerInfo::Access &RAcc : It.second) {
if (IsByval && !RAcc.isRead())
continue;
bool UsedAssumedInformation = false;
Optional<Value *> Content = A.translateArgumentToCallSiteContent(
RAcc.getContent(), CB, *this, UsedAssumedInformation);
AccessKind AK =
AccessKind(RAcc.getKind() & (IsByval ? AccessKind::AK_READ
: AccessKind::AK_READ_WRITE));
Changed =
Changed | addAccess(OAS.getOffset(), OAS.getSize(), CB, Content, AK,
RAcc.getType(), RAcc.getRemoteInst(), &Bin);
}
}
return Changed;
}
/// Statistic tracking for all AAPointerInfo implementations.
/// See AbstractAttribute::trackStatistics().
void trackPointerInfoStatistics(const IRPosition &IRP) const {}
};
struct AAPointerInfoFloating : public AAPointerInfoImpl {
using AccessKind = AAPointerInfo::AccessKind;
AAPointerInfoFloating(const IRPosition &IRP, Attributor &A)
: AAPointerInfoImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override { AAPointerInfoImpl::initialize(A); }
/// Deal with an access and signal if it was handled successfully.
bool handleAccess(Attributor &A, Instruction &I, Value &Ptr,
Optional<Value *> Content, AccessKind Kind, int64_t Offset,
ChangeStatus &Changed, Type *Ty,
int64_t Size = AA::PointerInfo::OffsetAndSize::Unknown) {
using namespace AA::PointerInfo;
// No need to find a size if one is given or the offset is unknown.
if (Offset != OffsetAndSize::Unknown && Size == OffsetAndSize::Unknown &&
Ty) {
const DataLayout &DL = A.getDataLayout();
TypeSize AccessSize = DL.getTypeStoreSize(Ty);
if (!AccessSize.isScalable())
Size = AccessSize.getFixedSize();
}
Changed = Changed | addAccess(Offset, Size, I, Content, Kind, Ty);
return true;
};
/// Helper struct, will support ranges eventually.
struct OffsetInfo {
int64_t Offset = AA::PointerInfo::OffsetAndSize::Unknown;
bool operator==(const OffsetInfo &OI) const { return Offset == OI.Offset; }
};
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
using namespace AA::PointerInfo;
State S = getState();
ChangeStatus Changed = ChangeStatus::UNCHANGED;
Value &AssociatedValue = getAssociatedValue();
const DataLayout &DL = A.getDataLayout();
DenseMap<Value *, OffsetInfo> OffsetInfoMap;
OffsetInfoMap[&AssociatedValue] = OffsetInfo{0};
auto HandlePassthroughUser = [&](Value *Usr, OffsetInfo &PtrOI,
bool &Follow) {
OffsetInfo &UsrOI = OffsetInfoMap[Usr];
UsrOI = PtrOI;
Follow = true;
return true;
};
const auto *TLI = getAnchorScope()
? A.getInfoCache().getTargetLibraryInfoForFunction(
*getAnchorScope())
: nullptr;
auto UsePred = [&](const Use &U, bool &Follow) -> bool {
Value *CurPtr = U.get();
User *Usr = U.getUser();
LLVM_DEBUG(dbgs() << "[AAPointerInfo] Analyze " << *CurPtr << " in "
<< *Usr << "\n");
assert(OffsetInfoMap.count(CurPtr) &&
"The current pointer offset should have been seeded!");
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Usr)) {
if (CE->isCast())
return HandlePassthroughUser(Usr, OffsetInfoMap[CurPtr], Follow);
if (CE->isCompare())
return true;
if (!isa<GEPOperator>(CE)) {
LLVM_DEBUG(dbgs() << "[AAPointerInfo] Unhandled constant user " << *CE
<< "\n");
return false;
}
}
if (auto *GEP = dyn_cast<GEPOperator>(Usr)) {
// Note the order here: the access for Usr might change the map; CurPtr is
// already in it, though.
OffsetInfo &UsrOI = OffsetInfoMap[Usr];
OffsetInfo &PtrOI = OffsetInfoMap[CurPtr];
UsrOI = PtrOI;
// TODO: Use range information.
if (PtrOI.Offset == OffsetAndSize::Unknown ||
!GEP->hasAllConstantIndices()) {
UsrOI.Offset = OffsetAndSize::Unknown;
Follow = true;
return true;
}
SmallVector<Value *, 8> Indices;
for (Use &Idx : GEP->indices()) {
if (auto *CIdx = dyn_cast<ConstantInt>(Idx)) {
Indices.push_back(CIdx);
continue;
}
LLVM_DEBUG(dbgs() << "[AAPointerInfo] Non constant GEP index " << *GEP
<< " : " << *Idx << "\n");
return false;
}
UsrOI.Offset = PtrOI.Offset + DL.getIndexedOffsetInType(
GEP->getSourceElementType(), Indices);
Follow = true;
return true;
}
if (isa<CastInst>(Usr) || isa<SelectInst>(Usr))
return HandlePassthroughUser(Usr, OffsetInfoMap[CurPtr], Follow);
// For PHIs we need to take care of the recurrence explicitly as the value
// might change while we iterate through a loop. For now, we give up if
// the PHI is not invariant.
if (isa<PHINode>(Usr)) {
// Note the order here: the access for Usr might change the map; CurPtr is
// already in it, though.
OffsetInfo &UsrOI = OffsetInfoMap[Usr];
OffsetInfo &PtrOI = OffsetInfoMap[CurPtr];
// Check if the PHI is invariant (so far).
if (UsrOI == PtrOI)
return true;
// Check if the PHI operand already has an unknown offset, as we can't
// improve on that anymore.
if (PtrOI.Offset == OffsetAndSize::Unknown) {
UsrOI = PtrOI;
Follow = true;
return true;
}
// Check if the PHI operand is not dependent on the PHI itself.
// TODO: This is not great as we look at the pointer type. However, it
// is unclear where the Offset size comes from with typeless pointers.
APInt Offset(
DL.getIndexSizeInBits(CurPtr->getType()->getPointerAddressSpace()),
0);
if (&AssociatedValue == CurPtr->stripAndAccumulateConstantOffsets(
DL, Offset, /* AllowNonInbounds */ true)) {
if (Offset != PtrOI.Offset) {
LLVM_DEBUG(dbgs()
<< "[AAPointerInfo] PHI operand pointer offset mismatch "
<< *CurPtr << " in " << *Usr << "\n");
return false;
}
return HandlePassthroughUser(Usr, PtrOI, Follow);
}
// TODO: Approximate in case we know the direction of the recurrence.
LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand is too complex "
<< *CurPtr << " in " << *Usr << "\n");
UsrOI = PtrOI;
UsrOI.Offset = OffsetAndSize::Unknown;
Follow = true;
return true;
}
if (auto *LoadI = dyn_cast<LoadInst>(Usr))
return handleAccess(A, *LoadI, *CurPtr, /* Content */ nullptr,
AccessKind::AK_READ, OffsetInfoMap[CurPtr].Offset,
Changed, LoadI->getType());
if (auto *StoreI = dyn_cast<StoreInst>(Usr)) {
if (StoreI->getValueOperand() == CurPtr) {
LLVM_DEBUG(dbgs() << "[AAPointerInfo] Escaping use in store "
<< *StoreI << "\n");
return false;
}
bool UsedAssumedInformation = false;
Optional<Value *> Content = A.getAssumedSimplified(
*StoreI->getValueOperand(), *this, UsedAssumedInformation);
return handleAccess(A, *StoreI, *CurPtr, Content, AccessKind::AK_WRITE,
OffsetInfoMap[CurPtr].Offset, Changed,
StoreI->getValueOperand()->getType());
}
if (auto *CB = dyn_cast<CallBase>(Usr)) {
if (CB->isLifetimeStartOrEnd())
return true;
if (TLI && isFreeCall(CB, TLI))
return true;
if (CB->isArgOperand(&U)) {
unsigned ArgNo = CB->getArgOperandNo(&U);
const auto &CSArgPI = A.getAAFor<AAPointerInfo>(
*this, IRPosition::callsite_argument(*CB, ArgNo),
DepClassTy::REQUIRED);
Changed = translateAndAddCalleeState(
A, CSArgPI, OffsetInfoMap[CurPtr].Offset, *CB) |
Changed;
return true;
}
LLVM_DEBUG(dbgs() << "[AAPointerInfo] Call user not handled " << *CB
<< "\n");
// TODO: Allow some call uses
return false;
}
LLVM_DEBUG(dbgs() << "[AAPointerInfo] User not handled " << *Usr << "\n");
return false;
};
auto EquivalentUseCB = [&](const Use &OldU, const Use &NewU) {
if (OffsetInfoMap.count(NewU))
return OffsetInfoMap[NewU] == OffsetInfoMap[OldU];
OffsetInfoMap[NewU] = OffsetInfoMap[OldU];
return true;
};
if (!A.checkForAllUses(UsePred, *this, AssociatedValue,
/* CheckBBLivenessOnly */ true, DepClassTy::OPTIONAL,
EquivalentUseCB))
return indicatePessimisticFixpoint();
LLVM_DEBUG({
dbgs() << "Accesses by bin after update:\n";
for (auto &It : AccessBins) {
dbgs() << "[" << It.first.getOffset() << "-"
<< It.first.getOffset() + It.first.getSize()
<< "] : " << It.getSecond().size() << "\n";
for (auto &Acc : It.getSecond()) {
dbgs() << " - " << Acc.getKind() << " - " << *Acc.getLocalInst()
<< "\n";
if (Acc.getLocalInst() != Acc.getRemoteInst())
dbgs() << " --> "
<< *Acc.getRemoteInst() << "\n";
if (!Acc.isWrittenValueYetUndetermined())
dbgs() << " - " << Acc.getWrittenValue() << "\n";
}
}
});
return Changed;
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
AAPointerInfoImpl::trackPointerInfoStatistics(getIRPosition());
}
};
struct AAPointerInfoReturned final : AAPointerInfoImpl {
AAPointerInfoReturned(const IRPosition &IRP, Attributor &A)
: AAPointerInfoImpl(IRP, A) {}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
return indicatePessimisticFixpoint();
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
AAPointerInfoImpl::trackPointerInfoStatistics(getIRPosition());
}
};
struct AAPointerInfoArgument final : AAPointerInfoFloating {
AAPointerInfoArgument(const IRPosition &IRP, Attributor &A)
: AAPointerInfoFloating(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
AAPointerInfoFloating::initialize(A);
if (getAnchorScope()->isDeclaration())
indicatePessimisticFixpoint();
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
AAPointerInfoImpl::trackPointerInfoStatistics(getIRPosition());
}
};
struct AAPointerInfoCallSiteArgument final : AAPointerInfoFloating {
AAPointerInfoCallSiteArgument(const IRPosition &IRP, Attributor &A)
: AAPointerInfoFloating(IRP, A) {}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
using namespace AA::PointerInfo;
// We handle memory intrinsics explicitly, at least the first (=
// destination) and second (= source) arguments, as we know how they are
// accessed.
if (auto *MI = dyn_cast_or_null<MemIntrinsic>(getCtxI())) {
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
int64_t LengthVal = OffsetAndSize::Unknown;
if (Length)
LengthVal = Length->getSExtValue();
Value &Ptr = getAssociatedValue();
unsigned ArgNo = getIRPosition().getCallSiteArgNo();
ChangeStatus Changed;
if (ArgNo == 0) {
handleAccess(A, *MI, Ptr, nullptr, AccessKind::AK_WRITE, 0, Changed,
nullptr, LengthVal);
} else if (ArgNo == 1) {
handleAccess(A, *MI, Ptr, nullptr, AccessKind::AK_READ, 0, Changed,
nullptr, LengthVal);
} else {
LLVM_DEBUG(dbgs() << "[AAPointerInfo] Unhandled memory intrinsic "
<< *MI << "\n");
return indicatePessimisticFixpoint();
}
return Changed;
}
// TODO: Once we have call site specific value information we can provide
// call site specific liveness information and then it makes
// sense to specialize attributes for call site arguments instead of
// redirecting requests to the callee argument.
Argument *Arg = getAssociatedArgument();
if (!Arg)
return indicatePessimisticFixpoint();
const IRPosition &ArgPos = IRPosition::argument(*Arg);
auto &ArgAA =
A.getAAFor<AAPointerInfo>(*this, ArgPos, DepClassTy::REQUIRED);
return translateAndAddCalleeState(A, ArgAA, 0, *cast<CallBase>(getCtxI()));
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
AAPointerInfoImpl::trackPointerInfoStatistics(getIRPosition());
}
};
struct AAPointerInfoCallSiteReturned final : AAPointerInfoFloating {
AAPointerInfoCallSiteReturned(const IRPosition &IRP, Attributor &A)
: AAPointerInfoFloating(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
AAPointerInfoImpl::trackPointerInfoStatistics(getIRPosition());
}
};
/// -----------------------NoUnwind Function Attribute--------------------------
struct AANoUnwindImpl : AANoUnwind {
AANoUnwindImpl(const IRPosition &IRP, Attributor &A) : AANoUnwind(IRP, A) {}
const std::string getAsStr() const override {
return getAssumed() ? "nounwind" : "may-unwind";
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
auto Opcodes = {
(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
(unsigned)Instruction::Call, (unsigned)Instruction::CleanupRet,
(unsigned)Instruction::CatchSwitch, (unsigned)Instruction::Resume};
auto CheckForNoUnwind = [&](Instruction &I) {
if (!I.mayThrow())
return true;
if (const auto *CB = dyn_cast<CallBase>(&I)) {
const auto &NoUnwindAA = A.getAAFor<AANoUnwind>(
*this, IRPosition::callsite_function(*CB), DepClassTy::REQUIRED);
return NoUnwindAA.isAssumedNoUnwind();
}
return false;
};
bool UsedAssumedInformation = false;
if (!A.checkForAllInstructions(CheckForNoUnwind, *this, Opcodes,
UsedAssumedInformation))
return indicatePessimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
};
struct AANoUnwindFunction final : public AANoUnwindImpl {
AANoUnwindFunction(const IRPosition &IRP, Attributor &A)
: AANoUnwindImpl(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nounwind) }
};
/// NoUnwind attribute deduction for a call site.
struct AANoUnwindCallSite final : AANoUnwindImpl {
AANoUnwindCallSite(const IRPosition &IRP, Attributor &A)
: AANoUnwindImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
AANoUnwindImpl::initialize(A);
Function *F = getAssociatedFunction();
if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
// call site specific liveness information and then it makes
// sense to specialize attributes for call site arguments instead of
// redirecting requests to the callee argument.
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
auto &FnAA = A.getAAFor<AANoUnwind>(*this, FnPos, DepClassTy::REQUIRED);
return clampStateAndIndicateChange(getState(), FnAA.getState());
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nounwind); }
};
/// --------------------- Function Return Values -------------------------------
/// "Attribute" that collects all potential returned values and the return
/// instructions that they arise from.
///
/// If there is a unique returned value R, the manifest method will:
/// - mark R with the "returned" attribute, if R is an argument.
class AAReturnedValuesImpl : public AAReturnedValues, public AbstractState {
/// Mapping of values potentially returned by the associated function to the
/// return instructions that might return them.
MapVector<Value *, SmallSetVector<ReturnInst *, 4>> ReturnedValues;
/// State flags
///
///{
bool IsFixed = false;
bool IsValidState = true;
///}
public:
AAReturnedValuesImpl(const IRPosition &IRP, Attributor &A)
: AAReturnedValues(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
// Reset the state.
IsFixed = false;
IsValidState = true;
ReturnedValues.clear();
Function *F = getAssociatedFunction();
if (!F || F->isDeclaration()) {
indicatePessimisticFixpoint();
return;
}
assert(!F->getReturnType()->isVoidTy() &&
"Did not expect a void return type!");
// The map from instruction opcodes to those instructions in the function.
auto &OpcodeInstMap = A.getInfoCache().getOpcodeInstMapForFunction(*F);
// Look through all arguments; if one is marked as returned, we are done.
for (Argument &Arg : F->args()) {
if (Arg.hasReturnedAttr()) {
auto &ReturnInstSet = ReturnedValues[&Arg];
if (auto *Insts = OpcodeInstMap.lookup(Instruction::Ret))
for (Instruction *RI : *Insts)
ReturnInstSet.insert(cast<ReturnInst>(RI));
indicateOptimisticFixpoint();
return;
}
}
if (!A.isFunctionIPOAmendable(*F))
indicatePessimisticFixpoint();
}
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override;
/// See AbstractAttribute::getState(...).
AbstractState &getState() override { return *this; }
/// See AbstractAttribute::getState(...).
const AbstractState &getState() const override { return *this; }
/// See AbstractAttribute::updateImpl(Attributor &A).
ChangeStatus updateImpl(Attributor &A) override;
llvm::iterator_range<iterator> returned_values() override {
return llvm::make_range(ReturnedValues.begin(), ReturnedValues.end());
}
llvm::iterator_range<const_iterator> returned_values() const override {
return llvm::make_range(ReturnedValues.begin(), ReturnedValues.end());
}
/// Return the number of potential return values, -1 if unknown.
size_t getNumReturnValues() const override {
return isValidState() ? ReturnedValues.size() : -1;
}
/// Return an assumed unique return value if a single candidate is found. If
/// there cannot be one, return a nullptr. If it is not clear yet, return the
/// Optional::NoneType.
Optional<Value *> getAssumedUniqueReturnValue(Attributor &A) const;
/// See AbstractState::checkForAllReturnedValues(...).
bool checkForAllReturnedValuesAndReturnInsts(
function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred)
const override;
/// Pretty print the attribute similar to the IR representation.
const std::string getAsStr() const override;
/// See AbstractState::isAtFixpoint().
bool isAtFixpoint() const override { return IsFixed; }
/// See AbstractState::isValidState().
bool isValidState() const override { return IsValidState; }
/// See AbstractState::indicateOptimisticFixpoint(...).
ChangeStatus indicateOptimisticFixpoint() override {
IsFixed = true;
return ChangeStatus::UNCHANGED;
}
ChangeStatus indicatePessimisticFixpoint() override {
IsFixed = true;
IsValidState = false;
return ChangeStatus::CHANGED;
}
};
ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
// Bookkeeping.
assert(isValidState());
STATS_DECLTRACK(KnownReturnValues, FunctionReturn,
"Number of function with known return values");
// Check if we have an assumed unique return value that we could manifest.
Optional<Value *> UniqueRV = getAssumedUniqueReturnValue(A);
if (!UniqueRV.hasValue() || !UniqueRV.getValue())
return Changed;
// Bookkeeping.
STATS_DECLTRACK(UniqueReturnValue, FunctionReturn,
"Number of function with unique return");
// If the assumed unique return value is an argument, annotate it.
if (auto *UniqueRVArg = dyn_cast<Argument>(UniqueRV.getValue())) {
if (UniqueRVArg->getType()->canLosslesslyBitCastTo(
getAssociatedFunction()->getReturnType())) {
getIRPosition() = IRPosition::argument(*UniqueRVArg);
Changed = IRAttribute::manifest(A);
}
}
return Changed;
}
const std::string AAReturnedValuesImpl::getAsStr() const {
return (isAtFixpoint() ? "returns(#" : "may-return(#") +
(isValidState() ? std::to_string(getNumReturnValues()) : "?") + ")";
}
Optional<Value *>
AAReturnedValuesImpl::getAssumedUniqueReturnValue(Attributor &A) const {
// If checkForAllReturnedValues provides a unique value, ignoring potential
// undef values that can also be present, it is assumed to be the actual
// return value and forwarded to the caller of this method. If there are
// multiple, a nullptr is returned indicating there cannot be a unique
// returned value.
Optional<Value *> UniqueRV;
Type *Ty = getAssociatedFunction()->getReturnType();
auto Pred = [&](Value &RV) -> bool {
UniqueRV = AA::combineOptionalValuesInAAValueLatice(UniqueRV, &RV, Ty);
return UniqueRV != Optional<Value *>(nullptr);
};
if (!A.checkForAllReturnedValues(Pred, *this))
UniqueRV = nullptr;
return UniqueRV;
}
bool AAReturnedValuesImpl::checkForAllReturnedValuesAndReturnInsts(
function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred)
const {
if (!isValidState())
return false;
// Check all returned values but ignore call sites as long as we have not
// encountered an overdefined one during an update.
for (auto &It : ReturnedValues) {
Value *RV = It.first;
if (!Pred(*RV, It.second))
return false;
}
return true;
}
ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
auto ReturnValueCB = [&](Value &V, const Instruction *CtxI, ReturnInst &Ret,
bool) -> bool {
assert(AA::isValidInScope(V, Ret.getFunction()) &&
"Assumed returned value should be valid in function scope!");
if (ReturnedValues[&V].insert(&Ret))
Changed = ChangeStatus::CHANGED;
return true;
};
+ bool UsedAssumedInformation = false;
auto ReturnInstCB = [&](Instruction &I) {
ReturnInst &Ret = cast<ReturnInst>(I);
return genericValueTraversal<ReturnInst>(
A, IRPosition::value(*Ret.getReturnValue()), *this, Ret, ReturnValueCB,
- &I, /* UseValueSimplify */ true, /* MaxValues */ 16,
+ &I, UsedAssumedInformation, /* UseValueSimplify */ true,
+ /* MaxValues */ 16,
/* StripCB */ nullptr, /* Intraprocedural */ true);
};
// Discover returned values from all live returned instructions in the
// associated function.
- bool UsedAssumedInformation = false;
if (!A.checkForAllInstructions(ReturnInstCB, *this, {Instruction::Ret},
UsedAssumedInformation))
return indicatePessimisticFixpoint();
return Changed;
}
struct AAReturnedValuesFunction final : public AAReturnedValuesImpl {
AAReturnedValuesFunction(const IRPosition &IRP, Attributor &A)
: AAReturnedValuesImpl(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(returned) }
};
/// Returned values information for a call site.
struct AAReturnedValuesCallSite final : AAReturnedValuesImpl {
AAReturnedValuesCallSite(const IRPosition &IRP, Attributor &A)
: AAReturnedValuesImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
// call site specific liveness information and then it makes
// sense to specialize attributes for call sites instead of
// redirecting requests to the callee.
llvm_unreachable("Abstract attributes for returned values are not "
"supported for call sites yet!");
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
return indicatePessimisticFixpoint();
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {}
};
/// ------------------------ NoSync Function Attribute -------------------------
struct AANoSyncImpl : AANoSync {
AANoSyncImpl(const IRPosition &IRP, Attributor &A) : AANoSync(IRP, A) {}
const std::string getAsStr() const override {
return getAssumed() ? "nosync" : "may-sync";
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override;
};
bool AANoSync::isNonRelaxedAtomic(const Instruction *I) {
if (!I->isAtomic())
return false;
if (auto *FI = dyn_cast<FenceInst>(I))
// All legal orderings for fence are stronger than monotonic.
return FI->getSyncScopeID() != SyncScope::SingleThread;
if (auto *AI = dyn_cast<AtomicCmpXchgInst>(I)) {
// Unordered is not a legal ordering for cmpxchg.
return (AI->getSuccessOrdering() != AtomicOrdering::Monotonic ||
AI->getFailureOrdering() != AtomicOrdering::Monotonic);
}
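// For the remaining atomic instructions, look at the ordering directly and
// require it to be stronger than monotonic.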
AtomicOrdering Ordering;
switch (I->getOpcode()) {
case Instruction::AtomicRMW:
Ordering = cast<AtomicRMWInst>(I)->getOrdering();
break;
case Instruction::Store:
Ordering = cast<StoreInst>(I)->getOrdering();
break;
case Instruction::Load:
Ordering = cast<LoadInst>(I)->getOrdering();
break;
default:
llvm_unreachable(
"New atomic operations need to be known in the attributor.");
}
return (Ordering != AtomicOrdering::Unordered &&
Ordering != AtomicOrdering::Monotonic);
}
/// Return true if this intrinsic is nosync. This is only used for intrinsics
/// which would be nosync except that they have a volatile flag. All other
/// intrinsics are simply annotated with the nosync attribute in Intrinsics.td.
bool AANoSync::isNoSyncIntrinsic(const Instruction *I) {
if (auto *MI = dyn_cast<MemIntrinsic>(I))
return !MI->isVolatile();
return false;
}
ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) {
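// Delegate the per-instruction read/write check to the generic nosync helper.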
auto CheckRWInstForNoSync = [&](Instruction &I) {
return AA::isNoSyncInst(A, I, *this);
};
auto CheckForNoSync = [&](Instruction &I) {
// At this point we handled all read/write effects and they are all
// nosync, so they can be skipped.
if (I.mayReadOrWriteMemory())
return true;
// non-convergent and readnone imply nosync.
return !cast<CallBase>(I).isConvergent();
};
bool UsedAssumedInformation = false;
if (!A.checkForAllReadWriteInstructions(CheckRWInstForNoSync, *this,
UsedAssumedInformation) ||
!A.checkForAllCallLikeInstructions(CheckForNoSync, *this,
UsedAssumedInformation))
return indicatePessimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
struct AANoSyncFunction final : public AANoSyncImpl {
AANoSyncFunction(const IRPosition &IRP, Attributor &A)
: AANoSyncImpl(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nosync) }
};
/// NoSync attribute deduction for a call site.
struct AANoSyncCallSite final : AANoSyncImpl {
AANoSyncCallSite(const IRPosition &IRP, Attributor &A)
: AANoSyncImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
AANoSyncImpl::initialize(A);
Function *F = getAssociatedFunction();
if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
// call site specific liveness information and then it makes
// sense to specialize attributes for call site arguments instead of
// redirecting requests to the callee argument.
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
auto &FnAA = A.getAAFor<AANoSync>(*this, FnPos, DepClassTy::REQUIRED);
return clampStateAndIndicateChange(getState(), FnAA.getState());
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nosync); }
};
/// ------------------------ No-Free Attributes ----------------------------
struct AANoFreeImpl : public AANoFree {
AANoFreeImpl(const IRPosition &IRP, Attributor &A) : AANoFree(IRP, A) {}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
auto CheckForNoFree = [&](Instruction &I) {
const auto &CB = cast<CallBase>(I);
if (CB.hasFnAttr(Attribute::NoFree))
return true;
const auto &NoFreeAA = A.getAAFor<AANoFree>(
*this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
return NoFreeAA.isAssumedNoFree();
};
bool UsedAssumedInformation = false;
if (!A.checkForAllCallLikeInstructions(CheckForNoFree, *this,
UsedAssumedInformation))
return indicatePessimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::getAsStr().
const std::string getAsStr() const override {
return getAssumed() ? "nofree" : "may-free";
}
};
struct AANoFreeFunction final : public AANoFreeImpl {
AANoFreeFunction(const IRPosition &IRP, Attributor &A)
: AANoFreeImpl(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(nofree) }
};
/// NoFree attribute deduction for a call site.
struct AANoFreeCallSite final : AANoFreeImpl {
AANoFreeCallSite(const IRPosition &IRP, Attributor &A)
: AANoFreeImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
AANoFreeImpl::initialize(A);
Function *F = getAssociatedFunction();
if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
// call site specific liveness information and then it makes
// sense to specialize attributes for call site arguments instead of
// redirecting requests to the callee argument.
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
auto &FnAA = A.getAAFor<AANoFree>(*this, FnPos, DepClassTy::REQUIRED);
return clampStateAndIndicateChange(getState(), FnAA.getState());
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nofree); }
};
/// NoFree attribute for floating values.
struct AANoFreeFloating : AANoFreeImpl {
AANoFreeFloating(const IRPosition &IRP, Attributor &A)
: AANoFreeImpl(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override{STATS_DECLTRACK_FLOATING_ATTR(nofree)}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
const IRPosition &IRP = getIRPosition();
const auto &NoFreeAA = A.getAAFor<AANoFree>(
*this, IRPosition::function_scope(IRP), DepClassTy::OPTIONAL);
if (NoFreeAA.isAssumedNoFree())
return ChangeStatus::UNCHANGED;
Value &AssociatedValue = getIRPosition().getAssociatedValue();
auto Pred = [&](const Use &U, bool &Follow) -> bool {
Instruction *UserI = cast<Instruction>(U.getUser());
if (auto *CB = dyn_cast<CallBase>(UserI)) {
if (CB->isBundleOperand(&U))
return false;
if (!CB->isArgOperand(&U))
return true;
unsigned ArgNo = CB->getArgOperandNo(&U);
const auto &NoFreeArg = A.getAAFor<AANoFree>(
*this, IRPosition::callsite_argument(*CB, ArgNo),
DepClassTy::REQUIRED);
return NoFreeArg.isAssumedNoFree();
}
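// Follow uses through pointer-propagating instructions (GEPs, casts, PHIs,
// selects).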
if (isa<GetElementPtrInst>(UserI) || isa<BitCastInst>(UserI) ||
isa<PHINode>(UserI) || isa<SelectInst>(UserI)) {
Follow = true;
return true;
}
if (isa<StoreInst>(UserI) || isa<LoadInst>(UserI) ||
isa<ReturnInst>(UserI))
return true;
// Unknown user.
return false;
};
if (!A.checkForAllUses(Pred, *this, AssociatedValue))
return indicatePessimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
};
/// NoFree attribute for a call site argument.
struct AANoFreeArgument final : AANoFreeFloating {
AANoFreeArgument(const IRPosition &IRP, Attributor &A)
: AANoFreeFloating(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nofree) }
};
/// NoFree attribute for call site arguments.
struct AANoFreeCallSiteArgument final : AANoFreeFloating {
AANoFreeCallSiteArgument(const IRPosition &IRP, Attributor &A)
: AANoFreeFloating(IRP, A) {}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
// call site specific liveness information and then it makes
// sense to specialize attributes for call site arguments instead of
// redirecting requests to the callee argument.
Argument *Arg = getAssociatedArgument();
if (!Arg)
return indicatePessimisticFixpoint();
const IRPosition &ArgPos = IRPosition::argument(*Arg);
auto &ArgAA = A.getAAFor<AANoFree>(*this, ArgPos, DepClassTy::REQUIRED);
return clampStateAndIndicateChange(getState(), ArgAA.getState());
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override{STATS_DECLTRACK_CSARG_ATTR(nofree)};
};
/// NoFree attribute for function return value.
struct AANoFreeReturned final : AANoFreeFloating {
AANoFreeReturned(const IRPosition &IRP, Attributor &A)
: AANoFreeFloating(IRP, A) {
llvm_unreachable("NoFree is not applicable to function returns!");
}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
llvm_unreachable("NoFree is not applicable to function returns!");
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
llvm_unreachable("NoFree is not applicable to function returns!");
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {}
};
/// NoFree attribute deduction for a call site return value.
struct AANoFreeCallSiteReturned final : AANoFreeFloating {
AANoFreeCallSiteReturned(const IRPosition &IRP, Attributor &A)
: AANoFreeFloating(IRP, A) {}
ChangeStatus manifest(Attributor &A) override {
return ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nofree) }
};
/// ------------------------ NonNull Argument Attribute ------------------------
static int64_t getKnownNonNullAndDerefBytesForUse(
Attributor &A, const AbstractAttribute &QueryingAA, Value &AssociatedValue,
const Use *U, const Instruction *I, bool &IsNonNull, bool &TrackUse) {
TrackUse = false;
const Value *UseV = U->get();
if (!UseV->getType()->isPointerTy())
return 0;
// We need to follow common pointer manipulation uses to the accesses they
// feed into. We can try to be smart to avoid looking through things we do not
// like for now, e.g., non-inbounds GEPs.
if (isa<CastInst>(I)) {
TrackUse = true;
return 0;
}
if (isa<GetElementPtrInst>(I)) {
TrackUse = true;
return 0;
}
Type *PtrTy = UseV->getType();
const Function *F = I->getFunction();
bool NullPointerIsDefined =
F ? llvm::NullPointerIsDefined(F, PtrTy->getPointerAddressSpace()) : true;
const DataLayout &DL = A.getInfoCache().getDL();
if (const auto *CB = dyn_cast<CallBase>(I)) {
if (CB->isBundleOperand(U)) {
if (RetainedKnowledge RK = getKnowledgeFromUse(
U, {Attribute::NonNull, Attribute::Dereferenceable})) {
IsNonNull |=
(RK.AttrKind == Attribute::NonNull || !NullPointerIsDefined);
return RK.ArgValue;
}
return 0;
}
if (CB->isCallee(U)) {
IsNonNull |= !NullPointerIsDefined;
return 0;
}
unsigned ArgNo = CB->getArgOperandNo(U);
IRPosition IRP = IRPosition::callsite_argument(*CB, ArgNo);
// As long as we only use known information there is no need to track
// dependences here.
auto &DerefAA =
A.getAAFor<AADereferenceable>(QueryingAA, IRP, DepClassTy::NONE);
IsNonNull |= DerefAA.isKnownNonNull();
return DerefAA.getKnownDereferenceableBytes();
}
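// For plain memory accesses, derive dereferenceable bytes from the access
// size if the accessed pointer is based on the associated value.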
Optional<MemoryLocation> Loc = MemoryLocation::getOrNone(I);
if (!Loc || Loc->Ptr != UseV || !Loc->Size.isPrecise() || I->isVolatile())
return 0;
int64_t Offset;
const Value *Base =
getMinimalBaseOfPointer(A, QueryingAA, Loc->Ptr, Offset, DL);
if (Base && Base == &AssociatedValue) {
int64_t DerefBytes = Loc->Size.getValue() + Offset;
IsNonNull |= !NullPointerIsDefined;
return std::max(int64_t(0), DerefBytes);
}
/// Corner case when an offset is 0.
Base = GetPointerBaseWithConstantOffset(Loc->Ptr, Offset, DL,
/*AllowNonInbounds*/ true);
if (Base && Base == &AssociatedValue && Offset == 0) {
int64_t DerefBytes = Loc->Size.getValue();
IsNonNull |= !NullPointerIsDefined;
return std::max(int64_t(0), DerefBytes);
}
return 0;
}
struct AANonNullImpl : AANonNull {
AANonNullImpl(const IRPosition &IRP, Attributor &A)
: AANonNull(IRP, A),
NullIsDefined(NullPointerIsDefined(
getAnchorScope(),
getAssociatedValue().getType()->getPointerAddressSpace())) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
Value &V = getAssociatedValue();
if (!NullIsDefined &&
hasAttr({Attribute::NonNull, Attribute::Dereferenceable},
/* IgnoreSubsumingPositions */ false, &A)) {
indicateOptimisticFixpoint();
return;
}
if (isa<ConstantPointerNull>(V)) {
indicatePessimisticFixpoint();
return;
}
AANonNull::initialize(A);
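// Dereferenceability information with CanBeNull == false already implies
// nonnull.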
bool CanBeNull, CanBeFreed;
if (V.getPointerDereferenceableBytes(A.getDataLayout(), CanBeNull,
CanBeFreed)) {
if (!CanBeNull) {
indicateOptimisticFixpoint();
return;
}
}
if (isa<GlobalValue>(&getAssociatedValue())) {
indicatePessimisticFixpoint();
return;
}
if (Instruction *CtxI = getCtxI())
followUsesInMBEC(*this, A, getState(), *CtxI);
}
/// See followUsesInMBEC
bool followUseInMBEC(Attributor &A, const Use *U, const Instruction *I,
AANonNull::StateType &State) {
bool IsNonNull = false;
bool TrackUse = false;
getKnownNonNullAndDerefBytesForUse(A, *this, getAssociatedValue(), U, I,
IsNonNull, TrackUse);
State.setKnown(IsNonNull);
return TrackUse;
}
/// See AbstractAttribute::getAsStr().
const std::string getAsStr() const override {
return getAssumed() ? "nonnull" : "may-null";
}
/// Flag to determine if the underlying value can be null and still allow
/// valid accesses.
const bool NullIsDefined;
};
/// NonNull attribute for a floating value.
struct AANonNullFloating : public AANonNullImpl {
AANonNullFloating(const IRPosition &IRP, Attributor &A)
: AANonNullImpl(IRP, A) {}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
const DataLayout &DL = A.getDataLayout();
DominatorTree *DT = nullptr;
AssumptionCache *AC = nullptr;
InformationCache &InfoCache = A.getInfoCache();
if (const Function *Fn = getAnchorScope()) {
DT = InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*Fn);
AC = InfoCache.getAnalysisResultForFunction<AssumptionAnalysis>(*Fn);
}
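// For each underlying value, either query its AANonNull state or, if we are
// looking at the unstripped value itself, fall back to isKnownNonZero.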
auto VisitValueCB = [&](Value &V, const Instruction *CtxI,
AANonNull::StateType &T, bool Stripped) -> bool {
const auto &AA = A.getAAFor<AANonNull>(*this, IRPosition::value(V),
DepClassTy::REQUIRED);
if (!Stripped && this == &AA) {
if (!isKnownNonZero(&V, DL, 0, AC, CtxI, DT))
T.indicatePessimisticFixpoint();
} else {
// Use abstract attribute information.
const AANonNull::StateType &NS = AA.getState();
T ^= NS;
}
return T.isValidState();
};
StateType T;
+ bool UsedAssumedInformation = false;
if (!genericValueTraversal<StateType>(A, getIRPosition(), *this, T,
- VisitValueCB, getCtxI()))
+ VisitValueCB, getCtxI(),
+ UsedAssumedInformation))
return indicatePessimisticFixpoint();
return clampStateAndIndicateChange(getState(), T);
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(nonnull) }
};
/// NonNull attribute for function return value.
struct AANonNullReturned final
: AAReturnedFromReturnedValues<AANonNull, AANonNull> {
AANonNullReturned(const IRPosition &IRP, Attributor &A)
: AAReturnedFromReturnedValues<AANonNull, AANonNull>(IRP, A) {}
/// See AbstractAttribute::getAsStr().
const std::string getAsStr() const override {
return getAssumed() ? "nonnull" : "may-null";
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(nonnull) }
};
/// NonNull attribute for function argument.
struct AANonNullArgument final
: AAArgumentFromCallSiteArguments<AANonNull, AANonNullImpl> {
AANonNullArgument(const IRPosition &IRP, Attributor &A)
: AAArgumentFromCallSiteArguments<AANonNull, AANonNullImpl>(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nonnull) }
};
struct AANonNullCallSiteArgument final : AANonNullFloating {
AANonNullCallSiteArgument(const IRPosition &IRP, Attributor &A)
: AANonNullFloating(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(nonnull) }
};
/// NonNull attribute for a call site return position.
struct AANonNullCallSiteReturned final
: AACallSiteReturnedFromReturned<AANonNull, AANonNullImpl> {
AANonNullCallSiteReturned(const IRPosition &IRP, Attributor &A)
: AACallSiteReturnedFromReturned<AANonNull, AANonNullImpl>(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nonnull) }
};
/// ------------------------ No-Recurse Attributes ----------------------------
struct AANoRecurseImpl : public AANoRecurse {
AANoRecurseImpl(const IRPosition &IRP, Attributor &A) : AANoRecurse(IRP, A) {}
/// See AbstractAttribute::getAsStr()
const std::string getAsStr() const override {
return getAssumed() ? "norecurse" : "may-recurse";
}
};
struct AANoRecurseFunction final : AANoRecurseImpl {
AANoRecurseFunction(const IRPosition &IRP, Attributor &A)
: AANoRecurseImpl(IRP, A) {}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// If all live call sites are known to be no-recurse, we are as well.
auto CallSitePred = [&](AbstractCallSite ACS) {
const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
*this, IRPosition::function(*ACS.getInstruction()->getFunction()),
DepClassTy::NONE);
return NoRecurseAA.isKnownNoRecurse();
};
- bool AllCallSitesKnown;
- if (A.checkForAllCallSites(CallSitePred, *this, true, AllCallSitesKnown)) {
+ bool UsedAssumedInformation = false;
+ if (A.checkForAllCallSites(CallSitePred, *this, true,
+ UsedAssumedInformation)) {
// If we know all call sites and all are known no-recurse, we are done.
// If all known call sites, which might not be all that exist, are known
// to be no-recurse, we are not done but we can continue to assume
// no-recurse. If one of the call sites we have not visited will become
// live, another update is triggered.
- if (AllCallSitesKnown)
+ if (!UsedAssumedInformation)
indicateOptimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
const AAFunctionReachability &EdgeReachability =
A.getAAFor<AAFunctionReachability>(*this, getIRPosition(),
DepClassTy::REQUIRED);
if (EdgeReachability.canReach(A, *getAnchorScope()))
return indicatePessimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(norecurse) }
};
/// NoRecurse attribute deduction for a call site.
struct AANoRecurseCallSite final : AANoRecurseImpl {
AANoRecurseCallSite(const IRPosition &IRP, Attributor &A)
: AANoRecurseImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
AANoRecurseImpl::initialize(A);
Function *F = getAssociatedFunction();
if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
// call site specific liveness information and then it makes
// sense to specialize attributes for call site arguments instead of
// redirecting requests to the callee argument.
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
auto &FnAA = A.getAAFor<AANoRecurse>(*this, FnPos, DepClassTy::REQUIRED);
return clampStateAndIndicateChange(getState(), FnAA.getState());
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(norecurse); }
};
/// -------------------- Undefined-Behavior Attributes ------------------------
struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
AAUndefinedBehaviorImpl(const IRPosition &IRP, Attributor &A)
: AAUndefinedBehavior(IRP, A) {}
/// See AbstractAttribute::updateImpl(...).
// Note: UB is not limited to memory accesses through a pointer; branches,
// call sites, and returns are inspected as well.
ChangeStatus updateImpl(Attributor &A) override {
const size_t UBPrevSize = KnownUBInsts.size();
const size_t NoUBPrevSize = AssumedNoUBInsts.size();
auto InspectMemAccessInstForUB = [&](Instruction &I) {
// The language reference now states that volatile stores are not UB, so
// skip them.
if (I.isVolatile() && I.mayWriteToMemory())
return true;
// Skip instructions that are already saved.
if (AssumedNoUBInsts.count(&I) || KnownUBInsts.count(&I))
return true;
// If we reach here, we know we have an instruction
// that accesses memory through a pointer operand,
// for which getPointerOperand() should give it to us.
Value *PtrOp =
const_cast<Value *>(getPointerOperand(&I, /* AllowVolatile */ true));
assert(PtrOp &&
"Expected pointer operand of memory accessing instruction");
// Either we stopped and the appropriate action was taken,
// or we got back a simplified value to continue.
Optional<Value *> SimplifiedPtrOp = stopOnUndefOrAssumed(A, PtrOp, &I);
if (!SimplifiedPtrOp.hasValue() || !SimplifiedPtrOp.getValue())
return true;
const Value *PtrOpVal = SimplifiedPtrOp.getValue();
// A memory access through a pointer is considered UB
// only if the pointer has constant null value.
// TODO: Expand it to not only check constant values.
if (!isa<ConstantPointerNull>(PtrOpVal)) {
AssumedNoUBInsts.insert(&I);
return true;
}
const Type *PtrTy = PtrOpVal->getType();
// Because we only consider instructions inside functions,
// assume that a parent function exists.
const Function *F = I.getFunction();
// A memory access using constant null pointer is only considered UB
// if null pointer is _not_ defined for the target platform.
if (llvm::NullPointerIsDefined(F, PtrTy->getPointerAddressSpace()))
AssumedNoUBInsts.insert(&I);
else
KnownUBInsts.insert(&I);
return true;
};
auto InspectBrInstForUB = [&](Instruction &I) {
// A conditional branch instruction is considered UB if it has `undef`
// condition.
// Skip instructions that are already saved.
if (AssumedNoUBInsts.count(&I) || KnownUBInsts.count(&I))
return true;
// We know we have a branch instruction.
auto *BrInst = cast<BranchInst>(&I);
// Unconditional branches are never considered UB.
if (BrInst->isUnconditional())
return true;
// Either we stopped and the appropriate action was taken,
// or we got back a simplified value to continue.
Optional<Value *> SimplifiedCond =
stopOnUndefOrAssumed(A, BrInst->getCondition(), BrInst);
if (!SimplifiedCond.hasValue() || !SimplifiedCond.getValue())
return true;
AssumedNoUBInsts.insert(&I);
return true;
};
auto InspectCallSiteForUB = [&](Instruction &I) {
// Check whether a callsite always causes UB or not.
// Skip instructions that are already saved.
if (AssumedNoUBInsts.count(&I) || KnownUBInsts.count(&I))
return true;
// Check nonnull and noundef argument attribute violation for each
// callsite.
CallBase &CB = cast<CallBase>(I);
Function *Callee = CB.getCalledFunction();
if (!Callee)
return true;
for (unsigned idx = 0; idx < CB.arg_size(); idx++) {
// If the current argument is known to be simplified to a null pointer and
// the corresponding argument position is known to have the nonnull
// attribute, the argument is poison. Furthermore, if the argument is
// poison and the position is known to have the noundef attribute, this
// callsite is considered UB.
if (idx >= Callee->arg_size())
break;
Value *ArgVal = CB.getArgOperand(idx);
if (!ArgVal)
continue;
// Here, we handle three cases.
// (1) Not having a value means it is dead. (we can replace the value
// with undef)
// (2) Simplified to undef. The argument violates the noundef attribute.
// (3) Simplified to a null pointer where it is known to be nonnull.
// The argument is a poison value and violates the noundef attribute.
IRPosition CalleeArgumentIRP = IRPosition::callsite_argument(CB, idx);
auto &NoUndefAA =
A.getAAFor<AANoUndef>(*this, CalleeArgumentIRP, DepClassTy::NONE);
if (!NoUndefAA.isKnownNoUndef())
continue;
bool UsedAssumedInformation = false;
Optional<Value *> SimplifiedVal = A.getAssumedSimplified(
IRPosition::value(*ArgVal), *this, UsedAssumedInformation);
if (UsedAssumedInformation)
continue;
if (SimplifiedVal.hasValue() && !SimplifiedVal.getValue())
return true;
if (!SimplifiedVal.hasValue() ||
isa<UndefValue>(*SimplifiedVal.getValue())) {
KnownUBInsts.insert(&I);
continue;
}
if (!ArgVal->getType()->isPointerTy() ||
!isa<ConstantPointerNull>(*SimplifiedVal.getValue()))
continue;
auto &NonNullAA =
A.getAAFor<AANonNull>(*this, CalleeArgumentIRP, DepClassTy::NONE);
if (NonNullAA.isKnownNonNull())
KnownUBInsts.insert(&I);
}
return true;
};
auto InspectReturnInstForUB = [&](Instruction &I) {
auto &RI = cast<ReturnInst>(I);
// Either we stopped and the appropriate action was taken,
// or we got back a simplified return value to continue.
Optional<Value *> SimplifiedRetValue =
stopOnUndefOrAssumed(A, RI.getReturnValue(), &I);
if (!SimplifiedRetValue.hasValue() || !SimplifiedRetValue.getValue())
return true;
// Check if a return instruction always causes UB or not.
// Note: It is guaranteed that the returned position of the anchor
// scope has noundef attribute when this is called.
// We also ensure the return position is not "assumed dead"
// because the returned value was then potentially simplified to
// `undef` in AAReturnedValues without removing the `noundef`
// attribute yet.
// When the returned position has the noundef attribute, UB occurs in the
// following cases.
// (1) Returned value is known to be undef.
// (2) The value is known to be a null pointer and the returned
// position has nonnull attribute (because the returned value is
// poison).
if (isa<ConstantPointerNull>(*SimplifiedRetValue)) {
auto &NonNullAA = A.getAAFor<AANonNull>(
*this, IRPosition::returned(*getAnchorScope()), DepClassTy::NONE);
if (NonNullAA.isKnownNonNull())
KnownUBInsts.insert(&I);
}
return true;
};
bool UsedAssumedInformation = false;
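// Run the UB inspection callbacks over memory accesses, branches, and
// call-like instructions.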
A.checkForAllInstructions(InspectMemAccessInstForUB, *this,
{Instruction::Load, Instruction::Store,
Instruction::AtomicCmpXchg,
Instruction::AtomicRMW},
UsedAssumedInformation,
/* CheckBBLivenessOnly */ true);
A.checkForAllInstructions(InspectBrInstForUB, *this, {Instruction::Br},
UsedAssumedInformation,
/* CheckBBLivenessOnly */ true);
A.checkForAllCallLikeInstructions(InspectCallSiteForUB, *this,
UsedAssumedInformation);
// If the returned position of the anchor scope has the noundef attribute,
// check all return instructions.
if (!getAnchorScope()->getReturnType()->isVoidTy()) {
const IRPosition &ReturnIRP = IRPosition::returned(*getAnchorScope());
if (!A.isAssumedDead(ReturnIRP, this, nullptr, UsedAssumedInformation)) {
auto &RetPosNoUndefAA =
A.getAAFor<AANoUndef>(*this, ReturnIRP, DepClassTy::NONE);
if (RetPosNoUndefAA.isKnownNoUndef())
A.checkForAllInstructions(InspectReturnInstForUB, *this,
{Instruction::Ret}, UsedAssumedInformation,
/* CheckBBLivenessOnly */ true);
}
}
if (NoUBPrevSize != AssumedNoUBInsts.size() ||
UBPrevSize != KnownUBInsts.size())
return ChangeStatus::CHANGED;
return ChangeStatus::UNCHANGED;
}
bool isKnownToCauseUB(Instruction *I) const override {
return KnownUBInsts.count(I);
}
bool isAssumedToCauseUB(Instruction *I) const override {
// In simple words, if an instruction is not in the set of instructions
// assumed to _not_ cause UB, then it is assumed to cause UB (that
// includes those in the KnownUBInsts set). The rest of the boilerplate
// ensures that it is one of the instructions we test for UB.
switch (I->getOpcode()) {
case Instruction::Load:
case Instruction::Store:
case Instruction::AtomicCmpXchg:
case Instruction::AtomicRMW:
return !AssumedNoUBInsts.count(I);
case Instruction::Br: {
auto BrInst = cast<BranchInst>(I);
if (BrInst->isUnconditional())
return false;
return !AssumedNoUBInsts.count(I);
} break;
default:
return false;
}
return false;
}
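/// See AbstractAttribute::manifest(...). Instructions known to cause UB are
/// replaced by unreachable code.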
ChangeStatus manifest(Attributor &A) override {
if (KnownUBInsts.empty())
return ChangeStatus::UNCHANGED;
for (Instruction *I : KnownUBInsts)
A.changeToUnreachableAfterManifest(I);
return ChangeStatus::CHANGED;
}
/// See AbstractAttribute::getAsStr()
const std::string getAsStr() const override {
return getAssumed() ? "undefined-behavior" : "no-ub";
}
/// Note: The correctness of this analysis depends on the fact that the
/// following 2 sets will stop changing after some point.
/// "Change" here means that their size changes.
/// The size of each set is monotonically increasing
/// (we only add items to them) and it is upper bounded by the number of
/// instructions in the processed function (we can never save more
/// elements in either set than this number). Hence, at some point,
/// they will stop increasing.
/// Consequently, at some point, both sets will have stopped
/// changing, effectively making the analysis reach a fixpoint.
/// Note: These 2 sets are disjoint and an instruction can be considered
/// one of 3 things:
/// 1) Known to cause UB (AAUndefinedBehavior could prove it) and put it in
/// the KnownUBInsts set.
/// 2) Assumed to cause UB (in every updateImpl, AAUndefinedBehavior
/// has a reason to assume it).
/// 3) Assumed to not cause UB. Every other instruction - AAUndefinedBehavior
/// could not find a reason to assume or prove that it can cause UB,
/// hence it assumes it doesn't. We have a set for these instructions
/// so that we don't reprocess them in every update.
/// Note however that instructions in this set may cause UB.
protected:
/// A set of all live instructions _known_ to cause UB.
SmallPtrSet<Instruction *, 8> KnownUBInsts;
private:
/// A set of all the (live) instructions that are assumed to _not_ cause UB.
SmallPtrSet<Instruction *, 8> AssumedNoUBInsts;
// Should be called during updates: if we are processing an instruction
// \p I that depends on a value \p V, one of the following has to happen:
// - If the value is assumed, then stop.
// - If the value is known but undef, then consider it UB.
// - Otherwise, do specific processing with the simplified value.
// We return None in the first 2 cases to signify that an appropriate
// action was taken and the caller should stop.
// Otherwise, we return the simplified value that the caller should
// use for specific processing.
Optional<Value *> stopOnUndefOrAssumed(Attributor &A, Value *V,
Instruction *I) {
bool UsedAssumedInformation = false;
Optional<Value *> SimplifiedV = A.getAssumedSimplified(
IRPosition::value(*V), *this, UsedAssumedInformation);
if (!UsedAssumedInformation) {
// Don't depend on assumed values.
if (!SimplifiedV.hasValue()) {
// If it is known (which we tested above) but it doesn't have a value,
// then we can assume `undef` and hence the instruction is UB.
KnownUBInsts.insert(I);
return llvm::None;
}
if (!SimplifiedV.getValue())
return nullptr;
V = *SimplifiedV;
}
if (isa<UndefValue>(V)) {
KnownUBInsts.insert(I);
return llvm::None;
}
return V;
}
};
struct AAUndefinedBehaviorFunction final : AAUndefinedBehaviorImpl {
AAUndefinedBehaviorFunction(const IRPosition &IRP, Attributor &A)
: AAUndefinedBehaviorImpl(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECL(UndefinedBehaviorInstruction, Instruction,
"Number of instructions known to have UB");
BUILD_STAT_NAME(UndefinedBehaviorInstruction, Instruction) +=
KnownUBInsts.size();
}
};
/// ------------------------ Will-Return Attributes ----------------------------
// Helper function that checks whether a function has any cycle that we do not
// know to be bounded.
// Loops with a known maximum trip count are considered bounded; any other
// cycle is not.
static bool mayContainUnboundedCycle(Function &F, Attributor &A) {
ScalarEvolution *SE =
A.getInfoCache().getAnalysisResultForFunction<ScalarEvolutionAnalysis>(F);
LoopInfo *LI = A.getInfoCache().getAnalysisResultForFunction<LoopAnalysis>(F);
// If either SCEV or LoopInfo is not available for the function, we assume
// every cycle to be unbounded.
// We use scc_iterator, which uses Tarjan's algorithm to find all the maximal
// SCCs. To detect whether there is a cycle, we only need to find the maximal
// ones.
if (!SE || !LI) {
for (scc_iterator<Function *> SCCI = scc_begin(&F); !SCCI.isAtEnd(); ++SCCI)
if (SCCI.hasCycle())
return true;
return false;
}
// If there's irreducible control, the function may contain non-loop cycles.
if (mayContainIrreducibleControl(F, LI))
return true;
// Any loop that does not have a max trip count is considered an unbounded
// cycle.
for (auto *L : LI->getLoopsInPreorder()) {
if (!SE->getSmallConstantMaxTripCount(L))
return true;
}
return false;
}
struct AAWillReturnImpl : public AAWillReturn {
AAWillReturnImpl(const IRPosition &IRP, Attributor &A)
: AAWillReturn(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
AAWillReturn::initialize(A);
if (isImpliedByMustprogressAndReadonly(A, /* KnownOnly */ true)) {
indicateOptimisticFixpoint();
return;
}
}
/// Check for `mustprogress` and `readonly` as they imply `willreturn`.
bool isImpliedByMustprogressAndReadonly(Attributor &A, bool KnownOnly) {
// Check for `mustprogress` in the scope and the associated function which
// might be different if this is a call site.
if ((!getAnchorScope() || !getAnchorScope()->mustProgress()) &&
(!getAssociatedFunction() || !getAssociatedFunction()->mustProgress()))
return false;
bool IsKnown;
if (AA::isAssumedReadOnly(A, getIRPosition(), *this, IsKnown))
return IsKnown || !KnownOnly;
return false;
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
if (isImpliedByMustprogressAndReadonly(A, /* KnownOnly */ false))
return ChangeStatus::UNCHANGED;
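// A call-like instruction is fine if the callee is known to return; if it
// is only assumed to return, we additionally require norecurse.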
auto CheckForWillReturn = [&](Instruction &I) {
IRPosition IPos = IRPosition::callsite_function(cast<CallBase>(I));
const auto &WillReturnAA =
A.getAAFor<AAWillReturn>(*this, IPos, DepClassTy::REQUIRED);
if (WillReturnAA.isKnownWillReturn())
return true;
if (!WillReturnAA.isAssumedWillReturn())
return false;
const auto &NoRecurseAA =
A.getAAFor<AANoRecurse>(*this, IPos, DepClassTy::REQUIRED);
return NoRecurseAA.isAssumedNoRecurse();
};
bool UsedAssumedInformation = false;
if (!A.checkForAllCallLikeInstructions(CheckForWillReturn, *this,
UsedAssumedInformation))
return indicatePessimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::getAsStr()
const std::string getAsStr() const override {
return getAssumed() ? "willreturn" : "may-noreturn";
}
};
struct AAWillReturnFunction final : AAWillReturnImpl {
AAWillReturnFunction(const IRPosition &IRP, Attributor &A)
: AAWillReturnImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
AAWillReturnImpl::initialize(A);
Function *F = getAnchorScope();
if (!F || F->isDeclaration() || mayContainUnboundedCycle(*F, A))
indicatePessimisticFixpoint();
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(willreturn) }
};
/// WillReturn attribute deduction for a call site.
struct AAWillReturnCallSite final : AAWillReturnImpl {
AAWillReturnCallSite(const IRPosition &IRP, Attributor &A)
: AAWillReturnImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
AAWillReturnImpl::initialize(A);
Function *F = getAssociatedFunction();
if (!F || !A.isFunctionIPOAmendable(*F))
indicatePessimisticFixpoint();
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
if (isImpliedByMustprogressAndReadonly(A, /* KnownOnly */ false))
return ChangeStatus::UNCHANGED;
// TODO: Once we have call site specific value information we can provide
// call site specific liveness information and then it makes
// sense to specialize attributes for call site arguments instead of
// redirecting requests to the callee argument.
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
auto &FnAA = A.getAAFor<AAWillReturn>(*this, FnPos, DepClassTy::REQUIRED);
return clampStateAndIndicateChange(getState(), FnAA.getState());
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(willreturn); }
};
/// -------------------AAReachability Attribute--------------------------
struct AAReachabilityImpl : AAReachability {
AAReachabilityImpl(const IRPosition &IRP, Attributor &A)
: AAReachability(IRP, A) {}
const std::string getAsStr() const override {
// TODO: Return the number of reachable queries.
return "reachable";
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
*this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
if (!NoRecurseAA.isAssumedNoRecurse())
return indicatePessimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
};
struct AAReachabilityFunction final : public AAReachabilityImpl {
AAReachabilityFunction(const IRPosition &IRP, Attributor &A)
: AAReachabilityImpl(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(reachable); }
};
/// ------------------------ NoAlias Argument Attribute ------------------------
struct AANoAliasImpl : AANoAlias {
AANoAliasImpl(const IRPosition &IRP, Attributor &A) : AANoAlias(IRP, A) {
assert(getAssociatedType()->isPointerTy() &&
"Noalias is a pointer attribute");
}
const std::string getAsStr() const override {
return getAssumed() ? "noalias" : "may-alias";
}
};
/// NoAlias attribute for a floating value.
struct AANoAliasFloating final : AANoAliasImpl {
AANoAliasFloating(const IRPosition &IRP, Attributor &A)
: AANoAliasImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
AANoAliasImpl::initialize(A);
Value *Val = &getAssociatedValue();
do {
CastInst *CI = dyn_cast<CastInst>(Val);
if (!CI)
break;
Value *Base = CI->getOperand(0);
if (!Base->hasOneUse())
break;
Val = Base;
} while (true);
if (!Val->getType()->isPointerTy()) {
indicatePessimisticFixpoint();
return;
}
if (isa<AllocaInst>(Val))
indicateOptimisticFixpoint();
else if (isa<ConstantPointerNull>(Val) &&
!NullPointerIsDefined(getAnchorScope(),
Val->getType()->getPointerAddressSpace()))
indicateOptimisticFixpoint();
else if (Val != &getAssociatedValue()) {
const auto &ValNoAliasAA = A.getAAFor<AANoAlias>(
*this, IRPosition::value(*Val), DepClassTy::OPTIONAL);
if (ValNoAliasAA.isKnownNoAlias())
indicateOptimisticFixpoint();
}
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Implement this.
return indicatePessimisticFixpoint();
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_FLOATING_ATTR(noalias)
}
};
/// NoAlias attribute for an argument.
struct AANoAliasArgument final
: AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl> {
using Base = AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl>;
AANoAliasArgument(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
Base::initialize(A);
// See callsite argument attribute and callee argument attribute.
if (hasAttr({Attribute::ByVal}))
indicateOptimisticFixpoint();
}
/// See AbstractAttribute::update(...).
ChangeStatus updateImpl(Attributor &A) override {
// We have to make sure no-alias on the argument does not break
// synchronization when this is a callback argument, see also [1] below.
// If synchronization cannot be affected, we delegate to the base updateImpl
// function, otherwise we give up for now.
// If the function is no-sync, no-alias cannot break synchronization.
const auto &NoSyncAA =
A.getAAFor<AANoSync>(*this, IRPosition::function_scope(getIRPosition()),
DepClassTy::OPTIONAL);
if (NoSyncAA.isAssumedNoSync())
return Base::updateImpl(A);
// If the argument is read-only, no-alias cannot break synchronization.
bool IsKnown;
if (AA::isAssumedReadOnly(A, getIRPosition(), *this, IsKnown))
return Base::updateImpl(A);
// If the argument is never passed through callbacks, no-alias cannot break
// synchronization.
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
if (A.checkForAllCallSites(
[](AbstractCallSite ACS) { return !ACS.isCallbackCall(); }, *this,
- true, AllCallSitesKnown))
+ true, UsedAssumedInformation))
return Base::updateImpl(A);
// TODO: add no-alias but make sure it doesn't break synchronization by
// introducing fake uses. See:
// [1] Compiler Optimizations for OpenMP, J. Doerfert and H. Finkel,
// International Workshop on OpenMP 2018,
// http://compilers.cs.uni-saarland.de/people/doerfert/par_opt18.pdf
return indicatePessimisticFixpoint();
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(noalias) }
};
struct AANoAliasCallSiteArgument final : AANoAliasImpl {
AANoAliasCallSiteArgument(const IRPosition &IRP, Attributor &A)
: AANoAliasImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
// See callsite argument attribute and callee argument attribute.
const auto &CB = cast<CallBase>(getAnchorValue());
if (CB.paramHasAttr(getCallSiteArgNo(), Attribute::NoAlias))
indicateOptimisticFixpoint();
Value &Val = getAssociatedValue();
if (isa<ConstantPointerNull>(Val) &&
!NullPointerIsDefined(getAnchorScope(),
Val.getType()->getPointerAddressSpace()))
indicateOptimisticFixpoint();
}
/// Determine if the underlying value may alias with the call site argument
/// \p OtherArgNo of \p ICS (= the underlying call site).
bool mayAliasWithArgument(Attributor &A, AAResults *&AAR,
const AAMemoryBehavior &MemBehaviorAA,
const CallBase &CB, unsigned OtherArgNo) {
// We do not need to worry about aliasing with the underlying IRP.
if (this->getCalleeArgNo() == (int)OtherArgNo)
return false;
// If it is not a pointer or pointer vector we do not alias.
const Value *ArgOp = CB.getArgOperand(OtherArgNo);
if (!ArgOp->getType()->isPtrOrPtrVectorTy())
return false;
auto &CBArgMemBehaviorAA = A.getAAFor<AAMemoryBehavior>(
*this, IRPosition::callsite_argument(CB, OtherArgNo), DepClassTy::NONE);
// If the argument is readnone, there is no read-write aliasing.
if (CBArgMemBehaviorAA.isAssumedReadNone()) {
A.recordDependence(CBArgMemBehaviorAA, *this, DepClassTy::OPTIONAL);
return false;
}
// If the argument is readonly and the underlying value is readonly, there
// is no read-write aliasing.
bool IsReadOnly = MemBehaviorAA.isAssumedReadOnly();
if (CBArgMemBehaviorAA.isAssumedReadOnly() && IsReadOnly) {
A.recordDependence(MemBehaviorAA, *this, DepClassTy::OPTIONAL);
A.recordDependence(CBArgMemBehaviorAA, *this, DepClassTy::OPTIONAL);
return false;
}
// We have to utilize actual alias analysis queries so we need the object.
if (!AAR)
AAR = A.getInfoCache().getAAResultsForFunction(*getAnchorScope());
// Try to rule it out at the call site.
bool IsAliasing = !AAR || !AAR->isNoAlias(&getAssociatedValue(), ArgOp);
LLVM_DEBUG(dbgs() << "[NoAliasCSArg] Check alias between "
"callsite arguments: "
<< getAssociatedValue() << " " << *ArgOp << " => "
<< (IsAliasing ? "" : "no-") << "alias \n");
return IsAliasing;
}
bool
isKnownNoAliasDueToNoAliasPreservation(Attributor &A, AAResults *&AAR,
const AAMemoryBehavior &MemBehaviorAA,
const AANoAlias &NoAliasAA) {
// We can deduce "noalias" if the following conditions hold.
// (i) Associated value is assumed to be noalias in the definition.
// (ii) Associated value is assumed to be no-capture in all the uses
// possibly executed before this callsite.
// (iii) There is no other pointer argument which could alias with the
// value.
bool AssociatedValueIsNoAliasAtDef = NoAliasAA.isAssumedNoAlias();
if (!AssociatedValueIsNoAliasAtDef) {
LLVM_DEBUG(dbgs() << "[AANoAlias] " << getAssociatedValue()
<< " is not no-alias at the definition\n");
return false;
}
A.recordDependence(NoAliasAA, *this, DepClassTy::OPTIONAL);
const IRPosition &VIRP = IRPosition::value(getAssociatedValue());
const Function *ScopeFn = VIRP.getAnchorScope();
auto &NoCaptureAA = A.getAAFor<AANoCapture>(*this, VIRP, DepClassTy::NONE);
// Check whether the value is captured in the scope using AANoCapture.
// Look at CFG and check only uses possibly executed before this
// callsite.
auto UsePred = [&](const Use &U, bool &Follow) -> bool {
Instruction *UserI = cast<Instruction>(U.getUser());
// If UserI is the current instruction and there is a single potential use
// of the value in UserI, we allow the use.
// TODO: We should inspect the operands and allow those that cannot alias
// with the value.
if (UserI == getCtxI() && UserI->getNumOperands() == 1)
return true;
if (ScopeFn) {
const auto &ReachabilityAA = A.getAAFor<AAReachability>(
*this, IRPosition::function(*ScopeFn), DepClassTy::OPTIONAL);
if (!ReachabilityAA.isAssumedReachable(A, *UserI, *getCtxI()))
return true;
if (auto *CB = dyn_cast<CallBase>(UserI)) {
if (CB->isArgOperand(&U)) {
unsigned ArgNo = CB->getArgOperandNo(&U);
const auto &NoCaptureAA = A.getAAFor<AANoCapture>(
*this, IRPosition::callsite_argument(*CB, ArgNo),
DepClassTy::OPTIONAL);
if (NoCaptureAA.isAssumedNoCapture())
return true;
}
}
}
// For cases that can potentially have more users.
if (isa<GetElementPtrInst>(U) || isa<BitCastInst>(U) || isa<PHINode>(U) ||
isa<SelectInst>(U)) {
Follow = true;
return true;
}
LLVM_DEBUG(dbgs() << "[AANoAliasCSArg] Unknown user: " << *U << "\n");
return false;
};
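// Only inspect the uses directly if AANoCapture could not already establish
// the no-capture (maybe-returned) property.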
if (!NoCaptureAA.isAssumedNoCaptureMaybeReturned()) {
if (!A.checkForAllUses(UsePred, *this, getAssociatedValue())) {
LLVM_DEBUG(
dbgs() << "[AANoAliasCSArg] " << getAssociatedValue()
<< " cannot be noalias as it is potentially captured\n");
return false;
}
}
A.recordDependence(NoCaptureAA, *this, DepClassTy::OPTIONAL);
// Check there is no other pointer argument which could alias with the
// value passed at this call site.
// TODO: AbstractCallSite
const auto &CB = cast<CallBase>(getAnchorValue());
for (unsigned OtherArgNo = 0; OtherArgNo < CB.arg_size(); OtherArgNo++)
if (mayAliasWithArgument(A, AAR, MemBehaviorAA, CB, OtherArgNo))
return false;
return true;
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// If the argument is readnone we are done as there are no accesses via the
// argument.
auto &MemBehaviorAA =
A.getAAFor<AAMemoryBehavior>(*this, getIRPosition(), DepClassTy::NONE);
if (MemBehaviorAA.isAssumedReadNone()) {
A.recordDependence(MemBehaviorAA, *this, DepClassTy::OPTIONAL);
return ChangeStatus::UNCHANGED;
}
const IRPosition &VIRP = IRPosition::value(getAssociatedValue());
const auto &NoAliasAA =
A.getAAFor<AANoAlias>(*this, VIRP, DepClassTy::NONE);
AAResults *AAR = nullptr;
if (isKnownNoAliasDueToNoAliasPreservation(A, AAR, MemBehaviorAA,
NoAliasAA)) {
LLVM_DEBUG(
dbgs() << "[AANoAlias] No-Alias deduced via no-alias preservation\n");
return ChangeStatus::UNCHANGED;
}
return indicatePessimisticFixpoint();
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(noalias) }
};
/// NoAlias attribute for function return value.
struct AANoAliasReturned final : AANoAliasImpl {
AANoAliasReturned(const IRPosition &IRP, Attributor &A)
: AANoAliasImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
AANoAliasImpl::initialize(A);
Function *F = getAssociatedFunction();
if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
/// See AbstractAttribute::updateImpl(...).
virtual ChangeStatus updateImpl(Attributor &A) override {
auto CheckReturnValue = [&](Value &RV) -> bool {
if (Constant *C = dyn_cast<Constant>(&RV))
if (C->isNullValue() || isa<UndefValue>(C))
return true;
/// For now, we can only deduce noalias if we have call sites.
/// FIXME: add more support.
if (!isa<CallBase>(&RV))
return false;
const IRPosition &RVPos = IRPosition::value(RV);
const auto &NoAliasAA =
A.getAAFor<AANoAlias>(*this, RVPos, DepClassTy::REQUIRED);
if (!NoAliasAA.isAssumedNoAlias())
return false;
const auto &NoCaptureAA =
A.getAAFor<AANoCapture>(*this, RVPos, DepClassTy::REQUIRED);
return NoCaptureAA.isAssumedNoCaptureMaybeReturned();
};
if (!A.checkForAllReturnedValues(CheckReturnValue, *this))
return indicatePessimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(noalias) }
};
/// NoAlias attribute deduction for a call site return value.
struct AANoAliasCallSiteReturned final : AANoAliasImpl {
AANoAliasCallSiteReturned(const IRPosition &IRP, Attributor &A)
: AANoAliasImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
AANoAliasImpl::initialize(A);
Function *F = getAssociatedFunction();
if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
// call site specific liveness information and then it makes
// sense to specialize attributes for call site arguments instead of
// redirecting requests to the callee argument.
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::returned(*F);
auto &FnAA = A.getAAFor<AANoAlias>(*this, FnPos, DepClassTy::REQUIRED);
return clampStateAndIndicateChange(getState(), FnAA.getState());
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(noalias); }
};
/// -------------------AAIsDead Function Attribute-----------------------
struct AAIsDeadValueImpl : public AAIsDead {
AAIsDeadValueImpl(const IRPosition &IRP, Attributor &A) : AAIsDead(IRP, A) {}
/// See AAIsDead::isAssumedDead().
bool isAssumedDead() const override { return isAssumed(IS_DEAD); }
/// See AAIsDead::isKnownDead().
bool isKnownDead() const override { return isKnown(IS_DEAD); }
/// See AAIsDead::isAssumedDead(BasicBlock *).
bool isAssumedDead(const BasicBlock *BB) const override { return false; }
/// See AAIsDead::isKnownDead(BasicBlock *).
bool isKnownDead(const BasicBlock *BB) const override { return false; }
/// See AAIsDead::isAssumedDead(Instruction *I).
bool isAssumedDead(const Instruction *I) const override {
return I == getCtxI() && isAssumedDead();
}
/// See AAIsDead::isKnownDead(Instruction *I).
bool isKnownDead(const Instruction *I) const override {
return isAssumedDead(I) && isKnownDead();
}
/// See AbstractAttribute::getAsStr().
const std::string getAsStr() const override {
return isAssumedDead() ? "assumed-dead" : "assumed-live";
}
/// Check if all uses are assumed dead.
bool areAllUsesAssumedDead(Attributor &A, Value &V) {
// Callers might not check the type, void has no uses.
if (V.getType()->isVoidTy())
return true;
// If we replace a value with a constant there are no uses left afterwards.
if (!isa<Constant>(V)) {
bool UsedAssumedInformation = false;
Optional<Constant *> C =
A.getAssumedConstant(V, *this, UsedAssumedInformation);
if (!C.hasValue() || *C)
return true;
}
auto UsePred = [&](const Use &U, bool &Follow) { return false; };
// Explicitly set the dependence class to required because we want a long
// chain of N dependent instructions to be considered live as soon as one is
// without going through N update cycles. This is not required for
// correctness.
return A.checkForAllUses(UsePred, *this, V, /* CheckBBLivenessOnly */ false,
DepClassTy::REQUIRED);
}
/// Determine if \p I is assumed to be side-effect free.
bool isAssumedSideEffectFree(Attributor &A, Instruction *I) {
if (!I || wouldInstructionBeTriviallyDead(I))
return true;
auto *CB = dyn_cast<CallBase>(I);
if (!CB || isa<IntrinsicInst>(CB))
return false;
const IRPosition &CallIRP = IRPosition::callsite_function(*CB);
const auto &NoUnwindAA =
A.getAndUpdateAAFor<AANoUnwind>(*this, CallIRP, DepClassTy::NONE);
if (!NoUnwindAA.isAssumedNoUnwind())
return false;
if (!NoUnwindAA.isKnownNoUnwind())
A.recordDependence(NoUnwindAA, *this, DepClassTy::OPTIONAL);
bool IsKnown;
return AA::isAssumedReadOnly(A, CallIRP, *this, IsKnown);
}
};
struct AAIsDeadFloating : public AAIsDeadValueImpl {
AAIsDeadFloating(const IRPosition &IRP, Attributor &A)
: AAIsDeadValueImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
if (isa<UndefValue>(getAssociatedValue())) {
indicatePessimisticFixpoint();
return;
}
Instruction *I = dyn_cast<Instruction>(&getAssociatedValue());
if (!isAssumedSideEffectFree(A, I)) {
if (!isa_and_nonnull<StoreInst>(I))
indicatePessimisticFixpoint();
else
removeAssumedBits(HAS_NO_EFFECT);
}
}
bool isDeadStore(Attributor &A, StoreInst &SI) {
// The LangRef now states that volatile stores are not UB/dead, so let's skip them.
if (SI.isVolatile())
return false;
bool UsedAssumedInformation = false;
SmallSetVector<Value *, 4> PotentialCopies;
if (!AA::getPotentialCopiesOfStoredValue(A, SI, PotentialCopies, *this,
UsedAssumedInformation))
return false;
return llvm::all_of(PotentialCopies, [&](Value *V) {
return A.isAssumedDead(IRPosition::value(*V), this, nullptr,
UsedAssumedInformation);
});
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
Instruction *I = dyn_cast<Instruction>(&getAssociatedValue());
if (auto *SI = dyn_cast_or_null<StoreInst>(I)) {
if (!isDeadStore(A, *SI))
return indicatePessimisticFixpoint();
} else {
if (!isAssumedSideEffectFree(A, I))
return indicatePessimisticFixpoint();
if (!areAllUsesAssumedDead(A, getAssociatedValue()))
return indicatePessimisticFixpoint();
}
return ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
Value &V = getAssociatedValue();
if (auto *I = dyn_cast<Instruction>(&V)) {
// If we get here we basically know the users are all dead. We check whether
// isAssumedSideEffectFree returns true here again because it might be that
// only the users are dead while the instruction (=call) itself is still
// needed.
if (isa<StoreInst>(I) ||
(isAssumedSideEffectFree(A, I) && !isa<InvokeInst>(I))) {
A.deleteAfterManifest(*I);
return ChangeStatus::CHANGED;
}
}
if (V.use_empty())
return ChangeStatus::UNCHANGED;
bool UsedAssumedInformation = false;
Optional<Constant *> C =
A.getAssumedConstant(V, *this, UsedAssumedInformation);
if (C.hasValue() && C.getValue())
return ChangeStatus::UNCHANGED;
// Replace the value with undef as it is dead but keep droppable uses around
// as they provide information we don't want to give up on just yet.
UndefValue &UV = *UndefValue::get(V.getType());
bool AnyChange =
A.changeValueAfterManifest(V, UV, /* ChangeDropppable */ false);
return AnyChange ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_FLOATING_ATTR(IsDead)
}
};
struct AAIsDeadArgument : public AAIsDeadFloating {
AAIsDeadArgument(const IRPosition &IRP, Attributor &A)
: AAIsDeadFloating(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
if (!A.isFunctionIPOAmendable(*getAnchorScope()))
indicatePessimisticFixpoint();
}
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
ChangeStatus Changed = AAIsDeadFloating::manifest(A);
Argument &Arg = *getAssociatedArgument();
if (A.isValidFunctionSignatureRewrite(Arg, /* ReplacementTypes */ {}))
if (A.registerFunctionSignatureRewrite(
Arg, /* ReplacementTypes */ {},
Attributor::ArgumentReplacementInfo::CalleeRepairCBTy{},
Attributor::ArgumentReplacementInfo::ACSRepairCBTy{})) {
Arg.dropDroppableUses();
return ChangeStatus::CHANGED;
}
return Changed;
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(IsDead) }
};
struct AAIsDeadCallSiteArgument : public AAIsDeadValueImpl {
AAIsDeadCallSiteArgument(const IRPosition &IRP, Attributor &A)
: AAIsDeadValueImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
if (isa<UndefValue>(getAssociatedValue()))
indicatePessimisticFixpoint();
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
// call site specific liveness information and then it makes
// sense to specialize attributes for call site arguments instead of
// redirecting requests to the callee argument.
Argument *Arg = getAssociatedArgument();
if (!Arg)
return indicatePessimisticFixpoint();
const IRPosition &ArgPos = IRPosition::argument(*Arg);
auto &ArgAA = A.getAAFor<AAIsDead>(*this, ArgPos, DepClassTy::REQUIRED);
return clampStateAndIndicateChange(getState(), ArgAA.getState());
}
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
CallBase &CB = cast<CallBase>(getAnchorValue());
Use &U = CB.getArgOperandUse(getCallSiteArgNo());
assert(!isa<UndefValue>(U.get()) &&
"Expected undef values to be filtered out!");
UndefValue &UV = *UndefValue::get(U->getType());
if (A.changeUseAfterManifest(U, UV))
return ChangeStatus::CHANGED;
return ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(IsDead) }
};
struct AAIsDeadCallSiteReturned : public AAIsDeadFloating {
AAIsDeadCallSiteReturned(const IRPosition &IRP, Attributor &A)
: AAIsDeadFloating(IRP, A), IsAssumedSideEffectFree(true) {}
/// See AAIsDead::isAssumedDead().
bool isAssumedDead() const override {
return AAIsDeadFloating::isAssumedDead() && IsAssumedSideEffectFree;
}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
if (isa<UndefValue>(getAssociatedValue())) {
indicatePessimisticFixpoint();
return;
}
// We track this separately as a secondary state.
IsAssumedSideEffectFree = isAssumedSideEffectFree(A, getCtxI());
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
if (IsAssumedSideEffectFree && !isAssumedSideEffectFree(A, getCtxI())) {
IsAssumedSideEffectFree = false;
Changed = ChangeStatus::CHANGED;
}
if (!areAllUsesAssumedDead(A, getAssociatedValue()))
return indicatePessimisticFixpoint();
return Changed;
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
if (IsAssumedSideEffectFree)
STATS_DECLTRACK_CSRET_ATTR(IsDead)
else
STATS_DECLTRACK_CSRET_ATTR(UnusedResult)
}
/// See AbstractAttribute::getAsStr().
const std::string getAsStr() const override {
return isAssumedDead()
? "assumed-dead"
: (getAssumed() ? "assumed-dead-users" : "assumed-live");
}
private:
bool IsAssumedSideEffectFree;
};
struct AAIsDeadReturned : public AAIsDeadValueImpl {
AAIsDeadReturned(const IRPosition &IRP, Attributor &A)
: AAIsDeadValueImpl(IRP, A) {}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
bool UsedAssumedInformation = false;
A.checkForAllInstructions([](Instruction &) { return true; }, *this,
{Instruction::Ret}, UsedAssumedInformation);
auto PredForCallSite = [&](AbstractCallSite ACS) {
if (ACS.isCallbackCall() || !ACS.getInstruction())
return false;
return areAllUsesAssumedDead(A, *ACS.getInstruction());
};
- bool AllCallSitesKnown;
if (!A.checkForAllCallSites(PredForCallSite, *this, true,
- AllCallSitesKnown))
+ UsedAssumedInformation))
return indicatePessimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
// TODO: Rewrite the signature to return void?
bool AnyChange = false;
UndefValue &UV = *UndefValue::get(getAssociatedFunction()->getReturnType());
auto RetInstPred = [&](Instruction &I) {
ReturnInst &RI = cast<ReturnInst>(I);
if (!isa<UndefValue>(RI.getReturnValue()))
AnyChange |= A.changeUseAfterManifest(RI.getOperandUse(0), UV);
return true;
};
bool UsedAssumedInformation = false;
A.checkForAllInstructions(RetInstPred, *this, {Instruction::Ret},
UsedAssumedInformation);
return AnyChange ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(IsDead) }
};
struct AAIsDeadFunction : public AAIsDead {
AAIsDeadFunction(const IRPosition &IRP, Attributor &A) : AAIsDead(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
const Function *F = getAnchorScope();
if (F && !F->isDeclaration()) {
// We only want to compute liveness once. If the function is not part of
// the SCC, skip it.
if (A.isRunOn(*const_cast<Function *>(F))) {
ToBeExploredFrom.insert(&F->getEntryBlock().front());
assumeLive(A, F->getEntryBlock());
} else {
indicatePessimisticFixpoint();
}
}
}
/// See AbstractAttribute::getAsStr().
const std::string getAsStr() const override {
return "Live[#BB " + std::to_string(AssumedLiveBlocks.size()) + "/" +
std::to_string(getAnchorScope()->size()) + "][#TBEP " +
std::to_string(ToBeExploredFrom.size()) + "][#KDE " +
std::to_string(KnownDeadEnds.size()) + "]";
}
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
assert(getState().isValidState() &&
"Attempted to manifest an invalid state!");
ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
Function &F = *getAnchorScope();
if (AssumedLiveBlocks.empty()) {
A.deleteAfterManifest(F);
return ChangeStatus::CHANGED;
}
// Flag to determine if we can change an invoke to a call assuming the
// callee is nounwind. This is not possible if the personality of the
// function allows it to catch asynchronous exceptions.
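// For illustration (hypothetical IR, not taken from this file):
//   invoke void @f() to label %normal unwind label %lpad
// If @f is assumed nounwind and the personality cannot catch asynchronous
// exceptions, %lpad stays dead and the invoke can be turned into a call.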
bool Invoke2CallAllowed = !mayCatchAsynchronousExceptions(F);
KnownDeadEnds.set_union(ToBeExploredFrom);
for (const Instruction *DeadEndI : KnownDeadEnds) {
auto *CB = dyn_cast<CallBase>(DeadEndI);
if (!CB)
continue;
const auto &NoReturnAA = A.getAndUpdateAAFor<AANoReturn>(
*this, IRPosition::callsite_function(*CB), DepClassTy::OPTIONAL);
bool MayReturn = !NoReturnAA.isAssumedNoReturn();
if (MayReturn && (!Invoke2CallAllowed || !isa<InvokeInst>(CB)))
continue;
if (auto *II = dyn_cast<InvokeInst>(DeadEndI))
A.registerInvokeWithDeadSuccessor(const_cast<InvokeInst &>(*II));
else
A.changeToUnreachableAfterManifest(
const_cast<Instruction *>(DeadEndI->getNextNode()));
HasChanged = ChangeStatus::CHANGED;
}
STATS_DECL(AAIsDead, BasicBlock, "Number of dead basic blocks deleted.");
for (BasicBlock &BB : F)
if (!AssumedLiveBlocks.count(&BB)) {
A.deleteAfterManifest(BB);
++BUILD_STAT_NAME(AAIsDead, BasicBlock);
HasChanged = ChangeStatus::CHANGED;
}
return HasChanged;
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override;
bool isEdgeDead(const BasicBlock *From, const BasicBlock *To) const override {
+ assert(From->getParent() == getAnchorScope() &&
+ To->getParent() == getAnchorScope() &&
+ "Used AAIsDead of the wrong function");
return isValidState() && !AssumedLiveEdges.count(std::make_pair(From, To));
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {}
/// Returns true if the function is assumed dead.
bool isAssumedDead() const override { return false; }
/// See AAIsDead::isKnownDead().
bool isKnownDead() const override { return false; }
/// See AAIsDead::isAssumedDead(BasicBlock *).
bool isAssumedDead(const BasicBlock *BB) const override {
assert(BB->getParent() == getAnchorScope() &&
"BB must be in the same anchor scope function.");
if (!getAssumed())
return false;
return !AssumedLiveBlocks.count(BB);
}
/// See AAIsDead::isKnownDead(BasicBlock *).
bool isKnownDead(const BasicBlock *BB) const override {
return getKnown() && isAssumedDead(BB);
}
/// See AAIsDead::isAssumed(Instruction *I).
bool isAssumedDead(const Instruction *I) const override {
assert(I->getParent()->getParent() == getAnchorScope() &&
"Instruction must be in the same anchor scope function.");
if (!getAssumed())
return false;
// If it is not in AssumedLiveBlocks then it is for sure dead.
// Otherwise, it can still be after a noreturn call in a live block.
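// For illustration (hypothetical IR, not taken from this file):
//   call void @abort()     ; known dead end (noreturn)
//   %x = add i32 %a, 1     ; assumed dead: it follows a liveness barrier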
if (!AssumedLiveBlocks.count(I->getParent()))
return true;
// If it is not after a liveness barrier it is live.
const Instruction *PrevI = I->getPrevNode();
while (PrevI) {
if (KnownDeadEnds.count(PrevI) || ToBeExploredFrom.count(PrevI))
return true;
PrevI = PrevI->getPrevNode();
}
return false;
}
/// See AAIsDead::isKnownDead(Instruction *I).
bool isKnownDead(const Instruction *I) const override {
return getKnown() && isAssumedDead(I);
}
/// Assume \p BB is (partially) live now and indicate to the Attributor \p A
/// that internal functions called from \p BB should now be looked at.
bool assumeLive(Attributor &A, const BasicBlock &BB) {
if (!AssumedLiveBlocks.insert(&BB).second)
return false;
// We assume that all of BB is (probably) live now and if there are calls to
// internal functions we will assume that those are now live as well. This
// is a performance optimization for blocks with calls to a lot of internal
// functions. It can however cause dead functions to be treated as live.
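// For illustration (hypothetical IR, not taken from this file): given
//   define internal void @helper() { ... }
// a call to @helper from an assumed-live block marks @helper live, even if
// that particular call later turns out to be dead.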
for (const Instruction &I : BB)
if (const auto *CB = dyn_cast<CallBase>(&I))
if (const Function *F = CB->getCalledFunction())
if (F->hasLocalLinkage())
A.markLiveInternalFunction(*F);
return true;
}
/// Collection of instructions that need to be explored again, e.g., we
/// did assume they do not transfer control to (one of their) successors.
SmallSetVector<const Instruction *, 8> ToBeExploredFrom;
/// Collection of instructions that are known to not transfer control.
SmallSetVector<const Instruction *, 8> KnownDeadEnds;
/// Collection of all assumed live edges
DenseSet<std::pair<const BasicBlock *, const BasicBlock *>> AssumedLiveEdges;
/// Collection of all assumed live BasicBlocks.
DenseSet<const BasicBlock *> AssumedLiveBlocks;
};
static bool
identifyAliveSuccessors(Attributor &A, const CallBase &CB,
AbstractAttribute &AA,
SmallVectorImpl<const Instruction *> &AliveSuccessors) {
const IRPosition &IPos = IRPosition::callsite_function(CB);
const auto &NoReturnAA =
A.getAndUpdateAAFor<AANoReturn>(AA, IPos, DepClassTy::OPTIONAL);
if (NoReturnAA.isAssumedNoReturn())
return !NoReturnAA.isKnownNoReturn();
if (CB.isTerminator())
AliveSuccessors.push_back(&CB.getSuccessor(0)->front());
else
AliveSuccessors.push_back(CB.getNextNode());
return false;
}
static bool
identifyAliveSuccessors(Attributor &A, const InvokeInst &II,
AbstractAttribute &AA,
SmallVectorImpl<const Instruction *> &AliveSuccessors) {
bool UsedAssumedInformation =
identifyAliveSuccessors(A, cast<CallBase>(II), AA, AliveSuccessors);
// First, determine if we can change an invoke to a call assuming the
// callee is nounwind. This is not possible if the personality of the
// function allows it to catch asynchronous exceptions.
if (AAIsDeadFunction::mayCatchAsynchronousExceptions(*II.getFunction())) {
AliveSuccessors.push_back(&II.getUnwindDest()->front());
} else {
const IRPosition &IPos = IRPosition::callsite_function(II);
const auto &AANoUnw =
A.getAndUpdateAAFor<AANoUnwind>(AA, IPos, DepClassTy::OPTIONAL);
if (AANoUnw.isAssumedNoUnwind()) {
UsedAssumedInformation |= !AANoUnw.isKnownNoUnwind();
} else {
AliveSuccessors.push_back(&II.getUnwindDest()->front());
}
}
return UsedAssumedInformation;
}
static bool
identifyAliveSuccessors(Attributor &A, const BranchInst &BI,
AbstractAttribute &AA,
SmallVectorImpl<const Instruction *> &AliveSuccessors) {
bool UsedAssumedInformation = false;
if (BI.getNumSuccessors() == 1) {
AliveSuccessors.push_back(&BI.getSuccessor(0)->front());
} else {
Optional<Constant *> C =
A.getAssumedConstant(*BI.getCondition(), AA, UsedAssumedInformation);
if (!C.hasValue() || isa_and_nonnull<UndefValue>(C.getValue())) {
// No value yet, assume both edges are dead.
} else if (isa_and_nonnull<ConstantInt>(*C)) {
const BasicBlock *SuccBB =
BI.getSuccessor(1 - cast<ConstantInt>(*C)->getValue().getZExtValue());
AliveSuccessors.push_back(&SuccBB->front());
} else {
AliveSuccessors.push_back(&BI.getSuccessor(0)->front());
AliveSuccessors.push_back(&BI.getSuccessor(1)->front());
UsedAssumedInformation = false;
}
}
return UsedAssumedInformation;
}
static bool
identifyAliveSuccessors(Attributor &A, const SwitchInst &SI,
AbstractAttribute &AA,
SmallVectorImpl<const Instruction *> &AliveSuccessors) {
bool UsedAssumedInformation = false;
Optional<Constant *> C =
A.getAssumedConstant(*SI.getCondition(), AA, UsedAssumedInformation);
if (!C.hasValue() || isa_and_nonnull<UndefValue>(C.getValue())) {
// No value yet, assume all edges are dead.
} else if (isa_and_nonnull<ConstantInt>(C.getValue())) {
for (auto &CaseIt : SI.cases()) {
if (CaseIt.getCaseValue() == C.getValue()) {
AliveSuccessors.push_back(&CaseIt.getCaseSuccessor()->front());
return UsedAssumedInformation;
}
}
AliveSuccessors.push_back(&SI.getDefaultDest()->front());
return UsedAssumedInformation;
} else {
for (const BasicBlock *SuccBB : successors(SI.getParent()))
AliveSuccessors.push_back(&SuccBB->front());
}
return UsedAssumedInformation;
}
ChangeStatus AAIsDeadFunction::updateImpl(Attributor &A) {
ChangeStatus Change = ChangeStatus::UNCHANGED;
LLVM_DEBUG(dbgs() << "[AAIsDead] Live [" << AssumedLiveBlocks.size() << "/"
<< getAnchorScope()->size() << "] BBs and "
<< ToBeExploredFrom.size() << " exploration points and "
<< KnownDeadEnds.size() << " known dead ends\n");
// Copy and clear the list of instructions we need to explore from. It is
// refilled with instructions the next update has to look at.
SmallVector<const Instruction *, 8> Worklist(ToBeExploredFrom.begin(),
ToBeExploredFrom.end());
decltype(ToBeExploredFrom) NewToBeExploredFrom;
SmallVector<const Instruction *, 8> AliveSuccessors;
while (!Worklist.empty()) {
const Instruction *I = Worklist.pop_back_val();
LLVM_DEBUG(dbgs() << "[AAIsDead] Exploration inst: " << *I << "\n");
// Fast forward for uninteresting instructions. We could look for UB here
// though.
while (!I->isTerminator() && !isa<CallBase>(I))
I = I->getNextNode();
AliveSuccessors.clear();
bool UsedAssumedInformation = false;
switch (I->getOpcode()) {
// TODO: look for (assumed) UB to backwards propagate "deadness".
default:
assert(I->isTerminator() &&
"Expected non-terminators to be handled already!");
for (const BasicBlock *SuccBB : successors(I->getParent()))
AliveSuccessors.push_back(&SuccBB->front());
break;
case Instruction::Call:
UsedAssumedInformation = identifyAliveSuccessors(A, cast<CallInst>(*I),
*this, AliveSuccessors);
break;
case Instruction::Invoke:
UsedAssumedInformation = identifyAliveSuccessors(A, cast<InvokeInst>(*I),
*this, AliveSuccessors);
break;
case Instruction::Br:
UsedAssumedInformation = identifyAliveSuccessors(A, cast<BranchInst>(*I),
*this, AliveSuccessors);
break;
case Instruction::Switch:
UsedAssumedInformation = identifyAliveSuccessors(A, cast<SwitchInst>(*I),
*this, AliveSuccessors);
break;
}
if (UsedAssumedInformation) {
NewToBeExploredFrom.insert(I);
} else if (AliveSuccessors.empty() ||
(I->isTerminator() &&
AliveSuccessors.size() < I->getNumSuccessors())) {
if (KnownDeadEnds.insert(I))
Change = ChangeStatus::CHANGED;
}
LLVM_DEBUG(dbgs() << "[AAIsDead] #AliveSuccessors: "
<< AliveSuccessors.size() << " UsedAssumedInformation: "
<< UsedAssumedInformation << "\n");
for (const Instruction *AliveSuccessor : AliveSuccessors) {
if (!I->isTerminator()) {
assert(AliveSuccessors.size() == 1 &&
"Non-terminator expected to have a single successor!");
Worklist.push_back(AliveSuccessor);
} else {
// record the assumed live edge
auto Edge = std::make_pair(I->getParent(), AliveSuccessor->getParent());
if (AssumedLiveEdges.insert(Edge).second)
Change = ChangeStatus::CHANGED;
if (assumeLive(A, *AliveSuccessor->getParent()))
Worklist.push_back(AliveSuccessor);
}
}
}
// Check if the content of ToBeExploredFrom changed, ignore the order.
if (NewToBeExploredFrom.size() != ToBeExploredFrom.size() ||
llvm::any_of(NewToBeExploredFrom, [&](const Instruction *I) {
return !ToBeExploredFrom.count(I);
})) {
Change = ChangeStatus::CHANGED;
ToBeExploredFrom = std::move(NewToBeExploredFrom);
}
// If we know everything is live there is no need to query for liveness.
// Instead, indicating a pessimistic fixpoint will cause the state to be
// "invalid" and all queries to be answered conservatively without lookups.
// To be in this state we have to (1) have finished the exploration, (2) not
// have ruled any unreachable code dead, and (3) not have discovered any
// non-trivial dead end.
if (ToBeExploredFrom.empty() &&
getAnchorScope()->size() == AssumedLiveBlocks.size() &&
llvm::all_of(KnownDeadEnds, [](const Instruction *DeadEndI) {
return DeadEndI->isTerminator() && DeadEndI->getNumSuccessors() == 0;
}))
return indicatePessimisticFixpoint();
return Change;
}
/// Liveness information for a call site.
struct AAIsDeadCallSite final : AAIsDeadFunction {
AAIsDeadCallSite(const IRPosition &IRP, Attributor &A)
: AAIsDeadFunction(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
// call site specific liveness information and then it makes
// sense to specialize attributes for call sites instead of
// redirecting requests to the callee.
llvm_unreachable("Abstract attributes for liveness are not "
"supported for call sites yet!");
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
return indicatePessimisticFixpoint();
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {}
};
/// -------------------- Dereferenceable Argument Attribute --------------------
struct AADereferenceableImpl : AADereferenceable {
AADereferenceableImpl(const IRPosition &IRP, Attributor &A)
: AADereferenceable(IRP, A) {}
using StateType = DerefState;
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
SmallVector<Attribute, 4> Attrs;
getAttrs({Attribute::Dereferenceable, Attribute::DereferenceableOrNull},
Attrs, /* IgnoreSubsumingPositions */ false, &A);
for (const Attribute &Attr : Attrs)
takeKnownDerefBytesMaximum(Attr.getValueAsInt());
const IRPosition &IRP = this->getIRPosition();
NonNullAA = &A.getAAFor<AANonNull>(*this, IRP, DepClassTy::NONE);
bool CanBeNull, CanBeFreed;
takeKnownDerefBytesMaximum(
IRP.getAssociatedValue().getPointerDereferenceableBytes(
A.getDataLayout(), CanBeNull, CanBeFreed));
bool IsFnInterface = IRP.isFnInterfaceKind();
Function *FnScope = IRP.getAnchorScope();
if (IsFnInterface && (!FnScope || !A.isFunctionIPOAmendable(*FnScope))) {
indicatePessimisticFixpoint();
return;
}
if (Instruction *CtxI = getCtxI())
followUsesInMBEC(*this, A, getState(), *CtxI);
}
/// See AbstractAttribute::getState()
/// {
StateType &getState() override { return *this; }
const StateType &getState() const override { return *this; }
/// }
/// Helper function for collecting accessed bytes in must-be-executed-context
void addAccessedBytesForUse(Attributor &A, const Use *U, const Instruction *I,
DerefState &State) {
const Value *UseV = U->get();
if (!UseV->getType()->isPointerTy())
return;
Optional<MemoryLocation> Loc = MemoryLocation::getOrNone(I);
if (!Loc || Loc->Ptr != UseV || !Loc->Size.isPrecise() || I->isVolatile())
return;
int64_t Offset;
const Value *Base = GetPointerBaseWithConstantOffset(
Loc->Ptr, Offset, A.getDataLayout(), /*AllowNonInbounds*/ true);
if (Base && Base == &getAssociatedValue())
State.addAccessedBytes(Offset, Loc->Size.getValue());
}
/// See followUsesInMBEC
bool followUseInMBEC(Attributor &A, const Use *U, const Instruction *I,
AADereferenceable::StateType &State) {
bool IsNonNull = false;
bool TrackUse = false;
int64_t DerefBytes = getKnownNonNullAndDerefBytesForUse(
A, *this, getAssociatedValue(), U, I, IsNonNull, TrackUse);
LLVM_DEBUG(dbgs() << "[AADereferenceable] Deref bytes: " << DerefBytes
<< " for instruction " << *I << "\n");
addAccessedBytesForUse(A, U, I, State);
State.takeKnownDerefBytesMaximum(DerefBytes);
return TrackUse;
}
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
ChangeStatus Change = AADereferenceable::manifest(A);
if (isAssumedNonNull() && hasAttr(Attribute::DereferenceableOrNull)) {
removeAttrs({Attribute::DereferenceableOrNull});
return ChangeStatus::CHANGED;
}
return Change;
}
void getDeducedAttributes(LLVMContext &Ctx,
SmallVectorImpl<Attribute> &Attrs) const override {
// TODO: Add *_globally support
if (isAssumedNonNull())
Attrs.emplace_back(Attribute::getWithDereferenceableBytes(
Ctx, getAssumedDereferenceableBytes()));
else
Attrs.emplace_back(Attribute::getWithDereferenceableOrNullBytes(
Ctx, getAssumedDereferenceableBytes()));
}
/// See AbstractAttribute::getAsStr().
const std::string getAsStr() const override {
if (!getAssumedDereferenceableBytes())
return "unknown-dereferenceable";
return std::string("dereferenceable") +
(isAssumedNonNull() ? "" : "_or_null") +
(isAssumedGlobal() ? "_globally" : "") + "<" +
std::to_string(getKnownDereferenceableBytes()) + "-" +
std::to_string(getAssumedDereferenceableBytes()) + ">";
}
};
/// Dereferenceable attribute for a floating value.
struct AADereferenceableFloating : AADereferenceableImpl {
AADereferenceableFloating(const IRPosition &IRP, Attributor &A)
: AADereferenceableImpl(IRP, A) {}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
const DataLayout &DL = A.getDataLayout();
auto VisitValueCB = [&](const Value &V, const Instruction *, DerefState &T,
bool Stripped) -> bool {
unsigned IdxWidth =
DL.getIndexSizeInBits(V.getType()->getPointerAddressSpace());
APInt Offset(IdxWidth, 0);
const Value *Base =
stripAndAccumulateMinimalOffsets(A, *this, &V, DL, Offset, false);
const auto &AA = A.getAAFor<AADereferenceable>(
*this, IRPosition::value(*Base), DepClassTy::REQUIRED);
int64_t DerefBytes = 0;
if (!Stripped && this == &AA) {
// Use IR information if we did not strip anything.
// TODO: track globally.
bool CanBeNull, CanBeFreed;
DerefBytes =
Base->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
T.GlobalState.indicatePessimisticFixpoint();
} else {
const DerefState &DS = AA.getState();
DerefBytes = DS.DerefBytesState.getAssumed();
T.GlobalState &= DS.GlobalState;
}
// For now we do not try to "increase" dereferenceability due to negative
// indices as we first have to come up with code to deal with loops and
// with overflows of the dereferenceable bytes.
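// Illustrative numbers (not taken from this file): with dereferenceable(8)
// on the base and Offset = -4, the offset is clamped to 0 and we keep 8
// assumed bytes instead of claiming 12.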
int64_t OffsetSExt = Offset.getSExtValue();
if (OffsetSExt < 0)
OffsetSExt = 0;
T.takeAssumedDerefBytesMinimum(
std::max(int64_t(0), DerefBytes - OffsetSExt));
if (this == &AA) {
if (!Stripped) {
// If nothing was stripped IR information is all we got.
T.takeKnownDerefBytesMaximum(
std::max(int64_t(0), DerefBytes - OffsetSExt));
T.indicatePessimisticFixpoint();
} else if (OffsetSExt > 0) {
// If something was stripped but there is circular reasoning we look
// for the offset. If it is positive we basically decrease the
// dereferenceable bytes in a circular loop now, which will simply
// drive them down to the known value in a very slow way which we
// can accelerate.
T.indicatePessimisticFixpoint();
}
}
return T.isValidState();
};
DerefState T;
+ bool UsedAssumedInformation = false;
if (!genericValueTraversal<DerefState>(A, getIRPosition(), *this, T,
- VisitValueCB, getCtxI()))
+ VisitValueCB, getCtxI(),
+ UsedAssumedInformation))
return indicatePessimisticFixpoint();
return clampStateAndIndicateChange(getState(), T);
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_FLOATING_ATTR(dereferenceable)
}
};
/// Dereferenceable attribute for a return value.
struct AADereferenceableReturned final
: AAReturnedFromReturnedValues<AADereferenceable, AADereferenceableImpl> {
AADereferenceableReturned(const IRPosition &IRP, Attributor &A)
: AAReturnedFromReturnedValues<AADereferenceable, AADereferenceableImpl>(
IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_FNRET_ATTR(dereferenceable)
}
};
/// Dereferenceable attribute for an argument
struct AADereferenceableArgument final
: AAArgumentFromCallSiteArguments<AADereferenceable,
AADereferenceableImpl> {
using Base =
AAArgumentFromCallSiteArguments<AADereferenceable, AADereferenceableImpl>;
AADereferenceableArgument(const IRPosition &IRP, Attributor &A)
: Base(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_ARG_ATTR(dereferenceable)
}
};
/// Dereferenceable attribute for a call site argument.
struct AADereferenceableCallSiteArgument final : AADereferenceableFloating {
AADereferenceableCallSiteArgument(const IRPosition &IRP, Attributor &A)
: AADereferenceableFloating(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_CSARG_ATTR(dereferenceable)
}
};
/// Dereferenceable attribute deduction for a call site return value.
struct AADereferenceableCallSiteReturned final
: AACallSiteReturnedFromReturned<AADereferenceable, AADereferenceableImpl> {
using Base =
AACallSiteReturnedFromReturned<AADereferenceable, AADereferenceableImpl>;
AADereferenceableCallSiteReturned(const IRPosition &IRP, Attributor &A)
: Base(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_CS_ATTR(dereferenceable);
}
};
// ------------------------ Align Argument Attribute ------------------------
static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA,
Value &AssociatedValue, const Use *U,
const Instruction *I, bool &TrackUse) {
// We need to follow common pointer manipulation uses to the accesses they
// feed into.
if (isa<CastInst>(I)) {
// Follow all but ptr2int casts.
TrackUse = !isa<PtrToIntInst>(I);
return 0;
}
if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
if (GEP->hasAllConstantIndices())
TrackUse = true;
return 0;
}
MaybeAlign MA;
if (const auto *CB = dyn_cast<CallBase>(I)) {
if (CB->isBundleOperand(U) || CB->isCallee(U))
return 0;
unsigned ArgNo = CB->getArgOperandNo(U);
IRPosition IRP = IRPosition::callsite_argument(*CB, ArgNo);
// As long as we only use known information there is no need to track
// dependences here.
auto &AlignAA = A.getAAFor<AAAlign>(QueryingAA, IRP, DepClassTy::NONE);
MA = MaybeAlign(AlignAA.getKnownAlign());
}
const DataLayout &DL = A.getDataLayout();
const Value *UseV = U->get();
if (auto *SI = dyn_cast<StoreInst>(I)) {
if (SI->getPointerOperand() == UseV)
MA = SI->getAlign();
} else if (auto *LI = dyn_cast<LoadInst>(I)) {
if (LI->getPointerOperand() == UseV)
MA = LI->getAlign();
}
if (!MA || *MA <= QueryingAA.getKnownAlign())
return 0;
unsigned Alignment = MA->value();
int64_t Offset;
if (const Value *Base = GetPointerBaseWithConstantOffset(UseV, Offset, DL)) {
if (Base == &AssociatedValue) {
// BasePointerAddr + Offset = Alignment * Q for some integer Q.
// So we can say that the maximum power of two which is a divisor of
// gcd(Offset, Alignment) is an alignment.
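// Illustrative numbers (not taken from this file): with Offset = 20 and
// Alignment = 16, gcd(20, 16) = 4, so only a 4-byte alignment can be
// deduced for the base pointer.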
uint32_t gcd =
greatestCommonDivisor(uint32_t(abs((int32_t)Offset)), Alignment);
Alignment = llvm::PowerOf2Floor(gcd);
}
}
return Alignment;
}
struct AAAlignImpl : AAAlign {
AAAlignImpl(const IRPosition &IRP, Attributor &A) : AAAlign(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
SmallVector<Attribute, 4> Attrs;
getAttrs({Attribute::Alignment}, Attrs);
for (const Attribute &Attr : Attrs)
takeKnownMaximum(Attr.getValueAsInt());
Value &V = getAssociatedValue();
// TODO: This is a HACK to avoid getPointerAlignment introducing a ptr2int
// use of the function pointer. This was caused by D73131. We want to
// avoid this for function pointers especially because we iterate
// their uses and int2ptr is not handled. It is not a correctness
// problem though!
if (!V.getType()->getPointerElementType()->isFunctionTy())
takeKnownMaximum(V.getPointerAlignment(A.getDataLayout()).value());
if (getIRPosition().isFnInterfaceKind() &&
(!getAnchorScope() ||
!A.isFunctionIPOAmendable(*getAssociatedFunction()))) {
indicatePessimisticFixpoint();
return;
}
if (Instruction *CtxI = getCtxI())
followUsesInMBEC(*this, A, getState(), *CtxI);
}
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
ChangeStatus LoadStoreChanged = ChangeStatus::UNCHANGED;
// Check for users that allow alignment annotations.
Value &AssociatedValue = getAssociatedValue();
for (const Use &U : AssociatedValue.uses()) {
if (auto *SI = dyn_cast<StoreInst>(U.getUser())) {
if (SI->getPointerOperand() == &AssociatedValue)
if (SI->getAlignment() < getAssumedAlign()) {
STATS_DECLTRACK(AAAlign, Store,
"Number of times alignment added to a store");
SI->setAlignment(Align(getAssumedAlign()));
LoadStoreChanged = ChangeStatus::CHANGED;
}
} else if (auto *LI = dyn_cast<LoadInst>(U.getUser())) {
if (LI->getPointerOperand() == &AssociatedValue)
if (LI->getAlignment() < getAssumedAlign()) {
LI->setAlignment(Align(getAssumedAlign()));
STATS_DECLTRACK(AAAlign, Load,
"Number of times alignment added to a load");
LoadStoreChanged = ChangeStatus::CHANGED;
}
}
}
ChangeStatus Changed = AAAlign::manifest(A);
Align InheritAlign =
getAssociatedValue().getPointerAlignment(A.getDataLayout());
if (InheritAlign >= getAssumedAlign())
return LoadStoreChanged;
return Changed | LoadStoreChanged;
}
// TODO: Provide a helper to determine the implied ABI alignment and check
// against that value in the existing manifest method and in a new one for
// AAAlignImpl, to avoid making the alignment explicit if it did not improve.
/// See AbstractAttribute::getDeducedAttributes
virtual void
getDeducedAttributes(LLVMContext &Ctx,
SmallVectorImpl<Attribute> &Attrs) const override {
if (getAssumedAlign() > 1)
Attrs.emplace_back(
Attribute::getWithAlignment(Ctx, Align(getAssumedAlign())));
}
/// See followUsesInMBEC
bool followUseInMBEC(Attributor &A, const Use *U, const Instruction *I,
AAAlign::StateType &State) {
bool TrackUse = false;
unsigned int KnownAlign =
getKnownAlignForUse(A, *this, getAssociatedValue(), U, I, TrackUse);
State.takeKnownMaximum(KnownAlign);
return TrackUse;
}
/// See AbstractAttribute::getAsStr().
const std::string getAsStr() const override {
return getAssumedAlign() ? ("align<" + std::to_string(getKnownAlign()) +
"-" + std::to_string(getAssumedAlign()) + ">")
: "unknown-align";
}
};
/// Align attribute for a floating value.
struct AAAlignFloating : AAAlignImpl {
AAAlignFloating(const IRPosition &IRP, Attributor &A) : AAAlignImpl(IRP, A) {}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
const DataLayout &DL = A.getDataLayout();
auto VisitValueCB = [&](Value &V, const Instruction *,
AAAlign::StateType &T, bool Stripped) -> bool {
const auto &AA = A.getAAFor<AAAlign>(*this, IRPosition::value(V),
DepClassTy::REQUIRED);
if (!Stripped && this == &AA) {
int64_t Offset;
unsigned Alignment = 1;
if (const Value *Base =
GetPointerBaseWithConstantOffset(&V, Offset, DL)) {
Align PA = Base->getPointerAlignment(DL);
// BasePointerAddr + Offset = Alignment * Q for some integer Q.
// So we can say that the maximum power of two which is a divisor of
// gcd(Offset, Alignment) is an alignment.
uint32_t gcd = greatestCommonDivisor(uint32_t(abs((int32_t)Offset)),
uint32_t(PA.value()));
Alignment = llvm::PowerOf2Floor(gcd);
} else {
Alignment = V.getPointerAlignment(DL).value();
}
// Use only IR information if we did not strip anything.
T.takeKnownMaximum(Alignment);
T.indicatePessimisticFixpoint();
} else {
// Use abstract attribute information.
const AAAlign::StateType &DS = AA.getState();
T ^= DS;
}
return T.isValidState();
};
StateType T;
+ bool UsedAssumedInformation = false;
if (!genericValueTraversal<StateType>(A, getIRPosition(), *this, T,
- VisitValueCB, getCtxI()))
+ VisitValueCB, getCtxI(),
+ UsedAssumedInformation))
return indicatePessimisticFixpoint();
// TODO: If we know we visited all incoming values, thus none are assumed
// dead, we can take the known information from the state T.
return clampStateAndIndicateChange(getState(), T);
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_FLOATING_ATTR(align) }
};
/// Align attribute for function return value.
struct AAAlignReturned final
: AAReturnedFromReturnedValues<AAAlign, AAAlignImpl> {
using Base = AAReturnedFromReturnedValues<AAAlign, AAAlignImpl>;
AAAlignReturned(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
Base::initialize(A);
Function *F = getAssociatedFunction();
if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(aligned) }
};
/// Align attribute for function argument.
struct AAAlignArgument final
: AAArgumentFromCallSiteArguments<AAAlign, AAAlignImpl> {
using Base = AAArgumentFromCallSiteArguments<AAAlign, AAAlignImpl>;
AAAlignArgument(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {}
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
// If the associated argument is involved in a must-tail call we give up
// because we would need to keep the argument alignments of caller and
// callee in-sync. Just does not seem worth the trouble right now.
if (A.getInfoCache().isInvolvedInMustTailCall(*getAssociatedArgument()))
return ChangeStatus::UNCHANGED;
return Base::manifest(A);
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(aligned) }
};
struct AAAlignCallSiteArgument final : AAAlignFloating {
AAAlignCallSiteArgument(const IRPosition &IRP, Attributor &A)
: AAAlignFloating(IRP, A) {}
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
// If the associated argument is involved in a must-tail call we give up
// because we would need to keep the argument alignments of caller and
// callee in-sync. Just does not seem worth the trouble right now.
if (Argument *Arg = getAssociatedArgument())
if (A.getInfoCache().isInvolvedInMustTailCall(*Arg))
return ChangeStatus::UNCHANGED;
ChangeStatus Changed = AAAlignImpl::manifest(A);
Align InheritAlign =
getAssociatedValue().getPointerAlignment(A.getDataLayout());
if (InheritAlign >= getAssumedAlign())
Changed = ChangeStatus::UNCHANGED;
return Changed;
}
/// See AbstractAttribute::updateImpl(Attributor &A).
ChangeStatus updateImpl(Attributor &A) override {
ChangeStatus Changed = AAAlignFloating::updateImpl(A);
if (Argument *Arg = getAssociatedArgument()) {
// We only take known information from the argument
// so we do not need to track a dependence.
const auto &ArgAlignAA = A.getAAFor<AAAlign>(
*this, IRPosition::argument(*Arg), DepClassTy::NONE);
takeKnownMaximum(ArgAlignAA.getKnownAlign());
}
return Changed;
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(aligned) }
};
/// Align attribute deduction for a call site return value.
struct AAAlignCallSiteReturned final
: AACallSiteReturnedFromReturned<AAAlign, AAAlignImpl> {
using Base = AACallSiteReturnedFromReturned<AAAlign, AAAlignImpl>;
AAAlignCallSiteReturned(const IRPosition &IRP, Attributor &A)
: Base(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
Base::initialize(A);
Function *F = getAssociatedFunction();
if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(align); }
};
/// ------------------ Function No-Return Attribute ----------------------------
struct AANoReturnImpl : public AANoReturn {
AANoReturnImpl(const IRPosition &IRP, Attributor &A) : AANoReturn(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
AANoReturn::initialize(A);
Function *F = getAssociatedFunction();
if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
/// See AbstractAttribute::getAsStr().
const std::string getAsStr() const override {
return getAssumed() ? "noreturn" : "may-return";
}
/// See AbstractAttribute::updateImpl(Attributor &A).
virtual ChangeStatus updateImpl(Attributor &A) override {
auto CheckForNoReturn = [](Instruction &) { return false; };
bool UsedAssumedInformation = false;
if (!A.checkForAllInstructions(CheckForNoReturn, *this,
{(unsigned)Instruction::Ret},
UsedAssumedInformation))
return indicatePessimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
};
struct AANoReturnFunction final : AANoReturnImpl {
AANoReturnFunction(const IRPosition &IRP, Attributor &A)
: AANoReturnImpl(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(noreturn) }
};
/// NoReturn attribute deduction for a call site.
struct AANoReturnCallSite final : AANoReturnImpl {
AANoReturnCallSite(const IRPosition &IRP, Attributor &A)
: AANoReturnImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
AANoReturnImpl::initialize(A);
if (Function *F = getAssociatedFunction()) {
const IRPosition &FnPos = IRPosition::function(*F);
auto &FnAA = A.getAAFor<AANoReturn>(*this, FnPos, DepClassTy::REQUIRED);
if (!FnAA.isAssumedNoReturn())
indicatePessimisticFixpoint();
}
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
// call site specific liveness information and then it makes
// sense to specialize attributes for call site arguments instead of
// redirecting requests to the callee argument.
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
auto &FnAA = A.getAAFor<AANoReturn>(*this, FnPos, DepClassTy::REQUIRED);
return clampStateAndIndicateChange(getState(), FnAA.getState());
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(noreturn); }
};
/// ----------------------- Variable Capturing ---------------------------------
/// A class to hold the state for no-capture attributes.
struct AANoCaptureImpl : public AANoCapture {
AANoCaptureImpl(const IRPosition &IRP, Attributor &A) : AANoCapture(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
if (hasAttr(getAttrKind(), /* IgnoreSubsumingPositions */ true)) {
indicateOptimisticFixpoint();
return;
}
Function *AnchorScope = getAnchorScope();
if (isFnInterfaceKind() &&
(!AnchorScope || !A.isFunctionIPOAmendable(*AnchorScope))) {
indicatePessimisticFixpoint();
return;
}
// You cannot "capture" null in the default address space.
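// For illustration (hypothetical IR, not taken from this file):
//   call void @f(i8* null)   ; passing null in addrspace(0) captures nothing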
if (isa<ConstantPointerNull>(getAssociatedValue()) &&
getAssociatedValue().getType()->getPointerAddressSpace() == 0) {
indicateOptimisticFixpoint();
return;
}
const Function *F =
isArgumentPosition() ? getAssociatedFunction() : AnchorScope;
// Check what state the associated function can actually capture.
if (F)
determineFunctionCaptureCapabilities(getIRPosition(), *F, *this);
else
indicatePessimisticFixpoint();
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override;
/// see AbstractAttribute::isAssumedNoCaptureMaybeReturned(...).
virtual void
getDeducedAttributes(LLVMContext &Ctx,
SmallVectorImpl<Attribute> &Attrs) const override {
if (!isAssumedNoCaptureMaybeReturned())
return;
if (isArgumentPosition()) {
if (isAssumedNoCapture())
Attrs.emplace_back(Attribute::get(Ctx, Attribute::NoCapture));
else if (ManifestInternal)
Attrs.emplace_back(Attribute::get(Ctx, "no-capture-maybe-returned"));
}
}
/// Set the NOT_CAPTURED_IN_MEM and NOT_CAPTURED_IN_RET bits in \p Known
/// depending on the ability of the function associated with \p IRP to capture
/// state in memory and through "returning/throwing", respectively.
static void determineFunctionCaptureCapabilities(const IRPosition &IRP,
const Function &F,
BitIntegerState &State) {
// TODO: Once we have memory behavior attributes we should use them here.
// If we know we cannot communicate or write to memory, we do not care about
// ptr2int anymore.
if (F.onlyReadsMemory() && F.doesNotThrow() &&
F.getReturnType()->isVoidTy()) {
State.addKnownBits(NO_CAPTURE);
return;
}
// A function cannot capture state in memory if it only reads memory; it can
// however return/throw state, and that state might be influenced by the
// pointer value, e.g., loading from a returned pointer might reveal a bit.
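// For illustration (hypothetical IR, not taken from this file):
//   define i8* @id(i8* %p) readonly { ret i8* %p }
// cannot capture %p through memory, yet %p still escapes via the return.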
if (F.onlyReadsMemory())
State.addKnownBits(NOT_CAPTURED_IN_MEM);
// A function cannot communicate state back if it does not throw exceptions
// and does not return values.
if (F.doesNotThrow() && F.getReturnType()->isVoidTy())
State.addKnownBits(NOT_CAPTURED_IN_RET);
// Check existing "returned" attributes.
int ArgNo = IRP.getCalleeArgNo();
if (F.doesNotThrow() && ArgNo >= 0) {
for (unsigned u = 0, e = F.arg_size(); u < e; ++u)
if (F.hasParamAttribute(u, Attribute::Returned)) {
if (u == unsigned(ArgNo))
State.removeAssumedBits(NOT_CAPTURED_IN_RET);
else if (F.onlyReadsMemory())
State.addKnownBits(NO_CAPTURE);
else
State.addKnownBits(NOT_CAPTURED_IN_RET);
break;
}
}
}
/// See AbstractState::getAsStr().
const std::string getAsStr() const override {
if (isKnownNoCapture())
return "known not-captured";
if (isAssumedNoCapture())
return "assumed not-captured";
if (isKnownNoCaptureMaybeReturned())
return "known not-captured-maybe-returned";
if (isAssumedNoCaptureMaybeReturned())
return "assumed not-captured-maybe-returned";
return "assumed-captured";
}
};
/// Attributor-aware capture tracker.
struct AACaptureUseTracker final : public CaptureTracker {
/// Create a capture tracker that can lookup in-flight abstract attributes
/// through the Attributor \p A.
///
/// If a use leads to a potential capture, \p CapturedInMemory is set and the
/// search is stopped. If a use leads to a return instruction,
/// \p CommunicatedBack is set to true and \p CapturedInMemory is not changed.
/// If a use leads to a ptr2int which may capture the value,
/// \p CapturedInInteger is set. If a use is found that is currently assumed
/// "no-capture-maybe-returned", the user is added to the \p PotentialCopies
/// set. All values in \p PotentialCopies are later tracked as well. For every
/// explored use we decrement \p RemainingUsesToExplore. Once it reaches 0,
/// the search is stopped with \p CapturedInMemory and \p CapturedInInteger
/// conservatively set to true.
AACaptureUseTracker(Attributor &A, AANoCapture &NoCaptureAA,
const AAIsDead &IsDeadAA, AANoCapture::StateType &State,
SmallSetVector<Value *, 4> &PotentialCopies,
unsigned &RemainingUsesToExplore)
: A(A), NoCaptureAA(NoCaptureAA), IsDeadAA(IsDeadAA), State(State),
PotentialCopies(PotentialCopies),
RemainingUsesToExplore(RemainingUsesToExplore) {}
/// Determine if \p V maybe captured. *Also updates the state!*
bool valueMayBeCaptured(const Value *V) {
if (V->getType()->isPointerTy()) {
PointerMayBeCaptured(V, this);
} else {
State.indicatePessimisticFixpoint();
}
return State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED);
}
/// See CaptureTracker::tooManyUses().
void tooManyUses() override {
State.removeAssumedBits(AANoCapture::NO_CAPTURE);
}
bool isDereferenceableOrNull(Value *O, const DataLayout &DL) override {
if (CaptureTracker::isDereferenceableOrNull(O, DL))
return true;
const auto &DerefAA = A.getAAFor<AADereferenceable>(
NoCaptureAA, IRPosition::value(*O), DepClassTy::OPTIONAL);
return DerefAA.getAssumedDereferenceableBytes();
}
/// See CaptureTracker::captured(...).
bool captured(const Use *U) override {
Instruction *UInst = cast<Instruction>(U->getUser());
LLVM_DEBUG(dbgs() << "Check use: " << *U->get() << " in " << *UInst
<< "\n");
// Because we may reuse the tracker multiple times we keep track of the
// number of explored uses ourselves as well.
if (RemainingUsesToExplore-- == 0) {
LLVM_DEBUG(dbgs() << " - too many uses to explore!\n");
return isCapturedIn(/* Memory */ true, /* Integer */ true,
/* Return */ true);
}
// Deal with ptr2int by following uses.
if (isa<PtrToIntInst>(UInst)) {
LLVM_DEBUG(dbgs() << " - ptr2int assume the worst!\n");
return valueMayBeCaptured(UInst);
}
// For stores we check if we can follow the value through memory or not.
if (auto *SI = dyn_cast<StoreInst>(UInst)) {
if (SI->isVolatile())
return isCapturedIn(/* Memory */ true, /* Integer */ false,
/* Return */ false);
bool UsedAssumedInformation = false;
if (!AA::getPotentialCopiesOfStoredValue(
A, *SI, PotentialCopies, NoCaptureAA, UsedAssumedInformation))
return isCapturedIn(/* Memory */ true, /* Integer */ false,
/* Return */ false);
// Not captured directly, potential copies will be checked.
return isCapturedIn(/* Memory */ false, /* Integer */ false,
/* Return */ false);
}
// Explicitly catch return instructions.
if (isa<ReturnInst>(UInst)) {
if (UInst->getFunction() == NoCaptureAA.getAnchorScope())
return isCapturedIn(/* Memory */ false, /* Integer */ false,
/* Return */ true);
return isCapturedIn(/* Memory */ true, /* Integer */ true,
/* Return */ true);
}
// For now we only use special logic for call sites. However, the tracker
// itself knows about a lot of other non-capturing cases already.
auto *CB = dyn_cast<CallBase>(UInst);
if (!CB || !CB->isArgOperand(U))
return isCapturedIn(/* Memory */ true, /* Integer */ true,
/* Return */ true);
unsigned ArgNo = CB->getArgOperandNo(U);
const IRPosition &CSArgPos = IRPosition::callsite_argument(*CB, ArgNo);
// If we have an abstract no-capture attribute for the argument we can use
// it to justify a non-capture attribute here. This allows recursion!
auto &ArgNoCaptureAA =
A.getAAFor<AANoCapture>(NoCaptureAA, CSArgPos, DepClassTy::REQUIRED);
if (ArgNoCaptureAA.isAssumedNoCapture())
return isCapturedIn(/* Memory */ false, /* Integer */ false,
/* Return */ false);
if (ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) {
addPotentialCopy(*CB);
return isCapturedIn(/* Memory */ false, /* Integer */ false,
/* Return */ false);
}
// Lastly, we could not find a reason no-capture can be assumed so we don't.
return isCapturedIn(/* Memory */ true, /* Integer */ true,
/* Return */ true);
}
/// Register \p CS as potential copy of the value we are checking.
void addPotentialCopy(CallBase &CB) { PotentialCopies.insert(&CB); }
/// See CaptureTracker::shouldExplore(...).
bool shouldExplore(const Use *U) override {
// Check liveness and ignore droppable users.
bool UsedAssumedInformation = false;
return !U->getUser()->isDroppable() &&
!A.isAssumedDead(*U, &NoCaptureAA, &IsDeadAA,
UsedAssumedInformation);
}
/// Update the state according to \p CapturedInMem, \p CapturedInInt, and
/// \p CapturedInRet, then return the appropriate value for use in the
/// CaptureTracker::captured() interface.
bool isCapturedIn(bool CapturedInMem, bool CapturedInInt,
bool CapturedInRet) {
LLVM_DEBUG(dbgs() << " - captures [Mem " << CapturedInMem << "|Int "
<< CapturedInInt << "|Ret " << CapturedInRet << "]\n");
if (CapturedInMem)
State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_MEM);
if (CapturedInInt)
State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_INT);
if (CapturedInRet)
State.removeAssumedBits(AANoCapture::NOT_CAPTURED_IN_RET);
return !State.isAssumed(AANoCapture::NO_CAPTURE_MAYBE_RETURNED);
}
private:
/// The attributor providing in-flight abstract attributes.
Attributor &A;
/// The abstract attribute currently updated.
AANoCapture &NoCaptureAA;
/// The abstract liveness state.
const AAIsDead &IsDeadAA;
/// The state currently updated.
AANoCapture::StateType &State;
/// Set of potential copies of the tracked value.
SmallSetVector<Value *, 4> &PotentialCopies;
/// Global counter to limit the number of explored uses.
unsigned &RemainingUsesToExplore;
};
ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) {
const IRPosition &IRP = getIRPosition();
Value *V = isArgumentPosition() ? IRP.getAssociatedArgument()
: &IRP.getAssociatedValue();
if (!V)
return indicatePessimisticFixpoint();
const Function *F =
isArgumentPosition() ? IRP.getAssociatedFunction() : IRP.getAnchorScope();
assert(F && "Expected a function!");
const IRPosition &FnPos = IRPosition::function(*F);
const auto &IsDeadAA = A.getAAFor<AAIsDead>(*this, FnPos, DepClassTy::NONE);
AANoCapture::StateType T;
// Readonly means we cannot capture through memory.
bool IsKnown;
if (AA::isAssumedReadOnly(A, FnPos, *this, IsKnown)) {
T.addKnownBits(NOT_CAPTURED_IN_MEM);
if (IsKnown)
addKnownBits(NOT_CAPTURED_IN_MEM);
}
// Make sure all returned values are different from the underlying value.
// TODO: we could do this in a more sophisticated way inside
// AAReturnedValues, e.g., track all values that escape through returns
// directly somehow.
auto CheckReturnedArgs = [&](const AAReturnedValues &RVAA) {
bool SeenConstant = false;
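// Allow at most one constant among the returned values; every other returned
// value must be an argument different from the one we reason about here.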
for (auto &It : RVAA.returned_values()) {
if (isa<Constant>(It.first)) {
if (SeenConstant)
return false;
SeenConstant = true;
} else if (!isa<Argument>(It.first) ||
It.first == getAssociatedArgument())
return false;
}
return true;
};
const auto &NoUnwindAA =
A.getAAFor<AANoUnwind>(*this, FnPos, DepClassTy::OPTIONAL);
if (NoUnwindAA.isAssumedNoUnwind()) {
bool IsVoidTy = F->getReturnType()->isVoidTy();
const AAReturnedValues *RVAA =
IsVoidTy ? nullptr
: &A.getAAFor<AAReturnedValues>(*this, FnPos,
DepClassTy::OPTIONAL);
if (IsVoidTy || CheckReturnedArgs(*RVAA)) {
T.addKnownBits(NOT_CAPTURED_IN_RET);
if (T.isKnown(NOT_CAPTURED_IN_MEM))
return ChangeStatus::UNCHANGED;
if (NoUnwindAA.isKnownNoUnwind() &&
(IsVoidTy || RVAA->getState().isAtFixpoint())) {
addKnownBits(NOT_CAPTURED_IN_RET);
if (isKnown(NOT_CAPTURED_IN_MEM))
return indicateOptimisticFixpoint();
}
}
}
// Use the CaptureTracker interface and logic with the specialized tracker,
// defined in AACaptureUseTracker, that can look at in-flight abstract
// attributes and directly update the assumed state.
SmallSetVector<Value *, 4> PotentialCopies;
unsigned RemainingUsesToExplore =
getDefaultMaxUsesToExploreForCaptureTracking();
AACaptureUseTracker Tracker(A, *this, IsDeadAA, T, PotentialCopies,
RemainingUsesToExplore);
// Check all potential copies of the associated value until we can assume
// none will be captured or we have to assume at least one might be.
unsigned Idx = 0;
PotentialCopies.insert(V);
while (T.isAssumed(NO_CAPTURE_MAYBE_RETURNED) && Idx < PotentialCopies.size())
Tracker.valueMayBeCaptured(PotentialCopies[Idx++]);
AANoCapture::StateType &S = getState();
auto Assumed = S.getAssumed();
S.intersectAssumedBits(T.getAssumed());
if (!isAssumedNoCaptureMaybeReturned())
return indicatePessimisticFixpoint();
return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED
: ChangeStatus::CHANGED;
}
/// NoCapture attribute for function arguments.
struct AANoCaptureArgument final : AANoCaptureImpl {
AANoCaptureArgument(const IRPosition &IRP, Attributor &A)
: AANoCaptureImpl(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nocapture) }
};
/// NoCapture attribute for call site arguments.
struct AANoCaptureCallSiteArgument final : AANoCaptureImpl {
AANoCaptureCallSiteArgument(const IRPosition &IRP, Attributor &A)
: AANoCaptureImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
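// A byval call site argument is passed as a private copy, so the pointer
// handed to the callee cannot be captured through this position.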
if (Argument *Arg = getAssociatedArgument())
if (Arg->hasByValAttr())
indicateOptimisticFixpoint();
AANoCaptureImpl::initialize(A);
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
// call site specific liveness information and then it makes
// sense to specialize attributes for call site arguments instead of
// redirecting requests to the callee argument.
Argument *Arg = getAssociatedArgument();
if (!Arg)
return indicatePessimisticFixpoint();
const IRPosition &ArgPos = IRPosition::argument(*Arg);
auto &ArgAA = A.getAAFor<AANoCapture>(*this, ArgPos, DepClassTy::REQUIRED);
return clampStateAndIndicateChange(getState(), ArgAA.getState());
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override{STATS_DECLTRACK_CSARG_ATTR(nocapture)};
};
/// NoCapture attribute for floating values.
struct AANoCaptureFloating final : AANoCaptureImpl {
AANoCaptureFloating(const IRPosition &IRP, Attributor &A)
: AANoCaptureImpl(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_FLOATING_ATTR(nocapture)
}
};
/// NoCapture attribute for function return value.
struct AANoCaptureReturned final : AANoCaptureImpl {
AANoCaptureReturned(const IRPosition &IRP, Attributor &A)
: AANoCaptureImpl(IRP, A) {
llvm_unreachable("NoCapture is not applicable to function returns!");
}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
llvm_unreachable("NoCapture is not applicable to function returns!");
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
llvm_unreachable("NoCapture is not applicable to function returns!");
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {}
};
/// NoCapture attribute deduction for a call site return value.
struct AANoCaptureCallSiteReturned final : AANoCaptureImpl {
AANoCaptureCallSiteReturned(const IRPosition &IRP, Attributor &A)
: AANoCaptureImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
const Function *F = getAnchorScope();
// Check what state the associated function can actually capture.
determineFunctionCaptureCapabilities(getIRPosition(), *F, *this);
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_CSRET_ATTR(nocapture)
}
};
/// ------------------ Value Simplify Attribute ----------------------------
bool ValueSimplifyStateType::unionAssumed(Optional<Value *> Other) {
// FIXME: Add a typecast support.
SimplifiedAssociatedValue = AA::combineOptionalValuesInAAValueLatice(
SimplifiedAssociatedValue, Other, Ty);
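// A nullptr result means the two values could not be combined into a single
// candidate; report the failure to the caller.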
if (SimplifiedAssociatedValue == Optional<Value *>(nullptr))
return false;
LLVM_DEBUG({
if (SimplifiedAssociatedValue.hasValue())
dbgs() << "[ValueSimplify] is assumed to be "
<< **SimplifiedAssociatedValue << "\n";
else
dbgs() << "[ValueSimplify] is assumed to be <none>\n";
});
return true;
}
struct AAValueSimplifyImpl : AAValueSimplify {
AAValueSimplifyImpl(const IRPosition &IRP, Attributor &A)
: AAValueSimplify(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
if (getAssociatedValue().getType()->isVoidTy())
indicatePessimisticFixpoint();
if (A.hasSimplificationCallback(getIRPosition()))
indicatePessimisticFixpoint();
}
/// See AbstractAttribute::getAsStr().
const std::string getAsStr() const override {
LLVM_DEBUG({
errs() << "SAV: " << SimplifiedAssociatedValue << " ";
if (SimplifiedAssociatedValue && *SimplifiedAssociatedValue)
errs() << "SAV: " << **SimplifiedAssociatedValue << " ";
});
return isValidState() ? (isAtFixpoint() ? "simplified" : "maybe-simple")
: "not-simple";
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {}
/// See AAValueSimplify::getAssumedSimplifiedValue()
Optional<Value *> getAssumedSimplifiedValue(Attributor &A) const override {
return SimplifiedAssociatedValue;
}
/// Return a value we can use as replacement for the associated one, or
/// nullptr if we don't have one that makes sense.
Value *getReplacementValue(Attributor &A) const {
Value *NewV;
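// If simplification has not settled on any value yet (None), fall back to
// undef as the placeholder replacement.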
NewV = SimplifiedAssociatedValue.hasValue()
? SimplifiedAssociatedValue.getValue()
: UndefValue::get(getAssociatedType());
if (!NewV)
return nullptr;
NewV = AA::getWithType(*NewV, *getAssociatedType());
if (!NewV || NewV == &getAssociatedValue())
return nullptr;
const Instruction *CtxI = getCtxI();
if (CtxI && !AA::isValidAtPosition(*NewV, *CtxI, A.getInfoCache()))
return nullptr;
if (!CtxI && !AA::isValidInScope(*NewV, getAnchorScope()))
return nullptr;
return NewV;
}
/// Helper function for querying AAValueSimplify and updating the candidate.
/// \param IRP The value position we are trying to unify with SimplifiedValue
bool checkAndUpdate(Attributor &A, const AbstractAttribute &QueryingAA,
const IRPosition &IRP, bool Simplify = true) {
bool UsedAssumedInformation = false;
Optional<Value *> QueryingValueSimplified = &IRP.getAssociatedValue();
if (Simplify)
QueryingValueSimplified =
A.getAssumedSimplified(IRP, QueryingAA, UsedAssumedInformation);
return unionAssumed(QueryingValueSimplified);
}
/// Returns true if a candidate is found, false otherwise.
template <typename AAType> bool askSimplifiedValueFor(Attributor &A) {
if (!getAssociatedValue().getType()->isIntegerTy())
return false;
// This will also pass the call base context.
const auto &AA =
A.getAAFor<AAType>(*this, getIRPosition(), DepClassTy::NONE);
Optional<ConstantInt *> COpt = AA.getAssumedConstantInt(A);
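// A pending result (None) means we have to wait; keep the optimistic state
// and record a dependence so we are rerun when the other AA changes.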
if (!COpt.hasValue()) {
SimplifiedAssociatedValue = llvm::None;
A.recordDependence(AA, *this, DepClassTy::OPTIONAL);
return true;
}
if (auto *C = COpt.getValue()) {
SimplifiedAssociatedValue = C;
A.recordDependence(AA, *this, DepClassTy::OPTIONAL);
return true;
}
return false;
}
bool askSimplifiedValueForOtherAAs(Attributor &A) {
if (askSimplifiedValueFor<AAValueConstantRange>(A))
return true;
if (askSimplifiedValueFor<AAPotentialValues>(A))
return true;
return false;
}
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
if (getAssociatedValue().user_empty())
return Changed;
if (auto *NewV = getReplacementValue(A)) {
LLVM_DEBUG(dbgs() << "[ValueSimplify] " << getAssociatedValue() << " -> "
<< *NewV << " :: " << *this << "\n");
if (A.changeValueAfterManifest(getAssociatedValue(), *NewV))
Changed = ChangeStatus::CHANGED;
}
return Changed | AAValueSimplify::manifest(A);
}
/// See AbstractState::indicatePessimisticFixpoint(...).
ChangeStatus indicatePessimisticFixpoint() override {
SimplifiedAssociatedValue = &getAssociatedValue();
return AAValueSimplify::indicatePessimisticFixpoint();
}
static bool handleLoad(Attributor &A, const AbstractAttribute &AA,
LoadInst &L, function_ref<bool(Value &)> Union) {
auto UnionWrapper = [&](Value &V, Value &Obj) {
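// Values stored into an alloca can be used directly; for any other object
// the value must additionally be dynamically unique and valid at the load.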
if (isa<AllocaInst>(Obj))
return Union(V);
if (!AA::isDynamicallyUnique(A, AA, V))
return false;
if (!AA::isValidAtPosition(V, L, A.getInfoCache()))
return false;
return Union(V);
};
Value &Ptr = *L.getPointerOperand();
SmallVector<Value *, 8> Objects;
- if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, AA, &L))
+ bool UsedAssumedInformation = false;
+ if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, AA, &L,
+ UsedAssumedInformation))
return false;
const auto *TLI =
A.getInfoCache().getTargetLibraryInfoForFunction(*L.getFunction());
for (Value *Obj : Objects) {
LLVM_DEBUG(dbgs() << "Visit underlying object " << *Obj << "\n");
if (isa<UndefValue>(Obj))
continue;
if (isa<ConstantPointerNull>(Obj)) {
// A null pointer access can be undefined but any offset from null may
// be OK. We do not try to optimize the latter.
- bool UsedAssumedInformation = false;
if (!NullPointerIsDefined(L.getFunction(),
Ptr.getType()->getPointerAddressSpace()) &&
A.getAssumedSimplified(Ptr, AA, UsedAssumedInformation) == Obj)
continue;
return false;
}
Constant *InitialVal = AA::getInitialValueForObj(*Obj, *L.getType(), TLI);
if (!InitialVal || !Union(*InitialVal))
return false;
LLVM_DEBUG(dbgs() << "Underlying object amenable to load-store "
"propagation, checking accesses next.\n");
auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) {
LLVM_DEBUG(dbgs() << " - visit access " << Acc << "\n");
if (Acc.isWrittenValueYetUndetermined())
return true;
Value *Content = Acc.getWrittenValue();
if (!Content)
return false;
Value *CastedContent =
AA::getWithType(*Content, *AA.getAssociatedType());
if (!CastedContent)
return false;
if (IsExact)
return UnionWrapper(*CastedContent, *Obj);
if (auto *C = dyn_cast<Constant>(CastedContent))
if (C->isNullValue() || C->isAllOnesValue() || isa<UndefValue>(C))
return UnionWrapper(*CastedContent, *Obj);
return false;
};
auto &PI = A.getAAFor<AAPointerInfo>(AA, IRPosition::value(*Obj),
DepClassTy::REQUIRED);
if (!PI.forallInterferingWrites(A, AA, L, CheckAccess))
return false;
}
return true;
}
};
struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
AAValueSimplifyArgument(const IRPosition &IRP, Attributor &A)
: AAValueSimplifyImpl(IRP, A) {}
void initialize(Attributor &A) override {
AAValueSimplifyImpl::initialize(A);
if (!getAnchorScope() || getAnchorScope()->isDeclaration())
indicatePessimisticFixpoint();
if (hasAttr({Attribute::InAlloca, Attribute::Preallocated,
Attribute::StructRet, Attribute::Nest, Attribute::ByVal},
/* IgnoreSubsumingPositions */ true))
indicatePessimisticFixpoint();
// FIXME: This is a hack to prevent us from propagating function pointers in
// the new pass manager CGSCC pass as it creates call edges the
// CallGraphUpdater cannot handle yet.
Value &V = getAssociatedValue();
if (V.getType()->isPointerTy() &&
V.getType()->getPointerElementType()->isFunctionTy() &&
!A.isModulePass())
indicatePessimisticFixpoint();
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// Byval is only replaceable if it is readonly; otherwise we would write into
// the replaced value and not the copy that byval creates implicitly.
Argument *Arg = getAssociatedArgument();
if (Arg->hasByValAttr()) {
// TODO: We probably need to verify synchronization is not an issue, e.g.,
// there is no race by not copying a constant byval.
bool IsKnown;
if (!AA::isAssumedReadOnly(A, getIRPosition(), *this, IsKnown))
return indicatePessimisticFixpoint();
}
auto Before = SimplifiedAssociatedValue;
auto PredForCallSite = [&](AbstractCallSite ACS) {
const IRPosition &ACSArgPos =
IRPosition::callsite_argument(ACS, getCallSiteArgNo());
// Check if a corresponding argument was found or if it is one not
// associated (which can happen for callback calls).
if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID)
return false;
// Simplify the argument operand explicitly and check if the result is
// valid in the current scope. This avoids referring to simplified values
// in other functions, e.g., we don't want to say an argument in a
// static function is actually an argument in a different function.
bool UsedAssumedInformation = false;
Optional<Constant *> SimpleArgOp =
A.getAssumedConstant(ACSArgPos, *this, UsedAssumedInformation);
if (!SimpleArgOp.hasValue())
return true;
if (!SimpleArgOp.getValue())
return false;
if (!AA::isDynamicallyUnique(A, *this, **SimpleArgOp))
return false;
return unionAssumed(*SimpleArgOp);
};
// Generate an answer specific to a call site context.
bool Success;
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
if (hasCallBaseContext() &&
getCallBaseContext()->getCalledFunction() == Arg->getParent())
Success = PredForCallSite(
AbstractCallSite(&getCallBaseContext()->getCalledOperandUse()));
else
Success = A.checkForAllCallSites(PredForCallSite, *this, true,
- AllCallSitesKnown);
+ UsedAssumedInformation);
if (!Success)
if (!askSimplifiedValueForOtherAAs(A))
return indicatePessimisticFixpoint();
// If a candidate was found in this update, return CHANGED.
return Before == SimplifiedAssociatedValue ? ChangeStatus::UNCHANGED
: ChangeStatus ::CHANGED;
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_ARG_ATTR(value_simplify)
}
};
struct AAValueSimplifyReturned : AAValueSimplifyImpl {
AAValueSimplifyReturned(const IRPosition &IRP, Attributor &A)
: AAValueSimplifyImpl(IRP, A) {}
/// See AAValueSimplify::getAssumedSimplifiedValue()
Optional<Value *> getAssumedSimplifiedValue(Attributor &A) const override {
if (!isValidState())
return nullptr;
return SimplifiedAssociatedValue;
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
auto Before = SimplifiedAssociatedValue;
auto PredForReturned = [&](Value &V) {
return checkAndUpdate(A, *this,
IRPosition::value(V, getCallBaseContext()));
};
if (!A.checkForAllReturnedValues(PredForReturned, *this))
if (!askSimplifiedValueForOtherAAs(A))
return indicatePessimisticFixpoint();
// If a candidate was found in this update, return CHANGED.
return Before == SimplifiedAssociatedValue ? ChangeStatus::UNCHANGED
: ChangeStatus ::CHANGED;
}
ChangeStatus manifest(Attributor &A) override {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
if (auto *NewV = getReplacementValue(A)) {
auto PredForReturned =
[&](Value &, const SmallSetVector<ReturnInst *, 4> &RetInsts) {
for (ReturnInst *RI : RetInsts) {
Value *ReturnedVal = RI->getReturnValue();
if (ReturnedVal == NewV || isa<UndefValue>(ReturnedVal))
return true;
assert(RI->getFunction() == getAnchorScope() &&
"ReturnInst in wrong function!");
LLVM_DEBUG(dbgs()
<< "[ValueSimplify] " << *ReturnedVal << " -> "
<< *NewV << " in " << *RI << " :: " << *this << "\n");
if (A.changeUseAfterManifest(RI->getOperandUse(0), *NewV))
Changed = ChangeStatus::CHANGED;
}
return true;
};
A.checkForAllReturnedValuesAndReturnInsts(PredForReturned, *this);
}
return Changed | AAValueSimplify::manifest(A);
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_FNRET_ATTR(value_simplify)
}
};
struct AAValueSimplifyFloating : AAValueSimplifyImpl {
AAValueSimplifyFloating(const IRPosition &IRP, Attributor &A)
: AAValueSimplifyImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
AAValueSimplifyImpl::initialize(A);
Value &V = getAnchorValue();
// TODO: add other cases
if (isa<Constant>(V))
indicatePessimisticFixpoint();
}
/// Check if \p Cmp is a comparison we can simplify.
///
/// We handle multiple cases, one in which at least one operand is an
/// (assumed) nullptr. If so, try to simplify it using AANonNull on the other
/// operand. Return true if successful; in that case SimplifiedAssociatedValue
/// will be updated.
bool handleCmp(Attributor &A, CmpInst &Cmp) {
auto Union = [&](Value &V) {
SimplifiedAssociatedValue = AA::combineOptionalValuesInAAValueLatice(
SimplifiedAssociatedValue, &V, V.getType());
return SimplifiedAssociatedValue != Optional<Value *>(nullptr);
};
Value *LHS = Cmp.getOperand(0);
Value *RHS = Cmp.getOperand(1);
// Simplify the operands first.
bool UsedAssumedInformation = false;
const auto &SimplifiedLHS =
A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()),
*this, UsedAssumedInformation);
if (!SimplifiedLHS.hasValue())
return true;
if (!SimplifiedLHS.getValue())
return false;
LHS = *SimplifiedLHS;
const auto &SimplifiedRHS =
A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()),
*this, UsedAssumedInformation);
if (!SimplifiedRHS.hasValue())
return true;
if (!SimplifiedRHS.getValue())
return false;
RHS = *SimplifiedRHS;
LLVMContext &Ctx = Cmp.getContext();
// Handle the trivial case first in which we don't even need to think about
// null or non-null.
if (LHS == RHS && (Cmp.isTrueWhenEqual() || Cmp.isFalseWhenEqual())) {
Constant *NewVal =
ConstantInt::get(Type::getInt1Ty(Ctx), Cmp.isTrueWhenEqual());
if (!Union(*NewVal))
return false;
if (!UsedAssumedInformation)
indicateOptimisticFixpoint();
return true;
}
// From now on we only handle equalities (==, !=).
ICmpInst *ICmp = dyn_cast<ICmpInst>(&Cmp);
if (!ICmp || !ICmp->isEquality())
return false;
bool LHSIsNull = isa<ConstantPointerNull>(LHS);
bool RHSIsNull = isa<ConstantPointerNull>(RHS);
if (!LHSIsNull && !RHSIsNull)
return false;
// What is left is the nullptr ==/!= non-nullptr case. We'll use AANonNull on the
// non-nullptr operand and if we assume it's non-null we can conclude the
// result of the comparison.
assert((LHSIsNull || RHSIsNull) &&
"Expected nullptr versus non-nullptr comparison at this point");
// The index is the operand that we assume is not null.
unsigned PtrIdx = LHSIsNull;
auto &PtrNonNullAA = A.getAAFor<AANonNull>(
*this, IRPosition::value(*ICmp->getOperand(PtrIdx)),
DepClassTy::REQUIRED);
if (!PtrNonNullAA.isAssumedNonNull())
return false;
UsedAssumedInformation |= !PtrNonNullAA.isKnownNonNull();
// The new value depends on the predicate, true for != and false for ==.
Constant *NewVal = ConstantInt::get(
Type::getInt1Ty(Ctx), ICmp->getPredicate() == CmpInst::ICMP_NE);
if (!Union(*NewVal))
return false;
if (!UsedAssumedInformation)
indicateOptimisticFixpoint();
return true;
}
bool updateWithLoad(Attributor &A, LoadInst &L) {
auto Union = [&](Value &V) {
SimplifiedAssociatedValue = AA::combineOptionalValuesInAAValueLatice(
SimplifiedAssociatedValue, &V, L.getType());
return SimplifiedAssociatedValue != Optional<Value *>(nullptr);
};
return handleLoad(A, *this, L, Union);
}
/// Use the generic, non-optimistic InstSimplify functionality if we managed to
/// simplify any operand of the instruction \p I. Return true if successful;
/// in that case SimplifiedAssociatedValue will be updated.
bool handleGenericInst(Attributor &A, Instruction &I) {
bool SomeSimplified = false;
bool UsedAssumedInformation = false;
SmallVector<Value *, 8> NewOps(I.getNumOperands());
int Idx = 0;
for (Value *Op : I.operands()) {
const auto &SimplifiedOp =
A.getAssumedSimplified(IRPosition::value(*Op, getCallBaseContext()),
*this, UsedAssumedInformation);
// If we are not sure about any operand, we are not sure about the entire
// instruction, we'll wait.
if (!SimplifiedOp.hasValue())
return true;
if (SimplifiedOp.getValue())
NewOps[Idx] = SimplifiedOp.getValue();
else
NewOps[Idx] = Op;
SomeSimplified |= (NewOps[Idx] != Op);
++Idx;
}
// We won't bother with the InstSimplify interface if we didn't simplify any
// operand ourselves.
if (!SomeSimplified)
return false;
InformationCache &InfoCache = A.getInfoCache();
Function *F = I.getFunction();
const auto *DT =
InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*F);
const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
auto *AC = InfoCache.getAnalysisResultForFunction<AssumptionAnalysis>(*F);
OptimizationRemarkEmitter *ORE = nullptr;
const DataLayout &DL = I.getModule()->getDataLayout();
SimplifyQuery Q(DL, TLI, DT, AC, &I);
if (Value *SimplifiedI =
SimplifyInstructionWithOperands(&I, NewOps, Q, ORE)) {
SimplifiedAssociatedValue = AA::combineOptionalValuesInAAValueLatice(
SimplifiedAssociatedValue, SimplifiedI, I.getType());
return SimplifiedAssociatedValue != Optional<Value *>(nullptr);
}
return false;
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
auto Before = SimplifiedAssociatedValue;
auto VisitValueCB = [&](Value &V, const Instruction *CtxI, bool &,
bool Stripped) -> bool {
auto &AA = A.getAAFor<AAValueSimplify>(
*this, IRPosition::value(V, getCallBaseContext()),
DepClassTy::REQUIRED);
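// If nothing was stripped and the traversal reports our own position, we are
// looking at the associated value itself; try to simplify it directly below.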
if (!Stripped && this == &AA) {
if (auto *I = dyn_cast<Instruction>(&V)) {
if (auto *LI = dyn_cast<LoadInst>(&V))
if (updateWithLoad(A, *LI))
return true;
if (auto *Cmp = dyn_cast<CmpInst>(&V))
if (handleCmp(A, *Cmp))
return true;
if (handleGenericInst(A, *I))
return true;
}
// TODO: Look at the instruction and check recursively.
LLVM_DEBUG(dbgs() << "[ValueSimplify] Can't be stripped more : " << V
<< "\n");
return false;
}
return checkAndUpdate(A, *this,
IRPosition::value(V, getCallBaseContext()));
};
bool Dummy = false;
+ bool UsedAssumedInformation = false;
if (!genericValueTraversal<bool>(A, getIRPosition(), *this, Dummy,
VisitValueCB, getCtxI(),
+ UsedAssumedInformation,
/* UseValueSimplify */ false))
if (!askSimplifiedValueForOtherAAs(A))
return indicatePessimisticFixpoint();
// If a candidate was found in this update, return CHANGED.
return Before == SimplifiedAssociatedValue ? ChangeStatus::UNCHANGED
: ChangeStatus ::CHANGED;
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_FLOATING_ATTR(value_simplify)
}
};
struct AAValueSimplifyFunction : AAValueSimplifyImpl {
AAValueSimplifyFunction(const IRPosition &IRP, Attributor &A)
: AAValueSimplifyImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
SimplifiedAssociatedValue = nullptr;
indicateOptimisticFixpoint();
}
/// See AbstractAttribute::initialize(...).
ChangeStatus updateImpl(Attributor &A) override {
llvm_unreachable(
"AAValueSimplify(Function|CallSite)::updateImpl will not be called");
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_FN_ATTR(value_simplify)
}
};
struct AAValueSimplifyCallSite : AAValueSimplifyFunction {
AAValueSimplifyCallSite(const IRPosition &IRP, Attributor &A)
: AAValueSimplifyFunction(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_CS_ATTR(value_simplify)
}
};
struct AAValueSimplifyCallSiteReturned : AAValueSimplifyImpl {
AAValueSimplifyCallSiteReturned(const IRPosition &IRP, Attributor &A)
: AAValueSimplifyImpl(IRP, A) {}
void initialize(Attributor &A) override {
AAValueSimplifyImpl::initialize(A);
if (!getAssociatedFunction())
indicatePessimisticFixpoint();
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
auto Before = SimplifiedAssociatedValue;
auto &RetAA = A.getAAFor<AAReturnedValues>(
*this, IRPosition::function(*getAssociatedFunction()),
DepClassTy::REQUIRED);
auto PredForReturned =
[&](Value &RetVal, const SmallSetVector<ReturnInst *, 4> &RetInsts) {
bool UsedAssumedInformation = false;
Optional<Value *> CSRetVal = A.translateArgumentToCallSiteContent(
&RetVal, *cast<CallBase>(getCtxI()), *this,
UsedAssumedInformation);
SimplifiedAssociatedValue = AA::combineOptionalValuesInAAValueLatice(
SimplifiedAssociatedValue, CSRetVal, getAssociatedType());
return SimplifiedAssociatedValue != Optional<Value *>(nullptr);
};
if (!RetAA.checkForAllReturnedValuesAndReturnInsts(PredForReturned))
if (!askSimplifiedValueForOtherAAs(A))
return indicatePessimisticFixpoint();
return Before == SimplifiedAssociatedValue ? ChangeStatus::UNCHANGED
: ChangeStatus ::CHANGED;
}
void trackStatistics() const override {
STATS_DECLTRACK_CSRET_ATTR(value_simplify)
}
};
struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating {
AAValueSimplifyCallSiteArgument(const IRPosition &IRP, Attributor &A)
: AAValueSimplifyFloating(IRP, A) {}
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
if (auto *NewV = getReplacementValue(A)) {
Use &U = cast<CallBase>(&getAnchorValue())
->getArgOperandUse(getCallSiteArgNo());
if (A.changeUseAfterManifest(U, *NewV))
Changed = ChangeStatus::CHANGED;
}
return Changed | AAValueSimplify::manifest(A);
}
void trackStatistics() const override {
STATS_DECLTRACK_CSARG_ATTR(value_simplify)
}
};
/// ----------------------- Heap-To-Stack Conversion ---------------------------
struct AAHeapToStackFunction final : public AAHeapToStack {
struct AllocationInfo {
/// The call that allocates the memory.
CallBase *const CB;
/// The library function id for the allocation.
LibFunc LibraryFunctionId = NotLibFunc;
/// The status wrt. a rewrite.
enum {
STACK_DUE_TO_USE,
STACK_DUE_TO_FREE,
INVALID,
} Status = STACK_DUE_TO_USE;
/// Flag to indicate if we encountered a use that might free this allocation
/// but which is not in the deallocation infos.
bool HasPotentiallyFreeingUnknownUses = false;
/// The set of free calls that use this allocation.
SmallPtrSet<CallBase *, 1> PotentialFreeCalls{};
};
struct DeallocationInfo {
/// The call that deallocates the memory.
CallBase *const CB;
/// Flag to indicate if we don't know all objects this deallocation might
/// free.
bool MightFreeUnknownObjects = false;
/// The set of allocation calls that are potentially freed.
SmallPtrSet<CallBase *, 1> PotentialAllocationCalls{};
};
AAHeapToStackFunction(const IRPosition &IRP, Attributor &A)
: AAHeapToStack(IRP, A) {}
~AAHeapToStackFunction() {
// Ensure we call the destructor so we release any memory allocated in the
// sets.
for (auto &It : AllocationInfos)
It.getSecond()->~AllocationInfo();
for (auto &It : DeallocationInfos)
It.getSecond()->~DeallocationInfo();
}
void initialize(Attributor &A) override {
AAHeapToStack::initialize(A);
const Function *F = getAnchorScope();
const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
auto AllocationIdentifierCB = [&](Instruction &I) {
CallBase *CB = dyn_cast<CallBase>(&I);
if (!CB)
return true;
if (isFreeCall(CB, TLI)) {
DeallocationInfos[CB] = new (A.Allocator) DeallocationInfo{CB};
return true;
}
// To do heap to stack, we need to know that the allocation itself is
// removable once uses are rewritten, and that we can initialize the
// alloca to the same pattern as the original allocation result.
if (isAllocationFn(CB, TLI) && isAllocRemovable(CB, TLI)) {
auto *I8Ty = Type::getInt8Ty(CB->getParent()->getContext());
if (nullptr != getInitialValueOfAllocation(CB, TLI, I8Ty)) {
AllocationInfo *AI = new (A.Allocator) AllocationInfo{CB};
AllocationInfos[CB] = AI;
TLI->getLibFunc(*CB, AI->LibraryFunctionId);
}
}
return true;
};
bool UsedAssumedInformation = false;
bool Success = A.checkForAllCallLikeInstructions(
AllocationIdentifierCB, *this, UsedAssumedInformation,
/* CheckBBLivenessOnly */ false,
/* CheckPotentiallyDead */ true);
(void)Success;
assert(Success && "Did not expect the call base visit callback to fail!");
}
const std::string getAsStr() const override {
unsigned NumH2SMallocs = 0, NumInvalidMallocs = 0;
for (const auto &It : AllocationInfos) {
if (It.second->Status == AllocationInfo::INVALID)
++NumInvalidMallocs;
else
++NumH2SMallocs;
}
return "[H2S] Mallocs Good/Bad: " + std::to_string(NumH2SMallocs) + "/" +
std::to_string(NumInvalidMallocs);
}
/// See AbstractAttribute::trackStatistics().
void trackStatistics() const override {
STATS_DECL(
MallocCalls, Function,
"Number of malloc/calloc/aligned_alloc calls converted to allocas");
for (auto &It : AllocationInfos)
if (It.second->Status != AllocationInfo::INVALID)
++BUILD_STAT_NAME(MallocCalls, Function);
}
bool isAssumedHeapToStack(const CallBase &CB) const override {
if (isValidState())
if (AllocationInfo *AI = AllocationInfos.lookup(&CB))
return AI->Status != AllocationInfo::INVALID;
return false;
}
bool isAssumedHeapToStackRemovedFree(CallBase &CB) const override {
if (!isValidState())
return false;
for (auto &It : AllocationInfos) {
AllocationInfo &AI = *It.second;
if (AI.Status == AllocationInfo::INVALID)
continue;
if (AI.PotentialFreeCalls.count(&CB))
return true;
}
return false;
}
ChangeStatus manifest(Attributor &A) override {
assert(getState().isValidState() &&
"Attempted to manifest an invalid state!");
ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
Function *F = getAnchorScope();
const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
for (auto &It : AllocationInfos) {
AllocationInfo &AI = *It.second;
if (AI.Status == AllocationInfo::INVALID)
continue;
for (CallBase *FreeCall : AI.PotentialFreeCalls) {
LLVM_DEBUG(dbgs() << "H2S: Removing free call: " << *FreeCall << "\n");
A.deleteAfterManifest(*FreeCall);
HasChanged = ChangeStatus::CHANGED;
}
LLVM_DEBUG(dbgs() << "H2S: Removing malloc-like call: " << *AI.CB
<< "\n");
auto Remark = [&](OptimizationRemark OR) {
LibFunc IsAllocShared;
if (TLI->getLibFunc(*AI.CB, IsAllocShared))
if (IsAllocShared == LibFunc___kmpc_alloc_shared)
return OR << "Moving globalized variable to the stack.";
return OR << "Moving memory allocation from the heap to the stack.";
};
if (AI.LibraryFunctionId == LibFunc___kmpc_alloc_shared)
A.emitRemark<OptimizationRemark>(AI.CB, "OMP110", Remark);
else
A.emitRemark<OptimizationRemark>(AI.CB, "HeapToStack", Remark);
+ const DataLayout &DL = A.getInfoCache().getDL();
Value *Size;
Optional<APInt> SizeAPI = getSize(A, *this, AI);
if (SizeAPI.hasValue()) {
Size = ConstantInt::get(AI.CB->getContext(), *SizeAPI);
} else {
LLVMContext &Ctx = AI.CB->getContext();
- auto &DL = A.getInfoCache().getDL();
ObjectSizeOpts Opts;
ObjectSizeOffsetEvaluator Eval(DL, TLI, Ctx, Opts);
SizeOffsetEvalType SizeOffsetPair = Eval.compute(AI.CB);
assert(SizeOffsetPair != ObjectSizeOffsetEvaluator::unknown() &&
cast<ConstantInt>(SizeOffsetPair.second)->isZero());
Size = SizeOffsetPair.first;
}
Align Alignment(1);
if (MaybeAlign RetAlign = AI.CB->getRetAlign())
Alignment = max(Alignment, RetAlign);
if (Value *Align = getAllocAlignment(AI.CB, TLI)) {
Optional<APInt> AlignmentAPI = getAPInt(A, *this, *Align);
assert(AlignmentAPI.hasValue() &&
"Expected an alignment during manifest!");
Alignment =
max(Alignment, MaybeAlign(AlignmentAPI.getValue().getZExtValue()));
}
- unsigned AS = cast<PointerType>(AI.CB->getType())->getAddressSpace();
- Instruction *Alloca =
- new AllocaInst(Type::getInt8Ty(F->getContext()), AS, Size, Alignment,
- "", AI.CB->getNextNode());
+ // TODO: Hoist the alloca towards the function entry.
+ unsigned AS = DL.getAllocaAddrSpace();
+ Instruction *Alloca = new AllocaInst(Type::getInt8Ty(F->getContext()), AS,
+ Size, Alignment, "", AI.CB);
if (Alloca->getType() != AI.CB->getType())
- Alloca = new BitCastInst(Alloca, AI.CB->getType(), "malloc_bc",
- Alloca->getNextNode());
+ Alloca = BitCastInst::CreatePointerBitCastOrAddrSpaceCast(
+ Alloca, AI.CB->getType(), "malloc_cast", AI.CB);
auto *I8Ty = Type::getInt8Ty(F->getContext());
auto *InitVal = getInitialValueOfAllocation(AI.CB, TLI, I8Ty);
assert(InitVal &&
"Must be able to materialize initial memory state of allocation");
A.changeValueAfterManifest(*AI.CB, *Alloca);
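// The allocation call is deleted below. If it was an invoke, its block loses
// its terminator, so branch to the normal destination explicitly.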
if (auto *II = dyn_cast<InvokeInst>(AI.CB)) {
auto *NBB = II->getNormalDest();
BranchInst::Create(NBB, AI.CB->getParent());
A.deleteAfterManifest(*AI.CB);
} else {
A.deleteAfterManifest(*AI.CB);
}
// Initialize the alloca with the same value as used by the allocation
// function. We can skip undef as the initial value of an alloca is
// undef, and the memset would simply end up being DSEd.
if (!isa<UndefValue>(InitVal)) {
IRBuilder<> Builder(Alloca->getNextNode());
// TODO: Use alignment above if align!=1
Builder.CreateMemSet(Alloca, InitVal, Size, None);
}
HasChanged = ChangeStatus::CHANGED;
}
return HasChanged;
}
Optional<APInt> getAPInt(Attributor &A, const AbstractAttribute &AA,
Value &V) {
bool UsedAssumedInformation = false;
Optional<Constant *> SimpleV =
A.getAssumedConstant(V, AA, UsedAssumedInformation);
if (!SimpleV.hasValue())
return APInt(64, 0);
if (auto *CI = dyn_cast_or_null<ConstantInt>(SimpleV.getValue()))
return CI->getValue();
return llvm::None;
}
Optional<APInt> getSize(Attributor &A, const AbstractAttribute &AA,
AllocationInfo &AI) {
auto Mapper = [&](const Value *V) -> const Value * {
bool UsedAssumedInformation = false;
if (Optional<Constant *> SimpleV =
A.getAssumedConstant(*V, AA, UsedAssumedInformation))
if (*SimpleV)
return *SimpleV;
return V;
};
const Function *F = getAnchorScope();
const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
return getAllocSize(AI.CB, TLI, Mapper);
}
/// Collection of all malloc-like calls in a function with associated
/// information.
DenseMap<CallBase *, AllocationInfo *> AllocationInfos;
/// Collection of all free-like calls in a function with associated
/// information.
DenseMap<CallBase *, DeallocationInfo *> DeallocationInfos;
ChangeStatus updateImpl(Attributor &A) override;
};
ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
const Function *F = getAnchorScope();
const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
const auto &LivenessAA =
A.getAAFor<AAIsDead>(*this, IRPosition::function(*F), DepClassTy::NONE);
MustBeExecutedContextExplorer &Explorer =
A.getInfoCache().getMustBeExecutedContextExplorer();
bool StackIsAccessibleByOtherThreads =
A.getInfoCache().stackIsAccessibleByOtherThreads();
// Flag to ensure we update our deallocation information at most once per
// updateImpl call and only if we use the free check reasoning.
bool HasUpdatedFrees = false;
auto UpdateFrees = [&]() {
HasUpdatedFrees = true;
for (auto &It : DeallocationInfos) {
DeallocationInfo &DI = *It.second;
// For now we cannot use deallocations that have unknown inputs, skip
// them.
if (DI.MightFreeUnknownObjects)
continue;
// No need to analyze dead calls, ignore them instead.
bool UsedAssumedInformation = false;
if (A.isAssumedDead(*DI.CB, this, &LivenessAA, UsedAssumedInformation,
/* CheckBBLivenessOnly */ true))
continue;
// Use the optimistic version to get the freed objects, ignoring dead
// branches etc.
SmallVector<Value *, 8> Objects;
if (!AA::getAssumedUnderlyingObjects(A, *DI.CB->getArgOperand(0), Objects,
- *this, DI.CB)) {
+ *this, DI.CB,
+ UsedAssumedInformation)) {
LLVM_DEBUG(
dbgs()
<< "[H2S] Unexpected failure in getAssumedUnderlyingObjects!\n");
DI.MightFreeUnknownObjects = true;
continue;
}
// Check each object explicitly.
for (auto *Obj : Objects) {
// Free of null and undef can be ignored as no-ops (or UB in the latter
// case).
if (isa<ConstantPointerNull>(Obj) || isa<UndefValue>(Obj))
continue;
CallBase *ObjCB = dyn_cast<CallBase>(Obj);
if (!ObjCB) {
LLVM_DEBUG(dbgs()
<< "[H2S] Free of a non-call object: " << *Obj << "\n");
DI.MightFreeUnknownObjects = true;
continue;
}
AllocationInfo *AI = AllocationInfos.lookup(ObjCB);
if (!AI) {
LLVM_DEBUG(dbgs() << "[H2S] Free of a non-allocation object: " << *Obj
<< "\n");
DI.MightFreeUnknownObjects = true;
continue;
}
DI.PotentialAllocationCalls.insert(ObjCB);
}
}
};
auto FreeCheck = [&](AllocationInfo &AI) {
// If the stack is not accessible by other threads, the "must-free" logic
// doesn't apply as the pointer could be shared and needs to be placed in
// "shareable" memory.
if (!StackIsAccessibleByOtherThreads) {
auto &NoSyncAA =
A.getAAFor<AANoSync>(*this, getIRPosition(), DepClassTy::OPTIONAL);
if (!NoSyncAA.isAssumedNoSync()) {
LLVM_DEBUG(
dbgs() << "[H2S] found an escaping use, stack is not accessible by "
"other threads and function is not nosync:\n");
return false;
}
}
if (!HasUpdatedFrees)
UpdateFrees();
// TODO: Allow multi-exit functions that have different free calls.
if (AI.PotentialFreeCalls.size() != 1) {
LLVM_DEBUG(dbgs() << "[H2S] did not find one free call but "
<< AI.PotentialFreeCalls.size() << "\n");
return false;
}
CallBase *UniqueFree = *AI.PotentialFreeCalls.begin();
DeallocationInfo *DI = DeallocationInfos.lookup(UniqueFree);
if (!DI) {
LLVM_DEBUG(
dbgs() << "[H2S] unique free call was not known as deallocation call "
<< *UniqueFree << "\n");
return false;
}
if (DI->MightFreeUnknownObjects) {
LLVM_DEBUG(
dbgs() << "[H2S] unique free call might free unknown allocations\n");
return false;
}
if (DI->PotentialAllocationCalls.size() > 1) {
LLVM_DEBUG(dbgs() << "[H2S] unique free call might free "
<< DI->PotentialAllocationCalls.size()
<< " different allocations\n");
return false;
}
if (*DI->PotentialAllocationCalls.begin() != AI.CB) {
LLVM_DEBUG(
dbgs()
<< "[H2S] unique free call not known to free this allocation but "
<< **DI->PotentialAllocationCalls.begin() << "\n");
return false;
}
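// An invoke terminates its block, so start the must-be-executed exploration
// at the invoke itself; otherwise start right after the allocation call.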
Instruction *CtxI = isa<InvokeInst>(AI.CB) ? AI.CB : AI.CB->getNextNode();
if (!Explorer.findInContextOf(UniqueFree, CtxI)) {
LLVM_DEBUG(
dbgs()
<< "[H2S] unique free call might not be executed with the allocation "
<< *UniqueFree << "\n");
return false;
}
return true;
};
auto UsesCheck = [&](AllocationInfo &AI) {
bool ValidUsesOnly = true;
auto Pred = [&](const Use &U, bool &Follow) -> bool {
Instruction *UserI = cast<Instruction>(U.getUser());
if (isa<LoadInst>(UserI))
return true;
if (auto *SI = dyn_cast<StoreInst>(UserI)) {
if (SI->getValueOperand() == U.get()) {
LLVM_DEBUG(dbgs()
<< "[H2S] escaping store to memory: " << *UserI << "\n");
ValidUsesOnly = false;
} else {
// A store into the malloc'ed memory is fine.
}
return true;
}
if (auto *CB = dyn_cast<CallBase>(UserI)) {
if (!CB->isArgOperand(&U) || CB->isLifetimeStartOrEnd())
return true;
if (DeallocationInfos.count(CB)) {
AI.PotentialFreeCalls.insert(CB);
return true;
}
unsigned ArgNo = CB->getArgOperandNo(&U);
const auto &NoCaptureAA = A.getAAFor<AANoCapture>(
*this, IRPosition::callsite_argument(*CB, ArgNo),
DepClassTy::OPTIONAL);
// If a call site argument use is nofree, we are fine.
const auto &ArgNoFreeAA = A.getAAFor<AANoFree>(
*this, IRPosition::callsite_argument(*CB, ArgNo),
DepClassTy::OPTIONAL);
bool MaybeCaptured = !NoCaptureAA.isAssumedNoCapture();
bool MaybeFreed = !ArgNoFreeAA.isAssumedNoFree();
if (MaybeCaptured ||
(AI.LibraryFunctionId != LibFunc___kmpc_alloc_shared &&
MaybeFreed)) {
AI.HasPotentiallyFreeingUnknownUses |= MaybeFreed;
// Emit a missed remark if this is missed OpenMP globalization.
auto Remark = [&](OptimizationRemarkMissed ORM) {
return ORM
<< "Could not move globalized variable to the stack. "
"Variable is potentially captured in call. Mark "
"parameter as `__attribute__((noescape))` to override.";
};
if (ValidUsesOnly &&
AI.LibraryFunctionId == LibFunc___kmpc_alloc_shared)
A.emitRemark<OptimizationRemarkMissed>(AI.CB, "OMP113", Remark);
LLVM_DEBUG(dbgs() << "[H2S] Bad user: " << *UserI << "\n");
ValidUsesOnly = false;
}
return true;
}
if (isa<GetElementPtrInst>(UserI) || isa<BitCastInst>(UserI) ||
isa<PHINode>(UserI) || isa<SelectInst>(UserI)) {
Follow = true;
return true;
}
// Unknown user for which we cannot track uses further (in a way that
// makes sense).
LLVM_DEBUG(dbgs() << "[H2S] Unknown user: " << *UserI << "\n");
ValidUsesOnly = false;
return true;
};
if (!A.checkForAllUses(Pred, *this, *AI.CB))
return false;
return ValidUsesOnly;
};
// The actual update starts here. We look at all allocations and depending on
// their status perform the appropriate check(s).
for (auto &It : AllocationInfos) {
AllocationInfo &AI = *It.second;
if (AI.Status == AllocationInfo::INVALID)
continue;
if (Value *Align = getAllocAlignment(AI.CB, TLI)) {
if (!getAPInt(A, *this, *Align)) {
// Can't generate an alloca which respects the required alignment
// on the allocation.
LLVM_DEBUG(dbgs() << "[H2S] Unknown allocation alignment: " << *AI.CB
<< "\n");
AI.Status = AllocationInfo::INVALID;
Changed = ChangeStatus::CHANGED;
continue;
}
}
if (MaxHeapToStackSize != -1) {
Optional<APInt> Size = getSize(A, *this, AI);
if (!Size.hasValue() || Size.getValue().ugt(MaxHeapToStackSize)) {
LLVM_DEBUG({
if (!Size.hasValue())
dbgs() << "[H2S] Unknown allocation size: " << *AI.CB << "\n";
else
dbgs() << "[H2S] Allocation size too large: " << *AI.CB << " vs. "
<< MaxHeapToStackSize << "\n";
});
AI.Status = AllocationInfo::INVALID;
Changed = ChangeStatus::CHANGED;
continue;
}
}
switch (AI.Status) {
case AllocationInfo::STACK_DUE_TO_USE:
if (UsesCheck(AI))
continue;
AI.Status = AllocationInfo::STACK_DUE_TO_FREE;
LLVM_FALLTHROUGH;
case AllocationInfo::STACK_DUE_TO_FREE:
if (FreeCheck(AI))
continue;
AI.Status = AllocationInfo::INVALID;
Changed = ChangeStatus::CHANGED;
continue;
case AllocationInfo::INVALID:
llvm_unreachable("Invalid allocations should never reach this point!");
};
}
return Changed;
}
/// ----------------------- Privatizable Pointers ------------------------------
struct AAPrivatizablePtrImpl : public AAPrivatizablePtr {
AAPrivatizablePtrImpl(const IRPosition &IRP, Attributor &A)
: AAPrivatizablePtr(IRP, A), PrivatizableType(llvm::None) {}
ChangeStatus indicatePessimisticFixpoint() override {
AAPrivatizablePtr::indicatePessimisticFixpoint();
PrivatizableType = nullptr;
return ChangeStatus::CHANGED;
}
/// Identify the type we can choose for a private copy of the underlying
/// argument. None means it is not clear yet, nullptr means there is none.
virtual Optional<Type *> identifyPrivatizableType(Attributor &A) = 0;
/// Return a privatizable type that encloses both T0 and T1.
/// TODO: This is merely a stub for now as we should manage a mapping as well.
Optional<Type *> combineTypes(Optional<Type *> T0, Optional<Type *> T1) {
if (!T0.hasValue())
return T1;
if (!T1.hasValue())
return T0;
if (T0 == T1)
return T0;
return nullptr;
}
Optional<Type *> getPrivatizableType() const override {
return PrivatizableType;
}
const std::string getAsStr() const override {
return isAssumedPrivatizablePtr() ? "[priv]" : "[no-priv]";
}
protected:
Optional<Type *> PrivatizableType;
};
// TODO: Do this for call site arguments (probably also other values) as well.
struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
AAPrivatizablePtrArgument(const IRPosition &IRP, Attributor &A)
: AAPrivatizablePtrImpl(IRP, A) {}
/// See AAPrivatizablePtrImpl::identifyPrivatizableType(...)
Optional<Type *> identifyPrivatizableType(Attributor &A) override {
// If this is a byval argument and we know all the call sites (so we can
// rewrite them), there is no need to check them explicitly.
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
if (getIRPosition().hasAttr(Attribute::ByVal) &&
A.checkForAllCallSites([](AbstractCallSite ACS) { return true; }, *this,
- true, AllCallSitesKnown))
+ true, UsedAssumedInformation))
return getAssociatedValue().getType()->getPointerElementType();
Optional<Type *> Ty;
unsigned ArgNo = getIRPosition().getCallSiteArgNo();
// Make sure the associated call site argument has the same type at all call
// sites and that it is an allocation we know is safe to privatize; for now that
// means we only allow alloca instructions.
// TODO: We can additionally analyze the accesses in the callee to create
// the type from that information instead. That is a little more
// involved and will be done in a follow up patch.
auto CallSiteCheck = [&](AbstractCallSite ACS) {
IRPosition ACSArgPos = IRPosition::callsite_argument(ACS, ArgNo);
// Check if a corresponding argument was found or if it is one not
// associated (which can happen for callback calls).
if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID)
return false;
// Check that all call sites agree on a type.
auto &PrivCSArgAA =
A.getAAFor<AAPrivatizablePtr>(*this, ACSArgPos, DepClassTy::REQUIRED);
Optional<Type *> CSTy = PrivCSArgAA.getPrivatizableType();
LLVM_DEBUG({
dbgs() << "[AAPrivatizablePtr] ACSPos: " << ACSArgPos << ", CSTy: ";
if (CSTy.hasValue() && CSTy.getValue())
CSTy.getValue()->print(dbgs());
else if (CSTy.hasValue())
dbgs() << "<nullptr>";
else
dbgs() << "<none>";
});
Ty = combineTypes(Ty, CSTy);
LLVM_DEBUG({
dbgs() << " : New Type: ";
if (Ty.hasValue() && Ty.getValue())
Ty.getValue()->print(dbgs());
else if (Ty.hasValue())
dbgs() << "<nullptr>";
else
dbgs() << "<none>";
dbgs() << "\n";
});
return !Ty.hasValue() || Ty.getValue();
};
- if (!A.checkForAllCallSites(CallSiteCheck, *this, true, AllCallSitesKnown))
+ if (!A.checkForAllCallSites(CallSiteCheck, *this, true,
+ UsedAssumedInformation))
return nullptr;
return Ty;
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
PrivatizableType = identifyPrivatizableType(A);
if (!PrivatizableType.hasValue())
return ChangeStatus::UNCHANGED;
if (!PrivatizableType.getValue())
return indicatePessimisticFixpoint();
// The dependence is optional so we don't give up even if we cannot determine
// the alignment.
A.getAAFor<AAAlign>(*this, IRPosition::value(getAssociatedValue()),
DepClassTy::OPTIONAL);
// Avoid arguments with padding for now.
if (!getIRPosition().hasAttr(Attribute::ByVal) &&
!ArgumentPromotionPass::isDenselyPacked(PrivatizableType.getValue(),
A.getInfoCache().getDL())) {
LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Padding detected\n");
return indicatePessimisticFixpoint();
}
// Collect the types that will replace the privatizable type in the function
// signature.
SmallVector<Type *, 16> ReplacementTypes;
identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes);
// Verify callee and caller agree on how the promoted argument would be
// passed.
Function &Fn = *getIRPosition().getAnchorScope();
const auto *TTI =
A.getInfoCache().getAnalysisResultForFunction<TargetIRAnalysis>(Fn);
if (!TTI) {
LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Missing TTI for function "
<< Fn.getName() << "\n");
return indicatePessimisticFixpoint();
}
auto CallSiteCheck = [&](AbstractCallSite ACS) {
CallBase *CB = ACS.getInstruction();
return TTI->areTypesABICompatible(
CB->getCaller(), CB->getCalledFunction(), ReplacementTypes);
};
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
if (!A.checkForAllCallSites(CallSiteCheck, *this, true,
- AllCallSitesKnown)) {
+ UsedAssumedInformation)) {
LLVM_DEBUG(
dbgs() << "[AAPrivatizablePtr] ABI incompatibility detected for "
<< Fn.getName() << "\n");
return indicatePessimisticFixpoint();
}
// Register a rewrite of the argument.
Argument *Arg = getAssociatedArgument();
if (!A.isValidFunctionSignatureRewrite(*Arg, ReplacementTypes)) {
LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Rewrite not valid\n");
return indicatePessimisticFixpoint();
}
unsigned ArgNo = Arg->getArgNo();
// Helper to check if for the given call site the associated argument is
// passed to a callback where the privatization would be different.
auto IsCompatiblePrivArgOfCallback = [&](CallBase &CB) {
SmallVector<const Use *, 4> CallbackUses;
AbstractCallSite::getCallbackUses(CB, CallbackUses);
for (const Use *U : CallbackUses) {
AbstractCallSite CBACS(U);
assert(CBACS && CBACS.isCallbackCall());
for (Argument &CBArg : CBACS.getCalledFunction()->args()) {
int CBArgNo = CBACS.getCallArgOperandNo(CBArg);
LLVM_DEBUG({
dbgs()
<< "[AAPrivatizablePtr] Argument " << *Arg
<< "check if can be privatized in the context of its parent ("
<< Arg->getParent()->getName()
<< ")\n[AAPrivatizablePtr] because it is an argument in a "
"callback ("
<< CBArgNo << "@" << CBACS.getCalledFunction()->getName()
<< ")\n[AAPrivatizablePtr] " << CBArg << " : "
<< CBACS.getCallArgOperand(CBArg) << " vs "
<< CB.getArgOperand(ArgNo) << "\n"
<< "[AAPrivatizablePtr] " << CBArg << " : "
<< CBACS.getCallArgOperandNo(CBArg) << " vs " << ArgNo << "\n";
});
if (CBArgNo != int(ArgNo))
continue;
const auto &CBArgPrivAA = A.getAAFor<AAPrivatizablePtr>(
*this, IRPosition::argument(CBArg), DepClassTy::REQUIRED);
if (CBArgPrivAA.isValidState()) {
auto CBArgPrivTy = CBArgPrivAA.getPrivatizableType();
if (!CBArgPrivTy.hasValue())
continue;
if (CBArgPrivTy.getValue() == PrivatizableType)
continue;
}
LLVM_DEBUG({
dbgs() << "[AAPrivatizablePtr] Argument " << *Arg
<< " cannot be privatized in the context of its parent ("
<< Arg->getParent()->getName()
<< ")\n[AAPrivatizablePtr] because it is an argument in a "
"callback ("
<< CBArgNo << "@" << CBACS.getCalledFunction()->getName()
<< ").\n[AAPrivatizablePtr] for which the argument "
"privatization is not compatible.\n";
});
return false;
}
}
return true;
};
// Helper to check if for the given call site the associated argument is
// passed to a direct call where the privatization would be different.
auto IsCompatiblePrivArgOfDirectCS = [&](AbstractCallSite ACS) {
CallBase *DC = cast<CallBase>(ACS.getInstruction());
int DCArgNo = ACS.getCallArgOperandNo(ArgNo);
assert(DCArgNo >= 0 && unsigned(DCArgNo) < DC->arg_size() &&
"Expected a direct call operand for callback call operand");
LLVM_DEBUG({
dbgs() << "[AAPrivatizablePtr] Argument " << *Arg
<< " check if be privatized in the context of its parent ("
<< Arg->getParent()->getName()
<< ")\n[AAPrivatizablePtr] because it is an argument in a "
"direct call of ("
<< DCArgNo << "@" << DC->getCalledFunction()->getName()
<< ").\n";
});
Function *DCCallee = DC->getCalledFunction();
if (unsigned(DCArgNo) < DCCallee->arg_size()) {
const auto &DCArgPrivAA = A.getAAFor<AAPrivatizablePtr>(
*this, IRPosition::argument(*DCCallee->getArg(DCArgNo)),
DepClassTy::REQUIRED);
if (DCArgPrivAA.isValidState()) {
auto DCArgPrivTy = DCArgPrivAA.getPrivatizableType();
if (!DCArgPrivTy.hasValue())
return true;
if (DCArgPrivTy.getValue() == PrivatizableType)
return true;
}
}
LLVM_DEBUG({
dbgs() << "[AAPrivatizablePtr] Argument " << *Arg
<< " cannot be privatized in the context of its parent ("
<< Arg->getParent()->getName()
<< ")\n[AAPrivatizablePtr] because it is an argument in a "
"direct call of ("
<< ACS.getInstruction()->getCalledFunction()->getName()
<< ").\n[AAPrivatizablePtr] for which the argument "
"privatization is not compatible.\n";
});
return false;
};
// Helper to check if the associated argument is used at the given abstract
// call site in a way that is incompatible with the privatization assumed
// here.
auto IsCompatiblePrivArgOfOtherCallSite = [&](AbstractCallSite ACS) {
if (ACS.isDirectCall())
return IsCompatiblePrivArgOfCallback(*ACS.getInstruction());
if (ACS.isCallbackCall())
return IsCompatiblePrivArgOfDirectCS(ACS);
return false;
};
if (!A.checkForAllCallSites(IsCompatiblePrivArgOfOtherCallSite, *this, true,
- AllCallSitesKnown))
+ UsedAssumedInformation))
return indicatePessimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
/// Given a type to privatize \p PrivType, collect the constituents (which are
/// used) in \p ReplacementTypes.
static void
identifyReplacementTypes(Type *PrivType,
SmallVectorImpl<Type *> &ReplacementTypes) {
// TODO: For now we expand the privatization type to the fullest which can
// lead to dead arguments that need to be removed later.
assert(PrivType && "Expected privatizable type!");
// Traverse the type, extract constituent types on the outermost level.
if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++)
ReplacementTypes.push_back(PrivStructType->getElementType(u));
} else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) {
ReplacementTypes.append(PrivArrayType->getNumElements(),
PrivArrayType->getElementType());
} else {
ReplacementTypes.push_back(PrivType);
}
}
/// Initialize \p Base according to the type \p PrivType at position \p IP.
/// The values needed are taken from the arguments of \p F starting at
/// position \p ArgNo.
static void createInitialization(Type *PrivType, Value &Base, Function &F,
unsigned ArgNo, Instruction &IP) {
assert(PrivType && "Expected privatizable type!");
IRBuilder<NoFolder> IRB(&IP);
const DataLayout &DL = F.getParent()->getDataLayout();
// Traverse the type, build GEPs and stores.
if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType);
for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) {
Type *PointeeTy = PrivStructType->getElementType(u)->getPointerTo();
Value *Ptr =
constructPointer(PointeeTy, PrivType, &Base,
PrivStructLayout->getElementOffset(u), IRB, DL);
new StoreInst(F.getArg(ArgNo + u), Ptr, &IP);
}
} else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) {
Type *PointeeTy = PrivArrayType->getElementType();
Type *PointeePtrTy = PointeeTy->getPointerTo();
uint64_t PointeeTySize = DL.getTypeStoreSize(PointeeTy);
for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) {
Value *Ptr = constructPointer(PointeePtrTy, PrivType, &Base,
u * PointeeTySize, IRB, DL);
new StoreInst(F.getArg(ArgNo + u), Ptr, &IP);
}
} else {
new StoreInst(F.getArg(ArgNo), &Base, &IP);
}
}
/// Extract values from \p Base according to the type \p PrivType at the
/// call position \p ACS. The values are appended to \p ReplacementValues.
void createReplacementValues(Align Alignment, Type *PrivType,
AbstractCallSite ACS, Value *Base,
SmallVectorImpl<Value *> &ReplacementValues) {
assert(Base && "Expected base value!");
assert(PrivType && "Expected privatizable type!");
Instruction *IP = ACS.getInstruction();
IRBuilder<NoFolder> IRB(IP);
const DataLayout &DL = IP->getModule()->getDataLayout();
Type *PrivPtrType = PrivType->getPointerTo();
if (Base->getType() != PrivPtrType)
Base = BitCastInst::CreatePointerBitCastOrAddrSpaceCast(
Base, PrivPtrType, "", ACS.getInstruction());
// Traverse the type, build GEPs and loads.
if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType);
for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) {
Type *PointeeTy = PrivStructType->getElementType(u);
Value *Ptr =
constructPointer(PointeeTy->getPointerTo(), PrivType, Base,
PrivStructLayout->getElementOffset(u), IRB, DL);
LoadInst *L = new LoadInst(PointeeTy, Ptr, "", IP);
L->setAlignment(Alignment);
ReplacementValues.push_back(L);
}
} else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) {
Type *PointeeTy = PrivArrayType->getElementType();
uint64_t PointeeTySize = DL.getTypeStoreSize(PointeeTy);
Type *PointeePtrTy = PointeeTy->getPointerTo();
for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) {
Value *Ptr = constructPointer(PointeePtrTy, PrivType, Base,
u * PointeeTySize, IRB, DL);
LoadInst *L = new LoadInst(PointeeTy, Ptr, "", IP);
L->setAlignment(Alignment);
ReplacementValues.push_back(L);
}
} else {
LoadInst *L = new LoadInst(PrivType, Base, "", IP);
L->setAlignment(Alignment);
ReplacementValues.push_back(L);
}
}
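// Illustrative sketch (assumed example, not from the original change): for a
// call passing `%priv` of type `{ i32, i64 }*`, the code above emits, right
// before the call, one GEP/load pair per constituent, e.g.
//   %v0 = load i32, i32* %gep0, align A
//   %v1 = load i64, i64* %gep1, align A
// and appends %v0/%v1 to ReplacementValues so they can be passed explicitly.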
/// See AbstractAttribute::manifest(...)
ChangeStatus manifest(Attributor &A) override {
if (!PrivatizableType.hasValue())
return ChangeStatus::UNCHANGED;
assert(PrivatizableType.getValue() && "Expected privatizable type!");
// Collect all tail calls in the function as we cannot allow new allocas to
// escape into tail recursion.
// TODO: Be smarter about new allocas escaping into tail calls.
SmallVector<CallInst *, 16> TailCalls;
bool UsedAssumedInformation = false;
if (!A.checkForAllInstructions(
[&](Instruction &I) {
CallInst &CI = cast<CallInst>(I);
if (CI.isTailCall())
TailCalls.push_back(&CI);
return true;
},
*this, {Instruction::Call}, UsedAssumedInformation))
return ChangeStatus::UNCHANGED;
Argument *Arg = getAssociatedArgument();
// Query AAAlign attribute for alignment of associated argument to
// determine the best alignment of loads.
const auto &AlignAA =
A.getAAFor<AAAlign>(*this, IRPosition::value(*Arg), DepClassTy::NONE);
// Callback to repair the associated function. A new alloca is placed at the
// beginning and initialized with the values passed through arguments. The
// new alloca replaces the use of the old pointer argument.
Attributor::ArgumentReplacementInfo::CalleeRepairCBTy FnRepairCB =
[=](const Attributor::ArgumentReplacementInfo &ARI,
Function &ReplacementFn, Function::arg_iterator ArgIt) {
BasicBlock &EntryBB = ReplacementFn.getEntryBlock();
Instruction *IP = &*EntryBB.getFirstInsertionPt();
const DataLayout &DL = IP->getModule()->getDataLayout();
unsigned AS = DL.getAllocaAddrSpace();
Instruction *AI = new AllocaInst(PrivatizableType.getValue(), AS,
Arg->getName() + ".priv", IP);
createInitialization(PrivatizableType.getValue(), *AI, ReplacementFn,
ArgIt->getArgNo(), *IP);
if (AI->getType() != Arg->getType())
AI = BitCastInst::CreatePointerBitCastOrAddrSpaceCast(
AI, Arg->getType(), "", IP);
Arg->replaceAllUsesWith(AI);
for (CallInst *CI : TailCalls)
CI->setTailCall(false);
};
// Callback to repair a call site of the associated function. The elements
// of the privatizable type are loaded prior to the call and passed to the
// new function version.
Attributor::ArgumentReplacementInfo::ACSRepairCBTy ACSRepairCB =
[=, &AlignAA](const Attributor::ArgumentReplacementInfo &ARI,
AbstractCallSite ACS,
SmallVectorImpl<Value *> &NewArgOperands) {
// When no alignment is specified for the load instruction,
// natural alignment is assumed.
createReplacementValues(
assumeAligned(AlignAA.getAssumedAlign()),
PrivatizableType.getValue(), ACS,
ACS.getCallArgOperand(ARI.getReplacedArg().getArgNo()),
NewArgOperands);
};
// Collect the types that will replace the privatizable type in the function
// signature.
SmallVector<Type *, 16> ReplacementTypes;
identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes);
// Register a rewrite of the argument.
if (A.registerFunctionSignatureRewrite(*Arg, ReplacementTypes,
std::move(FnRepairCB),
std::move(ACSRepairCB)))
return ChangeStatus::CHANGED;
return ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_ARG_ATTR(privatizable_ptr);
}
};
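// Illustrative sketch of the overall rewrite (assumed example, not from the
// original change; all names are made up):
//   before: define internal void @f(i32* %p)    ; %p is privatizable
//           call void @f(i32* %buf)
//   after:  define internal void @f(i32 %p.0) { ; alloca-backed private copy
//             %p.priv = alloca i32
//             store i32 %p.0, i32* %p.priv
//             ... former uses of %p now use %p.priv ...
//           }
//           %v = load i32, i32* %buf
//           call void @f(i32 %v)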
struct AAPrivatizablePtrFloating : public AAPrivatizablePtrImpl {
AAPrivatizablePtrFloating(const IRPosition &IRP, Attributor &A)
: AAPrivatizablePtrImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
virtual void initialize(Attributor &A) override {
// TODO: We can privatize more than arguments.
indicatePessimisticFixpoint();
}
ChangeStatus updateImpl(Attributor &A) override {
llvm_unreachable("AAPrivatizablePtr(Floating|Returned|CallSiteReturned)::"
"updateImpl will not be called");
}
/// See AAPrivatizablePtrImpl::identifyPrivatizableType(...)
Optional<Type *> identifyPrivatizableType(Attributor &A) override {
Value *Obj = getUnderlyingObject(&getAssociatedValue());
if (!Obj) {
LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] No underlying object found!\n");
return nullptr;
}
if (auto *AI = dyn_cast<AllocaInst>(Obj))
if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
if (CI->isOne())
return AI->getAllocatedType();
if (auto *Arg = dyn_cast<Argument>(Obj)) {
auto &PrivArgAA = A.getAAFor<AAPrivatizablePtr>(
*this, IRPosition::argument(*Arg), DepClassTy::REQUIRED);
if (PrivArgAA.isAssumedPrivatizablePtr())
return Obj->getType()->getPointerElementType();
}
LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Underlying object neither valid "
"alloca nor privatizable argument: "
<< *Obj << "!\n");
return nullptr;
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_FLOATING_ATTR(privatizable_ptr);
}
};
struct AAPrivatizablePtrCallSiteArgument final
: public AAPrivatizablePtrFloating {
AAPrivatizablePtrCallSiteArgument(const IRPosition &IRP, Attributor &A)
: AAPrivatizablePtrFloating(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
if (getIRPosition().hasAttr(Attribute::ByVal))
indicateOptimisticFixpoint();
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
PrivatizableType = identifyPrivatizableType(A);
if (!PrivatizableType.hasValue())
return ChangeStatus::UNCHANGED;
if (!PrivatizableType.getValue())
return indicatePessimisticFixpoint();
const IRPosition &IRP = getIRPosition();
auto &NoCaptureAA =
A.getAAFor<AANoCapture>(*this, IRP, DepClassTy::REQUIRED);
if (!NoCaptureAA.isAssumedNoCapture()) {
LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer might be captured!\n");
return indicatePessimisticFixpoint();
}
auto &NoAliasAA = A.getAAFor<AANoAlias>(*this, IRP, DepClassTy::REQUIRED);
if (!NoAliasAA.isAssumedNoAlias()) {
LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer might alias!\n");
return indicatePessimisticFixpoint();
}
bool IsKnown;
if (!AA::isAssumedReadOnly(A, IRP, *this, IsKnown)) {
LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer is written!\n");
return indicatePessimisticFixpoint();
}
return ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_CSARG_ATTR(privatizable_ptr);
}
};
struct AAPrivatizablePtrCallSiteReturned final
: public AAPrivatizablePtrFloating {
AAPrivatizablePtrCallSiteReturned(const IRPosition &IRP, Attributor &A)
: AAPrivatizablePtrFloating(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
// TODO: We can privatize more than arguments.
indicatePessimisticFixpoint();
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_CSRET_ATTR(privatizable_ptr);
}
};
struct AAPrivatizablePtrReturned final : public AAPrivatizablePtrFloating {
AAPrivatizablePtrReturned(const IRPosition &IRP, Attributor &A)
: AAPrivatizablePtrFloating(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
// TODO: We can privatize more than arguments.
indicatePessimisticFixpoint();
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_FNRET_ATTR(privatizable_ptr);
}
};
/// -------------------- Memory Behavior Attributes ----------------------------
/// Includes read-none, read-only, and write-only.
/// ----------------------------------------------------------------------------
struct AAMemoryBehaviorImpl : public AAMemoryBehavior {
AAMemoryBehaviorImpl(const IRPosition &IRP, Attributor &A)
: AAMemoryBehavior(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
intersectAssumedBits(BEST_STATE);
getKnownStateFromValue(getIRPosition(), getState());
AAMemoryBehavior::initialize(A);
}
/// Return the memory behavior information encoded in the IR for \p IRP.
static void getKnownStateFromValue(const IRPosition &IRP,
BitIntegerState &State,
bool IgnoreSubsumingPositions = false) {
SmallVector<Attribute, 2> Attrs;
IRP.getAttrs(AttrKinds, Attrs, IgnoreSubsumingPositions);
for (const Attribute &Attr : Attrs) {
switch (Attr.getKindAsEnum()) {
case Attribute::ReadNone:
State.addKnownBits(NO_ACCESSES);
break;
case Attribute::ReadOnly:
State.addKnownBits(NO_WRITES);
break;
case Attribute::WriteOnly:
State.addKnownBits(NO_READS);
break;
default:
llvm_unreachable("Unexpected attribute!");
}
}
if (auto *I = dyn_cast<Instruction>(&IRP.getAnchorValue())) {
if (!I->mayReadFromMemory())
State.addKnownBits(NO_READS);
if (!I->mayWriteToMemory())
State.addKnownBits(NO_WRITES);
}
}
/// See AbstractAttribute::getDeducedAttributes(...).
void getDeducedAttributes(LLVMContext &Ctx,
SmallVectorImpl<Attribute> &Attrs) const override {
assert(Attrs.size() == 0);
if (isAssumedReadNone())
Attrs.push_back(Attribute::get(Ctx, Attribute::ReadNone));
else if (isAssumedReadOnly())
Attrs.push_back(Attribute::get(Ctx, Attribute::ReadOnly));
else if (isAssumedWriteOnly())
Attrs.push_back(Attribute::get(Ctx, Attribute::WriteOnly));
assert(Attrs.size() <= 1);
}
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
if (hasAttr(Attribute::ReadNone, /* IgnoreSubsumingPositions */ true))
return ChangeStatus::UNCHANGED;
const IRPosition &IRP = getIRPosition();
// Check if we would improve the existing attributes first.
SmallVector<Attribute, 4> DeducedAttrs;
getDeducedAttributes(IRP.getAnchorValue().getContext(), DeducedAttrs);
if (llvm::all_of(DeducedAttrs, [&](const Attribute &Attr) {
return IRP.hasAttr(Attr.getKindAsEnum(),
/* IgnoreSubsumingPositions */ true);
}))
return ChangeStatus::UNCHANGED;
// Clear existing attributes.
IRP.removeAttrs(AttrKinds);
// Use the generic manifest method.
return IRAttribute::manifest(A);
}
/// See AbstractState::getAsStr().
const std::string getAsStr() const override {
if (isAssumedReadNone())
return "readnone";
if (isAssumedReadOnly())
return "readonly";
if (isAssumedWriteOnly())
return "writeonly";
return "may-read/write";
}
/// The set of IR attributes AAMemoryBehavior deals with.
static const Attribute::AttrKind AttrKinds[3];
};
const Attribute::AttrKind AAMemoryBehaviorImpl::AttrKinds[] = {
Attribute::ReadNone, Attribute::ReadOnly, Attribute::WriteOnly};
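// Illustrative note (assumed, not from the original change): the state is a
// small bit lattice in which NO_READS and NO_WRITES are assumed optimistically
// and dropped as evidence appears; `readnone` corresponds to both bits
// (NO_ACCESSES), `readonly` to NO_WRITES, and `writeonly` to NO_READS,
// matching getKnownStateFromValue() and getDeducedAttributes() above.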
/// Memory behavior attribute for a floating value.
struct AAMemoryBehaviorFloating : AAMemoryBehaviorImpl {
AAMemoryBehaviorFloating(const IRPosition &IRP, Attributor &A)
: AAMemoryBehaviorImpl(IRP, A) {}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override;
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
if (isAssumedReadNone())
STATS_DECLTRACK_FLOATING_ATTR(readnone)
else if (isAssumedReadOnly())
STATS_DECLTRACK_FLOATING_ATTR(readonly)
else if (isAssumedWriteOnly())
STATS_DECLTRACK_FLOATING_ATTR(writeonly)
}
private:
/// Return true if users of \p UserI might access the underlying
/// variable/location described by \p U and should therefore be analyzed.
bool followUsersOfUseIn(Attributor &A, const Use &U,
const Instruction *UserI);
/// Update the state according to the effect of use \p U in \p UserI.
void analyzeUseIn(Attributor &A, const Use &U, const Instruction *UserI);
};
/// Memory behavior attribute for function argument.
struct AAMemoryBehaviorArgument : AAMemoryBehaviorFloating {
AAMemoryBehaviorArgument(const IRPosition &IRP, Attributor &A)
: AAMemoryBehaviorFloating(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
intersectAssumedBits(BEST_STATE);
const IRPosition &IRP = getIRPosition();
// TODO: Make IgnoreSubsumingPositions a property of an IRAttribute so we
// can query it when we use has/getAttr. That would allow us to reuse the
// initialize of the base class here.
bool HasByVal =
IRP.hasAttr({Attribute::ByVal}, /* IgnoreSubsumingPositions */ true);
getKnownStateFromValue(IRP, getState(),
/* IgnoreSubsumingPositions */ HasByVal);
// Initialize the use vector with all direct uses of the associated value.
Argument *Arg = getAssociatedArgument();
if (!Arg || !A.isFunctionIPOAmendable(*(Arg->getParent())))
indicatePessimisticFixpoint();
}
ChangeStatus manifest(Attributor &A) override {
// TODO: Pointer arguments are not supported on vectors of pointers yet.
if (!getAssociatedValue().getType()->isPointerTy())
return ChangeStatus::UNCHANGED;
// TODO: From readattrs.ll: "inalloca parameters are always
// considered written"
if (hasAttr({Attribute::InAlloca, Attribute::Preallocated})) {
removeKnownBits(NO_WRITES);
removeAssumedBits(NO_WRITES);
}
return AAMemoryBehaviorFloating::manifest(A);
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
if (isAssumedReadNone())
STATS_DECLTRACK_ARG_ATTR(readnone)
else if (isAssumedReadOnly())
STATS_DECLTRACK_ARG_ATTR(readonly)
else if (isAssumedWriteOnly())
STATS_DECLTRACK_ARG_ATTR(writeonly)
}
};
struct AAMemoryBehaviorCallSiteArgument final : AAMemoryBehaviorArgument {
AAMemoryBehaviorCallSiteArgument(const IRPosition &IRP, Attributor &A)
: AAMemoryBehaviorArgument(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
// If we don't have an associated argument this is either a variadic call
// or an indirect call; either way, nothing to do here.
Argument *Arg = getAssociatedArgument();
if (!Arg) {
indicatePessimisticFixpoint();
return;
}
if (Arg->hasByValAttr()) {
addKnownBits(NO_WRITES);
removeKnownBits(NO_READS);
removeAssumedBits(NO_READS);
}
AAMemoryBehaviorArgument::initialize(A);
if (getAssociatedFunction()->isDeclaration())
indicatePessimisticFixpoint();
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
// call site specific liveness information and then it makes
// sense to specialize attributes for call site arguments instead of
// redirecting requests to the callee argument.
Argument *Arg = getAssociatedArgument();
const IRPosition &ArgPos = IRPosition::argument(*Arg);
auto &ArgAA =
A.getAAFor<AAMemoryBehavior>(*this, ArgPos, DepClassTy::REQUIRED);
return clampStateAndIndicateChange(getState(), ArgAA.getState());
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
if (isAssumedReadNone())
STATS_DECLTRACK_CSARG_ATTR(readnone)
else if (isAssumedReadOnly())
STATS_DECLTRACK_CSARG_ATTR(readonly)
else if (isAssumedWriteOnly())
STATS_DECLTRACK_CSARG_ATTR(writeonly)
}
};
/// Memory behavior attribute for a call site return position.
struct AAMemoryBehaviorCallSiteReturned final : AAMemoryBehaviorFloating {
AAMemoryBehaviorCallSiteReturned(const IRPosition &IRP, Attributor &A)
: AAMemoryBehaviorFloating(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
AAMemoryBehaviorImpl::initialize(A);
Function *F = getAssociatedFunction();
if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
// We do not annotate returned values.
return ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {}
};
/// An AA to represent the memory behavior function attributes.
struct AAMemoryBehaviorFunction final : public AAMemoryBehaviorImpl {
AAMemoryBehaviorFunction(const IRPosition &IRP, Attributor &A)
: AAMemoryBehaviorImpl(IRP, A) {}
/// See AbstractAttribute::updateImpl(Attributor &A).
virtual ChangeStatus updateImpl(Attributor &A) override;
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
Function &F = cast<Function>(getAnchorValue());
if (isAssumedReadNone()) {
F.removeFnAttr(Attribute::ArgMemOnly);
F.removeFnAttr(Attribute::InaccessibleMemOnly);
F.removeFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
}
return AAMemoryBehaviorImpl::manifest(A);
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
if (isAssumedReadNone())
STATS_DECLTRACK_FN_ATTR(readnone)
else if (isAssumedReadOnly())
STATS_DECLTRACK_FN_ATTR(readonly)
else if (isAssumedWriteOnly())
STATS_DECLTRACK_FN_ATTR(writeonly)
}
};
/// AAMemoryBehavior attribute for call sites.
struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl {
AAMemoryBehaviorCallSite(const IRPosition &IRP, Attributor &A)
: AAMemoryBehaviorImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
AAMemoryBehaviorImpl::initialize(A);
Function *F = getAssociatedFunction();
if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
// call site specific liveness information and then it makes
// sense to specialize attributes for call sites instead of
// redirecting requests to the callee.
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
auto &FnAA =
A.getAAFor<AAMemoryBehavior>(*this, FnPos, DepClassTy::REQUIRED);
return clampStateAndIndicateChange(getState(), FnAA.getState());
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
if (isAssumedReadNone())
STATS_DECLTRACK_CS_ATTR(readnone)
else if (isAssumedReadOnly())
STATS_DECLTRACK_CS_ATTR(readonly)
else if (isAssumedWriteOnly())
STATS_DECLTRACK_CS_ATTR(writeonly)
}
};
ChangeStatus AAMemoryBehaviorFunction::updateImpl(Attributor &A) {
// The current assumed state used to determine a change.
auto AssumedState = getAssumed();
auto CheckRWInst = [&](Instruction &I) {
// If the instruction has its own memory behavior state, use it to restrict
// the local state. No further analysis is required as the other memory
// state is as optimistic as it gets.
if (const auto *CB = dyn_cast<CallBase>(&I)) {
const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(
*this, IRPosition::callsite_function(*CB), DepClassTy::REQUIRED);
intersectAssumedBits(MemBehaviorAA.getAssumed());
return !isAtFixpoint();
}
// Remove access kind modifiers if necessary.
if (I.mayReadFromMemory())
removeAssumedBits(NO_READS);
if (I.mayWriteToMemory())
removeAssumedBits(NO_WRITES);
return !isAtFixpoint();
};
bool UsedAssumedInformation = false;
if (!A.checkForAllReadWriteInstructions(CheckRWInst, *this,
UsedAssumedInformation))
return indicatePessimisticFixpoint();
return (AssumedState != getAssumed()) ? ChangeStatus::CHANGED
: ChangeStatus::UNCHANGED;
}
ChangeStatus AAMemoryBehaviorFloating::updateImpl(Attributor &A) {
const IRPosition &IRP = getIRPosition();
const IRPosition &FnPos = IRPosition::function_scope(IRP);
AAMemoryBehavior::StateType &S = getState();
// First, check the function scope. We take the known information and we avoid
// work if the assumed information implies the current assumed information for
// this attribute. This is valid for all but byval arguments.
Argument *Arg = IRP.getAssociatedArgument();
AAMemoryBehavior::base_t FnMemAssumedState =
AAMemoryBehavior::StateType::getWorstState();
if (!Arg || !Arg->hasByValAttr()) {
const auto &FnMemAA =
A.getAAFor<AAMemoryBehavior>(*this, FnPos, DepClassTy::OPTIONAL);
FnMemAssumedState = FnMemAA.getAssumed();
S.addKnownBits(FnMemAA.getKnown());
if ((S.getAssumed() & FnMemAA.getAssumed()) == S.getAssumed())
return ChangeStatus::UNCHANGED;
}
// The current assumed state used to determine a change.
auto AssumedState = S.getAssumed();
// Make sure the value is not captured (except through "return"); if
// it is, any information derived would be irrelevant anyway as we cannot
// check the potential aliases introduced by the capture. However, no need
// to fall back to anything less optimistic than the function state.
const auto &ArgNoCaptureAA =
A.getAAFor<AANoCapture>(*this, IRP, DepClassTy::OPTIONAL);
if (!ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) {
S.intersectAssumedBits(FnMemAssumedState);
return (AssumedState != getAssumed()) ? ChangeStatus::CHANGED
: ChangeStatus::UNCHANGED;
}
// Visit and expand uses until all are analyzed or a fixpoint is reached.
auto UsePred = [&](const Use &U, bool &Follow) -> bool {
Instruction *UserI = cast<Instruction>(U.getUser());
LLVM_DEBUG(dbgs() << "[AAMemoryBehavior] Use: " << *U << " in " << *UserI
<< " \n");
// Droppable users, e.g., llvm::assume, do not actually perform any action.
if (UserI->isDroppable())
return true;
// Check if the users of UserI should also be visited.
Follow = followUsersOfUseIn(A, U, UserI);
// If UserI might touch memory we analyze the use in detail.
if (UserI->mayReadOrWriteMemory())
analyzeUseIn(A, U, UserI);
return !isAtFixpoint();
};
if (!A.checkForAllUses(UsePred, *this, getAssociatedValue()))
return indicatePessimisticFixpoint();
return (AssumedState != getAssumed()) ? ChangeStatus::CHANGED
: ChangeStatus::UNCHANGED;
}
bool AAMemoryBehaviorFloating::followUsersOfUseIn(Attributor &A, const Use &U,
const Instruction *UserI) {
// The loaded value is unrelated to the pointer argument, no need to
// follow the users of the load.
if (isa<LoadInst>(UserI))
return false;
// By default we follow all uses assuming UserI might leak information on U;
// we have special handling for call site operands though.
const auto *CB = dyn_cast<CallBase>(UserI);
if (!CB || !CB->isArgOperand(&U))
return true;
// If the use is a call argument known not to be captured, the users of
// the call do not need to be visited because they have to be unrelated to
// the input. Note that this check is not trivial even though we disallow
// general capturing of the underlying argument. The reason is that the
// call might capture the argument "through return", which we allow and for
// which we need to check call users.
if (U.get()->getType()->isPointerTy()) {
unsigned ArgNo = CB->getArgOperandNo(&U);
const auto &ArgNoCaptureAA = A.getAAFor<AANoCapture>(
*this, IRPosition::callsite_argument(*CB, ArgNo), DepClassTy::OPTIONAL);
return !ArgNoCaptureAA.isAssumedNoCapture();
}
return true;
}
void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use &U,
const Instruction *UserI) {
assert(UserI->mayReadOrWriteMemory());
switch (UserI->getOpcode()) {
default:
// TODO: Handle all atomics and other side-effect operations we know of.
break;
case Instruction::Load:
// Loads cause the NO_READS property to disappear.
removeAssumedBits(NO_READS);
return;
case Instruction::Store:
// Stores cause the NO_WRITES property to disappear if the use is the
// pointer operand. Note that while capturing was taken care of elsewhere,
// we still need to deal with stores of the value itself, which is not
// looked through.
if (cast<StoreInst>(UserI)->getPointerOperand() == U.get())
removeAssumedBits(NO_WRITES);
else
indicatePessimisticFixpoint();
return;
case Instruction::Call:
case Instruction::CallBr:
case Instruction::Invoke: {
// For call sites we look at the argument memory behavior attribute (this
// could be recursive!) in order to restrict our own state.
const auto *CB = cast<CallBase>(UserI);
// Give up on operand bundles.
if (CB->isBundleOperand(&U)) {
indicatePessimisticFixpoint();
return;
}
// Calling a function does read the function pointer, and might write it if
// the function is self-modifying.
if (CB->isCallee(&U)) {
removeAssumedBits(NO_READS);
break;
}
// Adjust the possible access behavior based on the information on the
// argument.
IRPosition Pos;
if (U.get()->getType()->isPointerTy())
Pos = IRPosition::callsite_argument(*CB, CB->getArgOperandNo(&U));
else
Pos = IRPosition::callsite_function(*CB);
const auto &MemBehaviorAA =
A.getAAFor<AAMemoryBehavior>(*this, Pos, DepClassTy::OPTIONAL);
// "assumed" has at most the same bits as the MemBehaviorAA assumed
// and at least "known".
intersectAssumedBits(MemBehaviorAA.getAssumed());
return;
}
};
// Generally, look at the "may-properties" and adjust the assumed state if we
// did not trigger special handling before.
if (UserI->mayReadFromMemory())
removeAssumedBits(NO_READS);
if (UserI->mayWriteToMemory())
removeAssumedBits(NO_WRITES);
}
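// Illustrative sketch (assumed example, not from the original change): for an
// argument %p, `load i32, i32* %p` only clears NO_READS, `store i32 0, i32* %p`
// only clears NO_WRITES, while `store i32* %p, i32** %q` (storing the pointer
// itself) gives up via indicatePessimisticFixpoint(), and a call taking %p
// intersects the state with the call site argument's memory behavior.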
/// -------------------- Memory Locations Attributes ---------------------------
/// Includes read-none, argmemonly, inaccessiblememonly,
/// inaccessiblememorargmemonly
/// ----------------------------------------------------------------------------
std::string AAMemoryLocation::getMemoryLocationsAsStr(
AAMemoryLocation::MemoryLocationsKind MLK) {
if (0 == (MLK & AAMemoryLocation::NO_LOCATIONS))
return "all memory";
if (MLK == AAMemoryLocation::NO_LOCATIONS)
return "no memory";
std::string S = "memory:";
if (0 == (MLK & AAMemoryLocation::NO_LOCAL_MEM))
S += "stack,";
if (0 == (MLK & AAMemoryLocation::NO_CONST_MEM))
S += "constant,";
if (0 == (MLK & AAMemoryLocation::NO_GLOBAL_INTERNAL_MEM))
S += "internal global,";
if (0 == (MLK & AAMemoryLocation::NO_GLOBAL_EXTERNAL_MEM))
S += "external global,";
if (0 == (MLK & AAMemoryLocation::NO_ARGUMENT_MEM))
S += "argument,";
if (0 == (MLK & AAMemoryLocation::NO_INACCESSIBLE_MEM))
S += "inaccessible,";
if (0 == (MLK & AAMemoryLocation::NO_MALLOCED_MEM))
S += "malloced,";
if (0 == (MLK & AAMemoryLocation::NO_UNKOWN_MEM))
S += "unknown,";
S.pop_back();
return S;
}
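// Illustrative sketch (assumed example, not from the original change): a state
// in which only stack and argument accesses remain possible prints as
// "memory:stack,argument", while all NO_* bits set prints "no memory" and no
// NO_* bits set prints "all memory".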
struct AAMemoryLocationImpl : public AAMemoryLocation {
AAMemoryLocationImpl(const IRPosition &IRP, Attributor &A)
: AAMemoryLocation(IRP, A), Allocator(A.Allocator) {
for (unsigned u = 0; u < llvm::CTLog2<VALID_STATE>(); ++u)
AccessKind2Accesses[u] = nullptr;
}
~AAMemoryLocationImpl() {
// The AccessSets are allocated via a BumpPtrAllocator, so we call
// the destructor manually.
for (unsigned u = 0; u < llvm::CTLog2<VALID_STATE>(); ++u)
if (AccessKind2Accesses[u])
AccessKind2Accesses[u]->~AccessSet();
}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
intersectAssumedBits(BEST_STATE);
getKnownStateFromValue(A, getIRPosition(), getState());
AAMemoryLocation::initialize(A);
}
/// Return the memory location information encoded in the IR for \p IRP.
static void getKnownStateFromValue(Attributor &A, const IRPosition &IRP,
BitIntegerState &State,
bool IgnoreSubsumingPositions = false) {
// For internal functions we ignore `argmemonly` and
// `inaccessiblememorargmemonly` as we might break them via interprocedural
// constant propagation. It is unclear if this is the best way but it is
// unlikely this will cause real performance problems. If we are deriving
// attributes for the anchor function we even remove the attribute in
// addition to ignoring it.
bool UseArgMemOnly = true;
Function *AnchorFn = IRP.getAnchorScope();
if (AnchorFn && A.isRunOn(*AnchorFn))
UseArgMemOnly = !AnchorFn->hasLocalLinkage();
SmallVector<Attribute, 2> Attrs;
IRP.getAttrs(AttrKinds, Attrs, IgnoreSubsumingPositions);
for (const Attribute &Attr : Attrs) {
switch (Attr.getKindAsEnum()) {
case Attribute::ReadNone:
State.addKnownBits(NO_LOCAL_MEM | NO_CONST_MEM);
break;
case Attribute::InaccessibleMemOnly:
State.addKnownBits(inverseLocation(NO_INACCESSIBLE_MEM, true, true));
break;
case Attribute::ArgMemOnly:
if (UseArgMemOnly)
State.addKnownBits(inverseLocation(NO_ARGUMENT_MEM, true, true));
else
IRP.removeAttrs({Attribute::ArgMemOnly});
break;
case Attribute::InaccessibleMemOrArgMemOnly:
if (UseArgMemOnly)
State.addKnownBits(inverseLocation(
NO_INACCESSIBLE_MEM | NO_ARGUMENT_MEM, true, true));
else
IRP.removeAttrs({Attribute::InaccessibleMemOrArgMemOnly});
break;
default:
llvm_unreachable("Unexpected attribute!");
}
}
}
/// See AbstractAttribute::getDeducedAttributes(...).
void getDeducedAttributes(LLVMContext &Ctx,
SmallVectorImpl<Attribute> &Attrs) const override {
assert(Attrs.size() == 0);
if (isAssumedReadNone()) {
Attrs.push_back(Attribute::get(Ctx, Attribute::ReadNone));
} else if (getIRPosition().getPositionKind() == IRPosition::IRP_FUNCTION) {
if (isAssumedInaccessibleMemOnly())
Attrs.push_back(Attribute::get(Ctx, Attribute::InaccessibleMemOnly));
else if (isAssumedArgMemOnly())
Attrs.push_back(Attribute::get(Ctx, Attribute::ArgMemOnly));
else if (isAssumedInaccessibleOrArgMemOnly())
Attrs.push_back(
Attribute::get(Ctx, Attribute::InaccessibleMemOrArgMemOnly));
}
assert(Attrs.size() <= 1);
}
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
const IRPosition &IRP = getIRPosition();
// Check if we would improve the existing attributes first.
SmallVector<Attribute, 4> DeducedAttrs;
getDeducedAttributes(IRP.getAnchorValue().getContext(), DeducedAttrs);
if (llvm::all_of(DeducedAttrs, [&](const Attribute &Attr) {
return IRP.hasAttr(Attr.getKindAsEnum(),
/* IgnoreSubsumingPositions */ true);
}))
return ChangeStatus::UNCHANGED;
// Clear existing attributes.
IRP.removeAttrs(AttrKinds);
if (isAssumedReadNone())
IRP.removeAttrs(AAMemoryBehaviorImpl::AttrKinds);
// Use the generic manifest method.
return IRAttribute::manifest(A);
}
/// See AAMemoryLocation::checkForAllAccessesToMemoryKind(...).
bool checkForAllAccessesToMemoryKind(
function_ref<bool(const Instruction *, const Value *, AccessKind,
MemoryLocationsKind)>
Pred,
MemoryLocationsKind RequestedMLK) const override {
if (!isValidState())
return false;
MemoryLocationsKind AssumedMLK = getAssumedNotAccessedLocation();
if (AssumedMLK == NO_LOCATIONS)
return true;
unsigned Idx = 0;
for (MemoryLocationsKind CurMLK = 1; CurMLK < NO_LOCATIONS;
CurMLK *= 2, ++Idx) {
if (CurMLK & RequestedMLK)
continue;
if (const AccessSet *Accesses = AccessKind2Accesses[Idx])
for (const AccessInfo &AI : *Accesses)
if (!Pred(AI.I, AI.Ptr, AI.Kind, CurMLK))
return false;
}
return true;
}
ChangeStatus indicatePessimisticFixpoint() override {
// If we give up and indicate a pessimistic fixpoint this instruction will
// become an access for all potential access kinds:
// TODO: Add pointers for argmemonly and globals to improve the results of
// checkForAllAccessesToMemoryKind.
bool Changed = false;
MemoryLocationsKind KnownMLK = getKnown();
Instruction *I = dyn_cast<Instruction>(&getAssociatedValue());
for (MemoryLocationsKind CurMLK = 1; CurMLK < NO_LOCATIONS; CurMLK *= 2)
if (!(CurMLK & KnownMLK))
updateStateAndAccessesMap(getState(), CurMLK, I, nullptr, Changed,
getAccessKindFromInst(I));
return AAMemoryLocation::indicatePessimisticFixpoint();
}
protected:
/// Helper struct to tie together an instruction that has a read or write
/// effect with the pointer it accesses (if any).
struct AccessInfo {
/// The instruction that caused the access.
const Instruction *I;
/// The base pointer that is accessed, or null if unknown.
const Value *Ptr;
/// The kind of access (read/write/read+write).
AccessKind Kind;
bool operator==(const AccessInfo &RHS) const {
return I == RHS.I && Ptr == RHS.Ptr && Kind == RHS.Kind;
}
bool operator()(const AccessInfo &LHS, const AccessInfo &RHS) const {
if (LHS.I != RHS.I)
return LHS.I < RHS.I;
if (LHS.Ptr != RHS.Ptr)
return LHS.Ptr < RHS.Ptr;
if (LHS.Kind != RHS.Kind)
return LHS.Kind < RHS.Kind;
return false;
}
};
/// Mapping from *single* memory location kinds, e.g., LOCAL_MEM with the
/// value of NO_LOCAL_MEM, to the accesses encountered for this memory kind.
using AccessSet = SmallSet<AccessInfo, 2, AccessInfo>;
AccessSet *AccessKind2Accesses[llvm::CTLog2<VALID_STATE>()];
/// Categorize the pointer arguments of \p CB that might access memory in
/// \p AccessedLocs and update the state and access map accordingly.
void
categorizeArgumentPointerLocations(Attributor &A, CallBase &CB,
AAMemoryLocation::StateType &AccessedLocs,
bool &Changed);
/// Return the kind(s) of location that may be accessed by \p I.
AAMemoryLocation::MemoryLocationsKind
categorizeAccessedLocations(Attributor &A, Instruction &I, bool &Changed);
/// Return the access kind as determined by \p I.
AccessKind getAccessKindFromInst(const Instruction *I) {
AccessKind AK = READ_WRITE;
if (I) {
AK = I->mayReadFromMemory() ? READ : NONE;
AK = AccessKind(AK | (I->mayWriteToMemory() ? WRITE : NONE));
}
return AK;
}
/// Update the state \p State and the AccessKind2Accesses given that \p I is
/// an access of kind \p AK to a \p MLK memory location with the access
/// pointer \p Ptr.
void updateStateAndAccessesMap(AAMemoryLocation::StateType &State,
MemoryLocationsKind MLK, const Instruction *I,
const Value *Ptr, bool &Changed,
AccessKind AK = READ_WRITE) {
assert(isPowerOf2_32(MLK) && "Expected a single location set!");
auto *&Accesses = AccessKind2Accesses[llvm::Log2_32(MLK)];
if (!Accesses)
Accesses = new (Allocator) AccessSet();
Changed |= Accesses->insert(AccessInfo{I, Ptr, AK}).second;
State.removeAssumedBits(MLK);
}
/// Determine the underlying location kinds for \p Ptr, e.g., globals or
/// arguments, and update the state and access map accordingly.
void categorizePtrValue(Attributor &A, const Instruction &I, const Value &Ptr,
AAMemoryLocation::StateType &State, bool &Changed);
/// Used to allocate access sets.
BumpPtrAllocator &Allocator;
/// The set of IR attributes AAMemoryLocation deals with.
static const Attribute::AttrKind AttrKinds[4];
};
const Attribute::AttrKind AAMemoryLocationImpl::AttrKinds[] = {
Attribute::ReadNone, Attribute::InaccessibleMemOnly, Attribute::ArgMemOnly,
Attribute::InaccessibleMemOrArgMemOnly};
void AAMemoryLocationImpl::categorizePtrValue(
Attributor &A, const Instruction &I, const Value &Ptr,
AAMemoryLocation::StateType &State, bool &Changed) {
LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Categorize pointer locations for "
<< Ptr << " ["
<< getMemoryLocationsAsStr(State.getAssumed()) << "]\n");
SmallVector<Value *, 8> Objects;
+ bool UsedAssumedInformation = false;
if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, *this, &I,
+ UsedAssumedInformation,
/* Intraprocedural */ true)) {
LLVM_DEBUG(
dbgs() << "[AAMemoryLocation] Pointer locations not categorized\n");
updateStateAndAccessesMap(State, NO_UNKOWN_MEM, &I, nullptr, Changed,
getAccessKindFromInst(&I));
return;
}
for (Value *Obj : Objects) {
// TODO: recognize the TBAA used for constant accesses.
MemoryLocationsKind MLK = NO_LOCATIONS;
if (isa<UndefValue>(Obj))
continue;
if (isa<Argument>(Obj)) {
// TODO: For now we do not treat byval arguments as local copies performed
// on the call edge, though we should. To make that happen we need to
// teach various passes, e.g., DSE, about the copy effect of a byval. That
// would also allow us to mark functions only accessing byval arguments as
// readnone again; arguably their accesses have no effect outside of the
// function, like accesses to allocas.
MLK = NO_ARGUMENT_MEM;
} else if (auto *GV = dyn_cast<GlobalValue>(Obj)) {
// Reading constant memory is not treated as a read "effect" by the
// function attr pass, so we won't either. Constants defined by TBAA are
// similar. (We know we do not write it because it is constant.)
if (auto *GVar = dyn_cast<GlobalVariable>(GV))
if (GVar->isConstant())
continue;
if (GV->hasLocalLinkage())
MLK = NO_GLOBAL_INTERNAL_MEM;
else
MLK = NO_GLOBAL_EXTERNAL_MEM;
} else if (isa<ConstantPointerNull>(Obj) &&
!NullPointerIsDefined(getAssociatedFunction(),
Ptr.getType()->getPointerAddressSpace())) {
continue;
} else if (isa<AllocaInst>(Obj)) {
MLK = NO_LOCAL_MEM;
} else if (const auto *CB = dyn_cast<CallBase>(Obj)) {
const auto &NoAliasAA = A.getAAFor<AANoAlias>(
*this, IRPosition::callsite_returned(*CB), DepClassTy::OPTIONAL);
if (NoAliasAA.isAssumedNoAlias())
MLK = NO_MALLOCED_MEM;
else
MLK = NO_UNKOWN_MEM;
} else {
MLK = NO_UNKOWN_MEM;
}
assert(MLK != NO_LOCATIONS && "No location specified!");
LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Ptr value can be categorized: "
<< *Obj << " -> " << getMemoryLocationsAsStr(MLK)
<< "\n");
updateStateAndAccessesMap(getState(), MLK, &I, Obj, Changed,
getAccessKindFromInst(&I));
}
LLVM_DEBUG(
dbgs() << "[AAMemoryLocation] Accessed locations with pointer locations: "
<< getMemoryLocationsAsStr(State.getAssumed()) << "\n");
}
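// Illustrative sketch (assumed example, not from the original change): for a
// store through a pointer whose underlying objects are an alloca, an internal
// global, and a noalias call result, the loop above records NO_LOCAL_MEM,
// NO_GLOBAL_INTERNAL_MEM, and NO_MALLOCED_MEM accesses respectively; constant
// globals and null pointers (where null is not a defined address) are skipped.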
void AAMemoryLocationImpl::categorizeArgumentPointerLocations(
Attributor &A, CallBase &CB, AAMemoryLocation::StateType &AccessedLocs,
bool &Changed) {
for (unsigned ArgNo = 0, E = CB.arg_size(); ArgNo < E; ++ArgNo) {
// Skip non-pointer arguments.
const Value *ArgOp = CB.getArgOperand(ArgNo);
if (!ArgOp->getType()->isPtrOrPtrVectorTy())
continue;
// Skip readnone arguments.
const IRPosition &ArgOpIRP = IRPosition::callsite_argument(CB, ArgNo);
const auto &ArgOpMemLocationAA =
A.getAAFor<AAMemoryBehavior>(*this, ArgOpIRP, DepClassTy::OPTIONAL);
if (ArgOpMemLocationAA.isAssumedReadNone())
continue;
// Categorize potentially accessed pointer arguments as if there was an
// access instruction with them as pointer.
categorizePtrValue(A, CB, *ArgOp, AccessedLocs, Changed);
}
}
AAMemoryLocation::MemoryLocationsKind
AAMemoryLocationImpl::categorizeAccessedLocations(Attributor &A, Instruction &I,
bool &Changed) {
LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Categorize accessed locations for "
<< I << "\n");
AAMemoryLocation::StateType AccessedLocs;
AccessedLocs.intersectAssumedBits(NO_LOCATIONS);
if (auto *CB = dyn_cast<CallBase>(&I)) {
// First check if we assume any accessed memory is visible.
const auto &CBMemLocationAA = A.getAAFor<AAMemoryLocation>(
*this, IRPosition::callsite_function(*CB), DepClassTy::OPTIONAL);
LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Categorize call site: " << I
<< " [" << CBMemLocationAA << "]\n");
if (CBMemLocationAA.isAssumedReadNone())
return NO_LOCATIONS;
if (CBMemLocationAA.isAssumedInaccessibleMemOnly()) {
updateStateAndAccessesMap(AccessedLocs, NO_INACCESSIBLE_MEM, &I, nullptr,
Changed, getAccessKindFromInst(&I));
return AccessedLocs.getAssumed();
}
uint32_t CBAssumedNotAccessedLocs =
CBMemLocationAA.getAssumedNotAccessedLocation();
// Set the argmemonly and global bits as we handle them separately below.
uint32_t CBAssumedNotAccessedLocsNoArgMem =
CBAssumedNotAccessedLocs | NO_ARGUMENT_MEM | NO_GLOBAL_MEM;
for (MemoryLocationsKind CurMLK = 1; CurMLK < NO_LOCATIONS; CurMLK *= 2) {
if (CBAssumedNotAccessedLocsNoArgMem & CurMLK)
continue;
updateStateAndAccessesMap(AccessedLocs, CurMLK, &I, nullptr, Changed,
getAccessKindFromInst(&I));
}
// Now handle global memory if it might be accessed. This is slightly tricky
// as NO_GLOBAL_MEM has multiple bits set.
bool HasGlobalAccesses = ((~CBAssumedNotAccessedLocs) & NO_GLOBAL_MEM);
if (HasGlobalAccesses) {
auto AccessPred = [&](const Instruction *, const Value *Ptr,
AccessKind Kind, MemoryLocationsKind MLK) {
updateStateAndAccessesMap(AccessedLocs, MLK, &I, Ptr, Changed,
getAccessKindFromInst(&I));
return true;
};
if (!CBMemLocationAA.checkForAllAccessesToMemoryKind(
AccessPred, inverseLocation(NO_GLOBAL_MEM, false, false)))
return AccessedLocs.getWorstState();
}
LLVM_DEBUG(
dbgs() << "[AAMemoryLocation] Accessed state before argument handling: "
<< getMemoryLocationsAsStr(AccessedLocs.getAssumed()) << "\n");
// Now handle argument memory if it might be accessed.
bool HasArgAccesses = ((~CBAssumedNotAccessedLocs) & NO_ARGUMENT_MEM);
if (HasArgAccesses)
categorizeArgumentPointerLocations(A, *CB, AccessedLocs, Changed);
LLVM_DEBUG(
dbgs() << "[AAMemoryLocation] Accessed state after argument handling: "
<< getMemoryLocationsAsStr(AccessedLocs.getAssumed()) << "\n");
return AccessedLocs.getAssumed();
}
if (const Value *Ptr = getPointerOperand(&I, /* AllowVolatile */ true)) {
LLVM_DEBUG(
dbgs() << "[AAMemoryLocation] Categorize memory access with pointer: "
<< I << " [" << *Ptr << "]\n");
categorizePtrValue(A, I, *Ptr, AccessedLocs, Changed);
return AccessedLocs.getAssumed();
}
LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Failed to categorize instruction: "
<< I << "\n");
updateStateAndAccessesMap(AccessedLocs, NO_UNKOWN_MEM, &I, nullptr, Changed,
getAccessKindFromInst(&I));
return AccessedLocs.getAssumed();
}
/// An AA to represent the memory behavior function attributes.
struct AAMemoryLocationFunction final : public AAMemoryLocationImpl {
AAMemoryLocationFunction(const IRPosition &IRP, Attributor &A)
: AAMemoryLocationImpl(IRP, A) {}
/// See AbstractAttribute::updateImpl(Attributor &A).
virtual ChangeStatus updateImpl(Attributor &A) override {
const auto &MemBehaviorAA =
A.getAAFor<AAMemoryBehavior>(*this, getIRPosition(), DepClassTy::NONE);
if (MemBehaviorAA.isAssumedReadNone()) {
if (MemBehaviorAA.isKnownReadNone())
return indicateOptimisticFixpoint();
assert(isAssumedReadNone() &&
"AAMemoryLocation was not read-none but AAMemoryBehavior was!");
A.recordDependence(MemBehaviorAA, *this, DepClassTy::OPTIONAL);
return ChangeStatus::UNCHANGED;
}
// The current assumed state used to determine a change.
auto AssumedState = getAssumed();
bool Changed = false;
auto CheckRWInst = [&](Instruction &I) {
MemoryLocationsKind MLK = categorizeAccessedLocations(A, I, Changed);
LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Accessed locations for " << I
<< ": " << getMemoryLocationsAsStr(MLK) << "\n");
removeAssumedBits(inverseLocation(MLK, false, false));
// Stop once only the valid bit is set in the *not assumed location*, that
// is, once we don't actually exclude any memory locations in the state.
return getAssumedNotAccessedLocation() != VALID_STATE;
};
bool UsedAssumedInformation = false;
if (!A.checkForAllReadWriteInstructions(CheckRWInst, *this,
UsedAssumedInformation))
return indicatePessimisticFixpoint();
Changed |= AssumedState != getAssumed();
return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
if (isAssumedReadNone())
STATS_DECLTRACK_FN_ATTR(readnone)
else if (isAssumedArgMemOnly())
STATS_DECLTRACK_FN_ATTR(argmemonly)
else if (isAssumedInaccessibleMemOnly())
STATS_DECLTRACK_FN_ATTR(inaccessiblememonly)
else if (isAssumedInaccessibleOrArgMemOnly())
STATS_DECLTRACK_FN_ATTR(inaccessiblememorargmemonly)
}
};
/// AAMemoryLocation attribute for call sites.
struct AAMemoryLocationCallSite final : AAMemoryLocationImpl {
AAMemoryLocationCallSite(const IRPosition &IRP, Attributor &A)
: AAMemoryLocationImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
AAMemoryLocationImpl::initialize(A);
Function *F = getAssociatedFunction();
if (!F || F->isDeclaration())
indicatePessimisticFixpoint();
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
// call site specific liveness information and then it makes
// sense to specialize attributes for call sites instead of
// redirecting requests to the callee.
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
auto &FnAA =
A.getAAFor<AAMemoryLocation>(*this, FnPos, DepClassTy::REQUIRED);
bool Changed = false;
auto AccessPred = [&](const Instruction *I, const Value *Ptr,
AccessKind Kind, MemoryLocationsKind MLK) {
updateStateAndAccessesMap(getState(), MLK, I, Ptr, Changed,
getAccessKindFromInst(I));
return true;
};
if (!FnAA.checkForAllAccessesToMemoryKind(AccessPred, ALL_LOCATIONS))
return indicatePessimisticFixpoint();
return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
if (isAssumedReadNone())
STATS_DECLTRACK_CS_ATTR(readnone)
}
};
/// ------------------ Value Constant Range Attribute -------------------------
struct AAValueConstantRangeImpl : AAValueConstantRange {
using StateType = IntegerRangeState;
AAValueConstantRangeImpl(const IRPosition &IRP, Attributor &A)
: AAValueConstantRange(IRP, A) {}
/// See AbstractAttribute::initialize(..).
void initialize(Attributor &A) override {
if (A.hasSimplificationCallback(getIRPosition())) {
indicatePessimisticFixpoint();
return;
}
// Intersect a range given by SCEV.
intersectKnown(getConstantRangeFromSCEV(A, getCtxI()));
// Intersect a range given by LVI.
intersectKnown(getConstantRangeFromLVI(A, getCtxI()));
}
/// See AbstractAttribute::getAsStr().
const std::string getAsStr() const override {
std::string Str;
llvm::raw_string_ostream OS(Str);
OS << "range(" << getBitWidth() << ")<";
getKnown().print(OS);
OS << " / ";
getAssumed().print(OS);
OS << ">";
return OS.str();
}
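// Illustrative sketch (assumed example, not from the original change): for a
// 32-bit value with no known bound and an assumed range of [0,10) this prints
// something like "range(32)<full-set / [0,10)>".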
/// Helper function to get a SCEV expr for the associated value at program
/// point \p I.
const SCEV *getSCEV(Attributor &A, const Instruction *I = nullptr) const {
if (!getAnchorScope())
return nullptr;
ScalarEvolution *SE =
A.getInfoCache().getAnalysisResultForFunction<ScalarEvolutionAnalysis>(
*getAnchorScope());
LoopInfo *LI = A.getInfoCache().getAnalysisResultForFunction<LoopAnalysis>(
*getAnchorScope());
if (!SE || !LI)
return nullptr;
const SCEV *S = SE->getSCEV(&getAssociatedValue());
if (!I)
return S;
return SE->getSCEVAtScope(S, LI->getLoopFor(I->getParent()));
}
/// Helper function to get a range from SCEV for the associated value at
/// program point \p I.
ConstantRange getConstantRangeFromSCEV(Attributor &A,
const Instruction *I = nullptr) const {
if (!getAnchorScope())
return getWorstState(getBitWidth());
ScalarEvolution *SE =
A.getInfoCache().getAnalysisResultForFunction<ScalarEvolutionAnalysis>(
*getAnchorScope());
const SCEV *S = getSCEV(A, I);
if (!SE || !S)
return getWorstState(getBitWidth());
return SE->getUnsignedRange(S);
}
/// Helper function to get a range from LVI for the associated value at
/// program point \p I.
ConstantRange
getConstantRangeFromLVI(Attributor &A,
const Instruction *CtxI = nullptr) const {
if (!getAnchorScope())
return getWorstState(getBitWidth());
LazyValueInfo *LVI =
A.getInfoCache().getAnalysisResultForFunction<LazyValueAnalysis>(
*getAnchorScope());
if (!LVI || !CtxI)
return getWorstState(getBitWidth());
return LVI->getConstantRange(&getAssociatedValue(),
const_cast<Instruction *>(CtxI));
}
/// Return true if \p CtxI is valid for querying outside analyses.
/// This basically makes sure we do not ask intra-procedural analysis
/// about a context in the wrong function or a context that violates
/// dominance assumptions they might have. The \p AllowAACtxI flag indicates
/// if the original context of this AA is OK or should be considered invalid.
bool isValidCtxInstructionForOutsideAnalysis(Attributor &A,
const Instruction *CtxI,
bool AllowAACtxI) const {
if (!CtxI || (!AllowAACtxI && CtxI == getCtxI()))
return false;
// Our context might be in a different function; neither of the
// intra-procedural analyses (ScalarEvolution and LazyValueInfo) can handle that.
if (!AA::isValidInScope(getAssociatedValue(), CtxI->getFunction()))
return false;
// If the context is not dominated by the value there are paths to the
// context that do not define the value. This cannot be handled by
// LazyValueInfo so we need to bail.
if (auto *I = dyn_cast<Instruction>(&getAssociatedValue())) {
InformationCache &InfoCache = A.getInfoCache();
const DominatorTree *DT =
InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(
*I->getFunction());
return DT && DT->dominates(I, CtxI);
}
return true;
}
/// See AAValueConstantRange::getKnownConstantRange(..).
ConstantRange
getKnownConstantRange(Attributor &A,
const Instruction *CtxI = nullptr) const override {
if (!isValidCtxInstructionForOutsideAnalysis(A, CtxI,
/* AllowAACtxI */ false))
return getKnown();
ConstantRange LVIR = getConstantRangeFromLVI(A, CtxI);
ConstantRange SCEVR = getConstantRangeFromSCEV(A, CtxI);
return getKnown().intersectWith(SCEVR).intersectWith(LVIR);
}
/// See AAValueConstantRange::getAssumedConstantRange(..).
ConstantRange
getAssumedConstantRange(Attributor &A,
const Instruction *CtxI = nullptr) const override {
// TODO: Make SCEV use Attributor assumption.
// We may be able to bound a variable range via assumptions in
// Attributor. ex.) If x is assumed to be in [1, 3] and y is known to
// evolve to x^2 + x, then we can say that y is in [2, 12].
if (!isValidCtxInstructionForOutsideAnalysis(A, CtxI,
/* AllowAACtxI */ false))
return getAssumed();
ConstantRange LVIR = getConstantRangeFromLVI(A, CtxI);
ConstantRange SCEVR = getConstantRangeFromSCEV(A, CtxI);
return getAssumed().intersectWith(SCEVR).intersectWith(LVIR);
}
/// Helper function to create MDNode for range metadata.
static MDNode *
getMDNodeForConstantRange(Type *Ty, LLVMContext &Ctx,
const ConstantRange &AssumedConstantRange) {
Metadata *LowAndHigh[] = {ConstantAsMetadata::get(ConstantInt::get(
Ty, AssumedConstantRange.getLower())),
ConstantAsMetadata::get(ConstantInt::get(
Ty, AssumedConstantRange.getUpper()))};
return MDNode::get(Ctx, LowAndHigh);
}
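// Illustrative sketch (assumed example, not from the original change): for an
// i32 value with an assumed range of [0, 42) this helper produces metadata that
// manifests as `!range !{i32 0, i32 42}` on the annotated instruction.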
/// Return true if \p Assumed is a strictly better (tighter) range than the one
/// encoded in \p KnownRanges.
static bool isBetterRange(const ConstantRange &Assumed, MDNode *KnownRanges) {
if (Assumed.isFullSet())
return false;
if (!KnownRanges)
return true;
// If multiple ranges are annotated in the IR, we give up on annotating the
// assumed range for now.
// TODO: If there exists a known range which contains the assumed range, we
// can say the assumed range is better.
if (KnownRanges->getNumOperands() > 2)
return false;
ConstantInt *Lower =
mdconst::extract<ConstantInt>(KnownRanges->getOperand(0));
ConstantInt *Upper =
mdconst::extract<ConstantInt>(KnownRanges->getOperand(1));
ConstantRange Known(Lower->getValue(), Upper->getValue());
return Known.contains(Assumed) && Known != Assumed;
}
/// Helper function to set range metadata.
static bool
setRangeMetadataIfisBetterRange(Instruction *I,
const ConstantRange &AssumedConstantRange) {
auto *OldRangeMD = I->getMetadata(LLVMContext::MD_range);
if (isBetterRange(AssumedConstantRange, OldRangeMD)) {
if (!AssumedConstantRange.isEmptySet()) {
I->setMetadata(LLVMContext::MD_range,
getMDNodeForConstantRange(I->getType(), I->getContext(),
AssumedConstantRange));
return true;
}
}
return false;
}
/// See AbstractAttribute::manifest()
ChangeStatus manifest(Attributor &A) override {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
ConstantRange AssumedConstantRange = getAssumedConstantRange(A);
assert(!AssumedConstantRange.isFullSet() && "Invalid state");
auto &V = getAssociatedValue();
if (!AssumedConstantRange.isEmptySet() &&
!AssumedConstantRange.isSingleElement()) {
if (Instruction *I = dyn_cast<Instruction>(&V)) {
assert(I == getCtxI() && "Should not annotate an instruction which is "
"not the context instruction");
if (isa<CallInst>(I) || isa<LoadInst>(I))
if (setRangeMetadataIfisBetterRange(I, AssumedConstantRange))
Changed = ChangeStatus::CHANGED;
}
}
return Changed;
}
};
struct AAValueConstantRangeArgument final
: AAArgumentFromCallSiteArguments<
AAValueConstantRange, AAValueConstantRangeImpl, IntegerRangeState,
true /* BridgeCallBaseContext */> {
using Base = AAArgumentFromCallSiteArguments<
AAValueConstantRange, AAValueConstantRangeImpl, IntegerRangeState,
true /* BridgeCallBaseContext */>;
AAValueConstantRangeArgument(const IRPosition &IRP, Attributor &A)
: Base(IRP, A) {}
/// See AbstractAttribute::initialize(..).
void initialize(Attributor &A) override {
if (!getAnchorScope() || getAnchorScope()->isDeclaration()) {
indicatePessimisticFixpoint();
} else {
Base::initialize(A);
}
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_ARG_ATTR(value_range)
}
};
struct AAValueConstantRangeReturned
: AAReturnedFromReturnedValues<AAValueConstantRange,
AAValueConstantRangeImpl,
AAValueConstantRangeImpl::StateType,
/* PropagateCallBaseContext */ true> {
using Base =
AAReturnedFromReturnedValues<AAValueConstantRange,
AAValueConstantRangeImpl,
AAValueConstantRangeImpl::StateType,
/* PropagateCallBaseContext */ true>;
AAValueConstantRangeReturned(const IRPosition &IRP, Attributor &A)
: Base(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_FNRET_ATTR(value_range)
}
};
struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
AAValueConstantRangeFloating(const IRPosition &IRP, Attributor &A)
: AAValueConstantRangeImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
AAValueConstantRangeImpl::initialize(A);
if (isAtFixpoint())
return;
Value &V = getAssociatedValue();
if (auto *C = dyn_cast<ConstantInt>(&V)) {
unionAssumed(ConstantRange(C->getValue()));
indicateOptimisticFixpoint();
return;
}
if (isa<UndefValue>(&V)) {
// Collapse the undef state to 0.
unionAssumed(ConstantRange(APInt(getBitWidth(), 0)));
indicateOptimisticFixpoint();
return;
}
if (isa<CallBase>(&V))
return;
if (isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<CastInst>(&V))
return;
// If it is a load instruction with range metadata, use it.
if (LoadInst *LI = dyn_cast<LoadInst>(&V))
if (auto *RangeMD = LI->getMetadata(LLVMContext::MD_range)) {
intersectKnown(getConstantRangeFromMetadata(*RangeMD));
return;
}
// We can work with PHI and select instructions as we traverse their
// operands during the update.
if (isa<SelectInst>(V) || isa<PHINode>(V))
return;
// Otherwise we give up.
indicatePessimisticFixpoint();
LLVM_DEBUG(dbgs() << "[AAValueConstantRange] We give up: "
<< getAssociatedValue() << "\n");
}
bool calculateBinaryOperator(
Attributor &A, BinaryOperator *BinOp, IntegerRangeState &T,
const Instruction *CtxI,
SmallVectorImpl<const AAValueConstantRange *> &QuerriedAAs) {
Value *LHS = BinOp->getOperand(0);
Value *RHS = BinOp->getOperand(1);
// Simplify the operands first.
bool UsedAssumedInformation = false;
const auto &SimplifiedLHS =
A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()),
*this, UsedAssumedInformation);
if (!SimplifiedLHS.hasValue())
return true;
if (!SimplifiedLHS.getValue())
return false;
LHS = *SimplifiedLHS;
const auto &SimplifiedRHS =
A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()),
*this, UsedAssumedInformation);
if (!SimplifiedRHS.hasValue())
return true;
if (!SimplifiedRHS.getValue())
return false;
RHS = *SimplifiedRHS;
// TODO: Allow non integers as well.
if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
return false;
auto &LHSAA = A.getAAFor<AAValueConstantRange>(
*this, IRPosition::value(*LHS, getCallBaseContext()),
DepClassTy::REQUIRED);
QuerriedAAs.push_back(&LHSAA);
auto LHSAARange = LHSAA.getAssumedConstantRange(A, CtxI);
auto &RHSAA = A.getAAFor<AAValueConstantRange>(
*this, IRPosition::value(*RHS, getCallBaseContext()),
DepClassTy::REQUIRED);
QuerriedAAs.push_back(&RHSAA);
auto RHSAARange = RHSAA.getAssumedConstantRange(A, CtxI);
auto AssumedRange = LHSAARange.binaryOp(BinOp->getOpcode(), RHSAARange);
T.unionAssumed(AssumedRange);
// TODO: Track a known state too.
return T.isValidState();
}
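// Illustrative sketch (assumed example, not from the original change), using
// half-open ConstantRange notation: if the LHS is assumed in [0, 4) and the
// RHS in [1, 2), an `add` yields the assumed range [1, 5), which is unioned
// into T above.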
bool calculateCastInst(
Attributor &A, CastInst *CastI, IntegerRangeState &T,
const Instruction *CtxI,
SmallVectorImpl<const AAValueConstantRange *> &QuerriedAAs) {
assert(CastI->getNumOperands() == 1 && "Expected cast to be unary!");
// TODO: Allow non integers as well.
Value *OpV = CastI->getOperand(0);
// Simplify the operand first.
bool UsedAssumedInformation = false;
const auto &SimplifiedOpV =
A.getAssumedSimplified(IRPosition::value(*OpV, getCallBaseContext()),
*this, UsedAssumedInformation);
if (!SimplifiedOpV.hasValue())
return true;
if (!SimplifiedOpV.getValue())
return false;
OpV = *SimplifiedOpV;
if (!OpV->getType()->isIntegerTy())
return false;
auto &OpAA = A.getAAFor<AAValueConstantRange>(
*this, IRPosition::value(*OpV, getCallBaseContext()),
DepClassTy::REQUIRED);
QuerriedAAs.push_back(&OpAA);
T.unionAssumed(
OpAA.getAssumed().castOp(CastI->getOpcode(), getState().getBitWidth()));
return T.isValidState();
}
bool
calculateCmpInst(Attributor &A, CmpInst *CmpI, IntegerRangeState &T,
const Instruction *CtxI,
SmallVectorImpl<const AAValueConstantRange *> &QuerriedAAs) {
Value *LHS = CmpI->getOperand(0);
Value *RHS = CmpI->getOperand(1);
// Simplify the operands first.
bool UsedAssumedInformation = false;
const auto &SimplifiedLHS =
A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()),
*this, UsedAssumedInformation);
if (!SimplifiedLHS.hasValue())
return true;
if (!SimplifiedLHS.getValue())
return false;
LHS = *SimplifiedLHS;
const auto &SimplifiedRHS =
A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()),
*this, UsedAssumedInformation);
if (!SimplifiedRHS.hasValue())
return true;
if (!SimplifiedRHS.getValue())
return false;
RHS = *SimplifiedRHS;
// TODO: Allow non integers as well.
if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
return false;
auto &LHSAA = A.getAAFor<AAValueConstantRange>(
*this, IRPosition::value(*LHS, getCallBaseContext()),
DepClassTy::REQUIRED);
QuerriedAAs.push_back(&LHSAA);
auto &RHSAA = A.getAAFor<AAValueConstantRange>(
*this, IRPosition::value(*RHS, getCallBaseContext()),
DepClassTy::REQUIRED);
QuerriedAAs.push_back(&RHSAA);
auto LHSAARange = LHSAA.getAssumedConstantRange(A, CtxI);
auto RHSAARange = RHSAA.getAssumedConstantRange(A, CtxI);
// If one of them is an empty set, we can't decide.
if (LHSAARange.isEmptySet() || RHSAARange.isEmptySet())
return true;
bool MustTrue = false, MustFalse = false;
auto AllowedRegion =
ConstantRange::makeAllowedICmpRegion(CmpI->getPredicate(), RHSAARange);
if (AllowedRegion.intersectWith(LHSAARange).isEmptySet())
MustFalse = true;
if (LHSAARange.icmp(CmpI->getPredicate(), RHSAARange))
MustTrue = true;
assert((!MustTrue || !MustFalse) &&
"Either MustTrue or MustFalse should be false!");
if (MustTrue)
T.unionAssumed(ConstantRange(APInt(/* numBits */ 1, /* val */ 1)));
else if (MustFalse)
T.unionAssumed(ConstantRange(APInt(/* numBits */ 1, /* val */ 0)));
else
T.unionAssumed(ConstantRange(/* BitWidth */ 1, /* isFullSet */ true));
LLVM_DEBUG(dbgs() << "[AAValueConstantRange] " << *CmpI << " " << LHSAA
<< " " << RHSAA << "\n");
// TODO: Track a known state too.
return T.isValidState();
}
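// Editorial illustration, not part of the upstream change: with
// LHSAARange = [0, 10) and RHSAARange = [10, 20) under 'icmp ult', the call
// LHSAARange.icmp(ICMP_ULT, RHSAARange) holds because every LHS value (at
// most 9) is below every RHS value (at least 10), so MustTrue is set and the
// assumed range collapses to the i1 constant 1. If LHSAARange were [20, 30)
// instead, makeAllowedICmpRegion(ICMP_ULT, [10, 20)) = [0, 19) would be
// disjoint from it and MustFalse would be set.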
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
auto VisitValueCB = [&](Value &V, const Instruction *CtxI,
IntegerRangeState &T, bool Stripped) -> bool {
Instruction *I = dyn_cast<Instruction>(&V);
if (!I || isa<CallBase>(I)) {
// Simplify the operand first.
bool UsedAssumedInformation = false;
const auto &SimplifiedOpV =
A.getAssumedSimplified(IRPosition::value(V, getCallBaseContext()),
*this, UsedAssumedInformation);
if (!SimplifiedOpV.hasValue())
return true;
if (!SimplifiedOpV.getValue())
return false;
Value *VPtr = *SimplifiedOpV;
// If the value is not an instruction, we query the Attributor for the AA.
const auto &AA = A.getAAFor<AAValueConstantRange>(
*this, IRPosition::value(*VPtr, getCallBaseContext()),
DepClassTy::REQUIRED);
// The clamp operator is not used here so that the program point CtxI can be utilized.
T.unionAssumed(AA.getAssumedConstantRange(A, CtxI));
return T.isValidState();
}
SmallVector<const AAValueConstantRange *, 4> QuerriedAAs;
if (auto *BinOp = dyn_cast<BinaryOperator>(I)) {
if (!calculateBinaryOperator(A, BinOp, T, CtxI, QuerriedAAs))
return false;
} else if (auto *CmpI = dyn_cast<CmpInst>(I)) {
if (!calculateCmpInst(A, CmpI, T, CtxI, QuerriedAAs))
return false;
} else if (auto *CastI = dyn_cast<CastInst>(I)) {
if (!calculateCastInst(A, CastI, T, CtxI, QuerriedAAs))
return false;
} else {
// Give up with other instructions.
// TODO: Add other instructions
T.indicatePessimisticFixpoint();
return false;
}
// Catch circular reasoning in a pessimistic way for now.
// TODO: Check how the range evolves and if we stripped anything, see also
// AADereferenceable or AAAlign for similar situations.
for (const AAValueConstantRange *QueriedAA : QuerriedAAs) {
if (QueriedAA != this)
continue;
// If we are in a steady state we do not need to worry.
if (T.getAssumed() == getState().getAssumed())
continue;
T.indicatePessimisticFixpoint();
}
return T.isValidState();
};
IntegerRangeState T(getBitWidth());
+ bool UsedAssumedInformation = false;
if (!genericValueTraversal<IntegerRangeState>(A, getIRPosition(), *this, T,
VisitValueCB, getCtxI(),
+ UsedAssumedInformation,
/* UseValueSimplify */ false))
return indicatePessimisticFixpoint();
// Ensure that long def-use chains can't cause circular reasoning either by
// introducing a cutoff below.
if (clampStateAndIndicateChange(getState(), T) == ChangeStatus::UNCHANGED)
return ChangeStatus::UNCHANGED;
if (++NumChanges > MaxNumChanges) {
LLVM_DEBUG(dbgs() << "[AAValueConstantRange] performed " << NumChanges
<< " but only " << MaxNumChanges
<< " are allowed to avoid cyclic reasoning.");
return indicatePessimisticFixpoint();
}
return ChangeStatus::CHANGED;
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_FLOATING_ATTR(value_range)
}
/// Tracker to bail after too many widening steps of the constant range.
int NumChanges = 0;
/// Upper bound for the number of allowed changes (=widening steps) for the
/// constant range before we give up.
static constexpr int MaxNumChanges = 5;
};
struct AAValueConstantRangeFunction : AAValueConstantRangeImpl {
AAValueConstantRangeFunction(const IRPosition &IRP, Attributor &A)
: AAValueConstantRangeImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
ChangeStatus updateImpl(Attributor &A) override {
llvm_unreachable("AAValueConstantRange(Function|CallSite)::updateImpl will "
"not be called");
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(value_range) }
};
struct AAValueConstantRangeCallSite : AAValueConstantRangeFunction {
AAValueConstantRangeCallSite(const IRPosition &IRP, Attributor &A)
: AAValueConstantRangeFunction(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(value_range) }
};
struct AAValueConstantRangeCallSiteReturned
: AACallSiteReturnedFromReturned<AAValueConstantRange,
AAValueConstantRangeImpl,
AAValueConstantRangeImpl::StateType,
/* IntroduceCallBaseContext */ true> {
AAValueConstantRangeCallSiteReturned(const IRPosition &IRP, Attributor &A)
: AACallSiteReturnedFromReturned<AAValueConstantRange,
AAValueConstantRangeImpl,
AAValueConstantRangeImpl::StateType,
/* IntroduceCallBaseContext */ true>(IRP,
A) {
}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
// If it is a load instruction with range metadata, use the metadata.
if (CallInst *CI = dyn_cast<CallInst>(&getAssociatedValue()))
if (auto *RangeMD = CI->getMetadata(LLVMContext::MD_range))
intersectKnown(getConstantRangeFromMetadata(*RangeMD));
AAValueConstantRangeImpl::initialize(A);
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_CSRET_ATTR(value_range)
}
};
struct AAValueConstantRangeCallSiteArgument : AAValueConstantRangeFloating {
AAValueConstantRangeCallSiteArgument(const IRPosition &IRP, Attributor &A)
: AAValueConstantRangeFloating(IRP, A) {}
/// See AbstractAttribute::manifest()
ChangeStatus manifest(Attributor &A) override {
return ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_CSARG_ATTR(value_range)
}
};
/// ------------------ Potential Values Attribute -------------------------
struct AAPotentialValuesImpl : AAPotentialValues {
using StateType = PotentialConstantIntValuesState;
AAPotentialValuesImpl(const IRPosition &IRP, Attributor &A)
: AAPotentialValues(IRP, A) {}
/// See AbstractAttribute::initialize(..).
void initialize(Attributor &A) override {
if (A.hasSimplificationCallback(getIRPosition()))
indicatePessimisticFixpoint();
else
AAPotentialValues::initialize(A);
}
/// See AbstractAttribute::getAsStr().
const std::string getAsStr() const override {
std::string Str;
llvm::raw_string_ostream OS(Str);
OS << getState();
return OS.str();
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
return indicatePessimisticFixpoint();
}
};
struct AAPotentialValuesArgument final
: AAArgumentFromCallSiteArguments<AAPotentialValues, AAPotentialValuesImpl,
PotentialConstantIntValuesState> {
using Base =
AAArgumentFromCallSiteArguments<AAPotentialValues, AAPotentialValuesImpl,
PotentialConstantIntValuesState>;
AAPotentialValuesArgument(const IRPosition &IRP, Attributor &A)
: Base(IRP, A) {}
/// See AbstractAttribute::initialize(..).
void initialize(Attributor &A) override {
if (!getAnchorScope() || getAnchorScope()->isDeclaration()) {
indicatePessimisticFixpoint();
} else {
Base::initialize(A);
}
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_ARG_ATTR(potential_values)
}
};
struct AAPotentialValuesReturned
: AAReturnedFromReturnedValues<AAPotentialValues, AAPotentialValuesImpl> {
using Base =
AAReturnedFromReturnedValues<AAPotentialValues, AAPotentialValuesImpl>;
AAPotentialValuesReturned(const IRPosition &IRP, Attributor &A)
: Base(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_FNRET_ATTR(potential_values)
}
};
struct AAPotentialValuesFloating : AAPotentialValuesImpl {
AAPotentialValuesFloating(const IRPosition &IRP, Attributor &A)
: AAPotentialValuesImpl(IRP, A) {}
/// See AbstractAttribute::initialize(..).
void initialize(Attributor &A) override {
AAPotentialValuesImpl::initialize(A);
if (isAtFixpoint())
return;
Value &V = getAssociatedValue();
if (auto *C = dyn_cast<ConstantInt>(&V)) {
unionAssumed(C->getValue());
indicateOptimisticFixpoint();
return;
}
if (isa<UndefValue>(&V)) {
unionAssumedWithUndef();
indicateOptimisticFixpoint();
return;
}
if (isa<BinaryOperator>(&V) || isa<ICmpInst>(&V) || isa<CastInst>(&V))
return;
if (isa<SelectInst>(V) || isa<PHINode>(V) || isa<LoadInst>(V))
return;
indicatePessimisticFixpoint();
LLVM_DEBUG(dbgs() << "[AAPotentialValues] We give up: "
<< getAssociatedValue() << "\n");
}
static bool calculateICmpInst(const ICmpInst *ICI, const APInt &LHS,
const APInt &RHS) {
return ICmpInst::compare(LHS, RHS, ICI->getPredicate());
}
static APInt calculateCastInst(const CastInst *CI, const APInt &Src,
uint32_t ResultBitWidth) {
Instruction::CastOps CastOp = CI->getOpcode();
switch (CastOp) {
default:
llvm_unreachable("unsupported or not integer cast");
case Instruction::Trunc:
return Src.trunc(ResultBitWidth);
case Instruction::SExt:
return Src.sext(ResultBitWidth);
case Instruction::ZExt:
return Src.zext(ResultBitWidth);
case Instruction::BitCast:
return Src;
}
}
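// Editorial illustration, not part of the upstream change: the helper above
// follows plain APInt semantics. Assuming an i8 source value 0xF0:
//   APInt Src(8, 0xF0);
//   Src.zext(16);  // 0x00F0 for a zext
//   Src.sext(16);  // 0xFFF0 for a sext
//   Src.trunc(4);  // 0x0 for a trunc to i4
// updateWithCastInst below applies this mapping to every potential value of
// the source operand.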
static APInt calculateBinaryOperator(const BinaryOperator *BinOp,
const APInt &LHS, const APInt &RHS,
bool &SkipOperation, bool &Unsupported) {
Instruction::BinaryOps BinOpcode = BinOp->getOpcode();
// Unsupported is set to true when the binary operator is not supported.
// SkipOperation is set to true when UB occurs with the given operand pair
// (LHS, RHS).
// TODO: we should look at nsw and nuw keywords to handle operations
// that create poison or undef value.
switch (BinOpcode) {
default:
Unsupported = true;
return LHS;
case Instruction::Add:
return LHS + RHS;
case Instruction::Sub:
return LHS - RHS;
case Instruction::Mul:
return LHS * RHS;
case Instruction::UDiv:
if (RHS.isZero()) {
SkipOperation = true;
return LHS;
}
return LHS.udiv(RHS);
case Instruction::SDiv:
if (RHS.isZero()) {
SkipOperation = true;
return LHS;
}
return LHS.sdiv(RHS);
case Instruction::URem:
if (RHS.isZero()) {
SkipOperation = true;
return LHS;
}
return LHS.urem(RHS);
case Instruction::SRem:
if (RHS.isZero()) {
SkipOperation = true;
return LHS;
}
return LHS.srem(RHS);
case Instruction::Shl:
return LHS.shl(RHS);
case Instruction::LShr:
return LHS.lshr(RHS);
case Instruction::AShr:
return LHS.ashr(RHS);
case Instruction::And:
return LHS & RHS;
case Instruction::Or:
return LHS | RHS;
case Instruction::Xor:
return LHS ^ RHS;
}
}
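// Editorial note, not part of the upstream change: the SkipOperation flag
// lets the caller drop operand pairs that would be immediate UB instead of
// giving up entirely. Assuming a LHS potential set of {8} and a RHS set of
// {0, 4} for a 'udiv':
//   udiv 8, 0  -> skipped (division by zero)
//   udiv 8, 4  -> 2 is added to the assumed set
// so the result set becomes {2} rather than an invalid state.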
bool calculateBinaryOperatorAndTakeUnion(const BinaryOperator *BinOp,
const APInt &LHS, const APInt &RHS) {
bool SkipOperation = false;
bool Unsupported = false;
APInt Result =
calculateBinaryOperator(BinOp, LHS, RHS, SkipOperation, Unsupported);
if (Unsupported)
return false;
// If SkipOperation is true, we can ignore this operand pair (L, R).
if (!SkipOperation)
unionAssumed(Result);
return isValidState();
}
ChangeStatus updateWithICmpInst(Attributor &A, ICmpInst *ICI) {
auto AssumedBefore = getAssumed();
Value *LHS = ICI->getOperand(0);
Value *RHS = ICI->getOperand(1);
// Simplify the operands first.
bool UsedAssumedInformation = false;
const auto &SimplifiedLHS =
A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()),
*this, UsedAssumedInformation);
if (!SimplifiedLHS.hasValue())
return ChangeStatus::UNCHANGED;
if (!SimplifiedLHS.getValue())
return indicatePessimisticFixpoint();
LHS = *SimplifiedLHS;
const auto &SimplifiedRHS =
A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()),
*this, UsedAssumedInformation);
if (!SimplifiedRHS.hasValue())
return ChangeStatus::UNCHANGED;
if (!SimplifiedRHS.getValue())
return indicatePessimisticFixpoint();
RHS = *SimplifiedRHS;
if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
return indicatePessimisticFixpoint();
auto &LHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*LHS),
DepClassTy::REQUIRED);
if (!LHSAA.isValidState())
return indicatePessimisticFixpoint();
auto &RHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*RHS),
DepClassTy::REQUIRED);
if (!RHSAA.isValidState())
return indicatePessimisticFixpoint();
const DenseSet<APInt> &LHSAAPVS = LHSAA.getAssumedSet();
const DenseSet<APInt> &RHSAAPVS = RHSAA.getAssumedSet();
// TODO: make use of undef flag to limit potential values aggressively.
bool MaybeTrue = false, MaybeFalse = false;
const APInt Zero(RHS->getType()->getIntegerBitWidth(), 0);
if (LHSAA.undefIsContained() && RHSAA.undefIsContained()) {
// The result of any comparison between undefs can be soundly replaced
// with undef.
unionAssumedWithUndef();
} else if (LHSAA.undefIsContained()) {
for (const APInt &R : RHSAAPVS) {
bool CmpResult = calculateICmpInst(ICI, Zero, R);
MaybeTrue |= CmpResult;
MaybeFalse |= !CmpResult;
if (MaybeTrue & MaybeFalse)
return indicatePessimisticFixpoint();
}
} else if (RHSAA.undefIsContained()) {
for (const APInt &L : LHSAAPVS) {
bool CmpResult = calculateICmpInst(ICI, L, Zero);
MaybeTrue |= CmpResult;
MaybeFalse |= !CmpResult;
if (MaybeTrue & MaybeFalse)
return indicatePessimisticFixpoint();
}
} else {
for (const APInt &L : LHSAAPVS) {
for (const APInt &R : RHSAAPVS) {
bool CmpResult = calculateICmpInst(ICI, L, R);
MaybeTrue |= CmpResult;
MaybeFalse |= !CmpResult;
if (MaybeTrue & MaybeFalse)
return indicatePessimisticFixpoint();
}
}
}
if (MaybeTrue)
unionAssumed(APInt(/* numBits */ 1, /* val */ 1));
if (MaybeFalse)
unionAssumed(APInt(/* numBits */ 1, /* val */ 0));
return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
: ChangeStatus::CHANGED;
}
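// Editorial sketch, not part of the upstream change: with assumed sets
// LHS = {1, 2} and RHS = {3} under 'icmp ult', every pair compares true, so
// only MaybeTrue is set and the assumed i1 set becomes {1}. If RHS were
// {0, 3}, both outcomes occur for LHS value 1 and the loops above bail to a
// pessimistic fixpoint.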
ChangeStatus updateWithSelectInst(Attributor &A, SelectInst *SI) {
auto AssumedBefore = getAssumed();
Value *LHS = SI->getTrueValue();
Value *RHS = SI->getFalseValue();
// Simplify the operands first.
bool UsedAssumedInformation = false;
const auto &SimplifiedLHS =
A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()),
*this, UsedAssumedInformation);
if (!SimplifiedLHS.hasValue())
return ChangeStatus::UNCHANGED;
if (!SimplifiedLHS.getValue())
return indicatePessimisticFixpoint();
LHS = *SimplifiedLHS;
const auto &SimplifiedRHS =
A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()),
*this, UsedAssumedInformation);
if (!SimplifiedRHS.hasValue())
return ChangeStatus::UNCHANGED;
if (!SimplifiedRHS.getValue())
return indicatePessimisticFixpoint();
RHS = *SimplifiedRHS;
if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
return indicatePessimisticFixpoint();
Optional<Constant *> C = A.getAssumedConstant(*SI->getCondition(), *this,
UsedAssumedInformation);
// Check if we only need one operand.
bool OnlyLeft = false, OnlyRight = false;
if (C.hasValue() && *C && (*C)->isOneValue())
OnlyLeft = true;
else if (C.hasValue() && *C && (*C)->isZeroValue())
OnlyRight = true;
const AAPotentialValues *LHSAA = nullptr, *RHSAA = nullptr;
if (!OnlyRight) {
LHSAA = &A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*LHS),
DepClassTy::REQUIRED);
if (!LHSAA->isValidState())
return indicatePessimisticFixpoint();
}
if (!OnlyLeft) {
RHSAA = &A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*RHS),
DepClassTy::REQUIRED);
if (!RHSAA->isValidState())
return indicatePessimisticFixpoint();
}
if (!LHSAA || !RHSAA) {
// select (true/false), lhs, rhs
auto *OpAA = LHSAA ? LHSAA : RHSAA;
if (OpAA->undefIsContained())
unionAssumedWithUndef();
else
unionAssumed(*OpAA);
} else if (LHSAA->undefIsContained() && RHSAA->undefIsContained()) {
// select i1 *, undef , undef => undef
unionAssumedWithUndef();
} else {
unionAssumed(*LHSAA);
unionAssumed(*RHSAA);
}
return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
: ChangeStatus::CHANGED;
}
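// Editorial sketch, not part of the upstream change: for
// 'select i1 %c, i32 %x, i32 %y', if %c simplifies to the constant true only
// the potential set of %x is unioned in (the OnlyLeft case); if %c is
// unknown, the sets of both operands are unioned, except that two
// undef-containing operands collapse to undef as noted above.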
ChangeStatus updateWithCastInst(Attributor &A, CastInst *CI) {
auto AssumedBefore = getAssumed();
if (!CI->isIntegerCast())
return indicatePessimisticFixpoint();
assert(CI->getNumOperands() == 1 && "Expected cast to be unary!");
uint32_t ResultBitWidth = CI->getDestTy()->getIntegerBitWidth();
Value *Src = CI->getOperand(0);
// Simplify the operand first.
bool UsedAssumedInformation = false;
const auto &SimplifiedSrc =
A.getAssumedSimplified(IRPosition::value(*Src, getCallBaseContext()),
*this, UsedAssumedInformation);
if (!SimplifiedSrc.hasValue())
return ChangeStatus::UNCHANGED;
if (!SimplifiedSrc.getValue())
return indicatePessimisticFixpoint();
Src = *SimplifiedSrc;
auto &SrcAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*Src),
DepClassTy::REQUIRED);
if (!SrcAA.isValidState())
return indicatePessimisticFixpoint();
const DenseSet<APInt> &SrcAAPVS = SrcAA.getAssumedSet();
if (SrcAA.undefIsContained())
unionAssumedWithUndef();
else {
for (const APInt &S : SrcAAPVS) {
APInt T = calculateCastInst(CI, S, ResultBitWidth);
unionAssumed(T);
}
}
return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
: ChangeStatus::CHANGED;
}
ChangeStatus updateWithBinaryOperator(Attributor &A, BinaryOperator *BinOp) {
auto AssumedBefore = getAssumed();
Value *LHS = BinOp->getOperand(0);
Value *RHS = BinOp->getOperand(1);
// Simplify the operands first.
bool UsedAssumedInformation = false;
const auto &SimplifiedLHS =
A.getAssumedSimplified(IRPosition::value(*LHS, getCallBaseContext()),
*this, UsedAssumedInformation);
if (!SimplifiedLHS.hasValue())
return ChangeStatus::UNCHANGED;
if (!SimplifiedLHS.getValue())
return indicatePessimisticFixpoint();
LHS = *SimplifiedLHS;
const auto &SimplifiedRHS =
A.getAssumedSimplified(IRPosition::value(*RHS, getCallBaseContext()),
*this, UsedAssumedInformation);
if (!SimplifiedRHS.hasValue())
return ChangeStatus::UNCHANGED;
if (!SimplifiedRHS.getValue())
return indicatePessimisticFixpoint();
RHS = *SimplifiedRHS;
if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
return indicatePessimisticFixpoint();
auto &LHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*LHS),
DepClassTy::REQUIRED);
if (!LHSAA.isValidState())
return indicatePessimisticFixpoint();
auto &RHSAA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(*RHS),
DepClassTy::REQUIRED);
if (!RHSAA.isValidState())
return indicatePessimisticFixpoint();
const DenseSet<APInt> &LHSAAPVS = LHSAA.getAssumedSet();
const DenseSet<APInt> &RHSAAPVS = RHSAA.getAssumedSet();
const APInt Zero = APInt(LHS->getType()->getIntegerBitWidth(), 0);
// TODO: make use of undef flag to limit potential values aggressively.
if (LHSAA.undefIsContained() && RHSAA.undefIsContained()) {
if (!calculateBinaryOperatorAndTakeUnion(BinOp, Zero, Zero))
return indicatePessimisticFixpoint();
} else if (LHSAA.undefIsContained()) {
for (const APInt &R : RHSAAPVS) {
if (!calculateBinaryOperatorAndTakeUnion(BinOp, Zero, R))
return indicatePessimisticFixpoint();
}
} else if (RHSAA.undefIsContained()) {
for (const APInt &L : LHSAAPVS) {
if (!calculateBinaryOperatorAndTakeUnion(BinOp, L, Zero))
return indicatePessimisticFixpoint();
}
} else {
for (const APInt &L : LHSAAPVS) {
for (const APInt &R : RHSAAPVS) {
if (!calculateBinaryOperatorAndTakeUnion(BinOp, L, R))
return indicatePessimisticFixpoint();
}
}
}
return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
: ChangeStatus::CHANGED;
}
ChangeStatus updateWithPHINode(Attributor &A, PHINode *PHI) {
auto AssumedBefore = getAssumed();
for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) {
Value *IncomingValue = PHI->getIncomingValue(u);
// Simplify the operand first.
bool UsedAssumedInformation = false;
const auto &SimplifiedIncomingValue = A.getAssumedSimplified(
IRPosition::value(*IncomingValue, getCallBaseContext()), *this,
UsedAssumedInformation);
if (!SimplifiedIncomingValue.hasValue())
continue;
if (!SimplifiedIncomingValue.getValue())
return indicatePessimisticFixpoint();
IncomingValue = *SimplifiedIncomingValue;
auto &PotentialValuesAA = A.getAAFor<AAPotentialValues>(
*this, IRPosition::value(*IncomingValue), DepClassTy::REQUIRED);
if (!PotentialValuesAA.isValidState())
return indicatePessimisticFixpoint();
if (PotentialValuesAA.undefIsContained())
unionAssumedWithUndef();
else
unionAssumed(PotentialValuesAA.getAssumed());
}
return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
: ChangeStatus::CHANGED;
}
ChangeStatus updateWithLoad(Attributor &A, LoadInst &L) {
if (!L.getType()->isIntegerTy())
return indicatePessimisticFixpoint();
auto Union = [&](Value &V) {
if (isa<UndefValue>(V)) {
unionAssumedWithUndef();
return true;
}
if (ConstantInt *CI = dyn_cast<ConstantInt>(&V)) {
unionAssumed(CI->getValue());
return true;
}
return false;
};
auto AssumedBefore = getAssumed();
if (!AAValueSimplifyImpl::handleLoad(A, *this, L, Union))
return indicatePessimisticFixpoint();
return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
: ChangeStatus::CHANGED;
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
Value &V = getAssociatedValue();
Instruction *I = dyn_cast<Instruction>(&V);
if (auto *ICI = dyn_cast<ICmpInst>(I))
return updateWithICmpInst(A, ICI);
if (auto *SI = dyn_cast<SelectInst>(I))
return updateWithSelectInst(A, SI);
if (auto *CI = dyn_cast<CastInst>(I))
return updateWithCastInst(A, CI);
if (auto *BinOp = dyn_cast<BinaryOperator>(I))
return updateWithBinaryOperator(A, BinOp);
if (auto *PHI = dyn_cast<PHINode>(I))
return updateWithPHINode(A, PHI);
if (auto *L = dyn_cast<LoadInst>(I))
return updateWithLoad(A, *L);
return indicatePessimisticFixpoint();
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_FLOATING_ATTR(potential_values)
}
};
struct AAPotentialValuesFunction : AAPotentialValuesImpl {
AAPotentialValuesFunction(const IRPosition &IRP, Attributor &A)
: AAPotentialValuesImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
ChangeStatus updateImpl(Attributor &A) override {
llvm_unreachable("AAPotentialValues(Function|CallSite)::updateImpl will "
"not be called");
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_FN_ATTR(potential_values)
}
};
struct AAPotentialValuesCallSite : AAPotentialValuesFunction {
AAPotentialValuesCallSite(const IRPosition &IRP, Attributor &A)
: AAPotentialValuesFunction(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_CS_ATTR(potential_values)
}
};
struct AAPotentialValuesCallSiteReturned
: AACallSiteReturnedFromReturned<AAPotentialValues, AAPotentialValuesImpl> {
AAPotentialValuesCallSiteReturned(const IRPosition &IRP, Attributor &A)
: AACallSiteReturnedFromReturned<AAPotentialValues,
AAPotentialValuesImpl>(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_CSRET_ATTR(potential_values)
}
};
struct AAPotentialValuesCallSiteArgument : AAPotentialValuesFloating {
AAPotentialValuesCallSiteArgument(const IRPosition &IRP, Attributor &A)
: AAPotentialValuesFloating(IRP, A) {}
/// See AbstractAttribute::initialize(..).
void initialize(Attributor &A) override {
AAPotentialValuesImpl::initialize(A);
if (isAtFixpoint())
return;
Value &V = getAssociatedValue();
if (auto *C = dyn_cast<ConstantInt>(&V)) {
unionAssumed(C->getValue());
indicateOptimisticFixpoint();
return;
}
if (isa<UndefValue>(&V)) {
unionAssumedWithUndef();
indicateOptimisticFixpoint();
return;
}
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
Value &V = getAssociatedValue();
auto AssumedBefore = getAssumed();
auto &AA = A.getAAFor<AAPotentialValues>(*this, IRPosition::value(V),
DepClassTy::REQUIRED);
const auto &S = AA.getAssumed();
unionAssumed(S);
return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
: ChangeStatus::CHANGED;
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_CSARG_ATTR(potential_values)
}
};
/// ------------------------ NoUndef Attribute ---------------------------------
struct AANoUndefImpl : AANoUndef {
AANoUndefImpl(const IRPosition &IRP, Attributor &A) : AANoUndef(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
if (getIRPosition().hasAttr({Attribute::NoUndef})) {
indicateOptimisticFixpoint();
return;
}
Value &V = getAssociatedValue();
if (isa<UndefValue>(V))
indicatePessimisticFixpoint();
else if (isa<FreezeInst>(V))
indicateOptimisticFixpoint();
else if (getPositionKind() != IRPosition::IRP_RETURNED &&
isGuaranteedNotToBeUndefOrPoison(&V))
indicateOptimisticFixpoint();
else
AANoUndef::initialize(A);
}
/// See followUsesInMBEC
bool followUseInMBEC(Attributor &A, const Use *U, const Instruction *I,
AANoUndef::StateType &State) {
const Value *UseV = U->get();
const DominatorTree *DT = nullptr;
AssumptionCache *AC = nullptr;
InformationCache &InfoCache = A.getInfoCache();
if (Function *F = getAnchorScope()) {
DT = InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*F);
AC = InfoCache.getAnalysisResultForFunction<AssumptionAnalysis>(*F);
}
State.setKnown(isGuaranteedNotToBeUndefOrPoison(UseV, AC, I, DT));
bool TrackUse = false;
// Track use for instructions which must produce undef or poison bits when
// at least one operand contains such bits.
if (isa<CastInst>(*I) || isa<GetElementPtrInst>(*I))
TrackUse = true;
return TrackUse;
}
/// See AbstractAttribute::getAsStr().
const std::string getAsStr() const override {
return getAssumed() ? "noundef" : "may-undef-or-poison";
}
ChangeStatus manifest(Attributor &A) override {
// We don't manifest noundef attribute for dead positions because the
// associated values with dead positions would be replaced with undef
// values.
bool UsedAssumedInformation = false;
if (A.isAssumedDead(getIRPosition(), nullptr, nullptr,
UsedAssumedInformation))
return ChangeStatus::UNCHANGED;
// A position whose simplified value does not have any value is
// considered to be dead. We don't manifest noundef in such positions for
// the same reason above.
if (!A.getAssumedSimplified(getIRPosition(), *this, UsedAssumedInformation)
.hasValue())
return ChangeStatus::UNCHANGED;
return AANoUndef::manifest(A);
}
};
struct AANoUndefFloating : public AANoUndefImpl {
AANoUndefFloating(const IRPosition &IRP, Attributor &A)
: AANoUndefImpl(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
AANoUndefImpl::initialize(A);
if (!getState().isAtFixpoint())
if (Instruction *CtxI = getCtxI())
followUsesInMBEC(*this, A, getState(), *CtxI);
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
auto VisitValueCB = [&](Value &V, const Instruction *CtxI,
AANoUndef::StateType &T, bool Stripped) -> bool {
const auto &AA = A.getAAFor<AANoUndef>(*this, IRPosition::value(V),
DepClassTy::REQUIRED);
if (!Stripped && this == &AA) {
T.indicatePessimisticFixpoint();
} else {
const AANoUndef::StateType &S =
static_cast<const AANoUndef::StateType &>(AA.getState());
T ^= S;
}
return T.isValidState();
};
StateType T;
+ bool UsedAssumedInformation = false;
if (!genericValueTraversal<StateType>(A, getIRPosition(), *this, T,
- VisitValueCB, getCtxI()))
+ VisitValueCB, getCtxI(),
+ UsedAssumedInformation))
return indicatePessimisticFixpoint();
return clampStateAndIndicateChange(getState(), T);
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(noundef) }
};
struct AANoUndefReturned final
: AAReturnedFromReturnedValues<AANoUndef, AANoUndefImpl> {
AANoUndefReturned(const IRPosition &IRP, Attributor &A)
: AAReturnedFromReturnedValues<AANoUndef, AANoUndefImpl>(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(noundef) }
};
struct AANoUndefArgument final
: AAArgumentFromCallSiteArguments<AANoUndef, AANoUndefImpl> {
AANoUndefArgument(const IRPosition &IRP, Attributor &A)
: AAArgumentFromCallSiteArguments<AANoUndef, AANoUndefImpl>(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(noundef) }
};
struct AANoUndefCallSiteArgument final : AANoUndefFloating {
AANoUndefCallSiteArgument(const IRPosition &IRP, Attributor &A)
: AANoUndefFloating(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(noundef) }
};
struct AANoUndefCallSiteReturned final
: AACallSiteReturnedFromReturned<AANoUndef, AANoUndefImpl> {
AANoUndefCallSiteReturned(const IRPosition &IRP, Attributor &A)
: AACallSiteReturnedFromReturned<AANoUndef, AANoUndefImpl>(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(noundef) }
};
struct AACallEdgesImpl : public AACallEdges {
AACallEdgesImpl(const IRPosition &IRP, Attributor &A) : AACallEdges(IRP, A) {}
virtual const SetVector<Function *> &getOptimisticEdges() const override {
return CalledFunctions;
}
virtual bool hasUnknownCallee() const override { return HasUnknownCallee; }
virtual bool hasNonAsmUnknownCallee() const override {
return HasUnknownCalleeNonAsm;
}
const std::string getAsStr() const override {
return "CallEdges[" + std::to_string(HasUnknownCallee) + "," +
std::to_string(CalledFunctions.size()) + "]";
}
void trackStatistics() const override {}
protected:
void addCalledFunction(Function *Fn, ChangeStatus &Change) {
if (CalledFunctions.insert(Fn)) {
Change = ChangeStatus::CHANGED;
LLVM_DEBUG(dbgs() << "[AACallEdges] New call edge: " << Fn->getName()
<< "\n");
}
}
void setHasUnknownCallee(bool NonAsm, ChangeStatus &Change) {
if (!HasUnknownCallee)
Change = ChangeStatus::CHANGED;
if (NonAsm && !HasUnknownCalleeNonAsm)
Change = ChangeStatus::CHANGED;
HasUnknownCalleeNonAsm |= NonAsm;
HasUnknownCallee = true;
}
private:
/// Optimistic set of functions that might be called by this position.
SetVector<Function *> CalledFunctions;
/// Is there any call with an unknown callee.
bool HasUnknownCallee = false;
/// Is there any call with an unknown callee, excluding any inline asm.
bool HasUnknownCalleeNonAsm = false;
};
struct AACallEdgesCallSite : public AACallEdgesImpl {
AACallEdgesCallSite(const IRPosition &IRP, Attributor &A)
: AACallEdgesImpl(IRP, A) {}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
ChangeStatus Change = ChangeStatus::UNCHANGED;
auto VisitValue = [&](Value &V, const Instruction *CtxI, bool &HasUnknown,
bool Stripped) -> bool {
if (Function *Fn = dyn_cast<Function>(&V)) {
addCalledFunction(Fn, Change);
} else {
LLVM_DEBUG(dbgs() << "[AACallEdges] Unrecognized value: " << V << "\n");
setHasUnknownCallee(true, Change);
}
// Explore all values.
return true;
};
// Process any value that we might call.
auto ProcessCalledOperand = [&](Value *V) {
bool DummyValue = false;
+ bool UsedAssumedInformation = false;
if (!genericValueTraversal<bool>(A, IRPosition::value(*V), *this,
DummyValue, VisitValue, nullptr,
- false)) {
+ UsedAssumedInformation, false)) {
// If we haven't gone through all values, assume that there are unknown
// callees.
setHasUnknownCallee(true, Change);
}
};
CallBase *CB = cast<CallBase>(getCtxI());
if (CB->isInlineAsm()) {
setHasUnknownCallee(false, Change);
return Change;
}
// Process callee metadata if available.
if (auto *MD = getCtxI()->getMetadata(LLVMContext::MD_callees)) {
for (auto &Op : MD->operands()) {
Function *Callee = mdconst::dyn_extract_or_null<Function>(Op);
if (Callee)
addCalledFunction(Callee, Change);
}
return Change;
}
// The most simple case.
ProcessCalledOperand(CB->getCalledOperand());
// Process callback functions.
SmallVector<const Use *, 4u> CallbackUses;
AbstractCallSite::getCallbackUses(*CB, CallbackUses);
for (const Use *U : CallbackUses)
ProcessCalledOperand(U->get());
return Change;
}
};
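// Editorial illustration with hypothetical IR, not part of the upstream
// change: an indirect call annotated with callee metadata such as
//   call void %fp(), !callees !0
//   !0 = !{void ()* @f, void ()* @g}
// lets the update above record @f and @g as optimistic call edges without
// marking the call site as having an unknown callee.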
struct AACallEdgesFunction : public AACallEdgesImpl {
AACallEdgesFunction(const IRPosition &IRP, Attributor &A)
: AACallEdgesImpl(IRP, A) {}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
ChangeStatus Change = ChangeStatus::UNCHANGED;
auto ProcessCallInst = [&](Instruction &Inst) {
CallBase &CB = cast<CallBase>(Inst);
auto &CBEdges = A.getAAFor<AACallEdges>(
*this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
if (CBEdges.hasNonAsmUnknownCallee())
setHasUnknownCallee(true, Change);
if (CBEdges.hasUnknownCallee())
setHasUnknownCallee(false, Change);
for (Function *F : CBEdges.getOptimisticEdges())
addCalledFunction(F, Change);
return true;
};
// Visit all callable instructions.
bool UsedAssumedInformation = false;
if (!A.checkForAllCallLikeInstructions(ProcessCallInst, *this,
- UsedAssumedInformation)) {
+ UsedAssumedInformation,
+ /* CheckBBLivenessOnly */ true)) {
// If we haven't looked at all call like instructions, assume that there
// are unknown callees.
setHasUnknownCallee(true, Change);
}
return Change;
}
};
struct AAFunctionReachabilityFunction : public AAFunctionReachability {
private:
struct QuerySet {
void markReachable(const Function &Fn) {
Reachable.insert(&Fn);
Unreachable.erase(&Fn);
}
/// If there is no information about the function, None is returned.
Optional<bool> isCachedReachable(const Function &Fn) {
// Assume that we can reach the function.
// TODO: Be more specific with the unknown callee.
if (CanReachUnknownCallee)
return true;
if (Reachable.count(&Fn))
return true;
if (Unreachable.count(&Fn))
return false;
return llvm::None;
}
/// Set of functions that we know for sure is reachable.
DenseSet<const Function *> Reachable;
/// Set of functions that are unreachable, but might become reachable.
DenseSet<const Function *> Unreachable;
/// If we can reach a function with a call to an unknown function, we assume
/// that we can reach any function.
bool CanReachUnknownCallee = false;
};
struct QueryResolver : public QuerySet {
ChangeStatus update(Attributor &A, const AAFunctionReachability &AA,
ArrayRef<const AACallEdges *> AAEdgesList) {
ChangeStatus Change = ChangeStatus::UNCHANGED;
for (auto *AAEdges : AAEdgesList) {
if (AAEdges->hasUnknownCallee()) {
if (!CanReachUnknownCallee)
Change = ChangeStatus::CHANGED;
CanReachUnknownCallee = true;
return Change;
}
}
for (const Function *Fn : make_early_inc_range(Unreachable)) {
if (checkIfReachable(A, AA, AAEdgesList, *Fn)) {
Change = ChangeStatus::CHANGED;
markReachable(*Fn);
}
}
return Change;
}
bool isReachable(Attributor &A, AAFunctionReachability &AA,
ArrayRef<const AACallEdges *> AAEdgesList,
const Function &Fn) {
Optional<bool> Cached = isCachedReachable(Fn);
if (Cached.hasValue())
return Cached.getValue();
// The query was not cached, thus it is new. We need to request an update
// explicitly to make sure the information is properly run to a
// fixpoint.
A.registerForUpdate(AA);
// We need to assume that this function can't reach Fn to prevent
// an infinite loop if this function is recursive.
Unreachable.insert(&Fn);
bool Result = checkIfReachable(A, AA, AAEdgesList, Fn);
if (Result)
markReachable(Fn);
return Result;
}
bool checkIfReachable(Attributor &A, const AAFunctionReachability &AA,
ArrayRef<const AACallEdges *> AAEdgesList,
const Function &Fn) const {
// Handle the most trivial case first.
for (auto *AAEdges : AAEdgesList) {
const SetVector<Function *> &Edges = AAEdges->getOptimisticEdges();
if (Edges.count(const_cast<Function *>(&Fn)))
return true;
}
SmallVector<const AAFunctionReachability *, 8> Deps;
for (auto &AAEdges : AAEdgesList) {
const SetVector<Function *> &Edges = AAEdges->getOptimisticEdges();
for (Function *Edge : Edges) {
// We don't need a dependency if the result is reachable.
const AAFunctionReachability &EdgeReachability =
A.getAAFor<AAFunctionReachability>(
AA, IRPosition::function(*Edge), DepClassTy::NONE);
Deps.push_back(&EdgeReachability);
if (EdgeReachability.canReach(A, Fn))
return true;
}
}
// The result is false for now, set dependencies and leave.
for (auto *Dep : Deps)
A.recordDependence(*Dep, AA, DepClassTy::REQUIRED);
return false;
}
};
/// Get call edges that can be reached by this instruction.
bool getReachableCallEdges(Attributor &A, const AAReachability &Reachability,
const Instruction &Inst,
SmallVector<const AACallEdges *> &Result) const {
// Determine call like instructions that we can reach from the inst.
auto CheckCallBase = [&](Instruction &CBInst) {
if (!Reachability.isAssumedReachable(A, Inst, CBInst))
return true;
auto &CB = cast<CallBase>(CBInst);
const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
*this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
Result.push_back(&AAEdges);
return true;
};
bool UsedAssumedInformation = false;
return A.checkForAllCallLikeInstructions(CheckCallBase, *this,
UsedAssumedInformation,
/* CheckBBLivenessOnly */ true);
}
public:
AAFunctionReachabilityFunction(const IRPosition &IRP, Attributor &A)
: AAFunctionReachability(IRP, A) {}
bool canReach(Attributor &A, const Function &Fn) const override {
if (!isValidState())
return true;
const AACallEdges &AAEdges =
A.getAAFor<AACallEdges>(*this, getIRPosition(), DepClassTy::REQUIRED);
// Attributor returns attributes as const, so this function has to be
// const for users of this attribute to use it without having to do
// a const_cast.
// This is a hack for us to be able to cache queries.
auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this);
bool Result = NonConstThis->WholeFunction.isReachable(A, *NonConstThis,
{&AAEdges}, Fn);
return Result;
}
/// Can \p CB reach \p Fn
bool canReach(Attributor &A, CallBase &CB,
const Function &Fn) const override {
if (!isValidState())
return true;
const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
*this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
// Attributor returns attributes as const, so this function has to be
// const for users of this attribute to use it without having to do
// a const_cast.
// This is a hack for us to be able to cache queries.
auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this);
QueryResolver &CBQuery = NonConstThis->CBQueries[&CB];
bool Result = CBQuery.isReachable(A, *NonConstThis, {&AAEdges}, Fn);
return Result;
}
bool instructionCanReach(Attributor &A, const Instruction &Inst,
const Function &Fn,
bool UseBackwards) const override {
if (!isValidState())
return true;
if (UseBackwards)
return AA::isPotentiallyReachable(A, Inst, Fn, *this, nullptr);
const auto &Reachability = A.getAAFor<AAReachability>(
*this, IRPosition::function(*getAssociatedFunction()),
DepClassTy::REQUIRED);
SmallVector<const AACallEdges *> CallEdges;
bool AllKnown = getReachableCallEdges(A, Reachability, Inst, CallEdges);
// Attributor returns attributes as const, so this function has to be
// const for users of this attribute to use it without having to do
// a const_cast.
// This is a hack for us to be able to cache queries.
auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this);
QueryResolver &InstQSet = NonConstThis->InstQueries[&Inst];
if (!AllKnown)
InstQSet.CanReachUnknownCallee = true;
return InstQSet.isReachable(A, *NonConstThis, CallEdges, Fn);
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
const AACallEdges &AAEdges =
A.getAAFor<AACallEdges>(*this, getIRPosition(), DepClassTy::REQUIRED);
ChangeStatus Change = ChangeStatus::UNCHANGED;
Change |= WholeFunction.update(A, *this, {&AAEdges});
for (auto &CBPair : CBQueries) {
const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
*this, IRPosition::callsite_function(*CBPair.first),
DepClassTy::REQUIRED);
Change |= CBPair.second.update(A, *this, {&AAEdges});
}
// Update the Instruction queries.
const AAReachability *Reachability;
if (!InstQueries.empty()) {
Reachability = &A.getAAFor<AAReachability>(
*this, IRPosition::function(*getAssociatedFunction()),
DepClassTy::REQUIRED);
}
// Check for local callbases first.
for (auto &InstPair : InstQueries) {
SmallVector<const AACallEdges *> CallEdges;
bool AllKnown =
getReachableCallEdges(A, *Reachability, *InstPair.first, CallEdges);
// Update will return a change if this affects any queries.
if (!AllKnown)
InstPair.second.CanReachUnknownCallee = true;
Change |= InstPair.second.update(A, *this, CallEdges);
}
return Change;
}
const std::string getAsStr() const override {
size_t QueryCount =
WholeFunction.Reachable.size() + WholeFunction.Unreachable.size();
return "FunctionReachability [" +
std::to_string(WholeFunction.Reachable.size()) + "," +
std::to_string(QueryCount) + "]";
}
void trackStatistics() const override {}
private:
bool canReachUnknownCallee() const override {
return WholeFunction.CanReachUnknownCallee;
}
/// Used to answer whether the whole function can reach a specific function.
QueryResolver WholeFunction;
/// Used to answer if a call base inside this function can reach a specific
/// function.
DenseMap<const CallBase *, QueryResolver> CBQueries;
/// This is for instruction queries that scan "forward".
DenseMap<const Instruction *, QueryResolver> InstQueries;
};
/// ---------------------- Assumption Propagation ------------------------------
struct AAAssumptionInfoImpl : public AAAssumptionInfo {
AAAssumptionInfoImpl(const IRPosition &IRP, Attributor &A,
const DenseSet<StringRef> &Known)
: AAAssumptionInfo(IRP, A, Known) {}
bool hasAssumption(const StringRef Assumption) const override {
return isValidState() && setContains(Assumption);
}
/// See AbstractAttribute::getAsStr()
const std::string getAsStr() const override {
const SetContents &Known = getKnown();
const SetContents &Assumed = getAssumed();
const std::string KnownStr =
llvm::join(Known.getSet().begin(), Known.getSet().end(), ",");
const std::string AssumedStr =
(Assumed.isUniversal())
? "Universal"
: llvm::join(Assumed.getSet().begin(), Assumed.getSet().end(), ",");
return "Known [" + KnownStr + "]," + " Assumed [" + AssumedStr + "]";
}
};
/// Propagates assumption information from parent functions to all of their
/// successors. An assumption can be propagated if the containing function
/// dominates the called function.
///
/// We start with a "known" set of assumptions already valid for the associated
/// function and an "assumed" set that initially contains all possible
/// assumptions. The assumed set is inter-procedurally updated by narrowing its
/// contents as concrete values are known. The concrete values are seeded by the
/// first nodes that are either entries into the call graph, or contain no
/// assumptions. Each node is updated as the intersection of the assumed state
/// with all of its predecessors.
struct AAAssumptionInfoFunction final : AAAssumptionInfoImpl {
AAAssumptionInfoFunction(const IRPosition &IRP, Attributor &A)
: AAAssumptionInfoImpl(IRP, A,
getAssumptions(*IRP.getAssociatedFunction())) {}
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
const auto &Assumptions = getKnown();
// Don't manifest a universal set if it somehow made it here.
if (Assumptions.isUniversal())
return ChangeStatus::UNCHANGED;
Function *AssociatedFunction = getAssociatedFunction();
bool Changed = addAssumptions(*AssociatedFunction, Assumptions.getSet());
return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
bool Changed = false;
auto CallSitePred = [&](AbstractCallSite ACS) {
const auto &AssumptionAA = A.getAAFor<AAAssumptionInfo>(
*this, IRPosition::callsite_function(*ACS.getInstruction()),
DepClassTy::REQUIRED);
// Get the set of assumptions shared by all of this function's callers.
Changed |= getIntersection(AssumptionAA.getAssumed());
return !getAssumed().empty() || !getKnown().empty();
};
- bool AllCallSitesKnown;
+ bool UsedAssumedInformation = false;
// Get the intersection of all assumptions held by this node's predecessors.
// If we don't know all the call sites then this is either an entry into the
// call graph or an empty node. This node is known to only contain its own
// assumptions and can be propagated to its successors.
- if (!A.checkForAllCallSites(CallSitePred, *this, true, AllCallSitesKnown))
+ if (!A.checkForAllCallSites(CallSitePred, *this, true,
+ UsedAssumedInformation))
return indicatePessimisticFixpoint();
return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
}
void trackStatistics() const override {}
};
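// Editorial example with hypothetical assumption strings, not part of the
// upstream change: if a function has two callers carrying the assumption
// sets {"omp_no_openmp", "omp_no_parallelism"} and {"omp_no_openmp"}, the
// call-site predicate above intersects them so only "omp_no_openmp" remains
// in the assumed state, and manifest() then attaches that single assumption
// to the function.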
/// Assumption Info defined for call sites.
struct AAAssumptionInfoCallSite final : AAAssumptionInfoImpl {
AAAssumptionInfoCallSite(const IRPosition &IRP, Attributor &A)
: AAAssumptionInfoImpl(IRP, A, getInitialAssumptions(IRP)) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
const IRPosition &FnPos = IRPosition::function(*getAnchorScope());
A.getAAFor<AAAssumptionInfo>(*this, FnPos, DepClassTy::REQUIRED);
}
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
// Don't manifest a universal set if it somehow made it here.
if (getKnown().isUniversal())
return ChangeStatus::UNCHANGED;
CallBase &AssociatedCall = cast<CallBase>(getAssociatedValue());
bool Changed = addAssumptions(AssociatedCall, getAssumed().getSet());
return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
const IRPosition &FnPos = IRPosition::function(*getAnchorScope());
auto &AssumptionAA =
A.getAAFor<AAAssumptionInfo>(*this, FnPos, DepClassTy::REQUIRED);
bool Changed = getIntersection(AssumptionAA.getAssumed());
return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {}
private:
/// Helper to initialize the known set with all the assumptions this call and
/// the callee contain.
DenseSet<StringRef> getInitialAssumptions(const IRPosition &IRP) {
const CallBase &CB = cast<CallBase>(IRP.getAssociatedValue());
auto Assumptions = getAssumptions(CB);
if (Function *F = IRP.getAssociatedFunction())
set_union(Assumptions, getAssumptions(*F));
return Assumptions;
}
};
AACallGraphNode *AACallEdgeIterator::operator*() const {
return static_cast<AACallGraphNode *>(const_cast<AACallEdges *>(
&A.getOrCreateAAFor<AACallEdges>(IRPosition::function(**I))));
}
void AttributorCallGraph::print() { llvm::WriteGraph(outs(), this); }
const char AAReturnedValues::ID = 0;
const char AANoUnwind::ID = 0;
const char AANoSync::ID = 0;
const char AANoFree::ID = 0;
const char AANonNull::ID = 0;
const char AANoRecurse::ID = 0;
const char AAWillReturn::ID = 0;
const char AAUndefinedBehavior::ID = 0;
const char AANoAlias::ID = 0;
const char AAReachability::ID = 0;
const char AANoReturn::ID = 0;
const char AAIsDead::ID = 0;
const char AADereferenceable::ID = 0;
const char AAAlign::ID = 0;
const char AANoCapture::ID = 0;
const char AAValueSimplify::ID = 0;
const char AAHeapToStack::ID = 0;
const char AAPrivatizablePtr::ID = 0;
const char AAMemoryBehavior::ID = 0;
const char AAMemoryLocation::ID = 0;
const char AAValueConstantRange::ID = 0;
const char AAPotentialValues::ID = 0;
const char AANoUndef::ID = 0;
const char AACallEdges::ID = 0;
const char AAFunctionReachability::ID = 0;
const char AAPointerInfo::ID = 0;
const char AAAssumptionInfo::ID = 0;
// Macro magic to create the static generator function for attributes that
// follow the naming scheme.
#define SWITCH_PK_INV(CLASS, PK, POS_NAME) \
case IRPosition::PK: \
llvm_unreachable("Cannot create " #CLASS " for a " POS_NAME " position!");
#define SWITCH_PK_CREATE(CLASS, IRP, PK, SUFFIX) \
case IRPosition::PK: \
AA = new (A.Allocator) CLASS##SUFFIX(IRP, A); \
++NumAAs; \
break;
#define CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \
CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \
CLASS *AA = nullptr; \
switch (IRP.getPositionKind()) { \
SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \
SWITCH_PK_INV(CLASS, IRP_FLOAT, "floating") \
SWITCH_PK_INV(CLASS, IRP_ARGUMENT, "argument") \
SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \
SWITCH_PK_INV(CLASS, IRP_CALL_SITE_RETURNED, "call site returned") \
SWITCH_PK_INV(CLASS, IRP_CALL_SITE_ARGUMENT, "call site argument") \
SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \
SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \
} \
return *AA; \
}
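// Editorial sketch, not part of the upstream change: abridged, the macro
// above expands for a class such as AANoUnwind to roughly
//   AANoUnwind &AANoUnwind::createForPosition(const IRPosition &IRP,
//                                             Attributor &A) {
//     AANoUnwind *AA = nullptr;
//     switch (IRP.getPositionKind()) {
//     case IRPosition::IRP_FUNCTION:
//       AA = new (A.Allocator) AANoUnwindFunction(IRP, A);
//       ++NumAAs;
//       break;
//     // ... other position kinds create the CallSite variant or abort.
//     }
//     return *AA;
//   }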
#define CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \
CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \
CLASS *AA = nullptr; \
switch (IRP.getPositionKind()) { \
SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \
SWITCH_PK_INV(CLASS, IRP_FUNCTION, "function") \
SWITCH_PK_INV(CLASS, IRP_CALL_SITE, "call site") \
SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \
SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \
SWITCH_PK_CREATE(CLASS, IRP, IRP_RETURNED, Returned) \
SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \
SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \
} \
return *AA; \
}
#define CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \
CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \
CLASS *AA = nullptr; \
switch (IRP.getPositionKind()) { \
SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \
SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \
SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \
SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \
SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \
SWITCH_PK_CREATE(CLASS, IRP, IRP_RETURNED, Returned) \
SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \
SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \
} \
return *AA; \
}
#define CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \
CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \
CLASS *AA = nullptr; \
switch (IRP.getPositionKind()) { \
SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \
SWITCH_PK_INV(CLASS, IRP_ARGUMENT, "argument") \
SWITCH_PK_INV(CLASS, IRP_FLOAT, "floating") \
SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \
SWITCH_PK_INV(CLASS, IRP_CALL_SITE_RETURNED, "call site returned") \
SWITCH_PK_INV(CLASS, IRP_CALL_SITE_ARGUMENT, "call site argument") \
SWITCH_PK_INV(CLASS, IRP_CALL_SITE, "call site") \
SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \
} \
return *AA; \
}
#define CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(CLASS) \
CLASS &CLASS::createForPosition(const IRPosition &IRP, Attributor &A) { \
CLASS *AA = nullptr; \
switch (IRP.getPositionKind()) { \
SWITCH_PK_INV(CLASS, IRP_INVALID, "invalid") \
SWITCH_PK_INV(CLASS, IRP_RETURNED, "returned") \
SWITCH_PK_CREATE(CLASS, IRP, IRP_FUNCTION, Function) \
SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE, CallSite) \
SWITCH_PK_CREATE(CLASS, IRP, IRP_FLOAT, Floating) \
SWITCH_PK_CREATE(CLASS, IRP, IRP_ARGUMENT, Argument) \
SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_RETURNED, CallSiteReturned) \
SWITCH_PK_CREATE(CLASS, IRP, IRP_CALL_SITE_ARGUMENT, CallSiteArgument) \
} \
return *AA; \
}
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUnwind)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoSync)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoRecurse)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAWillReturn)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoReturn)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAReturnedValues)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryLocation)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AACallEdges)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAssumptionInfo)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonNull)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPrivatizablePtr)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueConstantRange)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialValues)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPointerInfo)
CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueSimplify)
CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAIsDead)
CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFree)
CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAHeapToStack)
CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAReachability)
CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAUndefinedBehavior)
CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAFunctionReachability)
CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryBehavior)
#undef CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION
#undef CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION
#undef CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION
#undef CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION
#undef CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION
#undef SWITCH_PK_CREATE
#undef SWITCH_PK_INV
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 213a998d5bba..e2f1944cee63 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -1,2036 +1,2054 @@
//===- FunctionAttrs.cpp - Pass which marks functions attributes ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file implements interprocedural passes which walk the
/// call-graph deducing and/or propagating function attributes.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <iterator>
#include <map>
#include <vector>
using namespace llvm;
#define DEBUG_TYPE "function-attrs"
STATISTIC(NumReadNone, "Number of functions marked readnone");
STATISTIC(NumReadOnly, "Number of functions marked readonly");
STATISTIC(NumWriteOnly, "Number of functions marked writeonly");
STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
STATISTIC(NumReturned, "Number of arguments marked returned");
STATISTIC(NumReadNoneArg, "Number of arguments marked readnone");
STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly");
STATISTIC(NumWriteOnlyArg, "Number of arguments marked writeonly");
STATISTIC(NumNoAlias, "Number of function returns marked noalias");
STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull");
STATISTIC(NumNoRecurse, "Number of functions marked as norecurse");
STATISTIC(NumNoUnwind, "Number of functions marked as nounwind");
STATISTIC(NumNoFree, "Number of functions marked as nofree");
STATISTIC(NumWillReturn, "Number of functions marked as willreturn");
STATISTIC(NumNoSync, "Number of functions marked as nosync");
STATISTIC(NumThinLinkNoRecurse,
"Number of functions marked as norecurse during thinlink");
STATISTIC(NumThinLinkNoUnwind,
"Number of functions marked as nounwind during thinlink");
static cl::opt<bool> EnableNonnullArgPropagation(
"enable-nonnull-arg-prop", cl::init(true), cl::Hidden,
cl::desc("Try to propagate nonnull argument attributes from callsites to "
"caller functions."));
static cl::opt<bool> DisableNoUnwindInference(
"disable-nounwind-inference", cl::Hidden,
cl::desc("Stop inferring nounwind attribute during function-attrs pass"));
static cl::opt<bool> DisableNoFreeInference(
"disable-nofree-inference", cl::Hidden,
cl::desc("Stop inferring nofree attribute during function-attrs pass"));
static cl::opt<bool> DisableThinLTOPropagation(
"disable-thinlto-funcattrs", cl::init(true), cl::Hidden,
cl::desc("Don't propagate function-attrs in thinLTO"));
namespace {
using SCCNodeSet = SmallSetVector<Function *, 8>;
} // end anonymous namespace
/// Returns the memory access attribute for function F using AAR for AA results,
/// where SCCNodes is the current SCC.
///
/// If ThisBody is true, this function may examine the function body and will
/// return a result pertaining to this copy of the function. If it is false, the
/// result will be based only on AA results for the function declaration; it
/// will be assumed that some other (perhaps less optimized) version of the
/// function may be selected at link time.
static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
AAResults &AAR,
const SCCNodeSet &SCCNodes) {
FunctionModRefBehavior MRB = AAR.getModRefBehavior(&F);
if (MRB == FMRB_DoesNotAccessMemory)
// Already perfect!
return MAK_ReadNone;
if (!ThisBody) {
if (AliasAnalysis::onlyReadsMemory(MRB))
return MAK_ReadOnly;
if (AliasAnalysis::onlyWritesMemory(MRB))
return MAK_WriteOnly;
// Conservatively assume it reads and writes to memory.
return MAK_MayWrite;
}
// Scan the function body for instructions that may read or write memory.
bool ReadsMemory = false;
bool WritesMemory = false;
for (Instruction &I : instructions(F)) {
// Some instructions can be ignored even if they read or write memory.
// Detect these now, skipping to the next instruction if one is found.
if (auto *Call = dyn_cast<CallBase>(&I)) {
// Ignore calls to functions in the same SCC, as long as the call sites
// don't have operand bundles. Calls with operand bundles are allowed to
// have memory effects not described by the memory effects of the call
// target.
if (!Call->hasOperandBundles() && Call->getCalledFunction() &&
SCCNodes.count(Call->getCalledFunction()))
continue;
FunctionModRefBehavior MRB = AAR.getModRefBehavior(Call);
ModRefInfo MRI = createModRefInfo(MRB);
// If the call doesn't access memory, we're done.
if (isNoModRef(MRI))
continue;
// A pseudo probe call shouldn't change any function attribute since it
// doesn't translate to a real instruction. It carries a memory access
// tag so that optimizations do not remove it, while not blocking other
// instructions from being optimized.
if (isa<PseudoProbeInst>(I))
continue;
if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) {
// The call could access any memory. If that includes writes, note it.
if (isModSet(MRI))
WritesMemory = true;
// If it reads, note it.
if (isRefSet(MRI))
ReadsMemory = true;
continue;
}
// Check whether all pointer arguments point to local memory, and
// ignore calls that only access local memory.
for (const Use &U : Call->args()) {
const Value *Arg = U;
if (!Arg->getType()->isPtrOrPtrVectorTy())
continue;
MemoryLocation Loc =
MemoryLocation::getBeforeOrAfter(Arg, I.getAAMetadata());
// Skip accesses to local or constant memory as they don't impact the
// externally visible mod/ref behavior.
if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
if (isModSet(MRI))
// Writes non-local memory.
WritesMemory = true;
if (isRefSet(MRI))
// Ok, it reads non-local memory.
ReadsMemory = true;
}
continue;
} else if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
// Ignore non-volatile loads from local memory. (Atomic is okay here.)
if (!LI->isVolatile()) {
MemoryLocation Loc = MemoryLocation::get(LI);
if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
}
} else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
// Ignore non-volatile stores to local memory. (Atomic is okay here.)
if (!SI->isVolatile()) {
MemoryLocation Loc = MemoryLocation::get(SI);
if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
}
} else if (VAArgInst *VI = dyn_cast<VAArgInst>(&I)) {
// Ignore vaargs on local memory.
MemoryLocation Loc = MemoryLocation::get(VI);
if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
}
// Any remaining instructions need to be taken seriously! Check if they
// read or write memory.
//
// If this instruction may write to memory, remember that.
WritesMemory |= I.mayWriteToMemory();
// If this instruction may read memory, remember that.
ReadsMemory |= I.mayReadFromMemory();
}
if (WritesMemory) {
if (!ReadsMemory)
return MAK_WriteOnly;
else
return MAK_MayWrite;
}
return ReadsMemory ? MAK_ReadOnly : MAK_ReadNone;
}
MemoryAccessKind llvm::computeFunctionBodyMemoryAccess(Function &F,
AAResults &AAR) {
return checkFunctionMemoryAccess(F, /*ThisBody=*/true, AAR, {});
}
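// Illustrative sketch, not part of the upstream pass (the helper name is
// hypothetical): a caller that already holds AAResults for F might classify
// a body with the function above like so. The strings mirror the attributes
// that addReadAttrs infers further down.
static StringRef exampleClassifyBody(Function &F, AAResults &AAR) {
  switch (computeFunctionBodyMemoryAccess(F, AAR)) {
  case MAK_ReadNone:
    return "readnone";
  case MAK_ReadOnly:
    return "readonly";
  case MAK_WriteOnly:
    return "writeonly";
  case MAK_MayWrite:
    return "may read and write";
  }
  llvm_unreachable("covered switch over MemoryAccessKind");
}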
/// Deduce readonly/readnone attributes for the SCC.
template <typename AARGetterT>
static void addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter,
SmallSet<Function *, 8> &Changed) {
// Check if any of the functions in the SCC read or write memory. If they
// write memory then they can't be marked readnone or readonly.
bool ReadsMemory = false;
bool WritesMemory = false;
for (Function *F : SCCNodes) {
// Call the callable parameter to look up AA results for this function.
AAResults &AAR = AARGetter(*F);
// Non-exact function definitions may not be selected at link time, and an
// alternative version that writes to memory may be selected. See the
// comment on GlobalValue::isDefinitionExact for more details.
switch (checkFunctionMemoryAccess(*F, F->hasExactDefinition(),
AAR, SCCNodes)) {
case MAK_MayWrite:
return;
case MAK_ReadOnly:
ReadsMemory = true;
break;
case MAK_WriteOnly:
WritesMemory = true;
break;
case MAK_ReadNone:
// Nothing to do!
break;
}
}
// If the SCC contains both functions that read and functions that write, then
// we cannot add readonly attributes.
if (ReadsMemory && WritesMemory)
return;
// Success! Functions in this SCC do not access memory, or only read memory.
// Give them the appropriate attribute.
for (Function *F : SCCNodes) {
if (F->doesNotAccessMemory())
// Already perfect!
continue;
if (F->onlyReadsMemory() && ReadsMemory)
// No change.
continue;
if (F->onlyWritesMemory() && WritesMemory)
continue;
Changed.insert(F);
// Clear out any existing attributes.
AttributeMask AttrsToRemove;
AttrsToRemove.addAttribute(Attribute::ReadOnly);
AttrsToRemove.addAttribute(Attribute::ReadNone);
AttrsToRemove.addAttribute(Attribute::WriteOnly);
if (!WritesMemory && !ReadsMemory) {
// Clear out any "access range attributes" if readnone was deduced.
AttrsToRemove.addAttribute(Attribute::ArgMemOnly);
AttrsToRemove.addAttribute(Attribute::InaccessibleMemOnly);
AttrsToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly);
}
F->removeFnAttrs(AttrsToRemove);
// Add in the new attribute.
if (WritesMemory && !ReadsMemory)
F->addFnAttr(Attribute::WriteOnly);
else
F->addFnAttr(ReadsMemory ? Attribute::ReadOnly : Attribute::ReadNone);
if (WritesMemory && !ReadsMemory)
++NumWriteOnly;
else if (ReadsMemory)
++NumReadOnly;
else
++NumReadNone;
}
}
// Compute definitive function attributes for a function taking into account
// prevailing definitions and linkage types
static FunctionSummary *calculatePrevailingSummary(
ValueInfo VI,
DenseMap<ValueInfo, FunctionSummary *> &CachedPrevailingSummary,
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
IsPrevailing) {
if (CachedPrevailingSummary.count(VI))
return CachedPrevailingSummary[VI];
/// At this point, prevailing symbols have been resolved. The following leads
/// to returning a conservative result:
/// - Multiple instances with local linkage. Normally local linkage would be
/// unique per module, as the GUID includes the module path. We could have a
/// GUID alias if there wasn't any distinguishing path when each file was
/// compiled, but that should be rare so we'll punt on those.
/// These next 2 cases should not happen and will assert:
/// - Multiple instances with external linkage. This should be caught in
/// symbol resolution
/// - Non-existent FunctionSummary for Aliasee. This presents a hole in our
/// knowledge meaning we have to go conservative.
/// Otherwise, we calculate attributes for a function as:
/// 1. If we have a local linkage, take its attributes. If there are somehow
/// multiple copies, bail and go conservative.
/// 2. If we have an external/WeakODR/LinkOnceODR linkage, check that it is
/// prevailing and, if so, take its attributes.
/// 3. If we have a Weak/LinkOnce linkage the copies can have semantic
/// differences. However, if the prevailing copy is known it will be used
/// so take its attributes. If the prevailing copy is in a native file
/// all IR copies will be dead and propagation will go conservative.
/// 4. AvailableExternally summaries without a prevailing copy are known to
/// occur in a couple of circumstances:
/// a. An internal function gets imported due to its caller getting
/// imported; it becomes AvailableExternally, but no prevailing
/// definition exists. Because it has to get imported along with its
/// caller the attributes will be captured by propagating on its
/// caller.
/// b. C++11 [temp.explicit]p10 can generate AvailableExternally
/// definitions of explicitly instantiated template declarations
/// for inlining, which are ultimately dropped from the TU. Since this
/// is localized to the TU the attributes will have already made it to
/// the callers.
/// These are edge cases and already captured by their callers so we
/// ignore these for now. If they become relevant to optimize in the
/// future this can be revisited.
/// 5. Otherwise, go conservative.
CachedPrevailingSummary[VI] = nullptr;
FunctionSummary *Local = nullptr;
FunctionSummary *Prevailing = nullptr;
for (const auto &GVS : VI.getSummaryList()) {
if (!GVS->isLive())
continue;
FunctionSummary *FS = dyn_cast<FunctionSummary>(GVS->getBaseObject());
// Virtual and Unknown (e.g. indirect) calls require going conservative
if (!FS || FS->fflags().HasUnknownCall)
return nullptr;
const auto &Linkage = GVS->linkage();
if (GlobalValue::isLocalLinkage(Linkage)) {
if (Local) {
LLVM_DEBUG(
dbgs()
<< "ThinLTO FunctionAttrs: Multiple Local Linkage, bailing on "
"function "
<< VI.name() << " from " << FS->modulePath() << ". Previous module "
<< Local->modulePath() << "\n");
return nullptr;
}
Local = FS;
} else if (GlobalValue::isExternalLinkage(Linkage)) {
assert(IsPrevailing(VI.getGUID(), GVS.get()));
Prevailing = FS;
break;
} else if (GlobalValue::isWeakODRLinkage(Linkage) ||
GlobalValue::isLinkOnceODRLinkage(Linkage) ||
GlobalValue::isWeakAnyLinkage(Linkage) ||
GlobalValue::isLinkOnceAnyLinkage(Linkage)) {
if (IsPrevailing(VI.getGUID(), GVS.get())) {
Prevailing = FS;
break;
}
} else if (GlobalValue::isAvailableExternallyLinkage(Linkage)) {
// TODO: Handle these cases if they become meaningful
continue;
}
}
if (Local) {
assert(!Prevailing);
CachedPrevailingSummary[VI] = Local;
} else if (Prevailing) {
assert(!Local);
CachedPrevailingSummary[VI] = Prevailing;
}
return CachedPrevailingSummary[VI];
}
bool llvm::thinLTOPropagateFunctionAttrs(
ModuleSummaryIndex &Index,
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
IsPrevailing) {
// TODO: implement addNoAliasAttrs once
// there's more information about the return type in the summary
if (DisableThinLTOPropagation)
return false;
DenseMap<ValueInfo, FunctionSummary *> CachedPrevailingSummary;
bool Changed = false;
auto PropagateAttributes = [&](std::vector<ValueInfo> &SCCNodes) {
// Assume we can propagate unless we discover otherwise
FunctionSummary::FFlags InferredFlags;
InferredFlags.NoRecurse = (SCCNodes.size() == 1);
InferredFlags.NoUnwind = true;
for (auto &V : SCCNodes) {
FunctionSummary *CallerSummary =
calculatePrevailingSummary(V, CachedPrevailingSummary, IsPrevailing);
// The prevailing summary can be unavailable, e.g. when only declarations exist.
if (!CallerSummary)
return;
if (CallerSummary->fflags().MayThrow)
InferredFlags.NoUnwind = false;
for (const auto &Callee : CallerSummary->calls()) {
FunctionSummary *CalleeSummary = calculatePrevailingSummary(
Callee.first, CachedPrevailingSummary, IsPrevailing);
if (!CalleeSummary)
return;
if (!CalleeSummary->fflags().NoRecurse)
InferredFlags.NoRecurse = false;
if (!CalleeSummary->fflags().NoUnwind)
InferredFlags.NoUnwind = false;
if (!InferredFlags.NoUnwind && !InferredFlags.NoRecurse)
break;
}
}
if (InferredFlags.NoUnwind || InferredFlags.NoRecurse) {
Changed = true;
for (auto &V : SCCNodes) {
if (InferredFlags.NoRecurse) {
LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoRecurse to "
<< V.name() << "\n");
++NumThinLinkNoRecurse;
}
if (InferredFlags.NoUnwind) {
LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoUnwind to "
<< V.name() << "\n");
++NumThinLinkNoUnwind;
}
for (auto &S : V.getSummaryList()) {
if (auto *FS = dyn_cast<FunctionSummary>(S.get())) {
if (InferredFlags.NoRecurse)
FS->setNoRecurse();
if (InferredFlags.NoUnwind)
FS->setNoUnwind();
}
}
}
}
};
// Call propagation functions on each SCC in the Index
for (scc_iterator<ModuleSummaryIndex *> I = scc_begin(&Index); !I.isAtEnd();
++I) {
std::vector<ValueInfo> Nodes(*I);
PropagateAttributes(Nodes);
}
return Changed;
}
namespace {
/// For a given pointer Argument, this retains a list of Arguments of functions
/// in the same SCC that the pointer data flows into. We use this to build an
/// SCC of the arguments.
struct ArgumentGraphNode {
Argument *Definition;
SmallVector<ArgumentGraphNode *, 4> Uses;
};
class ArgumentGraph {
// We store pointers to ArgumentGraphNode objects, so it's important that
// they not move around upon insert.
using ArgumentMapTy = std::map<Argument *, ArgumentGraphNode>;
ArgumentMapTy ArgumentMap;
// There is no root node for the argument graph, in fact:
// void f(int *x, int *y) { if (...) f(x, y); }
// is an example where the graph is disconnected. The SCCIterator requires a
// single entry point, so we maintain a fake ("synthetic") root node that
// uses every node. Because the graph is directed and nothing points into
// the root, it will not participate in any SCCs (except for its own).
ArgumentGraphNode SyntheticRoot;
public:
ArgumentGraph() { SyntheticRoot.Definition = nullptr; }
using iterator = SmallVectorImpl<ArgumentGraphNode *>::iterator;
iterator begin() { return SyntheticRoot.Uses.begin(); }
iterator end() { return SyntheticRoot.Uses.end(); }
ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; }
ArgumentGraphNode *operator[](Argument *A) {
ArgumentGraphNode &Node = ArgumentMap[A];
Node.Definition = A;
SyntheticRoot.Uses.push_back(&Node);
return &Node;
}
};
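// Illustrative sketch, not part of the upstream pass (the helper name is
// hypothetical): this is how addArgumentAttrs, further down, records that
// data flows from argument A into argument B of a callee in the same SCC.
// operator[] lazily creates nodes and hangs them off the synthetic root.
static void exampleRecordArgumentFlow(ArgumentGraph &AG, Argument *A,
                                      Argument *B) {
  ArgumentGraphNode *Node = AG[A]; // node describing the defining argument
  Node->Uses.push_back(AG[B]);     // edge: A's pointer data reaches B
}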
/// This tracker checks whether callees are in the SCC, and if so it does not
/// consider that a capture, instead adding it to the "Uses" list and
/// continuing with the analysis.
struct ArgumentUsesTracker : public CaptureTracker {
ArgumentUsesTracker(const SCCNodeSet &SCCNodes) : SCCNodes(SCCNodes) {}
void tooManyUses() override { Captured = true; }
bool captured(const Use *U) override {
CallBase *CB = dyn_cast<CallBase>(U->getUser());
if (!CB) {
Captured = true;
return true;
}
Function *F = CB->getCalledFunction();
if (!F || !F->hasExactDefinition() || !SCCNodes.count(F)) {
Captured = true;
return true;
}
assert(!CB->isCallee(U) && "callee operand reported captured?");
const unsigned UseIndex = CB->getDataOperandNo(U);
if (UseIndex >= CB->arg_size()) {
// Data operand, but not an argument operand -- must be a bundle operand
assert(CB->hasOperandBundles() && "Must be!");
// CaptureTracking told us that we're being captured by an operand bundle
// use. In this case it does not matter if the callee is within our SCC
// or not -- we've been captured in some unknown way, and we have to be
// conservative.
Captured = true;
return true;
}
if (UseIndex >= F->arg_size()) {
assert(F->isVarArg() && "More params than args in non-varargs call");
Captured = true;
return true;
}
Uses.push_back(&*std::next(F->arg_begin(), UseIndex));
return false;
}
// True only if certainly captured (used outside our SCC).
bool Captured = false;
// Uses within our SCC.
SmallVector<Argument *, 4> Uses;
const SCCNodeSet &SCCNodes;
};
} // end anonymous namespace
namespace llvm {
template <> struct GraphTraits<ArgumentGraphNode *> {
using NodeRef = ArgumentGraphNode *;
using ChildIteratorType = SmallVectorImpl<ArgumentGraphNode *>::iterator;
static NodeRef getEntryNode(NodeRef A) { return A; }
static ChildIteratorType child_begin(NodeRef N) { return N->Uses.begin(); }
static ChildIteratorType child_end(NodeRef N) { return N->Uses.end(); }
};
template <>
struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> {
static NodeRef getEntryNode(ArgumentGraph *AG) { return AG->getEntryNode(); }
static ChildIteratorType nodes_begin(ArgumentGraph *AG) {
return AG->begin();
}
static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); }
};
} // end namespace llvm
/// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone.
static Attribute::AttrKind
determinePointerAccessAttrs(Argument *A,
const SmallPtrSet<Argument *, 8> &SCCNodes) {
SmallVector<Use *, 32> Worklist;
SmallPtrSet<Use *, 32> Visited;
// inalloca arguments are always clobbered by the call.
if (A->hasInAllocaAttr() || A->hasPreallocatedAttr())
return Attribute::None;
bool IsRead = false;
bool IsWrite = false;
for (Use &U : A->uses()) {
Visited.insert(&U);
Worklist.push_back(&U);
}
while (!Worklist.empty()) {
if (IsWrite && IsRead)
// No point in searching further.
return Attribute::None;
Use *U = Worklist.pop_back_val();
Instruction *I = cast<Instruction>(U->getUser());
switch (I->getOpcode()) {
case Instruction::BitCast:
case Instruction::GetElementPtr:
case Instruction::PHI:
case Instruction::Select:
case Instruction::AddrSpaceCast:
// The original value is not read/written via this if the new value isn't.
for (Use &UU : I->uses())
if (Visited.insert(&UU).second)
Worklist.push_back(&UU);
break;
case Instruction::Call:
case Instruction::Invoke: {
CallBase &CB = cast<CallBase>(*I);
if (CB.isCallee(U)) {
IsRead = true;
// Note that indirect calls do not capture; see the comment in
// CaptureTracking for context
continue;
}
// Given we've explicitly handled the callee operand above, what's left
// must be a data operand (e.g. argument or operand bundle)
const unsigned UseIndex = CB.getDataOperandNo(U);
if (!CB.doesNotCapture(UseIndex)) {
if (!CB.onlyReadsMemory())
// If the callee can save a copy into other memory, then simply
// scanning uses of the call is insufficient. We have no way
// of tracking copies of the pointer through memory to see
// if a reloaded copy is written to, thus we must give up.
return Attribute::None;
// Push users for processing once we finish this one
if (!I->getType()->isVoidTy())
for (Use &UU : I->uses())
if (Visited.insert(&UU).second)
Worklist.push_back(&UU);
}
if (CB.doesNotAccessMemory())
continue;
if (Function *F = CB.getCalledFunction())
if (CB.isArgOperand(U) && UseIndex < F->arg_size() &&
SCCNodes.count(F->getArg(UseIndex)))
// This is an argument which is part of the speculative SCC. Note
// that only operands corresponding to formal arguments of the callee
// can participate in the speculation.
break;
// The accessors used on the call site here do the right thing for calls and
// invokes with operand bundles.
if (CB.doesNotAccessMemory(UseIndex)) {
/* nop */
} else if (CB.onlyReadsMemory() || CB.onlyReadsMemory(UseIndex)) {
IsRead = true;
} else if (CB.hasFnAttr(Attribute::WriteOnly) ||
CB.dataOperandHasImpliedAttr(UseIndex, Attribute::WriteOnly)) {
IsWrite = true;
} else {
return Attribute::None;
}
break;
}
case Instruction::Load:
// A volatile load has side effects beyond what readonly can be relied
// upon.
if (cast<LoadInst>(I)->isVolatile())
return Attribute::None;
IsRead = true;
break;
case Instruction::Store:
if (cast<StoreInst>(I)->getValueOperand() == *U)
// untrackable capture
return Attribute::None;
// A volatile store has side effects beyond what writeonly can be relied
// upon.
if (cast<StoreInst>(I)->isVolatile())
return Attribute::None;
IsWrite = true;
break;
case Instruction::ICmp:
case Instruction::Ret:
break;
default:
return Attribute::None;
}
}
if (IsWrite && IsRead)
return Attribute::None;
else if (IsRead)
return Attribute::ReadOnly;
else if (IsWrite)
return Attribute::WriteOnly;
else
return Attribute::ReadNone;
}
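// Illustrative sketch, not part of the upstream pass (the helper name is
// hypothetical): to query a single argument in isolation, as addArgumentAttrs
// does before any argument-SCC analysis, pass a singleton set so that only
// flow back into the same argument counts as "within the SCC".
static Attribute::AttrKind examineLoneArgument(Argument *A) {
  SmallPtrSet<Argument *, 8> Self;
  Self.insert(A);
  return determinePointerAccessAttrs(A, Self);
}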
/// Deduce returned attributes for the SCC.
static void addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes,
SmallSet<Function *, 8> &Changed) {
// Check each function in turn, determining if an argument is always returned.
for (Function *F : SCCNodes) {
// We can infer and propagate function attributes only when we know that the
// definition we'll get at link time is *exactly* the definition we see now.
// For more details, see GlobalValue::mayBeDerefined.
if (!F->hasExactDefinition())
continue;
if (F->getReturnType()->isVoidTy())
continue;
// There is nothing to do if an argument is already marked as 'returned'.
if (llvm::any_of(F->args(),
[](const Argument &Arg) { return Arg.hasReturnedAttr(); }))
continue;
auto FindRetArg = [&]() -> Value * {
Value *RetArg = nullptr;
for (BasicBlock &BB : *F)
if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) {
// Note that stripPointerCasts should look through functions with
// returned arguments.
Value *RetVal = Ret->getReturnValue()->stripPointerCasts();
if (!isa<Argument>(RetVal) || RetVal->getType() != F->getReturnType())
return nullptr;
if (!RetArg)
RetArg = RetVal;
else if (RetArg != RetVal)
return nullptr;
}
return RetArg;
};
if (Value *RetArg = FindRetArg()) {
auto *A = cast<Argument>(RetArg);
A->addAttr(Attribute::Returned);
++NumReturned;
Changed.insert(F);
}
}
}
/// If a callsite has arguments that are also arguments to the parent function,
/// try to propagate attributes from the callsite's arguments to the parent's
/// arguments. This may be important because inlining can cause information loss
/// when attribute knowledge disappears with the inlined call.
static bool addArgumentAttrsFromCallsites(Function &F) {
if (!EnableNonnullArgPropagation)
return false;
bool Changed = false;
// For an argument attribute to transfer from a callsite to the parent, the
// call must be guaranteed to execute every time the parent is called.
// Conservatively, just check for calls in the entry block that are guaranteed
// to execute.
// TODO: This could be enhanced by testing if the callsite post-dominates the
// entry block or by doing simple forward walks or backward walks to the
// callsite.
BasicBlock &Entry = F.getEntryBlock();
for (Instruction &I : Entry) {
if (auto *CB = dyn_cast<CallBase>(&I)) {
if (auto *CalledFunc = CB->getCalledFunction()) {
for (auto &CSArg : CalledFunc->args()) {
if (!CSArg.hasNonNullAttr(/* AllowUndefOrPoison */ false))
continue;
// If the non-null callsite argument operand is an argument to 'F'
// (the caller) and the call is guaranteed to execute, then the value
// must be non-null throughout 'F'.
auto *FArg = dyn_cast<Argument>(CB->getArgOperand(CSArg.getArgNo()));
if (FArg && !FArg->hasNonNullAttr()) {
FArg->addAttr(Attribute::NonNull);
Changed = true;
}
}
}
}
if (!isGuaranteedToTransferExecutionToSuccessor(&I))
break;
}
return Changed;
}
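// Illustrative example, not part of the upstream pass (both functions are
// hypothetical): suppose exampleCallee's parameter is annotated so that, at
// the IR level, hasNonNullAttr(/*AllowUndefOrPoison=*/false) holds for it
// (e.g. via __attribute__((nonnull))). Because the call in exampleCaller sits
// in the entry block and is guaranteed to execute, the routine above may then
// mark exampleCaller's own argument Q nonnull as well.
static void exampleCallee(int *P) __attribute__((nonnull));
static void exampleCallee(int *P) { *P = 0; }
static void exampleCaller(int *Q) { exampleCallee(Q); }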
static bool addAccessAttr(Argument *A, Attribute::AttrKind R) {
assert((R == Attribute::ReadOnly || R == Attribute::ReadNone ||
R == Attribute::WriteOnly)
&& "Must be an access attribute.");
assert(A && "Argument must not be null.");
// If the argument already has the attribute, nothing needs to be done.
if (A->hasAttribute(R))
return false;
// Otherwise, remove potentially conflicting attribute, add the new one,
// and update statistics.
A->removeAttr(Attribute::WriteOnly);
A->removeAttr(Attribute::ReadOnly);
A->removeAttr(Attribute::ReadNone);
A->addAttr(R);
if (R == Attribute::ReadOnly)
++NumReadOnlyArg;
else if (R == Attribute::WriteOnly)
++NumWriteOnlyArg;
else
++NumReadNoneArg;
return true;
}
/// Deduce nocapture attributes for the SCC.
static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
SmallSet<Function *, 8> &Changed) {
ArgumentGraph AG;
// Check each function in turn, determining which pointer arguments are not
// captured.
for (Function *F : SCCNodes) {
// We can infer and propagate function attributes only when we know that the
// definition we'll get at link time is *exactly* the definition we see now.
// For more details, see GlobalValue::mayBeDerefined.
if (!F->hasExactDefinition())
continue;
if (addArgumentAttrsFromCallsites(*F))
Changed.insert(F);
// Functions that are readonly (or readnone) and nounwind and don't return
// a value can't capture arguments. Don't analyze them.
if (F->onlyReadsMemory() && F->doesNotThrow() &&
F->getReturnType()->isVoidTy()) {
for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E;
++A) {
if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
A->addAttr(Attribute::NoCapture);
++NumNoCapture;
Changed.insert(F);
}
}
continue;
}
for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E;
++A) {
if (!A->getType()->isPointerTy())
continue;
bool HasNonLocalUses = false;
if (!A->hasNoCaptureAttr()) {
ArgumentUsesTracker Tracker(SCCNodes);
PointerMayBeCaptured(&*A, &Tracker);
if (!Tracker.Captured) {
if (Tracker.Uses.empty()) {
// If it's trivially not captured, mark it nocapture now.
A->addAttr(Attribute::NoCapture);
++NumNoCapture;
Changed.insert(F);
} else {
// If it's not trivially captured and not trivially not captured,
// then it must be calling into another function in our SCC. Save
// its particulars for Argument-SCC analysis later.
ArgumentGraphNode *Node = AG[&*A];
for (Argument *Use : Tracker.Uses) {
Node->Uses.push_back(AG[Use]);
if (Use != &*A)
HasNonLocalUses = true;
}
}
}
// Otherwise, it's captured. Don't bother doing SCC analysis on it.
}
if (!HasNonLocalUses && !A->onlyReadsMemory()) {
// Can we determine that it's readonly/readnone/writeonly without doing
// an SCC? Note that we don't allow any calls at all here, or else our
// result will be dependent on the iteration order through the
// functions in the SCC.
SmallPtrSet<Argument *, 8> Self;
Self.insert(&*A);
Attribute::AttrKind R = determinePointerAccessAttrs(&*A, Self);
if (R != Attribute::None)
if (addAccessAttr(A, R))
Changed.insert(F);
}
}
}
// The graph we've collected is partial because we stopped scanning for
// argument uses once we solved the argument trivially. These partial nodes
// show up as ArgumentGraphNode objects with an empty Uses list, and for
// these nodes the final decision about whether they capture has already been
// made. If the definition doesn't have a 'nocapture' attribute by now, it
// captures.
for (scc_iterator<ArgumentGraph *> I = scc_begin(&AG); !I.isAtEnd(); ++I) {
const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I;
if (ArgumentSCC.size() == 1) {
if (!ArgumentSCC[0]->Definition)
continue; // synthetic root node
// e.g. "void f(int* x) { if (...) f(x); }"
if (ArgumentSCC[0]->Uses.size() == 1 &&
ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
Argument *A = ArgumentSCC[0]->Definition;
A->addAttr(Attribute::NoCapture);
++NumNoCapture;
Changed.insert(A->getParent());
// Infer the access attributes given the new nocapture one
SmallPtrSet<Argument *, 8> Self;
Self.insert(&*A);
Attribute::AttrKind R = determinePointerAccessAttrs(&*A, Self);
if (R != Attribute::None)
addAccessAttr(A, R);
}
continue;
}
bool SCCCaptured = false;
for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end();
I != E && !SCCCaptured; ++I) {
ArgumentGraphNode *Node = *I;
if (Node->Uses.empty()) {
if (!Node->Definition->hasNoCaptureAttr())
SCCCaptured = true;
}
}
if (SCCCaptured)
continue;
SmallPtrSet<Argument *, 8> ArgumentSCCNodes;
// Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for
// quickly looking up whether a given Argument is in this ArgumentSCC.
for (ArgumentGraphNode *I : ArgumentSCC) {
ArgumentSCCNodes.insert(I->Definition);
}
for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end();
I != E && !SCCCaptured; ++I) {
ArgumentGraphNode *N = *I;
for (ArgumentGraphNode *Use : N->Uses) {
Argument *A = Use->Definition;
if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A))
continue;
SCCCaptured = true;
break;
}
}
if (SCCCaptured)
continue;
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
A->addAttr(Attribute::NoCapture);
++NumNoCapture;
Changed.insert(A->getParent());
}
// We also want to compute readonly/readnone/writeonly. With a small number
// of false negatives, we can assume that any pointer which is captured
// isn't going to be provably readonly or readnone, since by definition
// we can't analyze all uses of a captured pointer.
//
// The false negatives happen when the pointer is captured by a function
// that promises readonly/readnone behaviour on the pointer, then the
// pointer's lifetime ends before anything that writes to arbitrary memory.
// Also, a readonly/readnone pointer may be returned, but returning a
// pointer is capturing it.
auto meetAccessAttr = [](Attribute::AttrKind A, Attribute::AttrKind B) {
if (A == B)
return A;
if (A == Attribute::ReadNone)
return B;
if (B == Attribute::ReadNone)
return A;
return Attribute::None;
};
Attribute::AttrKind AccessAttr = Attribute::ReadNone;
for (unsigned i = 0, e = ArgumentSCC.size();
i != e && AccessAttr != Attribute::None; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
Attribute::AttrKind K = determinePointerAccessAttrs(A, ArgumentSCCNodes);
AccessAttr = meetAccessAttr(AccessAttr, K);
}
if (AccessAttr != Attribute::None) {
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
if (addAccessAttr(A, AccessAttr))
Changed.insert(A->getParent());
}
}
}
}
/// Tests whether a function is "malloc-like".
///
/// A function is "malloc-like" if it returns either null or a pointer that
/// doesn't alias any other pointer visible to the caller.
static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
SmallSetVector<Value *, 8> FlowsToReturn;
for (BasicBlock &BB : *F)
if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
FlowsToReturn.insert(Ret->getReturnValue());
for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
Value *RetVal = FlowsToReturn[i];
if (Constant *C = dyn_cast<Constant>(RetVal)) {
if (!C->isNullValue() && !isa<UndefValue>(C))
return false;
continue;
}
if (isa<Argument>(RetVal))
return false;
if (Instruction *RVI = dyn_cast<Instruction>(RetVal))
switch (RVI->getOpcode()) {
// Extend the analysis by looking upwards.
case Instruction::BitCast:
case Instruction::GetElementPtr:
case Instruction::AddrSpaceCast:
FlowsToReturn.insert(RVI->getOperand(0));
continue;
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(RVI);
FlowsToReturn.insert(SI->getTrueValue());
FlowsToReturn.insert(SI->getFalseValue());
continue;
}
case Instruction::PHI: {
PHINode *PN = cast<PHINode>(RVI);
for (Value *IncValue : PN->incoming_values())
FlowsToReturn.insert(IncValue);
continue;
}
// Check whether the pointer came from an allocation.
case Instruction::Alloca:
break;
case Instruction::Call:
case Instruction::Invoke: {
CallBase &CB = cast<CallBase>(*RVI);
if (CB.hasRetAttr(Attribute::NoAlias))
break;
if (CB.getCalledFunction() && SCCNodes.count(CB.getCalledFunction()))
break;
LLVM_FALLTHROUGH;
}
default:
return false; // Did not come from an allocation.
}
if (PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false))
return false;
}
return true;
}
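// Illustrative example, not part of the upstream pass (the function is
// hypothetical): at the source level, a routine like this is "malloc-like" --
// every value it returns is either null or a fresh allocation that no
// caller-visible pointer aliases -- so addNoAliasAttrs below can mark its
// return value noalias.
static int *exampleMallocLike(unsigned N) {
  if (N == 0)
    return nullptr;
  return new int[N];
}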
/// Deduce noalias attributes for the SCC.
static void addNoAliasAttrs(const SCCNodeSet &SCCNodes,
SmallSet<Function *, 8> &Changed) {
// Check each function in turn, determining which functions return noalias
// pointers.
for (Function *F : SCCNodes) {
// Already noalias.
if (F->returnDoesNotAlias())
continue;
// We can infer and propagate function attributes only when we know that the
// definition we'll get at link time is *exactly* the definition we see now.
// For more details, see GlobalValue::mayBeDerefined.
if (!F->hasExactDefinition())
return;
// We annotate noalias return values, which are only applicable to
// pointer types.
if (!F->getReturnType()->isPointerTy())
continue;
if (!isFunctionMallocLike(F, SCCNodes))
return;
}
for (Function *F : SCCNodes) {
if (F->returnDoesNotAlias() ||
!F->getReturnType()->isPointerTy())
continue;
F->setReturnDoesNotAlias();
++NumNoAlias;
Changed.insert(F);
}
}
/// Tests whether this function is known to not return null.
///
/// Requires that the function returns a pointer.
///
/// Returns true if it believes the function will not return null, and sets
/// \p Speculative based on whether the returned conclusion is a speculative
/// conclusion due to SCC calls.
static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
bool &Speculative) {
assert(F->getReturnType()->isPointerTy() &&
"nonnull only meaningful on pointer types");
Speculative = false;
SmallSetVector<Value *, 8> FlowsToReturn;
for (BasicBlock &BB : *F)
if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator()))
FlowsToReturn.insert(Ret->getReturnValue());
auto &DL = F->getParent()->getDataLayout();
for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
Value *RetVal = FlowsToReturn[i];
// If this value is locally known to be non-null, we're good
if (isKnownNonZero(RetVal, DL))
continue;
// Otherwise, we need to look upwards since we can't make any local
// conclusions.
Instruction *RVI = dyn_cast<Instruction>(RetVal);
if (!RVI)
return false;
switch (RVI->getOpcode()) {
// Extend the analysis by looking upwards.
case Instruction::BitCast:
case Instruction::GetElementPtr:
case Instruction::AddrSpaceCast:
FlowsToReturn.insert(RVI->getOperand(0));
continue;
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(RVI);
FlowsToReturn.insert(SI->getTrueValue());
FlowsToReturn.insert(SI->getFalseValue());
continue;
}
case Instruction::PHI: {
PHINode *PN = cast<PHINode>(RVI);
for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
FlowsToReturn.insert(PN->getIncomingValue(i));
continue;
}
case Instruction::Call:
case Instruction::Invoke: {
CallBase &CB = cast<CallBase>(*RVI);
Function *Callee = CB.getCalledFunction();
// A call to a node within the SCC is assumed to return null until
// proven otherwise
if (Callee && SCCNodes.count(Callee)) {
Speculative = true;
continue;
}
return false;
}
default:
return false; // Unknown source, may be null
};
llvm_unreachable("should have either continued or returned");
}
return true;
}
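// Illustrative example, not part of the upstream pass (the function is
// hypothetical): both values flowing to the return below are provably
// non-null -- the address of a reference parameter and the address of a
// function-local static -- so isReturnNonNull can conclude, without any
// speculation, that this function never returns null.
static int *exampleNonNullReturn(int &X, bool UseX) {
  static int Fallback;
  return UseX ? &X : &Fallback;
}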
/// Deduce nonnull attributes for the SCC.
static void addNonNullAttrs(const SCCNodeSet &SCCNodes,
SmallSet<Function *, 8> &Changed) {
// Speculate that all functions in the SCC return only nonnull
// pointers. We may refute this as we analyze functions.
bool SCCReturnsNonNull = true;
// Check each function in turn, determining which functions return nonnull
// pointers.
for (Function *F : SCCNodes) {
// Already nonnull.
if (F->getAttributes().hasRetAttr(Attribute::NonNull))
continue;
// We can infer and propagate function attributes only when we know that the
// definition we'll get at link time is *exactly* the definition we see now.
// For more details, see GlobalValue::mayBeDerefined.
if (!F->hasExactDefinition())
return;
// We annotate nonnull return values, which are only applicable to
// pointer types.
if (!F->getReturnType()->isPointerTy())
continue;
bool Speculative = false;
if (isReturnNonNull(F, SCCNodes, Speculative)) {
if (!Speculative) {
// Mark the function eagerly since we may discover a function
// which prevents us from speculating about the entire SCC
LLVM_DEBUG(dbgs() << "Eagerly marking " << F->getName()
<< " as nonnull\n");
F->addRetAttr(Attribute::NonNull);
++NumNonNullReturn;
Changed.insert(F);
}
continue;
}
// At least one function returns something which could be null, so we can't
// speculate any more.
SCCReturnsNonNull = false;
}
if (SCCReturnsNonNull) {
for (Function *F : SCCNodes) {
if (F->getAttributes().hasRetAttr(Attribute::NonNull) ||
!F->getReturnType()->isPointerTy())
continue;
LLVM_DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n");
F->addRetAttr(Attribute::NonNull);
++NumNonNullReturn;
Changed.insert(F);
}
}
}
namespace {
/// Collects a set of attribute inference requests and performs them all in one
/// go on a single SCC Node. Inference involves scanning function bodies
/// looking for instructions that violate attribute assumptions.
/// As soon as all the bodies are fine we are free to set the attribute.
/// Customization of inference for individual attributes is performed by
/// providing a handful of predicates for each attribute.
class AttributeInferer {
public:
/// Describes a request for inference of a single attribute.
struct InferenceDescriptor {
/// Returns true if this function does not have to be handled.
/// The general intent of this predicate is to provide an optimization
/// for functions that do not need this attribute inference at all
/// (say, for functions that already have the attribute).
std::function<bool(const Function &)> SkipFunction;
/// Returns true if this instruction violates attribute assumptions.
std::function<bool(Instruction &)> InstrBreaksAttribute;
/// Sets the inferred attribute for this function.
std::function<void(Function &)> SetAttribute;
/// Attribute we derive.
Attribute::AttrKind AKind;
/// If true, only "exact" definitions can be used to infer this attribute.
/// See GlobalValue::isDefinitionExact.
bool RequiresExactDefinition;
InferenceDescriptor(Attribute::AttrKind AK,
std::function<bool(const Function &)> SkipFunc,
std::function<bool(Instruction &)> InstrScan,
std::function<void(Function &)> SetAttr,
bool ReqExactDef)
: SkipFunction(SkipFunc), InstrBreaksAttribute(InstrScan),
SetAttribute(SetAttr), AKind(AK),
RequiresExactDefinition(ReqExactDef) {}
};
private:
SmallVector<InferenceDescriptor, 4> InferenceDescriptors;
public:
void registerAttrInference(InferenceDescriptor AttrInference) {
InferenceDescriptors.push_back(AttrInference);
}
void run(const SCCNodeSet &SCCNodes, SmallSet<Function *, 8> &Changed);
};
/// Perform all the requested attribute inference actions according to the
/// attribute predicates stored before.
void AttributeInferer::run(const SCCNodeSet &SCCNodes,
SmallSet<Function *, 8> &Changed) {
SmallVector<InferenceDescriptor, 4> InferInSCC = InferenceDescriptors;
// Go through all the functions in SCC and check corresponding attribute
// assumptions for each of them. Attributes that are invalid for this SCC
// will be removed from InferInSCC.
for (Function *F : SCCNodes) {
// No attributes whose assumptions are still valid - done.
if (InferInSCC.empty())
return;
// Check if our attributes ever need scanning/can be scanned.
llvm::erase_if(InferInSCC, [F](const InferenceDescriptor &ID) {
if (ID.SkipFunction(*F))
return false;
// Remove from further inference (invalidate) when visiting a function
// that has no instructions to scan/has an unsuitable definition.
return F->isDeclaration() ||
(ID.RequiresExactDefinition && !F->hasExactDefinition());
});
// For each attribute still in InferInSCC that doesn't explicitly skip F,
// set up the F instructions scan to verify assumptions of the attribute.
SmallVector<InferenceDescriptor, 4> InferInThisFunc;
llvm::copy_if(
InferInSCC, std::back_inserter(InferInThisFunc),
[F](const InferenceDescriptor &ID) { return !ID.SkipFunction(*F); });
if (InferInThisFunc.empty())
continue;
// Start instruction scan.
for (Instruction &I : instructions(*F)) {
llvm::erase_if(InferInThisFunc, [&](const InferenceDescriptor &ID) {
if (!ID.InstrBreaksAttribute(I))
return false;
// Remove attribute from further inference on any other functions
// because attribute assumptions have just been violated.
llvm::erase_if(InferInSCC, [&ID](const InferenceDescriptor &D) {
return D.AKind == ID.AKind;
});
// Remove attribute from the rest of current instruction scan.
return true;
});
if (InferInThisFunc.empty())
break;
}
}
if (InferInSCC.empty())
return;
for (Function *F : SCCNodes)
// At this point InferInSCC contains only functions that were either:
// - explicitly skipped from scan/inference, or
// - verified to have no instructions that break attribute assumptions.
// Hence we just go and force the attribute for all non-skipped functions.
for (auto &ID : InferInSCC) {
if (ID.SkipFunction(*F))
continue;
Changed.insert(F);
ID.SetAttribute(*F);
}
}
struct SCCNodesResult {
SCCNodeSet SCCNodes;
bool HasUnknownCall;
};
} // end anonymous namespace
/// Helper for non-Convergent inference predicate InstrBreaksAttribute.
static bool InstrBreaksNonConvergent(Instruction &I,
const SCCNodeSet &SCCNodes) {
const CallBase *CB = dyn_cast<CallBase>(&I);
// Breaks the non-convergent assumption if CB is a convergent call to a function
// not in the SCC.
return CB && CB->isConvergent() &&
!SCCNodes.contains(CB->getCalledFunction());
}
/// Helper for NoUnwind inference predicate InstrBreaksAttribute.
static bool InstrBreaksNonThrowing(Instruction &I, const SCCNodeSet &SCCNodes) {
if (!I.mayThrow())
return false;
if (const auto *CI = dyn_cast<CallInst>(&I)) {
if (Function *Callee = CI->getCalledFunction()) {
// I is a may-throw call to a function inside our SCC. This doesn't
// invalidate our current working assumption that the SCC is no-throw; we
// just have to scan that other function.
if (SCCNodes.contains(Callee))
return false;
}
}
return true;
}
/// Helper for NoFree inference predicate InstrBreaksAttribute.
static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) {
CallBase *CB = dyn_cast<CallBase>(&I);
if (!CB)
return false;
if (CB->hasFnAttr(Attribute::NoFree))
return false;
// Speculatively assume callees inside the SCC do not free memory.
if (Function *Callee = CB->getCalledFunction())
if (SCCNodes.contains(Callee))
return false;
return true;
}
/// Attempt to remove convergent function attribute when possible.
///
/// Functions whose attributes were changed are added to \p Changed.
static void inferConvergent(const SCCNodeSet &SCCNodes,
SmallSet<Function *, 8> &Changed) {
AttributeInferer AI;
// Request to remove the convergent attribute from all functions in the SCC
// if every callsite within the SCC is not convergent (except for calls
// to functions within the SCC).
// Note: Removal of the attr from the callsites will happen in
// InstCombineCalls separately.
AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
Attribute::Convergent,
// Skip non-convergent functions.
[](const Function &F) { return !F.isConvergent(); },
// Instructions that break non-convergent assumption.
[SCCNodes](Instruction &I) {
return InstrBreaksNonConvergent(I, SCCNodes);
},
[](Function &F) {
LLVM_DEBUG(dbgs() << "Removing convergent attr from fn " << F.getName()
<< "\n");
F.setNotConvergent();
},
/* RequiresExactDefinition= */ false});
// Perform all the requested attribute inference actions.
AI.run(SCCNodes, Changed);
}
/// Infer attributes from all functions in the SCC by scanning every
/// instruction for compliance to the attribute assumptions. Currently it
/// does:
/// - addition of NoUnwind attribute
///
/// Functions whose attributes were changed are added to \p Changed.
static void inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes,
SmallSet<Function *, 8> &Changed) {
AttributeInferer AI;
if (!DisableNoUnwindInference)
// Request to infer nounwind attribute for all the functions in the SCC if
// every callsite within the SCC is not throwing (except for calls to
// functions within the SCC). Note that nounwind attribute suffers from
// derefinement - results may change depending on how functions are
// optimized. Thus it can be inferred only from exact definitions.
AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
Attribute::NoUnwind,
// Skip non-throwing functions.
[](const Function &F) { return F.doesNotThrow(); },
// Instructions that break non-throwing assumption.
[&SCCNodes](Instruction &I) {
return InstrBreaksNonThrowing(I, SCCNodes);
},
[](Function &F) {
LLVM_DEBUG(dbgs()
<< "Adding nounwind attr to fn " << F.getName() << "\n");
F.setDoesNotThrow();
++NumNoUnwind;
},
/* RequiresExactDefinition= */ true});
if (!DisableNoFreeInference)
// Request to infer nofree attribute for all the functions in the SCC if
// every callsite within the SCC does not directly or indirectly free
// memory (except for calls to functions within the SCC). Note that nofree
// attribute suffers from derefinement - results may change depending on
// how functions are optimized. Thus it can be inferred only from exact
// definitions.
AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
Attribute::NoFree,
// Skip functions known not to free memory.
[](const Function &F) { return F.doesNotFreeMemory(); },
// Instructions that break non-deallocating assumption.
[&SCCNodes](Instruction &I) {
return InstrBreaksNoFree(I, SCCNodes);
},
[](Function &F) {
LLVM_DEBUG(dbgs()
<< "Adding nofree attr to fn " << F.getName() << "\n");
F.setDoesNotFreeMemory();
++NumNoFree;
},
/* RequiresExactDefinition= */ true});
// Perform all the requested attribute inference actions.
AI.run(SCCNodes, Changed);
}
static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes,
SmallSet<Function *, 8> &Changed) {
// Try to identify functions that do not recurse.
// If the SCC contains multiple nodes we know for sure there is recursion.
if (SCCNodes.size() != 1)
return;
Function *F = *SCCNodes.begin();
if (!F || !F->hasExactDefinition() || F->doesNotRecurse())
return;
// If all of the calls in F are identifiable and are to norecurse functions, F
// is norecurse. This check also detects self-recursion as F is not currently
// marked norecurse, so any call from F to F will not be marked norecurse.
for (auto &BB : *F)
for (auto &I : BB.instructionsWithoutDebug())
if (auto *CB = dyn_cast<CallBase>(&I)) {
Function *Callee = CB->getCalledFunction();
if (!Callee || Callee == F || !Callee->doesNotRecurse())
// Function calls a potentially recursive function.
return;
}
// Every call was to a non-recursive function other than this function, and
// we have no indirect recursion as the SCC size is one. This function cannot
// recurse.
F->setDoesNotRecurse();
++NumNoRecurse;
Changed.insert(F);
}
static bool instructionDoesNotReturn(Instruction &I) {
if (auto *CB = dyn_cast<CallBase>(&I))
return CB->hasFnAttr(Attribute::NoReturn);
return false;
}
// A basic block can only return if it terminates with a ReturnInst and does not
// contain calls to noreturn functions.
static bool basicBlockCanReturn(BasicBlock &BB) {
if (!isa<ReturnInst>(BB.getTerminator()))
return false;
return none_of(BB, instructionDoesNotReturn);
}
+// FIXME: this doesn't handle recursion.
+static bool canReturn(Function &F) {
+ SmallVector<BasicBlock *, 16> Worklist;
+ SmallPtrSet<BasicBlock *, 16> Visited;
+
+ Visited.insert(&F.front());
+ Worklist.push_back(&F.front());
+
+ do {
+ BasicBlock *BB = Worklist.pop_back_val();
+ if (basicBlockCanReturn(*BB))
+ return true;
+ for (BasicBlock *Succ : successors(BB))
+ if (Visited.insert(Succ).second)
+ Worklist.push_back(Succ);
+ } while (!Worklist.empty());
+
+ return false;
+}
+
// Set the noreturn function attribute if possible.
static void addNoReturnAttrs(const SCCNodeSet &SCCNodes,
SmallSet<Function *, 8> &Changed) {
for (Function *F : SCCNodes) {
if (!F || !F->hasExactDefinition() || F->hasFnAttribute(Attribute::Naked) ||
F->doesNotReturn())
continue;
- // The function can return if any basic blocks can return.
- // FIXME: this doesn't handle recursion or unreachable blocks.
- if (none_of(*F, basicBlockCanReturn)) {
+ if (!canReturn(*F)) {
F->setDoesNotReturn();
Changed.insert(F);
}
}
}
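// Illustrative example, not part of the upstream pass (the function is
// hypothetical): no basic block in this function ends in a ret -- its only
// block terminates along a noreturn path -- so canReturn is false and
// addNoReturnAttrs above marks it noreturn.
static void exampleNeverReturns() {
  llvm_unreachable("this helper never returns");
}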
static bool functionWillReturn(const Function &F) {
// We can infer and propagate function attributes only when we know that the
// definition we'll get at link time is *exactly* the definition we see now.
// For more details, see GlobalValue::mayBeDerefined.
if (!F.hasExactDefinition())
return false;
// Must-progress function without side-effects must return.
if (F.mustProgress() && F.onlyReadsMemory())
return true;
// Can only analyze functions with a definition.
if (F.isDeclaration())
return false;
// Functions with loops require more sophisticated analysis, as the loop
// may be infinite. For now, don't try to handle them.
SmallVector<std::pair<const BasicBlock *, const BasicBlock *>> Backedges;
FindFunctionBackedges(F, Backedges);
if (!Backedges.empty())
return false;
// If there are no loops, then the function is willreturn if all calls in
// it are willreturn.
return all_of(instructions(F), [](const Instruction &I) {
return I.willReturn();
});
}
// Set the willreturn function attribute if possible.
static void addWillReturn(const SCCNodeSet &SCCNodes,
SmallSet<Function *, 8> &Changed) {
for (Function *F : SCCNodes) {
if (!F || F->willReturn() || !functionWillReturn(*F))
continue;
F->setWillReturn();
NumWillReturn++;
Changed.insert(F);
}
}
// Return true if this is an atomic which has an ordering stronger than
// unordered. Note that this is different than the predicate we use in
// Attributor. Here we chose to be conservative and consider monotonic
// operations potentially synchronizing. We generally don't do much with
// monotonic operations, so this is simply risk reduction.
static bool isOrderedAtomic(Instruction *I) {
if (!I->isAtomic())
return false;
if (auto *FI = dyn_cast<FenceInst>(I))
// All legal orderings for fence are stronger than monotonic.
return FI->getSyncScopeID() != SyncScope::SingleThread;
else if (isa<AtomicCmpXchgInst>(I) || isa<AtomicRMWInst>(I))
return true;
else if (auto *SI = dyn_cast<StoreInst>(I))
return !SI->isUnordered();
else if (auto *LI = dyn_cast<LoadInst>(I))
return !LI->isUnordered();
else {
llvm_unreachable("unknown atomic instruction?");
}
}
static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) {
// Volatile may synchronize
if (I.isVolatile())
return true;
// An ordered atomic may synchronize. (See the comment above on monotonic.)
if (isOrderedAtomic(&I))
return true;
auto *CB = dyn_cast<CallBase>(&I);
if (!CB)
// Non-call cases are covered by the two checks above.
return false;
if (CB->hasFnAttr(Attribute::NoSync))
return false;
// Non-volatile memset/memcpy/memmove intrinsics are nosync.
// NOTE: Only intrinsics with volatile flags should be handled here. All
// others should be marked in Intrinsics.td.
if (auto *MI = dyn_cast<MemIntrinsic>(&I))
if (!MI->isVolatile())
return false;
// Speculatively assume callees inside the SCC are nosync.
if (Function *Callee = CB->getCalledFunction())
if (SCCNodes.contains(Callee))
return false;
return true;
}
// Infer the nosync attribute.
static void addNoSyncAttr(const SCCNodeSet &SCCNodes,
SmallSet<Function *, 8> &Changed) {
AttributeInferer AI;
AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
Attribute::NoSync,
// Skip already marked functions.
[](const Function &F) { return F.hasNoSync(); },
// Instructions that break nosync assumption.
[&SCCNodes](Instruction &I) {
return InstrBreaksNoSync(I, SCCNodes);
},
[](Function &F) {
LLVM_DEBUG(dbgs()
<< "Adding nosync attr to fn " << F.getName() << "\n");
F.setNoSync();
++NumNoSync;
},
/* RequiresExactDefinition= */ true});
AI.run(SCCNodes, Changed);
}
static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) {
SCCNodesResult Res;
Res.HasUnknownCall = false;
for (Function *F : Functions) {
if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked) ||
F->isPresplitCoroutine()) {
// Treat any function we're trying not to optimize as if it were an
// indirect call and omit it from the node set used below.
Res.HasUnknownCall = true;
continue;
}
// Track whether any functions in this SCC have an unknown call edge.
// Note: if this is ever a performance hit, we can common it with
// subsequent routines which also do scans over the instructions of the
// function.
if (!Res.HasUnknownCall) {
for (Instruction &I : instructions(*F)) {
if (auto *CB = dyn_cast<CallBase>(&I)) {
if (!CB->getCalledFunction()) {
Res.HasUnknownCall = true;
break;
}
}
}
}
Res.SCCNodes.insert(F);
}
return Res;
}
template <typename AARGetterT>
static SmallSet<Function *, 8>
deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter) {
SCCNodesResult Nodes = createSCCNodeSet(Functions);
// Bail if the SCC only contains optnone functions.
if (Nodes.SCCNodes.empty())
return {};
SmallSet<Function *, 8> Changed;
addArgumentReturnedAttrs(Nodes.SCCNodes, Changed);
addReadAttrs(Nodes.SCCNodes, AARGetter, Changed);
addArgumentAttrs(Nodes.SCCNodes, Changed);
inferConvergent(Nodes.SCCNodes, Changed);
addNoReturnAttrs(Nodes.SCCNodes, Changed);
addWillReturn(Nodes.SCCNodes, Changed);
// If we have no external nodes participating in the SCC, we can deduce some
// more precise attributes as well.
if (!Nodes.HasUnknownCall) {
addNoAliasAttrs(Nodes.SCCNodes, Changed);
addNonNullAttrs(Nodes.SCCNodes, Changed);
inferAttrsFromFunctionBodies(Nodes.SCCNodes, Changed);
addNoRecurseAttrs(Nodes.SCCNodes, Changed);
}
addNoSyncAttr(Nodes.SCCNodes, Changed);
// Finally, infer the maximal set of attributes from the ones we've inferred
// above. This handles cases where one attribute on a signature implies
// another, but for implementation reasons the inference rule for the
// latter is missing (or simply less sophisticated).
for (Function *F : Nodes.SCCNodes)
if (F)
if (inferAttributesFromOthers(*F))
Changed.insert(F);
return Changed;
}
PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
CGSCCAnalysisManager &AM,
LazyCallGraph &CG,
CGSCCUpdateResult &) {
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
// We pass a lambda into functions to wire them up to the analysis manager
// for getting function analyses.
auto AARGetter = [&](Function &F) -> AAResults & {
return FAM.getResult<AAManager>(F);
};
SmallVector<Function *, 8> Functions;
for (LazyCallGraph::Node &N : C) {
Functions.push_back(&N.getFunction());
}
auto ChangedFunctions = deriveAttrsInPostOrder(Functions, AARGetter);
if (ChangedFunctions.empty())
return PreservedAnalyses::all();
// Invalidate analyses for modified functions so that we don't have to
// invalidate all analyses for all functions in this SCC.
PreservedAnalyses FuncPA;
// We haven't changed the CFG for modified functions.
FuncPA.preserveSet<CFGAnalyses>();
for (Function *Changed : ChangedFunctions) {
FAM.invalidate(*Changed, FuncPA);
// Also invalidate any direct callers of changed functions since analyses
// may care about attributes of direct callees. For example, MemorySSA cares
// about whether or not a call's callee modifies memory and queries that
// through function attributes.
for (auto *U : Changed->users()) {
if (auto *Call = dyn_cast<CallBase>(U)) {
if (Call->getCalledFunction() == Changed)
FAM.invalidate(*Call->getFunction(), FuncPA);
}
}
}
PreservedAnalyses PA;
// We have not added or removed functions.
PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
// We already invalidated all relevant function analyses above.
PA.preserveSet<AllAnalysesOn<Function>>();
return PA;
}
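// Illustrative sketch, not part of the upstream pass (the helper name is
// hypothetical): with the new pass manager, the CGSCC pass defined above is
// typically scheduled through a CGSCCPassManager, assuming the usual
// PassBuilder / analysis-manager setup already exists.
static void exampleSchedulePass(CGSCCPassManager &CGPM) {
  CGPM.addPass(PostOrderFunctionAttrsPass());
}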
namespace {
struct PostOrderFunctionAttrsLegacyPass : public CallGraphSCCPass {
// Pass identification, replacement for typeid
static char ID;
PostOrderFunctionAttrsLegacyPass() : CallGraphSCCPass(ID) {
initializePostOrderFunctionAttrsLegacyPassPass(
*PassRegistry::getPassRegistry());
}
bool runOnSCC(CallGraphSCC &SCC) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<AssumptionCacheTracker>();
getAAResultsAnalysisUsage(AU);
CallGraphSCCPass::getAnalysisUsage(AU);
}
};
} // end anonymous namespace
char PostOrderFunctionAttrsLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrsLegacyPass, "function-attrs",
"Deduce function attributes", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(PostOrderFunctionAttrsLegacyPass, "function-attrs",
"Deduce function attributes", false, false)
Pass *llvm::createPostOrderFunctionAttrsLegacyPass() {
return new PostOrderFunctionAttrsLegacyPass();
}
template <typename AARGetterT>
static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
SmallVector<Function *, 8> Functions;
for (CallGraphNode *I : SCC) {
Functions.push_back(I->getFunction());
}
return !deriveAttrsInPostOrder(Functions, AARGetter).empty();
}
bool PostOrderFunctionAttrsLegacyPass::runOnSCC(CallGraphSCC &SCC) {
if (skipSCC(SCC))
return false;
return runImpl(SCC, LegacyAARGetter(*this));
}
namespace {
struct ReversePostOrderFunctionAttrsLegacyPass : public ModulePass {
// Pass identification, replacement for typeid
static char ID;
ReversePostOrderFunctionAttrsLegacyPass() : ModulePass(ID) {
initializeReversePostOrderFunctionAttrsLegacyPassPass(
*PassRegistry::getPassRegistry());
}
bool runOnModule(Module &M) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<CallGraphWrapperPass>();
AU.addPreserved<CallGraphWrapperPass>();
}
};
} // end anonymous namespace
char ReversePostOrderFunctionAttrsLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(ReversePostOrderFunctionAttrsLegacyPass,
"rpo-function-attrs", "Deduce function attributes in RPO",
false, false)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(ReversePostOrderFunctionAttrsLegacyPass,
"rpo-function-attrs", "Deduce function attributes in RPO",
false, false)
Pass *llvm::createReversePostOrderFunctionAttrsPass() {
return new ReversePostOrderFunctionAttrsLegacyPass();
}
static bool addNoRecurseAttrsTopDown(Function &F) {
// We check the preconditions for the function prior to calling this to avoid
// the cost of building up a reversible post-order list. We assert them here
// to make sure none of the invariants this relies on were violated.
assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!");
assert(!F.doesNotRecurse() &&
"This function has already been deduced as norecurs!");
assert(F.hasInternalLinkage() &&
"Can only do top-down deduction for internal linkage functions!");
// If F is internal and all of its uses are calls from non-recursive
// functions, then none of its calls could in fact recurse without going
// through a function marked norecurse, and so we can mark this function too
// as norecurse. Note that the uses must actually be calls -- otherwise
// a pointer to this function could be returned from a norecurse function but
// this function could be recursively (indirectly) called. Note that this
// also detects if F is directly recursive as F is not yet marked as
// a norecurse function.
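// Illustrative example: if @f has internal linkage and its only user is a
// direct call inside a function already marked norecurse, then no call chain
// can re-enter @f, so @f itself can safely be marked norecurse.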
for (auto *U : F.users()) {
auto *I = dyn_cast<Instruction>(U);
if (!I)
return false;
CallBase *CB = dyn_cast<CallBase>(I);
if (!CB || !CB->getParent()->getParent()->doesNotRecurse())
return false;
}
F.setDoesNotRecurse();
++NumNoRecurse;
return true;
}
static bool deduceFunctionAttributeInRPO(Module &M, CallGraph &CG) {
// We only have a post-order SCC traversal (because SCCs are inherently
// discovered in post-order), so we accumulate them in a vector and then walk
// it in reverse. This is simpler than using the RPO iterator infrastructure
// because we need to combine SCC detection and the PO walk of the call
// graph. We can also cheat egregiously because we're primarily interested in
// synthesizing norecurse, so we need only save the singleton SCCs, as SCCs
// with multiple functions in them will clearly be recursive.
SmallVector<Function *, 16> Worklist;
for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
if (I->size() != 1)
continue;
Function *F = I->front()->getFunction();
if (F && !F->isDeclaration() && !F->doesNotRecurse() &&
F->hasInternalLinkage())
Worklist.push_back(F);
}
bool Changed = false;
for (auto *F : llvm::reverse(Worklist))
Changed |= addNoRecurseAttrsTopDown(*F);
return Changed;
}
bool ReversePostOrderFunctionAttrsLegacyPass::runOnModule(Module &M) {
if (skipModule(M))
return false;
auto &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
return deduceFunctionAttributeInRPO(M, CG);
}
PreservedAnalyses
ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) {
auto &CG = AM.getResult<CallGraphAnalysis>(M);
if (!deduceFunctionAttributeInRPO(M, CG))
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserve<CallGraphAnalysis>();
return PA;
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 520b6ebf9e74..5113c0c67acc 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -1,5166 +1,5168 @@
//===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// OpenMP specific optimizations:
//
// - Deduplication of runtime calls, e.g., omp_get_thread_num.
// - Replacing globalized device memory with stack memory.
// - Replacing globalized device memory with shared memory.
// - Parallel region merging.
// - Transforming generic-mode device kernels to SPMD mode.
// - Specializing the state machine for generic-mode device kernels.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/OpenMPOpt.h"
#include "llvm/ADT/EnumeratedArray.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Assumptions.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
#include <algorithm>
using namespace llvm;
using namespace omp;
#define DEBUG_TYPE "openmp-opt"
static cl::opt<bool> DisableOpenMPOptimizations(
"openmp-opt-disable", cl::ZeroOrMore,
cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
cl::init(false));
static cl::opt<bool> EnableParallelRegionMerging(
"openmp-opt-enable-merging", cl::ZeroOrMore,
cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden,
cl::init(false));
static cl::opt<bool>
DisableInternalization("openmp-opt-disable-internalization", cl::ZeroOrMore,
cl::desc("Disable function internalization."),
cl::Hidden, cl::init(false));
static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
cl::Hidden);
static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels",
cl::init(false), cl::Hidden);
static cl::opt<bool> HideMemoryTransferLatency(
"openmp-hide-memory-transfer-latency",
cl::desc("[WIP] Tries to hide the latency of host to device memory"
" transfers"),
cl::Hidden, cl::init(false));
static cl::opt<bool> DisableOpenMPOptDeglobalization(
"openmp-opt-disable-deglobalization", cl::ZeroOrMore,
cl::desc("Disable OpenMP optimizations involving deglobalization."),
cl::Hidden, cl::init(false));
static cl::opt<bool> DisableOpenMPOptSPMDization(
"openmp-opt-disable-spmdization", cl::ZeroOrMore,
cl::desc("Disable OpenMP optimizations involving SPMD-ization."),
cl::Hidden, cl::init(false));
static cl::opt<bool> DisableOpenMPOptFolding(
"openmp-opt-disable-folding", cl::ZeroOrMore,
cl::desc("Disable OpenMP optimizations involving folding."), cl::Hidden,
cl::init(false));
static cl::opt<bool> DisableOpenMPOptStateMachineRewrite(
"openmp-opt-disable-state-machine-rewrite", cl::ZeroOrMore,
cl::desc("Disable OpenMP optimizations that replace the state machine."),
cl::Hidden, cl::init(false));
static cl::opt<bool> DisableOpenMPOptBarrierElimination(
"openmp-opt-disable-barrier-elimination", cl::ZeroOrMore,
cl::desc("Disable OpenMP optimizations that eliminate barriers."),
cl::Hidden, cl::init(false));
static cl::opt<bool> PrintModuleAfterOptimizations(
"openmp-opt-print-module", cl::ZeroOrMore,
cl::desc("Print the current module after OpenMP optimizations."),
cl::Hidden, cl::init(false));
static cl::opt<bool> AlwaysInlineDeviceFunctions(
"openmp-opt-inline-device", cl::ZeroOrMore,
cl::desc("Inline all applicible functions on the device."), cl::Hidden,
cl::init(false));
static cl::opt<bool>
EnableVerboseRemarks("openmp-opt-verbose-remarks", cl::ZeroOrMore,
cl::desc("Enables more verbose remarks."), cl::Hidden,
cl::init(false));
static cl::opt<unsigned>
SetFixpointIterations("openmp-opt-max-iterations", cl::Hidden,
cl::desc("Maximal number of attributor iterations."),
cl::init(256));
STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
"Number of OpenMP runtime calls deduplicated");
STATISTIC(NumOpenMPParallelRegionsDeleted,
"Number of OpenMP parallel regions deleted");
STATISTIC(NumOpenMPRuntimeFunctionsIdentified,
"Number of OpenMP runtime functions identified");
STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
"Number of OpenMP runtime function uses identified");
STATISTIC(NumOpenMPTargetRegionKernels,
"Number of OpenMP target region entry points (=kernels) identified");
STATISTIC(NumOpenMPTargetRegionKernelsSPMD,
"Number of OpenMP target region entry points (=kernels) executed in "
"SPMD-mode instead of generic-mode");
STATISTIC(NumOpenMPTargetRegionKernelsWithoutStateMachine,
"Number of OpenMP target region entry points (=kernels) executed in "
"generic-mode without a state machines");
STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback,
"Number of OpenMP target region entry points (=kernels) executed in "
"generic-mode with customized state machines with fallback");
STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback,
"Number of OpenMP target region entry points (=kernels) executed in "
"generic-mode with customized state machines without fallback");
STATISTIC(
NumOpenMPParallelRegionsReplacedInGPUStateMachine,
"Number of OpenMP parallel regions replaced with ID in GPU state machines");
STATISTIC(NumOpenMPParallelRegionsMerged,
"Number of OpenMP parallel regions merged");
STATISTIC(NumBytesMovedToSharedMemory,
"Amount of memory pushed to shared memory");
STATISTIC(NumBarriersEliminated, "Number of redundant barriers eliminated");
#if !defined(NDEBUG)
static constexpr auto TAG = "[" DEBUG_TYPE "]";
#endif
namespace {
struct AAHeapToShared;
struct AAICVTracker;
/// OpenMP specific information. For now, stores RFIs and ICVs also needed for
/// Attributor runs.
struct OMPInformationCache : public InformationCache {
OMPInformationCache(Module &M, AnalysisGetter &AG,
BumpPtrAllocator &Allocator, SetVector<Function *> &CGSCC,
KernelSet &Kernels)
: InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M),
Kernels(Kernels) {
OMPBuilder.initialize();
initializeRuntimeFunctions();
initializeInternalControlVars();
}
/// Generic information that describes an internal control variable.
struct InternalControlVarInfo {
/// The kind, as described by InternalControlVar enum.
InternalControlVar Kind;
/// The name of the ICV.
StringRef Name;
/// Environment variable associated with this ICV.
StringRef EnvVarName;
/// Initial value kind.
ICVInitValue InitKind;
/// Initial value.
ConstantInt *InitValue;
/// Setter RTL function associated with this ICV.
RuntimeFunction Setter;
/// Getter RTL function associated with this ICV.
RuntimeFunction Getter;
/// RTL Function corresponding to the override clause of this ICV
RuntimeFunction Clause;
};
/// Generic information that describes a runtime function
struct RuntimeFunctionInfo {
/// The kind, as described by the RuntimeFunction enum.
RuntimeFunction Kind;
/// The name of the function.
StringRef Name;
/// Flag to indicate a variadic function.
bool IsVarArg;
/// The return type of the function.
Type *ReturnType;
/// The argument types of the function.
SmallVector<Type *, 8> ArgumentTypes;
/// The declaration if available.
Function *Declaration = nullptr;
/// Uses of this runtime function per function containing the use.
using UseVector = SmallVector<Use *, 16>;
/// Clear UsesMap for runtime function.
void clearUsesMap() { UsesMap.clear(); }
/// Boolean conversion that is true if the runtime function was found.
operator bool() const { return Declaration; }
/// Return the vector of uses in function \p F.
UseVector &getOrCreateUseVector(Function *F) {
std::shared_ptr<UseVector> &UV = UsesMap[F];
if (!UV)
UV = std::make_shared<UseVector>();
return *UV;
}
/// Return the vector of uses in function \p F or `nullptr` if there are
/// none.
const UseVector *getUseVector(Function &F) const {
auto I = UsesMap.find(&F);
if (I != UsesMap.end())
return I->second.get();
return nullptr;
}
/// Return how many functions contain uses of this runtime function.
size_t getNumFunctionsWithUses() const { return UsesMap.size(); }
/// Return the number of arguments (or the minimal number for variadic
/// functions).
size_t getNumArgs() const { return ArgumentTypes.size(); }
/// Run the callback \p CB on each use and forget the use if the result is
/// true. The callback will be fed the function in which the use was
/// encountered as second argument.
void foreachUse(SmallVectorImpl<Function *> &SCC,
function_ref<bool(Use &, Function &)> CB) {
for (Function *F : SCC)
foreachUse(CB, F);
}
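// Example use (illustrative sketch; `SCCFunctions` is a placeholder name):
//   unsigned NumCalls = 0;
//   RFI.foreachUse(SCCFunctions, [&](Use &U, Function &F) {
//     ++NumCalls;      // inspect the use U inside F
//     return false;    // returning false keeps the use registered
//   });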
/// Run the callback \p CB on each use within the function \p F and forget
/// the use if the result is true.
void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F) {
SmallVector<unsigned, 8> ToBeDeleted;
ToBeDeleted.clear();
unsigned Idx = 0;
UseVector &UV = getOrCreateUseVector(F);
for (Use *U : UV) {
if (CB(*U, *F))
ToBeDeleted.push_back(Idx);
++Idx;
}
// Remove the to-be-deleted indices in reverse order as prior
// modifications will not modify the smaller indices.
while (!ToBeDeleted.empty()) {
unsigned Idx = ToBeDeleted.pop_back_val();
UV[Idx] = UV.back();
UV.pop_back();
}
}
private:
/// Map from functions to all uses of this runtime function contained in
/// them.
DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap;
public:
/// Iterators for the uses of this runtime function.
decltype(UsesMap)::iterator begin() { return UsesMap.begin(); }
decltype(UsesMap)::iterator end() { return UsesMap.end(); }
};
/// An OpenMP-IR-Builder instance
OpenMPIRBuilder OMPBuilder;
/// Map from runtime function kind to the runtime function description.
EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
RuntimeFunction::OMPRTL___last>
RFIs;
/// Map from function declarations/definitions to their runtime enum type.
DenseMap<Function *, RuntimeFunction> RuntimeFunctionIDMap;
/// Map from ICV kind to the ICV description.
EnumeratedArray<InternalControlVarInfo, InternalControlVar,
InternalControlVar::ICV___last>
ICVs;
/// Helper to initialize all internal control variable information for those
/// defined in OMPKinds.def.
void initializeInternalControlVars() {
#define ICV_RT_SET(_Name, RTL) \
{ \
auto &ICV = ICVs[_Name]; \
ICV.Setter = RTL; \
}
#define ICV_RT_GET(Name, RTL) \
{ \
auto &ICV = ICVs[Name]; \
ICV.Getter = RTL; \
}
#define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init) \
{ \
auto &ICV = ICVs[Enum]; \
ICV.Name = _Name; \
ICV.Kind = Enum; \
ICV.InitKind = Init; \
ICV.EnvVarName = _EnvVarName; \
switch (ICV.InitKind) { \
case ICV_IMPLEMENTATION_DEFINED: \
ICV.InitValue = nullptr; \
break; \
case ICV_ZERO: \
ICV.InitValue = ConstantInt::get( \
Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0); \
break; \
case ICV_FALSE: \
ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext()); \
break; \
case ICV_LAST: \
break; \
} \
}
#include "llvm/Frontend/OpenMP/OMPKinds.def"
}
/// Returns true if the function declaration \p F matches the runtime
/// function types, that is, return type \p RTFRetType, and argument types
/// \p RTFArgTypes.
static bool declMatchesRTFTypes(Function *F, Type *RTFRetType,
SmallVector<Type *, 8> &RTFArgTypes) {
// TODO: We should output information to the user (under debug output
// and via remarks).
if (!F)
return false;
if (F->getReturnType() != RTFRetType)
return false;
if (F->arg_size() != RTFArgTypes.size())
return false;
auto *RTFTyIt = RTFArgTypes.begin();
for (Argument &Arg : F->args()) {
if (Arg.getType() != *RTFTyIt)
return false;
++RTFTyIt;
}
return true;
}
// Helper to collect all uses of the declaration in the UsesMap.
unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) {
unsigned NumUses = 0;
if (!RFI.Declaration)
return NumUses;
OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration);
if (CollectStats) {
NumOpenMPRuntimeFunctionsIdentified += 1;
NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
}
// TODO: We directly convert uses into proper calls and unknown uses.
for (Use &U : RFI.Declaration->uses()) {
if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
if (ModuleSlice.count(UserI->getFunction())) {
RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U);
++NumUses;
}
} else {
RFI.getOrCreateUseVector(nullptr).push_back(&U);
++NumUses;
}
}
return NumUses;
}
// Helper function to recollect uses of a runtime function.
void recollectUsesForFunction(RuntimeFunction RTF) {
auto &RFI = RFIs[RTF];
RFI.clearUsesMap();
collectUses(RFI, /*CollectStats*/ false);
}
// Helper function to recollect uses of all runtime functions.
void recollectUses() {
for (int Idx = 0; Idx < RFIs.size(); ++Idx)
recollectUsesForFunction(static_cast<RuntimeFunction>(Idx));
}
// Helper function to inherit the calling convention of the function callee.
void setCallingConvention(FunctionCallee Callee, CallInst *CI) {
if (Function *Fn = dyn_cast<Function>(Callee.getCallee()))
CI->setCallingConv(Fn->getCallingConv());
}
/// Helper to initialize all runtime function information for those defined
/// in OpenMPKinds.def.
void initializeRuntimeFunctions() {
Module &M = *((*ModuleSlice.begin())->getParent());
// Helper macros for handling __VA_ARGS__ in OMP_RTL
#define OMP_TYPE(VarName, ...) \
Type *VarName = OMPBuilder.VarName; \
(void)VarName;
#define OMP_ARRAY_TYPE(VarName, ...) \
ArrayType *VarName##Ty = OMPBuilder.VarName##Ty; \
(void)VarName##Ty; \
PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy; \
(void)VarName##PtrTy;
#define OMP_FUNCTION_TYPE(VarName, ...) \
FunctionType *VarName = OMPBuilder.VarName; \
(void)VarName; \
PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \
(void)VarName##Ptr;
#define OMP_STRUCT_TYPE(VarName, ...) \
StructType *VarName = OMPBuilder.VarName; \
(void)VarName; \
PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \
(void)VarName##Ptr;
#define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...) \
{ \
SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__}); \
Function *F = M.getFunction(_Name); \
RTLFunctions.insert(F); \
if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) { \
RuntimeFunctionIDMap[F] = _Enum; \
auto &RFI = RFIs[_Enum]; \
RFI.Kind = _Enum; \
RFI.Name = _Name; \
RFI.IsVarArg = _IsVarArg; \
RFI.ReturnType = OMPBuilder._ReturnType; \
RFI.ArgumentTypes = std::move(ArgsTypes); \
RFI.Declaration = F; \
unsigned NumUses = collectUses(RFI); \
(void)NumUses; \
LLVM_DEBUG({ \
dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") \
<< " found\n"; \
if (RFI.Declaration) \
dbgs() << TAG << "-> got " << NumUses << " uses in " \
<< RFI.getNumFunctionsWithUses() \
<< " different functions.\n"; \
}); \
} \
}
#include "llvm/Frontend/OpenMP/OMPKinds.def"
// Remove the `noinline` attribute from `__kmpc`, `_OMP::` and `omp_`
// functions, except if `optnone` is present.
for (Function &F : M) {
for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"})
if (F.getName().startswith(Prefix) &&
!F.hasFnAttribute(Attribute::OptimizeNone))
F.removeFnAttr(Attribute::NoInline);
}
// TODO: We should attach the attributes defined in OMPKinds.def.
}
/// Collection of known kernels (\see Kernel) in the module.
KernelSet &Kernels;
/// Collection of known OpenMP runtime functions.
DenseSet<const Function *> RTLFunctions;
};
template <typename Ty, bool InsertInvalidates = true>
struct BooleanStateWithSetVector : public BooleanState {
bool contains(const Ty &Elem) const { return Set.contains(Elem); }
bool insert(const Ty &Elem) {
if (InsertInvalidates)
BooleanState::indicatePessimisticFixpoint();
return Set.insert(Elem);
}
const Ty &operator[](int Idx) const { return Set[Idx]; }
bool operator==(const BooleanStateWithSetVector &RHS) const {
return BooleanState::operator==(RHS) && Set == RHS.Set;
}
bool operator!=(const BooleanStateWithSetVector &RHS) const {
return !(*this == RHS);
}
bool empty() const { return Set.empty(); }
size_t size() const { return Set.size(); }
/// "Clamp" this state with \p RHS.
BooleanStateWithSetVector &operator^=(const BooleanStateWithSetVector &RHS) {
BooleanState::operator^=(RHS);
Set.insert(RHS.Set.begin(), RHS.Set.end());
return *this;
}
private:
/// A set to keep track of elements.
SetVector<Ty> Set;
public:
typename decltype(Set)::iterator begin() { return Set.begin(); }
typename decltype(Set)::iterator end() { return Set.end(); }
typename decltype(Set)::const_iterator begin() const { return Set.begin(); }
typename decltype(Set)::const_iterator end() const { return Set.end(); }
};
template <typename Ty, bool InsertInvalidates = true>
using BooleanStateWithPtrSetVector =
BooleanStateWithSetVector<Ty *, InsertInvalidates>;
struct KernelInfoState : AbstractState {
/// Flag to track if we reached a fixpoint.
bool IsAtFixpoint = false;
/// The parallel regions (identified by the outlined parallel functions) that
/// can be reached from the associated function.
BooleanStateWithPtrSetVector<Function, /* InsertInvalidates */ false>
ReachedKnownParallelRegions;
/// State to track what parallel region we might reach.
BooleanStateWithPtrSetVector<CallBase> ReachedUnknownParallelRegions;
/// State to track if we are in SPMD-mode, assumed or known, and why we decided
/// we cannot be. If it is assumed, then RequiresFullRuntime should also be
/// false.
BooleanStateWithPtrSetVector<Instruction, false> SPMDCompatibilityTracker;
/// The __kmpc_target_init call in this kernel, if any. If we find more than
/// one we abort as the kernel is malformed.
CallBase *KernelInitCB = nullptr;
/// The __kmpc_target_deinit call in this kernel, if any. If we find more than
/// one we abort as the kernel is malformed.
CallBase *KernelDeinitCB = nullptr;
/// Flag to indicate if the associated function is a kernel entry.
bool IsKernelEntry = false;
/// State to track what kernel entries can reach the associated function.
BooleanStateWithPtrSetVector<Function, false> ReachingKernelEntries;
/// State to indicate if we can track the parallel level of the associated
/// function. We will give up tracking if we encounter an unknown caller or the
/// caller is __kmpc_parallel_51.
BooleanStateWithSetVector<uint8_t> ParallelLevels;
/// Abstract State interface
///{
KernelInfoState() {}
KernelInfoState(bool BestState) {
if (!BestState)
indicatePessimisticFixpoint();
}
/// See AbstractState::isValidState(...)
bool isValidState() const override { return true; }
/// See AbstractState::isAtFixpoint(...)
bool isAtFixpoint() const override { return IsAtFixpoint; }
/// See AbstractState::indicatePessimisticFixpoint(...)
ChangeStatus indicatePessimisticFixpoint() override {
IsAtFixpoint = true;
ReachingKernelEntries.indicatePessimisticFixpoint();
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
ReachedKnownParallelRegions.indicatePessimisticFixpoint();
ReachedUnknownParallelRegions.indicatePessimisticFixpoint();
return ChangeStatus::CHANGED;
}
/// See AbstractState::indicateOptimisticFixpoint(...)
ChangeStatus indicateOptimisticFixpoint() override {
IsAtFixpoint = true;
ReachingKernelEntries.indicateOptimisticFixpoint();
SPMDCompatibilityTracker.indicateOptimisticFixpoint();
ReachedKnownParallelRegions.indicateOptimisticFixpoint();
ReachedUnknownParallelRegions.indicateOptimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
/// Return the assumed state
KernelInfoState &getAssumed() { return *this; }
const KernelInfoState &getAssumed() const { return *this; }
bool operator==(const KernelInfoState &RHS) const {
if (SPMDCompatibilityTracker != RHS.SPMDCompatibilityTracker)
return false;
if (ReachedKnownParallelRegions != RHS.ReachedKnownParallelRegions)
return false;
if (ReachedUnknownParallelRegions != RHS.ReachedUnknownParallelRegions)
return false;
if (ReachingKernelEntries != RHS.ReachingKernelEntries)
return false;
return true;
}
/// Returns true if this kernel contains any OpenMP parallel regions.
bool mayContainParallelRegion() {
return !ReachedKnownParallelRegions.empty() ||
!ReachedUnknownParallelRegions.empty();
}
/// Return empty set as the best state of potential values.
static KernelInfoState getBestState() { return KernelInfoState(true); }
static KernelInfoState getBestState(KernelInfoState &KIS) {
return getBestState();
}
/// Return full set as the worst state of potential values.
static KernelInfoState getWorstState() { return KernelInfoState(false); }
/// "Clamp" this state with \p KIS.
KernelInfoState operator^=(const KernelInfoState &KIS) {
// Do not merge two different _init and _deinit call sites.
if (KIS.KernelInitCB) {
if (KernelInitCB && KernelInitCB != KIS.KernelInitCB)
llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "
"assumptions.");
KernelInitCB = KIS.KernelInitCB;
}
if (KIS.KernelDeinitCB) {
if (KernelDeinitCB && KernelDeinitCB != KIS.KernelDeinitCB)
llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "
"assumptions.");
KernelDeinitCB = KIS.KernelDeinitCB;
}
SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker;
ReachedKnownParallelRegions ^= KIS.ReachedKnownParallelRegions;
ReachedUnknownParallelRegions ^= KIS.ReachedUnknownParallelRegions;
return *this;
}
KernelInfoState operator&=(const KernelInfoState &KIS) {
return (*this ^= KIS);
}
///}
};
/// Used to map the values physically (in the IR) stored in an offload
/// array, to a vector in memory.
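///
/// Illustrative example (hypothetical IR): for
///   %offload_baseptrs = alloca [2 x i8*]
///   store i8* %a, i8** %gep0     ; element 0
///   store i8* %b, i8** %gep1     ; element 1
/// initialize() fills StoredValues with the underlying objects of %a and %b
/// and LastAccesses with the two stores.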
struct OffloadArray {
/// Physical array (in the IR).
AllocaInst *Array = nullptr;
/// Mapped values.
SmallVector<Value *, 8> StoredValues;
/// Last stores made in the offload array.
SmallVector<StoreInst *, 8> LastAccesses;
OffloadArray() = default;
/// Initializes the OffloadArray with the values stored in \p Array before
/// instruction \p Before is reached. Returns false if the initialization
/// fails.
/// This MUST be used immediately after the construction of the object.
bool initialize(AllocaInst &Array, Instruction &Before) {
if (!Array.getAllocatedType()->isArrayTy())
return false;
if (!getValues(Array, Before))
return false;
this->Array = &Array;
return true;
}
static const unsigned DeviceIDArgNum = 1;
static const unsigned BasePtrsArgNum = 3;
static const unsigned PtrsArgNum = 4;
static const unsigned SizesArgNum = 5;
private:
/// Traverses the BasicBlock where \p Array is, collecting the stores made to
/// \p Array, leaving StoredValues with the values stored before the
/// instruction \p Before is reached.
bool getValues(AllocaInst &Array, Instruction &Before) {
// Initialize container.
const uint64_t NumValues = Array.getAllocatedType()->getArrayNumElements();
StoredValues.assign(NumValues, nullptr);
LastAccesses.assign(NumValues, nullptr);
// TODO: This assumes the instruction \p Before is in the same
// BasicBlock as Array. Make it general, for any control flow graph.
BasicBlock *BB = Array.getParent();
if (BB != Before.getParent())
return false;
const DataLayout &DL = Array.getModule()->getDataLayout();
const unsigned int PointerSize = DL.getPointerSize();
for (Instruction &I : *BB) {
if (&I == &Before)
break;
if (!isa<StoreInst>(&I))
continue;
auto *S = cast<StoreInst>(&I);
int64_t Offset = -1;
auto *Dst =
GetPointerBaseWithConstantOffset(S->getPointerOperand(), Offset, DL);
if (Dst == &Array) {
int64_t Idx = Offset / PointerSize;
StoredValues[Idx] = getUnderlyingObject(S->getValueOperand());
LastAccesses[Idx] = S;
}
}
return isFilled();
}
/// Returns true if all values in StoredValues and
/// LastAccesses are not nullptrs.
bool isFilled() {
const unsigned NumValues = StoredValues.size();
for (unsigned I = 0; I < NumValues; ++I) {
if (!StoredValues[I] || !LastAccesses[I])
return false;
}
return true;
}
};
struct OpenMPOpt {
using OptimizationRemarkGetter =
function_ref<OptimizationRemarkEmitter &(Function *)>;
OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater,
OptimizationRemarkGetter OREGetter,
OMPInformationCache &OMPInfoCache, Attributor &A)
: M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater),
OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {}
/// Check if any remarks are enabled for openmp-opt
bool remarksEnabled() {
auto &Ctx = M.getContext();
return Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(DEBUG_TYPE);
}
/// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
bool run(bool IsModulePass) {
if (SCC.empty())
return false;
bool Changed = false;
LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()
<< " functions in a slice with "
<< OMPInfoCache.ModuleSlice.size() << " functions\n");
if (IsModulePass) {
Changed |= runAttributor(IsModulePass);
// Recollect uses, in case Attributor deleted any.
OMPInfoCache.recollectUses();
// TODO: This should be folded into buildCustomStateMachine.
Changed |= rewriteDeviceCodeStateMachine();
if (remarksEnabled())
analysisGlobalization();
Changed |= eliminateBarriers();
} else {
if (PrintICVValues)
printICVs();
if (PrintOpenMPKernels)
printKernels();
Changed |= runAttributor(IsModulePass);
// Recollect uses, in case Attributor deleted any.
OMPInfoCache.recollectUses();
Changed |= deleteParallelRegions();
if (HideMemoryTransferLatency)
Changed |= hideMemTransfersLatency();
Changed |= deduplicateRuntimeCalls();
if (EnableParallelRegionMerging) {
if (mergeParallelRegions()) {
deduplicateRuntimeCalls();
Changed = true;
}
}
Changed |= eliminateBarriers();
}
return Changed;
}
/// Print initial ICV values for testing.
/// FIXME: This should be done from the Attributor once it is added.
void printICVs() const {
InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel,
ICV_proc_bind};
for (Function *F : OMPInfoCache.ModuleSlice) {
for (auto ICV : ICVs) {
auto ICVInfo = OMPInfoCache.ICVs[ICV];
auto Remark = [&](OptimizationRemarkAnalysis ORA) {
return ORA << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name)
<< " Value: "
<< (ICVInfo.InitValue
? toString(ICVInfo.InitValue->getValue(), 10, true)
: "IMPLEMENTATION_DEFINED");
};
emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPICVTracker", Remark);
}
}
}
/// Print OpenMP GPU kernels for testing.
void printKernels() const {
for (Function *F : SCC) {
if (!OMPInfoCache.Kernels.count(F))
continue;
auto Remark = [&](OptimizationRemarkAnalysis ORA) {
return ORA << "OpenMP GPU kernel "
<< ore::NV("OpenMPGPUKernel", F->getName()) << "\n";
};
emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPGPU", Remark);
}
}
/// Return the call if \p U is a callee use in a regular call. If \p RFI is
/// given it has to be the callee or a nullptr is returned.
static CallInst *getCallIfRegularCall(
Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
CallInst *CI = dyn_cast<CallInst>(U.getUser());
if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() &&
(!RFI ||
(RFI->Declaration && CI->getCalledFunction() == RFI->Declaration)))
return CI;
return nullptr;
}
/// Return the call if \p V is a regular call. If \p RFI is given it has to be
/// the callee or a nullptr is returned.
static CallInst *getCallIfRegularCall(
Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
CallInst *CI = dyn_cast<CallInst>(&V);
if (CI && !CI->hasOperandBundles() &&
(!RFI ||
(RFI->Declaration && CI->getCalledFunction() == RFI->Declaration)))
return CI;
return nullptr;
}
private:
/// Merge parallel regions when it is safe.
bool mergeParallelRegions() {
const unsigned CallbackCalleeOperand = 2;
const unsigned CallbackFirstArgOperand = 3;
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
// Check if there are any __kmpc_fork_call calls to merge.
OMPInformationCache::RuntimeFunctionInfo &RFI =
OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
if (!RFI.Declaration)
return false;
// Unmergable calls that prevent merging a parallel region.
OMPInformationCache::RuntimeFunctionInfo UnmergableCallsInfo[] = {
OMPInfoCache.RFIs[OMPRTL___kmpc_push_proc_bind],
OMPInfoCache.RFIs[OMPRTL___kmpc_push_num_threads],
};
bool Changed = false;
LoopInfo *LI = nullptr;
DominatorTree *DT = nullptr;
SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap;
BasicBlock *StartBB = nullptr, *EndBB = nullptr;
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
BasicBlock &ContinuationIP) {
BasicBlock *CGStartBB = CodeGenIP.getBlock();
BasicBlock *CGEndBB =
SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
assert(StartBB != nullptr && "StartBB should not be null");
CGStartBB->getTerminator()->setSuccessor(0, StartBB);
assert(EndBB != nullptr && "EndBB should not be null");
EndBB->getTerminator()->setSuccessor(0, CGEndBB);
};
auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
Value &Inner, Value *&ReplacementValue) -> InsertPointTy {
ReplacementValue = &Inner;
return CodeGenIP;
};
auto FiniCB = [&](InsertPointTy CodeGenIP) {};
/// Create a sequential execution region within a merged parallel region,
/// encapsulated in a master construct with a barrier for synchronization.
auto CreateSequentialRegion = [&](Function *OuterFn,
BasicBlock *OuterPredBB,
Instruction *SeqStartI,
Instruction *SeqEndI) {
// Isolate the instructions of the sequential region to a separate
// block.
BasicBlock *ParentBB = SeqStartI->getParent();
BasicBlock *SeqEndBB =
SplitBlock(ParentBB, SeqEndI->getNextNode(), DT, LI);
BasicBlock *SeqAfterBB =
SplitBlock(SeqEndBB, &*SeqEndBB->getFirstInsertionPt(), DT, LI);
BasicBlock *SeqStartBB =
SplitBlock(ParentBB, SeqStartI, DT, LI, nullptr, "seq.par.merged");
assert(ParentBB->getUniqueSuccessor() == SeqStartBB &&
"Expected a different CFG");
const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
ParentBB->getTerminator()->eraseFromParent();
auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
BasicBlock &ContinuationIP) {
BasicBlock *CGStartBB = CodeGenIP.getBlock();
BasicBlock *CGEndBB =
SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
assert(SeqStartBB != nullptr && "SeqStartBB should not be null");
CGStartBB->getTerminator()->setSuccessor(0, SeqStartBB);
assert(SeqEndBB != nullptr && "SeqEndBB should not be null");
SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB);
};
auto FiniCB = [&](InsertPointTy CodeGenIP) {};
// Find outputs from the sequential region to outside users and
// broadcast their values to them.
for (Instruction &I : *SeqStartBB) {
SmallPtrSet<Instruction *, 4> OutsideUsers;
for (User *Usr : I.users()) {
Instruction &UsrI = *cast<Instruction>(Usr);
// Ignore outputs to lifetime intrinsics; code extraction for the merged
// parallel region will fix them.
if (UsrI.isLifetimeStartOrEnd())
continue;
if (UsrI.getParent() != SeqStartBB)
OutsideUsers.insert(&UsrI);
}
if (OutsideUsers.empty())
continue;
// Emit an alloca in the outer region to store the broadcasted
// value.
const DataLayout &DL = M.getDataLayout();
AllocaInst *AllocaI = new AllocaInst(
I.getType(), DL.getAllocaAddrSpace(), nullptr,
I.getName() + ".seq.output.alloc", &OuterFn->front().front());
// Emit a store instruction in the sequential BB to update the
// value.
new StoreInst(&I, AllocaI, SeqStartBB->getTerminator());
// Emit a load instruction and replace the use of the output value
// with it.
for (Instruction *UsrI : OutsideUsers) {
LoadInst *LoadI = new LoadInst(
I.getType(), AllocaI, I.getName() + ".seq.output.load", UsrI);
UsrI->replaceUsesOfWith(&I, LoadI);
}
}
OpenMPIRBuilder::LocationDescription Loc(
InsertPointTy(ParentBB, ParentBB->end()), DL);
InsertPointTy SeqAfterIP =
OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB);
OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel);
BranchInst::Create(SeqAfterBB, SeqAfterIP.getBlock());
LLVM_DEBUG(dbgs() << TAG << "After sequential inlining " << *OuterFn
<< "\n");
};
// Helper to merge the __kmpc_fork_call calls in MergableCIs. They are all
// contained in BB and only separated by instructions that can be
// redundantly executed in parallel. The block BB is split before the first
// call (in MergableCIs) and after the last so the entire region we merge
// into a single parallel region is contained in a single basic block
// without any other instructions. We use the OpenMPIRBuilder to outline
// that block and call the resulting function via __kmpc_fork_call.
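// Illustrative sketch of the result: two adjacent calls
//   call void (...) @__kmpc_fork_call(..., @cb1, ...)
//   call void (...) @__kmpc_fork_call(..., @cb2, ...)
// become a single fork of a newly outlined function that invokes @cb1 and
// @cb2 directly, with an explicit barrier emitted between them.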
auto Merge = [&](const SmallVectorImpl<CallInst *> &MergableCIs,
BasicBlock *BB) {
// TODO: Change the interface to allow expanding single CIs, e.g., to
// include an outer loop.
assert(MergableCIs.size() > 1 && "Assumed multiple mergable CIs");
auto Remark = [&](OptimizationRemark OR) {
OR << "Parallel region merged with parallel region"
<< (MergableCIs.size() > 2 ? "s" : "") << " at ";
for (auto *CI : llvm::drop_begin(MergableCIs)) {
OR << ore::NV("OpenMPParallelMerge", CI->getDebugLoc());
if (CI != MergableCIs.back())
OR << ", ";
}
return OR << ".";
};
emitRemark<OptimizationRemark>(MergableCIs.front(), "OMP150", Remark);
Function *OriginalFn = BB->getParent();
LLVM_DEBUG(dbgs() << TAG << "Merge " << MergableCIs.size()
<< " parallel regions in " << OriginalFn->getName()
<< "\n");
// Isolate the calls to merge in a separate block.
EndBB = SplitBlock(BB, MergableCIs.back()->getNextNode(), DT, LI);
BasicBlock *AfterBB =
SplitBlock(EndBB, &*EndBB->getFirstInsertionPt(), DT, LI);
StartBB = SplitBlock(BB, MergableCIs.front(), DT, LI, nullptr,
"omp.par.merged");
assert(BB->getUniqueSuccessor() == StartBB && "Expected a different CFG");
const DebugLoc DL = BB->getTerminator()->getDebugLoc();
BB->getTerminator()->eraseFromParent();
// Create sequential regions for sequential instructions that are
// in-between mergable parallel regions.
for (auto *It = MergableCIs.begin(), *End = MergableCIs.end() - 1;
It != End; ++It) {
Instruction *ForkCI = *It;
Instruction *NextForkCI = *(It + 1);
// Continue if there are no in-between instructions.
if (ForkCI->getNextNode() == NextForkCI)
continue;
CreateSequentialRegion(OriginalFn, BB, ForkCI->getNextNode(),
NextForkCI->getPrevNode());
}
OpenMPIRBuilder::LocationDescription Loc(InsertPointTy(BB, BB->end()),
DL);
IRBuilder<>::InsertPoint AllocaIP(
&OriginalFn->getEntryBlock(),
OriginalFn->getEntryBlock().getFirstInsertionPt());
// Create the merged parallel region with default proc binding, to
// avoid overriding binding settings, and without explicit cancellation.
InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel(
Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr,
OMP_PROC_BIND_default, /* IsCancellable */ false);
BranchInst::Create(AfterBB, AfterIP.getBlock());
// Perform the actual outlining.
OMPInfoCache.OMPBuilder.finalize(OriginalFn);
Function *OutlinedFn = MergableCIs.front()->getCaller();
// Replace the __kmpc_fork_call calls with direct calls to the outlined
// callbacks.
SmallVector<Value *, 8> Args;
for (auto *CI : MergableCIs) {
Value *Callee =
CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts();
FunctionType *FT =
cast<FunctionType>(Callee->getType()->getPointerElementType());
Args.clear();
Args.push_back(OutlinedFn->getArg(0));
Args.push_back(OutlinedFn->getArg(1));
for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E;
++U)
Args.push_back(CI->getArgOperand(U));
CallInst *NewCI = CallInst::Create(FT, Callee, Args, "", CI);
if (CI->getDebugLoc())
NewCI->setDebugLoc(CI->getDebugLoc());
// Forward parameter attributes from the callback to the callee.
for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E;
++U)
for (const Attribute &A : CI->getAttributes().getParamAttrs(U))
NewCI->addParamAttr(
U - (CallbackFirstArgOperand - CallbackCalleeOperand), A);
// Emit an explicit barrier to replace the implicit fork-join barrier.
if (CI != MergableCIs.back()) {
// TODO: Remove barrier if the merged parallel region includes the
// 'nowait' clause.
OMPInfoCache.OMPBuilder.createBarrier(
InsertPointTy(NewCI->getParent(),
NewCI->getNextNode()->getIterator()),
OMPD_parallel);
}
CI->eraseFromParent();
}
assert(OutlinedFn != OriginalFn && "Outlining failed");
CGUpdater.registerOutlinedFunction(*OriginalFn, *OutlinedFn);
CGUpdater.reanalyzeFunction(*OriginalFn);
NumOpenMPParallelRegionsMerged += MergableCIs.size();
return true;
};
// Helper function that identifies sequences of
// __kmpc_fork_call uses in a basic block.
auto DetectPRsCB = [&](Use &U, Function &F) {
CallInst *CI = getCallIfRegularCall(U, &RFI);
BB2PRMap[CI->getParent()].insert(CI);
return false;
};
BB2PRMap.clear();
RFI.foreachUse(SCC, DetectPRsCB);
SmallVector<SmallVector<CallInst *, 4>, 4> MergableCIsVector;
// Find mergable parallel regions within a basic block that are
// safe to merge, that is any in-between instructions can safely
// execute in parallel after merging.
// TODO: support merging across basic-blocks.
for (auto &It : BB2PRMap) {
auto &CIs = It.getSecond();
if (CIs.size() < 2)
continue;
BasicBlock *BB = It.getFirst();
SmallVector<CallInst *, 4> MergableCIs;
/// Returns true if the instruction is mergable, false otherwise.
/// A terminator instruction is unmergable by definition since merging
/// works within a BB. Instructions before the mergable region are
/// mergable if they are not calls to OpenMP runtime functions that may
/// set different execution parameters for subsequent parallel regions.
/// Instructions in-between parallel regions are mergable if they are not
/// calls to any non-intrinsic function since that may call a non-mergable
/// OpenMP runtime function.
auto IsMergable = [&](Instruction &I, bool IsBeforeMergableRegion) {
// We do not merge across BBs, hence return false (unmergable) if the
// instruction is a terminator.
if (I.isTerminator())
return false;
if (!isa<CallInst>(&I))
return true;
CallInst *CI = cast<CallInst>(&I);
if (IsBeforeMergableRegion) {
Function *CalledFunction = CI->getCalledFunction();
if (!CalledFunction)
return false;
// Return false (unmergable) if the call before the parallel
// region calls an explicit affinity (proc_bind) or number of
// threads (num_threads) compiler-generated function. Those settings
// may be incompatible with following parallel regions.
// TODO: ICV tracking to detect compatibility.
for (const auto &RFI : UnmergableCallsInfo) {
if (CalledFunction == RFI.Declaration)
return false;
}
} else {
// Return false (unmergable) if there is a call instruction
// in-between parallel regions when it is not an intrinsic. It
// may call an unmergable OpenMP runtime function in its callpath.
// TODO: Keep track of possible OpenMP calls in the callpath.
if (!isa<IntrinsicInst>(CI))
return false;
}
return true;
};
// Find maximal number of parallel region CIs that are safe to merge.
for (auto It = BB->begin(), End = BB->end(); It != End;) {
Instruction &I = *It;
++It;
if (CIs.count(&I)) {
MergableCIs.push_back(cast<CallInst>(&I));
continue;
}
// Continue expanding if the instruction is mergable.
if (IsMergable(I, MergableCIs.empty()))
continue;
// Forward the instruction iterator to skip the next parallel region
// since there is an unmergable instruction which can affect it.
for (; It != End; ++It) {
Instruction &SkipI = *It;
if (CIs.count(&SkipI)) {
LLVM_DEBUG(dbgs() << TAG << "Skip parallel region " << SkipI
<< " due to " << I << "\n");
++It;
break;
}
}
// Store mergable regions found.
if (MergableCIs.size() > 1) {
MergableCIsVector.push_back(MergableCIs);
LLVM_DEBUG(dbgs() << TAG << "Found " << MergableCIs.size()
<< " parallel regions in block " << BB->getName()
<< " of function " << BB->getParent()->getName()
<< "\n";);
}
MergableCIs.clear();
}
if (!MergableCIsVector.empty()) {
Changed = true;
for (auto &MergableCIs : MergableCIsVector)
Merge(MergableCIs, BB);
MergableCIsVector.clear();
}
}
if (Changed) {
/// Re-collect uses for fork calls, emitted barrier calls, and
/// any emitted master/end_master calls.
OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_fork_call);
OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_barrier);
OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_master);
OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_end_master);
}
return Changed;
}
/// Try to delete parallel regions if possible.
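/// An illustrative case: a __kmpc_fork_call whose outlined callback only
/// reads memory and is guaranteed to return has no observable effect, so the
/// fork call can be erased entirely.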
bool deleteParallelRegions() {
const unsigned CallbackCalleeOperand = 2;
OMPInformationCache::RuntimeFunctionInfo &RFI =
OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
if (!RFI.Declaration)
return false;
bool Changed = false;
auto DeleteCallCB = [&](Use &U, Function &) {
CallInst *CI = getCallIfRegularCall(U);
if (!CI)
return false;
auto *Fn = dyn_cast<Function>(
CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts());
if (!Fn)
return false;
if (!Fn->onlyReadsMemory())
return false;
if (!Fn->hasFnAttribute(Attribute::WillReturn))
return false;
LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in "
<< CI->getCaller()->getName() << "\n");
auto Remark = [&](OptimizationRemark OR) {
return OR << "Removing parallel region with no side-effects.";
};
emitRemark<OptimizationRemark>(CI, "OMP160", Remark);
CGUpdater.removeCallSite(*CI);
CI->eraseFromParent();
Changed = true;
++NumOpenMPParallelRegionsDeleted;
return true;
};
RFI.foreachUse(SCC, DeleteCallCB);
return Changed;
}
/// Try to eliminate runtime calls by reusing existing ones.
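/// For example (illustrative), several calls to omp_get_num_threads() in one
/// function with unchanged surrounding state can be collapsed so a single
/// call is kept and its result reused at the other call sites.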
bool deduplicateRuntimeCalls() {
bool Changed = false;
RuntimeFunction DeduplicableRuntimeCallIDs[] = {
OMPRTL_omp_get_num_threads,
OMPRTL_omp_in_parallel,
OMPRTL_omp_get_cancellation,
OMPRTL_omp_get_thread_limit,
OMPRTL_omp_get_supported_active_levels,
OMPRTL_omp_get_level,
OMPRTL_omp_get_ancestor_thread_num,
OMPRTL_omp_get_team_size,
OMPRTL_omp_get_active_level,
OMPRTL_omp_in_final,
OMPRTL_omp_get_proc_bind,
OMPRTL_omp_get_num_places,
OMPRTL_omp_get_num_procs,
OMPRTL_omp_get_place_num,
OMPRTL_omp_get_partition_num_places,
OMPRTL_omp_get_partition_place_nums};
// Global-tid is handled separately.
SmallSetVector<Value *, 16> GTIdArgs;
collectGlobalThreadIdArguments(GTIdArgs);
LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
<< " global thread ID arguments\n");
for (Function *F : SCC) {
for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
Changed |= deduplicateRuntimeCalls(
*F, OMPInfoCache.RFIs[DeduplicableRuntimeCallID]);
// __kmpc_global_thread_num is special as we can replace it with an
// argument in enough cases to make it worth trying.
Value *GTIdArg = nullptr;
for (Argument &Arg : F->args())
if (GTIdArgs.count(&Arg)) {
GTIdArg = &Arg;
break;
}
Changed |= deduplicateRuntimeCalls(
*F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
}
return Changed;
}
/// Tries to hide the latency of runtime calls that involve host to
/// device memory transfers by splitting them into their "issue" and "wait"
/// versions. The "issue" is moved upwards as much as possible. The "wait" is
/// moved downwards as much as possible. The "issue" issues the memory transfer
/// asynchronously, returning a handle. The "wait" waits in the returned
/// handle for the memory transfer to finish.
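///
/// Illustrative sketch of the split (argument lists abbreviated):
///   call void @__tgt_target_data_begin_mapper(...)
/// becomes, roughly,
///   %handle = alloca %struct.__tgt_async_info
///   call void @__tgt_target_data_begin_mapper_issue(..., %handle)
///   ; ... independent code the transfer can overlap with ...
///   call void @__tgt_target_data_begin_mapper_wait(%device_id, %handle)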
bool hideMemTransfersLatency() {
auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper];
bool Changed = false;
auto SplitMemTransfers = [&](Use &U, Function &Decl) {
auto *RTCall = getCallIfRegularCall(U, &RFI);
if (!RTCall)
return false;
OffloadArray OffloadArrays[3];
if (!getValuesInOffloadArrays(*RTCall, OffloadArrays))
return false;
LLVM_DEBUG(dumpValuesInOffloadArrays(OffloadArrays));
// TODO: Check if it can be moved upwards.
bool WasSplit = false;
Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall);
if (WaitMovementPoint)
WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint);
Changed |= WasSplit;
return WasSplit;
};
RFI.foreachUse(SCC, SplitMemTransfers);
return Changed;
}
/// Eliminates redundant, aligned barriers in OpenMP offloaded kernels.
/// TODO: Make this an AA and expand it to work across blocks and functions.
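///
/// Illustrative example: in a kernel block such as
///   call void @llvm.nvvm.barrier0()
///   %x = add i32 %a, %b            ; no memory effects
///   call void @llvm.nvvm.barrier0()
/// nothing between the pair can be affected by a barrier, so one of the two
/// explicit barriers is deleted.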
bool eliminateBarriers() {
bool Changed = false;
if (DisableOpenMPOptBarrierElimination)
return /*Changed=*/false;
if (OMPInfoCache.Kernels.empty())
return /*Changed=*/false;
enum ImplicitBarrierType { IBT_ENTRY, IBT_EXIT };
class BarrierInfo {
Instruction *I;
enum ImplicitBarrierType Type;
public:
BarrierInfo(enum ImplicitBarrierType Type) : I(nullptr), Type(Type) {}
BarrierInfo(Instruction &I) : I(&I) {}
bool isImplicit() { return !I; }
bool isImplicitEntry() { return isImplicit() && Type == IBT_ENTRY; }
bool isImplicitExit() { return isImplicit() && Type == IBT_EXIT; }
Instruction *getInstruction() { return I; }
};
for (Function *Kernel : OMPInfoCache.Kernels) {
for (BasicBlock &BB : *Kernel) {
SmallVector<BarrierInfo, 8> BarriersInBlock;
SmallPtrSet<Instruction *, 8> BarriersToBeDeleted;
// Add the kernel entry implicit barrier.
if (&Kernel->getEntryBlock() == &BB)
BarriersInBlock.push_back(IBT_ENTRY);
// Find implicit and explicit aligned barriers in the same basic block.
for (Instruction &I : BB) {
if (isa<ReturnInst>(I)) {
// Add the implicit barrier when exiting the kernel.
BarriersInBlock.push_back(IBT_EXIT);
continue;
}
CallBase *CB = dyn_cast<CallBase>(&I);
if (!CB)
continue;
auto IsAlignBarrierCB = [&](CallBase &CB) {
switch (CB.getIntrinsicID()) {
case Intrinsic::nvvm_barrier0:
case Intrinsic::nvvm_barrier0_and:
case Intrinsic::nvvm_barrier0_or:
case Intrinsic::nvvm_barrier0_popc:
return true;
default:
break;
}
return hasAssumption(CB,
KnownAssumptionString("ompx_aligned_barrier"));
};
if (IsAlignBarrierCB(*CB)) {
// Add an explicit aligned barrier.
BarriersInBlock.push_back(I);
}
}
if (BarriersInBlock.size() <= 1)
continue;
// A barrier in a barrier pair is removable if all instructions
// between the barriers in the pair are side-effect free modulo the
// barrier operation.
auto IsBarrierRemoveable = [&Kernel](BarrierInfo *StartBI,
BarrierInfo *EndBI) {
assert(
!StartBI->isImplicitExit() &&
"Expected start barrier to be other than a kernel exit barrier");
assert(
!EndBI->isImplicitEntry() &&
"Expected end barrier to be other than a kernel entry barrier");
// If StartBI's instruction is null then this is the implicit
// kernel entry barrier, so iterate from the first instruction in the
// entry block.
Instruction *I = (StartBI->isImplicitEntry())
? &Kernel->getEntryBlock().front()
: StartBI->getInstruction()->getNextNode();
assert(I && "Expected non-null start instruction");
Instruction *E = (EndBI->isImplicitExit())
? I->getParent()->getTerminator()
: EndBI->getInstruction();
assert(E && "Expected non-null end instruction");
for (; I != E; I = I->getNextNode()) {
if (!I->mayHaveSideEffects() && !I->mayReadFromMemory())
continue;
auto IsPotentiallyAffectedByBarrier =
[](Optional<MemoryLocation> Loc) {
const Value *Obj = (Loc && Loc->Ptr)
? getUnderlyingObject(Loc->Ptr)
: nullptr;
if (!Obj) {
LLVM_DEBUG(
dbgs()
<< "Access to unknown location requires barriers\n");
return true;
}
if (isa<UndefValue>(Obj))
return false;
if (isa<AllocaInst>(Obj))
return false;
if (auto *GV = dyn_cast<GlobalVariable>(Obj)) {
if (GV->isConstant())
return false;
if (GV->isThreadLocal())
return false;
if (GV->getAddressSpace() == (int)AddressSpace::Local)
return false;
if (GV->getAddressSpace() == (int)AddressSpace::Constant)
return false;
}
LLVM_DEBUG(dbgs() << "Access to '" << *Obj
<< "' requires barriers\n");
return true;
};
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
Optional<MemoryLocation> Loc = MemoryLocation::getForDest(MI);
if (IsPotentiallyAffectedByBarrier(Loc))
return false;
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
Optional<MemoryLocation> Loc =
MemoryLocation::getForSource(MTI);
if (IsPotentiallyAffectedByBarrier(Loc))
return false;
}
continue;
}
if (auto *LI = dyn_cast<LoadInst>(I))
if (LI->hasMetadata(LLVMContext::MD_invariant_load))
continue;
Optional<MemoryLocation> Loc = MemoryLocation::getOrNone(I);
if (IsPotentiallyAffectedByBarrier(Loc))
return false;
}
return true;
};
// Iterate barrier pairs and remove an explicit barrier if analysis
// deems it removable.
for (auto *It = BarriersInBlock.begin(),
*End = BarriersInBlock.end() - 1;
It != End; ++It) {
BarrierInfo *StartBI = It;
BarrierInfo *EndBI = (It + 1);
// Cannot remove when both are implicit barriers, continue.
if (StartBI->isImplicit() && EndBI->isImplicit())
continue;
if (!IsBarrierRemoveable(StartBI, EndBI))
continue;
assert(!(StartBI->isImplicit() && EndBI->isImplicit()) &&
"Expected at least one explicit barrier to remove.");
// Remove an explicit barrier, check first, then second.
if (!StartBI->isImplicit()) {
LLVM_DEBUG(dbgs() << "Remove start barrier "
<< *StartBI->getInstruction() << "\n");
BarriersToBeDeleted.insert(StartBI->getInstruction());
} else {
LLVM_DEBUG(dbgs() << "Remove end barrier "
<< *EndBI->getInstruction() << "\n");
BarriersToBeDeleted.insert(EndBI->getInstruction());
}
}
if (BarriersToBeDeleted.empty())
continue;
Changed = true;
for (Instruction *I : BarriersToBeDeleted) {
++NumBarriersEliminated;
auto Remark = [&](OptimizationRemark OR) {
return OR << "Redundant barrier eliminated.";
};
if (EnableVerboseRemarks)
emitRemark<OptimizationRemark>(I, "OMP190", Remark);
I->eraseFromParent();
}
}
}
return Changed;
}
void analysisGlobalization() {
auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
auto CheckGlobalization = [&](Use &U, Function &Decl) {
if (CallInst *CI = getCallIfRegularCall(U, &RFI)) {
auto Remark = [&](OptimizationRemarkMissed ORM) {
return ORM
<< "Found thread data sharing on the GPU. "
<< "Expect degraded performance due to data globalization.";
};
emitRemark<OptimizationRemarkMissed>(CI, "OMP112", Remark);
}
return false;
};
RFI.foreachUse(SCC, CheckGlobalization);
}
/// Maps the values stored in the offload arrays passed as arguments to
/// \p RuntimeCall into the offload arrays in \p OAs.
bool getValuesInOffloadArrays(CallInst &RuntimeCall,
MutableArrayRef<OffloadArray> OAs) {
assert(OAs.size() == 3 && "Need space for three offload arrays!");
// A runtime call that involves memory offloading looks something like:
// call void @__tgt_target_data_begin_mapper(arg0, arg1,
// i8** %offload_baseptrs, i8** %offload_ptrs, i64* %offload_sizes,
// ...)
// So, the idea is to access the allocas that allocate space for these
// offload arrays, offload_baseptrs, offload_ptrs, offload_sizes.
// Therefore:
// i8** %offload_baseptrs.
Value *BasePtrsArg =
RuntimeCall.getArgOperand(OffloadArray::BasePtrsArgNum);
// i8** %offload_ptrs.
Value *PtrsArg = RuntimeCall.getArgOperand(OffloadArray::PtrsArgNum);
// i8** %offload_sizes.
Value *SizesArg = RuntimeCall.getArgOperand(OffloadArray::SizesArgNum);
// Get values stored in **offload_baseptrs.
auto *V = getUnderlyingObject(BasePtrsArg);
if (!isa<AllocaInst>(V))
return false;
auto *BasePtrsArray = cast<AllocaInst>(V);
if (!OAs[0].initialize(*BasePtrsArray, RuntimeCall))
return false;
// Get values stored in **offload_ptrs.
V = getUnderlyingObject(PtrsArg);
if (!isa<AllocaInst>(V))
return false;
auto *PtrsArray = cast<AllocaInst>(V);
if (!OAs[1].initialize(*PtrsArray, RuntimeCall))
return false;
// Get values stored in **offload_sizes.
V = getUnderlyingObject(SizesArg);
// If it's a [constant] global array don't analyze it.
if (isa<GlobalValue>(V))
return isa<Constant>(V);
if (!isa<AllocaInst>(V))
return false;
auto *SizesArray = cast<AllocaInst>(V);
if (!OAs[2].initialize(*SizesArray, RuntimeCall))
return false;
return true;
}
/// Prints the values in the OffloadArrays \p OAs using LLVM_DEBUG.
/// For now this is a way to test that the function getValuesInOffloadArrays
/// is working properly.
/// TODO: Move this to a unittest when unittests are available for OpenMPOpt.
void dumpValuesInOffloadArrays(ArrayRef<OffloadArray> OAs) {
assert(OAs.size() == 3 && "There are three offload arrays to debug!");
LLVM_DEBUG(dbgs() << TAG << " Successfully got offload values:\n");
std::string ValuesStr;
raw_string_ostream Printer(ValuesStr);
std::string Separator = " --- ";
for (auto *BP : OAs[0].StoredValues) {
BP->print(Printer);
Printer << Separator;
}
LLVM_DEBUG(dbgs() << "\t\toffload_baseptrs: " << Printer.str() << "\n");
ValuesStr.clear();
for (auto *P : OAs[1].StoredValues) {
P->print(Printer);
Printer << Separator;
}
LLVM_DEBUG(dbgs() << "\t\toffload_ptrs: " << Printer.str() << "\n");
ValuesStr.clear();
for (auto *S : OAs[2].StoredValues) {
S->print(Printer);
Printer << Separator;
}
LLVM_DEBUG(dbgs() << "\t\toffload_sizes: " << Printer.str() << "\n");
}
/// Returns the instruction to which the "wait" counterpart of \p RuntimeCall
/// can be moved. Returns nullptr if the movement is not possible, or not worth it.
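/// In practice this is the next instruction that may read or write memory,
/// provided the call can be moved over at least one instruction, or the
/// block terminator if no such instruction is found.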
Instruction *canBeMovedDownwards(CallInst &RuntimeCall) {
// FIXME: This traverses only the BasicBlock where RuntimeCall is.
// Make it traverse the CFG.
Instruction *CurrentI = &RuntimeCall;
bool IsWorthIt = false;
while ((CurrentI = CurrentI->getNextNode())) {
// TODO: Once we detect the regions to be offloaded we should use the
// alias analysis manager to check if CurrentI may modify one of
// the offloaded regions.
if (CurrentI->mayHaveSideEffects() || CurrentI->mayReadFromMemory()) {
if (IsWorthIt)
return CurrentI;
return nullptr;
}
// FIXME: For now, moving it over anything without side effects is
// considered worth it.
IsWorthIt = true;
}
// Return end of BasicBlock.
return RuntimeCall.getParent()->getTerminator();
}
/// Splits \p RuntimeCall into its "issue" and "wait" counterparts.
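/// A rough sketch of the rewrite (argument lists abridged; the exact runtime
/// declarations are obtained below via getOrCreateRuntimeFunction):
///   call void @__tgt_target_data_begin_mapper(...)
/// becomes
///   %handle = alloca %struct.__tgt_async_info
///   call @__tgt_target_data_begin_mapper_issue(..., %handle)
///   ; ... independent code the "wait" may be moved across ...
///   call @__tgt_target_data_begin_mapper_wait(%device_id, %handle)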
bool splitTargetDataBeginRTC(CallInst &RuntimeCall,
Instruction &WaitMovementPoint) {
// Create a stack-allocated handle (__tgt_async_info) at the beginning of the
// function. It stores information about the async transfer, allowing us to
// wait on it later.
auto &IRBuilder = OMPInfoCache.OMPBuilder;
auto *F = RuntimeCall.getCaller();
Instruction *FirstInst = &(F->getEntryBlock().front());
AllocaInst *Handle = new AllocaInst(
IRBuilder.AsyncInfo, F->getAddressSpace(), "handle", FirstInst);
// Add "issue" runtime call declaration:
// declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32,
// i8**, i8**, i64*, i64*)
FunctionCallee IssueDecl = IRBuilder.getOrCreateRuntimeFunction(
M, OMPRTL___tgt_target_data_begin_mapper_issue);
// Change RuntimeCall call site for its asynchronous version.
SmallVector<Value *, 16> Args;
for (auto &Arg : RuntimeCall.args())
Args.push_back(Arg.get());
Args.push_back(Handle);
CallInst *IssueCallsite =
CallInst::Create(IssueDecl, Args, /*NameStr=*/"", &RuntimeCall);
OMPInfoCache.setCallingConvention(IssueDecl, IssueCallsite);
RuntimeCall.eraseFromParent();
// Add "wait" runtime call declaration:
// declare void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info)
FunctionCallee WaitDecl = IRBuilder.getOrCreateRuntimeFunction(
M, OMPRTL___tgt_target_data_begin_mapper_wait);
Value *WaitParams[2] = {
IssueCallsite->getArgOperand(
OffloadArray::DeviceIDArgNum), // device_id.
Handle // handle to wait on.
};
CallInst *WaitCallsite = CallInst::Create(
WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint);
OMPInfoCache.setCallingConvention(WaitDecl, WaitCallsite);
return true;
}
static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
bool GlobalOnly, bool &SingleChoice) {
if (CurrentIdent == NextIdent)
return CurrentIdent;
// TODO: Figure out how to actually combine multiple debug locations. For
// now we just keep an existing one if there is a single choice.
if (!GlobalOnly || isa<GlobalValue>(NextIdent)) {
SingleChoice = !CurrentIdent;
return NextIdent;
}
return nullptr;
}
/// Return a `struct ident_t*` value that represents the ones used in the
/// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not
/// return a local `struct ident_t*`. For now, if we cannot find a suitable
/// return value we create one from scratch. We also do not yet combine
/// information, e.g., the source locations, see combinedIdentStruct.
Value *
getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI,
Function &F, bool GlobalOnly) {
bool SingleChoice = true;
Value *Ident = nullptr;
auto CombineIdentStruct = [&](Use &U, Function &Caller) {
CallInst *CI = getCallIfRegularCall(U, &RFI);
if (!CI || &F != &Caller)
return false;
Ident = combinedIdentStruct(Ident, CI->getArgOperand(0),
/* GlobalOnly */ true, SingleChoice);
return false;
};
RFI.foreachUse(SCC, CombineIdentStruct);
if (!Ident || !SingleChoice) {
// The IRBuilder uses the insertion block to get to the module; this is
// unfortunate but we work around it for now.
if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock())
OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy(
&F.getEntryBlock(), F.getEntryBlock().begin()));
// Create a fallback location if none was found.
// TODO: Use the debug locations of the calls instead.
uint32_t SrcLocStrSize;
Constant *Loc =
OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc, SrcLocStrSize);
}
return Ident;
}
/// Try to eliminate calls of \p RFI in \p F by reusing an existing one or
/// \p ReplVal if given.
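/// For example, several calls to a runtime getter such as
/// __kmpc_global_thread_num within one function can be collapsed into a
/// single call whose result replaces the results of the others.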
bool deduplicateRuntimeCalls(Function &F,
OMPInformationCache::RuntimeFunctionInfo &RFI,
Value *ReplVal = nullptr) {
auto *UV = RFI.getUseVector(F);
if (!UV || UV->size() + (ReplVal != nullptr) < 2)
return false;
LLVM_DEBUG(
dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name
<< (ReplVal ? " with an existing value\n" : "\n") << "\n");
assert((!ReplVal || (isa<Argument>(ReplVal) &&
cast<Argument>(ReplVal)->getParent() == &F)) &&
"Unexpected replacement value!");
// TODO: Use dominance to find a good position instead.
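// A call can be moved if it has no arguments, or if its first argument is
// an ident_t* and none of the remaining arguments is an instruction, so
// hoisting it cannot break operand dominance.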
auto CanBeMoved = [this](CallBase &CB) {
unsigned NumArgs = CB.arg_size();
if (NumArgs == 0)
return true;
if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr)
return false;
for (unsigned U = 1; U < NumArgs; ++U)
if (isa<Instruction>(CB.getArgOperand(U)))
return false;
return true;
};
if (!ReplVal) {
for (Use *U : *UV)
if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
if (!CanBeMoved(*CI))
continue;
// If the function is a kernel, dedup will move
// the runtime call right after the kernel init callsite. Otherwise,
// it will move it to the beginning of the caller function.
if (isKernel(F)) {
auto &KernelInitRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
auto *KernelInitUV = KernelInitRFI.getUseVector(F);
if (KernelInitUV->empty())
continue;
assert(KernelInitUV->size() == 1 &&
"Expected a single __kmpc_target_init in kernel\n");
CallInst *KernelInitCI =
getCallIfRegularCall(*KernelInitUV->front(), &KernelInitRFI);
assert(KernelInitCI &&
"Expected a call to __kmpc_target_init in kernel\n");
CI->moveAfter(KernelInitCI);
} else
CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
ReplVal = CI;
break;
}
if (!ReplVal)
return false;
}
// If we use a call as a replacement value we need to make sure the ident is
// valid at the new location. For now we just pick a global one, either
// existing and used by one of the calls, or created from scratch.
if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) {
if (!CI->arg_empty() &&
CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) {
Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F,
/* GlobalOnly */ true);
CI->setArgOperand(0, Ident);
}
}
bool Changed = false;
auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
CallInst *CI = getCallIfRegularCall(U, &RFI);
if (!CI || CI == ReplVal || &F != &Caller)
return false;
assert(CI->getCaller() == &F && "Unexpected call!");
auto Remark = [&](OptimizationRemark OR) {
return OR << "OpenMP runtime call "
<< ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated.";
};
if (CI->getDebugLoc())
emitRemark<OptimizationRemark>(CI, "OMP170", Remark);
else
emitRemark<OptimizationRemark>(&F, "OMP170", Remark);
CGUpdater.removeCallSite(*CI);
CI->replaceAllUsesWith(ReplVal);
CI->eraseFromParent();
++NumOpenMPRuntimeCallsDeduplicated;
Changed = true;
return true;
};
RFI.foreachUse(SCC, ReplaceAndDeleteCB);
return Changed;
}
/// Collect arguments that represent the global thread id in \p GTIdArgs.
void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> &GTIdArgs) {
// TODO: Below we basically perform a fixpoint iteration with a pessimistic
// initialization. We could define an AbstractAttribute instead and
// run the Attributor here once it can be run as an SCC pass.
// Helper to check the argument \p ArgNo at all call sites of \p F for
// a GTId.
auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) {
if (!F.hasLocalLinkage())
return false;
for (Use &U : F.uses()) {
if (CallInst *CI = getCallIfRegularCall(U)) {
Value *ArgOp = CI->getArgOperand(ArgNo);
if (CI == &RefCI || GTIdArgs.count(ArgOp) ||
getCallIfRegularCall(
*ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]))
continue;
}
return false;
}
return true;
};
// Helper to identify uses of a GTId as GTId arguments.
auto AddUserArgs = [&](Value &GTId) {
for (Use &U : GTId.uses())
if (CallInst *CI = dyn_cast<CallInst>(U.getUser()))
if (CI->isArgOperand(&U))
if (Function *Callee = CI->getCalledFunction())
if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI))
GTIdArgs.insert(Callee->getArg(U.getOperandNo()));
};
// The argument users of __kmpc_global_thread_num calls are GTIds.
OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI =
OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num];
GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) {
if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI))
AddUserArgs(*CI);
return false;
});
// Transitively search for more arguments by looking at the users of the
// ones we know already. During the search the GTIdArgs vector is extended
// so we cannot cache the size nor can we use a range based for.
for (unsigned U = 0; U < GTIdArgs.size(); ++U)
AddUserArgs(*GTIdArgs[U]);
}
/// Kernel (=GPU) optimizations and utility functions
///
///{{
/// Check if \p F is a kernel, hence entry point for target offloading.
bool isKernel(Function &F) { return OMPInfoCache.Kernels.count(&F); }
/// Cache to remember the unique kernel for a function.
DenseMap<Function *, Optional<Kernel>> UniqueKernelMap;
/// Find the unique kernel that will execute \p F, if any.
Kernel getUniqueKernelFor(Function &F);
/// Find the unique kernel that will execute \p I, if any.
Kernel getUniqueKernelFor(Instruction &I) {
return getUniqueKernelFor(*I.getFunction());
}
/// Rewrite the device (=GPU) code state machine created in non-SPMD mode in
/// the cases where we can avoid taking the address of a function.
bool rewriteDeviceCodeStateMachine();
///
///}}
/// Emit a remark generically
///
/// This template function can be used to generically emit a remark. The
/// RemarkKind should be one of the following:
/// - OptimizationRemark to indicate a successful optimization attempt
/// - OptimizationRemarkMissed to report a failed optimization attempt
/// - OptimizationRemarkAnalysis to provide additional information about an
/// optimization attempt
///
/// The remark is built using a callback function provided by the caller that
/// takes a RemarkKind as input and returns a RemarkKind.
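/// Typical use (mirroring the call sites elsewhere in this file):
///   auto Remark = [&](OptimizationRemark OR) {
///     return OR << "Redundant barrier eliminated.";
///   };
///   emitRemark<OptimizationRemark>(I, "OMP190", Remark);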
template <typename RemarkKind, typename RemarkCallBack>
void emitRemark(Instruction *I, StringRef RemarkName,
RemarkCallBack &&RemarkCB) const {
Function *F = I->getParent()->getParent();
auto &ORE = OREGetter(F);
if (RemarkName.startswith("OMP"))
ORE.emit([&]() {
return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, I))
<< " [" << RemarkName << "]";
});
else
ORE.emit(
[&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, I)); });
}
/// Emit a remark on a function.
template <typename RemarkKind, typename RemarkCallBack>
void emitRemark(Function *F, StringRef RemarkName,
RemarkCallBack &&RemarkCB) const {
auto &ORE = OREGetter(F);
if (RemarkName.startswith("OMP"))
ORE.emit([&]() {
return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, F))
<< " [" << RemarkName << "]";
});
else
ORE.emit(
[&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, F)); });
}
/// RAII struct to temporarily change an RTL function's linkage to external.
/// This prevents it from being mistakenly removed by other optimizations.
struct ExternalizationRAII {
ExternalizationRAII(OMPInformationCache &OMPInfoCache,
RuntimeFunction RFKind)
: Declaration(OMPInfoCache.RFIs[RFKind].Declaration) {
if (!Declaration)
return;
LinkageType = Declaration->getLinkage();
Declaration->setLinkage(GlobalValue::ExternalLinkage);
}
~ExternalizationRAII() {
if (!Declaration)
return;
Declaration->setLinkage(LinkageType);
}
Function *Declaration;
GlobalValue::LinkageTypes LinkageType;
};
/// The underlying module.
Module &M;
/// The SCC we are operating on.
SmallVectorImpl<Function *> &SCC;
/// Callback to update the call graph, the first argument is a removed call,
/// the second an optional replacement call.
CallGraphUpdater &CGUpdater;
/// Callback to get an OptimizationRemarkEmitter from a Function *
OptimizationRemarkGetter OREGetter;
/// OpenMP-specific information cache. Also used for Attributor runs.
OMPInformationCache &OMPInfoCache;
/// Attributor instance.
Attributor &A;
/// Helper function to run Attributor on SCC.
bool runAttributor(bool IsModulePass) {
if (SCC.empty())
return false;
// Temporarily give these functions external linkage so the Attributor
// doesn't remove them when we try to look them up later.
ExternalizationRAII Parallel(OMPInfoCache, OMPRTL___kmpc_kernel_parallel);
ExternalizationRAII EndParallel(OMPInfoCache,
OMPRTL___kmpc_kernel_end_parallel);
ExternalizationRAII BarrierSPMD(OMPInfoCache,
OMPRTL___kmpc_barrier_simple_spmd);
ExternalizationRAII BarrierGeneric(OMPInfoCache,
OMPRTL___kmpc_barrier_simple_generic);
ExternalizationRAII ThreadId(OMPInfoCache,
OMPRTL___kmpc_get_hardware_thread_id_in_block);
+ ExternalizationRAII NumThreads(
+ OMPInfoCache, OMPRTL___kmpc_get_hardware_num_threads_in_block);
ExternalizationRAII WarpSize(OMPInfoCache, OMPRTL___kmpc_get_warp_size);
registerAAs(IsModulePass);
ChangeStatus Changed = A.run();
LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size()
<< " functions, result: " << Changed << ".\n");
return Changed == ChangeStatus::CHANGED;
}
void registerFoldRuntimeCall(RuntimeFunction RF);
/// Populate the Attributor with abstract attribute opportunities in the
/// function.
void registerAAs(bool IsModulePass);
};
Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
if (!OMPInfoCache.ModuleSlice.count(&F))
return nullptr;
// Use a scope to keep the lifetime of the CachedKernel short.
{
Optional<Kernel> &CachedKernel = UniqueKernelMap[&F];
if (CachedKernel)
return *CachedKernel;
// TODO: We should use an AA to create an (optimistic and callback
// call-aware) call graph. For now we stick to simple patterns that
// are less powerful, basically the worst fixpoint.
if (isKernel(F)) {
CachedKernel = Kernel(&F);
return *CachedKernel;
}
CachedKernel = nullptr;
if (!F.hasLocalLinkage()) {
// See https://openmp.llvm.org/remarks/OptimizationRemarks.html
auto Remark = [&](OptimizationRemarkAnalysis ORA) {
return ORA << "Potentially unknown OpenMP target region caller.";
};
emitRemark<OptimizationRemarkAnalysis>(&F, "OMP100", Remark);
return nullptr;
}
}
auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel {
if (auto *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
// Allow use in equality comparisons.
if (Cmp->isEquality())
return getUniqueKernelFor(*Cmp);
return nullptr;
}
if (auto *CB = dyn_cast<CallBase>(U.getUser())) {
// Allow direct calls.
if (CB->isCallee(&U))
return getUniqueKernelFor(*CB);
OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI =
OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
// Allow the use in __kmpc_parallel_51 calls.
if (OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI))
return getUniqueKernelFor(*CB);
return nullptr;
}
// Disallow every other use.
return nullptr;
};
// TODO: In the future we want to track more than just a unique kernel.
SmallPtrSet<Kernel, 2> PotentialKernels;
OMPInformationCache::foreachUse(F, [&](const Use &U) {
PotentialKernels.insert(GetUniqueKernelForUse(U));
});
Kernel K = nullptr;
if (PotentialKernels.size() == 1)
K = *PotentialKernels.begin();
// Cache the result.
UniqueKernelMap[&F] = K;
return K;
}
bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI =
OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
bool Changed = false;
if (!KernelParallelRFI)
return Changed;
// If we have disabled state machine changes, exit
if (DisableOpenMPOptStateMachineRewrite)
return Changed;
for (Function *F : SCC) {
// Check whether the function is used in a __kmpc_parallel_51 call at all.
bool UnknownUse = false;
bool KernelParallelUse = false;
unsigned NumDirectCalls = 0;
SmallVector<Use *, 2> ToBeReplacedStateMachineUses;
OMPInformationCache::foreachUse(*F, [&](Use &U) {
if (auto *CB = dyn_cast<CallBase>(U.getUser()))
if (CB->isCallee(&U)) {
++NumDirectCalls;
return;
}
if (isa<ICmpInst>(U.getUser())) {
ToBeReplacedStateMachineUses.push_back(&U);
return;
}
// Find wrapper functions that represent parallel kernels.
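// A function used as argument operand 6 of __kmpc_parallel_51
// (WrapperFunctionArgNo below) is the parallel region wrapper we may rewrite.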
CallInst *CI =
OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI);
const unsigned int WrapperFunctionArgNo = 6;
if (!KernelParallelUse && CI &&
CI->getArgOperandNo(&U) == WrapperFunctionArgNo) {
KernelParallelUse = true;
ToBeReplacedStateMachineUses.push_back(&U);
return;
}
UnknownUse = true;
});
// Do not emit a remark if we haven't seen a __kmpc_parallel_51
// use.
if (!KernelParallelUse)
continue;
// If this ever hits, we should investigate.
// TODO: Checking the number of uses is not a necessary restriction and
// should be lifted.
if (UnknownUse || NumDirectCalls != 1 ||
ToBeReplacedStateMachineUses.size() > 2) {
auto Remark = [&](OptimizationRemarkAnalysis ORA) {
return ORA << "Parallel region is used in "
<< (UnknownUse ? "unknown" : "unexpected")
<< " ways. Will not attempt to rewrite the state machine.";
};
emitRemark<OptimizationRemarkAnalysis>(F, "OMP101", Remark);
continue;
}
// Even if we have __kmpc_parallel_51 calls, we (for now) give
// up if the function is not called from a unique kernel.
Kernel K = getUniqueKernelFor(*F);
if (!K) {
auto Remark = [&](OptimizationRemarkAnalysis ORA) {
return ORA << "Parallel region is not called from a unique kernel. "
"Will not attempt to rewrite the state machine.";
};
emitRemark<OptimizationRemarkAnalysis>(F, "OMP102", Remark);
continue;
}
// We now know F is a parallel body function called only from the kernel K.
// We also identified the state machine uses in which we replace the
// function pointer by a new global symbol for identification purposes. This
// ensures only direct calls to the function are left.
Module &M = *F->getParent();
Type *Int8Ty = Type::getInt8Ty(M.getContext());
auto *ID = new GlobalVariable(
M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage,
UndefValue::get(Int8Ty), F->getName() + ".ID");
for (Use *U : ToBeReplacedStateMachineUses)
U->set(ConstantExpr::getPointerBitCastOrAddrSpaceCast(
ID, U->get()->getType()));
++NumOpenMPParallelRegionsReplacedInGPUStateMachine;
Changed = true;
}
return Changed;
}
/// Abstract Attribute for tracking ICV values.
struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> {
using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
void initialize(Attributor &A) override {
Function *F = getAnchorScope();
if (!F || !A.isFunctionIPOAmendable(*F))
indicatePessimisticFixpoint();
}
/// Returns true if value is assumed to be tracked.
bool isAssumedTracked() const { return getAssumed(); }
/// Returns true if value is known to be tracked.
bool isKnownTracked() const { return getAssumed(); }
/// Create an abstract attribute view for the position \p IRP.
static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A);
/// Return the value with which \p I can be replaced for specific \p ICV.
virtual Optional<Value *> getReplacementValue(InternalControlVar ICV,
const Instruction *I,
Attributor &A) const {
return None;
}
/// Return an assumed unique ICV value if a single candidate is found. If
/// there cannot be one, return nullptr. If it is not clear yet, return
/// None.
virtual Optional<Value *>
getUniqueReplacementValue(InternalControlVar ICV) const = 0;
// Currently only nthreads is being tracked; this array will only grow
// over time.
InternalControlVar TrackableICVs[1] = {ICV_nthreads};
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAICVTracker"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is AAICVTracker
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
static const char ID;
};
struct AAICVTrackerFunction : public AAICVTracker {
AAICVTrackerFunction(const IRPosition &IRP, Attributor &A)
: AAICVTracker(IRP, A) {}
// FIXME: come up with better string.
const std::string getAsStr() const override { return "ICVTrackerFunction"; }
// FIXME: come up with some stats.
void trackStatistics() const override {}
/// We don't manifest anything for this AA.
ChangeStatus manifest(Attributor &A) override {
return ChangeStatus::UNCHANGED;
}
// Map of ICVs to their values at specific program points.
EnumeratedArray<DenseMap<Instruction *, Value *>, InternalControlVar,
InternalControlVar::ICV___last>
ICVReplacementValuesMap;
ChangeStatus updateImpl(Attributor &A) override {
ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
Function *F = getAnchorScope();
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
for (InternalControlVar ICV : TrackableICVs) {
auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
auto &ValuesMap = ICVReplacementValuesMap[ICV];
auto TrackValues = [&](Use &U, Function &) {
CallInst *CI = OpenMPOpt::getCallIfRegularCall(U);
if (!CI)
return false;
// FIXME: handle setters with more than one argument.
/// Track new value.
if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second)
HasChanged = ChangeStatus::CHANGED;
return false;
};
auto CallCheck = [&](Instruction &I) {
Optional<Value *> ReplVal = getValueForCall(A, I, ICV);
if (ReplVal.hasValue() &&
ValuesMap.insert(std::make_pair(&I, *ReplVal)).second)
HasChanged = ChangeStatus::CHANGED;
return true;
};
// Track all changes of an ICV.
SetterRFI.foreachUse(TrackValues, F);
bool UsedAssumedInformation = false;
A.checkForAllInstructions(CallCheck, *this, {Instruction::Call},
UsedAssumedInformation,
/* CheckBBLivenessOnly */ true);
/// TODO: Figure out a way to avoid adding entry in
/// ICVReplacementValuesMap
Instruction *Entry = &F->getEntryBlock().front();
if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry))
ValuesMap.insert(std::make_pair(Entry, nullptr));
}
return HasChanged;
}
/// Helper to check if \p I is a call and get the value for it if it is
/// unique.
Optional<Value *> getValueForCall(Attributor &A, const Instruction &I,
InternalControlVar &ICV) const {
const auto *CB = dyn_cast<CallBase>(&I);
if (!CB || CB->hasFnAttr("no_openmp") ||
CB->hasFnAttr("no_openmp_routines"))
return None;
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter];
auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
Function *CalledFunction = CB->getCalledFunction();
// Indirect call, assume ICV changes.
if (CalledFunction == nullptr)
return nullptr;
if (CalledFunction == GetterRFI.Declaration)
return None;
if (CalledFunction == SetterRFI.Declaration) {
if (ICVReplacementValuesMap[ICV].count(&I))
return ICVReplacementValuesMap[ICV].lookup(&I);
return nullptr;
}
// Since we don't know, assume it changes the ICV.
if (CalledFunction->isDeclaration())
return nullptr;
const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
*this, IRPosition::callsite_returned(*CB), DepClassTy::REQUIRED);
if (ICVTrackingAA.isAssumedTracked()) {
Optional<Value *> URV = ICVTrackingAA.getUniqueReplacementValue(ICV);
if (!URV || (*URV && AA::isValidAtPosition(**URV, I, OMPInfoCache)))
return URV;
}
// If we don't know, assume it changes.
return nullptr;
}
// We don't check unique value for a function, so return None.
Optional<Value *>
getUniqueReplacementValue(InternalControlVar ICV) const override {
return None;
}
/// Return the value with which \p I can be replaced for specific \p ICV.
Optional<Value *> getReplacementValue(InternalControlVar ICV,
const Instruction *I,
Attributor &A) const override {
const auto &ValuesMap = ICVReplacementValuesMap[ICV];
if (ValuesMap.count(I))
return ValuesMap.lookup(I);
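// Otherwise walk backwards from the given instruction through the CFG:
// scan each block upwards for the closest known setter or ICV-modifying
// call, and queue predecessor terminators to continue the search.
// Conflicting values on different paths mean the ICV value is unknown
// (nullptr).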
SmallVector<const Instruction *, 16> Worklist;
SmallPtrSet<const Instruction *, 16> Visited;
Worklist.push_back(I);
Optional<Value *> ReplVal;
while (!Worklist.empty()) {
const Instruction *CurrInst = Worklist.pop_back_val();
if (!Visited.insert(CurrInst).second)
continue;
const BasicBlock *CurrBB = CurrInst->getParent();
// Go up and look for all potential setters/calls that might change the
// ICV.
while ((CurrInst = CurrInst->getPrevNode())) {
if (ValuesMap.count(CurrInst)) {
Optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst);
// Unknown value, track new.
if (!ReplVal.hasValue()) {
ReplVal = NewReplVal;
break;
}
// If we found a conflicting new value, we can't know the ICV value anymore.
if (NewReplVal.hasValue())
if (ReplVal != NewReplVal)
return nullptr;
break;
}
Optional<Value *> NewReplVal = getValueForCall(A, *CurrInst, ICV);
if (!NewReplVal.hasValue())
continue;
// Unknown value, track new.
if (!ReplVal.hasValue()) {
ReplVal = NewReplVal;
break;
}
// NewReplVal is known to have a value here; if it conflicts with the one
// we tracked, we can't know the ICV value anymore.
if (ReplVal != NewReplVal)
return nullptr;
}
// If we are in the same BB and we have a value, we are done.
if (CurrBB == I->getParent() && ReplVal.hasValue())
return ReplVal;
// Go through all predecessors and add terminators for analysis.
for (const BasicBlock *Pred : predecessors(CurrBB))
if (const Instruction *Terminator = Pred->getTerminator())
Worklist.push_back(Terminator);
}
return ReplVal;
}
};
struct AAICVTrackerFunctionReturned : AAICVTracker {
AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A)
: AAICVTracker(IRP, A) {}
// FIXME: come up with better string.
const std::string getAsStr() const override {
return "ICVTrackerFunctionReturned";
}
// FIXME: come up with some stats.
void trackStatistics() const override {}
/// We don't manifest anything for this AA.
ChangeStatus manifest(Attributor &A) override {
return ChangeStatus::UNCHANGED;
}
// Map of ICVs to their values at specific program points.
EnumeratedArray<Optional<Value *>, InternalControlVar,
InternalControlVar::ICV___last>
ICVReplacementValuesMap;
/// Return the value with which \p I can be replaced for specific \p ICV.
Optional<Value *>
getUniqueReplacementValue(InternalControlVar ICV) const override {
return ICVReplacementValuesMap[ICV];
}
ChangeStatus updateImpl(Attributor &A) override {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
*this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
if (!ICVTrackingAA.isAssumedTracked())
return indicatePessimisticFixpoint();
for (InternalControlVar ICV : TrackableICVs) {
Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
Optional<Value *> UniqueICVValue;
auto CheckReturnInst = [&](Instruction &I) {
Optional<Value *> NewReplVal =
ICVTrackingAA.getReplacementValue(ICV, &I, A);
// If we found a second ICV value there is no unique returned value.
if (UniqueICVValue.hasValue() && UniqueICVValue != NewReplVal)
return false;
UniqueICVValue = NewReplVal;
return true;
};
bool UsedAssumedInformation = false;
if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret},
UsedAssumedInformation,
/* CheckBBLivenessOnly */ true))
UniqueICVValue = nullptr;
if (UniqueICVValue == ReplVal)
continue;
ReplVal = UniqueICVValue;
Changed = ChangeStatus::CHANGED;
}
return Changed;
}
};
struct AAICVTrackerCallSite : AAICVTracker {
AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A)
: AAICVTracker(IRP, A) {}
void initialize(Attributor &A) override {
Function *F = getAnchorScope();
if (!F || !A.isFunctionIPOAmendable(*F))
indicatePessimisticFixpoint();
// We only initialize this AA for getters, so we need to know which ICV it
// gets.
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
for (InternalControlVar ICV : TrackableICVs) {
auto ICVInfo = OMPInfoCache.ICVs[ICV];
auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter];
if (Getter.Declaration == getAssociatedFunction()) {
AssociatedICV = ICVInfo.Kind;
return;
}
}
/// Unknown ICV.
indicatePessimisticFixpoint();
}
ChangeStatus manifest(Attributor &A) override {
if (!ReplVal.hasValue() || !ReplVal.getValue())
return ChangeStatus::UNCHANGED;
A.changeValueAfterManifest(*getCtxI(), **ReplVal);
A.deleteAfterManifest(*getCtxI());
return ChangeStatus::CHANGED;
}
// FIXME: come up with better string.
const std::string getAsStr() const override { return "ICVTrackerCallSite"; }
// FIXME: come up with some stats.
void trackStatistics() const override {}
InternalControlVar AssociatedICV;
Optional<Value *> ReplVal;
ChangeStatus updateImpl(Attributor &A) override {
const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
*this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
// We don't have any information, so we assume it changes the ICV.
if (!ICVTrackingAA.isAssumedTracked())
return indicatePessimisticFixpoint();
Optional<Value *> NewReplVal =
ICVTrackingAA.getReplacementValue(AssociatedICV, getCtxI(), A);
if (ReplVal == NewReplVal)
return ChangeStatus::UNCHANGED;
ReplVal = NewReplVal;
return ChangeStatus::CHANGED;
}
// Return the value with which the associated value can be replaced for the
// specific \p ICV.
Optional<Value *>
getUniqueReplacementValue(InternalControlVar ICV) const override {
return ReplVal;
}
};
struct AAICVTrackerCallSiteReturned : AAICVTracker {
AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A)
: AAICVTracker(IRP, A) {}
// FIXME: come up with better string.
const std::string getAsStr() const override {
return "ICVTrackerCallSiteReturned";
}
// FIXME: come up with some stats.
void trackStatistics() const override {}
/// We don't manifest anything for this AA.
ChangeStatus manifest(Attributor &A) override {
return ChangeStatus::UNCHANGED;
}
// Map of ICVs to their values at specific program points.
EnumeratedArray<Optional<Value *>, InternalControlVar,
InternalControlVar::ICV___last>
ICVReplacementValuesMap;
/// Return the value with which the associated value can be replaced for the
/// specific \p ICV.
Optional<Value *>
getUniqueReplacementValue(InternalControlVar ICV) const override {
return ICVReplacementValuesMap[ICV];
}
ChangeStatus updateImpl(Attributor &A) override {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
*this, IRPosition::returned(*getAssociatedFunction()),
DepClassTy::REQUIRED);
// We don't have any information, so we assume it changes the ICV.
if (!ICVTrackingAA.isAssumedTracked())
return indicatePessimisticFixpoint();
for (InternalControlVar ICV : TrackableICVs) {
Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
Optional<Value *> NewReplVal =
ICVTrackingAA.getUniqueReplacementValue(ICV);
if (ReplVal == NewReplVal)
continue;
ReplVal = NewReplVal;
Changed = ChangeStatus::CHANGED;
}
return Changed;
}
};
struct AAExecutionDomainFunction : public AAExecutionDomain {
AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A)
: AAExecutionDomain(IRP, A) {}
const std::string getAsStr() const override {
return "[AAExecutionDomain] " + std::to_string(SingleThreadedBBs.size()) +
"/" + std::to_string(NumBBs) + " BBs thread 0 only.";
}
/// See AbstractAttribute::trackStatistics().
void trackStatistics() const override {}
void initialize(Attributor &A) override {
Function *F = getAnchorScope();
for (const auto &BB : *F)
SingleThreadedBBs.insert(&BB);
NumBBs = SingleThreadedBBs.size();
}
ChangeStatus manifest(Attributor &A) override {
LLVM_DEBUG({
for (const BasicBlock *BB : SingleThreadedBBs)
dbgs() << TAG << " Basic block @" << getAnchorScope()->getName() << " "
<< BB->getName() << " is executed by a single thread.\n";
});
return ChangeStatus::UNCHANGED;
}
ChangeStatus updateImpl(Attributor &A) override;
/// Check if an instruction is executed by a single thread.
bool isExecutedByInitialThreadOnly(const Instruction &I) const override {
return isExecutedByInitialThreadOnly(*I.getParent());
}
bool isExecutedByInitialThreadOnly(const BasicBlock &BB) const override {
return isValidState() && SingleThreadedBBs.contains(&BB);
}
/// Set of basic blocks that are executed by a single thread.
SmallSetVector<const BasicBlock *, 16> SingleThreadedBBs;
/// Total number of basic blocks in this function.
long unsigned NumBBs;
};
ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
Function *F = getAnchorScope();
ReversePostOrderTraversal<Function *> RPOT(F);
auto NumSingleThreadedBBs = SingleThreadedBBs.size();
bool AllCallSitesKnown;
auto PredForCallSite = [&](AbstractCallSite ACS) {
const auto &ExecutionDomainAA = A.getAAFor<AAExecutionDomain>(
*this, IRPosition::function(*ACS.getInstruction()->getFunction()),
DepClassTy::REQUIRED);
return ACS.isDirectCall() &&
ExecutionDomainAA.isExecutedByInitialThreadOnly(
*ACS.getInstruction());
};
if (!A.checkForAllCallSites(PredForCallSite, *this,
/* RequiresAllCallSites */ true,
AllCallSitesKnown))
SingleThreadedBBs.remove(&F->getEntryBlock());
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
// Check if the edge into the successor block contains a condition that only
// lets the main thread execute it.
auto IsInitialThreadOnly = [&](BranchInst *Edge, BasicBlock *SuccessorBB) {
if (!Edge || !Edge->isConditional())
return false;
if (Edge->getSuccessor(0) != SuccessorBB)
return false;
auto *Cmp = dyn_cast<CmpInst>(Edge->getCondition());
if (!Cmp || !Cmp->isTrueWhenEqual() || !Cmp->isEquality())
return false;
ConstantInt *C = dyn_cast<ConstantInt>(Cmp->getOperand(1));
if (!C)
return false;
// Match: -1 == __kmpc_target_init (for non-SPMD kernels only!)
if (C->isAllOnesValue()) {
auto *CB = dyn_cast<CallBase>(Cmp->getOperand(0));
CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr;
if (!CB)
return false;
const int InitModeArgNo = 1;
auto *ModeCI = dyn_cast<ConstantInt>(CB->getOperand(InitModeArgNo));
return ModeCI && (ModeCI->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC);
}
if (C->isZero()) {
// Match: 0 == llvm.nvvm.read.ptx.sreg.tid.x()
if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
if (II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_tid_x)
return true;
// Match: 0 == llvm.amdgcn.workitem.id.x()
if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
if (II->getIntrinsicID() == Intrinsic::amdgcn_workitem_id_x)
return true;
}
return false;
};
// Merge all the predecessor states into the current basic block. A basic
// block is executed by a single thread if all of its predecessors are.
auto MergePredecessorStates = [&](BasicBlock *BB) {
if (pred_empty(BB))
return SingleThreadedBBs.contains(BB);
bool IsInitialThread = true;
for (BasicBlock *PredBB : predecessors(BB)) {
if (!IsInitialThreadOnly(dyn_cast<BranchInst>(PredBB->getTerminator()),
BB))
IsInitialThread &= SingleThreadedBBs.contains(PredBB);
}
return IsInitialThread;
};
for (auto *BB : RPOT) {
if (!MergePredecessorStates(BB))
SingleThreadedBBs.remove(BB);
}
return (NumSingleThreadedBBs == SingleThreadedBBs.size())
? ChangeStatus::UNCHANGED
: ChangeStatus::CHANGED;
}
/// Try to replace memory allocation calls called by a single thread with a
/// static buffer of shared memory.
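/// Conceptually (a sketch; actual types come from the runtime declarations):
///   %p = call i8* @__kmpc_alloc_shared(i64 N)
/// executed by the initial thread only is replaced with a module-level
/// [N x i8] buffer in the Shared address space, and the matching unique
/// __kmpc_free_shared call is deleted.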
struct AAHeapToShared : public StateWrapper<BooleanState, AbstractAttribute> {
using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAHeapToShared(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
/// Create an abstract attribute view for the position \p IRP.
static AAHeapToShared &createForPosition(const IRPosition &IRP,
Attributor &A);
/// Returns true if HeapToShared conversion is assumed to be possible.
virtual bool isAssumedHeapToShared(CallBase &CB) const = 0;
/// Returns true if HeapToShared conversion is assumed and the CB is a
/// callsite to a free operation to be removed.
virtual bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const = 0;
/// See AbstractAttribute::getName().
const std::string getName() const override { return "AAHeapToShared"; }
/// See AbstractAttribute::getIdAddr().
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is
/// AAHeapToShared.
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
/// Unique ID (due to the unique address)
static const char ID;
};
struct AAHeapToSharedFunction : public AAHeapToShared {
AAHeapToSharedFunction(const IRPosition &IRP, Attributor &A)
: AAHeapToShared(IRP, A) {}
const std::string getAsStr() const override {
return "[AAHeapToShared] " + std::to_string(MallocCalls.size()) +
" malloc calls eligible.";
}
/// See AbstractAttribute::trackStatistics().
void trackStatistics() const override {}
/// This function finds free calls that will be removed by the
/// HeapToShared transformation.
void findPotentialRemovedFreeCalls(Attributor &A) {
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
auto &FreeRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared];
PotentialRemovedFreeCalls.clear();
// Update free call users of found malloc calls.
for (CallBase *CB : MallocCalls) {
SmallVector<CallBase *, 4> FreeCalls;
for (auto *U : CB->users()) {
CallBase *C = dyn_cast<CallBase>(U);
if (C && C->getCalledFunction() == FreeRFI.Declaration)
FreeCalls.push_back(C);
}
if (FreeCalls.size() != 1)
continue;
PotentialRemovedFreeCalls.insert(FreeCalls.front());
}
}
void initialize(Attributor &A) override {
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
for (User *U : RFI.Declaration->users())
if (CallBase *CB = dyn_cast<CallBase>(U))
MallocCalls.insert(CB);
findPotentialRemovedFreeCalls(A);
}
bool isAssumedHeapToShared(CallBase &CB) const override {
return isValidState() && MallocCalls.count(&CB);
}
bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const override {
return isValidState() && PotentialRemovedFreeCalls.count(&CB);
}
ChangeStatus manifest(Attributor &A) override {
if (MallocCalls.empty())
return ChangeStatus::UNCHANGED;
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
auto &FreeCall = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared];
Function *F = getAnchorScope();
auto *HS = A.lookupAAFor<AAHeapToStack>(IRPosition::function(*F), this,
DepClassTy::OPTIONAL);
ChangeStatus Changed = ChangeStatus::UNCHANGED;
for (CallBase *CB : MallocCalls) {
// Skip replacing this if HeapToStack has already claimed it.
if (HS && HS->isAssumedHeapToStack(*CB))
continue;
// Find the unique free call to remove it.
SmallVector<CallBase *, 4> FreeCalls;
for (auto *U : CB->users()) {
CallBase *C = dyn_cast<CallBase>(U);
if (C && C->getCalledFunction() == FreeCall.Declaration)
FreeCalls.push_back(C);
}
if (FreeCalls.size() != 1)
continue;
auto *AllocSize = cast<ConstantInt>(CB->getArgOperand(0));
LLVM_DEBUG(dbgs() << TAG << "Replace globalization call " << *CB
<< " with " << AllocSize->getZExtValue()
<< " bytes of shared memory\n");
// Create a new shared memory buffer of the same size as the allocation
// and replace all the uses of the original allocation with it.
Module *M = CB->getModule();
Type *Int8Ty = Type::getInt8Ty(M->getContext());
Type *Int8ArrTy = ArrayType::get(Int8Ty, AllocSize->getZExtValue());
auto *SharedMem = new GlobalVariable(
*M, Int8ArrTy, /* IsConstant */ false, GlobalValue::InternalLinkage,
UndefValue::get(Int8ArrTy), CB->getName() + "_shared", nullptr,
GlobalValue::NotThreadLocal,
static_cast<unsigned>(AddressSpace::Shared));
auto *NewBuffer =
ConstantExpr::getPointerCast(SharedMem, Int8Ty->getPointerTo());
auto Remark = [&](OptimizationRemark OR) {
return OR << "Replaced globalized variable with "
<< ore::NV("SharedMemory", AllocSize->getZExtValue())
<< ((AllocSize->getZExtValue() != 1) ? " bytes " : " byte ")
<< "of shared memory.";
};
A.emitRemark<OptimizationRemark>(CB, "OMP111", Remark);
MaybeAlign Alignment = CB->getRetAlign();
assert(Alignment &&
"HeapToShared on allocation without alignment attribute");
SharedMem->setAlignment(MaybeAlign(Alignment));
A.changeValueAfterManifest(*CB, *NewBuffer);
A.deleteAfterManifest(*CB);
A.deleteAfterManifest(*FreeCalls.front());
NumBytesMovedToSharedMemory += AllocSize->getZExtValue();
Changed = ChangeStatus::CHANGED;
}
return Changed;
}
ChangeStatus updateImpl(Attributor &A) override {
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
Function *F = getAnchorScope();
auto NumMallocCalls = MallocCalls.size();
// Only consider malloc calls executed by a single thread with a constant.
for (User *U : RFI.Declaration->users()) {
const auto &ED = A.getAAFor<AAExecutionDomain>(
*this, IRPosition::function(*F), DepClassTy::REQUIRED);
if (CallBase *CB = dyn_cast<CallBase>(U))
if (!isa<ConstantInt>(CB->getArgOperand(0)) ||
!ED.isExecutedByInitialThreadOnly(*CB))
MallocCalls.remove(CB);
}
findPotentialRemovedFreeCalls(A);
if (NumMallocCalls != MallocCalls.size())
return ChangeStatus::CHANGED;
return ChangeStatus::UNCHANGED;
}
/// Collection of all malloc calls in a function.
SmallSetVector<CallBase *, 4> MallocCalls;
/// Collection of potentially removed free calls in a function.
SmallPtrSet<CallBase *, 4> PotentialRemovedFreeCalls;
};
struct AAKernelInfo : public StateWrapper<KernelInfoState, AbstractAttribute> {
using Base = StateWrapper<KernelInfoState, AbstractAttribute>;
AAKernelInfo(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
/// Statistics are tracked as part of manifest for now.
void trackStatistics() const override {}
/// See AbstractAttribute::getAsStr()
const std::string getAsStr() const override {
if (!isValidState())
return "<invalid>";
return std::string(SPMDCompatibilityTracker.isAssumed() ? "SPMD"
: "generic") +
std::string(SPMDCompatibilityTracker.isAtFixpoint() ? " [FIX]"
: "") +
std::string(" #PRs: ") +
(ReachedKnownParallelRegions.isValidState()
? std::to_string(ReachedKnownParallelRegions.size())
: "<invalid>") +
", #Unknown PRs: " +
(ReachedUnknownParallelRegions.isValidState()
? std::to_string(ReachedUnknownParallelRegions.size())
: "<invalid>") +
", #Reaching Kernels: " +
(ReachingKernelEntries.isValidState()
? std::to_string(ReachingKernelEntries.size())
: "<invalid>");
}
/// Create an abstract attribute view for the position \p IRP.
static AAKernelInfo &createForPosition(const IRPosition &IRP, Attributor &A);
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAKernelInfo"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is AAKernelInfo
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
static const char ID;
};
/// The function kernel info abstract attribute, basically, what can we say
/// about a function with regards to the KernelInfoState.
struct AAKernelInfoFunction : AAKernelInfo {
AAKernelInfoFunction(const IRPosition &IRP, Attributor &A)
: AAKernelInfo(IRP, A) {}
SmallPtrSet<Instruction *, 4> GuardedInstructions;
SmallPtrSetImpl<Instruction *> &getGuardedInstructions() {
return GuardedInstructions;
}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
// This is a high-level transform that might change the constant arguments
// of the init and deinit calls. We need to tell the Attributor about this
// to avoid other parts using the current constant value for simplification.
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
Function *Fn = getAnchorScope();
if (!OMPInfoCache.Kernels.count(Fn))
return;
// Add itself to the reaching kernel and set IsKernelEntry.
ReachingKernelEntries.insert(Fn);
IsKernelEntry = true;
OMPInformationCache::RuntimeFunctionInfo &InitRFI =
OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
OMPInformationCache::RuntimeFunctionInfo &DeinitRFI =
OMPInfoCache.RFIs[OMPRTL___kmpc_target_deinit];
// For kernels we perform more initialization work, first we find the init
// and deinit calls.
auto StoreCallBase = [](Use &U,
OMPInformationCache::RuntimeFunctionInfo &RFI,
CallBase *&Storage) {
CallBase *CB = OpenMPOpt::getCallIfRegularCall(U, &RFI);
assert(CB &&
"Unexpected use of __kmpc_target_init or __kmpc_target_deinit!");
assert(!Storage &&
"Multiple uses of __kmpc_target_init or __kmpc_target_deinit!");
Storage = CB;
return false;
};
InitRFI.foreachUse(
[&](Use &U, Function &) {
StoreCallBase(U, InitRFI, KernelInitCB);
return false;
},
Fn);
DeinitRFI.foreachUse(
[&](Use &U, Function &) {
StoreCallBase(U, DeinitRFI, KernelDeinitCB);
return false;
},
Fn);
// Ignore kernels without initializers such as global constructors.
if (!KernelInitCB || !KernelDeinitCB) {
indicateOptimisticFixpoint();
return;
}
// For kernels we might need to initialize/finalize the IsSPMD state and
// we need to register a simplification callback so that the Attributor
// knows the constant arguments to __kmpc_target_init and
// __kmpc_target_deinit might actually change.
Attributor::SimplifictionCallbackTy StateMachineSimplifyCB =
[&](const IRPosition &IRP, const AbstractAttribute *AA,
bool &UsedAssumedInformation) -> Optional<Value *> {
// IRP represents the "use generic state machine" argument of an
// __kmpc_target_init call. We will answer this one with the internal
// state. As long as we are not in an invalid state, we will create a
// custom state machine so the value should be an `i1 false`. If we are
// in an invalid state, we won't change the value that is in the IR.
if (!ReachedKnownParallelRegions.isValidState())
return nullptr;
// If we have disabled state machine rewrites, don't make a custom one.
if (DisableOpenMPOptStateMachineRewrite)
return nullptr;
if (AA)
A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
UsedAssumedInformation = !isAtFixpoint();
auto *FalseVal =
ConstantInt::getBool(IRP.getAnchorValue().getContext(), false);
return FalseVal;
};
Attributor::SimplifictionCallbackTy ModeSimplifyCB =
[&](const IRPosition &IRP, const AbstractAttribute *AA,
bool &UsedAssumedInformation) -> Optional<Value *> {
// IRP represents the "SPMDCompatibilityTracker" argument of an
// __kmpc_target_init or
// __kmpc_target_deinit call. We will answer this one with the internal
// state.
if (!SPMDCompatibilityTracker.isValidState())
return nullptr;
if (!SPMDCompatibilityTracker.isAtFixpoint()) {
if (AA)
A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
UsedAssumedInformation = true;
} else {
UsedAssumedInformation = false;
}
auto *Val = ConstantInt::getSigned(
IntegerType::getInt8Ty(IRP.getAnchorValue().getContext()),
SPMDCompatibilityTracker.isAssumed() ? OMP_TGT_EXEC_MODE_SPMD
: OMP_TGT_EXEC_MODE_GENERIC);
return Val;
};
Attributor::SimplifictionCallbackTy IsGenericModeSimplifyCB =
[&](const IRPosition &IRP, const AbstractAttribute *AA,
bool &UsedAssumedInformation) -> Optional<Value *> {
// IRP represents the "RequiresFullRuntime" argument of an
// __kmpc_target_init or __kmpc_target_deinit call. We will answer this
// one with the internal state of the SPMDCompatibilityTracker, so if
// generic then true, if SPMD then false.
if (!SPMDCompatibilityTracker.isValidState())
return nullptr;
if (!SPMDCompatibilityTracker.isAtFixpoint()) {
if (AA)
A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
UsedAssumedInformation = true;
} else {
UsedAssumedInformation = false;
}
auto *Val = ConstantInt::getBool(IRP.getAnchorValue().getContext(),
!SPMDCompatibilityTracker.isAssumed());
return Val;
};
constexpr const int InitModeArgNo = 1;
constexpr const int DeinitModeArgNo = 1;
constexpr const int InitUseStateMachineArgNo = 2;
constexpr const int InitRequiresFullRuntimeArgNo = 3;
constexpr const int DeinitRequiresFullRuntimeArgNo = 2;
A.registerSimplificationCallback(
IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo),
StateMachineSimplifyCB);
A.registerSimplificationCallback(
IRPosition::callsite_argument(*KernelInitCB, InitModeArgNo),
ModeSimplifyCB);
A.registerSimplificationCallback(
IRPosition::callsite_argument(*KernelDeinitCB, DeinitModeArgNo),
ModeSimplifyCB);
A.registerSimplificationCallback(
IRPosition::callsite_argument(*KernelInitCB,
InitRequiresFullRuntimeArgNo),
IsGenericModeSimplifyCB);
A.registerSimplificationCallback(
IRPosition::callsite_argument(*KernelDeinitCB,
DeinitRequiresFullRuntimeArgNo),
IsGenericModeSimplifyCB);
// Check if we know we are in SPMD-mode already.
ConstantInt *ModeArg =
dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo));
if (ModeArg && (ModeArg->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
SPMDCompatibilityTracker.indicateOptimisticFixpoint();
// This is a generic region but SPMDization is disabled so stop tracking.
else if (DisableOpenMPOptSPMDization)
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
}
/// Sanitize the string \p S such that it is a suitable global symbol name.
static std::string sanitizeForGlobalName(std::string S) {
std::replace_if(
S.begin(), S.end(),
[](const char C) {
return !((C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') ||
(C >= '0' && C <= '9') || C == '_');
},
'.');
return S;
}
/// Modify the IR based on the KernelInfoState as the fixpoint iteration is
/// finished now.
ChangeStatus manifest(Attributor &A) override {
// If we are not looking at a kernel with __kmpc_target_init and
// __kmpc_target_deinit call we cannot actually manifest the information.
if (!KernelInitCB || !KernelDeinitCB)
return ChangeStatus::UNCHANGED;
// If we can, we change the execution mode to SPMD-mode; otherwise we build
// a custom state machine.
ChangeStatus Changed = ChangeStatus::UNCHANGED;
if (!changeToSPMDMode(A, Changed))
return buildCustomStateMachine(A);
return Changed;
}
bool changeToSPMDMode(Attributor &A, ChangeStatus &Changed) {
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
if (!SPMDCompatibilityTracker.isAssumed()) {
for (Instruction *NonCompatibleI : SPMDCompatibilityTracker) {
if (!NonCompatibleI)
continue;
// Skip diagnostics on calls to known OpenMP runtime functions for now.
if (auto *CB = dyn_cast<CallBase>(NonCompatibleI))
if (OMPInfoCache.RTLFunctions.contains(CB->getCalledFunction()))
continue;
auto Remark = [&](OptimizationRemarkAnalysis ORA) {
ORA << "Value has potential side effects preventing SPMD-mode "
"execution";
if (isa<CallBase>(NonCompatibleI)) {
ORA << ". Add `__attribute__((assume(\"ompx_spmd_amenable\")))` to "
"the called function to override";
}
return ORA << ".";
};
A.emitRemark<OptimizationRemarkAnalysis>(NonCompatibleI, "OMP121",
Remark);
LLVM_DEBUG(dbgs() << TAG << "SPMD-incompatible side-effect: "
<< *NonCompatibleI << "\n");
}
return false;
}
// Check if the kernel is already in SPMD mode; if so, return success.
Function *Kernel = getAnchorScope();
GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable(
(Kernel->getName() + "_exec_mode").str());
assert(ExecMode && "Kernel without exec mode?");
assert(ExecMode->getInitializer() && "ExecMode doesn't have initializer!");
// Set the global exec mode flag to indicate SPMD-Generic mode.
assert(isa<ConstantInt>(ExecMode->getInitializer()) &&
"ExecMode is not an integer!");
const int8_t ExecModeVal =
cast<ConstantInt>(ExecMode->getInitializer())->getSExtValue();
if (ExecModeVal != OMP_TGT_EXEC_MODE_GENERIC)
return true;
// We will now unconditionally modify the IR, indicate a change.
Changed = ChangeStatus::CHANGED;
auto CreateGuardedRegion = [&](Instruction *RegionStartI,
Instruction *RegionEndI) {
LoopInfo *LI = nullptr;
DominatorTree *DT = nullptr;
MemorySSAUpdater *MSU = nullptr;
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
BasicBlock *ParentBB = RegionStartI->getParent();
Function *Fn = ParentBB->getParent();
Module &M = *Fn->getParent();
// Create all the blocks and logic.
// ParentBB:
// goto RegionCheckTidBB
// RegionCheckTidBB:
// Tid = __kmpc_hardware_thread_id()
// if (Tid != 0)
// goto RegionBarrierBB
// RegionStartBB:
// <execute instructions guarded>
// goto RegionEndBB
// RegionEndBB:
// <store escaping values to shared mem>
// goto RegionBarrierBB
// RegionBarrierBB:
// __kmpc_simple_barrier_spmd()
// // second barrier is omitted if lacking escaping values.
// <load escaping values from shared mem>
// __kmpc_simple_barrier_spmd()
// goto RegionExitBB
// RegionExitBB:
// <execute rest of instructions>
BasicBlock *RegionEndBB = SplitBlock(ParentBB, RegionEndI->getNextNode(),
DT, LI, MSU, "region.guarded.end");
BasicBlock *RegionBarrierBB =
SplitBlock(RegionEndBB, &*RegionEndBB->getFirstInsertionPt(), DT, LI,
MSU, "region.barrier");
BasicBlock *RegionExitBB =
SplitBlock(RegionBarrierBB, &*RegionBarrierBB->getFirstInsertionPt(),
DT, LI, MSU, "region.exit");
BasicBlock *RegionStartBB =
SplitBlock(ParentBB, RegionStartI, DT, LI, MSU, "region.guarded");
assert(ParentBB->getUniqueSuccessor() == RegionStartBB &&
"Expected a different CFG");
BasicBlock *RegionCheckTidBB = SplitBlock(
ParentBB, ParentBB->getTerminator(), DT, LI, MSU, "region.check.tid");
// Register basic blocks with the Attributor.
A.registerManifestAddedBasicBlock(*RegionEndBB);
A.registerManifestAddedBasicBlock(*RegionBarrierBB);
A.registerManifestAddedBasicBlock(*RegionExitBB);
A.registerManifestAddedBasicBlock(*RegionStartBB);
A.registerManifestAddedBasicBlock(*RegionCheckTidBB);
bool HasBroadcastValues = false;
// Find escaping outputs from the guarded region to outside users and
// broadcast their values to them.
for (Instruction &I : *RegionStartBB) {
SmallPtrSet<Instruction *, 4> OutsideUsers;
for (User *Usr : I.users()) {
Instruction &UsrI = *cast<Instruction>(Usr);
if (UsrI.getParent() != RegionStartBB)
OutsideUsers.insert(&UsrI);
}
if (OutsideUsers.empty())
continue;
HasBroadcastValues = true;
// Emit a global variable in shared memory to store the broadcasted
// value.
auto *SharedMem = new GlobalVariable(
M, I.getType(), /* IsConstant */ false,
GlobalValue::InternalLinkage, UndefValue::get(I.getType()),
sanitizeForGlobalName(
(I.getName() + ".guarded.output.alloc").str()),
nullptr, GlobalValue::NotThreadLocal,
static_cast<unsigned>(AddressSpace::Shared));
// Emit a store instruction to update the value.
new StoreInst(&I, SharedMem, RegionEndBB->getTerminator());
LoadInst *LoadI = new LoadInst(I.getType(), SharedMem,
I.getName() + ".guarded.output.load",
RegionBarrierBB->getTerminator());
// Emit a load instruction and replace uses of the output value.
for (Instruction *UsrI : OutsideUsers)
UsrI->replaceUsesOfWith(&I, LoadI);
}
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
// Go to tid check BB in ParentBB.
const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
ParentBB->getTerminator()->eraseFromParent();
OpenMPIRBuilder::LocationDescription Loc(
InsertPointTy(ParentBB, ParentBB->end()), DL);
OMPInfoCache.OMPBuilder.updateToLocation(Loc);
uint32_t SrcLocStrSize;
auto *SrcLocStr =
OMPInfoCache.OMPBuilder.getOrCreateSrcLocStr(Loc, SrcLocStrSize);
Value *Ident =
OMPInfoCache.OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize);
BranchInst::Create(RegionCheckTidBB, ParentBB)->setDebugLoc(DL);
// Add check for Tid in RegionCheckTidBB
RegionCheckTidBB->getTerminator()->eraseFromParent();
OpenMPIRBuilder::LocationDescription LocRegionCheckTid(
InsertPointTy(RegionCheckTidBB, RegionCheckTidBB->end()), DL);
OMPInfoCache.OMPBuilder.updateToLocation(LocRegionCheckTid);
FunctionCallee HardwareTidFn =
OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
M, OMPRTL___kmpc_get_hardware_thread_id_in_block);
CallInst *Tid =
OMPInfoCache.OMPBuilder.Builder.CreateCall(HardwareTidFn, {});
Tid->setDebugLoc(DL);
OMPInfoCache.setCallingConvention(HardwareTidFn, Tid);
Value *TidCheck = OMPInfoCache.OMPBuilder.Builder.CreateIsNull(Tid);
OMPInfoCache.OMPBuilder.Builder
.CreateCondBr(TidCheck, RegionStartBB, RegionBarrierBB)
->setDebugLoc(DL);
// First barrier for synchronization, ensures main thread has updated
// values.
FunctionCallee BarrierFn =
OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
M, OMPRTL___kmpc_barrier_simple_spmd);
OMPInfoCache.OMPBuilder.updateToLocation(InsertPointTy(
RegionBarrierBB, RegionBarrierBB->getFirstInsertionPt()));
CallInst *Barrier =
OMPInfoCache.OMPBuilder.Builder.CreateCall(BarrierFn, {Ident, Tid});
Barrier->setDebugLoc(DL);
OMPInfoCache.setCallingConvention(BarrierFn, Barrier);
// Second barrier ensures workers have read broadcast values.
if (HasBroadcastValues) {
CallInst *Barrier = CallInst::Create(BarrierFn, {Ident, Tid}, "",
RegionBarrierBB->getTerminator());
Barrier->setDebugLoc(DL);
OMPInfoCache.setCallingConvention(BarrierFn, Barrier);
}
};
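// A rough sketch of the control flow CreateGuardedRegion produces, using the
// C++ variable names from above (illustrative only; the emitted IR block
// names and order follow the code, not this comment):
//
//   ParentBB:            br RegionCheckTidBB
//   RegionCheckTidBB:    %tid = call @__kmpc_get_hardware_thread_id_in_block()
//                        br (%tid == 0), RegionStartBB, RegionBarrierBB
//   RegionStartBB..RegionEndBB:
//                        <guarded instructions>; escaping values are stored
//                        into shared-memory globals
//   RegionBarrierBB:     call @__kmpc_barrier_simple_spmd(...); broadcast
//                        values are reloaded (plus a second barrier if any
//                        value was broadcast)
//   RegionExitBB:        remainder of the original block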
auto &AllocSharedRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
SmallPtrSet<BasicBlock *, 8> Visited;
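// Best-effort reordering (a descriptive note, not upstream documentation):
// within each block that contains instructions to be guarded, sink guarded,
// user-free memory instructions down next to the following guarded
// instruction so that the guarded instructions cluster into as few
// contiguous regions as possible.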
for (Instruction *GuardedI : SPMDCompatibilityTracker) {
BasicBlock *BB = GuardedI->getParent();
if (!Visited.insert(BB).second)
continue;
SmallVector<std::pair<Instruction *, Instruction *>> Reorders;
Instruction *LastEffect = nullptr;
BasicBlock::reverse_iterator IP = BB->rbegin(), IPEnd = BB->rend();
while (++IP != IPEnd) {
if (!IP->mayHaveSideEffects() && !IP->mayReadFromMemory())
continue;
Instruction *I = &*IP;
if (OpenMPOpt::getCallIfRegularCall(*I, &AllocSharedRFI))
continue;
if (!I->user_empty() || !SPMDCompatibilityTracker.contains(I)) {
LastEffect = nullptr;
continue;
}
if (LastEffect)
Reorders.push_back({I, LastEffect});
LastEffect = &*IP;
}
for (auto &Reorder : Reorders)
Reorder.first->moveBefore(Reorder.second);
}
SmallVector<std::pair<Instruction *, Instruction *>, 4> GuardedRegions;
for (Instruction *GuardedI : SPMDCompatibilityTracker) {
BasicBlock *BB = GuardedI->getParent();
auto *CalleeAA = A.lookupAAFor<AAKernelInfo>(
IRPosition::function(*GuardedI->getFunction()), nullptr,
DepClassTy::NONE);
assert(CalleeAA != nullptr && "Expected Callee AAKernelInfo");
auto &CalleeAAFunction = *cast<AAKernelInfoFunction>(CalleeAA);
// Continue if instruction is already guarded.
if (CalleeAAFunction.getGuardedInstructions().contains(GuardedI))
continue;
Instruction *GuardedRegionStart = nullptr, *GuardedRegionEnd = nullptr;
for (Instruction &I : *BB) {
// If instruction I needs to be guarded, update the guarded region
// bounds.
if (SPMDCompatibilityTracker.contains(&I)) {
CalleeAAFunction.getGuardedInstructions().insert(&I);
if (GuardedRegionStart)
GuardedRegionEnd = &I;
else
GuardedRegionStart = GuardedRegionEnd = &I;
continue;
}
// Instruction I does not need guarding; store any region found so far
// and reset the bounds.
if (GuardedRegionStart) {
GuardedRegions.push_back(
std::make_pair(GuardedRegionStart, GuardedRegionEnd));
GuardedRegionStart = nullptr;
GuardedRegionEnd = nullptr;
}
}
}
for (auto &GR : GuardedRegions)
CreateGuardedRegion(GR.first, GR.second);
// Adjust the global exec mode flag that tells the runtime what mode this
// kernel is executed in.
assert(ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC &&
"Initially non-SPMD kernel has SPMD exec mode!");
ExecMode->setInitializer(
ConstantInt::get(ExecMode->getInitializer()->getType(),
ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD));
// Next rewrite the init and deinit calls to indicate we use SPMD-mode now.
const int InitModeArgNo = 1;
const int DeinitModeArgNo = 1;
const int InitUseStateMachineArgNo = 2;
const int InitRequiresFullRuntimeArgNo = 3;
const int DeinitRequiresFullRuntimeArgNo = 2;
auto &Ctx = getAnchorValue().getContext();
A.changeUseAfterManifest(
KernelInitCB->getArgOperandUse(InitModeArgNo),
*ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx),
OMP_TGT_EXEC_MODE_SPMD));
A.changeUseAfterManifest(
KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo),
*ConstantInt::getBool(Ctx, false));
A.changeUseAfterManifest(
KernelDeinitCB->getArgOperandUse(DeinitModeArgNo),
*ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx),
OMP_TGT_EXEC_MODE_SPMD));
A.changeUseAfterManifest(
KernelInitCB->getArgOperandUse(InitRequiresFullRuntimeArgNo),
*ConstantInt::getBool(Ctx, false));
A.changeUseAfterManifest(
KernelDeinitCB->getArgOperandUse(DeinitRequiresFullRuntimeArgNo),
*ConstantInt::getBool(Ctx, false));
++NumOpenMPTargetRegionKernelsSPMD;
auto Remark = [&](OptimizationRemark OR) {
return OR << "Transformed generic-mode kernel to SPMD-mode.";
};
A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP120", Remark);
return true;
};
ChangeStatus buildCustomStateMachine(Attributor &A) {
// If we have disabled state machine rewrites, don't make a custom one
if (DisableOpenMPOptStateMachineRewrite)
return ChangeStatus::UNCHANGED;
// Don't rewrite the state machine if we are not in a valid state.
if (!ReachedKnownParallelRegions.isValidState())
return ChangeStatus::UNCHANGED;
const int InitModeArgNo = 1;
const int InitUseStateMachineArgNo = 2;
// Check if the current configuration is non-SPMD and generic state machine.
// If we already have SPMD mode or a custom state machine we do not need to
// go any further. If it is anything but a constant something is weird and
// we give up.
ConstantInt *UseStateMachine = dyn_cast<ConstantInt>(
KernelInitCB->getArgOperand(InitUseStateMachineArgNo));
ConstantInt *Mode =
dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo));
// If we are stuck with generic mode, try to create a custom device (=GPU)
// state machine which is specialized for the parallel regions that are
// reachable by the kernel.
if (!UseStateMachine || UseStateMachine->isZero() || !Mode ||
(Mode->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
return ChangeStatus::UNCHANGED;
// If not SPMD mode, indicate we use a custom state machine now.
auto &Ctx = getAnchorValue().getContext();
auto *FalseVal = ConstantInt::getBool(Ctx, false);
A.changeUseAfterManifest(
KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo), *FalseVal);
// If we don't actually need a state machine we are done here. This can
// happen if there simply are no parallel regions. In the resulting kernel
// all worker threads will simply exit right away, leaving the main thread
// to do the work alone.
if (!mayContainParallelRegion()) {
++NumOpenMPTargetRegionKernelsWithoutStateMachine;
auto Remark = [&](OptimizationRemark OR) {
return OR << "Removing unused state machine from generic-mode kernel.";
};
A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP130", Remark);
return ChangeStatus::CHANGED;
}
// Keep track in the statistics of our new shiny custom state machine.
if (ReachedUnknownParallelRegions.empty()) {
++NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback;
auto Remark = [&](OptimizationRemark OR) {
return OR << "Rewriting generic-mode kernel with a customized state "
"machine.";
};
A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP131", Remark);
} else {
++NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback;
auto Remark = [&](OptimizationRemarkAnalysis OR) {
return OR << "Generic-mode kernel is executed with a customized state "
"machine that requires a fallback.";
};
A.emitRemark<OptimizationRemarkAnalysis>(KernelInitCB, "OMP132", Remark);
// Tell the user why we ended up with a fallback.
for (CallBase *UnknownParallelRegionCB : ReachedUnknownParallelRegions) {
if (!UnknownParallelRegionCB)
continue;
auto Remark = [&](OptimizationRemarkAnalysis ORA) {
return ORA << "Call may contain unknown parallel regions. Use "
<< "`__attribute__((assume(\"omp_no_parallelism\")))` to "
"override.";
};
A.emitRemark<OptimizationRemarkAnalysis>(UnknownParallelRegionCB,
"OMP133", Remark);
}
}
// Create all the blocks:
//
// InitCB = __kmpc_target_init(...)
// BlockHwSize =
// __kmpc_get_hardware_num_threads_in_block();
// WarpSize = __kmpc_get_warp_size();
// BlockSize = BlockHwSize - WarpSize;
// if (InitCB >= BlockSize) return;
// IsWorkerCheckBB: bool IsWorker = InitCB >= 0;
// if (IsWorker) {
// SMBeginBB: __kmpc_barrier_simple_generic(...);
// void *WorkFn;
// bool Active = __kmpc_kernel_parallel(&WorkFn);
// if (!WorkFn) return;
// SMIsActiveCheckBB: if (Active) {
// SMIfCascadeCurrentBB: if (WorkFn == <ParFn0>)
// ParFn0(...);
// SMIfCascadeCurrentBB: else if (WorkFn == <ParFn1>)
// ParFn1(...);
// ...
// SMIfCascadeCurrentBB: else
// ((WorkFnTy*)WorkFn)(...);
// SMEndParallelBB: __kmpc_kernel_end_parallel(...);
// }
// SMDoneBB: __kmpc_barrier_simple_generic(...);
// goto SMBeginBB;
// }
// UserCodeEntryBB: // user code
// __kmpc_target_deinit(...)
//
Function *Kernel = getAssociatedFunction();
assert(Kernel && "Expected an associated function!");
BasicBlock *InitBB = KernelInitCB->getParent();
BasicBlock *UserCodeEntryBB = InitBB->splitBasicBlock(
KernelInitCB->getNextNode(), "thread.user_code.check");
BasicBlock *IsWorkerCheckBB =
BasicBlock::Create(Ctx, "is_worker_check", Kernel, UserCodeEntryBB);
BasicBlock *StateMachineBeginBB = BasicBlock::Create(
Ctx, "worker_state_machine.begin", Kernel, UserCodeEntryBB);
BasicBlock *StateMachineFinishedBB = BasicBlock::Create(
Ctx, "worker_state_machine.finished", Kernel, UserCodeEntryBB);
BasicBlock *StateMachineIsActiveCheckBB = BasicBlock::Create(
Ctx, "worker_state_machine.is_active.check", Kernel, UserCodeEntryBB);
BasicBlock *StateMachineIfCascadeCurrentBB =
BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check",
Kernel, UserCodeEntryBB);
BasicBlock *StateMachineEndParallelBB =
BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.end",
Kernel, UserCodeEntryBB);
BasicBlock *StateMachineDoneBarrierBB = BasicBlock::Create(
Ctx, "worker_state_machine.done.barrier", Kernel, UserCodeEntryBB);
A.registerManifestAddedBasicBlock(*InitBB);
A.registerManifestAddedBasicBlock(*UserCodeEntryBB);
A.registerManifestAddedBasicBlock(*IsWorkerCheckBB);
A.registerManifestAddedBasicBlock(*StateMachineBeginBB);
A.registerManifestAddedBasicBlock(*StateMachineFinishedBB);
A.registerManifestAddedBasicBlock(*StateMachineIsActiveCheckBB);
A.registerManifestAddedBasicBlock(*StateMachineIfCascadeCurrentBB);
A.registerManifestAddedBasicBlock(*StateMachineEndParallelBB);
A.registerManifestAddedBasicBlock(*StateMachineDoneBarrierBB);
const DebugLoc &DLoc = KernelInitCB->getDebugLoc();
ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc);
InitBB->getTerminator()->eraseFromParent();
Module &M = *Kernel->getParent();
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
FunctionCallee BlockHwSizeFn =
OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
M, OMPRTL___kmpc_get_hardware_num_threads_in_block);
FunctionCallee WarpSizeFn =
OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
M, OMPRTL___kmpc_get_warp_size);
CallInst *BlockHwSize =
CallInst::Create(BlockHwSizeFn, "block.hw_size", InitBB);
OMPInfoCache.setCallingConvention(BlockHwSizeFn, BlockHwSize);
BlockHwSize->setDebugLoc(DLoc);
CallInst *WarpSize = CallInst::Create(WarpSizeFn, "warp.size", InitBB);
OMPInfoCache.setCallingConvention(WarpSizeFn, WarpSize);
WarpSize->setDebugLoc(DLoc);
Instruction *BlockSize =
BinaryOperator::CreateSub(BlockHwSize, WarpSize, "block.size", InitBB);
BlockSize->setDebugLoc(DLoc);
Instruction *IsMainOrWorker =
ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB,
BlockSize, "thread.is_main_or_worker", InitBB);
IsMainOrWorker->setDebugLoc(DLoc);
BranchInst::Create(IsWorkerCheckBB, StateMachineFinishedBB, IsMainOrWorker,
InitBB);
Instruction *IsWorker =
ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB,
ConstantInt::get(KernelInitCB->getType(), -1),
"thread.is_worker", IsWorkerCheckBB);
IsWorker->setDebugLoc(DLoc);
BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker,
IsWorkerCheckBB);
// Create local storage for the work function pointer.
const DataLayout &DL = M.getDataLayout();
Type *VoidPtrTy = Type::getInt8PtrTy(Ctx);
Instruction *WorkFnAI =
new AllocaInst(VoidPtrTy, DL.getAllocaAddrSpace(), nullptr,
"worker.work_fn.addr", &Kernel->getEntryBlock().front());
WorkFnAI->setDebugLoc(DLoc);
OMPInfoCache.OMPBuilder.updateToLocation(
OpenMPIRBuilder::LocationDescription(
IRBuilder<>::InsertPoint(StateMachineBeginBB,
StateMachineBeginBB->end()),
DLoc));
Value *Ident = KernelInitCB->getArgOperand(0);
Value *GTid = KernelInitCB;
FunctionCallee BarrierFn =
OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
M, OMPRTL___kmpc_barrier_simple_generic);
CallInst *Barrier =
CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB);
OMPInfoCache.setCallingConvention(BarrierFn, Barrier);
Barrier->setDebugLoc(DLoc);
if (WorkFnAI->getType()->getPointerAddressSpace() !=
(unsigned int)AddressSpace::Generic) {
WorkFnAI = new AddrSpaceCastInst(
WorkFnAI,
PointerType::getWithSamePointeeType(
cast<PointerType>(WorkFnAI->getType()),
(unsigned int)AddressSpace::Generic),
WorkFnAI->getName() + ".generic", StateMachineBeginBB);
WorkFnAI->setDebugLoc(DLoc);
}
FunctionCallee KernelParallelFn =
OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
M, OMPRTL___kmpc_kernel_parallel);
CallInst *IsActiveWorker = CallInst::Create(
KernelParallelFn, {WorkFnAI}, "worker.is_active", StateMachineBeginBB);
OMPInfoCache.setCallingConvention(KernelParallelFn, IsActiveWorker);
IsActiveWorker->setDebugLoc(DLoc);
Instruction *WorkFn = new LoadInst(VoidPtrTy, WorkFnAI, "worker.work_fn",
StateMachineBeginBB);
WorkFn->setDebugLoc(DLoc);
FunctionType *ParallelRegionFnTy = FunctionType::get(
Type::getVoidTy(Ctx), {Type::getInt16Ty(Ctx), Type::getInt32Ty(Ctx)},
false);
Value *WorkFnCast = BitCastInst::CreatePointerBitCastOrAddrSpaceCast(
WorkFn, ParallelRegionFnTy->getPointerTo(), "worker.work_fn.addr_cast",
StateMachineBeginBB);
Instruction *IsDone =
ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFn,
Constant::getNullValue(VoidPtrTy), "worker.is_done",
StateMachineBeginBB);
IsDone->setDebugLoc(DLoc);
BranchInst::Create(StateMachineFinishedBB, StateMachineIsActiveCheckBB,
IsDone, StateMachineBeginBB)
->setDebugLoc(DLoc);
BranchInst::Create(StateMachineIfCascadeCurrentBB,
StateMachineDoneBarrierBB, IsActiveWorker,
StateMachineIsActiveCheckBB)
->setDebugLoc(DLoc);
Value *ZeroArg =
Constant::getNullValue(ParallelRegionFnTy->getParamType(0));
// Now that we have most of the CFG skeleton it is time for the if-cascade
// that checks the function pointer we got from the runtime against the
// parallel regions we expect, if there are any.
for (int I = 0, E = ReachedKnownParallelRegions.size(); I < E; ++I) {
auto *ParallelRegion = ReachedKnownParallelRegions[I];
BasicBlock *PRExecuteBB = BasicBlock::Create(
Ctx, "worker_state_machine.parallel_region.execute", Kernel,
StateMachineEndParallelBB);
CallInst::Create(ParallelRegion, {ZeroArg, GTid}, "", PRExecuteBB)
->setDebugLoc(DLoc);
BranchInst::Create(StateMachineEndParallelBB, PRExecuteBB)
->setDebugLoc(DLoc);
BasicBlock *PRNextBB =
BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check",
Kernel, StateMachineEndParallelBB);
// Check if we need to compare the pointer at all or if we can just
// call the parallel region function.
Value *IsPR;
if (I + 1 < E || !ReachedUnknownParallelRegions.empty()) {
Instruction *CmpI = ICmpInst::Create(
ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFnCast, ParallelRegion,
"worker.check_parallel_region", StateMachineIfCascadeCurrentBB);
CmpI->setDebugLoc(DLoc);
IsPR = CmpI;
} else {
IsPR = ConstantInt::getTrue(Ctx);
}
BranchInst::Create(PRExecuteBB, PRNextBB, IsPR,
StateMachineIfCascadeCurrentBB)
->setDebugLoc(DLoc);
StateMachineIfCascadeCurrentBB = PRNextBB;
}
// At the end of the if-cascade we place the indirect function pointer call
// in case we need it, that is, if there can be parallel regions we have not
// handled in the if-cascade above.
if (!ReachedUnknownParallelRegions.empty()) {
StateMachineIfCascadeCurrentBB->setName(
"worker_state_machine.parallel_region.fallback.execute");
CallInst::Create(ParallelRegionFnTy, WorkFnCast, {ZeroArg, GTid}, "",
StateMachineIfCascadeCurrentBB)
->setDebugLoc(DLoc);
}
BranchInst::Create(StateMachineEndParallelBB,
StateMachineIfCascadeCurrentBB)
->setDebugLoc(DLoc);
FunctionCallee EndParallelFn =
OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
M, OMPRTL___kmpc_kernel_end_parallel);
CallInst *EndParallel =
CallInst::Create(EndParallelFn, {}, "", StateMachineEndParallelBB);
OMPInfoCache.setCallingConvention(EndParallelFn, EndParallel);
EndParallel->setDebugLoc(DLoc);
BranchInst::Create(StateMachineDoneBarrierBB, StateMachineEndParallelBB)
->setDebugLoc(DLoc);
CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineDoneBarrierBB)
->setDebugLoc(DLoc);
BranchInst::Create(StateMachineBeginBB, StateMachineDoneBarrierBB)
->setDebugLoc(DLoc);
return ChangeStatus::CHANGED;
}
/// Fixpoint iteration update function. Will be called every time a dependence
/// changed its state (and in the beginning).
ChangeStatus updateImpl(Attributor &A) override {
KernelInfoState StateBefore = getState();
// Callback to check a read/write instruction.
auto CheckRWInst = [&](Instruction &I) {
// We handle calls later.
if (isa<CallBase>(I))
return true;
// We only care about write effects.
if (!I.mayWriteToMemory())
return true;
if (auto *SI = dyn_cast<StoreInst>(&I)) {
SmallVector<const Value *> Objects;
getUnderlyingObjects(SI->getPointerOperand(), Objects);
if (llvm::all_of(Objects,
[](const Value *Obj) { return isa<AllocaInst>(Obj); }))
return true;
// Check for AAHeapToStack moved objects which must not be guarded.
auto &HS = A.getAAFor<AAHeapToStack>(
*this, IRPosition::function(*I.getFunction()),
DepClassTy::OPTIONAL);
if (llvm::all_of(Objects, [&HS](const Value *Obj) {
auto *CB = dyn_cast<CallBase>(Obj);
if (!CB)
return false;
return HS.isAssumedHeapToStack(*CB);
})) {
return true;
}
}
// Insert instruction that needs guarding.
SPMDCompatibilityTracker.insert(&I);
return true;
};
bool UsedAssumedInformationInCheckRWInst = false;
if (!SPMDCompatibilityTracker.isAtFixpoint())
if (!A.checkForAllReadWriteInstructions(
CheckRWInst, *this, UsedAssumedInformationInCheckRWInst))
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
bool UsedAssumedInformationFromReachingKernels = false;
if (!IsKernelEntry) {
updateParallelLevels(A);
bool AllReachingKernelsKnown = true;
updateReachingKernelEntries(A, AllReachingKernelsKnown);
UsedAssumedInformationFromReachingKernels = !AllReachingKernelsKnown;
if (!ParallelLevels.isValidState())
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
else if (!ReachingKernelEntries.isValidState())
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
else if (!SPMDCompatibilityTracker.empty()) {
// Check if all reaching kernels agree on the mode as we can otherwise
// not guard instructions. We might not be sure about the mode so we
// cannot fix the internal SPMD-zation state either.
int SPMD = 0, Generic = 0;
for (auto *Kernel : ReachingKernelEntries) {
auto &CBAA = A.getAAFor<AAKernelInfo>(
*this, IRPosition::function(*Kernel), DepClassTy::OPTIONAL);
if (CBAA.SPMDCompatibilityTracker.isValidState() &&
CBAA.SPMDCompatibilityTracker.isAssumed())
++SPMD;
else
++Generic;
if (!CBAA.SPMDCompatibilityTracker.isAtFixpoint())
UsedAssumedInformationFromReachingKernels = true;
}
if (SPMD != 0 && Generic != 0)
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
}
}
// Callback to check a call instruction.
bool AllParallelRegionStatesWereFixed = true;
bool AllSPMDStatesWereFixed = true;
auto CheckCallInst = [&](Instruction &I) {
auto &CB = cast<CallBase>(I);
auto &CBAA = A.getAAFor<AAKernelInfo>(
*this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL);
getState() ^= CBAA.getState();
AllSPMDStatesWereFixed &= CBAA.SPMDCompatibilityTracker.isAtFixpoint();
AllParallelRegionStatesWereFixed &=
CBAA.ReachedKnownParallelRegions.isAtFixpoint();
AllParallelRegionStatesWereFixed &=
CBAA.ReachedUnknownParallelRegions.isAtFixpoint();
return true;
};
bool UsedAssumedInformationInCheckCallInst = false;
if (!A.checkForAllCallLikeInstructions(
CheckCallInst, *this, UsedAssumedInformationInCheckCallInst)) {
LLVM_DEBUG(dbgs() << TAG
<< "Failed to visit all call-like instructions!\n";);
return indicatePessimisticFixpoint();
}
// If we haven't used any assumed information for the reached parallel
// region states we can fix it.
if (!UsedAssumedInformationInCheckCallInst &&
AllParallelRegionStatesWereFixed) {
ReachedKnownParallelRegions.indicateOptimisticFixpoint();
ReachedUnknownParallelRegions.indicateOptimisticFixpoint();
}
// If we are sure there are no parallel regions in the kernel we do not
// want SPMD mode.
if (IsKernelEntry && ReachedUnknownParallelRegions.isAtFixpoint() &&
ReachedKnownParallelRegions.isAtFixpoint() &&
ReachedUnknownParallelRegions.isValidState() &&
ReachedKnownParallelRegions.isValidState() &&
!mayContainParallelRegion())
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
// If we haven't used any assumed information for the SPMD state we can fix
// it.
if (!UsedAssumedInformationInCheckRWInst &&
!UsedAssumedInformationInCheckCallInst &&
!UsedAssumedInformationFromReachingKernels && AllSPMDStatesWereFixed)
SPMDCompatibilityTracker.indicateOptimisticFixpoint();
return StateBefore == getState() ? ChangeStatus::UNCHANGED
: ChangeStatus::CHANGED;
}
private:
/// Update info regarding reaching kernels.
void updateReachingKernelEntries(Attributor &A,
bool &AllReachingKernelsKnown) {
auto PredCallSite = [&](AbstractCallSite ACS) {
Function *Caller = ACS.getInstruction()->getFunction();
assert(Caller && "Caller is nullptr");
auto &CAA = A.getOrCreateAAFor<AAKernelInfo>(
IRPosition::function(*Caller), this, DepClassTy::REQUIRED);
if (CAA.ReachingKernelEntries.isValidState()) {
ReachingKernelEntries ^= CAA.ReachingKernelEntries;
return true;
}
// We lost track of the caller of the associated function; any kernel
// could reach it now.
ReachingKernelEntries.indicatePessimisticFixpoint();
return true;
};
if (!A.checkForAllCallSites(PredCallSite, *this,
true /* RequireAllCallSites */,
AllReachingKernelsKnown))
ReachingKernelEntries.indicatePessimisticFixpoint();
}
/// Update info regarding parallel levels.
void updateParallelLevels(Attributor &A) {
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
OMPInformationCache::RuntimeFunctionInfo &Parallel51RFI =
OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
auto PredCallSite = [&](AbstractCallSite ACS) {
Function *Caller = ACS.getInstruction()->getFunction();
assert(Caller && "Caller is nullptr");
auto &CAA =
A.getOrCreateAAFor<AAKernelInfo>(IRPosition::function(*Caller));
if (CAA.ParallelLevels.isValidState()) {
// Any function that is called by `__kmpc_parallel_51` will not be
// folded, as the parallel level in the function is updated. Getting this
// right would make the analysis depend on the runtime implementation, and
// any future change to that implementation could silently make the
// analysis wrong. As a consequence, we are just conservative here.
if (Caller == Parallel51RFI.Declaration) {
ParallelLevels.indicatePessimisticFixpoint();
return true;
}
ParallelLevels ^= CAA.ParallelLevels;
return true;
}
// We lost track of the caller of the associated function; any kernel
// could reach it now.
ParallelLevels.indicatePessimisticFixpoint();
return true;
};
bool AllCallSitesKnown = true;
if (!A.checkForAllCallSites(PredCallSite, *this,
true /* RequireAllCallSites */,
AllCallSitesKnown))
ParallelLevels.indicatePessimisticFixpoint();
}
};
/// The call site kernel info abstract attribute, basically, what can we say
/// about a call site with regards to the KernelInfoState. For now this simply
/// forwards the information from the callee.
struct AAKernelInfoCallSite : AAKernelInfo {
AAKernelInfoCallSite(const IRPosition &IRP, Attributor &A)
: AAKernelInfo(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
AAKernelInfo::initialize(A);
CallBase &CB = cast<CallBase>(getAssociatedValue());
Function *Callee = getAssociatedFunction();
auto &AssumptionAA = A.getAAFor<AAAssumptionInfo>(
*this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL);
// Check for SPMD-mode assumptions.
if (AssumptionAA.hasAssumption("ompx_spmd_amenable")) {
SPMDCompatibilityTracker.indicateOptimisticFixpoint();
indicateOptimisticFixpoint();
}
// First weed out calls we do not care about, that is readonly/readnone
// calls, intrinsics, and "no_openmp" calls. None of these can reach a
// parallel region or anything else we are looking for.
if (!CB.mayWriteToMemory() || isa<IntrinsicInst>(CB)) {
indicateOptimisticFixpoint();
return;
}
// Next we check if we know the callee. If it is a known OpenMP function
// we will handle them explicitly in the switch below. If it is not, we
// will use an AAKernelInfo object on the callee to gather information and
// merge that into the current state. The latter happens in the updateImpl.
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee);
if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) {
// Unknown caller or declarations are not analyzable, we give up.
if (!Callee || !A.isFunctionIPOAmendable(*Callee)) {
// Unknown callees might contain parallel regions, except if they have
// an appropriate assumption attached.
if (!(AssumptionAA.hasAssumption("omp_no_openmp") ||
AssumptionAA.hasAssumption("omp_no_parallelism")))
ReachedUnknownParallelRegions.insert(&CB);
// If SPMDCompatibilityTracker is not fixed, we need to give up on the
// idea we can run something unknown in SPMD-mode.
if (!SPMDCompatibilityTracker.isAtFixpoint()) {
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
SPMDCompatibilityTracker.insert(&CB);
}
// We have updated the state for this unknown call properly, there won't
// be any change so we indicate a fixpoint.
indicateOptimisticFixpoint();
}
// If the callee is known and can be used in IPO, we will update the state
// based on the callee state in updateImpl.
return;
}
const unsigned int WrapperFunctionArgNo = 6;
RuntimeFunction RF = It->getSecond();
switch (RF) {
// All the functions we know are compatible with SPMD mode.
case OMPRTL___kmpc_is_spmd_exec_mode:
case OMPRTL___kmpc_distribute_static_fini:
case OMPRTL___kmpc_for_static_fini:
case OMPRTL___kmpc_global_thread_num:
case OMPRTL___kmpc_get_hardware_num_threads_in_block:
case OMPRTL___kmpc_get_hardware_num_blocks:
case OMPRTL___kmpc_single:
case OMPRTL___kmpc_end_single:
case OMPRTL___kmpc_master:
case OMPRTL___kmpc_end_master:
case OMPRTL___kmpc_barrier:
case OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2:
case OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2:
case OMPRTL___kmpc_nvptx_end_reduce_nowait:
break;
case OMPRTL___kmpc_distribute_static_init_4:
case OMPRTL___kmpc_distribute_static_init_4u:
case OMPRTL___kmpc_distribute_static_init_8:
case OMPRTL___kmpc_distribute_static_init_8u:
case OMPRTL___kmpc_for_static_init_4:
case OMPRTL___kmpc_for_static_init_4u:
case OMPRTL___kmpc_for_static_init_8:
case OMPRTL___kmpc_for_static_init_8u: {
// Check the schedule and allow static schedule in SPMD mode.
unsigned ScheduleArgOpNo = 2;
auto *ScheduleTypeCI =
dyn_cast<ConstantInt>(CB.getArgOperand(ScheduleArgOpNo));
unsigned ScheduleTypeVal =
ScheduleTypeCI ? ScheduleTypeCI->getZExtValue() : 0;
switch (OMPScheduleType(ScheduleTypeVal)) {
case OMPScheduleType::Static:
case OMPScheduleType::StaticChunked:
case OMPScheduleType::Distribute:
case OMPScheduleType::DistributeChunked:
break;
default:
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
SPMDCompatibilityTracker.insert(&CB);
break;
};
} break;
case OMPRTL___kmpc_target_init:
KernelInitCB = &CB;
break;
case OMPRTL___kmpc_target_deinit:
KernelDeinitCB = &CB;
break;
case OMPRTL___kmpc_parallel_51:
if (auto *ParallelRegion = dyn_cast<Function>(
CB.getArgOperand(WrapperFunctionArgNo)->stripPointerCasts())) {
ReachedKnownParallelRegions.insert(ParallelRegion);
break;
}
// The condition above should usually get the parallel region function
// pointer and record it. In the off chance it doesn't we assume the
// worst.
ReachedUnknownParallelRegions.insert(&CB);
break;
case OMPRTL___kmpc_omp_task:
// We do not look into tasks right now, just give up.
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
SPMDCompatibilityTracker.insert(&CB);
ReachedUnknownParallelRegions.insert(&CB);
break;
case OMPRTL___kmpc_alloc_shared:
case OMPRTL___kmpc_free_shared:
// Return without setting a fixpoint, to be resolved in updateImpl.
return;
default:
// Unknown OpenMP runtime calls cannot be executed in SPMD-mode,
// generally. However, they do not hide parallel regions.
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
SPMDCompatibilityTracker.insert(&CB);
break;
}
// All other OpenMP runtime calls will not reach parallel regions so they
// can be safely ignored for now. Since it is a known OpenMP runtime call we
// have now modeled all effects and there is no need for any update.
indicateOptimisticFixpoint();
}
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
// call site specific liveness information and then it makes
// sense to specialize attributes for call site arguments instead of
// redirecting requests to the callee argument.
Function *F = getAssociatedFunction();
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(F);
// If F is not a runtime function, propagate the AAKernelInfo of the callee.
if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) {
const IRPosition &FnPos = IRPosition::function(*F);
auto &FnAA = A.getAAFor<AAKernelInfo>(*this, FnPos, DepClassTy::REQUIRED);
if (getState() == FnAA.getState())
return ChangeStatus::UNCHANGED;
getState() = FnAA.getState();
return ChangeStatus::CHANGED;
}
// F is a runtime function that allocates or frees memory, check
// AAHeapToStack and AAHeapToShared.
KernelInfoState StateBefore = getState();
assert((It->getSecond() == OMPRTL___kmpc_alloc_shared ||
It->getSecond() == OMPRTL___kmpc_free_shared) &&
"Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call");
CallBase &CB = cast<CallBase>(getAssociatedValue());
auto &HeapToStackAA = A.getAAFor<AAHeapToStack>(
*this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL);
auto &HeapToSharedAA = A.getAAFor<AAHeapToShared>(
*this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL);
RuntimeFunction RF = It->getSecond();
switch (RF) {
// If neither HeapToStack nor HeapToShared assume the call is removed,
// assume SPMD incompatibility.
case OMPRTL___kmpc_alloc_shared:
if (!HeapToStackAA.isAssumedHeapToStack(CB) &&
!HeapToSharedAA.isAssumedHeapToShared(CB))
SPMDCompatibilityTracker.insert(&CB);
break;
case OMPRTL___kmpc_free_shared:
if (!HeapToStackAA.isAssumedHeapToStackRemovedFree(CB) &&
!HeapToSharedAA.isAssumedHeapToSharedRemovedFree(CB))
SPMDCompatibilityTracker.insert(&CB);
break;
default:
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
SPMDCompatibilityTracker.insert(&CB);
}
return StateBefore == getState() ? ChangeStatus::UNCHANGED
: ChangeStatus::CHANGED;
}
};
struct AAFoldRuntimeCall
: public StateWrapper<BooleanState, AbstractAttribute> {
using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAFoldRuntimeCall(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
/// Statistics are tracked as part of manifest for now.
void trackStatistics() const override {}
/// Create an abstract attribute view for the position \p IRP.
static AAFoldRuntimeCall &createForPosition(const IRPosition &IRP,
Attributor &A);
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAFoldRuntimeCall"; }
/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }
/// This function should return true if the type of the \p AA is
/// AAFoldRuntimeCall
static bool classof(const AbstractAttribute *AA) {
return (AA->getIdAddr() == &ID);
}
static const char ID;
};
struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
AAFoldRuntimeCallCallSiteReturned(const IRPosition &IRP, Attributor &A)
: AAFoldRuntimeCall(IRP, A) {}
/// See AbstractAttribute::getAsStr()
const std::string getAsStr() const override {
if (!isValidState())
return "<invalid>";
std::string Str("simplified value: ");
if (!SimplifiedValue.hasValue())
return Str + std::string("none");
if (!SimplifiedValue.getValue())
return Str + std::string("nullptr");
if (ConstantInt *CI = dyn_cast<ConstantInt>(SimplifiedValue.getValue()))
return Str + std::to_string(CI->getSExtValue());
return Str + std::string("unknown");
}
void initialize(Attributor &A) override {
if (DisableOpenMPOptFolding)
indicatePessimisticFixpoint();
Function *Callee = getAssociatedFunction();
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee);
assert(It != OMPInfoCache.RuntimeFunctionIDMap.end() &&
"Expected a known OpenMP runtime function");
RFKind = It->getSecond();
CallBase &CB = cast<CallBase>(getAssociatedValue());
A.registerSimplificationCallback(
IRPosition::callsite_returned(CB),
[&](const IRPosition &IRP, const AbstractAttribute *AA,
bool &UsedAssumedInformation) -> Optional<Value *> {
assert((isValidState() || (SimplifiedValue.hasValue() &&
SimplifiedValue.getValue() == nullptr)) &&
"Unexpected invalid state!");
if (!isAtFixpoint()) {
UsedAssumedInformation = true;
if (AA)
A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
}
return SimplifiedValue;
});
}
ChangeStatus updateImpl(Attributor &A) override {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
switch (RFKind) {
case OMPRTL___kmpc_is_spmd_exec_mode:
Changed |= foldIsSPMDExecMode(A);
break;
case OMPRTL___kmpc_is_generic_main_thread_id:
Changed |= foldIsGenericMainThread(A);
break;
case OMPRTL___kmpc_parallel_level:
Changed |= foldParallelLevel(A);
break;
case OMPRTL___kmpc_get_hardware_num_threads_in_block:
Changed = Changed | foldKernelFnAttribute(A, "omp_target_thread_limit");
break;
case OMPRTL___kmpc_get_hardware_num_blocks:
Changed = Changed | foldKernelFnAttribute(A, "omp_target_num_teams");
break;
default:
llvm_unreachable("Unhandled OpenMP runtime function!");
}
return Changed;
}
ChangeStatus manifest(Attributor &A) override {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
if (SimplifiedValue.hasValue() && SimplifiedValue.getValue()) {
Instruction &I = *getCtxI();
A.changeValueAfterManifest(I, **SimplifiedValue);
A.deleteAfterManifest(I);
CallBase *CB = dyn_cast<CallBase>(&I);
auto Remark = [&](OptimizationRemark OR) {
if (auto *C = dyn_cast<ConstantInt>(*SimplifiedValue))
return OR << "Replacing OpenMP runtime call "
<< CB->getCalledFunction()->getName() << " with "
<< ore::NV("FoldedValue", C->getZExtValue()) << ".";
return OR << "Replacing OpenMP runtime call "
<< CB->getCalledFunction()->getName() << ".";
};
if (CB && EnableVerboseRemarks)
A.emitRemark<OptimizationRemark>(CB, "OMP180", Remark);
LLVM_DEBUG(dbgs() << TAG << "Replacing runtime call: " << I << " with "
<< **SimplifiedValue << "\n");
Changed = ChangeStatus::CHANGED;
}
return Changed;
}
ChangeStatus indicatePessimisticFixpoint() override {
SimplifiedValue = nullptr;
return AAFoldRuntimeCall::indicatePessimisticFixpoint();
}
private:
/// Fold __kmpc_is_spmd_exec_mode into a constant if possible.
ChangeStatus foldIsSPMDExecMode(Attributor &A) {
Optional<Value *> SimplifiedValueBefore = SimplifiedValue;
unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0;
unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0;
auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
*this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState())
return indicatePessimisticFixpoint();
for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) {
auto &AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K),
DepClassTy::REQUIRED);
if (!AA.isValidState()) {
SimplifiedValue = nullptr;
return indicatePessimisticFixpoint();
}
if (AA.SPMDCompatibilityTracker.isAssumed()) {
if (AA.SPMDCompatibilityTracker.isAtFixpoint())
++KnownSPMDCount;
else
++AssumedSPMDCount;
} else {
if (AA.SPMDCompatibilityTracker.isAtFixpoint())
++KnownNonSPMDCount;
else
++AssumedNonSPMDCount;
}
}
if ((AssumedSPMDCount + KnownSPMDCount) &&
(AssumedNonSPMDCount + KnownNonSPMDCount))
return indicatePessimisticFixpoint();
auto &Ctx = getAnchorValue().getContext();
if (KnownSPMDCount || AssumedSPMDCount) {
assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 &&
"Expected only SPMD kernels!");
// All reaching kernels are in SPMD mode. Update all function calls to
// __kmpc_is_spmd_exec_mode to 1.
SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), true);
} else if (KnownNonSPMDCount || AssumedNonSPMDCount) {
assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 &&
"Expected only non-SPMD kernels!");
// All reaching kernels are in non-SPMD mode. Update all function
// calls to __kmpc_is_spmd_exec_mode to 0.
SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), false);
} else {
// We have empty reaching kernels, therefore we cannot tell if the
// associated call site can be folded. At this moment, SimplifiedValue
// must be none.
assert(!SimplifiedValue.hasValue() && "SimplifiedValue should be none");
}
return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
: ChangeStatus::CHANGED;
}
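// Illustrative effect of the fold above (a sketch; assumes every kernel that
// can reach the caller is known or assumed to execute in SPMD mode):
//
//   %mode = call i8 @__kmpc_is_spmd_exec_mode()   ; folded to constant i8 1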
/// Fold __kmpc_is_generic_main_thread_id into a constant if possible.
ChangeStatus foldIsGenericMainThread(Attributor &A) {
Optional<Value *> SimplifiedValueBefore = SimplifiedValue;
CallBase &CB = cast<CallBase>(getAssociatedValue());
Function *F = CB.getFunction();
const auto &ExecutionDomainAA = A.getAAFor<AAExecutionDomain>(
*this, IRPosition::function(*F), DepClassTy::REQUIRED);
if (!ExecutionDomainAA.isValidState())
return indicatePessimisticFixpoint();
auto &Ctx = getAnchorValue().getContext();
if (ExecutionDomainAA.isExecutedByInitialThreadOnly(CB))
SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), true);
else
return indicatePessimisticFixpoint();
return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
: ChangeStatus::CHANGED;
}
/// Fold __kmpc_parallel_level into a constant if possible.
ChangeStatus foldParallelLevel(Attributor &A) {
Optional<Value *> SimplifiedValueBefore = SimplifiedValue;
auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
*this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
if (!CallerKernelInfoAA.ParallelLevels.isValidState())
return indicatePessimisticFixpoint();
if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState())
return indicatePessimisticFixpoint();
if (CallerKernelInfoAA.ReachingKernelEntries.empty()) {
assert(!SimplifiedValue.hasValue() &&
"SimplifiedValue should keep none at this point");
return ChangeStatus::UNCHANGED;
}
unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0;
unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0;
for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) {
auto &AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K),
DepClassTy::REQUIRED);
if (!AA.SPMDCompatibilityTracker.isValidState())
return indicatePessimisticFixpoint();
if (AA.SPMDCompatibilityTracker.isAssumed()) {
if (AA.SPMDCompatibilityTracker.isAtFixpoint())
++KnownSPMDCount;
else
++AssumedSPMDCount;
} else {
if (AA.SPMDCompatibilityTracker.isAtFixpoint())
++KnownNonSPMDCount;
else
++AssumedNonSPMDCount;
}
}
if ((AssumedSPMDCount + KnownSPMDCount) &&
(AssumedNonSPMDCount + KnownNonSPMDCount))
return indicatePessimisticFixpoint();
auto &Ctx = getAnchorValue().getContext();
// If the caller can only be reached by SPMD kernel entries, the parallel
// level is 1. Similarly, if the caller can only be reached by non-SPMD
// kernel entries, it is 0.
if (AssumedSPMDCount || KnownSPMDCount) {
assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 &&
"Expected only SPMD kernels!");
SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 1);
} else {
assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 &&
"Expected only non-SPMD kernels!");
SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 0);
}
return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
: ChangeStatus::CHANGED;
}
ChangeStatus foldKernelFnAttribute(Attributor &A, llvm::StringRef Attr) {
// Specialize only if all reaching kernels agree on the attribute's constant value.
int32_t CurrentAttrValue = -1;
Optional<Value *> SimplifiedValueBefore = SimplifiedValue;
auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
*this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState())
return indicatePessimisticFixpoint();
// Iterate over the kernels that reach this function
for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) {
int32_t NextAttrVal = -1;
if (K->hasFnAttribute(Attr))
NextAttrVal =
std::stoi(K->getFnAttribute(Attr).getValueAsString().str());
if (NextAttrVal == -1 ||
(CurrentAttrValue != -1 && CurrentAttrValue != NextAttrVal))
return indicatePessimisticFixpoint();
CurrentAttrValue = NextAttrVal;
}
if (CurrentAttrValue != -1) {
auto &Ctx = getAnchorValue().getContext();
SimplifiedValue =
ConstantInt::get(Type::getInt32Ty(Ctx), CurrentAttrValue);
}
return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
: ChangeStatus::CHANGED;
}
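// Illustrative effect of foldKernelFnAttribute (a sketch with a made-up
// value): if every reaching kernel carries "omp_target_thread_limit"="128",
// a __kmpc_get_hardware_num_threads_in_block() call in this function is
// assumed to fold to the i32 constant 128; if the kernels disagree, or any
// kernel lacks the attribute, we hit the pessimistic fixpoint above.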
/// An optional value the associated value is assumed to fold to. That is, we
/// assume the associated value (which is a call) can be replaced by this
/// simplified value.
Optional<Value *> SimplifiedValue;
/// The runtime function kind of the callee of the associated call site.
RuntimeFunction RFKind;
};
} // namespace
/// Register folding callsite
void OpenMPOpt::registerFoldRuntimeCall(RuntimeFunction RF) {
auto &RFI = OMPInfoCache.RFIs[RF];
RFI.foreachUse(SCC, [&](Use &U, Function &F) {
CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &RFI);
if (!CI)
return false;
A.getOrCreateAAFor<AAFoldRuntimeCall>(
IRPosition::callsite_returned(*CI), /* QueryingAA */ nullptr,
DepClassTy::NONE, /* ForceUpdate */ false,
/* UpdateAfterInit */ false);
return false;
});
}
void OpenMPOpt::registerAAs(bool IsModulePass) {
if (SCC.empty())
return;
if (IsModulePass) {
// Ensure we create the AAKernelInfo AAs first and without triggering an
// update. This will make sure we register all value simplification
// callbacks before any other AA has the chance to create an AAValueSimplify
// or similar.
for (Function *Kernel : OMPInfoCache.Kernels)
A.getOrCreateAAFor<AAKernelInfo>(
IRPosition::function(*Kernel), /* QueryingAA */ nullptr,
DepClassTy::NONE, /* ForceUpdate */ false,
/* UpdateAfterInit */ false);
registerFoldRuntimeCall(OMPRTL___kmpc_is_generic_main_thread_id);
registerFoldRuntimeCall(OMPRTL___kmpc_is_spmd_exec_mode);
registerFoldRuntimeCall(OMPRTL___kmpc_parallel_level);
registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_threads_in_block);
registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_blocks);
}
// Create CallSite AA for all Getters.
for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) {
auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)];
auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter];
auto CreateAA = [&](Use &U, Function &Caller) {
CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI);
if (!CI)
return false;
auto &CB = cast<CallBase>(*CI);
IRPosition CBPos = IRPosition::callsite_function(CB);
A.getOrCreateAAFor<AAICVTracker>(CBPos);
return false;
};
GetterRFI.foreachUse(SCC, CreateAA);
}
auto &GlobalizationRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
auto CreateAA = [&](Use &U, Function &F) {
A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F));
return false;
};
if (!DisableOpenMPOptDeglobalization)
GlobalizationRFI.foreachUse(SCC, CreateAA);
// Create an ExecutionDomain AA for every function and a HeapToStack AA for
// every function if there is a device kernel.
if (!isOpenMPDevice(M))
return;
for (auto *F : SCC) {
if (F->isDeclaration())
continue;
A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(*F));
if (!DisableOpenMPOptDeglobalization)
A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F));
for (auto &I : instructions(*F)) {
if (auto *LI = dyn_cast<LoadInst>(&I)) {
bool UsedAssumedInformation = false;
A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr,
UsedAssumedInformation);
} else if (auto *SI = dyn_cast<StoreInst>(&I)) {
A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*SI));
}
}
}
}
const char AAICVTracker::ID = 0;
const char AAKernelInfo::ID = 0;
const char AAExecutionDomain::ID = 0;
const char AAHeapToShared::ID = 0;
const char AAFoldRuntimeCall::ID = 0;
AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP,
Attributor &A) {
AAICVTracker *AA = nullptr;
switch (IRP.getPositionKind()) {
case IRPosition::IRP_INVALID:
case IRPosition::IRP_FLOAT:
case IRPosition::IRP_ARGUMENT:
case IRPosition::IRP_CALL_SITE_ARGUMENT:
llvm_unreachable("ICVTracker can only be created for function position!");
case IRPosition::IRP_RETURNED:
AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A);
break;
case IRPosition::IRP_CALL_SITE_RETURNED:
AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A);
break;
case IRPosition::IRP_CALL_SITE:
AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A);
break;
case IRPosition::IRP_FUNCTION:
AA = new (A.Allocator) AAICVTrackerFunction(IRP, A);
break;
}
return *AA;
}
AAExecutionDomain &AAExecutionDomain::createForPosition(const IRPosition &IRP,
Attributor &A) {
AAExecutionDomainFunction *AA = nullptr;
switch (IRP.getPositionKind()) {
case IRPosition::IRP_INVALID:
case IRPosition::IRP_FLOAT:
case IRPosition::IRP_ARGUMENT:
case IRPosition::IRP_CALL_SITE_ARGUMENT:
case IRPosition::IRP_RETURNED:
case IRPosition::IRP_CALL_SITE_RETURNED:
case IRPosition::IRP_CALL_SITE:
llvm_unreachable(
"AAExecutionDomain can only be created for function position!");
case IRPosition::IRP_FUNCTION:
AA = new (A.Allocator) AAExecutionDomainFunction(IRP, A);
break;
}
return *AA;
}
AAHeapToShared &AAHeapToShared::createForPosition(const IRPosition &IRP,
Attributor &A) {
AAHeapToSharedFunction *AA = nullptr;
switch (IRP.getPositionKind()) {
case IRPosition::IRP_INVALID:
case IRPosition::IRP_FLOAT:
case IRPosition::IRP_ARGUMENT:
case IRPosition::IRP_CALL_SITE_ARGUMENT:
case IRPosition::IRP_RETURNED:
case IRPosition::IRP_CALL_SITE_RETURNED:
case IRPosition::IRP_CALL_SITE:
llvm_unreachable(
"AAHeapToShared can only be created for function position!");
case IRPosition::IRP_FUNCTION:
AA = new (A.Allocator) AAHeapToSharedFunction(IRP, A);
break;
}
return *AA;
}
AAKernelInfo &AAKernelInfo::createForPosition(const IRPosition &IRP,
Attributor &A) {
AAKernelInfo *AA = nullptr;
switch (IRP.getPositionKind()) {
case IRPosition::IRP_INVALID:
case IRPosition::IRP_FLOAT:
case IRPosition::IRP_ARGUMENT:
case IRPosition::IRP_RETURNED:
case IRPosition::IRP_CALL_SITE_RETURNED:
case IRPosition::IRP_CALL_SITE_ARGUMENT:
llvm_unreachable("KernelInfo can only be created for function position!");
case IRPosition::IRP_CALL_SITE:
AA = new (A.Allocator) AAKernelInfoCallSite(IRP, A);
break;
case IRPosition::IRP_FUNCTION:
AA = new (A.Allocator) AAKernelInfoFunction(IRP, A);
break;
}
return *AA;
}
AAFoldRuntimeCall &AAFoldRuntimeCall::createForPosition(const IRPosition &IRP,
Attributor &A) {
AAFoldRuntimeCall *AA = nullptr;
switch (IRP.getPositionKind()) {
case IRPosition::IRP_INVALID:
case IRPosition::IRP_FLOAT:
case IRPosition::IRP_ARGUMENT:
case IRPosition::IRP_RETURNED:
case IRPosition::IRP_FUNCTION:
case IRPosition::IRP_CALL_SITE:
case IRPosition::IRP_CALL_SITE_ARGUMENT:
llvm_unreachable(
"AAFoldRuntimeCall can only be created for call site position!");
case IRPosition::IRP_CALL_SITE_RETURNED:
AA = new (A.Allocator) AAFoldRuntimeCallCallSiteReturned(IRP, A);
break;
}
return *AA;
}
PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
if (!containsOpenMP(M))
return PreservedAnalyses::all();
if (DisableOpenMPOptimizations)
return PreservedAnalyses::all();
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
KernelSet Kernels = getDeviceKernels(M);
auto IsCalled = [&](Function &F) {
if (Kernels.contains(&F))
return true;
for (const User *U : F.users())
if (!isa<BlockAddress>(U))
return true;
return false;
};
auto EmitRemark = [&](Function &F) {
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
ORE.emit([&]() {
OptimizationRemarkAnalysis ORA(DEBUG_TYPE, "OMP140", &F);
return ORA << "Could not internalize function. "
<< "Some optimizations may not be possible. [OMP140]";
});
};
// Create internal copies of each function if this is a kernel Module. This
// allows interprocedural passes to see every call edge.
DenseMap<Function *, Function *> InternalizedMap;
if (isOpenMPDevice(M)) {
SmallPtrSet<Function *, 16> InternalizeFns;
for (Function &F : M)
if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F) &&
!DisableInternalization) {
if (Attributor::isInternalizable(F)) {
InternalizeFns.insert(&F);
} else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) {
EmitRemark(F);
}
}
Attributor::internalizeFunctions(InternalizeFns, InternalizedMap);
}
// Look at every function in the Module unless it was internalized.
SmallVector<Function *, 16> SCC;
for (Function &F : M)
if (!F.isDeclaration() && !InternalizedMap.lookup(&F))
SCC.push_back(&F);
if (SCC.empty())
return PreservedAnalyses::all();
AnalysisGetter AG(FAM);
auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
};
BumpPtrAllocator Allocator;
CallGraphUpdater CGUpdater;
SetVector<Function *> Functions(SCC.begin(), SCC.end());
OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels);
unsigned MaxFixpointIterations =
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;
Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false,
MaxFixpointIterations, OREGetter, DEBUG_TYPE);
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
bool Changed = OMPOpt.run(true);
// Optionally inline device functions for potentially better performance.
if (AlwaysInlineDeviceFunctions && isOpenMPDevice(M))
for (Function &F : M)
if (!F.isDeclaration() && !Kernels.contains(&F) &&
!F.hasFnAttribute(Attribute::NoInline))
F.addFnAttr(Attribute::AlwaysInline);
if (PrintModuleAfterOptimizations)
LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt Module Pass:\n" << M);
if (Changed)
return PreservedAnalyses::none();
return PreservedAnalyses::all();
}
PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
CGSCCAnalysisManager &AM,
LazyCallGraph &CG,
CGSCCUpdateResult &UR) {
if (!containsOpenMP(*C.begin()->getFunction().getParent()))
return PreservedAnalyses::all();
if (DisableOpenMPOptimizations)
return PreservedAnalyses::all();
SmallVector<Function *, 16> SCC;
// If there are kernels in the module, we have to run on all SCC's.
for (LazyCallGraph::Node &N : C) {
Function *Fn = &N.getFunction();
SCC.push_back(Fn);
}
if (SCC.empty())
return PreservedAnalyses::all();
Module &M = *C.begin()->getFunction().getParent();
KernelSet Kernels = getDeviceKernels(M);
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
AnalysisGetter AG(FAM);
auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
};
BumpPtrAllocator Allocator;
CallGraphUpdater CGUpdater;
CGUpdater.initialize(CG, C, AM, UR);
SetVector<Function *> Functions(SCC.begin(), SCC.end());
OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
/*CGSCC*/ Functions, Kernels);
unsigned MaxFixpointIterations =
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;
Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
MaxFixpointIterations, OREGetter, DEBUG_TYPE);
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
bool Changed = OMPOpt.run(false);
if (PrintModuleAfterOptimizations)
LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n" << M);
if (Changed)
return PreservedAnalyses::none();
return PreservedAnalyses::all();
}
namespace {
struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
CallGraphUpdater CGUpdater;
static char ID;
OpenMPOptCGSCCLegacyPass() : CallGraphSCCPass(ID) {
initializeOpenMPOptCGSCCLegacyPassPass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
CallGraphSCCPass::getAnalysisUsage(AU);
}
bool runOnSCC(CallGraphSCC &CGSCC) override {
if (!containsOpenMP(CGSCC.getCallGraph().getModule()))
return false;
if (DisableOpenMPOptimizations || skipSCC(CGSCC))
return false;
SmallVector<Function *, 16> SCC;
// If there are kernels in the module, we have to run on all SCC's.
for (CallGraphNode *CGN : CGSCC) {
Function *Fn = CGN->getFunction();
if (!Fn || Fn->isDeclaration())
continue;
SCC.push_back(Fn);
}
if (SCC.empty())
return false;
Module &M = CGSCC.getCallGraph().getModule();
KernelSet Kernels = getDeviceKernels(M);
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
CGUpdater.initialize(CG, CGSCC);
// Maintain a map of functions to avoid rebuilding the ORE
DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap;
auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & {
std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F];
if (!ORE)
ORE = std::make_unique<OptimizationRemarkEmitter>(F);
return *ORE;
};
AnalysisGetter AG;
SetVector<Function *> Functions(SCC.begin(), SCC.end());
BumpPtrAllocator Allocator;
OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG,
Allocator,
/*CGSCC*/ Functions, Kernels);
unsigned MaxFixpointIterations =
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;
Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
MaxFixpointIterations, OREGetter, DEBUG_TYPE);
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
bool Result = OMPOpt.run(false);
if (PrintModuleAfterOptimizations)
LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n" << M);
return Result;
}
bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
};
} // end anonymous namespace
KernelSet llvm::omp::getDeviceKernels(Module &M) {
// TODO: Create a more cross-platform way of determining device kernels.
NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
KernelSet Kernels;
if (!MD)
return Kernels;
for (auto *Op : MD->operands()) {
if (Op->getNumOperands() < 2)
continue;
MDString *KindID = dyn_cast<MDString>(Op->getOperand(1));
if (!KindID || KindID->getString() != "kernel")
continue;
Function *KernelFn =
mdconst::dyn_extract_or_null<Function>(Op->getOperand(0));
if (!KernelFn)
continue;
++NumOpenMPTargetRegionKernels;
Kernels.insert(KernelFn);
}
return Kernels;
}
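// For reference, device kernels are expected to be announced via metadata of
// roughly this shape (a sketch; the exact entries are emitted by the OpenMP
// device toolchain, and @example_kernel is a placeholder name):
//
//   !nvvm.annotations = !{!0}
//   !0 = !{void ()* @example_kernel, !"kernel", i32 1}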
bool llvm::omp::containsOpenMP(Module &M) {
Metadata *MD = M.getModuleFlag("openmp");
if (!MD)
return false;
return true;
}
bool llvm::omp::isOpenMPDevice(Module &M) {
Metadata *MD = M.getModuleFlag("openmp-device");
if (!MD)
return false;
return true;
}
char OpenMPOptCGSCCLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc",
"OpenMP specific optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc",
"OpenMP specific optimizations", false, false)
Pass *llvm::createOpenMPOptCGSCCLegacyPass() {
return new OpenMPOptCGSCCLegacyPass();
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index ae636e7b61f7..c5c8e880eb3d 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1,2198 +1,2208 @@
//===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The code below implements dead store elimination using MemorySSA. It uses
// the following general approach: given a MemoryDef, walk upwards to find
// clobbering MemoryDefs that may be killed by the starting def. Then check
// that there are no uses that may read the location of the original MemoryDef
// in between both MemoryDefs. A bit more concretely:
//
// For all MemoryDefs StartDef:
// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
// upwards.
// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
// checking all uses starting at MaybeDeadAccess and walking until we see
// StartDef.
// 3. For each found CurrentDef, check that:
// 1. There are no barrier instructions between CurrentDef and StartDef (like
// throws or stores with ordering constraints).
// 2. StartDef is executed whenever CurrentDef is executed.
// 3. StartDef completely overwrites CurrentDef.
// 4. Erase CurrentDef from the function and MemorySSA.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <map>
#include <utility>
using namespace llvm;
using namespace PatternMatch;
#define DEBUG_TYPE "dse"
STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
STATISTIC(NumFastStores, "Number of stores deleted");
STATISTIC(NumFastOther, "Number of other instrs removed");
STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
STATISTIC(NumModifiedStores, "Number of stores modified");
STATISTIC(NumCFGChecks, "Number of blocks checked when proving a store dead along all exit paths");
STATISTIC(NumCFGTries, "Number of times the CFG walk to prove a store dead was attempted");
STATISTIC(NumCFGSuccess, "Number of times the CFG walk proved a store dead along all exit paths");
STATISTIC(NumGetDomMemoryDefPassed,
"Number of times a valid candidate is returned from getDomMemoryDef");
STATISTIC(NumDomMemDefChecks,
"Number iterations check for reads in getDomMemoryDef");
DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
"Controls which MemoryDefs are eliminated.");
static cl::opt<bool>
EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
cl::init(true), cl::Hidden,
cl::desc("Enable partial-overwrite tracking in DSE"));
static cl::opt<bool>
EnablePartialStoreMerging("enable-dse-partial-store-merging",
cl::init(true), cl::Hidden,
cl::desc("Enable partial store merging in DSE"));
static cl::opt<unsigned>
MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
cl::desc("The number of memory instructions to scan for "
"dead store elimination (default = 150)"));
static cl::opt<unsigned> MemorySSAUpwardsStepLimit(
"dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
cl::desc("The maximum number of steps while walking upwards to find "
"MemoryDefs that may be killed (default = 90)"));
static cl::opt<unsigned> MemorySSAPartialStoreLimit(
"dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
cl::desc("The maximum number candidates that only partially overwrite the "
"killing MemoryDef to consider"
" (default = 5)"));
static cl::opt<unsigned> MemorySSADefsPerBlockLimit(
"dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
cl::desc("The number of MemoryDefs we consider as candidates to eliminated "
"other stores per basic block (default = 5000)"));
static cl::opt<unsigned> MemorySSASameBBStepCost(
"dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
cl::desc(
"The cost of a step in the same basic block as the killing MemoryDef"
"(default = 1)"));
static cl::opt<unsigned>
MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
cl::Hidden,
cl::desc("The cost of a step in a different basic "
"block than the killing MemoryDef"
"(default = 5)"));
static cl::opt<unsigned> MemorySSAPathCheckLimit(
"dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,
cl::desc("The maximum number of blocks to check when trying to prove that "
"all paths to an exit go through a killing block (default = 50)"));
// This flag allows or disallows DSE to optimize MemorySSA during its
// traversal. Note that DSE optimizing MemorySSA may impact other passes
// downstream of the DSE invocation and can lead to issues not being
// reproducible in isolation (i.e. when MemorySSA is built from scratch). In
// those cases, the flag can be used to check if DSE's MemorySSA optimizations
// impact follow-up passes.
static cl::opt<bool>
OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
cl::desc("Allow DSE to optimize memory accesses."));
//===----------------------------------------------------------------------===//
// Helper functions
//===----------------------------------------------------------------------===//
using OverlapIntervalsTy = std::map<int64_t, int64_t>;
using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;
/// Returns true if the end of this instruction can be safely shortened in
/// length.
static bool isShortenableAtTheEnd(Instruction *I) {
// Don't shorten stores for now
if (isa<StoreInst>(I))
return false;
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
default: return false;
case Intrinsic::memset:
case Intrinsic::memcpy:
case Intrinsic::memcpy_element_unordered_atomic:
case Intrinsic::memset_element_unordered_atomic:
// Do shorten memory intrinsics.
// FIXME: Add memmove if it's also safe to transform.
return true;
}
}
// Don't shorten libcalls for now.
return false;
}
/// Returns true if the beginning of this instruction can be safely shortened
/// in length.
static bool isShortenableAtTheBeginning(Instruction *I) {
// FIXME: Handle only memset for now. Supporting memcpy/memmove should be
// easily done by offsetting the source address.
return isa<AnyMemSetInst>(I);
}
static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
const TargetLibraryInfo &TLI,
const Function *F) {
uint64_t Size;
ObjectSizeOpts Opts;
Opts.NullIsUnknownSize = NullPointerIsDefined(F);
if (getObjectSize(V, Size, DL, &TLI, Opts))
return Size;
return MemoryLocation::UnknownSize;
}
namespace {
enum OverwriteResult {
OW_Begin,
OW_Complete,
OW_End,
OW_PartialEarlierWithFullLater,
OW_MaybePartial,
OW_None,
OW_Unknown
};
} // end anonymous namespace
/// Check if two instructions are masked stores that completely
/// overwrite one another. More specifically, \p KillingI has to
/// overwrite \p DeadI.
static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
const Instruction *DeadI,
BatchAAResults &AA) {
const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
if (KillingII == nullptr || DeadII == nullptr)
return OW_Unknown;
if (KillingII->getIntrinsicID() != Intrinsic::masked_store ||
DeadII->getIntrinsicID() != Intrinsic::masked_store)
return OW_Unknown;
// Pointers.
Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts();
Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts();
if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
return OW_Unknown;
// Masks.
// TODO: check that KillingII's mask is a superset of the DeadII's mask.
if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
return OW_Unknown;
return OW_Complete;
}
/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
/// overwritten by a killing (smaller) store which doesn't write outside the big
/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
/// NOTE: This function must only be called if both \p KillingLoc and \p
/// DeadLoc belong to the same underlying object with valid \p KillingOff and
/// \p DeadOff.
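/// For illustration, if a dead 8-byte store at offset 0 is later covered by
/// two 4-byte killing stores at offsets 0 and 4, the first call records the
/// interval [0, 4) and the second call merges it with [4, 8); the composite
/// interval then covers the whole dead store and OW_Complete is returned.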
static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
const MemoryLocation &DeadLoc,
int64_t KillingOff, int64_t DeadOff,
Instruction *DeadI,
InstOverlapIntervalsTy &IOL) {
const uint64_t KillingSize = KillingLoc.Size.getValue();
const uint64_t DeadSize = DeadLoc.Size.getValue();
// We may now overlap, although the overlap is not complete. There might also
// be other incomplete overlaps, and together, they might cover the complete
// dead store.
// Note: The correctness of this logic depends on the fact that this function
// is not called at all when there are any intervening reads of the dead
// store (DepWrite).
if (EnablePartialOverwriteTracking &&
KillingOff < int64_t(DeadOff + DeadSize) &&
int64_t(KillingOff + KillingSize) >= DeadOff) {
// Insert our part of the overlap into the map.
auto &IM = IOL[DeadI];
LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
<< int64_t(DeadOff + DeadSize) << ") KillingLoc ["
<< KillingOff << ", " << int64_t(KillingOff + KillingSize)
<< ")\n");
// Make sure that we only insert non-overlapping intervals and combine
// adjacent intervals. The intervals are stored in the map with the ending
// offset as the key (in the half-open sense) and the starting offset as
// the value.
int64_t KillingIntStart = KillingOff;
int64_t KillingIntEnd = KillingOff + KillingSize;
// Find any intervals ending at, or after, KillingIntStart which start
// before KillingIntEnd.
auto ILI = IM.lower_bound(KillingIntStart);
if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
// This existing interval is overlapped with the current store somewhere
// in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
// intervals and adjusting our start and end.
KillingIntStart = std::min(KillingIntStart, ILI->second);
KillingIntEnd = std::max(KillingIntEnd, ILI->first);
ILI = IM.erase(ILI);
// Continue erasing and adjusting our end in case other previous
// intervals are also overlapped with the current store.
//
// |--- dead 1 ---| |--- dead 2 ---|
// |------- killing---------|
//
while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
assert(ILI->second > KillingIntStart && "Unexpected interval");
KillingIntEnd = std::max(KillingIntEnd, ILI->first);
ILI = IM.erase(ILI);
}
}
IM[KillingIntEnd] = KillingIntStart;
ILI = IM.begin();
if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
<< DeadOff << ", " << int64_t(DeadOff + DeadSize)
<< ") Composite KillingLoc [" << ILI->second << ", "
<< ILI->first << ")\n");
++NumCompletePartials;
return OW_Complete;
}
}
// Check for a dead store which writes to all the memory locations that
// the killing store writes to.
if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
int64_t(DeadOff + DeadSize) > KillingOff &&
uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a dead load [" << DeadOff
<< ", " << int64_t(DeadOff + DeadSize)
<< ") by a killing store [" << KillingOff << ", "
<< int64_t(KillingOff + KillingSize) << ")\n");
// TODO: Maybe come up with a better name?
return OW_PartialEarlierWithFullLater;
}
// Another interesting case is if the killing store overwrites the end of the
// dead store.
//
// |--dead--|
// |-- killing --|
//
// In this case we may want to trim the size of the dead store to avoid
// generating stores to addresses which will definitely be overwritten by
// the killing store.
if (!EnablePartialOverwriteTracking &&
(KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
return OW_End;
// Finally, we also need to check if the killing store overwrites the
// beginning of the dead store.
//
// |--dead--|
// |-- killing --|
//
// In this case we may want to move the destination address and trim the size
// of the dead store to avoid generating stores to addresses which will
// definitely be overwritten by the killing store.
if (!EnablePartialOverwriteTracking &&
(KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
"Expect to be handled as OW_Complete");
return OW_Begin;
}
// Otherwise, they don't completely overlap.
return OW_Unknown;
}
/// Returns true if the memory which is accessed by the second instruction is not
/// modified between the first and the second instruction.
/// Precondition: Second instruction must be dominated by the first
/// instruction.
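/// For illustration, for a store to %p followed later by a second store to
/// %p, the backwards walk below visits every block on paths between the two
/// and returns false as soon as any visited instruction may write to the
/// second store's location.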
static bool
memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
BatchAAResults &AA, const DataLayout &DL,
DominatorTree *DT) {
// Do a backwards scan through the CFG from SecondI to FirstI. Look for
// instructions which can modify the memory location accessed by SecondI.
//
// While doing the walk keep track of the address to check. It might be
// different in different basic blocks due to PHI translation.
using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
SmallVector<BlockAddressPair, 16> WorkList;
// Keep track of the address we visited each block with. Bail out if we
// visit a block with different addresses.
DenseMap<BasicBlock *, Value *> Visited;
BasicBlock::iterator FirstBBI(FirstI);
++FirstBBI;
BasicBlock::iterator SecondBBI(SecondI);
BasicBlock *FirstBB = FirstI->getParent();
BasicBlock *SecondBB = SecondI->getParent();
MemoryLocation MemLoc;
if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
MemLoc = MemoryLocation::getForDest(MemSet);
else
MemLoc = MemoryLocation::get(SecondI);
auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
// Start checking the SecondBB.
WorkList.push_back(
std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
bool isFirstBlock = true;
// Check all blocks going backward until we reach the FirstBB.
while (!WorkList.empty()) {
BlockAddressPair Current = WorkList.pop_back_val();
BasicBlock *B = Current.first;
PHITransAddr &Addr = Current.second;
Value *Ptr = Addr.getAddr();
// Ignore instructions before FirstI if this is the FirstBB.
BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
BasicBlock::iterator EI;
if (isFirstBlock) {
// Ignore instructions after SecondI if this is the first visit of SecondBB.
assert(B == SecondBB && "first block is not the store block");
EI = SecondBBI;
isFirstBlock = false;
} else {
// It's not SecondBB or (in case of a loop) the second visit of SecondBB.
// In this case we also have to look at instructions after SecondI.
EI = B->end();
}
for (; BI != EI; ++BI) {
Instruction *I = &*BI;
if (I->mayWriteToMemory() && I != SecondI)
if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
return false;
}
if (B != FirstBB) {
assert(B != &FirstBB->getParent()->getEntryBlock() &&
"Should not hit the entry block because SI must be dominated by LI");
for (BasicBlock *Pred : predecessors(B)) {
PHITransAddr PredAddr = Addr;
if (PredAddr.NeedsPHITranslationFromBlock(B)) {
if (!PredAddr.IsPotentiallyPHITranslatable())
return false;
if (PredAddr.PHITranslateValue(B, Pred, DT, false))
return false;
}
Value *TranslatedPtr = PredAddr.getAddr();
auto Inserted = Visited.insert(std::make_pair(Pred, TranslatedPtr));
if (!Inserted.second) {
// We already visited this block before. If it was with a different
// address - bail out!
if (TranslatedPtr != Inserted.first->second)
return false;
// ... otherwise just skip it.
continue;
}
WorkList.push_back(std::make_pair(Pred, PredAddr));
}
}
}
return true;
}
static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
uint64_t &DeadSize, int64_t KillingStart,
uint64_t KillingSize, bool IsOverwriteEnd) {
auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
// We assume that memset/memcpy operates in chunks of the "largest" native
// type size, aligned on the same value. That means the optimal start and size
// of a memset/memcpy should be a multiple of the preferred alignment of that
// type, so there is no sense in trying to reduce the store size any further:
// any "extra" stores come for free anyway.
// On the other hand, maximum alignment we can achieve is limited by alignment
// of initial store.
// TODO: Limit maximum alignment by preferred (or abi?) alignment of the
// "largest" native type.
// Note: What is the proper way to get that value?
// Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
// PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
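// For example (illustrative numbers): when shortening the end of a 28-byte
// memset starting at DeadStart = 0 that is killed from offset 18 onwards
// with PrefAlign = 4, the removal start is rounded up to offset 20, so the
// remaining store keeps a size (20) that is a multiple of the alignment.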
int64_t ToRemoveStart = 0;
uint64_t ToRemoveSize = 0;
// Compute start and size of the region to remove. Make sure 'PrefAlign' is
// maintained on the remaining store.
if (IsOverwriteEnd) {
// Calculate required adjustment for 'KillingStart' in order to keep
// remaining store size aligned on 'PrefAlign'.
uint64_t Off =
offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
ToRemoveStart = KillingStart + Off;
if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
return false;
ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
} else {
ToRemoveStart = DeadStart;
assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
"Not overlapping accesses?");
ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
// Calculate required adjustment for 'ToRemoveSize' in order to keep
// start of the remaining store aligned on 'PrefAlign'.
uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
if (Off != 0) {
if (ToRemoveSize <= (PrefAlign.value() - Off))
return false;
ToRemoveSize -= PrefAlign.value() - Off;
}
assert(isAligned(PrefAlign, ToRemoveSize) &&
"Should preserve selected alignment");
}
assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
uint64_t NewSize = DeadSize - ToRemoveSize;
if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
// When shortening an atomic memory intrinsic, the newly shortened
// length must remain an integer multiple of the element size.
const uint32_t ElementSize = AMI->getElementSizeInBytes();
if (0 != NewSize % ElementSize)
return false;
}
LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
<< (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
<< "\n KILLER [" << ToRemoveStart << ", "
<< int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
Value *DeadWriteLength = DeadIntrinsic->getLength();
Value *TrimmedLength = ConstantInt::get(DeadWriteLength->getType(), NewSize);
DeadIntrinsic->setLength(TrimmedLength);
DeadIntrinsic->setDestAlignment(PrefAlign);
if (!IsOverwriteEnd) {
Value *OrigDest = DeadIntrinsic->getRawDest();
Type *Int8PtrTy =
Type::getInt8PtrTy(DeadIntrinsic->getContext(),
OrigDest->getType()->getPointerAddressSpace());
Value *Dest = OrigDest;
if (OrigDest->getType() != Int8PtrTy)
Dest = CastInst::CreatePointerCast(OrigDest, Int8PtrTy, "", DeadI);
Value *Indices[1] = {
ConstantInt::get(DeadWriteLength->getType(), ToRemoveSize)};
Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
Type::getInt8Ty(DeadIntrinsic->getContext()), Dest, Indices, "", DeadI);
NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
if (NewDestGEP->getType() != OrigDest->getType())
NewDestGEP = CastInst::CreatePointerCast(NewDestGEP, OrigDest->getType(),
"", DeadI);
DeadIntrinsic->setDest(NewDestGEP);
}
// Finally update start and size of dead access.
if (!IsOverwriteEnd)
DeadStart += ToRemoveSize;
DeadSize = NewSize;
return true;
}
static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
int64_t &DeadStart, uint64_t &DeadSize) {
if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
return false;
OverlapIntervalsTy::iterator OII = --IntervalMap.end();
int64_t KillingStart = OII->second;
uint64_t KillingSize = OII->first - KillingStart;
assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
if (KillingStart > DeadStart &&
// Note: "KillingStart - KillingStart" is known to be positive due to
// preceding check.
(uint64_t)(KillingStart - DeadStart) < DeadSize &&
// Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
// be non negative due to preceding checks.
KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
true)) {
IntervalMap.erase(OII);
return true;
}
}
return false;
}
static bool tryToShortenBegin(Instruction *DeadI,
OverlapIntervalsTy &IntervalMap,
int64_t &DeadStart, uint64_t &DeadSize) {
if (IntervalMap.empty() || !isShortenableAtTheBeginning(DeadI))
return false;
OverlapIntervalsTy::iterator OII = IntervalMap.begin();
int64_t KillingStart = OII->second;
uint64_t KillingSize = OII->first - KillingStart;
assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
if (KillingStart <= DeadStart &&
// Note: "DeadStart - KillingStart" is known to be non negative due to
// preceding check.
KillingSize > (uint64_t)(DeadStart - KillingStart)) {
// Note: "KillingSize - (uint64_t)(DeadStart - DeadStart)" is known to
// be positive due to preceding checks.
assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
"Should have been handled as OW_Complete");
if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
false)) {
IntervalMap.erase(OII);
return true;
}
}
return false;
}
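// For illustration (little-endian, hypothetical values): merging a dead
// i32 store of 0x11223344 with a killing i8 store of 0xAA at byte offset 1
// gives BitOffsetDiff = 8, so the merged constant is 0x1122AA44, which is
// what the two stores would have produced in memory together.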
static Constant *
tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
int64_t KillingOffset, int64_t DeadOffset,
const DataLayout &DL, BatchAAResults &AA,
DominatorTree *DT) {
if (DeadI && isa<ConstantInt>(DeadI->getValueOperand()) &&
DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) &&
KillingI && isa<ConstantInt>(KillingI->getValueOperand()) &&
DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) &&
memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT)) {
// If the store we find is:
// a) partially overwritten by the store to 'Loc'
// b) the killing store is fully contained in the dead one and
// c) they both have a constant value
// d) none of the two stores need padding
// Merge the two stores, replacing the dead store's value with a
// merge of both values.
// TODO: Deal with other constant types (vectors, etc), and probably
// some mem intrinsics (if needed)
APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue();
APInt KillingValue =
cast<ConstantInt>(KillingI->getValueOperand())->getValue();
unsigned KillingBits = KillingValue.getBitWidth();
assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
KillingValue = KillingValue.zext(DeadValue.getBitWidth());
// Offset of the smaller store inside the larger store
unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
unsigned LShiftAmount =
DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
: BitOffsetDiff;
APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
LShiftAmount + KillingBits);
// Clear the bits we'll be replacing, then OR with the smaller
// store, shifted appropriately.
APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
<< "\n Killing: " << *KillingI
<< "\n Merged Value: " << Merged << '\n');
return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
}
return nullptr;
}
namespace {
// Returns true if \p I is an intrinsic that does not read or write memory.
bool isNoopIntrinsic(Instruction *I) {
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::invariant_end:
case Intrinsic::launder_invariant_group:
case Intrinsic::assume:
return true;
case Intrinsic::dbg_addr:
case Intrinsic::dbg_declare:
case Intrinsic::dbg_label:
case Intrinsic::dbg_value:
llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
default:
return false;
}
}
return false;
}
// Check if we can ignore \p D for DSE.
bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
Instruction *DI = D->getMemoryInst();
// Calls that only access inaccessible memory cannot read or write any memory
// locations we consider for elimination.
if (auto *CB = dyn_cast<CallBase>(DI))
if (CB->onlyAccessesInaccessibleMemory())
return true;
// We can eliminate stores to locations not visible to the caller across
// throwing instructions.
if (DI->mayThrow() && !DefVisibleToCaller)
return true;
// We can remove the dead stores, irrespective of the fence and its ordering
// (release/acquire/seq_cst). Fences only constrain the ordering of
// already visible stores; they do not make a store visible to other
// threads. So, skipping over a fence does not change a store from being
// dead.
if (isa<FenceInst>(DI))
return true;
// Skip intrinsics that do not really read or modify memory.
if (isNoopIntrinsic(DI))
return true;
return false;
}
struct DSEState {
Function &F;
AliasAnalysis &AA;
EarliestEscapeInfo EI;
/// The single BatchAA instance that is used to cache AA queries. It will
/// not be invalidated over the whole run. This is safe, because:
/// 1. Only memory writes are removed, so the alias cache for memory
/// locations remains valid.
/// 2. No new instructions are added (only instructions removed), so cached
/// information for a deleted value cannot be accessed by a re-used new
/// value pointer.
BatchAAResults BatchAA;
MemorySSA &MSSA;
DominatorTree &DT;
PostDominatorTree &PDT;
const TargetLibraryInfo &TLI;
const DataLayout &DL;
const LoopInfo &LI;
// Whether the function contains any irreducible control flow, useful for
// being accurately able to detect loops.
bool ContainsIrreducibleLoops;
// All MemoryDefs that potentially could kill other MemDefs.
SmallVector<MemoryDef *, 64> MemDefs;
// Any that should be skipped as they are already deleted
SmallPtrSet<MemoryAccess *, 4> SkipStores;
// Keep track whether a given object is captured before return or not.
DenseMap<const Value *, bool> CapturedBeforeReturn;
// Keep track of all of the objects that are invisible to the caller after
// the function returns.
DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
// Keep track of blocks with throwing instructions not modeled in MemorySSA.
SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
// Post-order numbers for each basic block. Used to figure out if memory
// accesses are executed before another access.
DenseMap<BasicBlock *, unsigned> PostOrderNumbers;
/// Keep track of instructions (partly) overlapping with killing MemoryDefs per
/// basic block.
MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
+ // Check if there are root nodes that are terminated by UnreachableInst.
+ // Those roots pessimize post-dominance queries. If there are such roots,
+ // fall back to CFG scan starting from all non-unreachable roots.
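+ // For example, a block that calls a noreturn function and then ends in an
+ // 'unreachable' terminator is such a root: it is not a real function exit,
+ // but it still appears as a root of the post-dominator tree.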
+ bool AnyUnreachableExit;
// Class contains self-reference, make sure it's not copied/moved.
DSEState(const DSEState &) = delete;
DSEState &operator=(const DSEState &) = delete;
DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
const LoopInfo &LI)
: F(F), AA(AA), EI(DT, LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT),
PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
// Collect blocks with throwing instructions not modeled in MemorySSA and
// alloc-like objects.
unsigned PO = 0;
for (BasicBlock *BB : post_order(&F)) {
PostOrderNumbers[BB] = PO++;
for (Instruction &I : *BB) {
MemoryAccess *MA = MSSA.getMemoryAccess(&I);
if (I.mayThrow() && !MA)
ThrowingBlocks.insert(I.getParent());
auto *MD = dyn_cast_or_null<MemoryDef>(MA);
if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
(getLocForWrite(&I) || isMemTerminatorInst(&I)))
MemDefs.push_back(MD);
}
}
// Treat byval or inalloca arguments the same as Allocas, stores to them are
// dead at the end of the function.
for (Argument &AI : F.args())
if (AI.hasPassPointeeByValueCopyAttr())
InvisibleToCallerAfterRet.insert({&AI, true});
// Collect whether there is any irreducible control flow in the function.
ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
+
+ AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) {
+ return isa<UnreachableInst>(E->getTerminator());
+ });
}
/// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
/// KillingI instruction) completely overwrites a store to the 'DeadLoc'
/// location (by \p DeadI instruction).
/// Return OW_MaybePartial if \p KillingI does not completely overwrite
/// \p DeadI, but they both write to the same underlying object. In that
/// case, use isPartialOverwrite to check if \p KillingI partially overwrites
/// \p DeadI. Returns 'OR_None' if \p KillingI is known to not overwrite the
/// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
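/// For illustration, within the same underlying object a 4-byte dead store
/// at offset 4 is completely overwritten (OW_Complete) by an 8-byte killing
/// store at offset 0, only partially overwritten (OW_MaybePartial) by a
/// 4-byte killing store at offset 2, and not overwritten at all (OW_None)
/// by a disjoint 4-byte killing store at offset 0.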
OverwriteResult isOverwrite(const Instruction *KillingI,
const Instruction *DeadI,
const MemoryLocation &KillingLoc,
const MemoryLocation &DeadLoc,
int64_t &KillingOff, int64_t &DeadOff) {
// AliasAnalysis does not always account for loops. Limit overwrite checks
// to dependencies for which we can guarantee they are independent of any
// loops they are in.
if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
return OW_Unknown;
const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
const Value *KillingUndObj = getUnderlyingObject(KillingPtr);
// Check whether the killing store overwrites the whole object, in which
// case the size/offset of the dead store does not matter.
if (DeadUndObj == KillingUndObj && KillingLoc.Size.isPrecise()) {
uint64_t KillingUndObjSize = getPointerSize(KillingUndObj, DL, TLI, &F);
if (KillingUndObjSize != MemoryLocation::UnknownSize &&
KillingUndObjSize == KillingLoc.Size.getValue())
return OW_Complete;
}
// FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
// get imprecise values here, though (except for unknown sizes).
if (!KillingLoc.Size.isPrecise() || !DeadLoc.Size.isPrecise()) {
// In case no constant size is known, try to use the IR values for the number
// of bytes written and check if they match.
const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
if (KillingMemI && DeadMemI) {
const Value *KillingV = KillingMemI->getLength();
const Value *DeadV = DeadMemI->getLength();
if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
return OW_Complete;
}
// Masked stores have imprecise locations, but we can reason about them
// to some extent.
return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
}
const uint64_t KillingSize = KillingLoc.Size.getValue();
const uint64_t DeadSize = DeadLoc.Size.getValue();
// Query the alias information
AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
// If the start pointers are the same, we just have to compare sizes to see if
// the killing store was larger than the dead store.
if (AAR == AliasResult::MustAlias) {
// Make sure that the KillingSize size is >= the DeadSize size.
if (KillingSize >= DeadSize)
return OW_Complete;
}
// If we hit a partial alias we may have a full overwrite
if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
int32_t Off = AAR.getOffset();
if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
return OW_Complete;
}
// If we can't resolve the same pointers to the same object, then we can't
// analyze them at all.
if (DeadUndObj != KillingUndObj) {
// Non-aliasing stores to different objects don't overlap. Note that
// if the killing store is known to overwrite the whole object (an out of
// bounds access overwrites the whole object as well) then it is assumed to
// completely overwrite any store to the same object even if they don't
// actually alias (see next check).
if (AAR == AliasResult::NoAlias)
return OW_None;
return OW_Unknown;
}
// Okay, we have stores to two completely different pointers. Try to
// decompose the pointer into a "base + constant_offset" form. If the base
// pointers are equal, then we can reason about the two stores.
DeadOff = 0;
KillingOff = 0;
const Value *DeadBasePtr =
GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
const Value *KillingBasePtr =
GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);
// If the base pointers still differ, we have two completely different
// stores.
if (DeadBasePtr != KillingBasePtr)
return OW_Unknown;
// The killing access completely overlaps the dead store if and only if
// both start and end of the dead one is "inside" the killing one:
// |<->|--dead--|<->|
// |-----killing------|
// Accesses may overlap if and only if start of one of them is "inside"
// another one:
// |<->|--dead--|<-------->|
// |-------killing--------|
// OR
// |-------dead-------|
// |<->|---killing---|<----->|
//
// We have to be careful here as *Off is signed while *.Size is unsigned.
// Check if the dead access starts "not before" the killing one.
if (DeadOff >= KillingOff) {
// If the dead access ends "not after" the killing access then the
// dead one is completely overwritten by the killing one.
if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
return OW_Complete;
// If start of the dead access is "before" end of the killing access
// then accesses overlap.
else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
return OW_MaybePartial;
}
// If start of the killing access is "before" end of the dead access then
// accesses overlap.
else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
return OW_MaybePartial;
}
// Can reach here only if accesses are known not to overlap.
return OW_None;
}
bool isInvisibleToCallerAfterRet(const Value *V) {
if (isa<AllocaInst>(V))
return true;
auto I = InvisibleToCallerAfterRet.insert({V, false});
if (I.second) {
if (!isInvisibleToCallerOnUnwind(V)) {
I.first->second = false;
} else if (isNoAliasCall(V)) {
I.first->second = !PointerMayBeCaptured(V, true, false);
}
}
return I.first->second;
}
bool isInvisibleToCallerOnUnwind(const Value *V) {
bool RequiresNoCaptureBeforeUnwind;
if (!isNotVisibleOnUnwind(V, RequiresNoCaptureBeforeUnwind))
return false;
if (!RequiresNoCaptureBeforeUnwind)
return true;
auto I = CapturedBeforeReturn.insert({V, true});
if (I.second)
// NOTE: This could be made more precise by PointerMayBeCapturedBefore
// with the killing MemoryDef. But we refrain from doing so for now to
// limit compile-time and this does not cause any changes to the number
// of stores removed on a large test set in practice.
I.first->second = PointerMayBeCaptured(V, false, true);
return !I.first->second;
}
Optional<MemoryLocation> getLocForWrite(Instruction *I) const {
if (!I->mayWriteToMemory())
return None;
if (auto *CB = dyn_cast<CallBase>(I))
return MemoryLocation::getForDest(CB, TLI);
return MemoryLocation::getOrNone(I);
}
/// Assuming this instruction has a dead analyzable write, can we delete
/// this instruction?
bool isRemovable(Instruction *I) {
assert(getLocForWrite(I) && "Must have analyzable write");
// Don't remove volatile/atomic stores.
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return SI->isUnordered();
if (auto *CB = dyn_cast<CallBase>(I)) {
// Don't remove volatile memory intrinsics.
if (auto *MI = dyn_cast<MemIntrinsic>(CB))
return !MI->isVolatile();
// Never remove dead lifetime intrinsics, e.g. because they are followed
// by a free.
if (CB->isLifetimeStartOrEnd())
return false;
return CB->use_empty() && CB->willReturn() && CB->doesNotThrow();
}
return false;
}
/// Returns true if \p UseInst completely overwrites \p DefLoc
/// (stored by \p DefInst).
bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
Instruction *UseInst) {
// UseInst has a MemoryDef associated in MemorySSA. It's possible for a
// MemoryDef to not write to memory, e.g. a volatile load is modeled as a
// MemoryDef.
if (!UseInst->mayWriteToMemory())
return false;
if (auto *CB = dyn_cast<CallBase>(UseInst))
if (CB->onlyAccessesInaccessibleMemory())
return false;
int64_t InstWriteOffset, DepWriteOffset;
if (auto CC = getLocForWrite(UseInst))
return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
DepWriteOffset) == OW_Complete;
return false;
}
/// Returns true if \p Def is not read before returning from the function.
bool isWriteAtEndOfFunction(MemoryDef *Def) {
LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
<< *Def->getMemoryInst()
<< ") is at the end the function \n");
auto MaybeLoc = getLocForWrite(Def->getMemoryInst());
if (!MaybeLoc) {
LLVM_DEBUG(dbgs() << " ... could not get location for write.\n");
return false;
}
SmallVector<MemoryAccess *, 4> WorkList;
SmallPtrSet<MemoryAccess *, 8> Visited;
auto PushMemUses = [&WorkList, &Visited](MemoryAccess *Acc) {
if (!Visited.insert(Acc).second)
return;
for (Use &U : Acc->uses())
WorkList.push_back(cast<MemoryAccess>(U.getUser()));
};
PushMemUses(Def);
for (unsigned I = 0; I < WorkList.size(); I++) {
if (WorkList.size() >= MemorySSAScanLimit) {
LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
return false;
}
MemoryAccess *UseAccess = WorkList[I];
// Simply adding the users of MemoryPhi to the worklist is not enough,
// because we might miss read clobbers in different iterations of a loop,
// for example.
// TODO: Add support for phi translation to handle the loop case.
if (isa<MemoryPhi>(UseAccess))
return false;
// TODO: Checking for aliasing is expensive. Consider reducing the amount
// of times this is called and/or caching it.
Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
if (isReadClobber(*MaybeLoc, UseInst)) {
LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
return false;
}
if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
PushMemUses(UseDef);
}
return true;
}
/// If \p I is a memory terminator like llvm.lifetime.end or free, return a
/// pair with the MemoryLocation terminated by \p I and a boolean flag
/// indicating whether \p I is a free-like call.
Optional<std::pair<MemoryLocation, bool>>
getLocForTerminator(Instruction *I) const {
uint64_t Len;
Value *Ptr;
if (match(I, m_Intrinsic<Intrinsic::lifetime_end>(m_ConstantInt(Len),
m_Value(Ptr))))
return {std::make_pair(MemoryLocation(Ptr, Len), false)};
if (auto *CB = dyn_cast<CallBase>(I)) {
if (isFreeCall(I, &TLI))
return {std::make_pair(MemoryLocation::getAfter(CB->getArgOperand(0)),
true)};
}
return None;
}
/// Returns true if \p I is a memory terminator instruction like
/// llvm.lifetime.end or free.
bool isMemTerminatorInst(Instruction *I) const {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
return (II && II->getIntrinsicID() == Intrinsic::lifetime_end) ||
isFreeCall(I, &TLI);
}
/// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
/// instruction \p AccessI.
bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
Instruction *MaybeTerm) {
Optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
getLocForTerminator(MaybeTerm);
if (!MaybeTermLoc)
return false;
// If the terminator is a free-like call, all accesses to the underlying
// object can be considered terminated.
if (getUnderlyingObject(Loc.Ptr) !=
getUnderlyingObject(MaybeTermLoc->first.Ptr))
return false;
auto TermLoc = MaybeTermLoc->first;
if (MaybeTermLoc->second) {
const Value *LocUO = getUnderlyingObject(Loc.Ptr);
return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
}
int64_t InstWriteOffset = 0;
int64_t DepWriteOffset = 0;
return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
DepWriteOffset) == OW_Complete;
}
// Returns true if \p Use may read from \p DefLoc.
bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
if (isNoopIntrinsic(UseInst))
return false;
// Monotonic or weaker atomic stores can be re-ordered and do not need to be
// treated as read clobber.
if (auto SI = dyn_cast<StoreInst>(UseInst))
return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);
if (!UseInst->mayReadFromMemory())
return false;
if (auto *CB = dyn_cast<CallBase>(UseInst))
if (CB->onlyAccessesInaccessibleMemory())
return false;
return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
}
/// Returns true if a dependency between \p Current and \p KillingDef is
/// guaranteed to be loop invariant for the loops that they are in. Either
/// because they are known to be in the same block, in the same loop level or
/// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
/// during execution of the containing function.
bool isGuaranteedLoopIndependent(const Instruction *Current,
const Instruction *KillingDef,
const MemoryLocation &CurrentLoc) {
// If the dependency is within the same block or loop level (being careful
// of irreducible loops), we know that AA will return a valid result for the
// memory dependency. (Both at the function level, outside of any loop,
// would also be valid but we currently disable that to limit compile time).
if (Current->getParent() == KillingDef->getParent())
return true;
const Loop *CurrentLI = LI.getLoopFor(Current->getParent());
if (!ContainsIrreducibleLoops && CurrentLI &&
CurrentLI == LI.getLoopFor(KillingDef->getParent()))
return true;
// Otherwise check the memory location is invariant to any loops.
return isGuaranteedLoopInvariant(CurrentLoc.Ptr);
}
/// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
/// loop. In particular, this guarantees that it only references a single
/// MemoryLocation during execution of the containing function.
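/// For illustration, a pointer computed as a GEP with all-constant indices
/// into an alloca in the entry block qualifies, while a GEP whose base is
/// computed inside a loop body does not.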
bool isGuaranteedLoopInvariant(const Value *Ptr) {
Ptr = Ptr->stripPointerCasts();
if (auto *GEP = dyn_cast<GEPOperator>(Ptr))
if (GEP->hasAllConstantIndices())
Ptr = GEP->getPointerOperand()->stripPointerCasts();
if (auto *I = dyn_cast<Instruction>(Ptr))
return I->getParent()->isEntryBlock();
return true;
}
// Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
// with no read access between them or on any other path to a function exit
// block if \p KillingLoc is not accessible after the function returns. If
// there is no such MemoryDef, return None. The returned value may not
// (completely) overwrite \p KillingLoc. Currently we bail out when we
// encounter an aliasing MemoryUse (read).
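// For illustration, starting at KillingDef's defining access the walk below
// may step over a fence or a call that only accesses inaccessible memory
// (both handled by canSkipDef) before it reaches a candidate MemoryDef that
// writes the same location as KillingDef.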
Optional<MemoryAccess *>
getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
const MemoryLocation &KillingLoc, const Value *KillingUndObj,
unsigned &ScanLimit, unsigned &WalkerStepLimit,
bool IsMemTerm, unsigned &PartialLimit) {
if (ScanLimit == 0 || WalkerStepLimit == 0) {
LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
return None;
}
MemoryAccess *Current = StartAccess;
Instruction *KillingI = KillingDef->getMemoryInst();
LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
// Only optimize defining access of KillingDef when directly starting at its
// defining access. The defining access also must only access KillingLoc. At
// the moment we only support instructions with a single write location, so
// it should be sufficient to disable optimizations for instructions that
// also read from memory.
bool CanOptimize = OptimizeMemorySSA &&
KillingDef->getDefiningAccess() == StartAccess &&
!KillingI->mayReadFromMemory();
// Find the next clobbering Mod access for DefLoc, starting at StartAccess.
Optional<MemoryLocation> CurrentLoc;
for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
LLVM_DEBUG({
dbgs() << " visiting " << *Current;
if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
<< ")";
dbgs() << "\n";
});
// Reached TOP.
if (MSSA.isLiveOnEntryDef(Current)) {
LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
return None;
}
// Cost of a step. Accesses in the same block are more likely to be valid
// candidates for elimination, hence consider them cheaper.
unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
? MemorySSASameBBStepCost
: MemorySSAOtherBBStepCost;
if (WalkerStepLimit <= StepCost) {
LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
return None;
}
WalkerStepLimit -= StepCost;
// Return for MemoryPhis. They cannot be eliminated directly and the
// caller is responsible for traversing them.
if (isa<MemoryPhi>(Current)) {
LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
return Current;
}
// Below, check if CurrentDef is a valid candidate to be eliminated by
// KillingDef. If it is not, check the next candidate.
MemoryDef *CurrentDef = cast<MemoryDef>(Current);
Instruction *CurrentI = CurrentDef->getMemoryInst();
if (canSkipDef(CurrentDef, !isInvisibleToCallerOnUnwind(KillingUndObj))) {
CanOptimize = false;
continue;
}
// Before we try to remove anything, check for any extra throwing
// instructions that block us from DSEing
if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
return None;
}
// Check for anything that looks like it will be a barrier to further
// removal
if (isDSEBarrier(KillingUndObj, CurrentI)) {
LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
return None;
}
// If Current is known to be on path that reads DefLoc or is a read
// clobber, bail out, as the path is not profitable. We skip this check
// for intrinsic calls, because the code knows how to handle memcpy
// intrinsics.
if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
return None;
// Quick check if there are direct uses that are read-clobbers.
if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
return !MSSA.dominates(StartAccess, UseOrDef) &&
isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
return false;
})) {
LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
return None;
}
// If Current does not have an analyzable write location or is not
// removable, skip it.
CurrentLoc = getLocForWrite(CurrentI);
if (!CurrentLoc || !isRemovable(CurrentI)) {
CanOptimize = false;
continue;
}
// AliasAnalysis does not account for loops. Limit elimination to
// candidates for which we can guarantee they always store to the same
// memory location and not located in different loops.
if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
WalkerStepLimit -= 1;
CanOptimize = false;
continue;
}
if (IsMemTerm) {
// If the killing def is a memory terminator (e.g. lifetime.end), check
// the next candidate if Current does not write the same
// underlying object as the terminator.
if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
CanOptimize = false;
continue;
}
} else {
int64_t KillingOffset = 0;
int64_t DeadOffset = 0;
auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
KillingOffset, DeadOffset);
if (CanOptimize) {
// CurrentDef is the earliest write clobber of KillingDef. Use it as
// optimized access. Do not optimize if CurrentDef is already the
// defining access of KillingDef.
if (CurrentDef != KillingDef->getDefiningAccess() &&
(OR == OW_Complete || OR == OW_MaybePartial))
KillingDef->setOptimized(CurrentDef);
// Once a may-aliasing def is encountered do not set an optimized
// access.
if (OR != OW_None)
CanOptimize = false;
}
// If Current does not write to the same object as KillingDef, check
// the next candidate.
if (OR == OW_Unknown || OR == OW_None)
continue;
else if (OR == OW_MaybePartial) {
// If KillingDef only partially overwrites Current, check the next
// candidate if the partial step limit is exceeded. This aggressively
// limits the number of candidates for partial store elimination,
// which are less likely to be removable in the end.
if (PartialLimit <= 1) {
WalkerStepLimit -= 1;
LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with next access\n");
continue;
}
PartialLimit -= 1;
}
}
break;
};
// Accesses to objects accessible after the function returns can only be
// eliminated if the access is dead along all paths to the exit. Collect
// the blocks with killing (=completely overwriting MemoryDefs) and check if
// they cover all paths from MaybeDeadAccess to any function exit.
SmallPtrSet<Instruction *, 16> KillingDefs;
KillingDefs.insert(KillingDef->getMemoryInst());
MemoryAccess *MaybeDeadAccess = Current;
MemoryLocation MaybeDeadLoc = *CurrentLoc;
Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
<< *MaybeDeadI << ")\n");
SmallSetVector<MemoryAccess *, 32> WorkList;
auto PushMemUses = [&WorkList](MemoryAccess *Acc) {
for (Use &U : Acc->uses())
WorkList.insert(cast<MemoryAccess>(U.getUser()));
};
PushMemUses(MaybeDeadAccess);
// Check if DeadDef may be read.
for (unsigned I = 0; I < WorkList.size(); I++) {
MemoryAccess *UseAccess = WorkList[I];
LLVM_DEBUG(dbgs() << " " << *UseAccess);
// Bail out if the number of accesses to check exceeds the scan limit.
if (ScanLimit < (WorkList.size() - I)) {
LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
return None;
}
--ScanLimit;
NumDomMemDefChecks++;
if (isa<MemoryPhi>(UseAccess)) {
if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
return DT.properlyDominates(KI->getParent(),
UseAccess->getBlock());
})) {
LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
continue;
}
LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
PushMemUses(UseAccess);
continue;
}
Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
return DT.dominates(KI, UseInst);
})) {
LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
continue;
}
// A memory terminator kills all preceding MemoryDefs and all succeeding
// MemoryAccesses. We do not have to check its users.
if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
LLVM_DEBUG(
dbgs()
<< " ... skipping, memterminator invalidates following accesses\n");
continue;
}
if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
PushMemUses(UseAccess);
continue;
}
if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj)) {
LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
return None;
}
// Uses which may read the original MemoryDef mean we cannot eliminate the
// original MD. Stop walk.
if (isReadClobber(MaybeDeadLoc, UseInst)) {
LLVM_DEBUG(dbgs() << " ... found read clobber\n");
return None;
}
// If this worklist walks back to the original memory access (and the
// pointer is not guaranteed loop invariant) then we cannot assume that a
// store kills itself.
if (MaybeDeadAccess == UseAccess &&
!isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
return None;
}
// Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
// if it reads the memory location.
// TODO: It would probably be better to check for self-reads before
// calling the function.
if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
continue;
}
// Check all uses for MemoryDefs, except for defs completely overwriting
// the original location. Otherwise we have to check uses of *all*
// MemoryDefs we discover, including non-aliasing ones; otherwise we might
// miss cases like the following
// 1 = Def(LoE) ; <----- DeadDef stores [0,1]
// 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
// Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
// (The Use points to the *first* Def it may alias)
// 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
// stores [0,1]
if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
BasicBlock *MaybeKillingBlock = UseInst->getParent();
if (PostOrderNumbers.find(MaybeKillingBlock)->second <
PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
LLVM_DEBUG(dbgs()
<< " ... found killing def " << *UseInst << "\n");
KillingDefs.insert(UseInst);
}
} else {
LLVM_DEBUG(dbgs()
<< " ... found preceeding def " << *UseInst << "\n");
return None;
}
} else
PushMemUses(UseDef);
}
}
// For accesses to locations visible after the function returns, make sure
// that the location is dead (=overwritten) along all paths from
// MaybeDeadAccess to the exit.
if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
SmallPtrSet<BasicBlock *, 16> KillingBlocks;
for (Instruction *KD : KillingDefs)
KillingBlocks.insert(KD->getParent());
assert(!KillingBlocks.empty() &&
"Expected at least a single killing block");
// Find the common post-dominator of all killing blocks.
BasicBlock *CommonPred = *KillingBlocks.begin();
for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
if (!CommonPred)
break;
CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
}
- // If CommonPred is in the set of killing blocks, just check if it
- // post-dominates MaybeDeadAccess.
- if (KillingBlocks.count(CommonPred)) {
- if (PDT.dominates(CommonPred, MaybeDeadAccess->getBlock()))
- return {MaybeDeadAccess};
- return None;
- }
-
// If the common post-dominator does not post-dominate MaybeDeadAccess,
// there is a path from MaybeDeadAccess to an exit not going through a
// killing block.
- if (PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
- SetVector<BasicBlock *> WorkList;
-
- // If CommonPred is null, there are multiple exits from the function.
- // They all have to be added to the worklist.
- if (CommonPred)
- WorkList.insert(CommonPred);
- else
- for (BasicBlock *R : PDT.roots())
+ if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
+ if (!AnyUnreachableExit)
+ return None;
+
+ // Fall back to CFG scan starting at all non-unreachable roots if not
+ // all paths to the exit go through CommonPred.
+ CommonPred = nullptr;
+ }
+
+ // If CommonPred itself is in the set of killing blocks, we're done.
+ if (KillingBlocks.count(CommonPred))
+ return {MaybeDeadAccess};
+
+ SetVector<BasicBlock *> WorkList;
+ // If CommonPred is null, there are multiple exits from the function.
+ // They all have to be added to the worklist.
+ if (CommonPred)
+ WorkList.insert(CommonPred);
+ else
+ for (BasicBlock *R : PDT.roots()) {
+ if (!isa<UnreachableInst>(R->getTerminator()))
WorkList.insert(R);
+ }
- NumCFGTries++;
- // Check if all paths starting from an exit node go through one of the
- // killing blocks before reaching MaybeDeadAccess.
- for (unsigned I = 0; I < WorkList.size(); I++) {
- NumCFGChecks++;
- BasicBlock *Current = WorkList[I];
- if (KillingBlocks.count(Current))
- continue;
- if (Current == MaybeDeadAccess->getBlock())
- return None;
+ NumCFGTries++;
+ // Check if all paths starting from an exit node go through one of the
+ // killing blocks before reaching MaybeDeadAccess.
+ for (unsigned I = 0; I < WorkList.size(); I++) {
+ NumCFGChecks++;
+ BasicBlock *Current = WorkList[I];
+ if (KillingBlocks.count(Current))
+ continue;
+ if (Current == MaybeDeadAccess->getBlock())
+ return None;
- // MaybeDeadAccess is reachable from the entry, so we don't have to
- // explore unreachable blocks further.
- if (!DT.isReachableFromEntry(Current))
- continue;
+ // MaybeDeadAccess is reachable from the entry, so we don't have to
+ // explore unreachable blocks further.
+ if (!DT.isReachableFromEntry(Current))
+ continue;
- for (BasicBlock *Pred : predecessors(Current))
- WorkList.insert(Pred);
+ for (BasicBlock *Pred : predecessors(Current))
+ WorkList.insert(Pred);
- if (WorkList.size() >= MemorySSAPathCheckLimit)
- return None;
- }
- NumCFGSuccess++;
- return {MaybeDeadAccess};
+ if (WorkList.size() >= MemorySSAPathCheckLimit)
+ return None;
}
- return None;
+ NumCFGSuccess++;
}
// No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
// potentially dead.
return {MaybeDeadAccess};
}
// Delete dead memory defs
void deleteDeadInstruction(Instruction *SI) {
MemorySSAUpdater Updater(&MSSA);
SmallVector<Instruction *, 32> NowDeadInsts;
NowDeadInsts.push_back(SI);
--NumFastOther;
while (!NowDeadInsts.empty()) {
Instruction *DeadInst = NowDeadInsts.pop_back_val();
++NumFastOther;
// Try to preserve debug information attached to the dead instruction.
salvageDebugInfo(*DeadInst);
salvageKnowledge(DeadInst);
// Remove the Instruction from MSSA.
if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) {
if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) {
SkipStores.insert(MD);
}
Updater.removeMemoryAccess(MA);
}
auto I = IOLs.find(DeadInst->getParent());
if (I != IOLs.end())
I->second.erase(DeadInst);
// Remove its operands
for (Use &O : DeadInst->operands())
if (Instruction *OpI = dyn_cast<Instruction>(O)) {
O = nullptr;
if (isInstructionTriviallyDead(OpI, &TLI))
NowDeadInsts.push_back(OpI);
}
EI.removeInstruction(DeadInst);
DeadInst->eraseFromParent();
}
}
// Check for any extra throws between \p KillingI and \p DeadI that block
// DSE. This only checks extra maythrows (those that aren't MemoryDef's).
// MemoryDefs that may throw are handled during the walk from one def to the
// next.
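// For example, if an instruction between DeadI and KillingI may throw and the
// stored-to object is visible to the unwinder, an exception handler could read
// the memory after DeadI but before KillingI, so DeadI cannot be treated as
// dead in that case.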
bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
const Value *KillingUndObj) {
// First see if we can ignore it by using the fact that KillingUndObj is an
// alloca/alloca-like object that is not visible to the caller during
// execution of the function.
if (KillingUndObj && isInvisibleToCallerOnUnwind(KillingUndObj))
return false;
if (KillingI->getParent() == DeadI->getParent())
return ThrowingBlocks.count(KillingI->getParent());
return !ThrowingBlocks.empty();
}
// Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
// instructions act as barriers:
// * A memory instruction that may throw and \p KillingI accesses a non-stack
// object.
// * Atomic stores stronger than monotonic.
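// For example, if \p DeadI is 'store atomic i32 0, i32* @g seq_cst', it must
// not be eliminated or reordered: other threads may synchronize with it and
// legitimately observe the stored value before \p KillingI executes.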
bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
// If DeadI may throw it acts as a barrier, unless we are accessing an
// alloca/alloca-like object that does not escape.
if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj))
return true;
// If DeadI is an atomic load/store stronger than monotonic, do not try to
// eliminate/reorder it.
if (DeadI->isAtomic()) {
if (auto *LI = dyn_cast<LoadInst>(DeadI))
return isStrongerThanMonotonic(LI->getOrdering());
if (auto *SI = dyn_cast<StoreInst>(DeadI))
return isStrongerThanMonotonic(SI->getOrdering());
if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
return isStrongerThanMonotonic(ARMW->getOrdering());
if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
llvm_unreachable("other instructions should be skipped in MemorySSA");
}
return false;
}
/// Eliminate writes to objects that are not visible in the caller and are not
/// accessed before returning from the function.
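/// For example (illustrative IR): in
///   %a = alloca i32
///   store i32 1, i32* %a
///   ret void
/// the store is never read before the function returns and %a is not visible
/// to the caller, so the MemoryDef for the store can be deleted.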
bool eliminateDeadWritesAtEndOfFunction() {
bool MadeChange = false;
LLVM_DEBUG(
dbgs()
<< "Trying to eliminate MemoryDefs at the end of the function\n");
for (MemoryDef *Def : llvm::reverse(MemDefs)) {
if (SkipStores.contains(Def))
continue;
Instruction *DefI = Def->getMemoryInst();
auto DefLoc = getLocForWrite(DefI);
if (!DefLoc || !isRemovable(DefI))
continue;
// NOTE: Currently eliminating writes at the end of a function is limited
// to MemoryDefs with a single underlying object, to save compile-time. In
// practice it appears the case with multiple underlying objects is very
// uncommon. If it turns out to be important, we can use
// getUnderlyingObjects here instead.
const Value *UO = getUnderlyingObject(DefLoc->Ptr);
if (!isInvisibleToCallerAfterRet(UO))
continue;
if (isWriteAtEndOfFunction(Def)) {
// See through pointer-to-pointer bitcasts
LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
"of the function\n");
deleteDeadInstruction(DefI);
++NumFastStores;
MadeChange = true;
}
}
return MadeChange;
}
/// If we have a zero initializing memset following a call to malloc,
/// try folding it into a call to calloc.
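/// For example (illustrative IR), a pair like
///   %p = call i8* @malloc(i64 %n)
///   call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 %n, i1 false)
/// can become '%p = call i8* @calloc(i64 1, i64 %n)' when the memset length
/// matches the allocation size and nothing modifies the memory in between.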
bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
Instruction *DefI = Def->getMemoryInst();
MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
if (!MemSet)
// TODO: Could handle zero store to small allocation as well.
return false;
Constant *StoredConstant = dyn_cast<Constant>(MemSet->getValue());
if (!StoredConstant || !StoredConstant->isNullValue())
return false;
if (!isRemovable(DefI))
// The memset might be volatile.
return false;
if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
F.hasFnAttribute(Attribute::SanitizeAddress) ||
F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
F.getName() == "calloc")
return false;
auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUO));
if (!Malloc)
return false;
auto *InnerCallee = Malloc->getCalledFunction();
if (!InnerCallee)
return false;
LibFunc Func;
if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
Func != LibFunc_malloc)
return false;
auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
// Check for br(icmp ptr, null), truebb, falsebb) pattern at the end
// of malloc block
auto *MallocBB = Malloc->getParent(),
*MemsetBB = Memset->getParent();
if (MallocBB == MemsetBB)
return true;
auto *Ptr = Memset->getArgOperand(0);
auto *TI = MallocBB->getTerminator();
ICmpInst::Predicate Pred;
BasicBlock *TrueBB, *FalseBB;
if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Ptr), m_Zero()), TrueBB,
FalseBB)))
return false;
if (Pred != ICmpInst::ICMP_EQ || MemsetBB != FalseBB)
return false;
return true;
};
if (Malloc->getOperand(0) != MemSet->getLength())
return false;
if (!shouldCreateCalloc(Malloc, MemSet) ||
!DT.dominates(Malloc, MemSet) ||
!memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
return false;
IRBuilder<> IRB(Malloc);
const auto &DL = Malloc->getModule()->getDataLayout();
auto *Calloc =
emitCalloc(ConstantInt::get(IRB.getIntPtrTy(DL), 1),
Malloc->getArgOperand(0), IRB, TLI);
if (!Calloc)
return false;
MemorySSAUpdater Updater(&MSSA);
auto *LastDef =
cast<MemoryDef>(Updater.getMemorySSA()->getMemoryAccess(Malloc));
auto *NewAccess =
Updater.createMemoryAccessAfter(cast<Instruction>(Calloc), LastDef,
LastDef);
auto *NewAccessMD = cast<MemoryDef>(NewAccess);
Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
Updater.removeMemoryAccess(Malloc);
Malloc->replaceAllUsesWith(Calloc);
Malloc->eraseFromParent();
return true;
}
/// \returns true if \p Def is a no-op store, either because it
/// directly stores back a loaded value or stores zero to a calloced object.
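/// For example (illustrative IR), both
///   %v = load i32, i32* %p
///   store i32 %v, i32* %p   ; stores back the value just loaded
/// and a store of zero into memory freshly returned by calloc are no-ops,
/// provided MemorySSA shows no intervening clobber of the location.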
bool storeIsNoop(MemoryDef *Def, const Value *DefUO) {
Instruction *DefI = Def->getMemoryInst();
StoreInst *Store = dyn_cast<StoreInst>(DefI);
MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
Constant *StoredConstant = nullptr;
if (Store)
StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
else if (MemSet)
StoredConstant = dyn_cast<Constant>(MemSet->getValue());
else
return false;
if (!isRemovable(DefI))
return false;
if (StoredConstant && isAllocationFn(DefUO, &TLI)) {
auto *CB = cast<CallBase>(DefUO);
auto *InitC = getInitialValueOfAllocation(CB, &TLI,
StoredConstant->getType());
// If the clobbering access is LiveOnEntry, no instructions between them
// can modify the memory location.
if (InitC && InitC == StoredConstant)
return MSSA.isLiveOnEntryDef(
MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def));
}
if (!Store)
return false;
if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
if (LoadI->getPointerOperand() == Store->getOperand(1)) {
// Get the defining access for the load.
auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
// Fast path: the defining accesses are the same.
if (LoadAccess == Def->getDefiningAccess())
return true;
// Look through phi accesses. Recursively scan all phi accesses by
// adding them to a worklist. Bail when we run into a memory def that
// does not match LoadAccess.
SetVector<MemoryAccess *> ToCheck;
MemoryAccess *Current =
MSSA.getWalker()->getClobberingMemoryAccess(Def);
// We don't want to bail when we run into the store memory def. But,
// the phi access may point to it. So, pretend like we've already
// checked it.
ToCheck.insert(Def);
ToCheck.insert(Current);
// Start at current (1) to simulate already having checked Def.
for (unsigned I = 1; I < ToCheck.size(); ++I) {
Current = ToCheck[I];
if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
// Check all the operands.
for (auto &Use : PhiAccess->incoming_values())
ToCheck.insert(cast<MemoryAccess>(&Use));
continue;
}
// If we found a memory def, bail. This happens when we have an
// unrelated write in between an otherwise noop store.
assert(isa<MemoryDef>(Current) &&
"Only MemoryDefs should reach here.");
// TODO: Skip no alias MemoryDefs that have no aliasing reads.
// We are searching for the definition of the store's destination.
// So, if that is the same definition as the load, then this is a
// noop. Otherwise, fail.
if (LoadAccess != Current)
return false;
}
return true;
}
}
return false;
}
bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
bool Changed = false;
for (auto OI : IOL) {
Instruction *DeadI = OI.first;
MemoryLocation Loc = *getLocForWrite(DeadI);
assert(isRemovable(DeadI) && "Expect only removable instruction");
const Value *Ptr = Loc.Ptr->stripPointerCasts();
int64_t DeadStart = 0;
uint64_t DeadSize = Loc.Size.getValue();
GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
OverlapIntervalsTy &IntervalMap = OI.second;
Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
if (IntervalMap.empty())
continue;
Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
}
return Changed;
}
/// Eliminates writes to locations where the value that is being written
/// is already stored at the same location.
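/// For example (illustrative IR), given
///   store i32 1, i32* %p
///   ...                      ; no clobber of %p
///   store i32 1, i32* %p
/// the second store writes the value that is already present, so it is removed
/// (the earlier store, or an equivalent memset, is kept).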
bool eliminateRedundantStoresOfExistingValues() {
bool MadeChange = false;
LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
"already existing value\n");
for (auto *Def : MemDefs) {
if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def))
continue;
Instruction *DefInst = Def->getMemoryInst();
auto MaybeDefLoc = getLocForWrite(DefInst);
if (!MaybeDefLoc || !isRemovable(DefInst))
continue;
MemoryDef *UpperDef;
// To conserve compile-time, we avoid walking to the next clobbering def.
// Instead, we just try to get the optimized access, if it exists. DSE
// will try to optimize defs during the earlier traversal.
if (Def->isOptimized())
UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
else
UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
continue;
Instruction *UpperInst = UpperDef->getMemoryInst();
auto IsRedundantStore = [&]() {
if (DefInst->isIdenticalTo(UpperInst))
return true;
if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
// MemSetInst must have a write location.
MemoryLocation UpperLoc = *getLocForWrite(UpperInst);
int64_t InstWriteOffset = 0;
int64_t DepWriteOffset = 0;
auto OR = isOverwrite(UpperInst, DefInst, UpperLoc, *MaybeDefLoc,
InstWriteOffset, DepWriteOffset);
Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
return StoredByte && StoredByte == MemSetI->getOperand(1) &&
OR == OW_Complete;
}
}
return false;
};
if (!IsRedundantStore() || isReadClobber(*MaybeDefLoc, DefInst))
continue;
LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
<< '\n');
deleteDeadInstruction(DefInst);
NumRedundantStores++;
MadeChange = true;
}
return MadeChange;
}
};
static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
DominatorTree &DT, PostDominatorTree &PDT,
const TargetLibraryInfo &TLI,
const LoopInfo &LI) {
bool MadeChange = false;
DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
// For each store:
for (unsigned I = 0; I < State.MemDefs.size(); I++) {
MemoryDef *KillingDef = State.MemDefs[I];
if (State.SkipStores.count(KillingDef))
continue;
Instruction *KillingI = KillingDef->getMemoryInst();
Optional<MemoryLocation> MaybeKillingLoc;
if (State.isMemTerminatorInst(KillingI))
MaybeKillingLoc = State.getLocForTerminator(KillingI).map(
[](const std::pair<MemoryLocation, bool> &P) { return P.first; });
else
MaybeKillingLoc = State.getLocForWrite(KillingI);
if (!MaybeKillingLoc) {
LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
<< *KillingI << "\n");
continue;
}
MemoryLocation KillingLoc = *MaybeKillingLoc;
assert(KillingLoc.Ptr && "KillingLoc should not be null");
const Value *KillingUndObj = getUnderlyingObject(KillingLoc.Ptr);
LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
<< *KillingDef << " (" << *KillingI << ")\n");
unsigned ScanLimit = MemorySSAScanLimit;
unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
unsigned PartialLimit = MemorySSAPartialStoreLimit;
// Worklist of MemoryAccesses that may be killed by KillingDef.
SetVector<MemoryAccess *> ToCheck;
ToCheck.insert(KillingDef->getDefiningAccess());
bool Shortend = false;
bool IsMemTerm = State.isMemTerminatorInst(KillingI);
// Check if MemoryAccesses in the worklist are killed by KillingDef.
for (unsigned I = 0; I < ToCheck.size(); I++) {
MemoryAccess *Current = ToCheck[I];
if (State.SkipStores.count(Current))
continue;
Optional<MemoryAccess *> MaybeDeadAccess = State.getDomMemoryDef(
KillingDef, Current, KillingLoc, KillingUndObj, ScanLimit,
WalkerStepLimit, IsMemTerm, PartialLimit);
if (!MaybeDeadAccess) {
LLVM_DEBUG(dbgs() << " finished walk\n");
continue;
}
MemoryAccess *DeadAccess = *MaybeDeadAccess;
LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
if (isa<MemoryPhi>(DeadAccess)) {
LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
BasicBlock *IncomingBlock = IncomingAccess->getBlock();
BasicBlock *PhiBlock = DeadAccess->getBlock();
// We only consider incoming MemoryAccesses that come before the
// MemoryPhi. Otherwise we could discover candidates that do not
// strictly dominate our starting def.
if (State.PostOrderNumbers[IncomingBlock] >
State.PostOrderNumbers[PhiBlock])
ToCheck.insert(IncomingAccess);
}
continue;
}
auto *DeadDefAccess = cast<MemoryDef>(DeadAccess);
Instruction *DeadI = DeadDefAccess->getMemoryInst();
LLVM_DEBUG(dbgs() << " (" << *DeadI << ")\n");
ToCheck.insert(DeadDefAccess->getDefiningAccess());
NumGetDomMemoryDefPassed++;
if (!DebugCounter::shouldExecute(MemorySSACounter))
continue;
MemoryLocation DeadLoc = *State.getLocForWrite(DeadI);
if (IsMemTerm) {
const Value *DeadUndObj = getUnderlyingObject(DeadLoc.Ptr);
if (KillingUndObj != DeadUndObj)
continue;
LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
<< "\n KILLER: " << *KillingI << '\n');
State.deleteDeadInstruction(DeadI);
++NumFastStores;
MadeChange = true;
} else {
// Check if DeadI overwrites KillingI.
int64_t KillingOffset = 0;
int64_t DeadOffset = 0;
OverwriteResult OR = State.isOverwrite(
KillingI, DeadI, KillingLoc, DeadLoc, KillingOffset, DeadOffset);
if (OR == OW_MaybePartial) {
auto Iter = State.IOLs.insert(
std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
DeadI->getParent(), InstOverlapIntervalsTy()));
auto &IOL = Iter.first->second;
OR = isPartialOverwrite(KillingLoc, DeadLoc, KillingOffset,
DeadOffset, DeadI, IOL);
}
if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
auto *DeadSI = dyn_cast<StoreInst>(DeadI);
auto *KillingSI = dyn_cast<StoreInst>(KillingI);
// We are re-using tryToMergePartialOverlappingStores, which requires
// DeadSI to dominate KillingSI.
// TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
if (Constant *Merged = tryToMergePartialOverlappingStores(
KillingSI, DeadSI, KillingOffset, DeadOffset, State.DL,
State.BatchAA, &DT)) {
// Update stored value of earlier store to merged constant.
DeadSI->setOperand(0, Merged);
++NumModifiedStores;
MadeChange = true;
Shortend = true;
// Remove killing store and remove any outstanding overlap
// intervals for the updated store.
State.deleteDeadInstruction(KillingSI);
auto I = State.IOLs.find(DeadSI->getParent());
if (I != State.IOLs.end())
I->second.erase(DeadSI);
break;
}
}
}
if (OR == OW_Complete) {
LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
<< "\n KILLER: " << *KillingI << '\n');
State.deleteDeadInstruction(DeadI);
++NumFastStores;
MadeChange = true;
}
}
}
// Check if the store is a no-op.
if (!Shortend && State.storeIsNoop(KillingDef, KillingUndObj)) {
LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *KillingI
<< '\n');
State.deleteDeadInstruction(KillingI);
NumRedundantStores++;
MadeChange = true;
continue;
}
// Can we form a calloc from a memset/malloc pair?
if (!Shortend && State.tryFoldIntoCalloc(KillingDef, KillingUndObj)) {
LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
<< " DEAD: " << *KillingI << '\n');
State.deleteDeadInstruction(KillingI);
MadeChange = true;
continue;
}
}
if (EnablePartialOverwriteTracking)
for (auto &KV : State.IOLs)
MadeChange |= State.removePartiallyOverlappedStores(KV.second);
MadeChange |= State.eliminateRedundantStoresOfExistingValues();
MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
return MadeChange;
}
} // end anonymous namespace
//===----------------------------------------------------------------------===//
// DSE Pass
//===----------------------------------------------------------------------===//
PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
AliasAnalysis &AA = AM.getResult<AAManager>(F);
const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
#ifdef LLVM_ENABLE_STATS
if (AreStatisticsEnabled())
for (auto &I : instructions(F))
NumRemainingStores += isa<StoreInst>(&I);
#endif
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
PA.preserve<MemorySSAAnalysis>();
PA.preserve<LoopAnalysis>();
return PA;
}
namespace {
/// A legacy pass for the legacy pass manager that wraps \c DSEPass.
class DSELegacyPass : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
DSELegacyPass() : FunctionPass(ID) {
initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
if (skipFunction(F))
return false;
AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
const TargetLibraryInfo &TLI =
getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
PostDominatorTree &PDT =
getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
#ifdef LLVM_ENABLE_STATS
if (AreStatisticsEnabled())
for (auto &I : instructions(F))
NumRemainingStores += isa<StoreInst>(&I);
#endif
return Changed;
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<PostDominatorTreeWrapperPass>();
AU.addRequired<MemorySSAWrapperPass>();
AU.addPreserved<PostDominatorTreeWrapperPass>();
AU.addPreserved<MemorySSAWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
}
};
} // end anonymous namespace
char DSELegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
false)
FunctionPass *llvm::createDeadStoreEliminationPass() {
return new DSELegacyPass();
}
diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 25bf69729c70..644372483edd 100644
--- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1,10267 +1,10255 @@
//===- SLPVectorizer.cpp - A bottom up SLP Vectorizer ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass implements the Bottom Up SLP vectorizer. It detects consecutive
// stores that can be put together into vector-stores. Next, it attempts to
// construct vectorizable tree using the use-def chains. If a profitable tree
// was found, the SLP vectorizer performs vectorization on the tree.
//
// The pass is inspired by the work described in the paper:
// "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Vectorize.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <memory>
#include <set>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
using namespace llvm;
using namespace llvm::PatternMatch;
using namespace slpvectorizer;
#define SV_NAME "slp-vectorizer"
#define DEBUG_TYPE "SLP"
STATISTIC(NumVectorInstructions, "Number of vector instructions generated");
cl::opt<bool> RunSLPVectorization("vectorize-slp", cl::init(true), cl::Hidden,
cl::desc("Run the SLP vectorization passes"));
static cl::opt<int>
SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
cl::desc("Only vectorize if you gain more than this "
"number "));
static cl::opt<bool>
ShouldVectorizeHor("slp-vectorize-hor", cl::init(true), cl::Hidden,
cl::desc("Attempt to vectorize horizontal reductions"));
static cl::opt<bool> ShouldStartVectorizeHorAtStore(
"slp-vectorize-hor-store", cl::init(false), cl::Hidden,
cl::desc(
"Attempt to vectorize horizontal reductions feeding into a store"));
static cl::opt<int>
MaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden,
cl::desc("Attempt to vectorize for this register size in bits"));
static cl::opt<unsigned>
MaxVFOption("slp-max-vf", cl::init(0), cl::Hidden,
cl::desc("Maximum SLP vectorization factor (0=unlimited)"));
static cl::opt<int>
MaxStoreLookup("slp-max-store-lookup", cl::init(32), cl::Hidden,
cl::desc("Maximum depth of the lookup for consecutive stores."));
/// Limits the size of scheduling regions in a block.
/// It avoids long compile times for _very_ large blocks where vector
/// instructions are spread over a wide range.
/// This limit is way higher than needed by real-world functions.
static cl::opt<int>
ScheduleRegionSizeBudget("slp-schedule-budget", cl::init(100000), cl::Hidden,
cl::desc("Limit the size of the SLP scheduling region per block"));
static cl::opt<int> MinVectorRegSizeOption(
"slp-min-reg-size", cl::init(128), cl::Hidden,
cl::desc("Attempt to vectorize for this register size in bits"));
static cl::opt<unsigned> RecursionMaxDepth(
"slp-recursion-max-depth", cl::init(12), cl::Hidden,
cl::desc("Limit the recursion depth when building a vectorizable tree"));
static cl::opt<unsigned> MinTreeSize(
"slp-min-tree-size", cl::init(3), cl::Hidden,
cl::desc("Only vectorize small trees if they are fully vectorizable"));
// The maximum depth that the look-ahead score heuristic will explore.
// The higher this value, the higher the compilation time overhead.
static cl::opt<int> LookAheadMaxDepth(
"slp-max-look-ahead-depth", cl::init(2), cl::Hidden,
cl::desc("The maximum look-ahead depth for operand reordering scores"));
// The Look-ahead heuristic goes through the users of the bundle to calculate
// the users' cost in getExternalUsesCost(). To avoid a compilation time increase
// we limit the number of users visited to this value.
static cl::opt<unsigned> LookAheadUsersBudget(
"slp-look-ahead-users-budget", cl::init(2), cl::Hidden,
cl::desc("The maximum number of users to visit while visiting the "
"predecessors. This prevents compilation time increase."));
static cl::opt<bool>
ViewSLPTree("view-slp-tree", cl::Hidden,
cl::desc("Display the SLP trees with Graphviz"));
// Limit the number of alias checks. The limit is chosen so that
// it has no negative effect on the llvm benchmarks.
static const unsigned AliasedCheckLimit = 10;
// Another limit for the alias checks: The maximum distance between load/store
// instructions where alias checks are done.
// This limit is useful for very large basic blocks.
static const unsigned MaxMemDepDistance = 160;
/// If the ScheduleRegionSizeBudget is exhausted, we allow small scheduling
/// regions to be handled.
static const int MinScheduleRegionSize = 16;
/// Predicate for the element types that the SLP vectorizer supports.
///
/// The most important thing to filter here are types which are invalid in LLVM
/// vectors. We also filter target specific types which have absolutely no
/// meaningful vectorization path such as x86_fp80 and ppc_f128. This just
/// avoids spending time checking the cost model and realizing that they will
/// be inevitably scalarized.
static bool isValidElementType(Type *Ty) {
return VectorType::isValidElementType(Ty) && !Ty->isX86_FP80Ty() &&
!Ty->isPPC_FP128Ty();
}
/// \returns True if the value is a constant (but not globals/constant
/// expressions).
static bool isConstant(Value *V) {
return isa<Constant>(V) && !isa<ConstantExpr>(V) && !isa<GlobalValue>(V);
}
/// Checks if \p V is one of vector-like instructions, i.e. undef,
/// insertelement/extractelement with constant indices for fixed vector type or
/// extractvalue instruction.
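/// For example, 'insertelement <4 x i32> %v, i32 %x, i32 1' qualifies because
/// the index is a constant, while the same instruction with a variable index
/// does not.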
static bool isVectorLikeInstWithConstOps(Value *V) {
if (!isa<InsertElementInst, ExtractElementInst>(V) &&
!isa<ExtractValueInst, UndefValue>(V))
return false;
auto *I = dyn_cast<Instruction>(V);
if (!I || isa<ExtractValueInst>(I))
return true;
if (!isa<FixedVectorType>(I->getOperand(0)->getType()))
return false;
if (isa<ExtractElementInst>(I))
return isConstant(I->getOperand(1));
assert(isa<InsertElementInst>(V) && "Expected only insertelement.");
return isConstant(I->getOperand(2));
}
/// \returns true if all of the instructions in \p VL are in the same block or
/// false otherwise.
static bool allSameBlock(ArrayRef<Value *> VL) {
Instruction *I0 = dyn_cast<Instruction>(VL[0]);
if (!I0)
return false;
if (all_of(VL, isVectorLikeInstWithConstOps))
return true;
BasicBlock *BB = I0->getParent();
for (int I = 1, E = VL.size(); I < E; I++) {
auto *II = dyn_cast<Instruction>(VL[I]);
if (!II)
return false;
if (BB != II->getParent())
return false;
}
return true;
}
/// \returns True if all of the values in \p VL are constants (but not
/// globals/constant expressions).
static bool allConstant(ArrayRef<Value *> VL) {
// Constant expressions and globals can't be vectorized like normal integer/FP
// constants.
return all_of(VL, isConstant);
}
/// \returns True if all of the values in \p VL are identical or some of them
/// are UndefValue.
static bool isSplat(ArrayRef<Value *> VL) {
Value *FirstNonUndef = nullptr;
for (Value *V : VL) {
if (isa<UndefValue>(V))
continue;
if (!FirstNonUndef) {
FirstNonUndef = V;
continue;
}
if (V != FirstNonUndef)
return false;
}
return FirstNonUndef != nullptr;
}
/// \returns True if \p I is commutative, handles CmpInst and BinaryOperator.
static bool isCommutative(Instruction *I) {
if (auto *Cmp = dyn_cast<CmpInst>(I))
return Cmp->isCommutative();
if (auto *BO = dyn_cast<BinaryOperator>(I))
return BO->isCommutative();
// TODO: This should check for generic Instruction::isCommutative(), but
// we need to confirm that the caller code correctly handles Intrinsics
// for example (does not have 2 operands).
return false;
}
/// Checks if the given value is actually an undefined constant vector.
static bool isUndefVector(const Value *V) {
if (isa<UndefValue>(V))
return true;
auto *C = dyn_cast<Constant>(V);
if (!C)
return false;
if (!C->containsUndefOrPoisonElement())
return false;
auto *VecTy = dyn_cast<FixedVectorType>(C->getType());
if (!VecTy)
return false;
for (unsigned I = 0, E = VecTy->getNumElements(); I != E; ++I) {
if (Constant *Elem = C->getAggregateElement(I))
if (!isa<UndefValue>(Elem))
return false;
}
return true;
}
/// Checks if the vector of instructions can be represented as a shuffle, like:
/// %x0 = extractelement <4 x i8> %x, i32 0
/// %x3 = extractelement <4 x i8> %x, i32 3
/// %y1 = extractelement <4 x i8> %y, i32 1
/// %y2 = extractelement <4 x i8> %y, i32 2
/// %x0x0 = mul i8 %x0, %x0
/// %x3x3 = mul i8 %x3, %x3
/// %y1y1 = mul i8 %y1, %y1
/// %y2y2 = mul i8 %y2, %y2
/// %ins1 = insertelement <4 x i8> poison, i8 %x0x0, i32 0
/// %ins2 = insertelement <4 x i8> %ins1, i8 %x3x3, i32 1
/// %ins3 = insertelement <4 x i8> %ins2, i8 %y1y1, i32 2
/// %ins4 = insertelement <4 x i8> %ins3, i8 %y2y2, i32 3
/// ret <4 x i8> %ins4
/// can be transformed into:
/// %1 = shufflevector <4 x i8> %x, <4 x i8> %y, <4 x i32> <i32 0, i32 3, i32 5,
/// i32 6>
/// %2 = mul <4 x i8> %1, %1
/// ret <4 x i8> %2
/// We convert this initially to something like:
/// %x0 = extractelement <4 x i8> %x, i32 0
/// %x3 = extractelement <4 x i8> %x, i32 3
/// %y1 = extractelement <4 x i8> %y, i32 1
/// %y2 = extractelement <4 x i8> %y, i32 2
/// %1 = insertelement <4 x i8> poison, i8 %x0, i32 0
/// %2 = insertelement <4 x i8> %1, i8 %x3, i32 1
/// %3 = insertelement <4 x i8> %2, i8 %y1, i32 2
/// %4 = insertelement <4 x i8> %3, i8 %y2, i32 3
/// %5 = mul <4 x i8> %4, %4
/// %6 = extractelement <4 x i8> %5, i32 0
/// %ins1 = insertelement <4 x i8> poison, i8 %6, i32 0
/// %7 = extractelement <4 x i8> %5, i32 1
/// %ins2 = insertelement <4 x i8> %ins1, i8 %7, i32 1
/// %8 = extractelement <4 x i8> %5, i32 2
/// %ins3 = insertelement <4 x i8> %ins2, i8 %8, i32 2
/// %9 = extractelement <4 x i8> %5, i32 3
/// %ins4 = insertelement <4 x i8> %ins3, i8 %9, i32 3
/// ret <4 x i8> %ins4
/// InstCombiner transforms this into a shuffle and vector mul
/// Mask will return the Shuffle Mask equivalent to the extracted elements.
/// TODO: Can we split off and reuse the shuffle mask detection from
/// TargetTransformInfo::getInstructionThroughput?
static Optional<TargetTransformInfo::ShuffleKind>
isFixedVectorShuffle(ArrayRef<Value *> VL, SmallVectorImpl<int> &Mask) {
const auto *It =
find_if(VL, [](Value *V) { return isa<ExtractElementInst>(V); });
if (It == VL.end())
return None;
auto *EI0 = cast<ExtractElementInst>(*It);
if (isa<ScalableVectorType>(EI0->getVectorOperandType()))
return None;
unsigned Size =
cast<FixedVectorType>(EI0->getVectorOperandType())->getNumElements();
Value *Vec1 = nullptr;
Value *Vec2 = nullptr;
enum ShuffleMode { Unknown, Select, Permute };
ShuffleMode CommonShuffleMode = Unknown;
Mask.assign(VL.size(), UndefMaskElem);
for (unsigned I = 0, E = VL.size(); I < E; ++I) {
// Undef can be represented as an undef element in a vector.
if (isa<UndefValue>(VL[I]))
continue;
auto *EI = cast<ExtractElementInst>(VL[I]);
if (isa<ScalableVectorType>(EI->getVectorOperandType()))
return None;
auto *Vec = EI->getVectorOperand();
// We can extractelement from undef or poison vector.
if (isUndefVector(Vec))
continue;
// All vector operands must have the same number of vector elements.
if (cast<FixedVectorType>(Vec->getType())->getNumElements() != Size)
return None;
if (isa<UndefValue>(EI->getIndexOperand()))
continue;
auto *Idx = dyn_cast<ConstantInt>(EI->getIndexOperand());
if (!Idx)
return None;
// Undefined behavior if Idx is negative or >= Size.
if (Idx->getValue().uge(Size))
continue;
unsigned IntIdx = Idx->getValue().getZExtValue();
Mask[I] = IntIdx;
// For correct shuffling we have to have at most 2 different vector operands
// in all extractelement instructions.
if (!Vec1 || Vec1 == Vec) {
Vec1 = Vec;
} else if (!Vec2 || Vec2 == Vec) {
Vec2 = Vec;
Mask[I] += Size;
} else {
return None;
}
if (CommonShuffleMode == Permute)
continue;
// If the extract index is not the same as the operation number, it is a
// permutation.
if (IntIdx != I) {
CommonShuffleMode = Permute;
continue;
}
CommonShuffleMode = Select;
}
// If we're not crossing lanes in different vectors, consider it as blending.
if (CommonShuffleMode == Select && Vec2)
return TargetTransformInfo::SK_Select;
// If Vec2 was never used, we have a permutation of a single vector, otherwise
// we have permutation of 2 vectors.
return Vec2 ? TargetTransformInfo::SK_PermuteTwoSrc
: TargetTransformInfo::SK_PermuteSingleSrc;
}
namespace {
/// Main data required for vectorization of instructions.
struct InstructionsState {
/// The very first instruction in the list with the main opcode.
Value *OpValue = nullptr;
/// The main/alternate instruction.
Instruction *MainOp = nullptr;
Instruction *AltOp = nullptr;
/// The main/alternate opcodes for the list of instructions.
unsigned getOpcode() const {
return MainOp ? MainOp->getOpcode() : 0;
}
unsigned getAltOpcode() const {
return AltOp ? AltOp->getOpcode() : 0;
}
/// Some of the instructions in the list have alternate opcodes.
bool isAltShuffle() const { return AltOp != MainOp; }
bool isOpcodeOrAlt(Instruction *I) const {
unsigned CheckedOpcode = I->getOpcode();
return getOpcode() == CheckedOpcode || getAltOpcode() == CheckedOpcode;
}
InstructionsState() = delete;
InstructionsState(Value *OpValue, Instruction *MainOp, Instruction *AltOp)
: OpValue(OpValue), MainOp(MainOp), AltOp(AltOp) {}
};
} // end anonymous namespace
/// Chooses the correct key for scheduling data. If \p Op has the same (or
/// alternate) opcode as \p OpValue, the key is \p Op. Otherwise the key is \p
/// OpValue.
static Value *isOneOf(const InstructionsState &S, Value *Op) {
auto *I = dyn_cast<Instruction>(Op);
if (I && S.isOpcodeOrAlt(I))
return Op;
return S.OpValue;
}
/// \returns true if \p Opcode is allowed as part of the main/alternate
/// instruction for SLP vectorization.
///
/// Example of unsupported opcode is SDIV that can potentially cause UB if the
/// "shuffled out" lane would result in division by zero.
static bool isValidForAlternation(unsigned Opcode) {
if (Instruction::isIntDivRem(Opcode))
return false;
return true;
}
/// \returns an analysis of the Instructions in \p VL described in
/// InstructionsState: the opcode with which we suppose the whole list
/// could be vectorized, even if its structure is diverse.
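/// For example, {add, sub, add, sub} yields MainOp = add and AltOp = sub,
/// which can later be vectorized as a vector add and a vector sub blended by
/// a shufflevector, while a mix like {add, udiv} fails the analysis because
/// integer division is not a valid alternate opcode.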
static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
unsigned BaseIndex = 0) {
// Make sure these are all Instructions.
if (llvm::any_of(VL, [](Value *V) { return !isa<Instruction>(V); }))
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
bool IsCastOp = isa<CastInst>(VL[BaseIndex]);
bool IsBinOp = isa<BinaryOperator>(VL[BaseIndex]);
unsigned Opcode = cast<Instruction>(VL[BaseIndex])->getOpcode();
unsigned AltOpcode = Opcode;
unsigned AltIndex = BaseIndex;
// Check for one alternate opcode from another BinaryOperator.
// TODO - generalize to support all operators (types, calls etc.).
for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) {
unsigned InstOpcode = cast<Instruction>(VL[Cnt])->getOpcode();
if (IsBinOp && isa<BinaryOperator>(VL[Cnt])) {
if (InstOpcode == Opcode || InstOpcode == AltOpcode)
continue;
if (Opcode == AltOpcode && isValidForAlternation(InstOpcode) &&
isValidForAlternation(Opcode)) {
AltOpcode = InstOpcode;
AltIndex = Cnt;
continue;
}
} else if (IsCastOp && isa<CastInst>(VL[Cnt])) {
Type *Ty0 = cast<Instruction>(VL[BaseIndex])->getOperand(0)->getType();
Type *Ty1 = cast<Instruction>(VL[Cnt])->getOperand(0)->getType();
if (Ty0 == Ty1) {
if (InstOpcode == Opcode || InstOpcode == AltOpcode)
continue;
if (Opcode == AltOpcode) {
assert(isValidForAlternation(Opcode) &&
isValidForAlternation(InstOpcode) &&
"Cast isn't safe for alternation, logic needs to be updated!");
AltOpcode = InstOpcode;
AltIndex = Cnt;
continue;
}
}
} else if (InstOpcode == Opcode || InstOpcode == AltOpcode)
continue;
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
}
return InstructionsState(VL[BaseIndex], cast<Instruction>(VL[BaseIndex]),
cast<Instruction>(VL[AltIndex]));
}
/// \returns true if all of the values in \p VL have the same type or false
/// otherwise.
static bool allSameType(ArrayRef<Value *> VL) {
Type *Ty = VL[0]->getType();
for (int i = 1, e = VL.size(); i < e; i++)
if (VL[i]->getType() != Ty)
return false;
return true;
}
/// \returns True if Extract{Value,Element} instruction extracts element Idx.
static Optional<unsigned> getExtractIndex(Instruction *E) {
unsigned Opcode = E->getOpcode();
assert((Opcode == Instruction::ExtractElement ||
Opcode == Instruction::ExtractValue) &&
"Expected extractelement or extractvalue instruction.");
if (Opcode == Instruction::ExtractElement) {
auto *CI = dyn_cast<ConstantInt>(E->getOperand(1));
if (!CI)
return None;
return CI->getZExtValue();
}
ExtractValueInst *EI = cast<ExtractValueInst>(E);
if (EI->getNumIndices() != 1)
return None;
return *EI->idx_begin();
}
/// \returns True if in-tree use also needs extract. This refers to
/// possible scalar operand in vectorized instruction.
static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
TargetLibraryInfo *TLI) {
unsigned Opcode = UserInst->getOpcode();
switch (Opcode) {
case Instruction::Load: {
LoadInst *LI = cast<LoadInst>(UserInst);
return (LI->getPointerOperand() == Scalar);
}
case Instruction::Store: {
StoreInst *SI = cast<StoreInst>(UserInst);
return (SI->getPointerOperand() == Scalar);
}
case Instruction::Call: {
CallInst *CI = cast<CallInst>(UserInst);
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
for (unsigned i = 0, e = CI->arg_size(); i != e; ++i) {
if (hasVectorInstrinsicScalarOpd(ID, i))
return (CI->getArgOperand(i) == Scalar);
}
LLVM_FALLTHROUGH;
}
default:
return false;
}
}
/// \returns the AA location that is being access by the instruction.
static MemoryLocation getLocation(Instruction *I) {
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return MemoryLocation::get(SI);
if (LoadInst *LI = dyn_cast<LoadInst>(I))
return MemoryLocation::get(LI);
return MemoryLocation();
}
/// \returns True if the instruction is not a volatile or atomic load/store.
static bool isSimple(Instruction *I) {
if (LoadInst *LI = dyn_cast<LoadInst>(I))
return LI->isSimple();
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return SI->isSimple();
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
return !MI->isVolatile();
return true;
}
/// Shuffles \p Mask in accordance with the given \p SubMask.
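/// For example, with Mask = {1, 0, 3, 2} and SubMask = {2, 3, 0, 1} the
/// composed result is {3, 2, 1, 0}: entry I of the new mask is
/// Mask[SubMask[I]].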
static void addMask(SmallVectorImpl<int> &Mask, ArrayRef<int> SubMask) {
if (SubMask.empty())
return;
if (Mask.empty()) {
Mask.append(SubMask.begin(), SubMask.end());
return;
}
SmallVector<int> NewMask(SubMask.size(), UndefMaskElem);
int TermValue = std::min(Mask.size(), SubMask.size());
for (int I = 0, E = SubMask.size(); I < E; ++I) {
if (SubMask[I] >= TermValue || SubMask[I] == UndefMaskElem ||
Mask[SubMask[I]] >= TermValue)
continue;
NewMask[I] = Mask[SubMask[I]];
}
Mask.swap(NewMask);
}
/// Order may have elements assigned a special value (size) which is out of
/// bounds. Such indices only appear in places which correspond to undef values
/// (see canReuseExtract for details) and are used to avoid letting undef values
/// affect the operand ordering.
/// The first loop below simply finds all unused indices and then the next loop
/// nest assigns these indices for undef values positions.
/// As an example below Order has two undef positions and they have assigned
/// values 3 and 7 respectively:
/// before: 6 9 5 4 9 2 1 0
/// after: 6 3 5 4 7 2 1 0
static void fixupOrderingIndices(SmallVectorImpl<unsigned> &Order) {
const unsigned Sz = Order.size();
SmallBitVector UnusedIndices(Sz, /*t=*/true);
SmallBitVector MaskedIndices(Sz);
for (unsigned I = 0; I < Sz; ++I) {
if (Order[I] < Sz)
UnusedIndices.reset(Order[I]);
else
MaskedIndices.set(I);
}
if (MaskedIndices.none())
return;
assert(UnusedIndices.count() == MaskedIndices.count() &&
"Non-synced masked/available indices.");
int Idx = UnusedIndices.find_first();
int MIdx = MaskedIndices.find_first();
while (MIdx >= 0) {
assert(Idx >= 0 && "Indices must be synced.");
Order[MIdx] = Idx;
Idx = UnusedIndices.find_next(Idx);
MIdx = MaskedIndices.find_next(MIdx);
}
}
namespace llvm {
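/// Builds the mask that is the inverse of the given permutation, e.g.
/// Indices = {2, 0, 1} produces Mask = {1, 2, 0}, so that
/// Mask[Indices[I]] == I for every I.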
static void inversePermutation(ArrayRef<unsigned> Indices,
SmallVectorImpl<int> &Mask) {
Mask.clear();
const unsigned E = Indices.size();
Mask.resize(E, UndefMaskElem);
for (unsigned I = 0; I < E; ++I)
Mask[Indices[I]] = I;
}
/// \returns inserting index of InsertElement or InsertValue instruction,
/// using Offset as base offset for index.
-static Optional<int> getInsertIndex(Value *InsertInst, unsigned Offset) {
+static Optional<unsigned> getInsertIndex(Value *InsertInst,
+ unsigned Offset = 0) {
int Index = Offset;
if (auto *IE = dyn_cast<InsertElementInst>(InsertInst)) {
if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2))) {
auto *VT = cast<FixedVectorType>(IE->getType());
if (CI->getValue().uge(VT->getNumElements()))
- return UndefMaskElem;
+ return None;
Index *= VT->getNumElements();
Index += CI->getZExtValue();
return Index;
}
- if (isa<UndefValue>(IE->getOperand(2)))
- return UndefMaskElem;
return None;
}
auto *IV = cast<InsertValueInst>(InsertInst);
Type *CurrentType = IV->getType();
for (unsigned I : IV->indices()) {
if (auto *ST = dyn_cast<StructType>(CurrentType)) {
Index *= ST->getNumElements();
CurrentType = ST->getElementType(I);
} else if (auto *AT = dyn_cast<ArrayType>(CurrentType)) {
Index *= AT->getNumElements();
CurrentType = AT->getElementType();
} else {
return None;
}
Index += I;
}
return Index;
}
/// Reorders the list of scalars in accordance with the given \p Order and then
/// the \p Mask. \p Order - is the original order of the scalars, need to
/// reorder scalars into an unordered state at first according to the given
/// order. Then the ordered scalars are shuffled once again in accordance with
/// the provided mask.
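/// For example, Scalars = {a, b, c, d} with Mask = {1, 2, 3, 0} becomes
/// {d, a, b, c}, since each old element Prev[I] is placed at position Mask[I].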
static void reorderScalars(SmallVectorImpl<Value *> &Scalars,
ArrayRef<int> Mask) {
assert(!Mask.empty() && "Expected non-empty mask.");
SmallVector<Value *> Prev(Scalars.size(),
UndefValue::get(Scalars.front()->getType()));
Prev.swap(Scalars);
for (unsigned I = 0, E = Prev.size(); I < E; ++I)
if (Mask[I] != UndefMaskElem)
Scalars[Mask[I]] = Prev[I];
}
namespace slpvectorizer {
/// Bottom Up SLP Vectorizer.
class BoUpSLP {
struct TreeEntry;
struct ScheduleData;
public:
using ValueList = SmallVector<Value *, 8>;
using InstrList = SmallVector<Instruction *, 16>;
using ValueSet = SmallPtrSet<Value *, 16>;
using StoreList = SmallVector<StoreInst *, 8>;
using ExtraValueToDebugLocsMap =
MapVector<Value *, SmallVector<Instruction *, 2>>;
using OrdersType = SmallVector<unsigned, 4>;
BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti,
TargetLibraryInfo *TLi, AAResults *Aa, LoopInfo *Li,
DominatorTree *Dt, AssumptionCache *AC, DemandedBits *DB,
const DataLayout *DL, OptimizationRemarkEmitter *ORE)
: F(Func), SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt), AC(AC),
DB(DB), DL(DL), ORE(ORE), Builder(Se->getContext()) {
CodeMetrics::collectEphemeralValues(F, AC, EphValues);
// Use the vector register size specified by the target unless overridden
// by a command-line option.
// TODO: It would be better to limit the vectorization factor based on
// data type rather than just register size. For example, x86 AVX has
// 256-bit registers, but it does not support integer operations
// at that width (that requires AVX2).
if (MaxVectorRegSizeOption.getNumOccurrences())
MaxVecRegSize = MaxVectorRegSizeOption;
else
MaxVecRegSize =
TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
.getFixedSize();
if (MinVectorRegSizeOption.getNumOccurrences())
MinVecRegSize = MinVectorRegSizeOption;
else
MinVecRegSize = TTI->getMinVectorRegisterBitWidth();
}
/// Vectorize the tree that starts with the elements in \p VL.
/// Returns the vectorized root.
Value *vectorizeTree();
/// Vectorize the tree but with the list of externally used values \p
/// ExternallyUsedValues. Values in this MapVector can be replaced by the
/// generated extractvalue instructions.
Value *vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues);
/// \returns the cost incurred by unwanted spills and fills, caused by
/// holding live values over call sites.
InstructionCost getSpillCost() const;
/// \returns the vectorization cost of the subtree that starts at \p VL.
/// A negative number means that this is profitable.
InstructionCost getTreeCost(ArrayRef<Value *> VectorizedVals = None);
/// Construct a vectorizable tree that starts at \p Roots, ignoring users for
/// the purpose of scheduling and extraction in the \p UserIgnoreLst.
void buildTree(ArrayRef<Value *> Roots,
ArrayRef<Value *> UserIgnoreLst = None);
/// Builds external uses of the vectorized scalars, i.e. the list of
/// vectorized scalars to be extracted, their lanes and their scalar users. \p
/// ExternallyUsedValues contains additional list of external uses to handle
/// vectorization of reductions.
void
buildExternalUses(const ExtraValueToDebugLocsMap &ExternallyUsedValues = {});
/// Clear the internal data structures that are created by 'buildTree'.
void deleteTree() {
VectorizableTree.clear();
ScalarToTreeEntry.clear();
MustGather.clear();
ExternalUses.clear();
for (auto &Iter : BlocksSchedules) {
BlockScheduling *BS = Iter.second.get();
BS->clear();
}
MinBWs.clear();
InstrElementSize.clear();
}
unsigned getTreeSize() const { return VectorizableTree.size(); }
/// Perform LICM and CSE on the newly generated gather sequences.
void optimizeGatherSequence();
/// Checks if the specified gather tree entry \p TE can be represented as a
/// shuffled vector entry + (possibly) permutation with other gathers. It
/// implements the checks only for possibly ordered scalars (Loads,
/// ExtractElement, ExtractValue), which can be part of the graph.
Optional<OrdersType> findReusedOrderedScalars(const TreeEntry &TE);
/// Gets reordering data for the given tree entry. If the entry is vectorized
/// - just return ReorderIndices, otherwise check if the scalars can be
/// reordered and return the most optimal order.
/// \param TopToBottom If true, include the order of vectorized stores and
/// insertelement nodes, otherwise skip them.
Optional<OrdersType> getReorderingData(const TreeEntry &TE, bool TopToBottom);
/// Reorders the current graph to the most profitable order starting from the
/// root node to the leaf nodes. The best order is chosen only from the nodes
/// of the same size (vectorization factor). Smaller nodes are considered
/// parts of a subgraph with a smaller VF and they are reordered independently. We
/// can do this because we still need to extend smaller nodes to the wider VF
/// and we can merge reordering shuffles with the widening shuffles.
void reorderTopToBottom();
/// Reorders the current graph to the most profitable order starting from the
/// leaves to the root. This allows rotating small subgraphs and reducing the
/// number of reshuffles if the leaf nodes use the same order. In this case we
/// can merge the orders and just shuffle the user node instead of shuffling its
/// operands. Plus, even if the leaf nodes have different orders, it allows
/// sinking the reordering in the graph closer to the root node and merging it
/// later during analysis.
void reorderBottomToTop(bool IgnoreReorder = false);
/// \return The vector element size in bits to use when vectorizing the
/// expression tree ending at \p V. If V is a store, the size is the width of
/// the stored value. Otherwise, the size is the width of the largest loaded
/// value reaching V. This method is used by the vectorizer to calculate
/// vectorization factors.
unsigned getVectorElementSize(Value *V);
/// Compute the minimum type sizes required to represent the entries in a
/// vectorizable tree.
void computeMinimumValueSizes();
// \returns maximum vector register size as set by TTI or overridden by cl::opt.
unsigned getMaxVecRegSize() const {
return MaxVecRegSize;
}
// \returns minimum vector register size as set by cl::opt.
unsigned getMinVecRegSize() const {
return MinVecRegSize;
}
unsigned getMinVF(unsigned Sz) const {
return std::max(2U, getMinVecRegSize() / Sz);
}
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
unsigned MaxVF = MaxVFOption.getNumOccurrences() ?
MaxVFOption : TTI->getMaximumVF(ElemWidth, Opcode);
return MaxVF ? MaxVF : UINT_MAX;
}
/// Check if homogeneous aggregate is isomorphic to some VectorType.
/// Accepts homogeneous multidimensional aggregate of scalars/vectors like
/// {[4 x i16], [4 x i16]}, { <2 x float>, <2 x float> },
/// {{{i16, i16}, {i16, i16}}, {{i16, i16}, {i16, i16}}} and so on.
///
/// \returns number of elements in vector if isomorphism exists, 0 otherwise.
unsigned canMapToVector(Type *T, const DataLayout &DL) const;
/// \returns True if the VectorizableTree is both tiny and not fully
/// vectorizable. We do not vectorize such trees.
bool isTreeTinyAndNotFullyVectorizable(bool ForReduction = false) const;
/// Assume that a legal-sized 'or'-reduction of shifted/zexted loaded values
/// can be load combined in the backend. Load combining may not be allowed in
/// the IR optimizer, so we do not want to alter the pattern. For example,
/// partially transforming a scalar bswap() pattern into vector code is
/// effectively impossible for the backend to undo.
/// TODO: If load combining is allowed in the IR optimizer, this analysis
/// may not be necessary.
bool isLoadCombineReductionCandidate(RecurKind RdxKind) const;
/// Assume that a vector of stores of bitwise-or/shifted/zexted loaded values
/// can be load combined in the backend. Load combining may not be allowed in
/// the IR optimizer, so we do not want to alter the pattern. For example,
/// partially transforming a scalar bswap() pattern into vector code is
/// effectively impossible for the backend to undo.
/// TODO: If load combining is allowed in the IR optimizer, this analysis
/// may not be necessary.
bool isLoadCombineCandidate() const;
OptimizationRemarkEmitter *getORE() { return ORE; }
/// This structure holds any data we need about the edges being traversed
/// during buildTree_rec(). We keep track of:
/// (i) the user TreeEntry index, and
/// (ii) the index of the edge.
struct EdgeInfo {
EdgeInfo() = default;
EdgeInfo(TreeEntry *UserTE, unsigned EdgeIdx)
: UserTE(UserTE), EdgeIdx(EdgeIdx) {}
/// The user TreeEntry.
TreeEntry *UserTE = nullptr;
/// The operand index of the use.
unsigned EdgeIdx = UINT_MAX;
#ifndef NDEBUG
friend inline raw_ostream &operator<<(raw_ostream &OS,
const BoUpSLP::EdgeInfo &EI) {
EI.dump(OS);
return OS;
}
/// Debug print.
void dump(raw_ostream &OS) const {
OS << "{User:" << (UserTE ? std::to_string(UserTE->Idx) : "null")
<< " EdgeIdx:" << EdgeIdx << "}";
}
LLVM_DUMP_METHOD void dump() const { dump(dbgs()); }
#endif
};
/// A helper data structure to hold the operands of a vector of instructions.
/// This supports a fixed vector length for all operand vectors.
class VLOperands {
/// For each operand we need (i) the value, and (ii) the opcode that it
/// would be attached to if the expression was in a left-linearized form.
/// This is required to avoid illegal operand reordering.
/// For example:
/// \verbatim
/// 0 Op1
/// |/
/// Op1 Op2 Linearized + Op2
/// \ / ----------> |/
/// - -
///
/// Op1 - Op2 (0 + Op1) - Op2
/// \endverbatim
///
/// Value Op1 is attached to a '+' operation, and Op2 to a '-'.
///
/// Another way to think of this is to track all the operations across the
/// path from the operand all the way to the root of the tree and to
/// calculate the operation that corresponds to this path. For example, the
/// path from Op2 to the root crosses the RHS of the '-', therefore the
/// corresponding operation is a '-' (which matches the one in the
/// linearized tree, as shown above).
///
/// For lack of a better term, we refer to this operation as Accumulated
/// Path Operation (APO).
struct OperandData {
OperandData() = default;
OperandData(Value *V, bool APO, bool IsUsed)
: V(V), APO(APO), IsUsed(IsUsed) {}
/// The operand value.
Value *V = nullptr;
/// TreeEntries only allow a single opcode, or an alternate sequence of
/// them (e.g, +, -). Therefore, we can safely use a boolean value for the
/// APO. It is set to 'true' if 'V' is attached to an inverse operation
/// in the left-linearized form (e.g., Sub/Div), and 'false' otherwise
/// (e.g., Add/Mul)
bool APO = false;
/// Helper data for the reordering function.
bool IsUsed = false;
};
/// During operand reordering, we are trying to select the operand at lane
/// that matches best with the operand at the neighboring lane. Our
/// selection is based on the type of value we are looking for. For example,
/// if the neighboring lane has a load, we need to look for a load that is
/// accessing a consecutive address. These strategies are summarized in the
/// 'ReorderingMode' enumerator.
enum class ReorderingMode {
Load, ///< Matching loads to consecutive memory addresses
Opcode, ///< Matching instructions based on opcode (same or alternate)
Constant, ///< Matching constants
Splat, ///< Matching the same instruction multiple times (broadcast)
Failed, ///< We failed to create a vectorizable group
};
using OperandDataVec = SmallVector<OperandData, 2>;
/// A vector of operand vectors.
SmallVector<OperandDataVec, 4> OpsVec;
const DataLayout &DL;
ScalarEvolution &SE;
const BoUpSLP &R;
/// \returns the operand data at \p OpIdx and \p Lane.
OperandData &getData(unsigned OpIdx, unsigned Lane) {
return OpsVec[OpIdx][Lane];
}
/// \returns the operand data at \p OpIdx and \p Lane. Const version.
const OperandData &getData(unsigned OpIdx, unsigned Lane) const {
return OpsVec[OpIdx][Lane];
}
/// Clears the used flag for all entries.
void clearUsed() {
for (unsigned OpIdx = 0, NumOperands = getNumOperands();
OpIdx != NumOperands; ++OpIdx)
for (unsigned Lane = 0, NumLanes = getNumLanes(); Lane != NumLanes;
++Lane)
OpsVec[OpIdx][Lane].IsUsed = false;
}
/// Swap the operand at \p OpIdx1 with that one at \p OpIdx2.
void swap(unsigned OpIdx1, unsigned OpIdx2, unsigned Lane) {
std::swap(OpsVec[OpIdx1][Lane], OpsVec[OpIdx2][Lane]);
}
// The hard-coded scores listed here are not very important, though they
// should be higher for better matches to improve the resulting cost. When
// computing the scores of matching one sub-tree with another, we are
// basically counting the number of values that match. So even if all
// scores were set to 1, we would still get a decent matching result.
// However, sometimes we have to break ties. For example, we may have to
// choose between matching loads vs matching opcodes. This is what these
// scores help us with: they provide the order of preference. This also
// matters if the scalar is externally used or used in another tree entry
// node in a different lane.
/// Loads from consecutive memory addresses, e.g. load(A[i]), load(A[i+1]).
static const int ScoreConsecutiveLoads = 4;
/// Loads from reversed memory addresses, e.g. load(A[i+1]), load(A[i]).
static const int ScoreReversedLoads = 3;
/// ExtractElementInst from same vector and consecutive indexes.
static const int ScoreConsecutiveExtracts = 4;
/// ExtractElementInst from same vector and reversed indices.
static const int ScoreReversedExtracts = 3;
/// Constants.
static const int ScoreConstants = 2;
/// Instructions with the same opcode.
static const int ScoreSameOpcode = 2;
/// Instructions with alternate opcodes (e.g., add + sub).
static const int ScoreAltOpcodes = 1;
/// Identical instructions (a.k.a. splat or broadcast).
static const int ScoreSplat = 1;
/// Matching with an undef is preferable to failing.
static const int ScoreUndef = 1;
/// Score for failing to find a decent match.
static const int ScoreFail = 0;
/// User external to the vectorized code.
static const int ExternalUseCost = 1;
/// The user is internal but in a different lane.
static const int UserInDiffLaneCost = ExternalUseCost;
/// \returns the score of placing \p V1 and \p V2 in consecutive lanes.
static int getShallowScore(Value *V1, Value *V2, const DataLayout &DL,
ScalarEvolution &SE, int NumLanes) {
if (V1 == V2)
return VLOperands::ScoreSplat;
auto *LI1 = dyn_cast<LoadInst>(V1);
auto *LI2 = dyn_cast<LoadInst>(V2);
if (LI1 && LI2) {
if (LI1->getParent() != LI2->getParent())
return VLOperands::ScoreFail;
Optional<int> Dist = getPointersDiff(
LI1->getType(), LI1->getPointerOperand(), LI2->getType(),
LI2->getPointerOperand(), DL, SE, /*StrictCheck=*/true);
if (!Dist)
return VLOperands::ScoreFail;
// The distance is too large - still may be profitable to use masked
// loads/gathers.
if (std::abs(*Dist) > NumLanes / 2)
return VLOperands::ScoreAltOpcodes;
// This still will detect consecutive loads, but we might have "holes"
// in some cases. It is ok for non-power-2 vectorization and may produce
// better results. It should not affect current vectorization.
return (*Dist > 0) ? VLOperands::ScoreConsecutiveLoads
: VLOperands::ScoreReversedLoads;
}
auto *C1 = dyn_cast<Constant>(V1);
auto *C2 = dyn_cast<Constant>(V2);
if (C1 && C2)
return VLOperands::ScoreConstants;
// Extracts from consecutive indexes of the same vector score better, as
// the extracts could be optimized away.
Value *EV1;
ConstantInt *Ex1Idx;
if (match(V1, m_ExtractElt(m_Value(EV1), m_ConstantInt(Ex1Idx)))) {
// Undefs are always profitable for extractelements.
if (isa<UndefValue>(V2))
return VLOperands::ScoreConsecutiveExtracts;
Value *EV2 = nullptr;
ConstantInt *Ex2Idx = nullptr;
if (match(V2,
m_ExtractElt(m_Value(EV2), m_CombineOr(m_ConstantInt(Ex2Idx),
m_Undef())))) {
// Undefs are always profitable for extractelements.
if (!Ex2Idx)
return VLOperands::ScoreConsecutiveExtracts;
if (isUndefVector(EV2) && EV2->getType() == EV1->getType())
return VLOperands::ScoreConsecutiveExtracts;
if (EV2 == EV1) {
int Idx1 = Ex1Idx->getZExtValue();
int Idx2 = Ex2Idx->getZExtValue();
int Dist = Idx2 - Idx1;
// The distance is too large - still may be profitable to use
// shuffles.
if (std::abs(Dist) > NumLanes / 2)
return VLOperands::ScoreAltOpcodes;
return (Dist > 0) ? VLOperands::ScoreConsecutiveExtracts
: VLOperands::ScoreReversedExtracts;
}
}
}
auto *I1 = dyn_cast<Instruction>(V1);
auto *I2 = dyn_cast<Instruction>(V2);
if (I1 && I2) {
if (I1->getParent() != I2->getParent())
return VLOperands::ScoreFail;
InstructionsState S = getSameOpcode({I1, I2});
// Note: Only consider instructions with <= 2 operands to avoid
// complexity explosion.
if (S.getOpcode() && S.MainOp->getNumOperands() <= 2)
return S.isAltShuffle() ? VLOperands::ScoreAltOpcodes
: VLOperands::ScoreSameOpcode;
}
if (isa<UndefValue>(V2))
return VLOperands::ScoreUndef;
return VLOperands::ScoreFail;
}
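// Rough worked examples of the shallow score (hypothetical values, assuming
// both instructions live in the same basic block and NumLanes == 4):
//   getShallowScore(load A[0], load A[1]) -> ScoreConsecutiveLoads (4)
//   getShallowScore(load A[1], load A[0]) -> ScoreReversedLoads    (3)
//   getShallowScore(a + b,     c + d)     -> ScoreSameOpcode       (2)
//   getShallowScore(x,         x)         -> ScoreSplat            (1)
//   getShallowScore(a + b,     undef)     -> ScoreUndef            (1)
//   getShallowScore(a + b,     load A[0]) -> ScoreFail             (0)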
/// Holds the values and their lanes that are taking part in the look-ahead
/// score calculation. This is used in the external uses cost calculation.
/// We need to hold all the lanes, at least in the splat/broadcast case, to
/// correctly check for a use in a different lane.
SmallDenseMap<Value *, SmallSet<int, 4>> InLookAheadValues;
/// \returns the additional cost due to uses of \p LHS and \p RHS that are
/// either external to the vectorized code, or require shuffling.
int getExternalUsesCost(const std::pair<Value *, int> &LHS,
const std::pair<Value *, int> &RHS) {
int Cost = 0;
std::array<std::pair<Value *, int>, 2> Values = {{LHS, RHS}};
for (int Idx = 0, IdxE = Values.size(); Idx != IdxE; ++Idx) {
Value *V = Values[Idx].first;
if (isa<Constant>(V)) {
// Since this is a function pass, it doesn't make semantic sense to
// walk the users of a subclass of Constant. The users could be in
// another function, or even another module that happens to be in
// the same LLVMContext.
continue;
}
// Calculate the absolute lane, using the minimum relative lane of LHS
// and RHS as base and Idx as the offset.
int Ln = std::min(LHS.second, RHS.second) + Idx;
assert(Ln >= 0 && "Bad lane calculation");
unsigned UsersBudget = LookAheadUsersBudget;
for (User *U : V->users()) {
if (const TreeEntry *UserTE = R.getTreeEntry(U)) {
// The user is in the VectorizableTree. Check if we need to insert.
int UserLn = UserTE->findLaneForValue(U);
assert(UserLn >= 0 && "Bad lane");
// If the values are different, check just the lane of the current
// value. If the values are the same, add UserInDiffLaneCost only if
// UserLn matches neither of the two lanes.
if ((LHS.first != RHS.first && UserLn != Ln) ||
(LHS.first == RHS.first && UserLn != LHS.second &&
UserLn != RHS.second)) {
Cost += UserInDiffLaneCost;
break;
}
} else {
// Check if the user is in the look-ahead code.
auto It2 = InLookAheadValues.find(U);
if (It2 != InLookAheadValues.end()) {
// The user is in the look-ahead code. Check the lane.
if (!It2->getSecond().contains(Ln)) {
Cost += UserInDiffLaneCost;
break;
}
} else {
// The user is neither in SLP tree nor in the look-ahead code.
Cost += ExternalUseCost;
break;
}
}
// Limit the number of visited uses to cap compilation time.
if (--UsersBudget == 0)
break;
}
}
return Cost;
}
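// Hypothetical illustration of the extra cost (names are assumed): if the
// scalar in LHS is also used by a store that is neither in the SLP tree nor
// in InLookAheadValues, ExternalUseCost (1) is added; if instead its only
// extra user sits in another tree entry but at a different lane,
// UserInDiffLaneCost (1) is added. A value with no extra users, or a
// Constant, contributes nothing.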
/// Go through the operands of \p LHS and \p RHS recursively until \p
/// MaxLevel, and return the cumulative score. For example:
/// \verbatim
///  A[0]  B[0]  A[1]  B[1]  C[0]  D[0]  B[1]  A[1]
///     \ /         \ /         \ /         \ /
///      +           +           +           +
///      G1          G2          G3          G4
/// \endverbatim
/// The getScoreAtLevelRec(G1, G2) function will try to match the nodes at
/// each level recursively, accumulating the score. It starts from matching
/// the additions at level 0, then moves on to the loads (level 1). The
/// score of G1 and G2 is higher than G1 and G3, because {A[0],A[1]} and
/// {B[0],B[1]} match with VLOperands::ScoreConsecutiveLoads, while
/// {A[0],C[0]} has a score of VLOperands::ScoreFail.
/// Please note that the order of the operands does not matter, as we
/// evaluate the score of all profitable combinations of operands. In
/// other words the score of G1 and G4 is the same as G1 and G2. This
/// heuristic is based on ideas described in:
/// Look-ahead SLP: Auto-vectorization in the presence of commutative
/// operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha,
/// Luís F. W. Góes
int getScoreAtLevelRec(const std::pair<Value *, int> &LHS,
const std::pair<Value *, int> &RHS, int CurrLevel,
int MaxLevel) {
Value *V1 = LHS.first;
Value *V2 = RHS.first;
// Get the shallow score of V1 and V2.
int ShallowScoreAtThisLevel = std::max(
(int)ScoreFail, getShallowScore(V1, V2, DL, SE, getNumLanes()) -
getExternalUsesCost(LHS, RHS));
int Lane1 = LHS.second;
int Lane2 = RHS.second;
// Return the current score early if we have reached MaxLevel,
// or if V1 and V2 are not instructions,
// or if they are a SPLAT,
// or if they are not consecutive,
// or if it is already profitable to vectorize the loads or
// extractelements.
auto *I1 = dyn_cast<Instruction>(V1);
auto *I2 = dyn_cast<Instruction>(V2);
if (CurrLevel == MaxLevel || !(I1 && I2) || I1 == I2 ||
ShallowScoreAtThisLevel == VLOperands::ScoreFail ||
(((isa<LoadInst>(I1) && isa<LoadInst>(I2)) ||
(isa<ExtractElementInst>(I1) && isa<ExtractElementInst>(I2))) &&
ShallowScoreAtThisLevel))
return ShallowScoreAtThisLevel;
assert(I1 && I2 && "Should have early exited.");
// Keep track of in-tree values for determining the external-use cost.
InLookAheadValues[V1].insert(Lane1);
InLookAheadValues[V2].insert(Lane2);
// Contains the I2 operand indexes that got matched with I1 operands.
SmallSet<unsigned, 4> Op2Used;
// Recursion towards the operands of I1 and I2. We are trying all possible
// operand pairs, and keeping track of the best score.
for (unsigned OpIdx1 = 0, NumOperands1 = I1->getNumOperands();
OpIdx1 != NumOperands1; ++OpIdx1) {
// Try to pair the OpIdx1-th operand of I1 with the best operand of I2.
int MaxTmpScore = 0;
unsigned MaxOpIdx2 = 0;
bool FoundBest = false;
// If I2 is commutative try all combinations.
unsigned FromIdx = isCommutative(I2) ? 0 : OpIdx1;
unsigned ToIdx = isCommutative(I2)
? I2->getNumOperands()
: std::min(I2->getNumOperands(), OpIdx1 + 1);
assert(FromIdx <= ToIdx && "Bad index");
for (unsigned OpIdx2 = FromIdx; OpIdx2 != ToIdx; ++OpIdx2) {
// Skip operands already paired with OpIdx1.
if (Op2Used.count(OpIdx2))
continue;
// Recursively calculate the cost at each level
int TmpScore = getScoreAtLevelRec({I1->getOperand(OpIdx1), Lane1},
{I2->getOperand(OpIdx2), Lane2},
CurrLevel + 1, MaxLevel);
// Look for the best score.
if (TmpScore > VLOperands::ScoreFail && TmpScore > MaxTmpScore) {
MaxTmpScore = TmpScore;
MaxOpIdx2 = OpIdx2;
FoundBest = true;
}
}
if (FoundBest) {
// Pair {OpIdx1, MaxOpIdx2} was found to be best. Never revisit it.
Op2Used.insert(MaxOpIdx2);
ShallowScoreAtThisLevel += MaxTmpScore;
}
}
return ShallowScoreAtThisLevel;
}
/// \Returns the look-ahead score, which tells us how much the sub-trees
/// rooted at \p LHS and \p RHS match; the more they match, the higher the
/// score. This helps break ties in an informed way when we cannot decide on
/// the order of the operands by just considering the immediate
/// predecessors.
int getLookAheadScore(const std::pair<Value *, int> &LHS,
const std::pair<Value *, int> &RHS) {
InLookAheadValues.clear();
return getScoreAtLevelRec(LHS, RHS, 1, LookAheadMaxDepth);
}
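// Rough trace for the G1/G2 example above, assuming the default look-ahead
// depth covers two levels and ignoring any external-use cost: level 1
// matches the two '+' instructions (ScoreSameOpcode = 2), and the recursion
// then pairs {A[0], A[1]} and {B[0], B[1]}, each scoring
// ScoreConsecutiveLoads (4), for a total look-ahead score of 10. Matching
// G1 with G3 instead would stop at 2, since (assuming A, C and D are
// unrelated arrays) {A[0], C[0]} and {B[0], D[0]} both score ScoreFail.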
// Search all operands in Ops[*][Lane] for the one that best matches
// Ops[OpIdx][LastLane] and return its operand index.
// If no good match can be found, return None.
Optional<unsigned>
getBestOperand(unsigned OpIdx, int Lane, int LastLane,
ArrayRef<ReorderingMode> ReorderingModes) {
unsigned NumOperands = getNumOperands();
// The operand of the previous lane at OpIdx.
Value *OpLastLane = getData(OpIdx, LastLane).V;
// Our strategy mode for OpIdx.
ReorderingMode RMode = ReorderingModes[OpIdx];
// The linearized opcode of the operand at OpIdx, Lane.
bool OpIdxAPO = getData(OpIdx, Lane).APO;
// The best operand index and its score.
// Sometimes we have more than one option (e.g., Opcode and Undefs), so we
// are using the score to differentiate between the two.
struct BestOpData {
Optional<unsigned> Idx = None;
unsigned Score = 0;
} BestOp;
// Iterate through all unused operands and look for the best.
for (unsigned Idx = 0; Idx != NumOperands; ++Idx) {
// Get the operand at Idx and Lane.
OperandData &OpData = getData(Idx, Lane);
Value *Op = OpData.V;
bool OpAPO = OpData.APO;
// Skip already selected operands.
if (OpData.IsUsed)
continue;
// Skip if we are trying to move the operand to a position with a
// different opcode in the linearized tree form. This would break the
// semantics.
if (OpAPO != OpIdxAPO)
continue;
// Look for an operand that matches the current mode.
switch (RMode) {
case ReorderingMode::Load:
case ReorderingMode::Constant:
case ReorderingMode::Opcode: {
bool LeftToRight = Lane > LastLane;
Value *OpLeft = (LeftToRight) ? OpLastLane : Op;
Value *OpRight = (LeftToRight) ? Op : OpLastLane;
unsigned Score =
getLookAheadScore({OpLeft, LastLane}, {OpRight, Lane});
if (Score > BestOp.Score) {
BestOp.Idx = Idx;
BestOp.Score = Score;
}
break;
}
case ReorderingMode::Splat:
if (Op == OpLastLane)
BestOp.Idx = Idx;
break;
case ReorderingMode::Failed:
return None;
}
}
if (BestOp.Idx) {
getData(BestOp.Idx.getValue(), Lane).IsUsed = true;
return BestOp.Idx;
}
// If we could not find a good match return None.
return None;
}
/// Helper for reorderOperandVecs.
/// \returns the lane that we should start reordering from. This is the one
/// which has the fewest operands that can freely move about, or is the
/// least profitable to change because it already has the most optimal set
/// of operands.
unsigned getBestLaneToStartReordering() const {
unsigned Min = UINT_MAX;
unsigned SameOpNumber = 0;
// std::pair<unsigned, unsigned> is used to implement a simple voting
// algorithm and choose the lane with the fewest operands that can freely
// move about, or the least profitable one because it already has the
// most optimal set of operands. The first unsigned is a counter for
// voting, the second unsigned is the counter of lanes with instructions
// with the same/alternate opcode and the same parent basic block.
MapVector<unsigned, std::pair<unsigned, unsigned>> HashMap;
// Try to stay closer to the original results if we have multiple lanes
// with the same cost. If two lanes have the same cost, use the one with
// the lowest index.
for (int I = getNumLanes(); I > 0; --I) {
unsigned Lane = I - 1;
OperandsOrderData NumFreeOpsHash =
getMaxNumOperandsThatCanBeReordered(Lane);
// Compare the number of operands that can move and choose the one with
// the least number.
if (NumFreeOpsHash.NumOfAPOs < Min) {
Min = NumFreeOpsHash.NumOfAPOs;
SameOpNumber = NumFreeOpsHash.NumOpsWithSameOpcodeParent;
HashMap.clear();
HashMap[NumFreeOpsHash.Hash] = std::make_pair(1, Lane);
} else if (NumFreeOpsHash.NumOfAPOs == Min &&
NumFreeOpsHash.NumOpsWithSameOpcodeParent < SameOpNumber) {
// Select the most optimal lane in terms of number of operands that
// should be moved around.
SameOpNumber = NumFreeOpsHash.NumOpsWithSameOpcodeParent;
HashMap[NumFreeOpsHash.Hash] = std::make_pair(1, Lane);
} else if (NumFreeOpsHash.NumOfAPOs == Min &&
NumFreeOpsHash.NumOpsWithSameOpcodeParent == SameOpNumber) {
auto It = HashMap.find(NumFreeOpsHash.Hash);
if (It == HashMap.end())
HashMap[NumFreeOpsHash.Hash] = std::make_pair(1, Lane);
else
++It->second.first;
}
}
// Select the lane with the minimum counter.
unsigned BestLane = 0;
unsigned CntMin = UINT_MAX;
for (const auto &Data : reverse(HashMap)) {
if (Data.second.first < CntMin) {
CntMin = Data.second.first;
BestLane = Data.second.second;
}
}
return BestLane;
}
/// Data structure that helps to reorder operands.
struct OperandsOrderData {
/// The best number of operands with the same APOs, which can be
/// reordered.
unsigned NumOfAPOs = UINT_MAX;
/// Number of operands with the same/alternate instruction opcode and
/// parent.
unsigned NumOpsWithSameOpcodeParent = 0;
/// Hash for the actual operand ordering.
/// Used to count operands, actually their position id and opcode
/// value. It is used in the voting mechanism to find the lane with the
/// fewest operands that can freely move about, or the least profitable
/// one because it already has the most optimal set of operands. It could
/// be replaced with a SmallVector<unsigned>, but a hash code is faster
/// and requires less memory.
unsigned Hash = 0;
};
/// \returns the maximum number of operands that are allowed to be reordered
/// for \p Lane and the number of compatible instructions (with the same
/// parent/opcode). This is used as a heuristic for selecting the first lane
/// to start operand reordering.
OperandsOrderData getMaxNumOperandsThatCanBeReordered(unsigned Lane) const {
unsigned CntTrue = 0;
unsigned NumOperands = getNumOperands();
// Operands with the same APO can be reordered. We therefore need to count
// how many of them we have for each APO, like this: Cnt[APO] = x.
// Since we only have two APOs, namely true and false, we can avoid using
// a map. Instead we can simply count the number of operands that
// correspond to one of them (in this case the 'true' APO), and calculate
// the other by subtracting it from the total number of operands.
// Operands with the same instruction opcode and parent are more
// profitable since we don't need to move them in many cases, and with
// high probability such a lane can already be vectorized effectively.
bool AllUndefs = true;
unsigned NumOpsWithSameOpcodeParent = 0;
Instruction *OpcodeI = nullptr;
BasicBlock *Parent = nullptr;
unsigned Hash = 0;
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
const OperandData &OpData = getData(OpIdx, Lane);
if (OpData.APO)
++CntTrue;
// Use Boyer-Moore majority voting for finding the majority opcode and
// the number of times it occurs.
if (auto *I = dyn_cast<Instruction>(OpData.V)) {
if (!OpcodeI || !getSameOpcode({OpcodeI, I}).getOpcode() ||
I->getParent() != Parent) {
if (NumOpsWithSameOpcodeParent == 0) {
NumOpsWithSameOpcodeParent = 1;
OpcodeI = I;
Parent = I->getParent();
} else {
--NumOpsWithSameOpcodeParent;
}
} else {
++NumOpsWithSameOpcodeParent;
}
}
Hash = hash_combine(
Hash, hash_value((OpIdx + 1) * (OpData.V->getValueID() + 1)));
AllUndefs = AllUndefs && isa<UndefValue>(OpData.V);
}
if (AllUndefs)
return {};
OperandsOrderData Data;
Data.NumOfAPOs = std::max(CntTrue, NumOperands - CntTrue);
Data.NumOpsWithSameOpcodeParent = NumOpsWithSameOpcodeParent;
Data.Hash = Hash;
return Data;
}
/// Go through the instructions in VL and append their operands.
void appendOperandsOfVL(ArrayRef<Value *> VL) {
assert(!VL.empty() && "Bad VL");
assert((empty() || VL.size() == getNumLanes()) &&
"Expected same number of lanes");
assert(isa<Instruction>(VL[0]) && "Expected instruction");
unsigned NumOperands = cast<Instruction>(VL[0])->getNumOperands();
OpsVec.resize(NumOperands);
unsigned NumLanes = VL.size();
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
OpsVec[OpIdx].resize(NumLanes);
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
assert(isa<Instruction>(VL[Lane]) && "Expected instruction");
// Our tree has just 3 nodes: the root and two operands.
// It is therefore trivial to get the APO. We only need to check the
// opcode of VL[Lane] and whether the operand at OpIdx is the LHS or
// RHS operand. The LHS operand of both add and sub is never attached
// to an inverse operation in the linearized form, therefore its APO
// is false. The RHS is true only if VL[Lane] is an inverse operation.
// Since operand reordering is performed on groups of commutative
// operations or alternating sequences (e.g., +, -), we can safely
// tell the inverse operations by checking commutativity.
bool IsInverseOperation = !isCommutative(cast<Instruction>(VL[Lane]));
bool APO = (OpIdx == 0) ? false : IsInverseOperation;
OpsVec[OpIdx][Lane] = {cast<Instruction>(VL[Lane])->getOperand(OpIdx),
APO, false};
}
}
}
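// Illustrative layout (assumed two-lane bundle {a0 + b0, a1 - b1}): after
// appendOperandsOfVL(), OpsVec[0] holds {a0, APO:0} and {a1, APO:0}, while
// OpsVec[1] holds {b0, APO:0} and {b1, APO:1}, i.e. the structure is
// indexed as OpsVec[OpIdx][Lane].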
/// \returns the number of operands.
unsigned getNumOperands() const { return OpsVec.size(); }
/// \returns the number of lanes.
unsigned getNumLanes() const { return OpsVec[0].size(); }
/// \returns the operand value at \p OpIdx and \p Lane.
Value *getValue(unsigned OpIdx, unsigned Lane) const {
return getData(OpIdx, Lane).V;
}
/// \returns true if the data structure is empty.
bool empty() const { return OpsVec.empty(); }
/// Clears the data.
void clear() { OpsVec.clear(); }
/// \Returns true if there are enough operands identical to \p Op to fill
/// the whole vector.
/// Note: This modifies the 'IsUsed' flag, so a clearUsed() must follow.
bool shouldBroadcast(Value *Op, unsigned OpIdx, unsigned Lane) {
bool OpAPO = getData(OpIdx, Lane).APO;
for (unsigned Ln = 0, Lns = getNumLanes(); Ln != Lns; ++Ln) {
if (Ln == Lane)
continue;
// This is set to true if we found a candidate for broadcast at Lane.
bool FoundCandidate = false;
for (unsigned OpI = 0, OpE = getNumOperands(); OpI != OpE; ++OpI) {
OperandData &Data = getData(OpI, Ln);
if (Data.APO != OpAPO || Data.IsUsed)
continue;
if (Data.V == Op) {
FoundCandidate = true;
Data.IsUsed = true;
break;
}
}
if (!FoundCandidate)
return false;
}
return true;
}
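// Hypothetical example: if OpsVec[0] = {x, x, x, x} (every lane holding the
// same value with equal APOs and not yet used), shouldBroadcast(x, 0, 0)
// returns true, and reorder() then switches that operand's mode to
// ReorderingMode::Splat.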
public:
/// Initialize with all the operands of the instruction vector \p RootVL.
VLOperands(ArrayRef<Value *> RootVL, const DataLayout &DL,
ScalarEvolution &SE, const BoUpSLP &R)
: DL(DL), SE(SE), R(R) {
// Append all the operands of RootVL.
appendOperandsOfVL(RootVL);
}
/// \Returns a value vector with the operands across all lanes for the
/// operand at \p OpIdx.
ValueList getVL(unsigned OpIdx) const {
ValueList OpVL(OpsVec[OpIdx].size());
assert(OpsVec[OpIdx].size() == getNumLanes() &&
"Expected same num of lanes across all operands");
for (unsigned Lane = 0, Lanes = getNumLanes(); Lane != Lanes; ++Lane)
OpVL[Lane] = OpsVec[OpIdx][Lane].V;
return OpVL;
}
// Performs operand reordering for 2 or more operands.
// The original operands are in OpsVec[OpIdx][Lane] and are reordered in
// place.
void reorder() {
unsigned NumOperands = getNumOperands();
unsigned NumLanes = getNumLanes();
// Each operand has its own mode. We are using this mode to help us select
// the instructions for each lane, so that they match best with the ones
// we have selected so far.
SmallVector<ReorderingMode, 2> ReorderingModes(NumOperands);
// This is a greedy single-pass algorithm. We are going over each lane
// once and deciding on the best order right away with no back-tracking.
// However, in order to increase its effectiveness, we start with the lane
// that has operands that can move the least. For example, given the
// following lanes:
// Lane 0 : A[0] = B[0] + C[0] // Visited 3rd
// Lane 1 : A[1] = C[1] - B[1] // Visited 1st
// Lane 2 : A[2] = B[2] + C[2] // Visited 2nd
// Lane 3 : A[3] = C[3] - B[3] // Visited 4th
// we will start at Lane 1, since the operands of the subtraction cannot
// be reordered. Then we will visit the rest of the lanes in a circular
// fashion. That is, Lanes 2, then Lane 0, and finally Lane 3.
// Find the first lane that we will start our search from.
unsigned FirstLane = getBestLaneToStartReordering();
// Initialize the modes.
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
Value *OpLane0 = getValue(OpIdx, FirstLane);
// Keep track if we have instructions with all the same opcode on one
// side.
if (isa<LoadInst>(OpLane0))
ReorderingModes[OpIdx] = ReorderingMode::Load;
else if (isa<Instruction>(OpLane0)) {
// Check if OpLane0 should be broadcast.
if (shouldBroadcast(OpLane0, OpIdx, FirstLane))
ReorderingModes[OpIdx] = ReorderingMode::Splat;
else
ReorderingModes[OpIdx] = ReorderingMode::Opcode;
}
else if (isa<Constant>(OpLane0))
ReorderingModes[OpIdx] = ReorderingMode::Constant;
else if (isa<Argument>(OpLane0))
// Our best hope is a Splat. It may save some cost in some cases.
ReorderingModes[OpIdx] = ReorderingMode::Splat;
else
// NOTE: This should be unreachable.
ReorderingModes[OpIdx] = ReorderingMode::Failed;
}
// Check that we don't have the same operands. There is no need to reorder
// if the operands are just a perfect-diamond or shuffled-diamond match.
// The only exceptions (for now) are possible broadcasts and a
// non-power-of-2 number of scalars.
auto &&SkipReordering = [this]() {
SmallPtrSet<Value *, 4> UniqueValues;
ArrayRef<OperandData> Op0 = OpsVec.front();
for (const OperandData &Data : Op0)
UniqueValues.insert(Data.V);
for (ArrayRef<OperandData> Op : drop_begin(OpsVec, 1)) {
if (any_of(Op, [&UniqueValues](const OperandData &Data) {
return !UniqueValues.contains(Data.V);
}))
return false;
}
// TODO: Check if we can remove a check for non-power-2 number of
// scalars after full support of non-power-2 vectorization.
return UniqueValues.size() != 2 && isPowerOf2_32(UniqueValues.size());
};
// If the initial strategy fails for any of the operand indexes, then we
// perform reordering again in a second pass. This helps avoid assigning
// high priority to the failed strategy, and should improve reordering for
// the non-failed operand indexes.
for (int Pass = 0; Pass != 2; ++Pass) {
// Check if there is no need to reorder the operands because they form a
// perfect or shuffled diamond match.
// We need to do this to avoid extra external-use cost counting for
// shuffled matches, which may cause regressions.
if (SkipReordering())
break;
// Skip the second pass if the first pass did not fail.
bool StrategyFailed = false;
// Mark all operand data as free to use.
clearUsed();
// We keep the original operand order for the FirstLane, so reorder the
// rest of the lanes. We are visiting the nodes in a circular fashion,
// using FirstLane as the center point and increasing the radius
// distance.
for (unsigned Distance = 1; Distance != NumLanes; ++Distance) {
// Visit the lane on the right and then the lane on the left.
for (int Direction : {+1, -1}) {
int Lane = FirstLane + Direction * Distance;
if (Lane < 0 || Lane >= (int)NumLanes)
continue;
int LastLane = Lane - Direction;
assert(LastLane >= 0 && LastLane < (int)NumLanes &&
"Out of bounds");
// Look for a good match for each operand.
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
// Search for the operand that best matches the operand at OpIdx in
// the last visited lane (LastLane).
Optional<unsigned> BestIdx =
getBestOperand(OpIdx, Lane, LastLane, ReorderingModes);
// By not selecting a value, we allow the operands that follow to
// select a better matching value. We will get a non-null value in
// the next run of getBestOperand().
if (BestIdx) {
// Swap the current operand with the one returned by
// getBestOperand().
swap(OpIdx, BestIdx.getValue(), Lane);
} else {
// We failed to find a best operand, set mode to 'Failed'.
ReorderingModes[OpIdx] = ReorderingMode::Failed;
// Enable the second pass.
StrategyFailed = true;
}
}
}
}
// Skip second pass if the strategy did not fail.
if (!StrategyFailed)
break;
}
}
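// Sketch of the reordering outcome for the four-lane example above
// (assumed names, and assuming B[i] and C[i] are loads from consecutive
// addresses): starting from Lane 1 (the subtraction, whose operands cannot
// move), the commutative '+' lanes are swapped to line up with it, so
// operand 0 ends up as {C[0], C[1], C[2], C[3]} and operand 1 as
// {B[0], B[1], B[2], B[3]}, giving two cleanly vectorizable operand
// vectors.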
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD static StringRef getModeStr(ReorderingMode RMode) {
switch (RMode) {
case ReorderingMode::Load:
return "Load";
case ReorderingMode::Opcode:
return "Opcode";
case ReorderingMode::Constant:
return "Constant";
case ReorderingMode::Splat:
return "Splat";
case ReorderingMode::Failed:
return "Failed";
}
llvm_unreachable("Unimplemented Reordering Type");
}
LLVM_DUMP_METHOD static raw_ostream &printMode(ReorderingMode RMode,
raw_ostream &OS) {
return OS << getModeStr(RMode);
}
/// Debug print.
LLVM_DUMP_METHOD static void dumpMode(ReorderingMode RMode) {
printMode(RMode, dbgs());
}
friend raw_ostream &operator<<(raw_ostream &OS, ReorderingMode RMode) {
return printMode(RMode, OS);
}
LLVM_DUMP_METHOD raw_ostream &print(raw_ostream &OS) const {
const unsigned Indent = 2;
unsigned Cnt = 0;
for (const OperandDataVec &OpDataVec : OpsVec) {
OS << "Operand " << Cnt++ << "\n";
for (const OperandData &OpData : OpDataVec) {
OS.indent(Indent) << "{";
if (Value *V = OpData.V)
OS << *V;
else
OS << "null";
OS << ", APO:" << OpData.APO << "}\n";
}
OS << "\n";
}
return OS;
}
/// Debug print.
LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
#endif
};
/// Checks if the instruction is marked for deletion.
bool isDeleted(Instruction *I) const { return DeletedInstructions.count(I); }
/// Marks the operands of the given values for later deletion by replacing
/// them with Undefs.
void eraseInstructions(ArrayRef<Value *> AV);
~BoUpSLP();
private:
/// Checks if all users of \p I are the part of the vectorization tree.
bool areAllUsersVectorized(Instruction *I,
ArrayRef<Value *> VectorizedVals) const;
/// \returns the cost of the vectorizable entry.
InstructionCost getEntryCost(const TreeEntry *E,
ArrayRef<Value *> VectorizedVals);
/// This is the recursive part of buildTree.
void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth,
const EdgeInfo &EI);
/// \returns true if the ExtractElement/ExtractValue instructions in \p VL can
/// be vectorized to use the original vector (or aggregate "bitcast" to a
/// vector) and sets \p CurrentOrder to the identity permutation; otherwise
/// returns false, setting \p CurrentOrder to either an empty vector or a
/// non-identity permutation that allows reusing the extract instructions.
bool canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
SmallVectorImpl<unsigned> &CurrentOrder) const;
/// Vectorize a single entry in the tree.
Value *vectorizeTree(TreeEntry *E);
/// Vectorize a single entry in the tree, starting in \p VL.
Value *vectorizeTree(ArrayRef<Value *> VL);
/// \returns the scalarization cost for this type. Scalarization in this
/// context means the creation of vectors from a group of scalars. If \p
/// NeedToShuffle is true, need to add a cost of reshuffling some of the
/// vector elements.
InstructionCost getGatherCost(FixedVectorType *Ty,
const DenseSet<unsigned> &ShuffledIndices,
bool NeedToShuffle) const;
/// Checks if the gathered \p VL can be represented as shuffle(s) of previous
/// tree entries.
/// \returns ShuffleKind, if gathered values can be represented as shuffles of
/// previous tree entries. \p Mask is filled with the shuffle mask.
Optional<TargetTransformInfo::ShuffleKind>
isGatherShuffledEntry(const TreeEntry *TE, SmallVectorImpl<int> &Mask,
SmallVectorImpl<const TreeEntry *> &Entries);
/// \returns the scalarization cost for this list of values. Assuming that
/// this subtree gets vectorized, we may need to extract the values from the
/// roots. This method calculates the cost of extracting the values.
InstructionCost getGatherCost(ArrayRef<Value *> VL) const;
/// Set the Builder insert point to one after the last instruction in
/// the bundle
void setInsertPointAfterBundle(const TreeEntry *E);
/// \returns a vector from a collection of scalars in \p VL.
Value *gather(ArrayRef<Value *> VL);
/// \returns whether the VectorizableTree is fully vectorizable and will
/// be beneficial even if the tree height is tiny.
bool isFullyVectorizableTinyTree(bool ForReduction) const;
/// Reorder commutative or alt operands to get better probability of
/// generating vectorized code.
static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
SmallVectorImpl<Value *> &Left,
SmallVectorImpl<Value *> &Right,
const DataLayout &DL,
ScalarEvolution &SE,
const BoUpSLP &R);
struct TreeEntry {
using VecTreeTy = SmallVector<std::unique_ptr<TreeEntry>, 8>;
TreeEntry(VecTreeTy &Container) : Container(Container) {}
/// \returns true if the scalars in VL are equal to this entry.
bool isSame(ArrayRef<Value *> VL) const {
auto &&IsSame = [VL](ArrayRef<Value *> Scalars, ArrayRef<int> Mask) {
if (Mask.size() != VL.size() && VL.size() == Scalars.size())
return std::equal(VL.begin(), VL.end(), Scalars.begin());
return VL.size() == Mask.size() &&
std::equal(VL.begin(), VL.end(), Mask.begin(),
[Scalars](Value *V, int Idx) {
return (isa<UndefValue>(V) &&
Idx == UndefMaskElem) ||
(Idx != UndefMaskElem && V == Scalars[Idx]);
});
};
if (!ReorderIndices.empty()) {
// TODO: implement matching if the nodes are just reordered, still can
// treat the vector as the same if the list of scalars matches VL
// directly, without reordering.
SmallVector<int> Mask;
inversePermutation(ReorderIndices, Mask);
if (VL.size() == Scalars.size())
return IsSame(Scalars, Mask);
if (VL.size() == ReuseShuffleIndices.size()) {
::addMask(Mask, ReuseShuffleIndices);
return IsSame(Scalars, Mask);
}
return false;
}
return IsSame(Scalars, ReuseShuffleIndices);
}
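// Hypothetical example: an entry with Scalars = {a, b} and
// ReuseShuffleIndices = {0, 0, 1, 1} reports isSame({a, a, b, b}) == true,
// since every position of VL maps back onto the stored scalars through the
// reuse mask.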
/// \returns true if current entry has same operands as \p TE.
bool hasEqualOperands(const TreeEntry &TE) const {
if (TE.getNumOperands() != getNumOperands())
return false;
SmallBitVector Used(getNumOperands());
for (unsigned I = 0, E = getNumOperands(); I < E; ++I) {
unsigned PrevCount = Used.count();
for (unsigned K = 0; K < E; ++K) {
if (Used.test(K))
continue;
if (getOperand(K) == TE.getOperand(I)) {
Used.set(K);
break;
}
}
// Check if we actually found the matching operand.
if (PrevCount == Used.count())
return false;
}
return true;
}
/// \return Final vectorization factor for the node. Defined by the total
/// number of vectorized scalars, including those used several times in
/// the entry and counted in the \a ReuseShuffleIndices, if any.
unsigned getVectorFactor() const {
if (!ReuseShuffleIndices.empty())
return ReuseShuffleIndices.size();
return Scalars.size();
}
/// A vector of scalars.
ValueList Scalars;
/// The Scalars are vectorized into this value. It is initialized to Null.
Value *VectorizedValue = nullptr;
/// Do we need to gather this sequence or vectorize it
/// (either with vector instruction or with scatter/gather
/// intrinsics for store/load)?
enum EntryState { Vectorize, ScatterVectorize, NeedToGather };
EntryState State;
/// Does this sequence require some shuffling?
SmallVector<int, 4> ReuseShuffleIndices;
/// Does this entry require reordering?
SmallVector<unsigned, 4> ReorderIndices;
/// Points back to the VectorizableTree.
///
/// Only used for Graphviz right now. Unfortunately GraphTrait::NodeRef has
/// to be a pointer and needs to be able to initialize the child iterator.
/// Thus we need a reference back to the container to translate the indices
/// to entries.
VecTreeTy &Container;
/// The TreeEntry index containing the user of this entry. We can actually
/// have multiple users so the data structure is not truly a tree.
SmallVector<EdgeInfo, 1> UserTreeIndices;
/// The index of this treeEntry in VectorizableTree.
int Idx = -1;
private:
/// The operands of each instruction in each lane Operands[op_index][lane].
/// Note: This helps avoid the replication of the code that performs the
/// reordering of operands during buildTree_rec() and vectorizeTree().
SmallVector<ValueList, 2> Operands;
/// The main/alternate instruction.
Instruction *MainOp = nullptr;
Instruction *AltOp = nullptr;
public:
/// Set this bundle's \p OpIdx'th operand to \p OpVL.
void setOperand(unsigned OpIdx, ArrayRef<Value *> OpVL) {
if (Operands.size() < OpIdx + 1)
Operands.resize(OpIdx + 1);
assert(Operands[OpIdx].empty() && "Already resized?");
assert(OpVL.size() <= Scalars.size() &&
"Number of operands is greater than the number of scalars.");
Operands[OpIdx].resize(OpVL.size());
copy(OpVL, Operands[OpIdx].begin());
}
/// Set the operands of this bundle in their original order.
void setOperandsInOrder() {
assert(Operands.empty() && "Already initialized?");
auto *I0 = cast<Instruction>(Scalars[0]);
Operands.resize(I0->getNumOperands());
unsigned NumLanes = Scalars.size();
for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
OpIdx != NumOperands; ++OpIdx) {
Operands[OpIdx].resize(NumLanes);
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
auto *I = cast<Instruction>(Scalars[Lane]);
assert(I->getNumOperands() == NumOperands &&
"Expected same number of operands");
Operands[OpIdx][Lane] = I->getOperand(OpIdx);
}
}
}
/// Reorders operands of the node to the given mask \p Mask.
void reorderOperands(ArrayRef<int> Mask) {
for (ValueList &Operand : Operands)
reorderScalars(Operand, Mask);
}
/// \returns the \p OpIdx operand of this TreeEntry.
ValueList &getOperand(unsigned OpIdx) {
assert(OpIdx < Operands.size() && "Off bounds");
return Operands[OpIdx];
}
/// \returns the \p OpIdx operand of this TreeEntry.
ArrayRef<Value *> getOperand(unsigned OpIdx) const {
assert(OpIdx < Operands.size() && "Off bounds");
return Operands[OpIdx];
}
/// \returns the number of operands.
unsigned getNumOperands() const { return Operands.size(); }
/// \return the single \p OpIdx operand.
Value *getSingleOperand(unsigned OpIdx) const {
assert(OpIdx < Operands.size() && "Off bounds");
assert(!Operands[OpIdx].empty() && "No operand available");
return Operands[OpIdx][0];
}
/// Some of the instructions in the list have alternate opcodes.
bool isAltShuffle() const { return MainOp != AltOp; }
bool isOpcodeOrAlt(Instruction *I) const {
unsigned CheckedOpcode = I->getOpcode();
return (getOpcode() == CheckedOpcode ||
getAltOpcode() == CheckedOpcode);
}
/// Chooses the correct key for scheduling data. If \p Op has the same (or
/// alternate) opcode as the main opcode of this entry, the key is \p Op.
/// Otherwise the key is the main operation, MainOp.
Value *isOneOf(Value *Op) const {
auto *I = dyn_cast<Instruction>(Op);
if (I && isOpcodeOrAlt(I))
return Op;
return MainOp;
}
void setOperations(const InstructionsState &S) {
MainOp = S.MainOp;
AltOp = S.AltOp;
}
Instruction *getMainOp() const {
return MainOp;
}
Instruction *getAltOp() const {
return AltOp;
}
/// The main/alternate opcodes for the list of instructions.
unsigned getOpcode() const {
return MainOp ? MainOp->getOpcode() : 0;
}
unsigned getAltOpcode() const {
return AltOp ? AltOp->getOpcode() : 0;
}
/// When ReuseShuffleIndices is empty this just returns the position of \p V
/// within the vector of Scalars (remapped through ReorderIndices, if any).
/// Otherwise, it additionally remaps the lane through its reuse index.
int findLaneForValue(Value *V) const {
unsigned FoundLane = std::distance(Scalars.begin(), find(Scalars, V));
assert(FoundLane < Scalars.size() && "Couldn't find extract lane");
if (!ReorderIndices.empty())
FoundLane = ReorderIndices[FoundLane];
assert(FoundLane < Scalars.size() && "Couldn't find extract lane");
if (!ReuseShuffleIndices.empty()) {
FoundLane = std::distance(ReuseShuffleIndices.begin(),
find(ReuseShuffleIndices, FoundLane));
}
return FoundLane;
}
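// Hypothetical remapping example (values assumed): with Scalars =
// {a, b, c, d}, ReorderIndices = {2, 3, 0, 1} and ReuseShuffleIndices =
// {3, 2, 1, 0}, a query for 'b' first finds position 1 in Scalars, is
// remapped to ReorderIndices[1] == 3, and finally to the position of 3
// within ReuseShuffleIndices, which is 0.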
#ifndef NDEBUG
/// Debug printer.
LLVM_DUMP_METHOD void dump() const {
dbgs() << Idx << ".\n";
for (unsigned OpI = 0, OpE = Operands.size(); OpI != OpE; ++OpI) {
dbgs() << "Operand " << OpI << ":\n";
for (const Value *V : Operands[OpI])
dbgs().indent(2) << *V << "\n";
}
dbgs() << "Scalars: \n";
for (Value *V : Scalars)
dbgs().indent(2) << *V << "\n";
dbgs() << "State: ";
switch (State) {
case Vectorize:
dbgs() << "Vectorize\n";
break;
case ScatterVectorize:
dbgs() << "ScatterVectorize\n";
break;
case NeedToGather:
dbgs() << "NeedToGather\n";
break;
}
dbgs() << "MainOp: ";
if (MainOp)
dbgs() << *MainOp << "\n";
else
dbgs() << "NULL\n";
dbgs() << "AltOp: ";
if (AltOp)
dbgs() << *AltOp << "\n";
else
dbgs() << "NULL\n";
dbgs() << "VectorizedValue: ";
if (VectorizedValue)
dbgs() << *VectorizedValue << "\n";
else
dbgs() << "NULL\n";
dbgs() << "ReuseShuffleIndices: ";
if (ReuseShuffleIndices.empty())
dbgs() << "Empty";
else
for (int ReuseIdx : ReuseShuffleIndices)
dbgs() << ReuseIdx << ", ";
dbgs() << "\n";
dbgs() << "ReorderIndices: ";
for (unsigned ReorderIdx : ReorderIndices)
dbgs() << ReorderIdx << ", ";
dbgs() << "\n";
dbgs() << "UserTreeIndices: ";
for (const auto &EInfo : UserTreeIndices)
dbgs() << EInfo << ", ";
dbgs() << "\n";
}
#endif
};
#ifndef NDEBUG
void dumpTreeCosts(const TreeEntry *E, InstructionCost ReuseShuffleCost,
InstructionCost VecCost,
InstructionCost ScalarCost) const {
dbgs() << "SLP: Calculated costs for Tree:\n"; E->dump();
dbgs() << "SLP: Costs:\n";
dbgs() << "SLP: ReuseShuffleCost = " << ReuseShuffleCost << "\n";
dbgs() << "SLP: VectorCost = " << VecCost << "\n";
dbgs() << "SLP: ScalarCost = " << ScalarCost << "\n";
dbgs() << "SLP: ReuseShuffleCost + VecCost - ScalarCost = " <<
ReuseShuffleCost + VecCost - ScalarCost << "\n";
}
#endif
/// Create a new VectorizableTree entry.
TreeEntry *newTreeEntry(ArrayRef<Value *> VL, Optional<ScheduleData *> Bundle,
const InstructionsState &S,
const EdgeInfo &UserTreeIdx,
ArrayRef<int> ReuseShuffleIndices = None,
ArrayRef<unsigned> ReorderIndices = None) {
TreeEntry::EntryState EntryState =
Bundle ? TreeEntry::Vectorize : TreeEntry::NeedToGather;
return newTreeEntry(VL, EntryState, Bundle, S, UserTreeIdx,
ReuseShuffleIndices, ReorderIndices);
}
TreeEntry *newTreeEntry(ArrayRef<Value *> VL,
TreeEntry::EntryState EntryState,
Optional<ScheduleData *> Bundle,
const InstructionsState &S,
const EdgeInfo &UserTreeIdx,
ArrayRef<int> ReuseShuffleIndices = None,
ArrayRef<unsigned> ReorderIndices = None) {
assert(((!Bundle && EntryState == TreeEntry::NeedToGather) ||
(Bundle && EntryState != TreeEntry::NeedToGather)) &&
"Need to vectorize gather entry?");
VectorizableTree.push_back(std::make_unique<TreeEntry>(VectorizableTree));
TreeEntry *Last = VectorizableTree.back().get();
Last->Idx = VectorizableTree.size() - 1;
Last->State = EntryState;
Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),
ReuseShuffleIndices.end());
if (ReorderIndices.empty()) {
Last->Scalars.assign(VL.begin(), VL.end());
Last->setOperations(S);
} else {
// Reorder scalars and build final mask.
Last->Scalars.assign(VL.size(), nullptr);
transform(ReorderIndices, Last->Scalars.begin(),
[VL](unsigned Idx) -> Value * {
if (Idx >= VL.size())
return UndefValue::get(VL.front()->getType());
return VL[Idx];
});
InstructionsState S = getSameOpcode(Last->Scalars);
Last->setOperations(S);
Last->ReorderIndices.append(ReorderIndices.begin(), ReorderIndices.end());
}
if (Last->State != TreeEntry::NeedToGather) {
for (Value *V : VL) {
assert(!getTreeEntry(V) && "Scalar already in tree!");
ScalarToTreeEntry[V] = Last;
}
// Update the scheduler bundle to point to this TreeEntry.
unsigned Lane = 0;
for (ScheduleData *BundleMember = Bundle.getValue(); BundleMember;
BundleMember = BundleMember->NextInBundle) {
BundleMember->TE = Last;
BundleMember->Lane = Lane;
++Lane;
}
assert((!Bundle.getValue() || Lane == VL.size()) &&
"Bundle and VL out of sync");
} else {
MustGather.insert(VL.begin(), VL.end());
}
if (UserTreeIdx.UserTE)
Last->UserTreeIndices.push_back(UserTreeIdx);
return Last;
}
/// -- Vectorization State --
/// Holds all of the tree entries.
TreeEntry::VecTreeTy VectorizableTree;
#ifndef NDEBUG
/// Debug printer.
LLVM_DUMP_METHOD void dumpVectorizableTree() const {
for (unsigned Id = 0, IdE = VectorizableTree.size(); Id != IdE; ++Id) {
VectorizableTree[Id]->dump();
dbgs() << "\n";
}
}
#endif
TreeEntry *getTreeEntry(Value *V) { return ScalarToTreeEntry.lookup(V); }
const TreeEntry *getTreeEntry(Value *V) const {
return ScalarToTreeEntry.lookup(V);
}
/// Maps a specific scalar to its tree entry.
SmallDenseMap<Value*, TreeEntry *> ScalarToTreeEntry;
/// Maps a value to the proposed vectorizable size.
SmallDenseMap<Value *, unsigned> InstrElementSize;
/// A list of scalars that we found that we need to keep as scalars.
ValueSet MustGather;
/// This POD struct describes one external user in the vectorized tree.
struct ExternalUser {
ExternalUser(Value *S, llvm::User *U, int L)
: Scalar(S), User(U), Lane(L) {}
// Which scalar in our function.
Value *Scalar;
// Which user that uses the scalar.
llvm::User *User;
// Which lane does the scalar belong to.
int Lane;
};
using UserList = SmallVector<ExternalUser, 16>;
/// Checks if two instructions may access the same memory.
///
/// \p Loc1 is the location of \p Inst1. It is passed explicitly because it
/// is invariant in the calling loop.
bool isAliased(const MemoryLocation &Loc1, Instruction *Inst1,
Instruction *Inst2) {
// First check if the result is already in the cache.
AliasCacheKey key = std::make_pair(Inst1, Inst2);
Optional<bool> &result = AliasCache[key];
if (result.hasValue()) {
return result.getValue();
}
bool aliased = true;
if (Loc1.Ptr && isSimple(Inst1))
aliased = isModOrRefSet(AA->getModRefInfo(Inst2, Loc1));
// Store the result in the cache.
result = aliased;
return aliased;
}
using AliasCacheKey = std::pair<Instruction *, Instruction *>;
/// Cache for alias results.
/// TODO: consider moving this to the AliasAnalysis itself.
DenseMap<AliasCacheKey, Optional<bool>> AliasCache;
/// Removes an instruction from its block and eventually deletes it.
/// It's like Instruction::eraseFromParent() except that the actual deletion
/// is delayed until BoUpSLP is destructed.
/// This is required to ensure that there are no incorrect collisions in the
/// AliasCache, which can happen if a new instruction is allocated at the
/// same address as a previously deleted instruction.
void eraseInstruction(Instruction *I, bool ReplaceOpsWithUndef = false) {
auto It = DeletedInstructions.try_emplace(I, ReplaceOpsWithUndef).first;
It->getSecond() = It->getSecond() && ReplaceOpsWithUndef;
}
/// Temporary store for deleted instructions. Instructions will be deleted
/// eventually when the BoUpSLP is destructed.
DenseMap<Instruction *, bool> DeletedInstructions;
/// A list of values that need to be extracted out of the tree.
/// This list holds pairs of (Internal Scalar : External User). External
/// User can be nullptr, meaning that this Internal Scalar will be used
/// later, after vectorization.
UserList ExternalUses;
/// Values used only by @llvm.assume calls.
SmallPtrSet<const Value *, 32> EphValues;
/// Holds all of the instructions that we gathered.
SetVector<Instruction *> GatherShuffleSeq;
/// A list of blocks that we are going to CSE.
SetVector<BasicBlock *> CSEBlocks;
/// Contains all scheduling relevant data for an instruction.
/// A ScheduleData either represents a single instruction or a member of an
/// instruction bundle (= a group of instructions which is combined into a
/// vector instruction).
struct ScheduleData {
// The initial value for the dependency counters. It means that the
// dependencies are not calculated yet.
enum { InvalidDeps = -1 };
ScheduleData() = default;
void init(int BlockSchedulingRegionID, Value *OpVal) {
FirstInBundle = this;
NextInBundle = nullptr;
NextLoadStore = nullptr;
IsScheduled = false;
SchedulingRegionID = BlockSchedulingRegionID;
UnscheduledDepsInBundle = UnscheduledDeps;
clearDependencies();
OpValue = OpVal;
TE = nullptr;
Lane = -1;
}
/// Returns true if the dependency information has been calculated.
bool hasValidDependencies() const { return Dependencies != InvalidDeps; }
/// Returns true for single instructions and for bundle representatives
/// (= the head of a bundle).
bool isSchedulingEntity() const { return FirstInBundle == this; }
/// Returns true if it represents an instruction bundle and not only a
/// single instruction.
bool isPartOfBundle() const {
return NextInBundle != nullptr || FirstInBundle != this;
}
/// Returns true if it is ready for scheduling, i.e. it has no more
/// unscheduled dependent instructions/bundles.
bool isReady() const {
assert(isSchedulingEntity() &&
"can't consider non-scheduling entity for ready list");
return UnscheduledDepsInBundle == 0 && !IsScheduled;
}
/// Modifies the number of unscheduled dependencies, also updating it for
/// the whole bundle.
int incrementUnscheduledDeps(int Incr) {
UnscheduledDeps += Incr;
return FirstInBundle->UnscheduledDepsInBundle += Incr;
}
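// Illustrative sketch (assumed counts): for a two-member bundle whose
// members still wait on 3 and 2 unscheduled producers respectively,
// UnscheduledDepsInBundle of the bundle head is 5. Each time a producer is
// scheduled, incrementUnscheduledDeps(-1) is called on the member, and the
// bundle becomes ready once the shared counter reaches 0 (see isReady()).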
/// Sets the number of unscheduled dependencies to the number of
/// dependencies.
void resetUnscheduledDeps() {
incrementUnscheduledDeps(Dependencies - UnscheduledDeps);
}
/// Clears all dependency information.
void clearDependencies() {
Dependencies = InvalidDeps;
resetUnscheduledDeps();
MemoryDependencies.clear();
}
void dump(raw_ostream &os) const {
if (!isSchedulingEntity()) {
os << "/ " << *Inst;
} else if (NextInBundle) {
os << '[' << *Inst;
ScheduleData *SD = NextInBundle;
while (SD) {
os << ';' << *SD->Inst;
SD = SD->NextInBundle;
}
os << ']';
} else {
os << *Inst;
}
}
Instruction *Inst = nullptr;
/// Points to the head in an instruction bundle (and always to this for
/// single instructions).
ScheduleData *FirstInBundle = nullptr;
/// Single linked list of all instructions in a bundle. Null if it is a
/// single instruction.
ScheduleData *NextInBundle = nullptr;
/// Single linked list of all memory instructions (e.g. load, store, call)
/// in the block - until the end of the scheduling region.
ScheduleData *NextLoadStore = nullptr;
/// The dependent memory instructions.
/// This list is derived on demand in calculateDependencies().
SmallVector<ScheduleData *, 4> MemoryDependencies;
/// This ScheduleData is in the current scheduling region if this matches
/// the current SchedulingRegionID of BlockScheduling.
int SchedulingRegionID = 0;
/// Used for getting a "good" final ordering of instructions.
int SchedulingPriority = 0;
/// The number of dependencies. Consists of the number of users of the
/// instruction plus the number of dependent memory instructions (if any).
/// This value is calculated on demand.
/// If InvalidDeps, the number of dependencies is not calculated yet.
int Dependencies = InvalidDeps;
/// The number of dependencies minus the number of dependencies of scheduled
/// instructions. As soon as this is zero, the instruction/bundle gets ready
/// for scheduling.
/// Note that this is negative as long as Dependencies is not calculated.
int UnscheduledDeps = InvalidDeps;
/// The sum of UnscheduledDeps in a bundle. Equals to UnscheduledDeps for
/// single instructions.
int UnscheduledDepsInBundle = InvalidDeps;
/// True if this instruction is scheduled (or considered as scheduled in the
/// dry-run).
bool IsScheduled = false;
/// Opcode of the current instruction in the schedule data.
Value *OpValue = nullptr;
/// The TreeEntry that this instruction corresponds to.
TreeEntry *TE = nullptr;
/// The lane of this node in the TreeEntry.
int Lane = -1;
};
#ifndef NDEBUG
friend inline raw_ostream &operator<<(raw_ostream &os,
const BoUpSLP::ScheduleData &SD) {
SD.dump(os);
return os;
}
#endif
friend struct GraphTraits<BoUpSLP *>;
friend struct DOTGraphTraits<BoUpSLP *>;
/// Contains all scheduling data for a basic block.
struct BlockScheduling {
BlockScheduling(BasicBlock *BB)
: BB(BB), ChunkSize(BB->size()), ChunkPos(ChunkSize) {}
void clear() {
ReadyInsts.clear();
ScheduleStart = nullptr;
ScheduleEnd = nullptr;
FirstLoadStoreInRegion = nullptr;
LastLoadStoreInRegion = nullptr;
// Reduce the maximum schedule region size by the size of the
// previous scheduling run.
ScheduleRegionSizeLimit -= ScheduleRegionSize;
if (ScheduleRegionSizeLimit < MinScheduleRegionSize)
ScheduleRegionSizeLimit = MinScheduleRegionSize;
ScheduleRegionSize = 0;
// Make a new scheduling region, i.e. all existing ScheduleData is not
// in the new region yet.
++SchedulingRegionID;
}
ScheduleData *getScheduleData(Value *V) {
ScheduleData *SD = ScheduleDataMap[V];
if (SD && SD->SchedulingRegionID == SchedulingRegionID)
return SD;
return nullptr;
}
ScheduleData *getScheduleData(Value *V, Value *Key) {
if (V == Key)
return getScheduleData(V);
auto I = ExtraScheduleDataMap.find(V);
if (I != ExtraScheduleDataMap.end()) {
ScheduleData *SD = I->second[Key];
if (SD && SD->SchedulingRegionID == SchedulingRegionID)
return SD;
}
return nullptr;
}
bool isInSchedulingRegion(ScheduleData *SD) const {
return SD->SchedulingRegionID == SchedulingRegionID;
}
/// Marks an instruction as scheduled and puts all dependent ready
/// instructions into the ready-list.
template <typename ReadyListType>
void schedule(ScheduleData *SD, ReadyListType &ReadyList) {
SD->IsScheduled = true;
LLVM_DEBUG(dbgs() << "SLP: schedule " << *SD << "\n");
for (ScheduleData *BundleMember = SD; BundleMember;
BundleMember = BundleMember->NextInBundle) {
if (BundleMember->Inst != BundleMember->OpValue)
continue;
// Handle the def-use chain dependencies.
// Decrement the unscheduled counter and insert to ready list if ready.
auto &&DecrUnsched = [this, &ReadyList](Instruction *I) {
doForAllOpcodes(I, [&ReadyList](ScheduleData *OpDef) {
if (OpDef && OpDef->hasValidDependencies() &&
OpDef->incrementUnscheduledDeps(-1) == 0) {
// There are no more unscheduled dependencies after
// decrementing, so we can put the dependent instruction
// into the ready list.
ScheduleData *DepBundle = OpDef->FirstInBundle;
assert(!DepBundle->IsScheduled &&
"already scheduled bundle gets ready");
ReadyList.insert(DepBundle);
LLVM_DEBUG(dbgs()
<< "SLP: gets ready (def): " << *DepBundle << "\n");
}
});
};
// If BundleMember is a vector bundle, its operands may have been
// reordered during buildTree(). We therefore need to get its operands
// through the TreeEntry.
if (TreeEntry *TE = BundleMember->TE) {
int Lane = BundleMember->Lane;
assert(Lane >= 0 && "Lane not set");
// Since the vectorization tree is being built recursively, this
// assertion ensures that the tree entry has all operands set before
// reaching this code. A couple of exceptions known at the moment are
// extracts, where the second (immediate) operand is not added. Since
// immediates do not affect scheduler behavior, this is considered
// okay.
auto *In = TE->getMainOp();
assert(In &&
(isa<ExtractValueInst>(In) || isa<ExtractElementInst>(In) ||
In->getNumOperands() == TE->getNumOperands()) &&
"Missed TreeEntry operands?");
(void)In; // fake use to avoid build failure when assertions disabled
for (unsigned OpIdx = 0, NumOperands = TE->getNumOperands();
OpIdx != NumOperands; ++OpIdx)
if (auto *I = dyn_cast<Instruction>(TE->getOperand(OpIdx)[Lane]))
DecrUnsched(I);
} else {
// If BundleMember is a stand-alone instruction, no operand reordering
// has taken place, so we directly access its operands.
for (Use &U : BundleMember->Inst->operands())
if (auto *I = dyn_cast<Instruction>(U.get()))
DecrUnsched(I);
}
// Handle the memory dependencies.
for (ScheduleData *MemoryDepSD : BundleMember->MemoryDependencies) {
if (MemoryDepSD->incrementUnscheduledDeps(-1) == 0) {
// There are no more unscheduled dependencies after decrementing,
// so we can put the dependent instruction into the ready list.
ScheduleData *DepBundle = MemoryDepSD->FirstInBundle;
assert(!DepBundle->IsScheduled &&
"already scheduled bundle gets ready");
ReadyList.insert(DepBundle);
LLVM_DEBUG(dbgs()
<< "SLP: gets ready (mem): " << *DepBundle << "\n");
}
}
}
}
void doForAllOpcodes(Value *V,
function_ref<void(ScheduleData *SD)> Action) {
if (ScheduleData *SD = getScheduleData(V))
Action(SD);
auto I = ExtraScheduleDataMap.find(V);
if (I != ExtraScheduleDataMap.end())
for (auto &P : I->second)
if (P.second->SchedulingRegionID == SchedulingRegionID)
Action(P.second);
}
/// Put all instructions into the ReadyList which are ready for scheduling.
template <typename ReadyListType>
void initialFillReadyList(ReadyListType &ReadyList) {
for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) {
doForAllOpcodes(I, [&](ScheduleData *SD) {
if (SD->isSchedulingEntity() && SD->isReady()) {
ReadyList.insert(SD);
LLVM_DEBUG(dbgs()
<< "SLP: initially in ready list: " << *I << "\n");
}
});
}
}
/// Build a bundle from the ScheduleData nodes corresponding to the
/// scalar instruction for each lane.
ScheduleData *buildBundle(ArrayRef<Value *> VL);
/// Checks if a bundle of instructions can be scheduled, i.e. has no
/// cyclic dependencies. This is only a dry-run, no instructions are
/// actually moved at this stage.
/// \returns the scheduling bundle. The returned Optional value is non-None
/// if \p VL is allowed to be scheduled.
Optional<ScheduleData *>
tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
const InstructionsState &S);
/// Un-bundles a group of instructions.
void cancelScheduling(ArrayRef<Value *> VL, Value *OpValue);
/// Allocates schedule data chunk.
ScheduleData *allocateScheduleDataChunks();
/// Extends the scheduling region so that V is inside the region.
/// \returns true if the region size is within the limit.
bool extendSchedulingRegion(Value *V, const InstructionsState &S);
/// Initialize the ScheduleData structures for new instructions in the
/// scheduling region.
void initScheduleData(Instruction *FromI, Instruction *ToI,
ScheduleData *PrevLoadStore,
ScheduleData *NextLoadStore);
/// Updates the dependency information of a bundle and of all instructions/
/// bundles which depend on the original bundle.
void calculateDependencies(ScheduleData *SD, bool InsertInReadyList,
BoUpSLP *SLP);
/// Sets all instructions in the scheduling region to un-scheduled.
void resetSchedule();
BasicBlock *BB;
/// Simple memory allocation for ScheduleData.
std::vector<std::unique_ptr<ScheduleData[]>> ScheduleDataChunks;
/// The size of a ScheduleData array in ScheduleDataChunks.
int ChunkSize;
/// The allocator position in the current chunk, which is the last entry
/// of ScheduleDataChunks.
int ChunkPos;
/// Attaches ScheduleData to Instruction.
/// Note that the mapping survives during all vectorization iterations, i.e.
/// ScheduleData structures are recycled.
DenseMap<Value *, ScheduleData *> ScheduleDataMap;
/// Attaches ScheduleData to Instruction with the leading key.
DenseMap<Value *, SmallDenseMap<Value *, ScheduleData *>>
ExtraScheduleDataMap;
struct ReadyList : SmallVector<ScheduleData *, 8> {
void insert(ScheduleData *SD) { push_back(SD); }
};
/// The ready-list for scheduling (only used for the dry-run).
ReadyList ReadyInsts;
/// The first instruction of the scheduling region.
Instruction *ScheduleStart = nullptr;
/// The first instruction _after_ the scheduling region.
Instruction *ScheduleEnd = nullptr;
/// The first memory accessing instruction in the scheduling region
/// (can be null).
ScheduleData *FirstLoadStoreInRegion = nullptr;
/// The last memory accessing instruction in the scheduling region
/// (can be null).
ScheduleData *LastLoadStoreInRegion = nullptr;
/// The current size of the scheduling region.
int ScheduleRegionSize = 0;
/// The maximum size allowed for the scheduling region.
int ScheduleRegionSizeLimit = ScheduleRegionSizeBudget;
/// The ID of the scheduling region. For a new vectorization iteration this
/// is incremented which "removes" all ScheduleData from the region.
// Make sure that the initial SchedulingRegionID is greater than the
// initial SchedulingRegionID in ScheduleData (which is 0).
int SchedulingRegionID = 1;
};
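// A minimal sketch of how region invalidation works (hypothetical values):
// after one vectorization attempt every ScheduleData in the block carries
// SchedulingRegionID == 1; bumping the block's SchedulingRegionID to 2 makes
// lookups such as doForAllOpcodes() ignore that stale data, so the maps and
// chunk allocations can be reused without being cleared.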
/// Attaches the BlockScheduling structures to basic blocks.
MapVector<BasicBlock *, std::unique_ptr<BlockScheduling>> BlocksSchedules;
/// Performs the "real" scheduling. Done before vectorization is actually
/// performed in a basic block.
void scheduleBlock(BlockScheduling *BS);
/// List of users to ignore during scheduling and that don't need extracting.
ArrayRef<Value *> UserIgnoreList;
/// A DenseMapInfo implementation for holding DenseMaps and DenseSets of
/// sorted SmallVectors of unsigned.
struct OrdersTypeDenseMapInfo {
static OrdersType getEmptyKey() {
OrdersType V;
V.push_back(~1U);
return V;
}
static OrdersType getTombstoneKey() {
OrdersType V;
V.push_back(~2U);
return V;
}
static unsigned getHashValue(const OrdersType &V) {
return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
}
static bool isEqual(const OrdersType &LHS, const OrdersType &RHS) {
return LHS == RHS;
}
};
// Analysis and block reference.
Function *F;
ScalarEvolution *SE;
TargetTransformInfo *TTI;
TargetLibraryInfo *TLI;
AAResults *AA;
LoopInfo *LI;
DominatorTree *DT;
AssumptionCache *AC;
DemandedBits *DB;
const DataLayout *DL;
OptimizationRemarkEmitter *ORE;
unsigned MaxVecRegSize; // This is set by TTI or overridden by cl::opt.
unsigned MinVecRegSize; // Set by cl::opt (default: 128).
/// Instruction builder to construct the vectorized tree.
IRBuilder<> Builder;
/// A map of scalar integer values to the smallest bit width with which they
/// can legally be represented. The values map to (width, signed) pairs,
/// where "width" indicates the minimum bit width and "signed" is True if the
/// value must be sign-extended, rather than zero-extended, back to its
/// original width.
MapVector<Value *, std::pair<uint64_t, bool>> MinBWs;
};
} // end namespace slpvectorizer
template <> struct GraphTraits<BoUpSLP *> {
using TreeEntry = BoUpSLP::TreeEntry;
/// NodeRef has to be a pointer per the GraphWriter.
using NodeRef = TreeEntry *;
using ContainerTy = BoUpSLP::TreeEntry::VecTreeTy;
/// Add the VectorizableTree to the index iterator to be able to return
/// TreeEntry pointers.
struct ChildIteratorType
: public iterator_adaptor_base<
ChildIteratorType, SmallVector<BoUpSLP::EdgeInfo, 1>::iterator> {
ContainerTy &VectorizableTree;
ChildIteratorType(SmallVector<BoUpSLP::EdgeInfo, 1>::iterator W,
ContainerTy &VT)
: ChildIteratorType::iterator_adaptor_base(W), VectorizableTree(VT) {}
NodeRef operator*() { return I->UserTE; }
};
static NodeRef getEntryNode(BoUpSLP &R) {
return R.VectorizableTree[0].get();
}
static ChildIteratorType child_begin(NodeRef N) {
return {N->UserTreeIndices.begin(), N->Container};
}
static ChildIteratorType child_end(NodeRef N) {
return {N->UserTreeIndices.end(), N->Container};
}
/// For the node iterator we just need to turn the TreeEntry iterator into a
/// TreeEntry* iterator so that it dereferences to NodeRef.
class nodes_iterator {
using ItTy = ContainerTy::iterator;
ItTy It;
public:
nodes_iterator(const ItTy &It2) : It(It2) {}
NodeRef operator*() { return It->get(); }
nodes_iterator operator++() {
++It;
return *this;
}
bool operator!=(const nodes_iterator &N2) const { return N2.It != It; }
};
static nodes_iterator nodes_begin(BoUpSLP *R) {
return nodes_iterator(R->VectorizableTree.begin());
}
static nodes_iterator nodes_end(BoUpSLP *R) {
return nodes_iterator(R->VectorizableTree.end());
}
static unsigned size(BoUpSLP *R) { return R->VectorizableTree.size(); }
};
template <> struct DOTGraphTraits<BoUpSLP *> : public DefaultDOTGraphTraits {
using TreeEntry = BoUpSLP::TreeEntry;
DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
std::string getNodeLabel(const TreeEntry *Entry, const BoUpSLP *R) {
std::string Str;
raw_string_ostream OS(Str);
if (isSplat(Entry->Scalars))
OS << "<splat> ";
for (auto V : Entry->Scalars) {
OS << *V;
if (llvm::any_of(R->ExternalUses, [&](const BoUpSLP::ExternalUser &EU) {
return EU.Scalar == V;
}))
OS << " <extract>";
OS << "\n";
}
return Str;
}
static std::string getNodeAttributes(const TreeEntry *Entry,
const BoUpSLP *) {
if (Entry->State == TreeEntry::NeedToGather)
return "color=red";
return "";
}
};
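// Illustrative sketch (hypothetical debugging snippet): with the GraphTraits
// and DOTGraphTraits specializations above, the SLP graph can be rendered
// through LLVM's generic GraphWriter machinery, e.g.:
//
//   #include "llvm/Support/GraphWriter.h"
//   // R is a BoUpSLP instance populated by buildTree().
//   llvm::ViewGraph(&R, "slp-tree");
//
// Gather-only nodes are colored red via getNodeAttributes(), and scalars that
// feed ExternalUses are annotated with "<extract>" in the node label.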
} // end namespace llvm
BoUpSLP::~BoUpSLP() {
for (const auto &Pair : DeletedInstructions) {
// Replace operands of ignored instructions with Undefs in case they were
// marked for deletion.
if (Pair.getSecond()) {
Value *Undef = UndefValue::get(Pair.getFirst()->getType());
Pair.getFirst()->replaceAllUsesWith(Undef);
}
Pair.getFirst()->dropAllReferences();
}
for (const auto &Pair : DeletedInstructions) {
assert(Pair.getFirst()->use_empty() &&
"trying to erase instruction with users.");
Pair.getFirst()->eraseFromParent();
}
#ifdef EXPENSIVE_CHECKS
// If we could guarantee that this call is not extremely slow, we could
// remove the ifdef limitation (see PR47712).
assert(!verifyFunction(*F, &dbgs()));
#endif
}
void BoUpSLP::eraseInstructions(ArrayRef<Value *> AV) {
for (auto *V : AV) {
if (auto *I = dyn_cast<Instruction>(V))
eraseInstruction(I, /*ReplaceOpsWithUndef=*/true);
};
}
/// Reorders the given \p Reuses mask according to the given \p Mask. \p Reuses
/// contains the original mask for the scalars reused in the node. The
/// procedure transforms this mask in accordance with the given \p Mask.
static void reorderReuses(SmallVectorImpl<int> &Reuses, ArrayRef<int> Mask) {
assert(!Mask.empty() && Reuses.size() == Mask.size() &&
"Expected non-empty mask.");
SmallVector<int> Prev(Reuses.begin(), Reuses.end());
Prev.swap(Reuses);
for (unsigned I = 0, E = Prev.size(); I < E; ++I)
if (Mask[I] != UndefMaskElem)
Reuses[Mask[I]] = Prev[I];
}
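// Illustrative trace (hypothetical values): with Reuses = {3, 2, 1, 0} and
// Mask = {1, 0, 3, 2}, the loop above writes Prev[I] into slot Mask[I], so
// Reuses becomes {2, 3, 0, 1}. Slots whose mask element is UndefMaskElem keep
// their previous value.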
/// Reorders the given \p Order according to the given \p Mask. \p Order is
/// the original order of the scalars. The procedure transforms the provided
/// order in accordance with the given \p Mask. If the resulting \p Order is
/// just an identity order, \p Order is cleared.
static void reorderOrder(SmallVectorImpl<unsigned> &Order, ArrayRef<int> Mask) {
assert(!Mask.empty() && "Expected non-empty mask.");
SmallVector<int> MaskOrder;
if (Order.empty()) {
MaskOrder.resize(Mask.size());
std::iota(MaskOrder.begin(), MaskOrder.end(), 0);
} else {
inversePermutation(Order, MaskOrder);
}
reorderReuses(MaskOrder, Mask);
if (ShuffleVectorInst::isIdentityMask(MaskOrder)) {
Order.clear();
return;
}
Order.assign(Mask.size(), Mask.size());
for (unsigned I = 0, E = Mask.size(); I < E; ++I)
if (MaskOrder[I] != UndefMaskElem)
Order[MaskOrder[I]] = I;
fixupOrderingIndices(Order);
}
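// Illustrative trace (hypothetical values): starting from an empty \p Order
// (identity) and Mask = {1, 0, 3, 2}, MaskOrder becomes {1, 0, 3, 2} after
// reorderReuses(), which is not an identity mask, so the function produces
// Order = {1, 0, 3, 2}. With an identity Mask, MaskOrder stays the identity
// and Order is simply cleared.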
Optional<BoUpSLP::OrdersType>
BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
assert(TE.State == TreeEntry::NeedToGather && "Expected gather node only.");
unsigned NumScalars = TE.Scalars.size();
OrdersType CurrentOrder(NumScalars, NumScalars);
SmallVector<int> Positions;
SmallBitVector UsedPositions(NumScalars);
const TreeEntry *STE = nullptr;
// Try to find all gathered scalars that are also vectorized in another
// vectorized node. Only a single vectorized tree node may be used here to
// correctly identify the order of the gathered scalars.
for (unsigned I = 0; I < NumScalars; ++I) {
Value *V = TE.Scalars[I];
if (!isa<LoadInst, ExtractElementInst, ExtractValueInst>(V))
continue;
if (const auto *LocalSTE = getTreeEntry(V)) {
if (!STE)
STE = LocalSTE;
else if (STE != LocalSTE)
// Take the order only from the single vector node.
return None;
unsigned Lane =
std::distance(STE->Scalars.begin(), find(STE->Scalars, V));
if (Lane >= NumScalars)
return None;
if (CurrentOrder[Lane] != NumScalars) {
if (Lane != I)
continue;
UsedPositions.reset(CurrentOrder[Lane]);
}
// The partial identity (where only some elements of the gather node are
// in the identity order) is good.
CurrentOrder[Lane] = I;
UsedPositions.set(I);
}
}
// Need to keep the order if we have a vector entry and at least 2 scalars or
// the vectorized entry has just 2 scalars.
if (STE && (UsedPositions.count() > 1 || STE->Scalars.size() == 2)) {
auto &&IsIdentityOrder = [NumScalars](ArrayRef<unsigned> CurrentOrder) {
for (unsigned I = 0; I < NumScalars; ++I)
if (CurrentOrder[I] != I && CurrentOrder[I] != NumScalars)
return false;
return true;
};
if (IsIdentityOrder(CurrentOrder)) {
CurrentOrder.clear();
return CurrentOrder;
}
auto *It = CurrentOrder.begin();
for (unsigned I = 0; I < NumScalars;) {
if (UsedPositions.test(I)) {
++I;
continue;
}
if (*It == NumScalars) {
*It = I;
++I;
}
++It;
}
return CurrentOrder;
}
return None;
}
Optional<BoUpSLP::OrdersType> BoUpSLP::getReorderingData(const TreeEntry &TE,
bool TopToBottom) {
// No need to reorder if we need to shuffle reuses; the node still needs to
// be shuffled anyway.
if (!TE.ReuseShuffleIndices.empty())
return None;
if (TE.State == TreeEntry::Vectorize &&
(isa<LoadInst, ExtractElementInst, ExtractValueInst>(TE.getMainOp()) ||
(TopToBottom && isa<StoreInst, InsertElementInst>(TE.getMainOp()))) &&
!TE.isAltShuffle())
return TE.ReorderIndices;
if (TE.State == TreeEntry::NeedToGather) {
// TODO: add analysis of other gather nodes with extractelement
// instructions and other values/instructions, not only undefs.
if (((TE.getOpcode() == Instruction::ExtractElement &&
!TE.isAltShuffle()) ||
(all_of(TE.Scalars,
[](Value *V) {
return isa<UndefValue, ExtractElementInst>(V);
}) &&
any_of(TE.Scalars,
[](Value *V) { return isa<ExtractElementInst>(V); }))) &&
all_of(TE.Scalars,
[](Value *V) {
auto *EE = dyn_cast<ExtractElementInst>(V);
return !EE || isa<FixedVectorType>(EE->getVectorOperandType());
}) &&
allSameType(TE.Scalars)) {
// Check that gather of extractelements can be represented as
// just a shuffle of a single vector.
OrdersType CurrentOrder;
bool Reuse = canReuseExtract(TE.Scalars, TE.getMainOp(), CurrentOrder);
if (Reuse || !CurrentOrder.empty()) {
if (!CurrentOrder.empty())
fixupOrderingIndices(CurrentOrder);
return CurrentOrder;
}
}
if (Optional<OrdersType> CurrentOrder = findReusedOrderedScalars(TE))
return CurrentOrder;
}
return None;
}
void BoUpSLP::reorderTopToBottom() {
// Maps VF to the graph nodes.
DenseMap<unsigned, SetVector<TreeEntry *>> VFToOrderedEntries;
// ExtractElement gather nodes which can be vectorized and need to handle
// their ordering.
DenseMap<const TreeEntry *, OrdersType> GathersToOrders;
// Find all reorderable nodes with the given VF.
// Currently these are vectorized stores, loads, extracts and some gathering
// of extracts.
for_each(VectorizableTree, [this, &VFToOrderedEntries, &GathersToOrders](
const std::unique_ptr<TreeEntry> &TE) {
if (Optional<OrdersType> CurrentOrder =
getReorderingData(*TE.get(), /*TopToBottom=*/true)) {
// Do not include ordering for nodes used in the alt opcode vectorization;
// it is better to reorder them during the bottom-to-top stage. If we follow
// the order here, it causes reordering of the whole graph, though actually
// it is profitable just to reorder the subgraph that starts from the
// alternate opcode vectorization node. Such nodes already end up with a
// shuffle instruction and it is enough to change this shuffle rather than
// rotate the scalars for the whole graph.
unsigned Cnt = 0;
const TreeEntry *UserTE = TE.get();
while (UserTE && Cnt < RecursionMaxDepth) {
if (UserTE->UserTreeIndices.size() != 1)
break;
if (all_of(UserTE->UserTreeIndices, [](const EdgeInfo &EI) {
return EI.UserTE->State == TreeEntry::Vectorize &&
EI.UserTE->isAltShuffle() && EI.UserTE->Idx != 0;
}))
return;
if (UserTE->UserTreeIndices.empty())
UserTE = nullptr;
else
UserTE = UserTE->UserTreeIndices.back().UserTE;
++Cnt;
}
VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
if (TE->State != TreeEntry::Vectorize)
GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
}
});
// Reorder the graph nodes according to their vectorization factor.
for (unsigned VF = VectorizableTree.front()->Scalars.size(); VF > 1;
VF /= 2) {
auto It = VFToOrderedEntries.find(VF);
if (It == VFToOrderedEntries.end())
continue;
// Try to find the most profitable order. We are just looking for the most
// used order and reorder the scalar elements in the nodes according to this
// most used order.
ArrayRef<TreeEntry *> OrderedEntries = It->second.getArrayRef();
// All operands are reordered and used only in this node - propagate the
// most used order to the user node.
MapVector<OrdersType, unsigned,
DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo>>
OrdersUses;
SmallPtrSet<const TreeEntry *, 4> VisitedOps;
for (const TreeEntry *OpTE : OrderedEntries) {
// No need to reorder these nodes; we still need to extend and to use a
// shuffle, just merge the reordering shuffle and the reuse shuffle.
if (!OpTE->ReuseShuffleIndices.empty())
continue;
// Count number of orders uses.
const auto &Order = [OpTE, &GathersToOrders]() -> const OrdersType & {
if (OpTE->State == TreeEntry::NeedToGather)
return GathersToOrders.find(OpTE)->second;
return OpTE->ReorderIndices;
}();
// Stores actually store the mask, not the order; we need to invert it.
if (OpTE->State == TreeEntry::Vectorize && !OpTE->isAltShuffle() &&
OpTE->getOpcode() == Instruction::Store && !Order.empty()) {
SmallVector<int> Mask;
inversePermutation(Order, Mask);
unsigned E = Order.size();
OrdersType CurrentOrder(E, E);
transform(Mask, CurrentOrder.begin(), [E](int Idx) {
return Idx == UndefMaskElem ? E : static_cast<unsigned>(Idx);
});
fixupOrderingIndices(CurrentOrder);
++OrdersUses.insert(std::make_pair(CurrentOrder, 0)).first->second;
} else {
++OrdersUses.insert(std::make_pair(Order, 0)).first->second;
}
}
// If no orders were collected, there is nothing to reorder for this VF.
if (OrdersUses.empty())
continue;
// Choose the most used order.
ArrayRef<unsigned> BestOrder = OrdersUses.front().first;
unsigned Cnt = OrdersUses.front().second;
for (const auto &Pair : drop_begin(OrdersUses)) {
if (Cnt < Pair.second || (Cnt == Pair.second && Pair.first.empty())) {
BestOrder = Pair.first;
Cnt = Pair.second;
}
}
// Set order of the user node.
if (BestOrder.empty())
continue;
SmallVector<int> Mask;
inversePermutation(BestOrder, Mask);
SmallVector<int> MaskOrder(BestOrder.size(), UndefMaskElem);
unsigned E = BestOrder.size();
transform(BestOrder, MaskOrder.begin(), [E](unsigned I) {
return I < E ? static_cast<int>(I) : UndefMaskElem;
});
// Do an actual reordering, if profitable.
for (std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
// Just do the reordering for the nodes with the given VF.
if (TE->Scalars.size() != VF) {
if (TE->ReuseShuffleIndices.size() == VF) {
// Need to reorder the reuses masks of the operands with smaller VF to
// be able to find the match between the graph nodes and scalar
// operands of the given node during vectorization/cost estimation.
assert(all_of(TE->UserTreeIndices,
[VF, &TE](const EdgeInfo &EI) {
return EI.UserTE->Scalars.size() == VF ||
EI.UserTE->Scalars.size() ==
TE->Scalars.size();
}) &&
"All users must be of VF size.");
// Update ordering of the operands with the smaller VF than the given
// one.
reorderReuses(TE->ReuseShuffleIndices, Mask);
}
continue;
}
if (TE->State == TreeEntry::Vectorize &&
isa<ExtractElementInst, ExtractValueInst, LoadInst, StoreInst,
InsertElementInst>(TE->getMainOp()) &&
!TE->isAltShuffle()) {
// Build correct orders for extract{element,value}, loads and
// stores.
reorderOrder(TE->ReorderIndices, Mask);
if (isa<InsertElementInst, StoreInst>(TE->getMainOp()))
TE->reorderOperands(Mask);
} else {
// Reorder the node and its operands.
TE->reorderOperands(Mask);
assert(TE->ReorderIndices.empty() &&
"Expected empty reorder sequence.");
reorderScalars(TE->Scalars, Mask);
}
if (!TE->ReuseShuffleIndices.empty()) {
// Apply reversed order to keep the original ordering of the reused
// elements to avoid extra reorder indices shuffling.
OrdersType CurrentOrder;
reorderOrder(CurrentOrder, MaskOrder);
SmallVector<int> NewReuses;
inversePermutation(CurrentOrder, NewReuses);
addMask(NewReuses, TE->ReuseShuffleIndices);
TE->ReuseShuffleIndices.swap(NewReuses);
}
}
}
}
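// A minimal sketch of the voting step above, with hypothetical counts: if,
// for a given VF, three reorderable nodes report the order {1, 0, 3, 2} and
// one reports no particular order, {1, 0, 3, 2} collects the most votes, is
// inverted into a shuffle mask, and is then applied to every node of that VF
// (to its scalars, ReorderIndices, or ReuseShuffleIndices, depending on the
// node kind).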
void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
SetVector<TreeEntry *> OrderedEntries;
DenseMap<const TreeEntry *, OrdersType> GathersToOrders;
// Find all reorderable leaf nodes with the given VF.
// Currently these are vectorized loads and extracts without alternate
// operands, plus some gathering of extracts.
SmallVector<TreeEntry *> NonVectorized;
for_each(VectorizableTree, [this, &OrderedEntries, &GathersToOrders,
&NonVectorized](
const std::unique_ptr<TreeEntry> &TE) {
if (TE->State != TreeEntry::Vectorize)
NonVectorized.push_back(TE.get());
if (Optional<OrdersType> CurrentOrder =
getReorderingData(*TE.get(), /*TopToBottom=*/false)) {
OrderedEntries.insert(TE.get());
if (TE->State != TreeEntry::Vectorize)
GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
}
});
// Checks if the operands of the users are reorderable and have only a
// single use.
auto &&CheckOperands =
[this, &NonVectorized](const auto &Data,
SmallVectorImpl<TreeEntry *> &GatherOps) {
for (unsigned I = 0, E = Data.first->getNumOperands(); I < E; ++I) {
if (any_of(Data.second,
[I](const std::pair<unsigned, TreeEntry *> &OpData) {
return OpData.first == I &&
OpData.second->State == TreeEntry::Vectorize;
}))
continue;
ArrayRef<Value *> VL = Data.first->getOperand(I);
const TreeEntry *TE = nullptr;
const auto *It = find_if(VL, [this, &TE](Value *V) {
TE = getTreeEntry(V);
return TE;
});
if (It != VL.end() && TE->isSame(VL))
return false;
TreeEntry *Gather = nullptr;
if (count_if(NonVectorized, [VL, &Gather](TreeEntry *TE) {
assert(TE->State != TreeEntry::Vectorize &&
"Only non-vectorized nodes are expected.");
if (TE->isSame(VL)) {
Gather = TE;
return true;
}
return false;
}) > 1)
return false;
if (Gather)
GatherOps.push_back(Gather);
}
return true;
};
// 1. Propagate the order to the graph nodes which use only reordered nodes.
// I.e., if a node has operands that are reordered, try to keep at least one
// operand in the natural order and reorder the others, and also reorder the
// user node itself.
SmallPtrSet<const TreeEntry *, 4> Visited;
while (!OrderedEntries.empty()) {
// 1. Filter out only reordered nodes.
// 2. If the entry has multiple uses - skip it and jump to the next node.
MapVector<TreeEntry *, SmallVector<std::pair<unsigned, TreeEntry *>>> Users;
SmallVector<TreeEntry *> Filtered;
for (TreeEntry *TE : OrderedEntries) {
if (!(TE->State == TreeEntry::Vectorize ||
(TE->State == TreeEntry::NeedToGather &&
GathersToOrders.count(TE))) ||
TE->UserTreeIndices.empty() || !TE->ReuseShuffleIndices.empty() ||
!all_of(drop_begin(TE->UserTreeIndices),
[TE](const EdgeInfo &EI) {
return EI.UserTE == TE->UserTreeIndices.front().UserTE;
}) ||
!Visited.insert(TE).second) {
Filtered.push_back(TE);
continue;
}
// Build a map between user nodes and their operand order to speed up the
// search. The graph currently does not provide this dependency directly.
for (EdgeInfo &EI : TE->UserTreeIndices) {
TreeEntry *UserTE = EI.UserTE;
auto It = Users.find(UserTE);
if (It == Users.end())
It = Users.insert({UserTE, {}}).first;
It->second.emplace_back(EI.EdgeIdx, TE);
}
}
// Erase filtered entries.
for_each(Filtered,
[&OrderedEntries](TreeEntry *TE) { OrderedEntries.remove(TE); });
for (const auto &Data : Users) {
// Check that operands are used only in the User node.
SmallVector<TreeEntry *> GatherOps;
if (!CheckOperands(Data, GatherOps)) {
for_each(Data.second,
[&OrderedEntries](const std::pair<unsigned, TreeEntry *> &Op) {
OrderedEntries.remove(Op.second);
});
continue;
}
// All operands are reordered and used only in this node - propagate the
// most used order to the user node.
MapVector<OrdersType, unsigned,
DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo>>
OrdersUses;
// Do the analysis for each tree entry only once, otherwise the order of
// the same node may be considered several times, even though it might not
// be profitable.
SmallPtrSet<const TreeEntry *, 4> VisitedOps;
for (const auto &Op : Data.second) {
TreeEntry *OpTE = Op.second;
if (!VisitedOps.insert(OpTE).second)
continue;
if (!OpTE->ReuseShuffleIndices.empty() ||
(IgnoreReorder && OpTE == VectorizableTree.front().get()))
continue;
const auto &Order = [OpTE, &GathersToOrders]() -> const OrdersType & {
if (OpTE->State == TreeEntry::NeedToGather)
return GathersToOrders.find(OpTE)->second;
return OpTE->ReorderIndices;
}();
// Stores actually store the mask, not the order; we need to invert it.
if (OpTE->State == TreeEntry::Vectorize && !OpTE->isAltShuffle() &&
OpTE->getOpcode() == Instruction::Store && !Order.empty()) {
SmallVector<int> Mask;
inversePermutation(Order, Mask);
unsigned E = Order.size();
OrdersType CurrentOrder(E, E);
transform(Mask, CurrentOrder.begin(), [E](int Idx) {
return Idx == UndefMaskElem ? E : static_cast<unsigned>(Idx);
});
fixupOrderingIndices(CurrentOrder);
++OrdersUses.insert(std::make_pair(CurrentOrder, 0)).first->second;
} else {
++OrdersUses.insert(std::make_pair(Order, 0)).first->second;
}
OrdersUses.insert(std::make_pair(OrdersType(), 0)).first->second +=
OpTE->UserTreeIndices.size();
assert(OrdersUses[{}] > 0 && "Counter cannot be less than 0.");
--OrdersUses[{}];
}
// If there are no orders, skip the current nodes and jump to the next ones, if any.
if (OrdersUses.empty()) {
for_each(Data.second,
[&OrderedEntries](const std::pair<unsigned, TreeEntry *> &Op) {
OrderedEntries.remove(Op.second);
});
continue;
}
// Choose the best order.
ArrayRef<unsigned> BestOrder = OrdersUses.front().first;
unsigned Cnt = OrdersUses.front().second;
for (const auto &Pair : drop_begin(OrdersUses)) {
if (Cnt < Pair.second || (Cnt == Pair.second && Pair.first.empty())) {
BestOrder = Pair.first;
Cnt = Pair.second;
}
}
// Set order of the user node (reordering of operands and user nodes).
if (BestOrder.empty()) {
for_each(Data.second,
[&OrderedEntries](const std::pair<unsigned, TreeEntry *> &Op) {
OrderedEntries.remove(Op.second);
});
continue;
}
// Erase operands from OrderedEntries list and adjust their orders.
VisitedOps.clear();
SmallVector<int> Mask;
inversePermutation(BestOrder, Mask);
SmallVector<int> MaskOrder(BestOrder.size(), UndefMaskElem);
unsigned E = BestOrder.size();
transform(BestOrder, MaskOrder.begin(), [E](unsigned I) {
return I < E ? static_cast<int>(I) : UndefMaskElem;
});
for (const std::pair<unsigned, TreeEntry *> &Op : Data.second) {
TreeEntry *TE = Op.second;
OrderedEntries.remove(TE);
if (!VisitedOps.insert(TE).second)
continue;
if (!TE->ReuseShuffleIndices.empty() && TE->ReorderIndices.empty()) {
// Just reorder reuses indices.
reorderReuses(TE->ReuseShuffleIndices, Mask);
continue;
}
// Gathers are processed separately.
if (TE->State != TreeEntry::Vectorize)
continue;
assert((BestOrder.size() == TE->ReorderIndices.size() ||
TE->ReorderIndices.empty()) &&
"Non-matching sizes of user/operand entries.");
reorderOrder(TE->ReorderIndices, Mask);
}
// For gathers we just need to reorder their scalars.
for (TreeEntry *Gather : GatherOps) {
assert(Gather->ReorderIndices.empty() &&
"Unexpected reordering of gathers.");
if (!Gather->ReuseShuffleIndices.empty()) {
// Just reorder reuses indices.
reorderReuses(Gather->ReuseShuffleIndices, Mask);
continue;
}
reorderScalars(Gather->Scalars, Mask);
OrderedEntries.remove(Gather);
}
// Reorder operands of the user node and set the ordering for the user
// node itself.
if (Data.first->State != TreeEntry::Vectorize ||
!isa<ExtractElementInst, ExtractValueInst, LoadInst>(
Data.first->getMainOp()) ||
Data.first->isAltShuffle())
Data.first->reorderOperands(Mask);
if (!isa<InsertElementInst, StoreInst>(Data.first->getMainOp()) ||
Data.first->isAltShuffle()) {
reorderScalars(Data.first->Scalars, Mask);
reorderOrder(Data.first->ReorderIndices, MaskOrder);
if (Data.first->ReuseShuffleIndices.empty() &&
!Data.first->ReorderIndices.empty() &&
!Data.first->isAltShuffle()) {
// Insert user node to the list to try to sink reordering deeper in
// the graph.
OrderedEntries.insert(Data.first);
}
} else {
reorderOrder(Data.first->ReorderIndices, Mask);
}
}
}
// If the reordering is unnecessary, just remove the reorder.
if (IgnoreReorder && !VectorizableTree.front()->ReorderIndices.empty() &&
VectorizableTree.front()->ReuseShuffleIndices.empty())
VectorizableTree.front()->ReorderIndices.clear();
}
void BoUpSLP::buildExternalUses(
const ExtraValueToDebugLocsMap &ExternallyUsedValues) {
// Collect the values that we need to extract from the tree.
for (auto &TEPtr : VectorizableTree) {
TreeEntry *Entry = TEPtr.get();
// No need to handle users of gathered values.
if (Entry->State == TreeEntry::NeedToGather)
continue;
// For each lane:
for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
Value *Scalar = Entry->Scalars[Lane];
int FoundLane = Entry->findLaneForValue(Scalar);
// Check if the scalar is externally used as an extra arg.
auto ExtI = ExternallyUsedValues.find(Scalar);
if (ExtI != ExternallyUsedValues.end()) {
LLVM_DEBUG(dbgs() << "SLP: Need to extract: Extra arg from lane "
<< Lane << " from " << *Scalar << ".\n");
ExternalUses.emplace_back(Scalar, nullptr, FoundLane);
}
for (User *U : Scalar->users()) {
LLVM_DEBUG(dbgs() << "SLP: Checking user:" << *U << ".\n");
Instruction *UserInst = dyn_cast<Instruction>(U);
if (!UserInst)
continue;
if (isDeleted(UserInst))
continue;
// Skip in-tree scalars that become vectors
if (TreeEntry *UseEntry = getTreeEntry(U)) {
Value *UseScalar = UseEntry->Scalars[0];
// Some in-tree scalars will remain as scalar in vectorized
// instructions. If that is the case, the one in Lane 0 will
// be used.
if (UseScalar != U ||
UseEntry->State == TreeEntry::ScatterVectorize ||
!InTreeUserNeedToExtract(Scalar, UserInst, TLI)) {
LLVM_DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U
<< ".\n");
assert(UseEntry->State != TreeEntry::NeedToGather && "Bad state");
continue;
}
}
// Ignore users in the user ignore list.
if (is_contained(UserIgnoreList, UserInst))
continue;
LLVM_DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane "
<< Lane << " from " << *Scalar << ".\n");
ExternalUses.push_back(ExternalUser(Scalar, U, FoundLane));
}
}
}
}
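// Illustrative sketch (hypothetical IR): if %a and %b are vectorized as a
// <2 x i32> bundle but %a also feeds a scalar `ret i32 %a` outside the tree,
// the loop above records ExternalUser(%a, the ret instruction, lane 0);
// vectorizeTree() later materializes that use with an extractelement from the
// vectorized value.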
void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
ArrayRef<Value *> UserIgnoreLst) {
deleteTree();
UserIgnoreList = UserIgnoreLst;
if (!allSameType(Roots))
return;
buildTree_rec(Roots, 0, EdgeInfo());
}
namespace {
/// Tracks how the loads in the given sequence can be represented.
enum class LoadsState { Gather, Vectorize, ScatterVectorize };
} // anonymous namespace
/// Checks if the given array of loads can be represented as a vectorized
/// load, a scatter-vectorized load, or just a simple gather.
static LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
const TargetTransformInfo &TTI,
const DataLayout &DL, ScalarEvolution &SE,
SmallVectorImpl<unsigned> &Order,
SmallVectorImpl<Value *> &PointerOps) {
// Check that a vectorized load would load the same memory as a scalar
// load. For example, we don't want to vectorize loads that are smaller
// than 8-bit. Even though we have a packed struct {<i2, i2, i2, i2>} LLVM
// treats loading/storing it as an i8 struct. If we vectorize loads/stores
// from such a struct, we read/write packed bits disagreeing with the
// unvectorized version.
Type *ScalarTy = VL0->getType();
if (DL.getTypeSizeInBits(ScalarTy) != DL.getTypeAllocSizeInBits(ScalarTy))
return LoadsState::Gather;
// Make sure all loads in the bundle are simple - we can't vectorize
// atomic or volatile loads.
PointerOps.clear();
PointerOps.resize(VL.size());
auto *POIter = PointerOps.begin();
for (Value *V : VL) {
auto *L = cast<LoadInst>(V);
if (!L->isSimple())
return LoadsState::Gather;
*POIter = L->getPointerOperand();
++POIter;
}
Order.clear();
// Check the order of pointer operands.
if (llvm::sortPtrAccesses(PointerOps, ScalarTy, DL, SE, Order)) {
Value *Ptr0;
Value *PtrN;
if (Order.empty()) {
Ptr0 = PointerOps.front();
PtrN = PointerOps.back();
} else {
Ptr0 = PointerOps[Order.front()];
PtrN = PointerOps[Order.back()];
}
Optional<int> Diff =
getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, DL, SE);
// Check that the sorted loads are consecutive.
if (static_cast<unsigned>(*Diff) == VL.size() - 1)
return LoadsState::Vectorize;
Align CommonAlignment = cast<LoadInst>(VL0)->getAlign();
for (Value *V : VL)
CommonAlignment =
commonAlignment(CommonAlignment, cast<LoadInst>(V)->getAlign());
if (TTI.isLegalMaskedGather(FixedVectorType::get(ScalarTy, VL.size()),
CommonAlignment))
return LoadsState::ScatterVectorize;
}
return LoadsState::Gather;
}
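// Illustrative sketch (hypothetical pointers and element type): for four
// simple i32 loads from p[0], p[1], p[2], p[3], the pointer difference
// between the first and last sorted element is 3 == VL.size() - 1, so the
// bundle is classified as LoadsState::Vectorize. Loads from p[0], p[2], p[4],
// p[6] sort cleanly but are not consecutive; they become
// LoadsState::ScatterVectorize only if the target reports
// isLegalMaskedGather() for <4 x i32>, and otherwise fall back to
// LoadsState::Gather.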
void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
const EdgeInfo &UserTreeIdx) {
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
SmallVector<int> ReuseShuffleIndicies;
SmallVector<Value *> UniqueValues;
auto &&TryToFindDuplicates = [&VL, &ReuseShuffleIndicies, &UniqueValues,
&UserTreeIdx,
this](const InstructionsState &S) {
// Check that every instruction appears once in this bundle.
DenseMap<Value *, unsigned> UniquePositions;
for (Value *V : VL) {
if (isConstant(V)) {
ReuseShuffleIndicies.emplace_back(
isa<UndefValue>(V) ? UndefMaskElem : UniqueValues.size());
UniqueValues.emplace_back(V);
continue;
}
auto Res = UniquePositions.try_emplace(V, UniqueValues.size());
ReuseShuffleIndicies.emplace_back(Res.first->second);
if (Res.second)
UniqueValues.emplace_back(V);
}
size_t NumUniqueScalarValues = UniqueValues.size();
if (NumUniqueScalarValues == VL.size()) {
ReuseShuffleIndicies.clear();
} else {
LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
if (NumUniqueScalarValues <= 1 ||
(UniquePositions.size() == 1 && all_of(UniqueValues,
[](Value *V) {
return isa<UndefValue>(V) ||
!isConstant(V);
})) ||
!llvm::isPowerOf2_32(NumUniqueScalarValues)) {
LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
return false;
}
VL = UniqueValues;
}
return true;
};
InstructionsState S = getSameOpcode(VL);
if (Depth == RecursionMaxDepth) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
if (TryToFindDuplicates(S))
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
return;
}
// Don't handle scalable vectors
if (S.getOpcode() == Instruction::ExtractElement &&
isa<ScalableVectorType>(
cast<ExtractElementInst>(S.OpValue)->getVectorOperandType())) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
if (TryToFindDuplicates(S))
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
return;
}
// Don't handle vectors.
if (S.OpValue->getType()->isVectorTy() &&
!isa<InsertElementInst>(S.OpValue)) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
return;
}
if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))
if (SI->getValueOperand()->getType()->isVectorTy()) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
return;
}
// If all of the operands are identical or constant we have a simple solution.
// If we deal with insert/extract instructions, they all must have constant
// indices, otherwise we should gather them, not try to vectorize.
if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !S.getOpcode() ||
(isa<InsertElementInst, ExtractValueInst, ExtractElementInst>(S.MainOp) &&
!all_of(VL, isVectorLikeInstWithConstOps))) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n");
if (TryToFindDuplicates(S))
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
return;
}
// We now know that this is a vector of instructions of the same type from
// the same block.
// Don't vectorize ephemeral values.
for (Value *V : VL) {
if (EphValues.count(V)) {
LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
<< ") is ephemeral.\n");
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
return;
}
}
// Check if this is a duplicate of another entry.
if (TreeEntry *E = getTreeEntry(S.OpValue)) {
LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n");
if (!E->isSame(VL)) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
if (TryToFindDuplicates(S))
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
return;
}
// Record the reuse of the tree node. FIXME, currently this is only used to
// properly draw the graph rather than for the actual vectorization.
E->UserTreeIndices.push_back(UserTreeIdx);
LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
<< ".\n");
return;
}
// Check that none of the instructions in the bundle are already in the tree.
for (Value *V : VL) {
auto *I = dyn_cast<Instruction>(V);
if (!I)
continue;
if (getTreeEntry(I)) {
LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
<< ") is already in tree.\n");
if (TryToFindDuplicates(S))
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
return;
}
}
// The reduction nodes (stored in UserIgnoreList) also should stay scalar.
for (Value *V : VL) {
if (is_contained(UserIgnoreList, V)) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
if (TryToFindDuplicates(S))
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
return;
}
}
// Check that all of the users of the scalars that we want to vectorize are
// schedulable.
auto *VL0 = cast<Instruction>(S.OpValue);
BasicBlock *BB = VL0->getParent();
if (!DT->isReachableFromEntry(BB)) {
// Don't go into unreachable blocks. They may contain instructions with
// dependency cycles which confuse the final scheduling.
LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
return;
}
// Check that every instruction appears once in this bundle.
if (!TryToFindDuplicates(S))
return;
auto &BSRef = BlocksSchedules[BB];
if (!BSRef)
BSRef = std::make_unique<BlockScheduling>(BB);
BlockScheduling &BS = *BSRef.get();
Optional<ScheduleData *> Bundle = BS.tryScheduleBundle(VL, this, S);
if (!Bundle) {
LLVM_DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n");
assert((!BS.getScheduleData(VL0) ||
!BS.getScheduleData(VL0)->isPartOfBundle()) &&
"tryScheduleBundle should cancelScheduling on failure");
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
return;
}
LLVM_DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n");
unsigned ShuffleOrOp = S.isAltShuffle() ?
(unsigned) Instruction::ShuffleVector : S.getOpcode();
switch (ShuffleOrOp) {
case Instruction::PHI: {
auto *PH = cast<PHINode>(VL0);
// Check for terminator values (e.g. invoke).
for (Value *V : VL)
for (unsigned I = 0, E = PH->getNumIncomingValues(); I < E; ++I) {
Instruction *Term = dyn_cast<Instruction>(
cast<PHINode>(V)->getIncomingValueForBlock(
PH->getIncomingBlock(I)));
if (Term && Term->isTerminator()) {
LLVM_DEBUG(dbgs()
<< "SLP: Need to swizzle PHINodes (terminator use).\n");
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
return;
}
}
TreeEntry *TE =
newTreeEntry(VL, Bundle, S, UserTreeIdx, ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
// Keeps the reordered operands to avoid code duplication.
SmallVector<ValueList, 2> OperandsVec;
for (unsigned I = 0, E = PH->getNumIncomingValues(); I < E; ++I) {
if (!DT->isReachableFromEntry(PH->getIncomingBlock(I))) {
ValueList Operands(VL.size(), PoisonValue::get(PH->getType()));
TE->setOperand(I, Operands);
OperandsVec.push_back(Operands);
continue;
}
ValueList Operands;
// Prepare the operand vector.
for (Value *V : VL)
Operands.push_back(cast<PHINode>(V)->getIncomingValueForBlock(
PH->getIncomingBlock(I)));
TE->setOperand(I, Operands);
OperandsVec.push_back(Operands);
}
for (unsigned OpIdx = 0, OpE = OperandsVec.size(); OpIdx != OpE; ++OpIdx)
buildTree_rec(OperandsVec[OpIdx], Depth + 1, {TE, OpIdx});
return;
}
case Instruction::ExtractValue:
case Instruction::ExtractElement: {
OrdersType CurrentOrder;
bool Reuse = canReuseExtract(VL, VL0, CurrentOrder);
if (Reuse) {
LLVM_DEBUG(dbgs() << "SLP: Reusing or shuffling extract sequence.\n");
newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
// This is a special case, as it does not gather, but at the same time
// we are not extending buildTree_rec() towards the operands.
ValueList Op0;
Op0.assign(VL.size(), VL0->getOperand(0));
VectorizableTree.back()->setOperand(0, Op0);
return;
}
if (!CurrentOrder.empty()) {
LLVM_DEBUG({
dbgs() << "SLP: Reusing or shuffling of reordered extract sequence "
"with order";
for (unsigned Idx : CurrentOrder)
dbgs() << " " << Idx;
dbgs() << "\n";
});
fixupOrderingIndices(CurrentOrder);
// Insert new order with initial value 0, if it does not exist,
// otherwise return the iterator to the existing one.
newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies, CurrentOrder);
// This is a special case, as it does not gather, but at the same time
// we are not extending buildTree_rec() towards the operands.
ValueList Op0;
Op0.assign(VL.size(), VL0->getOperand(0));
VectorizableTree.back()->setOperand(0, Op0);
return;
}
LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n");
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
BS.cancelScheduling(VL, VL0);
return;
}
case Instruction::InsertElement: {
assert(ReuseShuffleIndicies.empty() && "All inserts should be unique");
// Check that we have a buildvector and not a shuffle of 2 or more
// different vectors.
ValueSet SourceVectors;
- int MinIdx = std::numeric_limits<int>::max();
for (Value *V : VL) {
SourceVectors.insert(cast<Instruction>(V)->getOperand(0));
- Optional<int> Idx = *getInsertIndex(V, 0);
- if (!Idx || *Idx == UndefMaskElem)
- continue;
- MinIdx = std::min(MinIdx, *Idx);
+ assert(getInsertIndex(V) != None && "Non-constant or undef index?");
}
if (count_if(VL, [&SourceVectors](Value *V) {
return !SourceVectors.contains(V);
}) >= 2) {
// Found 2nd source vector - cancel.
LLVM_DEBUG(dbgs() << "SLP: Gather of insertelement vectors with "
"different source vectors.\n");
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
BS.cancelScheduling(VL, VL0);
return;
}
auto OrdCompare = [](const std::pair<int, int> &P1,
const std::pair<int, int> &P2) {
return P1.first > P2.first;
};
PriorityQueue<std::pair<int, int>, SmallVector<std::pair<int, int>>,
decltype(OrdCompare)>
Indices(OrdCompare);
for (int I = 0, E = VL.size(); I < E; ++I) {
- Optional<int> Idx = *getInsertIndex(VL[I], 0);
- if (!Idx || *Idx == UndefMaskElem)
- continue;
- Indices.emplace(*Idx, I);
+ unsigned Idx = *getInsertIndex(VL[I]);
+ Indices.emplace(Idx, I);
}
OrdersType CurrentOrder(VL.size(), VL.size());
bool IsIdentity = true;
for (int I = 0, E = VL.size(); I < E; ++I) {
CurrentOrder[Indices.top().second] = I;
IsIdentity &= Indices.top().second == I;
Indices.pop();
}
if (IsIdentity)
CurrentOrder.clear();
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
None, CurrentOrder);
LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
constexpr int NumOps = 2;
ValueList VectorOperands[NumOps];
for (int I = 0; I < NumOps; ++I) {
for (Value *V : VL)
VectorOperands[I].push_back(cast<Instruction>(V)->getOperand(I));
TE->setOperand(I, VectorOperands[I]);
}
buildTree_rec(VectorOperands[NumOps - 1], Depth + 1, {TE, NumOps - 1});
return;
}
case Instruction::Load: {
// Check that a vectorized load would load the same memory as a scalar
// load. For example, we don't want to vectorize loads that are smaller
// than 8-bit. Even though we have a packed struct {<i2, i2, i2, i2>} LLVM
// treats loading/storing it as an i8 struct. If we vectorize loads/stores
// from such a struct, we read/write packed bits disagreeing with the
// unvectorized version.
SmallVector<Value *> PointerOps;
OrdersType CurrentOrder;
TreeEntry *TE = nullptr;
switch (canVectorizeLoads(VL, VL0, *TTI, *DL, *SE, CurrentOrder,
PointerOps)) {
case LoadsState::Vectorize:
if (CurrentOrder.empty()) {
// Original loads are consecutive and do not require reordering.
TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
} else {
fixupOrderingIndices(CurrentOrder);
// Need to reorder.
TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies, CurrentOrder);
LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
}
TE->setOperandsInOrder();
break;
case LoadsState::ScatterVectorize:
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
UserTreeIdx, ReuseShuffleIndicies);
TE->setOperandsInOrder();
buildTree_rec(PointerOps, Depth + 1, {TE, 0});
LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
break;
case LoadsState::Gather:
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
#ifndef NDEBUG
Type *ScalarTy = VL0->getType();
if (DL->getTypeSizeInBits(ScalarTy) !=
DL->getTypeAllocSizeInBits(ScalarTy))
LLVM_DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
else if (any_of(VL, [](Value *V) {
return !cast<LoadInst>(V)->isSimple();
}))
LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
else
LLVM_DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
#endif // NDEBUG
break;
}
return;
}
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
case Instruction::FPToSI:
case Instruction::FPExt:
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::SIToFP:
case Instruction::UIToFP:
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
Type *SrcTy = VL0->getOperand(0)->getType();
for (Value *V : VL) {
Type *Ty = cast<Instruction>(V)->getOperand(0)->getType();
if (Ty != SrcTy || !isValidElementType(Ty)) {
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
LLVM_DEBUG(dbgs()
<< "SLP: Gathering casts with different src types.\n");
return;
}
}
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
TE->setOperandsInOrder();
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
ValueList Operands;
// Prepare the operand vector.
for (Value *V : VL)
Operands.push_back(cast<Instruction>(V)->getOperand(i));
buildTree_rec(Operands, Depth + 1, {TE, i});
}
return;
}
case Instruction::ICmp:
case Instruction::FCmp: {
// Check that all of the compares have the same predicate.
CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
CmpInst::Predicate SwapP0 = CmpInst::getSwappedPredicate(P0);
Type *ComparedTy = VL0->getOperand(0)->getType();
for (Value *V : VL) {
CmpInst *Cmp = cast<CmpInst>(V);
if ((Cmp->getPredicate() != P0 && Cmp->getPredicate() != SwapP0) ||
Cmp->getOperand(0)->getType() != ComparedTy) {
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
LLVM_DEBUG(dbgs()
<< "SLP: Gathering cmp with different predicate.\n");
return;
}
}
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
ValueList Left, Right;
if (cast<CmpInst>(VL0)->isCommutative()) {
// Commutative predicate - collect + sort operands of the instructions
// so that each side is more likely to have the same opcode.
assert(P0 == SwapP0 && "Commutative Predicate mismatch");
reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
} else {
// Collect operands - commute if it uses the swapped predicate.
for (Value *V : VL) {
auto *Cmp = cast<CmpInst>(V);
Value *LHS = Cmp->getOperand(0);
Value *RHS = Cmp->getOperand(1);
if (Cmp->getPredicate() != P0)
std::swap(LHS, RHS);
Left.push_back(LHS);
Right.push_back(RHS);
}
}
TE->setOperand(0, Left);
TE->setOperand(1, Right);
buildTree_rec(Left, Depth + 1, {TE, 0});
buildTree_rec(Right, Depth + 1, {TE, 1});
return;
}
case Instruction::Select:
case Instruction::FNeg:
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
case Instruction::FSub:
case Instruction::Mul:
case Instruction::FMul:
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::FDiv:
case Instruction::URem:
case Instruction::SRem:
case Instruction::FRem:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
// Sort operands of the instructions so that each side is more likely to
// have the same opcode.
if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
ValueList Left, Right;
reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
TE->setOperand(0, Left);
TE->setOperand(1, Right);
buildTree_rec(Left, Depth + 1, {TE, 0});
buildTree_rec(Right, Depth + 1, {TE, 1});
return;
}
TE->setOperandsInOrder();
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
ValueList Operands;
// Prepare the operand vector.
for (Value *V : VL)
Operands.push_back(cast<Instruction>(V)->getOperand(i));
buildTree_rec(Operands, Depth + 1, {TE, i});
}
return;
}
case Instruction::GetElementPtr: {
// We don't combine GEPs with complicated (nested) indexing.
for (Value *V : VL) {
if (cast<Instruction>(V)->getNumOperands() != 2) {
LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
return;
}
}
// We can't combine several GEPs into one vector if they operate on
// different types.
Type *Ty0 = VL0->getOperand(0)->getType();
for (Value *V : VL) {
Type *CurTy = cast<Instruction>(V)->getOperand(0)->getType();
if (Ty0 != CurTy) {
LLVM_DEBUG(dbgs()
<< "SLP: not-vectorizable GEP (different types).\n");
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
return;
}
}
// We don't combine GEPs with non-constant indexes.
Type *Ty1 = VL0->getOperand(1)->getType();
for (Value *V : VL) {
auto Op = cast<Instruction>(V)->getOperand(1);
if (!isa<ConstantInt>(Op) ||
(Op->getType() != Ty1 &&
Op->getType()->getScalarSizeInBits() >
DL->getIndexSizeInBits(
V->getType()->getPointerAddressSpace()))) {
LLVM_DEBUG(dbgs()
<< "SLP: not-vectorizable GEP (non-constant indexes).\n");
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
return;
}
}
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
SmallVector<ValueList, 2> Operands(2);
// Prepare the operand vector for pointer operands.
for (Value *V : VL)
Operands.front().push_back(
cast<GetElementPtrInst>(V)->getPointerOperand());
TE->setOperand(0, Operands.front());
// Need to cast all indices to the same type before vectorization to
// avoid a crash.
// Required to be able to find correct matches between different gather
// nodes and reuse the vectorized values rather than trying to gather them
// again.
int IndexIdx = 1;
Type *VL0Ty = VL0->getOperand(IndexIdx)->getType();
Type *Ty = all_of(VL,
[VL0Ty, IndexIdx](Value *V) {
return VL0Ty == cast<GetElementPtrInst>(V)
->getOperand(IndexIdx)
->getType();
})
? VL0Ty
: DL->getIndexType(cast<GetElementPtrInst>(VL0)
->getPointerOperandType()
->getScalarType());
// Prepare the operand vector.
for (Value *V : VL) {
auto *Op = cast<Instruction>(V)->getOperand(IndexIdx);
auto *CI = cast<ConstantInt>(Op);
Operands.back().push_back(ConstantExpr::getIntegerCast(
CI, Ty, CI->getValue().isSignBitSet()));
}
TE->setOperand(IndexIdx, Operands.back());
for (unsigned I = 0, Ops = Operands.size(); I < Ops; ++I)
buildTree_rec(Operands[I], Depth + 1, {TE, I});
return;
}
case Instruction::Store: {
// Check if the stores are consecutive or if we need to swizzle them.
llvm::Type *ScalarTy = cast<StoreInst>(VL0)->getValueOperand()->getType();
// Avoid types that are padded when being allocated as scalars, while
// being packed together in a vector (such as i1).
if (DL->getTypeSizeInBits(ScalarTy) !=
DL->getTypeAllocSizeInBits(ScalarTy)) {
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: Gathering stores of non-packed type.\n");
return;
}
// Make sure all stores in the bundle are simple - we can't vectorize
// atomic or volatile stores.
SmallVector<Value *, 4> PointerOps(VL.size());
ValueList Operands(VL.size());
auto POIter = PointerOps.begin();
auto OIter = Operands.begin();
for (Value *V : VL) {
auto *SI = cast<StoreInst>(V);
if (!SI->isSimple()) {
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple stores.\n");
return;
}
*POIter = SI->getPointerOperand();
*OIter = SI->getValueOperand();
++POIter;
++OIter;
}
OrdersType CurrentOrder;
// Check the order of pointer operands.
if (llvm::sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, CurrentOrder)) {
Value *Ptr0;
Value *PtrN;
if (CurrentOrder.empty()) {
Ptr0 = PointerOps.front();
PtrN = PointerOps.back();
} else {
Ptr0 = PointerOps[CurrentOrder.front()];
PtrN = PointerOps[CurrentOrder.back()];
}
Optional<int> Dist =
getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, *DL, *SE);
// Check that the sorted pointer operands are consecutive.
if (static_cast<unsigned>(*Dist) == VL.size() - 1) {
if (CurrentOrder.empty()) {
// Original stores are consecutive and do not require reordering.
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S,
UserTreeIdx, ReuseShuffleIndicies);
TE->setOperandsInOrder();
buildTree_rec(Operands, Depth + 1, {TE, 0});
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
} else {
fixupOrderingIndices(CurrentOrder);
TreeEntry *TE =
newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies, CurrentOrder);
TE->setOperandsInOrder();
buildTree_rec(Operands, Depth + 1, {TE, 0});
LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled stores.\n");
}
return;
}
}
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
return;
}
case Instruction::Call: {
// Check if the calls are all to the same vectorizable intrinsic or
// library function.
CallInst *CI = cast<CallInst>(VL0);
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
VFShape Shape = VFShape::get(
*CI, ElementCount::getFixed(static_cast<unsigned int>(VL.size())),
false /*HasGlobalPred*/);
Function *VecFunc = VFDatabase(*CI).getVectorizedFunction(Shape);
if (!VecFunc && !isTriviallyVectorizable(ID)) {
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
return;
}
Function *F = CI->getCalledFunction();
unsigned NumArgs = CI->arg_size();
SmallVector<Value*, 4> ScalarArgs(NumArgs, nullptr);
for (unsigned j = 0; j != NumArgs; ++j)
if (hasVectorInstrinsicScalarOpd(ID, j))
ScalarArgs[j] = CI->getArgOperand(j);
for (Value *V : VL) {
CallInst *CI2 = dyn_cast<CallInst>(V);
if (!CI2 || CI2->getCalledFunction() != F ||
getVectorIntrinsicIDForCall(CI2, TLI) != ID ||
(VecFunc &&
VecFunc != VFDatabase(*CI2).getVectorizedFunction(Shape)) ||
!CI->hasIdenticalOperandBundleSchema(*CI2)) {
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *V
<< "\n");
return;
}
// Some intrinsics have scalar arguments, and these should be the same in
// order for the calls to be vectorized.
for (unsigned j = 0; j != NumArgs; ++j) {
if (hasVectorInstrinsicScalarOpd(ID, j)) {
Value *A1J = CI2->getArgOperand(j);
if (ScalarArgs[j] != A1J) {
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
<< " argument " << ScalarArgs[j] << "!=" << A1J
<< "\n");
return;
}
}
}
// Verify that the bundle operands are identical between the two calls.
if (CI->hasOperandBundles() &&
!std::equal(CI->op_begin() + CI->getBundleOperandsStartIndex(),
CI->op_begin() + CI->getBundleOperandsEndIndex(),
CI2->op_begin() + CI2->getBundleOperandsStartIndex())) {
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:"
<< *CI << "!=" << *V << '\n');
return;
}
}
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
TE->setOperandsInOrder();
for (unsigned i = 0, e = CI->arg_size(); i != e; ++i) {
// For scalar operands there is no need to create an entry since there is
// no need to vectorize them.
if (hasVectorInstrinsicScalarOpd(ID, i))
continue;
ValueList Operands;
// Prepare the operand vector.
for (Value *V : VL) {
auto *CI2 = cast<CallInst>(V);
Operands.push_back(CI2->getArgOperand(i));
}
buildTree_rec(Operands, Depth + 1, {TE, i});
}
return;
}
case Instruction::ShuffleVector: {
// If this is not an alternate sequence of opcode like add-sub
// then do not vectorize this instruction.
if (!S.isAltShuffle()) {
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
return;
}
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
// Reorder operands if reordering would enable vectorization.
if (isa<BinaryOperator>(VL0)) {
ValueList Left, Right;
reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
TE->setOperand(0, Left);
TE->setOperand(1, Right);
buildTree_rec(Left, Depth + 1, {TE, 0});
buildTree_rec(Right, Depth + 1, {TE, 1});
return;
}
TE->setOperandsInOrder();
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
ValueList Operands;
// Prepare the operand vector.
for (Value *V : VL)
Operands.push_back(cast<Instruction>(V)->getOperand(i));
buildTree_rec(Operands, Depth + 1, {TE, i});
}
return;
}
default:
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
return;
}
}
unsigned BoUpSLP::canMapToVector(Type *T, const DataLayout &DL) const {
unsigned N = 1;
Type *EltTy = T;
while (isa<StructType>(EltTy) || isa<ArrayType>(EltTy) ||
isa<VectorType>(EltTy)) {
if (auto *ST = dyn_cast<StructType>(EltTy)) {
// Check that struct is homogeneous.
for (const auto *Ty : ST->elements())
if (Ty != *ST->element_begin())
return 0;
N *= ST->getNumElements();
EltTy = *ST->element_begin();
} else if (auto *AT = dyn_cast<ArrayType>(EltTy)) {
N *= AT->getNumElements();
EltTy = AT->getElementType();
} else {
auto *VT = cast<FixedVectorType>(EltTy);
N *= VT->getNumElements();
EltTy = VT->getElementType();
}
}
if (!isValidElementType(EltTy))
return 0;
uint64_t VTSize = DL.getTypeStoreSizeInBits(FixedVectorType::get(EltTy, N));
if (VTSize < MinVecRegSize || VTSize > MaxVecRegSize || VTSize != DL.getTypeStoreSizeInBits(T))
return 0;
return N;
}
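// Worked example (hypothetical types, default register limits): for
// T = [4 x float] the loop yields N = 4 and EltTy = float, so VTSize is 128
// bits, which matches the store size of T and fits the usual
// MinVecRegSize/MaxVecRegSize window, and the function returns 4. For a
// non-homogeneous struct such as {float, i32} it returns 0 immediately.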
bool BoUpSLP::canReuseExtract(ArrayRef<Value *> VL, Value *OpValue,
SmallVectorImpl<unsigned> &CurrentOrder) const {
const auto *It = find_if(VL, [](Value *V) {
return isa<ExtractElementInst, ExtractValueInst>(V);
});
assert(It != VL.end() && "Expected at least one extract instruction.");
auto *E0 = cast<Instruction>(*It);
assert(all_of(VL,
[](Value *V) {
return isa<UndefValue, ExtractElementInst, ExtractValueInst>(
V);
}) &&
"Invalid opcode");
// Check if all of the extracts come from the same vector and from the
// correct offset.
Value *Vec = E0->getOperand(0);
CurrentOrder.clear();
// We have to extract from a vector/aggregate with the same number of elements.
unsigned NElts;
if (E0->getOpcode() == Instruction::ExtractValue) {
const DataLayout &DL = E0->getModule()->getDataLayout();
NElts = canMapToVector(Vec->getType(), DL);
if (!NElts)
return false;
// Check if load can be rewritten as load of vector.
LoadInst *LI = dyn_cast<LoadInst>(Vec);
if (!LI || !LI->isSimple() || !LI->hasNUses(VL.size()))
return false;
} else {
NElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
}
if (NElts != VL.size())
return false;
// Check that all of the indices extract from the correct offset.
bool ShouldKeepOrder = true;
unsigned E = VL.size();
// Assign to all items the initial value E so we can check if the extract
// instruction index was used already.
// Also, later we can check that all the indices are used and we have a
// consecutive access in the extract instructions, by checking that no
// element of CurrentOrder still has value E.
CurrentOrder.assign(E, E);
unsigned I = 0;
for (; I < E; ++I) {
auto *Inst = dyn_cast<Instruction>(VL[I]);
if (!Inst)
continue;
if (Inst->getOperand(0) != Vec)
break;
if (auto *EE = dyn_cast<ExtractElementInst>(Inst))
if (isa<UndefValue>(EE->getIndexOperand()))
continue;
Optional<unsigned> Idx = getExtractIndex(Inst);
if (!Idx)
break;
const unsigned ExtIdx = *Idx;
if (ExtIdx != I) {
if (ExtIdx >= E || CurrentOrder[ExtIdx] != E)
break;
ShouldKeepOrder = false;
CurrentOrder[ExtIdx] = I;
} else {
if (CurrentOrder[I] != E)
break;
CurrentOrder[I] = I;
}
}
if (I < E) {
CurrentOrder.clear();
return false;
}
if (ShouldKeepOrder)
CurrentOrder.clear();
return ShouldKeepOrder;
}
bool BoUpSLP::areAllUsersVectorized(Instruction *I,
ArrayRef<Value *> VectorizedVals) const {
return (I->hasOneUse() && is_contained(VectorizedVals, I)) ||
all_of(I->users(), [this](User *U) {
return ScalarToTreeEntry.count(U) > 0 || MustGather.contains(U);
});
}
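// Compute the cost of widening the call CI to VecTy as a vector intrinsic and,
// if a vectorized library function is available, as a vector library call.
// Returns {intrinsic cost, library cost}.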
static std::pair<InstructionCost, InstructionCost>
getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
TargetTransformInfo *TTI, TargetLibraryInfo *TLI) {
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
// Calculate the cost of the scalar and vector calls.
SmallVector<Type *, 4> VecTys;
for (Use &Arg : CI->args())
VecTys.push_back(
FixedVectorType::get(Arg->getType(), VecTy->getNumElements()));
FastMathFlags FMF;
if (auto *FPCI = dyn_cast<FPMathOperator>(CI))
FMF = FPCI->getFastMathFlags();
SmallVector<const Value *> Arguments(CI->args());
IntrinsicCostAttributes CostAttrs(ID, VecTy, Arguments, VecTys, FMF,
dyn_cast<IntrinsicInst>(CI));
auto IntrinsicCost =
TTI->getIntrinsicInstrCost(CostAttrs, TTI::TCK_RecipThroughput);
auto Shape = VFShape::get(*CI, ElementCount::getFixed(static_cast<unsigned>(
VecTy->getNumElements())),
false /*HasGlobalPred*/);
Function *VecFunc = VFDatabase(*CI).getVectorizedFunction(Shape);
auto LibCost = IntrinsicCost;
if (!CI->isNoBuiltin() && VecFunc) {
// Calculate the cost of the vector library call.
// If the corresponding vector call is cheaper, return its cost.
LibCost = TTI->getCallInstrCost(nullptr, VecTy, VecTys,
TTI::TCK_RecipThroughput);
}
return {IntrinsicCost, LibCost};
}
/// Compute the cost of creating a vector of type \p VecTy containing the
/// extracted values from \p VL.
static InstructionCost
computeExtractCost(ArrayRef<Value *> VL, FixedVectorType *VecTy,
TargetTransformInfo::ShuffleKind ShuffleKind,
ArrayRef<int> Mask, TargetTransformInfo &TTI) {
unsigned NumOfParts = TTI.getNumberOfParts(VecTy);
if (ShuffleKind != TargetTransformInfo::SK_PermuteSingleSrc || !NumOfParts ||
VecTy->getNumElements() < NumOfParts)
return TTI.getShuffleCost(ShuffleKind, VecTy, Mask);
bool AllConsecutive = true;
unsigned EltsPerVector = VecTy->getNumElements() / NumOfParts;
unsigned Idx = -1;
InstructionCost Cost = 0;
// Process extracts in blocks of EltsPerVector to check if the source vector
// operand can be re-used directly. If not, add the cost of creating a shuffle
// to extract the values into a vector register.
for (auto *V : VL) {
++Idx;
// Need to exclude undefs from analysis.
if (isa<UndefValue>(V) || Mask[Idx] == UndefMaskElem)
continue;
// Reached the start of a new vector register.
if (Idx % EltsPerVector == 0) {
AllConsecutive = true;
continue;
}
// Check whether all extracts covering one vector register on the target
// extract their values in order.
unsigned CurrentIdx = *getExtractIndex(cast<Instruction>(V));
if (!isa<UndefValue>(VL[Idx - 1]) && Mask[Idx - 1] != UndefMaskElem) {
unsigned PrevIdx = *getExtractIndex(cast<Instruction>(VL[Idx - 1]));
AllConsecutive &= PrevIdx + 1 == CurrentIdx &&
CurrentIdx % EltsPerVector == Idx % EltsPerVector;
}
if (AllConsecutive)
continue;
// Skip all indices, except for the last index per vector block.
if ((Idx + 1) % EltsPerVector != 0 && Idx + 1 != VL.size())
continue;
// If we have a series of extracts which are not consecutive and hence
// cannot re-use the source vector register directly, compute the shuffle
// cost to extract a vector with EltsPerVector elements.
Cost += TTI.getShuffleCost(
TargetTransformInfo::SK_PermuteSingleSrc,
FixedVectorType::get(VecTy->getElementType(), EltsPerVector));
}
return Cost;
}
/// Build the shuffle mask for a shuffle graph entry together with the lists of
/// main and alternate operation operands.
static void
buildSuffleEntryMask(ArrayRef<Value *> VL, ArrayRef<unsigned> ReorderIndices,
ArrayRef<int> ReusesIndices,
const function_ref<bool(Instruction *)> IsAltOp,
SmallVectorImpl<int> &Mask,
SmallVectorImpl<Value *> *OpScalars = nullptr,
SmallVectorImpl<Value *> *AltScalars = nullptr) {
unsigned Sz = VL.size();
Mask.assign(Sz, UndefMaskElem);
SmallVector<int> OrderMask;
if (!ReorderIndices.empty())
inversePermutation(ReorderIndices, OrderMask);
for (unsigned I = 0; I < Sz; ++I) {
unsigned Idx = I;
if (!ReorderIndices.empty())
Idx = OrderMask[I];
auto *OpInst = cast<Instruction>(VL[Idx]);
if (IsAltOp(OpInst)) {
Mask[I] = Sz + Idx;
if (AltScalars)
AltScalars->push_back(OpInst);
} else {
Mask[I] = Idx;
if (OpScalars)
OpScalars->push_back(OpInst);
}
}
if (!ReusesIndices.empty()) {
SmallVector<int> NewMask(ReusesIndices.size(), UndefMaskElem);
transform(ReusesIndices, NewMask.begin(), [&Mask](int Idx) {
return Idx != UndefMaskElem ? Mask[Idx] : UndefMaskElem;
});
Mask.swap(NewMask);
}
}
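// Compute the cost contribution of tree entry E: for gather nodes, the cost of
// building the vector from its scalars (reusing already vectorized nodes or
// wide loads where possible); for vectorized nodes, the cost of the wide
// operation minus the summed scalar costs, plus any shuffle cost for reused or
// reordered scalars.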
InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
ArrayRef<Value *> VectorizedVals) {
ArrayRef<Value*> VL = E->Scalars;
Type *ScalarTy = VL[0]->getType();
if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
ScalarTy = SI->getValueOperand()->getType();
else if (CmpInst *CI = dyn_cast<CmpInst>(VL[0]))
ScalarTy = CI->getOperand(0)->getType();
else if (auto *IE = dyn_cast<InsertElementInst>(VL[0]))
ScalarTy = IE->getOperand(1)->getType();
auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
// If we have computed a smaller type for the expression, update VecTy so
// that the costs will be accurate.
if (MinBWs.count(VL[0]))
VecTy = FixedVectorType::get(
IntegerType::get(F->getContext(), MinBWs[VL[0]].first), VL.size());
unsigned EntryVF = E->getVectorFactor();
auto *FinalVecTy = FixedVectorType::get(VecTy->getElementType(), EntryVF);
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
// FIXME: it tries to fix a problem with MSVC buildbots.
TargetTransformInfo &TTIRef = *TTI;
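// Adjust the cost of gathering extractelements: take credit for extracts (and
// extract+ext pairs) that become dead once their users are vectorized, and add
// the cost of subvector extracts/inserts when the source vector splits into a
// different number of registers than VecTy.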
auto &&AdjustExtractsCost = [this, &TTIRef, CostKind, VL, VecTy,
VectorizedVals, E](InstructionCost &Cost) {
DenseMap<Value *, int> ExtractVectorsTys;
SmallPtrSet<Value *, 4> CheckedExtracts;
for (auto *V : VL) {
if (isa<UndefValue>(V))
continue;
// If all users of instruction are going to be vectorized and this
// instruction itself is not going to be vectorized, consider this
// instruction as dead and remove its cost from the final cost of the
// vectorized tree.
// Also, avoid adjusting the cost for extractelements with multiple uses
// in different graph entries.
const TreeEntry *VE = getTreeEntry(V);
if (!CheckedExtracts.insert(V).second ||
!areAllUsersVectorized(cast<Instruction>(V), VectorizedVals) ||
(VE && VE != E))
continue;
auto *EE = cast<ExtractElementInst>(V);
Optional<unsigned> EEIdx = getExtractIndex(EE);
if (!EEIdx)
continue;
unsigned Idx = *EEIdx;
if (TTIRef.getNumberOfParts(VecTy) !=
TTIRef.getNumberOfParts(EE->getVectorOperandType())) {
auto It =
ExtractVectorsTys.try_emplace(EE->getVectorOperand(), Idx).first;
It->getSecond() = std::min<int>(It->second, Idx);
}
// Take credit for instruction that will become dead.
if (EE->hasOneUse()) {
Instruction *Ext = EE->user_back();
if ((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
all_of(Ext->users(),
[](User *U) { return isa<GetElementPtrInst>(U); })) {
// Use getExtractWithExtendCost() to calculate the cost of
// extractelement/ext pair.
Cost -=
TTIRef.getExtractWithExtendCost(Ext->getOpcode(), Ext->getType(),
EE->getVectorOperandType(), Idx);
// Add back the cost of s|zext which is subtracted separately.
Cost += TTIRef.getCastInstrCost(
Ext->getOpcode(), Ext->getType(), EE->getType(),
TTI::getCastContextHint(Ext), CostKind, Ext);
continue;
}
}
Cost -= TTIRef.getVectorInstrCost(Instruction::ExtractElement,
EE->getVectorOperandType(), Idx);
}
// Add a cost for subvector extracts/inserts if required.
for (const auto &Data : ExtractVectorsTys) {
auto *EEVTy = cast<FixedVectorType>(Data.first->getType());
unsigned NumElts = VecTy->getNumElements();
if (Data.second % NumElts == 0)
continue;
if (TTIRef.getNumberOfParts(EEVTy) > TTIRef.getNumberOfParts(VecTy)) {
unsigned Idx = (Data.second / NumElts) * NumElts;
unsigned EENumElts = EEVTy->getNumElements();
if (Idx + NumElts <= EENumElts) {
Cost +=
TTIRef.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
EEVTy, None, Idx, VecTy);
} else {
// Need to round up the subvector type vectorization factor to avoid a
// crash in cost model functions. Make SubVT so that Idx + VF of SubVT
// <= EENumElts.
auto *SubVT =
FixedVectorType::get(VecTy->getElementType(), EENumElts - Idx);
Cost +=
TTIRef.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
EEVTy, None, Idx, SubVT);
}
} else {
Cost += TTIRef.getShuffleCost(TargetTransformInfo::SK_InsertSubvector,
VecTy, None, 0, EEVTy);
}
}
};
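// Gather nodes: try, in order, reusing previously vectorized entries, a
// shuffle of existing extractelements, a broadcast of a splat, and grouping
// loads into wide or scatter loads, before falling back to a plain gather.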
if (E->State == TreeEntry::NeedToGather) {
if (allConstant(VL))
return 0;
if (isa<InsertElementInst>(VL[0]))
return InstructionCost::getInvalid();
SmallVector<int> Mask;
SmallVector<const TreeEntry *> Entries;
Optional<TargetTransformInfo::ShuffleKind> Shuffle =
isGatherShuffledEntry(E, Mask, Entries);
if (Shuffle.hasValue()) {
InstructionCost GatherCost = 0;
if (ShuffleVectorInst::isIdentityMask(Mask)) {
// Perfect match in the graph, will reuse the previously vectorized
// node. Cost is 0.
LLVM_DEBUG(
dbgs()
<< "SLP: perfect diamond match for gather bundle that starts with "
<< *VL.front() << ".\n");
if (NeedToShuffleReuses)
GatherCost =
TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
FinalVecTy, E->ReuseShuffleIndices);
} else {
LLVM_DEBUG(dbgs() << "SLP: shuffled " << Entries.size()
<< " entries for bundle that starts with "
<< *VL.front() << ".\n");
// Detected that instead of a gather we can emit a shuffle of one or two
// previously vectorized nodes. Add the cost of the permutation rather
// than the gather.
::addMask(Mask, E->ReuseShuffleIndices);
GatherCost = TTI->getShuffleCost(*Shuffle, FinalVecTy, Mask);
}
return GatherCost;
}
if ((E->getOpcode() == Instruction::ExtractElement ||
all_of(E->Scalars,
[](Value *V) {
return isa<ExtractElementInst, UndefValue>(V);
})) &&
allSameType(VL)) {
// Check that the gather of extractelements can be represented as just a
// shuffle of one or two vectors from which the scalars are extracted.
SmallVector<int> Mask;
Optional<TargetTransformInfo::ShuffleKind> ShuffleKind =
isFixedVectorShuffle(VL, Mask);
if (ShuffleKind.hasValue()) {
// Found a group of extractelement instructions that must be gathered
// into a vector and can be represented as a permutation of elements from
// a single input vector or from two input vectors.
InstructionCost Cost =
computeExtractCost(VL, VecTy, *ShuffleKind, Mask, *TTI);
AdjustExtractsCost(Cost);
if (NeedToShuffleReuses)
Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
FinalVecTy, E->ReuseShuffleIndices);
return Cost;
}
}
if (isSplat(VL)) {
// Found a broadcast of a single scalar; calculate the cost as a
// broadcast.
assert(VecTy == FinalVecTy &&
"No reused scalars expected for broadcast.");
return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy);
}
InstructionCost ReuseShuffleCost = 0;
if (NeedToShuffleReuses)
ReuseShuffleCost = TTI->getShuffleCost(
TTI::SK_PermuteSingleSrc, FinalVecTy, E->ReuseShuffleIndices);
// Improve gather cost for gather of loads, if we can group some of the
// loads into vector loads.
if (VL.size() > 2 && E->getOpcode() == Instruction::Load &&
!E->isAltShuffle()) {
BoUpSLP::ValueSet VectorizedLoads;
unsigned StartIdx = 0;
unsigned VF = VL.size() / 2;
unsigned VectorizedCnt = 0;
unsigned ScatterVectorizeCnt = 0;
const unsigned Sz = DL->getTypeSizeInBits(E->getMainOp()->getType());
for (unsigned MinVF = getMinVF(2 * Sz); VF >= MinVF; VF /= 2) {
for (unsigned Cnt = StartIdx, End = VL.size(); Cnt + VF <= End;
Cnt += VF) {
ArrayRef<Value *> Slice = VL.slice(Cnt, VF);
if (!VectorizedLoads.count(Slice.front()) &&
!VectorizedLoads.count(Slice.back()) && allSameBlock(Slice)) {
SmallVector<Value *> PointerOps;
OrdersType CurrentOrder;
LoadsState LS = canVectorizeLoads(Slice, Slice.front(), *TTI, *DL,
*SE, CurrentOrder, PointerOps);
switch (LS) {
case LoadsState::Vectorize:
case LoadsState::ScatterVectorize:
// Mark the vectorized loads so that we don't vectorize them
// again.
if (LS == LoadsState::Vectorize)
++VectorizedCnt;
else
++ScatterVectorizeCnt;
VectorizedLoads.insert(Slice.begin(), Slice.end());
// If we vectorized initial block, no need to try to vectorize it
// again.
if (Cnt == StartIdx)
StartIdx += VF;
break;
case LoadsState::Gather:
break;
}
}
}
// Check if the whole array was vectorized already - exit.
if (StartIdx >= VL.size())
break;
// Found vectorizable parts - exit.
if (!VectorizedLoads.empty())
break;
}
if (!VectorizedLoads.empty()) {
InstructionCost GatherCost = 0;
unsigned NumParts = TTI->getNumberOfParts(VecTy);
bool NeedInsertSubvectorAnalysis =
!NumParts || (VL.size() / VF) > NumParts;
// Get the cost for gathered loads.
for (unsigned I = 0, End = VL.size(); I < End; I += VF) {
if (VectorizedLoads.contains(VL[I]))
continue;
GatherCost += getGatherCost(VL.slice(I, VF));
}
// The cost for vectorized loads.
InstructionCost ScalarsCost = 0;
for (Value *V : VectorizedLoads) {
auto *LI = cast<LoadInst>(V);
ScalarsCost += TTI->getMemoryOpCost(
Instruction::Load, LI->getType(), LI->getAlign(),
LI->getPointerAddressSpace(), CostKind, LI);
}
auto *LI = cast<LoadInst>(E->getMainOp());
auto *LoadTy = FixedVectorType::get(LI->getType(), VF);
Align Alignment = LI->getAlign();
GatherCost +=
VectorizedCnt *
TTI->getMemoryOpCost(Instruction::Load, LoadTy, Alignment,
LI->getPointerAddressSpace(), CostKind, LI);
GatherCost += ScatterVectorizeCnt *
TTI->getGatherScatterOpCost(
Instruction::Load, LoadTy, LI->getPointerOperand(),
/*VariableMask=*/false, Alignment, CostKind, LI);
if (NeedInsertSubvectorAnalysis) {
// Add the cost for the subvectors insert.
for (int I = VF, E = VL.size(); I < E; I += VF)
GatherCost += TTI->getShuffleCost(TTI::SK_InsertSubvector, VecTy,
None, I, LoadTy);
}
return ReuseShuffleCost + GatherCost - ScalarsCost;
}
}
return ReuseShuffleCost + getGatherCost(VL);
}
InstructionCost CommonCost = 0;
SmallVector<int> Mask;
if (!E->ReorderIndices.empty()) {
SmallVector<int> NewMask;
if (E->getOpcode() == Instruction::Store) {
// For stores the order is actually a mask.
NewMask.resize(E->ReorderIndices.size());
copy(E->ReorderIndices, NewMask.begin());
} else {
inversePermutation(E->ReorderIndices, NewMask);
}
::addMask(Mask, NewMask);
}
if (NeedToShuffleReuses)
::addMask(Mask, E->ReuseShuffleIndices);
if (!Mask.empty() && !ShuffleVectorInst::isIdentityMask(Mask))
CommonCost =
TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, FinalVecTy, Mask);
assert((E->State == TreeEntry::Vectorize ||
E->State == TreeEntry::ScatterVectorize) &&
"Unhandled state");
assert(E->getOpcode() && allSameType(VL) && allSameBlock(VL) && "Invalid VL");
Instruction *VL0 = E->getMainOp();
unsigned ShuffleOrOp =
E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode();
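// Vectorized nodes: the cost is, in general, the cost of the wide
// instruction(s) minus the summed scalar costs, plus CommonCost for any
// reuse/reorder shuffle computed above.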
switch (ShuffleOrOp) {
case Instruction::PHI:
return 0;
case Instruction::ExtractValue:
case Instruction::ExtractElement: {
// The common cost is the cost of removing the ExtractElement/ExtractValue
// instructions plus the cost of the shuffles required to reshuffle the
// original vector.
if (NeedToShuffleReuses) {
unsigned Idx = 0;
for (unsigned I : E->ReuseShuffleIndices) {
if (ShuffleOrOp == Instruction::ExtractElement) {
auto *EE = cast<ExtractElementInst>(VL[I]);
CommonCost -= TTI->getVectorInstrCost(Instruction::ExtractElement,
EE->getVectorOperandType(),
*getExtractIndex(EE));
} else {
CommonCost -= TTI->getVectorInstrCost(Instruction::ExtractElement,
VecTy, Idx);
++Idx;
}
}
Idx = EntryVF;
for (Value *V : VL) {
if (ShuffleOrOp == Instruction::ExtractElement) {
auto *EE = cast<ExtractElementInst>(V);
CommonCost += TTI->getVectorInstrCost(Instruction::ExtractElement,
EE->getVectorOperandType(),
*getExtractIndex(EE));
} else {
--Idx;
CommonCost += TTI->getVectorInstrCost(Instruction::ExtractElement,
VecTy, Idx);
}
}
}
if (ShuffleOrOp == Instruction::ExtractValue) {
for (unsigned I = 0, E = VL.size(); I < E; ++I) {
auto *EI = cast<Instruction>(VL[I]);
// Take credit for instruction that will become dead.
if (EI->hasOneUse()) {
Instruction *Ext = EI->user_back();
if ((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
all_of(Ext->users(),
[](User *U) { return isa<GetElementPtrInst>(U); })) {
// Use getExtractWithExtendCost() to calculate the cost of
// extractelement/ext pair.
CommonCost -= TTI->getExtractWithExtendCost(
Ext->getOpcode(), Ext->getType(), VecTy, I);
// Add back the cost of s|zext which is subtracted separately.
CommonCost += TTI->getCastInstrCost(
Ext->getOpcode(), Ext->getType(), EI->getType(),
TTI::getCastContextHint(Ext), CostKind, Ext);
continue;
}
}
CommonCost -=
TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, I);
}
} else {
AdjustExtractsCost(CommonCost);
}
return CommonCost;
}
case Instruction::InsertElement: {
assert(E->ReuseShuffleIndices.empty() &&
"Unique insertelements only are expected.");
auto *SrcVecTy = cast<FixedVectorType>(VL0->getType());
unsigned const NumElts = SrcVecTy->getNumElements();
unsigned const NumScalars = VL.size();
APInt DemandedElts = APInt::getZero(NumElts);
// TODO: Add support for Instruction::InsertValue.
SmallVector<int> Mask;
if (!E->ReorderIndices.empty()) {
inversePermutation(E->ReorderIndices, Mask);
Mask.append(NumElts - NumScalars, UndefMaskElem);
} else {
Mask.assign(NumElts, UndefMaskElem);
std::iota(Mask.begin(), std::next(Mask.begin(), NumScalars), 0);
}
unsigned Offset = *getInsertIndex(VL0, 0);
bool IsIdentity = true;
SmallVector<int> PrevMask(NumElts, UndefMaskElem);
Mask.swap(PrevMask);
for (unsigned I = 0; I < NumScalars; ++I) {
- Optional<int> InsertIdx = getInsertIndex(VL[PrevMask[I]], 0);
- if (!InsertIdx || *InsertIdx == UndefMaskElem)
- continue;
- DemandedElts.setBit(*InsertIdx);
- IsIdentity &= *InsertIdx - Offset == I;
- Mask[*InsertIdx - Offset] = I;
+ unsigned InsertIdx = *getInsertIndex(VL[PrevMask[I]]);
+ DemandedElts.setBit(InsertIdx);
+ IsIdentity &= InsertIdx - Offset == I;
+ Mask[InsertIdx - Offset] = I;
}
assert(Offset < NumElts && "Failed to find vector index offset");
InstructionCost Cost = 0;
Cost -= TTI->getScalarizationOverhead(SrcVecTy, DemandedElts,
/*Insert*/ true, /*Extract*/ false);
if (IsIdentity && NumElts != NumScalars && Offset % NumScalars != 0) {
// FIXME: Replace with SK_InsertSubvector once it is properly supported.
unsigned Sz = PowerOf2Ceil(Offset + NumScalars);
Cost += TTI->getShuffleCost(
TargetTransformInfo::SK_PermuteSingleSrc,
FixedVectorType::get(SrcVecTy->getElementType(), Sz));
} else if (!IsIdentity) {
auto *FirstInsert =
cast<Instruction>(*find_if(E->Scalars, [E](Value *V) {
return !is_contained(E->Scalars,
cast<Instruction>(V)->getOperand(0));
}));
if (isUndefVector(FirstInsert->getOperand(0))) {
Cost += TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, SrcVecTy, Mask);
} else {
SmallVector<int> InsertMask(NumElts);
std::iota(InsertMask.begin(), InsertMask.end(), 0);
for (unsigned I = 0; I < NumElts; I++) {
if (Mask[I] != UndefMaskElem)
InsertMask[Offset + I] = NumElts + I;
}
Cost +=
TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, SrcVecTy, InsertMask);
}
}
return Cost;
}
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
case Instruction::FPToSI:
case Instruction::FPExt:
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::SIToFP:
case Instruction::UIToFP:
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
Type *SrcTy = VL0->getOperand(0)->getType();
InstructionCost ScalarEltCost =
TTI->getCastInstrCost(E->getOpcode(), ScalarTy, SrcTy,
TTI::getCastContextHint(VL0), CostKind, VL0);
if (NeedToShuffleReuses) {
CommonCost -= (EntryVF - VL.size()) * ScalarEltCost;
}
// Calculate the cost of this instruction.
InstructionCost ScalarCost = VL.size() * ScalarEltCost;
auto *SrcVecTy = FixedVectorType::get(SrcTy, VL.size());
InstructionCost VecCost = 0;
// Check if the values are candidates to demote.
if (!MinBWs.count(VL0) || VecTy != SrcVecTy) {
VecCost = CommonCost + TTI->getCastInstrCost(
E->getOpcode(), VecTy, SrcVecTy,
TTI::getCastContextHint(VL0), CostKind, VL0);
}
LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
return VecCost - ScalarCost;
}
case Instruction::FCmp:
case Instruction::ICmp:
case Instruction::Select: {
// Calculate the cost of this instruction.
InstructionCost ScalarEltCost =
TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy, Builder.getInt1Ty(),
CmpInst::BAD_ICMP_PREDICATE, CostKind, VL0);
if (NeedToShuffleReuses) {
CommonCost -= (EntryVF - VL.size()) * ScalarEltCost;
}
auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
// Check if all entries in VL are either compares or selects with compares
// as condition that have the same predicates.
CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE;
bool First = true;
for (auto *V : VL) {
CmpInst::Predicate CurrentPred;
auto MatchCmp = m_Cmp(CurrentPred, m_Value(), m_Value());
if ((!match(V, m_Select(MatchCmp, m_Value(), m_Value())) &&
!match(V, MatchCmp)) ||
(!First && VecPred != CurrentPred)) {
VecPred = CmpInst::BAD_ICMP_PREDICATE;
break;
}
First = false;
VecPred = CurrentPred;
}
InstructionCost VecCost = TTI->getCmpSelInstrCost(
E->getOpcode(), VecTy, MaskTy, VecPred, CostKind, VL0);
// Check if it is possible and profitable to use min/max for selects in
// VL.
//
auto IntrinsicAndUse = canConvertToMinOrMaxIntrinsic(VL);
if (IntrinsicAndUse.first != Intrinsic::not_intrinsic) {
IntrinsicCostAttributes CostAttrs(IntrinsicAndUse.first, VecTy,
{VecTy, VecTy});
InstructionCost IntrinsicCost =
TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
// If the selects are the only uses of the compares, they will be dead
// and we can adjust the cost by removing their cost.
if (IntrinsicAndUse.second)
IntrinsicCost -=
TTI->getCmpSelInstrCost(Instruction::ICmp, VecTy, MaskTy,
CmpInst::BAD_ICMP_PREDICATE, CostKind);
VecCost = std::min(VecCost, IntrinsicCost);
}
LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
return CommonCost + VecCost - ScalarCost;
}
case Instruction::FNeg:
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
case Instruction::FSub:
case Instruction::Mul:
case Instruction::FMul:
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::FDiv:
case Instruction::URem:
case Instruction::SRem:
case Instruction::FRem:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
// Certain instructions can be cheaper to vectorize if they have a
// constant second vector operand.
TargetTransformInfo::OperandValueKind Op1VK =
TargetTransformInfo::OK_AnyValue;
TargetTransformInfo::OperandValueKind Op2VK =
TargetTransformInfo::OK_UniformConstantValue;
TargetTransformInfo::OperandValueProperties Op1VP =
TargetTransformInfo::OP_None;
TargetTransformInfo::OperandValueProperties Op2VP =
TargetTransformInfo::OP_PowerOf2;
// If all operands are exactly the same ConstantInt then set the
// operand kind to OK_UniformConstantValue.
// If instead not all operands are constants, then set the operand kind
// to OK_AnyValue. If all operands are constants but not the same,
// then set the operand kind to OK_NonUniformConstantValue.
ConstantInt *CInt0 = nullptr;
for (unsigned i = 0, e = VL.size(); i < e; ++i) {
const Instruction *I = cast<Instruction>(VL[i]);
unsigned OpIdx = isa<BinaryOperator>(I) ? 1 : 0;
ConstantInt *CInt = dyn_cast<ConstantInt>(I->getOperand(OpIdx));
if (!CInt) {
Op2VK = TargetTransformInfo::OK_AnyValue;
Op2VP = TargetTransformInfo::OP_None;
break;
}
if (Op2VP == TargetTransformInfo::OP_PowerOf2 &&
!CInt->getValue().isPowerOf2())
Op2VP = TargetTransformInfo::OP_None;
if (i == 0) {
CInt0 = CInt;
continue;
}
if (CInt0 != CInt)
Op2VK = TargetTransformInfo::OK_NonUniformConstantValue;
}
SmallVector<const Value *, 4> Operands(VL0->operand_values());
InstructionCost ScalarEltCost =
TTI->getArithmeticInstrCost(E->getOpcode(), ScalarTy, CostKind, Op1VK,
Op2VK, Op1VP, Op2VP, Operands, VL0);
if (NeedToShuffleReuses) {
CommonCost -= (EntryVF - VL.size()) * ScalarEltCost;
}
InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost VecCost =
TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind, Op1VK,
Op2VK, Op1VP, Op2VP, Operands, VL0);
LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
return CommonCost + VecCost - ScalarCost;
}
case Instruction::GetElementPtr: {
TargetTransformInfo::OperandValueKind Op1VK =
TargetTransformInfo::OK_AnyValue;
TargetTransformInfo::OperandValueKind Op2VK =
TargetTransformInfo::OK_UniformConstantValue;
InstructionCost ScalarEltCost = TTI->getArithmeticInstrCost(
Instruction::Add, ScalarTy, CostKind, Op1VK, Op2VK);
if (NeedToShuffleReuses) {
CommonCost -= (EntryVF - VL.size()) * ScalarEltCost;
}
InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost VecCost = TTI->getArithmeticInstrCost(
Instruction::Add, VecTy, CostKind, Op1VK, Op2VK);
LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
return CommonCost + VecCost - ScalarCost;
}
case Instruction::Load: {
// Cost of wide load - cost of scalar loads.
Align Alignment = cast<LoadInst>(VL0)->getAlign();
InstructionCost ScalarEltCost = TTI->getMemoryOpCost(
Instruction::Load, ScalarTy, Alignment, 0, CostKind, VL0);
if (NeedToShuffleReuses) {
CommonCost -= (EntryVF - VL.size()) * ScalarEltCost;
}
InstructionCost ScalarLdCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost VecLdCost;
if (E->State == TreeEntry::Vectorize) {
VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, Alignment, 0,
CostKind, VL0);
} else {
assert(E->State == TreeEntry::ScatterVectorize && "Unknown EntryState");
Align CommonAlignment = Alignment;
for (Value *V : VL)
CommonAlignment =
commonAlignment(CommonAlignment, cast<LoadInst>(V)->getAlign());
VecLdCost = TTI->getGatherScatterOpCost(
Instruction::Load, VecTy, cast<LoadInst>(VL0)->getPointerOperand(),
/*VariableMask=*/false, CommonAlignment, CostKind, VL0);
}
LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecLdCost, ScalarLdCost));
return CommonCost + VecLdCost - ScalarLdCost;
}
case Instruction::Store: {
// We know that we can merge the stores. Calculate the cost.
bool IsReorder = !E->ReorderIndices.empty();
auto *SI =
cast<StoreInst>(IsReorder ? VL[E->ReorderIndices.front()] : VL0);
Align Alignment = SI->getAlign();
InstructionCost ScalarEltCost = TTI->getMemoryOpCost(
Instruction::Store, ScalarTy, Alignment, 0, CostKind, VL0);
InstructionCost ScalarStCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost VecStCost = TTI->getMemoryOpCost(
Instruction::Store, VecTy, Alignment, 0, CostKind, VL0);
LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecStCost, ScalarStCost));
return CommonCost + VecStCost - ScalarStCost;
}
case Instruction::Call: {
CallInst *CI = cast<CallInst>(VL0);
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
// Calculate the cost of the scalar and vector calls.
IntrinsicCostAttributes CostAttrs(ID, *CI, 1);
InstructionCost ScalarEltCost =
TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
if (NeedToShuffleReuses) {
CommonCost -= (EntryVF - VL.size()) * ScalarEltCost;
}
InstructionCost ScalarCallCost = VecTy->getNumElements() * ScalarEltCost;
auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
InstructionCost VecCallCost =
std::min(VecCallCosts.first, VecCallCosts.second);
LLVM_DEBUG(dbgs() << "SLP: Call cost " << VecCallCost - ScalarCallCost
<< " (" << VecCallCost << "-" << ScalarCallCost << ")"
<< " for " << *CI << "\n");
return CommonCost + VecCallCost - ScalarCallCost;
}
case Instruction::ShuffleVector: {
assert(E->isAltShuffle() &&
((Instruction::isBinaryOp(E->getOpcode()) &&
Instruction::isBinaryOp(E->getAltOpcode())) ||
(Instruction::isCast(E->getOpcode()) &&
Instruction::isCast(E->getAltOpcode()))) &&
"Invalid Shuffle Vector Operand");
InstructionCost ScalarCost = 0;
if (NeedToShuffleReuses) {
for (unsigned Idx : E->ReuseShuffleIndices) {
Instruction *I = cast<Instruction>(VL[Idx]);
CommonCost -= TTI->getInstructionCost(I, CostKind);
}
for (Value *V : VL) {
Instruction *I = cast<Instruction>(V);
CommonCost += TTI->getInstructionCost(I, CostKind);
}
}
for (Value *V : VL) {
Instruction *I = cast<Instruction>(V);
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
ScalarCost += TTI->getInstructionCost(I, CostKind);
}
// VecCost is equal to sum of the cost of creating 2 vectors
// and the cost of creating shuffle.
InstructionCost VecCost = 0;
// Try to find the previous shuffle node with the same operands and same
// main/alternate ops.
auto &&TryFindNodeWithEqualOperands = [this, E]() {
for (const std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
if (TE.get() == E)
break;
if (TE->isAltShuffle() &&
((TE->getOpcode() == E->getOpcode() &&
TE->getAltOpcode() == E->getAltOpcode()) ||
(TE->getOpcode() == E->getAltOpcode() &&
TE->getAltOpcode() == E->getOpcode())) &&
TE->hasEqualOperands(*E))
return true;
}
return false;
};
if (TryFindNodeWithEqualOperands()) {
LLVM_DEBUG({
dbgs() << "SLP: diamond match for alternate node found.\n";
E->dump();
});
// No need to add new vector costs here since we're going to reuse
// same main/alternate vector ops, just do different shuffling.
} else if (Instruction::isBinaryOp(E->getOpcode())) {
VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy,
CostKind);
} else {
Type *Src0SclTy = E->getMainOp()->getOperand(0)->getType();
Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType();
auto *Src0Ty = FixedVectorType::get(Src0SclTy, VL.size());
auto *Src1Ty = FixedVectorType::get(Src1SclTy, VL.size());
VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty,
TTI::CastContextHint::None, CostKind);
VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty,
TTI::CastContextHint::None, CostKind);
}
SmallVector<int> Mask;
buildSuffleEntryMask(
E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
[E](Instruction *I) {
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
return I->getOpcode() == E->getAltOpcode();
},
Mask);
CommonCost =
TTI->getShuffleCost(TargetTransformInfo::SK_Select, FinalVecTy, Mask);
LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
return CommonCost + VecCost - ScalarCost;
}
default:
llvm_unreachable("Unknown instruction");
}
}
bool BoUpSLP::isFullyVectorizableTinyTree(bool ForReduction) const {
LLVM_DEBUG(dbgs() << "SLP: Check whether the tree with height "
<< VectorizableTree.size() << " is fully vectorizable .\n");
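// A gather node qualifies if none of its scalars are ephemeral values and it
// is all-constant, a splat, smaller than the given limit, a shuffle of
// extractelements, or a sequence of loads.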
auto &&AreVectorizableGathers = [this](const TreeEntry *TE, unsigned Limit) {
SmallVector<int> Mask;
return TE->State == TreeEntry::NeedToGather &&
!any_of(TE->Scalars,
[this](Value *V) { return EphValues.contains(V); }) &&
(allConstant(TE->Scalars) || isSplat(TE->Scalars) ||
TE->Scalars.size() < Limit ||
((TE->getOpcode() == Instruction::ExtractElement ||
all_of(TE->Scalars,
[](Value *V) {
return isa<ExtractElementInst, UndefValue>(V);
})) &&
isFixedVectorShuffle(TE->Scalars, Mask)) ||
(TE->State == TreeEntry::NeedToGather &&
TE->getOpcode() == Instruction::Load && !TE->isAltShuffle()));
};
// We only handle trees of heights 1 and 2.
if (VectorizableTree.size() == 1 &&
(VectorizableTree[0]->State == TreeEntry::Vectorize ||
(ForReduction &&
AreVectorizableGathers(VectorizableTree[0].get(),
VectorizableTree[0]->Scalars.size()) &&
VectorizableTree[0]->getVectorFactor() > 2)))
return true;
if (VectorizableTree.size() != 2)
return false;
// Handle splat and all-constants stores. Also try to vectorize tiny trees
// with a second gather node if it has fewer scalar operands than the
// initial tree element (it may be profitable to shuffle the second gather),
// or if its scalars are extractelements that form a shuffle.
SmallVector<int> Mask;
if (VectorizableTree[0]->State == TreeEntry::Vectorize &&
AreVectorizableGathers(VectorizableTree[1].get(),
VectorizableTree[0]->Scalars.size()))
return true;
// Gathering cost would be too much for tiny trees.
if (VectorizableTree[0]->State == TreeEntry::NeedToGather ||
(VectorizableTree[1]->State == TreeEntry::NeedToGather &&
VectorizableTree[0]->State != TreeEntry::ScatterVectorize))
return false;
return true;
}
static bool isLoadCombineCandidateImpl(Value *Root, unsigned NumElts,
TargetTransformInfo *TTI,
bool MustMatchOrInst) {
// Look past the root to find a source value. Arbitrarily follow the
// path through operand 0 of any 'or'. Also, peek through optional
// shift-left-by-multiple-of-8-bits.
Value *ZextLoad = Root;
const APInt *ShAmtC;
bool FoundOr = false;
while (!isa<ConstantExpr>(ZextLoad) &&
(match(ZextLoad, m_Or(m_Value(), m_Value())) ||
(match(ZextLoad, m_Shl(m_Value(), m_APInt(ShAmtC))) &&
ShAmtC->urem(8) == 0))) {
auto *BinOp = cast<BinaryOperator>(ZextLoad);
ZextLoad = BinOp->getOperand(0);
if (BinOp->getOpcode() == Instruction::Or)
FoundOr = true;
}
// Check if the input is an extended load of the required or/shift expression.
Value *Load;
if ((MustMatchOrInst && !FoundOr) || ZextLoad == Root ||
!match(ZextLoad, m_ZExt(m_Value(Load))) || !isa<LoadInst>(Load))
return false;
// Require that the total load bit width is a legal integer type.
// For example, <8 x i8> --> i64 is a legal integer on a 64-bit target.
// But <16 x i8> --> i128 is not, so the backend probably can't reduce it.
Type *SrcTy = Load->getType();
unsigned LoadBitWidth = SrcTy->getIntegerBitWidth() * NumElts;
if (!TTI->isTypeLegal(IntegerType::get(Root->getContext(), LoadBitWidth)))
return false;
// Everything matched - assume that we can fold the whole sequence using
// load combining.
LLVM_DEBUG(dbgs() << "SLP: Assume load combining for tree starting at "
<< *(cast<Instruction>(Root)) << "\n");
return true;
}
bool BoUpSLP::isLoadCombineReductionCandidate(RecurKind RdxKind) const {
if (RdxKind != RecurKind::Or)
return false;
unsigned NumElts = VectorizableTree[0]->Scalars.size();
Value *FirstReduced = VectorizableTree[0]->Scalars[0];
return isLoadCombineCandidateImpl(FirstReduced, NumElts, TTI,
/* MatchOr */ false);
}
bool BoUpSLP::isLoadCombineCandidate() const {
// Peek through a final sequence of stores and check if all operations are
// likely to be load-combined.
unsigned NumElts = VectorizableTree[0]->Scalars.size();
for (Value *Scalar : VectorizableTree[0]->Scalars) {
Value *X;
if (!match(Scalar, m_Store(m_Value(X), m_Value())) ||
!isLoadCombineCandidateImpl(X, NumElts, TTI, /* MatchOr */ true))
return false;
}
return true;
}
bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
// No need to vectorize inserts of gathered values.
if (VectorizableTree.size() == 2 &&
isa<InsertElementInst>(VectorizableTree[0]->Scalars[0]) &&
VectorizableTree[1]->State == TreeEntry::NeedToGather)
return true;
// We can vectorize the tree if its size is greater than or equal to the
// minimum size specified by the MinTreeSize command line option.
if (VectorizableTree.size() >= MinTreeSize)
return false;
// If we have a tiny tree (a tree whose size is less than MinTreeSize), we
// can vectorize it if we can prove it fully vectorizable.
if (isFullyVectorizableTinyTree(ForReduction))
return false;
assert(VectorizableTree.empty()
? ExternalUses.empty()
: true && "We shouldn't have any external users");
// Otherwise, we can't vectorize the tree. It is both tiny and not fully
// vectorizable.
return true;
}
InstructionCost BoUpSLP::getSpillCost() const {
// Walk from the bottom of the tree to the top, tracking which values are
// live. When we see a call instruction that is not part of our tree,
// query TTI to see if there is a cost to keeping values live over it
// (for example, if spills and fills are required).
unsigned BundleWidth = VectorizableTree.front()->Scalars.size();
InstructionCost Cost = 0;
SmallPtrSet<Instruction*, 4> LiveValues;
Instruction *PrevInst = nullptr;
// The entries in VectorizableTree are not necessarily ordered by their
// position in basic blocks. Collect them and order them by dominance so later
// instructions are guaranteed to be visited first. For instructions in
// different basic blocks, we only scan to the beginning of the block, so
// their order does not matter, as long as all instructions in a basic block
// are grouped together. Using dominance ensures a deterministic order.
SmallVector<Instruction *, 16> OrderedScalars;
for (const auto &TEPtr : VectorizableTree) {
Instruction *Inst = dyn_cast<Instruction>(TEPtr->Scalars[0]);
if (!Inst)
continue;
OrderedScalars.push_back(Inst);
}
llvm::sort(OrderedScalars, [&](Instruction *A, Instruction *B) {
auto *NodeA = DT->getNode(A->getParent());
auto *NodeB = DT->getNode(B->getParent());
assert(NodeA && "Should only process reachable instructions");
assert(NodeB && "Should only process reachable instructions");
assert((NodeA == NodeB) == (NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) &&
"Different nodes should have different DFS numbers");
if (NodeA != NodeB)
return NodeA->getDFSNumIn() < NodeB->getDFSNumIn();
return B->comesBefore(A);
});
for (Instruction *Inst : OrderedScalars) {
if (!PrevInst) {
PrevInst = Inst;
continue;
}
// Update LiveValues.
LiveValues.erase(PrevInst);
for (auto &J : PrevInst->operands()) {
if (isa<Instruction>(&*J) && getTreeEntry(&*J))
LiveValues.insert(cast<Instruction>(&*J));
}
LLVM_DEBUG({
dbgs() << "SLP: #LV: " << LiveValues.size();
for (auto *X : LiveValues)
dbgs() << " " << X->getName();
dbgs() << ", Looking at ";
Inst->dump();
});
// Now find the sequence of instructions between PrevInst and Inst.
unsigned NumCalls = 0;
BasicBlock::reverse_iterator InstIt = ++Inst->getIterator().getReverse(),
PrevInstIt =
PrevInst->getIterator().getReverse();
while (InstIt != PrevInstIt) {
if (PrevInstIt == PrevInst->getParent()->rend()) {
PrevInstIt = Inst->getParent()->rbegin();
continue;
}
// Debug information does not impact spill cost.
if ((isa<CallInst>(&*PrevInstIt) &&
!isa<DbgInfoIntrinsic>(&*PrevInstIt)) &&
&*PrevInstIt != PrevInst)
NumCalls++;
++PrevInstIt;
}
if (NumCalls) {
SmallVector<Type*, 4> V;
for (auto *II : LiveValues) {
auto *ScalarTy = II->getType();
if (auto *VectorTy = dyn_cast<FixedVectorType>(ScalarTy))
ScalarTy = VectorTy->getElementType();
V.push_back(FixedVectorType::get(ScalarTy, BundleWidth));
}
Cost += NumCalls * TTI->getCostOfKeepingLiveOverCall(V);
}
PrevInst = Inst;
}
return Cost;
}
/// Check if two insertelement instructions are from the same buildvector.
static bool areTwoInsertFromSameBuildVector(InsertElementInst *VU,
InsertElementInst *V) {
// Instructions must be from the same basic blocks.
if (VU->getParent() != V->getParent())
return false;
// Checks if 2 insertelements are from the same buildvector.
if (VU->getType() != V->getType())
return false;
// Multiple used inserts are separate nodes.
if (!VU->hasOneUse() && !V->hasOneUse())
return false;
auto *IE1 = VU;
auto *IE2 = V;
// Go through the vector operand of insertelement instructions trying to find
// either VU as the original vector for IE2 or V as the original vector for
// IE1.
do {
if (IE2 == VU || IE1 == V)
return true;
if (IE1) {
if (IE1 != VU && !IE1->hasOneUse())
IE1 = nullptr;
else
IE1 = dyn_cast<InsertElementInst>(IE1->getOperand(0));
}
if (IE2) {
if (IE2 != V && !IE2->hasOneUse())
IE2 = nullptr;
else
IE2 = dyn_cast<InsertElementInst>(IE2->getOperand(0));
}
} while (IE1 || IE2);
return false;
}
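// Sum the per-entry costs, add the cost of extracting externally used scalars
// and of spills across calls, then adjust for external insertelement users
// that can be served by a final shuffle of the vectorized values.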
InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
InstructionCost Cost = 0;
LLVM_DEBUG(dbgs() << "SLP: Calculating cost for tree of size "
<< VectorizableTree.size() << ".\n");
unsigned BundleWidth = VectorizableTree[0]->Scalars.size();
for (unsigned I = 0, E = VectorizableTree.size(); I < E; ++I) {
TreeEntry &TE = *VectorizableTree[I].get();
InstructionCost C = getEntryCost(&TE, VectorizedVals);
Cost += C;
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
<< " for bundle that starts with " << *TE.Scalars[0]
<< ".\n"
<< "SLP: Current total cost = " << Cost << "\n");
}
SmallPtrSet<Value *, 16> ExtractCostCalculated;
InstructionCost ExtractCost = 0;
SmallVector<unsigned> VF;
SmallVector<SmallVector<int>> ShuffleMask;
SmallVector<Value *> FirstUsers;
SmallVector<APInt> DemandedElts;
for (ExternalUser &EU : ExternalUses) {
// We only add extract cost once for the same scalar.
if (!isa_and_nonnull<InsertElementInst>(EU.User) &&
!ExtractCostCalculated.insert(EU.Scalar).second)
continue;
// Uses by ephemeral values are free (because the ephemeral value will be
// removed prior to code generation, and so the extraction will be
// removed as well).
if (EphValues.count(EU.User))
continue;
// No extract cost for vector "scalar"
if (isa<FixedVectorType>(EU.Scalar->getType()))
continue;
// Already counted the cost for external uses when tried to adjust the cost
// for extractelements, no need to add it again.
if (isa<ExtractElementInst>(EU.Scalar))
continue;
// If found user is an insertelement, do not calculate extract cost but try
// to detect it as a final shuffled/identity match.
if (auto *VU = dyn_cast_or_null<InsertElementInst>(EU.User)) {
if (auto *FTy = dyn_cast<FixedVectorType>(VU->getType())) {
- Optional<int> InsertIdx = getInsertIndex(VU, 0);
- if (!InsertIdx || *InsertIdx == UndefMaskElem)
- continue;
- auto *It = find_if(FirstUsers, [VU](Value *V) {
- return areTwoInsertFromSameBuildVector(VU,
- cast<InsertElementInst>(V));
- });
- int VecId = -1;
- if (It == FirstUsers.end()) {
- VF.push_back(FTy->getNumElements());
- ShuffleMask.emplace_back(VF.back(), UndefMaskElem);
- // Find the insertvector, vectorized in tree, if any.
- Value *Base = VU;
- while (isa<InsertElementInst>(Base)) {
- // Build the mask for the vectorized insertelement instructions.
- if (const TreeEntry *E = getTreeEntry(Base)) {
- VU = cast<InsertElementInst>(Base);
- do {
- int Idx = E->findLaneForValue(Base);
- ShuffleMask.back()[Idx] = Idx;
- Base = cast<InsertElementInst>(Base)->getOperand(0);
- } while (E == getTreeEntry(Base));
- break;
+ Optional<unsigned> InsertIdx = getInsertIndex(VU);
+ if (InsertIdx) {
+ auto *It = find_if(FirstUsers, [VU](Value *V) {
+ return areTwoInsertFromSameBuildVector(VU,
+ cast<InsertElementInst>(V));
+ });
+ int VecId = -1;
+ if (It == FirstUsers.end()) {
+ VF.push_back(FTy->getNumElements());
+ ShuffleMask.emplace_back(VF.back(), UndefMaskElem);
+ // Find the insertvector, vectorized in tree, if any.
+ Value *Base = VU;
+ while (isa<InsertElementInst>(Base)) {
+ // Build the mask for the vectorized insertelement instructions.
+ if (const TreeEntry *E = getTreeEntry(Base)) {
+ VU = cast<InsertElementInst>(Base);
+ do {
+ int Idx = E->findLaneForValue(Base);
+ ShuffleMask.back()[Idx] = Idx;
+ Base = cast<InsertElementInst>(Base)->getOperand(0);
+ } while (E == getTreeEntry(Base));
+ break;
+ }
+ Base = cast<InsertElementInst>(Base)->getOperand(0);
}
- Base = cast<InsertElementInst>(Base)->getOperand(0);
+ FirstUsers.push_back(VU);
+ DemandedElts.push_back(APInt::getZero(VF.back()));
+ VecId = FirstUsers.size() - 1;
+ } else {
+ VecId = std::distance(FirstUsers.begin(), It);
}
- FirstUsers.push_back(VU);
- DemandedElts.push_back(APInt::getZero(VF.back()));
- VecId = FirstUsers.size() - 1;
- } else {
- VecId = std::distance(FirstUsers.begin(), It);
+ ShuffleMask[VecId][*InsertIdx] = EU.Lane;
+ DemandedElts[VecId].setBit(*InsertIdx);
+ continue;
}
- int Idx = *InsertIdx;
- ShuffleMask[VecId][Idx] = EU.Lane;
- DemandedElts[VecId].setBit(Idx);
- continue;
}
}
// If we plan to rewrite the tree in a smaller type, we will need to sign
// extend the extracted value back to the original type. Here, we account
// for the extract and the added cost of the sign extend if needed.
auto *VecTy = FixedVectorType::get(EU.Scalar->getType(), BundleWidth);
auto *ScalarRoot = VectorizableTree[0]->Scalars[0];
if (MinBWs.count(ScalarRoot)) {
auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
auto Extend =
MinBWs[ScalarRoot].second ? Instruction::SExt : Instruction::ZExt;
VecTy = FixedVectorType::get(MinTy, BundleWidth);
ExtractCost += TTI->getExtractWithExtendCost(Extend, EU.Scalar->getType(),
VecTy, EU.Lane);
} else {
ExtractCost +=
TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, EU.Lane);
}
}
InstructionCost SpillCost = getSpillCost();
Cost += SpillCost + ExtractCost;
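// Account for the final shuffles feeding the external insertelement users
// collected above, and subtract the scalar insertion cost they make redundant.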
if (FirstUsers.size() == 1) {
int Limit = ShuffleMask.front().size() * 2;
if (all_of(ShuffleMask.front(), [Limit](int Idx) { return Idx < Limit; }) &&
!ShuffleVectorInst::isIdentityMask(ShuffleMask.front())) {
InstructionCost C = TTI->getShuffleCost(
TTI::SK_PermuteSingleSrc,
cast<FixedVectorType>(FirstUsers.front()->getType()),
ShuffleMask.front());
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
<< " for final shuffle of insertelement external users "
<< *VectorizableTree.front()->Scalars.front() << ".\n"
<< "SLP: Current total cost = " << Cost << "\n");
Cost += C;
}
InstructionCost InsertCost = TTI->getScalarizationOverhead(
cast<FixedVectorType>(FirstUsers.front()->getType()),
DemandedElts.front(), /*Insert*/ true, /*Extract*/ false);
LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost
<< " for insertelements gather.\n"
<< "SLP: Current total cost = " << Cost << "\n");
Cost -= InsertCost;
} else if (FirstUsers.size() >= 2) {
unsigned MaxVF = *std::max_element(VF.begin(), VF.end());
// Combined masks of the first 2 vectors.
SmallVector<int> CombinedMask(MaxVF, UndefMaskElem);
copy(ShuffleMask.front(), CombinedMask.begin());
APInt CombinedDemandedElts = DemandedElts.front().zextOrSelf(MaxVF);
auto *VecTy = FixedVectorType::get(
cast<VectorType>(FirstUsers.front()->getType())->getElementType(),
MaxVF);
for (int I = 0, E = ShuffleMask[1].size(); I < E; ++I) {
if (ShuffleMask[1][I] != UndefMaskElem) {
CombinedMask[I] = ShuffleMask[1][I] + MaxVF;
CombinedDemandedElts.setBit(I);
}
}
InstructionCost C =
TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, CombinedMask);
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
<< " for final shuffle of vector node and external "
"insertelement users "
<< *VectorizableTree.front()->Scalars.front() << ".\n"
<< "SLP: Current total cost = " << Cost << "\n");
Cost += C;
InstructionCost InsertCost = TTI->getScalarizationOverhead(
VecTy, CombinedDemandedElts, /*Insert*/ true, /*Extract*/ false);
LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost
<< " for insertelements gather.\n"
<< "SLP: Current total cost = " << Cost << "\n");
Cost -= InsertCost;
for (int I = 2, E = FirstUsers.size(); I < E; ++I) {
// Other elements - permutation of 2 vectors (the initial one and the
// next Ith incoming vector).
unsigned VF = ShuffleMask[I].size();
for (unsigned Idx = 0; Idx < VF; ++Idx) {
int Mask = ShuffleMask[I][Idx];
if (Mask != UndefMaskElem)
CombinedMask[Idx] = MaxVF + Mask;
else if (CombinedMask[Idx] != UndefMaskElem)
CombinedMask[Idx] = Idx;
}
for (unsigned Idx = VF; Idx < MaxVF; ++Idx)
if (CombinedMask[Idx] != UndefMaskElem)
CombinedMask[Idx] = Idx;
InstructionCost C =
TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, CombinedMask);
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
<< " for final shuffle of vector node and external "
"insertelement users "
<< *VectorizableTree.front()->Scalars.front() << ".\n"
<< "SLP: Current total cost = " << Cost << "\n");
Cost += C;
InstructionCost InsertCost = TTI->getScalarizationOverhead(
cast<FixedVectorType>(FirstUsers[I]->getType()), DemandedElts[I],
/*Insert*/ true, /*Extract*/ false);
LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost
<< " for insertelements gather.\n"
<< "SLP: Current total cost = " << Cost << "\n");
Cost -= InsertCost;
}
}
#ifndef NDEBUG
SmallString<256> Str;
{
raw_svector_ostream OS(Str);
OS << "SLP: Spill Cost = " << SpillCost << ".\n"
<< "SLP: Extract Cost = " << ExtractCost << ".\n"
<< "SLP: Total Cost = " << Cost << ".\n";
}
LLVM_DEBUG(dbgs() << Str);
if (ViewSLPTree)
ViewGraph(this, "SLP" + F->getName(), false, Str);
#endif
return Cost;
}
Optional<TargetTransformInfo::ShuffleKind>
BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, SmallVectorImpl<int> &Mask,
SmallVectorImpl<const TreeEntry *> &Entries) {
// TODO: currently checking only for Scalars in the tree entry, need to count
// reused elements too for better cost estimation.
Mask.assign(TE->Scalars.size(), UndefMaskElem);
Entries.clear();
// Build a map from values to the gather tree entries that contain them.
DenseMap<Value *, SmallPtrSet<const TreeEntry *, 4>> ValueToTEs;
for (const std::unique_ptr<TreeEntry> &EntryPtr : VectorizableTree) {
if (EntryPtr.get() == TE)
break;
if (EntryPtr->State != TreeEntry::NeedToGather)
continue;
for (Value *V : EntryPtr->Scalars)
ValueToTEs.try_emplace(V).first->getSecond().insert(EntryPtr.get());
}
// Find all tree entries used by the gathered values. If no common entries
// are found, this is not a shuffle.
// Here we build a set of tree nodes for each gathered value and try to
// find the intersection between these sets. If we have at least one common
// tree node for each gathered value, we have just a permutation of a
// single vector. If we have two different sets, we have a permutation of
// two input vectors.
SmallVector<SmallPtrSet<const TreeEntry *, 4>> UsedTEs;
DenseMap<Value *, int> UsedValuesEntry;
for (Value *V : TE->Scalars) {
if (isa<UndefValue>(V))
continue;
// Build a list of tree entries where V is used.
SmallPtrSet<const TreeEntry *, 4> VToTEs;
auto It = ValueToTEs.find(V);
if (It != ValueToTEs.end())
VToTEs = It->second;
if (const TreeEntry *VTE = getTreeEntry(V))
VToTEs.insert(VTE);
if (VToTEs.empty())
return None;
if (UsedTEs.empty()) {
// On the first iteration, just insert the list of nodes into the vector.
UsedTEs.push_back(VToTEs);
} else {
// Need to check if there are any previously used tree nodes which use V.
// If there are no such nodes, we have one more input vector.
SmallPtrSet<const TreeEntry *, 4> SavedVToTEs(VToTEs);
unsigned Idx = 0;
for (SmallPtrSet<const TreeEntry *, 4> &Set : UsedTEs) {
// Do we have a non-empty intersection of previously listed tree entries
// and tree entries using current V?
set_intersect(VToTEs, Set);
if (!VToTEs.empty()) {
// Yes, write the new subset and continue analysis for the next
// scalar.
Set.swap(VToTEs);
break;
}
VToTEs = SavedVToTEs;
++Idx;
}
// No non-empty intersection found - need to add a second set of possible
// source vectors.
if (Idx == UsedTEs.size()) {
// If the number of input vectors is greater than 2 - not a permutation,
// fallback to the regular gather.
if (UsedTEs.size() == 2)
return None;
UsedTEs.push_back(SavedVToTEs);
Idx = UsedTEs.size() - 1;
}
UsedValuesEntry.try_emplace(V, Idx);
}
}
unsigned VF = 0;
if (UsedTEs.size() == 1) {
// Try to find the perfect match in another gather node at first.
auto It = find_if(UsedTEs.front(), [TE](const TreeEntry *EntryPtr) {
return EntryPtr->isSame(TE->Scalars);
});
if (It != UsedTEs.front().end()) {
Entries.push_back(*It);
std::iota(Mask.begin(), Mask.end(), 0);
return TargetTransformInfo::SK_PermuteSingleSrc;
}
// No perfect match, just shuffle, so choose the first tree node.
Entries.push_back(*UsedTEs.front().begin());
} else {
// Try to find nodes with the same vector factor.
assert(UsedTEs.size() == 2 && "Expected at max 2 permuted entries.");
DenseMap<int, const TreeEntry *> VFToTE;
for (const TreeEntry *TE : UsedTEs.front())
VFToTE.try_emplace(TE->getVectorFactor(), TE);
for (const TreeEntry *TE : UsedTEs.back()) {
auto It = VFToTE.find(TE->getVectorFactor());
if (It != VFToTE.end()) {
VF = It->first;
Entries.push_back(It->second);
Entries.push_back(TE);
break;
}
}
// No two source vectors with the same vector factor - give up and do a
// regular gather.
if (Entries.empty())
return None;
}
// Build a shuffle mask for better cost estimation and vector emission.
for (int I = 0, E = TE->Scalars.size(); I < E; ++I) {
Value *V = TE->Scalars[I];
if (isa<UndefValue>(V))
continue;
unsigned Idx = UsedValuesEntry.lookup(V);
const TreeEntry *VTE = Entries[Idx];
int FoundLane = VTE->findLaneForValue(V);
Mask[I] = Idx * VF + FoundLane;
// Extra check required by isSingleSourceMaskImpl function (called by
// ShuffleVectorInst::isSingleSourceMask).
if (Mask[I] >= 2 * E)
return None;
}
switch (Entries.size()) {
case 1:
return TargetTransformInfo::SK_PermuteSingleSrc;
case 2:
return TargetTransformInfo::SK_PermuteTwoSrc;
default:
break;
}
return None;
}
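// Cost of building a vector of type Ty by inserting all elements not marked
// as shuffled duplicates, plus a permute if duplicates need to be resolved.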
InstructionCost
BoUpSLP::getGatherCost(FixedVectorType *Ty,
const DenseSet<unsigned> &ShuffledIndices,
bool NeedToShuffle) const {
unsigned NumElts = Ty->getNumElements();
APInt DemandedElts = APInt::getZero(NumElts);
for (unsigned I = 0; I < NumElts; ++I)
if (!ShuffledIndices.count(I))
DemandedElts.setBit(I);
InstructionCost Cost =
TTI->getScalarizationOverhead(Ty, DemandedElts, /*Insert*/ true,
/*Extract*/ false);
if (NeedToShuffle)
Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, Ty);
return Cost;
}
InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
// Find the type of the operands in VL.
Type *ScalarTy = VL[0]->getType();
if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
ScalarTy = SI->getValueOperand()->getType();
auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
bool DuplicateNonConst = false;
// Find the cost of inserting/extracting values from the vector.
// Check if the same elements are inserted several times and count them as
// shuffle candidates.
DenseSet<unsigned> ShuffledElements;
DenseSet<Value *> UniqueElements;
// Iterate in reverse order to consider insert elements with the high cost.
for (unsigned I = VL.size(); I > 0; --I) {
unsigned Idx = I - 1;
// No need to shuffle duplicates for constants.
if (isConstant(VL[Idx])) {
ShuffledElements.insert(Idx);
continue;
}
if (!UniqueElements.insert(VL[Idx]).second) {
DuplicateNonConst = true;
ShuffledElements.insert(Idx);
}
}
return getGatherCost(VecTy, ShuffledElements, DuplicateNonConst);
}
// Perform operand reordering on the instructions in VL and return the reordered
// operands in Left and Right.
void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
SmallVectorImpl<Value *> &Left,
SmallVectorImpl<Value *> &Right,
const DataLayout &DL,
ScalarEvolution &SE,
const BoUpSLP &R) {
if (VL.empty())
return;
VLOperands Ops(VL, DL, SE, R);
// Reorder the operands in place.
Ops.reorder();
Left = Ops.getVL(0);
Right = Ops.getVL(1);
}
void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
// Get the basic block this bundle is in. All instructions in the bundle
// should be in this block.
auto *Front = E->getMainOp();
auto *BB = Front->getParent();
assert(llvm::all_of(E->Scalars, [=](Value *V) -> bool {
auto *I = cast<Instruction>(V);
return !E->isOpcodeOrAlt(I) || I->getParent() == BB;
}));
// The last instruction in the bundle in program order.
Instruction *LastInst = nullptr;
// Find the last instruction. The common case should be that BB has been
// scheduled, and the last instruction is VL.back(). So we start with
// VL.back() and iterate over schedule data until we reach the end of the
// bundle. The end of the bundle is marked by null ScheduleData.
if (BlocksSchedules.count(BB)) {
auto *Bundle =
BlocksSchedules[BB]->getScheduleData(E->isOneOf(E->Scalars.back()));
if (Bundle && Bundle->isPartOfBundle())
for (; Bundle; Bundle = Bundle->NextInBundle)
if (Bundle->OpValue == Bundle->Inst)
LastInst = Bundle->Inst;
}
// LastInst can still be null at this point if there's either not an entry
// for BB in BlocksSchedules or there's no ScheduleData available for
// E->Scalars.back(). This can be the case if buildTree_rec aborts for various
// reasons (e.g., the maximum recursion depth is reached, the maximum region
// size is reached, etc.). ScheduleData is initialized in the scheduling
// "dry-run".
//
// If this happens, we can still find the last instruction by brute force. We
// iterate forwards from Front (inclusive) until we either see all
// instructions in the bundle or reach the end of the block. If Front is the
// last instruction in program order, LastInst will be set to Front, and we
// will visit all the remaining instructions in the block.
//
// One of the reasons we exit early from buildTree_rec is to place an upper
// bound on compile-time. Thus, taking an additional compile-time hit here is
// not ideal. However, this should be exceedingly rare since it requires that
// we both exit early from buildTree_rec and that the bundle be out-of-order
// (causing us to iterate all the way to the end of the block).
if (!LastInst) {
SmallPtrSet<Value *, 16> Bundle(E->Scalars.begin(), E->Scalars.end());
for (auto &I : make_range(BasicBlock::iterator(Front), BB->end())) {
if (Bundle.erase(&I) && E->isOpcodeOrAlt(&I))
LastInst = &I;
if (Bundle.empty())
break;
}
}
assert(LastInst && "Failed to find last instruction in bundle");
// Set the insertion point after the last instruction in the bundle. Set the
// debug location to Front.
Builder.SetInsertPoint(BB, ++LastInst->getIterator());
Builder.SetCurrentDebugLocation(Front->getDebugLoc());
}
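// Illustration of the brute-force fallback above (hypothetical block): for a
// block %i0, %i1, %i2, %i3 and a bundle {%i2, %i0} with no ScheduleData
// (both scalars having the bundle's opcode, and assuming Front == %i0), the
// forward scan erases %i0, skips %i1, erases %i2 and stops, so the insertion
// point ends up right after %i2 with Front's debug location.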
Value *BoUpSLP::gather(ArrayRef<Value *> VL) {
// List of instructions/lanes from current block and/or the blocks which are
// part of the current loop. These instructions will be inserted at the end to
// make it possible to optimize loops and hoist invariant instructions out of
// the loop's body with better chances of success.
SmallVector<std::pair<Value *, unsigned>, 4> PostponedInsts;
SmallSet<int, 4> PostponedIndices;
Loop *L = LI->getLoopFor(Builder.GetInsertBlock());
auto &&CheckPredecessor = [](BasicBlock *InstBB, BasicBlock *InsertBB) {
SmallPtrSet<BasicBlock *, 4> Visited;
while (InsertBB && InsertBB != InstBB && Visited.insert(InsertBB).second)
InsertBB = InsertBB->getSinglePredecessor();
return InsertBB && InsertBB == InstBB;
};
for (int I = 0, E = VL.size(); I < E; ++I) {
if (auto *Inst = dyn_cast<Instruction>(VL[I]))
if ((CheckPredecessor(Inst->getParent(), Builder.GetInsertBlock()) ||
getTreeEntry(Inst) || (L && (L->contains(Inst)))) &&
PostponedIndices.insert(I).second)
PostponedInsts.emplace_back(Inst, I);
}
auto &&CreateInsertElement = [this](Value *Vec, Value *V, unsigned Pos) {
Vec = Builder.CreateInsertElement(Vec, V, Builder.getInt32(Pos));
auto *InsElt = dyn_cast<InsertElementInst>(Vec);
if (!InsElt)
return Vec;
GatherShuffleSeq.insert(InsElt);
CSEBlocks.insert(InsElt->getParent());
// Add to our 'need-to-extract' list.
if (TreeEntry *Entry = getTreeEntry(V)) {
// Find which lane we need to extract.
unsigned FoundLane = Entry->findLaneForValue(V);
ExternalUses.emplace_back(V, InsElt, FoundLane);
}
return Vec;
};
Value *Val0 =
isa<StoreInst>(VL[0]) ? cast<StoreInst>(VL[0])->getValueOperand() : VL[0];
FixedVectorType *VecTy = FixedVectorType::get(Val0->getType(), VL.size());
Value *Vec = PoisonValue::get(VecTy);
SmallVector<int> NonConsts;
// Insert the constant values first.
for (int I = 0, E = VL.size(); I < E; ++I) {
if (PostponedIndices.contains(I))
continue;
if (!isConstant(VL[I])) {
NonConsts.push_back(I);
continue;
}
Vec = CreateInsertElement(Vec, VL[I], I);
}
// Insert non-constant values.
for (int I : NonConsts)
Vec = CreateInsertElement(Vec, VL[I], I);
// Append instructions, which are/may be part of the loop, in the end to make
// it possible to hoist non-loop-based instructions.
for (const std::pair<Value *, unsigned> &Pair : PostponedInsts)
Vec = CreateInsertElement(Vec, Pair.first, Pair.second);
return Vec;
}
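// Example of the insertion order produced above (hypothetical values): for
// VL = {i32 7, %p, %q}, where %p is defined outside the loop containing the
// insertion point and %q inside it, the constant 7 is inserted first, then
// %p, and %q is postponed to the end of the insertelement chain. That makes
// it easier to hoist the loop-invariant part of the chain out of the loop
// later in optimizeGatherSequence().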
namespace {
/// Merges shuffle masks and emits final shuffle instruction, if required.
class ShuffleInstructionBuilder {
IRBuilderBase &Builder;
const unsigned VF = 0;
bool IsFinalized = false;
SmallVector<int, 4> Mask;
/// Holds all of the instructions that we gathered.
SetVector<Instruction *> &GatherShuffleSeq;
/// A list of blocks that we are going to CSE.
SetVector<BasicBlock *> &CSEBlocks;
public:
ShuffleInstructionBuilder(IRBuilderBase &Builder, unsigned VF,
SetVector<Instruction *> &GatherShuffleSeq,
SetVector<BasicBlock *> &CSEBlocks)
: Builder(Builder), VF(VF), GatherShuffleSeq(GatherShuffleSeq),
CSEBlocks(CSEBlocks) {}
/// Adds a mask, inverting it before applying.
void addInversedMask(ArrayRef<unsigned> SubMask) {
if (SubMask.empty())
return;
SmallVector<int, 4> NewMask;
inversePermutation(SubMask, NewMask);
addMask(NewMask);
}
/// Adds masks, merging them into a single one.
void addMask(ArrayRef<unsigned> SubMask) {
SmallVector<int, 4> NewMask(SubMask.begin(), SubMask.end());
addMask(NewMask);
}
void addMask(ArrayRef<int> SubMask) { ::addMask(Mask, SubMask); }
Value *finalize(Value *V) {
IsFinalized = true;
unsigned ValueVF = cast<FixedVectorType>(V->getType())->getNumElements();
if (VF == ValueVF && Mask.empty())
return V;
SmallVector<int, 4> NormalizedMask(VF, UndefMaskElem);
std::iota(NormalizedMask.begin(), NormalizedMask.end(), 0);
addMask(NormalizedMask);
if (VF == ValueVF && ShuffleVectorInst::isIdentityMask(Mask))
return V;
Value *Vec = Builder.CreateShuffleVector(V, Mask, "shuffle");
if (auto *I = dyn_cast<Instruction>(Vec)) {
GatherShuffleSeq.insert(I);
CSEBlocks.insert(I->getParent());
}
return Vec;
}
~ShuffleInstructionBuilder() {
assert((IsFinalized || Mask.empty()) &&
"Shuffle construction must be finalized.");
}
};
} // namespace
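// Usage sketch for the builder above (hypothetical order): addInversedMask on
// a reorder of {2, 0, 1} first inverts it to {1, 2, 0} (assuming
// inversePermutation builds Mask[Indices[I]] = I), merges it into the
// accumulated mask, and finalize() then emits at most one "shuffle"
// instruction - or none at all when the combined mask is an identity of
// width VF.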
Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
unsigned VF = VL.size();
InstructionsState S = getSameOpcode(VL);
if (S.getOpcode()) {
if (TreeEntry *E = getTreeEntry(S.OpValue))
if (E->isSame(VL)) {
Value *V = vectorizeTree(E);
if (VF != cast<FixedVectorType>(V->getType())->getNumElements()) {
if (!E->ReuseShuffleIndices.empty()) {
// Reshuffle to get only unique values.
// If some of the scalars are duplicated in the vectorization tree
// entry, we do not vectorize them but instead generate a mask for
// the reuses. But if there are several users of the same entry,
// they may have different vectorization factors. This is especially
// important for PHI nodes. In this case, we need to adapt the
// resulting instruction for the user vectorization factor and have
// to reshuffle it again to take only unique elements of the vector.
// Without this code the function would incorrectly return a reduced vector
// instruction with the same elements rather than the unique ones.
// block:
// %phi = phi <2 x > { .., %entry} {%shuffle, %block}
// %2 = shuffle <2 x > %phi, poison, <4 x > <1, 1, 0, 0>
// ... (use %2)
// %shuffle = shuffle <2 x> %2, poison, <2 x> {2, 0}
// br %block
SmallVector<int> UniqueIdxs(VF, UndefMaskElem);
SmallSet<int, 4> UsedIdxs;
int Pos = 0;
int Sz = VL.size();
for (int Idx : E->ReuseShuffleIndices) {
if (Idx != Sz && Idx != UndefMaskElem &&
UsedIdxs.insert(Idx).second)
UniqueIdxs[Idx] = Pos;
++Pos;
}
assert(VF >= UsedIdxs.size() && "Expected vectorization factor to be "
"at least the number of used indices.");
UniqueIdxs.append(VF - UsedIdxs.size(), UndefMaskElem);
V = Builder.CreateShuffleVector(V, UniqueIdxs, "shrink.shuffle");
} else {
assert(VF < cast<FixedVectorType>(V->getType())->getNumElements() &&
"Expected vectorization factor less "
"than original vector size.");
SmallVector<int> UniformMask(VF, 0);
std::iota(UniformMask.begin(), UniformMask.end(), 0);
V = Builder.CreateShuffleVector(V, UniformMask, "shrink.shuffle");
}
if (auto *I = dyn_cast<Instruction>(V)) {
GatherShuffleSeq.insert(I);
CSEBlocks.insert(I->getParent());
}
}
return V;
}
}
// Check that every instruction appears once in this bundle.
SmallVector<int> ReuseShuffleIndicies;
SmallVector<Value *> UniqueValues;
if (VL.size() > 2) {
DenseMap<Value *, unsigned> UniquePositions;
unsigned NumValues =
std::distance(VL.begin(), find_if(reverse(VL), [](Value *V) {
return !isa<UndefValue>(V);
}).base());
VF = std::max<unsigned>(VF, PowerOf2Ceil(NumValues));
int UniqueVals = 0;
for (Value *V : VL.drop_back(VL.size() - VF)) {
if (isa<UndefValue>(V)) {
ReuseShuffleIndicies.emplace_back(UndefMaskElem);
continue;
}
if (isConstant(V)) {
ReuseShuffleIndicies.emplace_back(UniqueValues.size());
UniqueValues.emplace_back(V);
continue;
}
auto Res = UniquePositions.try_emplace(V, UniqueValues.size());
ReuseShuffleIndicies.emplace_back(Res.first->second);
if (Res.second) {
UniqueValues.emplace_back(V);
++UniqueVals;
}
}
if (UniqueVals == 1 && UniqueValues.size() == 1) {
// Emit pure splat vector.
ReuseShuffleIndicies.append(VF - ReuseShuffleIndicies.size(),
UndefMaskElem);
} else if (UniqueValues.size() >= VF - 1 || UniqueValues.size() <= 1) {
ReuseShuffleIndicies.clear();
UniqueValues.clear();
UniqueValues.append(VL.begin(), std::next(VL.begin(), NumValues));
}
UniqueValues.append(VF - UniqueValues.size(),
PoisonValue::get(VL[0]->getType()));
VL = UniqueValues;
}
ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleSeq,
CSEBlocks);
Value *Vec = gather(VL);
if (!ReuseShuffleIndicies.empty()) {
ShuffleBuilder.addMask(ReuseShuffleIndicies);
Vec = ShuffleBuilder.finalize(Vec);
}
return Vec;
}
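// Worked example of the reuse-shuffle construction above (hypothetical
// values): for VL = {%a, %b, %a, undef}, NumValues is 3, VF stays 4, and the
// scan yields UniqueValues = {%a, %b} with ReuseShuffleIndicies =
// {0, 1, 0, undef}. After padding with poison, gather() builds
// <%a, %b, poison, poison> and the final shuffle with that mask recreates
// the original {%a, %b, %a, undef} lane layout.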
Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
IRBuilder<>::InsertPointGuard Guard(Builder);
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *E->Scalars[0] << ".\n");
return E->VectorizedValue;
}
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
unsigned VF = E->getVectorFactor();
ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleSeq,
CSEBlocks);
if (E->State == TreeEntry::NeedToGather) {
if (E->getMainOp())
setInsertPointAfterBundle(E);
Value *Vec;
SmallVector<int> Mask;
SmallVector<const TreeEntry *> Entries;
Optional<TargetTransformInfo::ShuffleKind> Shuffle =
isGatherShuffledEntry(E, Mask, Entries);
if (Shuffle.hasValue()) {
assert((Entries.size() == 1 || Entries.size() == 2) &&
"Expected shuffle of 1 or 2 entries.");
Vec = Builder.CreateShuffleVector(Entries.front()->VectorizedValue,
Entries.back()->VectorizedValue, Mask);
if (auto *I = dyn_cast<Instruction>(Vec)) {
GatherShuffleSeq.insert(I);
CSEBlocks.insert(I->getParent());
}
} else {
Vec = gather(E->Scalars);
}
if (NeedToShuffleReuses) {
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
Vec = ShuffleBuilder.finalize(Vec);
}
E->VectorizedValue = Vec;
return Vec;
}
assert((E->State == TreeEntry::Vectorize ||
E->State == TreeEntry::ScatterVectorize) &&
"Unhandled state");
unsigned ShuffleOrOp =
E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode();
Instruction *VL0 = E->getMainOp();
Type *ScalarTy = VL0->getType();
if (auto *Store = dyn_cast<StoreInst>(VL0))
ScalarTy = Store->getValueOperand()->getType();
else if (auto *IE = dyn_cast<InsertElementInst>(VL0))
ScalarTy = IE->getOperand(1)->getType();
auto *VecTy = FixedVectorType::get(ScalarTy, E->Scalars.size());
switch (ShuffleOrOp) {
case Instruction::PHI: {
assert(
(E->ReorderIndices.empty() || E != VectorizableTree.front().get()) &&
"PHI reordering is free.");
auto *PH = cast<PHINode>(VL0);
Builder.SetInsertPoint(PH->getParent()->getFirstNonPHI());
Builder.SetCurrentDebugLocation(PH->getDebugLoc());
PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues());
Value *V = NewPhi;
ShuffleBuilder.addInversedMask(E->ReorderIndices);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
V = ShuffleBuilder.finalize(V);
E->VectorizedValue = V;
// PHINodes may have multiple entries from the same block. We want to
// visit every block once.
SmallPtrSet<BasicBlock*, 4> VisitedBBs;
for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
ValueList Operands;
BasicBlock *IBB = PH->getIncomingBlock(i);
if (!VisitedBBs.insert(IBB).second) {
NewPhi->addIncoming(NewPhi->getIncomingValueForBlock(IBB), IBB);
continue;
}
Builder.SetInsertPoint(IBB->getTerminator());
Builder.SetCurrentDebugLocation(PH->getDebugLoc());
Value *Vec = vectorizeTree(E->getOperand(i));
NewPhi->addIncoming(Vec, IBB);
}
assert(NewPhi->getNumIncomingValues() == PH->getNumIncomingValues() &&
"Invalid number of incoming values");
return V;
}
case Instruction::ExtractElement: {
Value *V = E->getSingleOperand(0);
Builder.SetInsertPoint(VL0);
ShuffleBuilder.addInversedMask(E->ReorderIndices);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
V = ShuffleBuilder.finalize(V);
E->VectorizedValue = V;
return V;
}
case Instruction::ExtractValue: {
auto *LI = cast<LoadInst>(E->getSingleOperand(0));
Builder.SetInsertPoint(LI);
auto *PtrTy = PointerType::get(VecTy, LI->getPointerAddressSpace());
Value *Ptr = Builder.CreateBitCast(LI->getOperand(0), PtrTy);
LoadInst *V = Builder.CreateAlignedLoad(VecTy, Ptr, LI->getAlign());
Value *NewV = propagateMetadata(V, E->Scalars);
ShuffleBuilder.addInversedMask(E->ReorderIndices);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
NewV = ShuffleBuilder.finalize(NewV);
E->VectorizedValue = NewV;
return NewV;
}
case Instruction::InsertElement: {
assert(E->ReuseShuffleIndices.empty() && "All inserts should be unique");
Builder.SetInsertPoint(cast<Instruction>(E->Scalars.back()));
Value *V = vectorizeTree(E->getOperand(1));
// Create InsertVector shuffle if necessary
auto *FirstInsert = cast<Instruction>(*find_if(E->Scalars, [E](Value *V) {
return !is_contained(E->Scalars, cast<Instruction>(V)->getOperand(0));
}));
const unsigned NumElts =
cast<FixedVectorType>(FirstInsert->getType())->getNumElements();
const unsigned NumScalars = E->Scalars.size();
unsigned Offset = *getInsertIndex(VL0, 0);
assert(Offset < NumElts && "Failed to find vector index offset");
// Create shuffle to resize vector
SmallVector<int> Mask;
if (!E->ReorderIndices.empty()) {
inversePermutation(E->ReorderIndices, Mask);
Mask.append(NumElts - NumScalars, UndefMaskElem);
} else {
Mask.assign(NumElts, UndefMaskElem);
std::iota(Mask.begin(), std::next(Mask.begin(), NumScalars), 0);
}
// Create InsertVector shuffle if necessary
bool IsIdentity = true;
SmallVector<int> PrevMask(NumElts, UndefMaskElem);
Mask.swap(PrevMask);
for (unsigned I = 0; I < NumScalars; ++I) {
Value *Scalar = E->Scalars[PrevMask[I]];
- Optional<int> InsertIdx = getInsertIndex(Scalar, 0);
- if (!InsertIdx || *InsertIdx == UndefMaskElem)
- continue;
- IsIdentity &= *InsertIdx - Offset == I;
- Mask[*InsertIdx - Offset] = I;
+ unsigned InsertIdx = *getInsertIndex(Scalar);
+ IsIdentity &= InsertIdx - Offset == I;
+ Mask[InsertIdx - Offset] = I;
}
if (!IsIdentity || NumElts != NumScalars) {
V = Builder.CreateShuffleVector(V, Mask);
if (auto *I = dyn_cast<Instruction>(V)) {
GatherShuffleSeq.insert(I);
CSEBlocks.insert(I->getParent());
}
}
if ((!IsIdentity || Offset != 0 ||
!isUndefVector(FirstInsert->getOperand(0))) &&
NumElts != NumScalars) {
SmallVector<int> InsertMask(NumElts);
std::iota(InsertMask.begin(), InsertMask.end(), 0);
for (unsigned I = 0; I < NumElts; I++) {
if (Mask[I] != UndefMaskElem)
InsertMask[Offset + I] = NumElts + I;
}
V = Builder.CreateShuffleVector(
FirstInsert->getOperand(0), V, InsertMask,
cast<Instruction>(E->Scalars.back())->getName());
if (auto *I = dyn_cast<Instruction>(V)) {
GatherShuffleSeq.insert(I);
CSEBlocks.insert(I->getParent());
}
}
++NumVectorInstructions;
E->VectorizedValue = V;
return V;
}
case Instruction::ZExt:
case Instruction::SExt:
case Instruction::FPToUI:
case Instruction::FPToSI:
case Instruction::FPExt:
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::SIToFP:
case Instruction::UIToFP:
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
setInsertPointAfterBundle(E);
Value *InVec = vectorizeTree(E->getOperand(0));
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
auto *CI = cast<CastInst>(VL0);
Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
ShuffleBuilder.addInversedMask(E->ReorderIndices);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
V = ShuffleBuilder.finalize(V);
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
}
case Instruction::FCmp:
case Instruction::ICmp: {
setInsertPointAfterBundle(E);
Value *L = vectorizeTree(E->getOperand(0));
Value *R = vectorizeTree(E->getOperand(1));
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
Value *V = Builder.CreateCmp(P0, L, R);
propagateIRFlags(V, E->Scalars, VL0);
ShuffleBuilder.addInversedMask(E->ReorderIndices);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
V = ShuffleBuilder.finalize(V);
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
}
case Instruction::Select: {
setInsertPointAfterBundle(E);
Value *Cond = vectorizeTree(E->getOperand(0));
Value *True = vectorizeTree(E->getOperand(1));
Value *False = vectorizeTree(E->getOperand(2));
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
Value *V = Builder.CreateSelect(Cond, True, False);
ShuffleBuilder.addInversedMask(E->ReorderIndices);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
V = ShuffleBuilder.finalize(V);
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
}
case Instruction::FNeg: {
setInsertPointAfterBundle(E);
Value *Op = vectorizeTree(E->getOperand(0));
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
Value *V = Builder.CreateUnOp(
static_cast<Instruction::UnaryOps>(E->getOpcode()), Op);
propagateIRFlags(V, E->Scalars, VL0);
if (auto *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
ShuffleBuilder.addInversedMask(E->ReorderIndices);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
V = ShuffleBuilder.finalize(V);
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
}
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
case Instruction::FSub:
case Instruction::Mul:
case Instruction::FMul:
case Instruction::UDiv:
case Instruction::SDiv:
case Instruction::FDiv:
case Instruction::URem:
case Instruction::SRem:
case Instruction::FRem:
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
setInsertPointAfterBundle(E);
Value *LHS = vectorizeTree(E->getOperand(0));
Value *RHS = vectorizeTree(E->getOperand(1));
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
Value *V = Builder.CreateBinOp(
static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS,
RHS);
propagateIRFlags(V, E->Scalars, VL0);
if (auto *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
ShuffleBuilder.addInversedMask(E->ReorderIndices);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
V = ShuffleBuilder.finalize(V);
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
}
case Instruction::Load: {
// Loads are inserted at the head of the tree because we don't want to
// sink them all the way down past store instructions.
setInsertPointAfterBundle(E);
LoadInst *LI = cast<LoadInst>(VL0);
Instruction *NewLI;
unsigned AS = LI->getPointerAddressSpace();
Value *PO = LI->getPointerOperand();
if (E->State == TreeEntry::Vectorize) {
Value *VecPtr = Builder.CreateBitCast(PO, VecTy->getPointerTo(AS));
// The pointer operand uses an in-tree scalar so we add the new BitCast
// to ExternalUses list to make sure that an extract will be generated
// in the future.
if (TreeEntry *Entry = getTreeEntry(PO)) {
// Find which lane we need to extract.
unsigned FoundLane = Entry->findLaneForValue(PO);
ExternalUses.emplace_back(PO, cast<User>(VecPtr), FoundLane);
}
NewLI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign());
} else {
assert(E->State == TreeEntry::ScatterVectorize && "Unhandled state");
Value *VecPtr = vectorizeTree(E->getOperand(0));
// Use the minimum alignment of the gathered loads.
Align CommonAlignment = LI->getAlign();
for (Value *V : E->Scalars)
CommonAlignment =
commonAlignment(CommonAlignment, cast<LoadInst>(V)->getAlign());
NewLI = Builder.CreateMaskedGather(VecTy, VecPtr, CommonAlignment);
}
Value *V = propagateMetadata(NewLI, E->Scalars);
ShuffleBuilder.addInversedMask(E->ReorderIndices);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
V = ShuffleBuilder.finalize(V);
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
}
case Instruction::Store: {
auto *SI = cast<StoreInst>(VL0);
unsigned AS = SI->getPointerAddressSpace();
setInsertPointAfterBundle(E);
Value *VecValue = vectorizeTree(E->getOperand(0));
ShuffleBuilder.addMask(E->ReorderIndices);
VecValue = ShuffleBuilder.finalize(VecValue);
Value *ScalarPtr = SI->getPointerOperand();
Value *VecPtr = Builder.CreateBitCast(
ScalarPtr, VecValue->getType()->getPointerTo(AS));
StoreInst *ST = Builder.CreateAlignedStore(VecValue, VecPtr,
SI->getAlign());
// The pointer operand uses an in-tree scalar, so add the new BitCast to
// ExternalUses to make sure that an extract will be generated in the
// future.
if (TreeEntry *Entry = getTreeEntry(ScalarPtr)) {
// Find which lane we need to extract.
unsigned FoundLane = Entry->findLaneForValue(ScalarPtr);
ExternalUses.push_back(
ExternalUser(ScalarPtr, cast<User>(VecPtr), FoundLane));
}
Value *V = propagateMetadata(ST, E->Scalars);
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
}
case Instruction::GetElementPtr: {
auto *GEP0 = cast<GetElementPtrInst>(VL0);
setInsertPointAfterBundle(E);
Value *Op0 = vectorizeTree(E->getOperand(0));
SmallVector<Value *> OpVecs;
for (int J = 1, N = GEP0->getNumOperands(); J < N; ++J) {
Value *OpVec = vectorizeTree(E->getOperand(J));
OpVecs.push_back(OpVec);
}
Value *V = Builder.CreateGEP(GEP0->getSourceElementType(), Op0, OpVecs);
if (Instruction *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
ShuffleBuilder.addInversedMask(E->ReorderIndices);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
V = ShuffleBuilder.finalize(V);
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
}
case Instruction::Call: {
CallInst *CI = cast<CallInst>(VL0);
setInsertPointAfterBundle(E);
Intrinsic::ID IID = Intrinsic::not_intrinsic;
if (Function *FI = CI->getCalledFunction())
IID = FI->getIntrinsicID();
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
bool UseIntrinsic = ID != Intrinsic::not_intrinsic &&
VecCallCosts.first <= VecCallCosts.second;
Value *ScalarArg = nullptr;
std::vector<Value *> OpVecs;
SmallVector<Type *, 2> TysForDecl =
{FixedVectorType::get(CI->getType(), E->Scalars.size())};
for (int j = 0, e = CI->arg_size(); j < e; ++j) {
ValueList OpVL;
// Some intrinsics have scalar arguments. This argument should not be
// vectorized.
if (UseIntrinsic && hasVectorInstrinsicScalarOpd(IID, j)) {
CallInst *CEI = cast<CallInst>(VL0);
ScalarArg = CEI->getArgOperand(j);
OpVecs.push_back(CEI->getArgOperand(j));
if (hasVectorInstrinsicOverloadedScalarOpd(IID, j))
TysForDecl.push_back(ScalarArg->getType());
continue;
}
Value *OpVec = vectorizeTree(E->getOperand(j));
LLVM_DEBUG(dbgs() << "SLP: OpVec[" << j << "]: " << *OpVec << "\n");
OpVecs.push_back(OpVec);
}
Function *CF;
if (!UseIntrinsic) {
VFShape Shape =
VFShape::get(*CI, ElementCount::getFixed(static_cast<unsigned>(
VecTy->getNumElements())),
false /*HasGlobalPred*/);
CF = VFDatabase(*CI).getVectorizedFunction(Shape);
} else {
CF = Intrinsic::getDeclaration(F->getParent(), ID, TysForDecl);
}
SmallVector<OperandBundleDef, 1> OpBundles;
CI->getOperandBundlesAsDefs(OpBundles);
Value *V = Builder.CreateCall(CF, OpVecs, OpBundles);
// The scalar argument uses an in-tree scalar so we add the new vectorized
// call to ExternalUses list to make sure that an extract will be
// generated in the future.
if (ScalarArg) {
if (TreeEntry *Entry = getTreeEntry(ScalarArg)) {
// Find which lane we need to extract.
unsigned FoundLane = Entry->findLaneForValue(ScalarArg);
ExternalUses.push_back(
ExternalUser(ScalarArg, cast<User>(V), FoundLane));
}
}
propagateIRFlags(V, E->Scalars, VL0);
ShuffleBuilder.addInversedMask(E->ReorderIndices);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
V = ShuffleBuilder.finalize(V);
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
}
case Instruction::ShuffleVector: {
assert(E->isAltShuffle() &&
((Instruction::isBinaryOp(E->getOpcode()) &&
Instruction::isBinaryOp(E->getAltOpcode())) ||
(Instruction::isCast(E->getOpcode()) &&
Instruction::isCast(E->getAltOpcode()))) &&
"Invalid Shuffle Vector Operand");
Value *LHS = nullptr, *RHS = nullptr;
if (Instruction::isBinaryOp(E->getOpcode())) {
setInsertPointAfterBundle(E);
LHS = vectorizeTree(E->getOperand(0));
RHS = vectorizeTree(E->getOperand(1));
} else {
setInsertPointAfterBundle(E);
LHS = vectorizeTree(E->getOperand(0));
}
if (E->VectorizedValue) {
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
return E->VectorizedValue;
}
Value *V0, *V1;
if (Instruction::isBinaryOp(E->getOpcode())) {
V0 = Builder.CreateBinOp(
static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS, RHS);
V1 = Builder.CreateBinOp(
static_cast<Instruction::BinaryOps>(E->getAltOpcode()), LHS, RHS);
} else {
V0 = Builder.CreateCast(
static_cast<Instruction::CastOps>(E->getOpcode()), LHS, VecTy);
V1 = Builder.CreateCast(
static_cast<Instruction::CastOps>(E->getAltOpcode()), LHS, VecTy);
}
// Add V0 and V1 to later analysis to try to find and remove matching
// instructions, if any.
for (Value *V : {V0, V1}) {
if (auto *I = dyn_cast<Instruction>(V)) {
GatherShuffleSeq.insert(I);
CSEBlocks.insert(I->getParent());
}
}
// Create shuffle to take alternate operations from the vector.
// Also, gather up main and alt scalar ops to propagate IR flags to
// each vector operation.
ValueList OpScalars, AltScalars;
SmallVector<int> Mask;
buildSuffleEntryMask(
E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
[E](Instruction *I) {
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
return I->getOpcode() == E->getAltOpcode();
},
Mask, &OpScalars, &AltScalars);
propagateIRFlags(V0, OpScalars);
propagateIRFlags(V1, AltScalars);
Value *V = Builder.CreateShuffleVector(V0, V1, Mask);
if (auto *I = dyn_cast<Instruction>(V)) {
V = propagateMetadata(I, E->Scalars);
GatherShuffleSeq.insert(I);
CSEBlocks.insert(I->getParent());
}
V = ShuffleBuilder.finalize(V);
E->VectorizedValue = V;
++NumVectorInstructions;
return V;
}
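// Example of the alternate-opcode shuffle above (hypothetical scalars): for a
// bundle {%x = add %a, %b; %y = sub %c, %d} with main opcode Add and
// alternate opcode Sub, V0 = <%a+%b, %c+%d> and V1 = <%a-%b, %c-%d>;
// assuming buildSuffleEntryMask selects alternate lanes from V1 by offsetting
// their indices by the vector width, the mask is {0, 3} and the final
// shufflevector yields <%a+%b, %c-%d>.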
default:
llvm_unreachable("unknown inst");
}
return nullptr;
}
Value *BoUpSLP::vectorizeTree() {
ExtraValueToDebugLocsMap ExternallyUsedValues;
return vectorizeTree(ExternallyUsedValues);
}
Value *
BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
// All blocks must be scheduled before any instructions are inserted.
for (auto &BSIter : BlocksSchedules) {
scheduleBlock(BSIter.second.get());
}
Builder.SetInsertPoint(&F->getEntryBlock().front());
auto *VectorRoot = vectorizeTree(VectorizableTree[0].get());
// If the vectorized tree can be rewritten in a smaller type, we truncate the
// vectorized root. InstCombine will then rewrite the entire expression. We
// sign extend the extracted values below.
auto *ScalarRoot = VectorizableTree[0]->Scalars[0];
if (MinBWs.count(ScalarRoot)) {
if (auto *I = dyn_cast<Instruction>(VectorRoot)) {
// If the current instruction is a phi node, insert the truncation after the
// last phi node, i.e. at the block's first insertion point.
if (isa<PHINode>(I))
Builder.SetInsertPoint(&*I->getParent()->getFirstInsertionPt());
else
Builder.SetInsertPoint(&*++BasicBlock::iterator(I));
}
auto BundleWidth = VectorizableTree[0]->Scalars.size();
auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
auto *VecTy = FixedVectorType::get(MinTy, BundleWidth);
auto *Trunc = Builder.CreateTrunc(VectorRoot, VecTy);
VectorizableTree[0]->VectorizedValue = Trunc;
}
LLVM_DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size()
<< " values .\n");
// Extract all of the elements with the external uses.
for (const auto &ExternalUse : ExternalUses) {
Value *Scalar = ExternalUse.Scalar;
llvm::User *User = ExternalUse.User;
// Skip users that we have already replaced (RAUW). This happens when one
// instruction has multiple uses of the same value.
if (User && !is_contained(Scalar->users(), User))
continue;
TreeEntry *E = getTreeEntry(Scalar);
assert(E && "Invalid scalar");
assert(E->State != TreeEntry::NeedToGather &&
"Extracting from a gather list");
Value *Vec = E->VectorizedValue;
assert(Vec && "Can't find vectorizable value");
Value *Lane = Builder.getInt32(ExternalUse.Lane);
auto ExtractAndExtendIfNeeded = [&](Value *Vec) {
if (Scalar->getType() != Vec->getType()) {
Value *Ex;
// "Reuse" the existing extract to improve final codegen.
if (auto *ES = dyn_cast<ExtractElementInst>(Scalar)) {
Ex = Builder.CreateExtractElement(ES->getOperand(0),
ES->getOperand(1));
} else {
Ex = Builder.CreateExtractElement(Vec, Lane);
}
// If necessary, sign-extend or zero-extend ScalarRoot
// to the larger type.
if (!MinBWs.count(ScalarRoot))
return Ex;
if (MinBWs[ScalarRoot].second)
return Builder.CreateSExt(Ex, Scalar->getType());
return Builder.CreateZExt(Ex, Scalar->getType());
}
assert(isa<FixedVectorType>(Scalar->getType()) &&
isa<InsertElementInst>(Scalar) &&
"In-tree scalar of vector type is not insertelement?");
return Vec;
};
// If User == nullptr, the Scalar is used as extra arg. Generate
// ExtractElement instruction and update the record for this scalar in
// ExternallyUsedValues.
if (!User) {
assert(ExternallyUsedValues.count(Scalar) &&
"Scalar with nullptr as an external user must be registered in "
"ExternallyUsedValues map");
if (auto *VecI = dyn_cast<Instruction>(Vec)) {
Builder.SetInsertPoint(VecI->getParent(),
std::next(VecI->getIterator()));
} else {
Builder.SetInsertPoint(&F->getEntryBlock().front());
}
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
CSEBlocks.insert(cast<Instruction>(Scalar)->getParent());
auto &NewInstLocs = ExternallyUsedValues[NewInst];
auto It = ExternallyUsedValues.find(Scalar);
assert(It != ExternallyUsedValues.end() &&
"Externally used scalar is not found in ExternallyUsedValues");
NewInstLocs.append(It->second);
ExternallyUsedValues.erase(Scalar);
// Required to update internally referenced instructions.
Scalar->replaceAllUsesWith(NewInst);
continue;
}
// Generate extracts for out-of-tree users.
// Find the insertion point for the extractelement lane.
if (auto *VecI = dyn_cast<Instruction>(Vec)) {
if (PHINode *PH = dyn_cast<PHINode>(User)) {
for (int i = 0, e = PH->getNumIncomingValues(); i != e; ++i) {
if (PH->getIncomingValue(i) == Scalar) {
Instruction *IncomingTerminator =
PH->getIncomingBlock(i)->getTerminator();
if (isa<CatchSwitchInst>(IncomingTerminator)) {
Builder.SetInsertPoint(VecI->getParent(),
std::next(VecI->getIterator()));
} else {
Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator());
}
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
CSEBlocks.insert(PH->getIncomingBlock(i));
PH->setOperand(i, NewInst);
}
}
} else {
Builder.SetInsertPoint(cast<Instruction>(User));
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
CSEBlocks.insert(cast<Instruction>(User)->getParent());
User->replaceUsesOfWith(Scalar, NewInst);
}
} else {
Builder.SetInsertPoint(&F->getEntryBlock().front());
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
CSEBlocks.insert(&F->getEntryBlock());
User->replaceUsesOfWith(Scalar, NewInst);
}
LLVM_DEBUG(dbgs() << "SLP: Replaced:" << *User << ".\n");
}
// For each vectorized value:
for (auto &TEPtr : VectorizableTree) {
TreeEntry *Entry = TEPtr.get();
// No need to handle users of gathered values.
if (Entry->State == TreeEntry::NeedToGather)
continue;
assert(Entry->VectorizedValue && "Can't find vectorizable value");
// For each lane:
for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
Value *Scalar = Entry->Scalars[Lane];
#ifndef NDEBUG
Type *Ty = Scalar->getType();
if (!Ty->isVoidTy()) {
for (User *U : Scalar->users()) {
LLVM_DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");
// It is legal to delete users in the ignorelist.
assert((getTreeEntry(U) || is_contained(UserIgnoreList, U) ||
(isa_and_nonnull<Instruction>(U) &&
isDeleted(cast<Instruction>(U)))) &&
"Deleting out-of-tree value");
}
}
#endif
LLVM_DEBUG(dbgs() << "SLP: \tErasing scalar:" << *Scalar << ".\n");
eraseInstruction(cast<Instruction>(Scalar));
}
}
Builder.ClearInsertionPoint();
InstrElementSize.clear();
return VectorizableTree[0]->VectorizedValue;
}
void BoUpSLP::optimizeGatherSequence() {
LLVM_DEBUG(dbgs() << "SLP: Optimizing " << GatherShuffleSeq.size()
<< " gather sequences instructions.\n");
// LICM InsertElementInst sequences.
for (Instruction *I : GatherShuffleSeq) {
if (isDeleted(I))
continue;
// Check if this block is inside a loop.
Loop *L = LI->getLoopFor(I->getParent());
if (!L)
continue;
// Check if it has a preheader.
BasicBlock *PreHeader = L->getLoopPreheader();
if (!PreHeader)
continue;
// If the vector or the element that we insert into it are
// instructions that are defined in this basic block then we can't
// hoist this instruction.
if (any_of(I->operands(), [L](Value *V) {
auto *OpI = dyn_cast<Instruction>(V);
return OpI && L->contains(OpI);
}))
continue;
// We can hoist this instruction. Move it to the pre-header.
I->moveBefore(PreHeader->getTerminator());
}
// Make a list of all reachable blocks in our CSE queue.
SmallVector<const DomTreeNode *, 8> CSEWorkList;
CSEWorkList.reserve(CSEBlocks.size());
for (BasicBlock *BB : CSEBlocks)
if (DomTreeNode *N = DT->getNode(BB)) {
assert(DT->isReachableFromEntry(N));
CSEWorkList.push_back(N);
}
// Sort blocks by domination. This ensures we visit a block after all blocks
// dominating it are visited.
llvm::sort(CSEWorkList, [](const DomTreeNode *A, const DomTreeNode *B) {
assert((A == B) == (A->getDFSNumIn() == B->getDFSNumIn()) &&
"Different nodes should have different DFS numbers");
return A->getDFSNumIn() < B->getDFSNumIn();
});
// A less-defined shuffle can be replaced by a more-defined copy. Of two
// shuffles with the same vector operands, one is less defined if each of its
// mask indices either equals the corresponding index in the other mask or is
// undef. E.g. shuffle %0, poison, <0, 0, 0, undef> is less defined than
// shuffle %0, poison, <0, 0, 0, 0>.
auto &&IsIdenticalOrLessDefined = [this](Instruction *I1, Instruction *I2,
SmallVectorImpl<int> &NewMask) {
if (I1->getType() != I2->getType())
return false;
auto *SI1 = dyn_cast<ShuffleVectorInst>(I1);
auto *SI2 = dyn_cast<ShuffleVectorInst>(I2);
if (!SI1 || !SI2)
return I1->isIdenticalTo(I2);
if (SI1->isIdenticalTo(SI2))
return true;
for (int I = 0, E = SI1->getNumOperands(); I < E; ++I)
if (SI1->getOperand(I) != SI2->getOperand(I))
return false;
// Check if the second instruction is more defined than the first one.
NewMask.assign(SI2->getShuffleMask().begin(), SI2->getShuffleMask().end());
ArrayRef<int> SM1 = SI1->getShuffleMask();
// Count trailing undefs in the mask to check the final number of used
// registers.
unsigned LastUndefsCnt = 0;
for (int I = 0, E = NewMask.size(); I < E; ++I) {
if (SM1[I] == UndefMaskElem)
++LastUndefsCnt;
else
LastUndefsCnt = 0;
if (NewMask[I] != UndefMaskElem && SM1[I] != UndefMaskElem &&
NewMask[I] != SM1[I])
return false;
if (NewMask[I] == UndefMaskElem)
NewMask[I] = SM1[I];
}
// Check if the last undefs actually change the final number of used vector
// registers.
return SM1.size() - LastUndefsCnt > 1 &&
TTI->getNumberOfParts(SI1->getType()) ==
TTI->getNumberOfParts(
FixedVectorType::get(SI1->getType()->getElementType(),
SM1.size() - LastUndefsCnt));
};
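// For example (hypothetical shuffles): %s1 = shufflevector %v, poison,
// <0, 0, undef, undef> and %s2 = shufflevector %v, poison, <0, 0, 0, 0>
// have identical operands and %s1's defined lanes match %s2's, so %s1 is
// "less defined" and can be replaced by %s2 - provided the two trailing
// undefs do not let %s1 fit into fewer vector registers on the target.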
// Perform O(N^2) search over the gather/shuffle sequences and merge identical
// instructions. TODO: We can further optimize this scan if we split the
// instructions into different buckets based on the insert lane.
SmallVector<Instruction *, 16> Visited;
for (auto I = CSEWorkList.begin(), E = CSEWorkList.end(); I != E; ++I) {
assert(*I &&
(I == CSEWorkList.begin() || !DT->dominates(*I, *std::prev(I))) &&
"Worklist not sorted properly!");
BasicBlock *BB = (*I)->getBlock();
// For all instructions in blocks containing gather sequences:
for (Instruction &In : llvm::make_early_inc_range(*BB)) {
if (isDeleted(&In))
continue;
if (!isa<InsertElementInst>(&In) && !isa<ExtractElementInst>(&In) &&
!isa<ShuffleVectorInst>(&In) && !GatherShuffleSeq.contains(&In))
continue;
// Check if we can replace this instruction with any of the
// visited instructions.
bool Replaced = false;
for (Instruction *&V : Visited) {
SmallVector<int> NewMask;
if (IsIdenticalOrLessDefined(&In, V, NewMask) &&
DT->dominates(V->getParent(), In.getParent())) {
In.replaceAllUsesWith(V);
eraseInstruction(&In);
if (auto *SI = dyn_cast<ShuffleVectorInst>(V))
if (!NewMask.empty())
SI->setShuffleMask(NewMask);
Replaced = true;
break;
}
if (isa<ShuffleVectorInst>(In) && isa<ShuffleVectorInst>(V) &&
GatherShuffleSeq.contains(V) &&
IsIdenticalOrLessDefined(V, &In, NewMask) &&
DT->dominates(In.getParent(), V->getParent())) {
In.moveAfter(V);
V->replaceAllUsesWith(&In);
eraseInstruction(V);
if (auto *SI = dyn_cast<ShuffleVectorInst>(&In))
if (!NewMask.empty())
SI->setShuffleMask(NewMask);
V = &In;
Replaced = true;
break;
}
}
if (!Replaced) {
assert(!is_contained(Visited, &In));
Visited.push_back(&In);
}
}
}
CSEBlocks.clear();
GatherShuffleSeq.clear();
}
BoUpSLP::ScheduleData *
BoUpSLP::BlockScheduling::buildBundle(ArrayRef<Value *> VL) {
ScheduleData *Bundle = nullptr;
ScheduleData *PrevInBundle = nullptr;
for (Value *V : VL) {
ScheduleData *BundleMember = getScheduleData(V);
assert(BundleMember &&
"no ScheduleData for bundle member "
"(maybe not in same basic block)");
assert(BundleMember->isSchedulingEntity() &&
"bundle member already part of other bundle");
if (PrevInBundle) {
PrevInBundle->NextInBundle = BundleMember;
} else {
Bundle = BundleMember;
}
BundleMember->UnscheduledDepsInBundle = 0;
Bundle->UnscheduledDepsInBundle += BundleMember->UnscheduledDeps;
// Group the instructions to a bundle.
BundleMember->FirstInBundle = Bundle;
PrevInBundle = BundleMember;
}
assert(Bundle && "Failed to find schedule bundle");
return Bundle;
}
// Groups the instructions into a bundle (which is then a single scheduling
// entity) and schedules instructions until the bundle becomes ready.
Optional<BoUpSLP::ScheduleData *>
BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
const InstructionsState &S) {
// No need to schedule PHIs, insertelement, extractelement and extractvalue
// instructions.
if (isa<PHINode>(S.OpValue) || isVectorLikeInstWithConstOps(S.OpValue))
return nullptr;
// Initialize the instruction bundle.
Instruction *OldScheduleEnd = ScheduleEnd;
LLVM_DEBUG(dbgs() << "SLP: bundle: " << *S.OpValue << "\n");
auto TryScheduleBundleImpl = [this, OldScheduleEnd, SLP](bool ReSchedule,
ScheduleData *Bundle) {
// The scheduling region got new instructions at the lower end (or it is a
// new region for the first bundle). This makes it necessary to
// recalculate all dependencies.
// It is seldom that this needs to be done a second time after adding the
// initial bundle to the region.
if (ScheduleEnd != OldScheduleEnd) {
for (auto *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode())
doForAllOpcodes(I, [](ScheduleData *SD) { SD->clearDependencies(); });
ReSchedule = true;
}
if (ReSchedule) {
resetSchedule();
initialFillReadyList(ReadyInsts);
}
if (Bundle) {
LLVM_DEBUG(dbgs() << "SLP: try schedule bundle " << *Bundle
<< " in block " << BB->getName() << "\n");
calculateDependencies(Bundle, /*InsertInReadyList=*/true, SLP);
}
// Now try to schedule the new bundle or (if no bundle) just calculate
// dependencies. As soon as the bundle is "ready" it means that there are no
// cyclic dependencies and we can schedule it. Note that it is important that
// we don't "schedule" the bundle yet (see cancelScheduling).
while (((!Bundle && ReSchedule) || (Bundle && !Bundle->isReady())) &&
!ReadyInsts.empty()) {
ScheduleData *Picked = ReadyInsts.pop_back_val();
if (Picked->isSchedulingEntity() && Picked->isReady())
schedule(Picked, ReadyInsts);
}
};
// Make sure that the scheduling region contains all
// instructions of the bundle.
for (Value *V : VL) {
if (!extendSchedulingRegion(V, S)) {
// If the scheduling region got new instructions at the lower end (or it
// is a new region for the first bundle), all dependencies have to be
// recalculated. Otherwise the compiler may crash, trying to calculate
// dependencies incorrectly and emitting instructions in the wrong order
// during the actual scheduling.
TryScheduleBundleImpl(/*ReSchedule=*/false, nullptr);
return None;
}
}
bool ReSchedule = false;
for (Value *V : VL) {
ScheduleData *BundleMember = getScheduleData(V);
assert(BundleMember &&
"no ScheduleData for bundle member (maybe not in same basic block)");
if (!BundleMember->IsScheduled)
continue;
// A bundle member was scheduled as a single instruction before and now
// needs to be scheduled as part of the bundle. We just get rid of the
// existing schedule.
LLVM_DEBUG(dbgs() << "SLP: reset schedule because " << *BundleMember
<< " was already scheduled\n");
ReSchedule = true;
}
auto *Bundle = buildBundle(VL);
TryScheduleBundleImpl(ReSchedule, Bundle);
if (!Bundle->isReady()) {
cancelScheduling(VL, S.OpValue);
return None;
}
return Bundle;
}
void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL,
Value *OpValue) {
if (isa<PHINode>(OpValue) || isVectorLikeInstWithConstOps(OpValue))
return;
ScheduleData *Bundle = getScheduleData(OpValue);
LLVM_DEBUG(dbgs() << "SLP: cancel scheduling of " << *Bundle << "\n");
assert(!Bundle->IsScheduled &&
"Can't cancel bundle which is already scheduled");
assert(Bundle->isSchedulingEntity() && Bundle->isPartOfBundle() &&
"tried to unbundle something which is not a bundle");
// Un-bundle: make single instructions out of the bundle.
ScheduleData *BundleMember = Bundle;
while (BundleMember) {
assert(BundleMember->FirstInBundle == Bundle && "corrupt bundle links");
BundleMember->FirstInBundle = BundleMember;
ScheduleData *Next = BundleMember->NextInBundle;
BundleMember->NextInBundle = nullptr;
BundleMember->UnscheduledDepsInBundle = BundleMember->UnscheduledDeps;
if (BundleMember->UnscheduledDepsInBundle == 0) {
ReadyInsts.insert(BundleMember);
}
BundleMember = Next;
}
}
BoUpSLP::ScheduleData *BoUpSLP::BlockScheduling::allocateScheduleDataChunks() {
// Allocate a new ScheduleData for the instruction.
if (ChunkPos >= ChunkSize) {
ScheduleDataChunks.push_back(std::make_unique<ScheduleData[]>(ChunkSize));
ChunkPos = 0;
}
return &(ScheduleDataChunks.back()[ChunkPos++]);
}
bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
const InstructionsState &S) {
if (getScheduleData(V, isOneOf(S, V)))
return true;
Instruction *I = dyn_cast<Instruction>(V);
assert(I && "bundle member must be an instruction");
assert(!isa<PHINode>(I) && !isVectorLikeInstWithConstOps(I) &&
"phi nodes/insertelements/extractelements/extractvalues don't need to "
"be scheduled");
auto &&CheckSheduleForI = [this, &S](Instruction *I) -> bool {
ScheduleData *ISD = getScheduleData(I);
if (!ISD)
return false;
assert(isInSchedulingRegion(ISD) &&
"ScheduleData not in scheduling region");
ScheduleData *SD = allocateScheduleDataChunks();
SD->Inst = I;
SD->init(SchedulingRegionID, S.OpValue);
ExtraScheduleDataMap[I][S.OpValue] = SD;
return true;
};
if (CheckSheduleForI(I))
return true;
if (!ScheduleStart) {
// It's the first instruction in the new region.
initScheduleData(I, I->getNextNode(), nullptr, nullptr);
ScheduleStart = I;
ScheduleEnd = I->getNextNode();
if (isOneOf(S, I) != I)
CheckSheduleForI(I);
assert(ScheduleEnd && "tried to vectorize a terminator?");
LLVM_DEBUG(dbgs() << "SLP: initialize schedule region to " << *I << "\n");
return true;
}
// Search up and down at the same time, because we don't know if the new
// instruction is above or below the existing scheduling region.
BasicBlock::reverse_iterator UpIter =
++ScheduleStart->getIterator().getReverse();
BasicBlock::reverse_iterator UpperEnd = BB->rend();
BasicBlock::iterator DownIter = ScheduleEnd->getIterator();
BasicBlock::iterator LowerEnd = BB->end();
while (UpIter != UpperEnd && DownIter != LowerEnd && &*UpIter != I &&
&*DownIter != I) {
if (++ScheduleRegionSize > ScheduleRegionSizeLimit) {
LLVM_DEBUG(dbgs() << "SLP: exceeded schedule region size limit\n");
return false;
}
++UpIter;
++DownIter;
}
if (DownIter == LowerEnd || (UpIter != UpperEnd && &*UpIter == I)) {
assert(I->getParent() == ScheduleStart->getParent() &&
"Instruction is in wrong basic block.");
initScheduleData(I, ScheduleStart, nullptr, FirstLoadStoreInRegion);
ScheduleStart = I;
if (isOneOf(S, I) != I)
CheckSheduleForI(I);
LLVM_DEBUG(dbgs() << "SLP: extend schedule region start to " << *I
<< "\n");
return true;
}
assert((UpIter == UpperEnd || (DownIter != LowerEnd && &*DownIter == I)) &&
"Expected to reach the top of the basic block or the instruction at "
"the lower end.");
assert(I->getParent() == ScheduleEnd->getParent() &&
"Instruction is in wrong basic block.");
initScheduleData(ScheduleEnd, I->getNextNode(), LastLoadStoreInRegion,
nullptr);
ScheduleEnd = I->getNextNode();
if (isOneOf(S, I) != I)
CheckSheduleForI(I);
assert(ScheduleEnd && "tried to vectorize a terminator?");
LLVM_DEBUG(dbgs() << "SLP: extend schedule region end to " << *I << "\n");
return true;
}
void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
Instruction *ToI,
ScheduleData *PrevLoadStore,
ScheduleData *NextLoadStore) {
ScheduleData *CurrentLoadStore = PrevLoadStore;
for (Instruction *I = FromI; I != ToI; I = I->getNextNode()) {
ScheduleData *SD = ScheduleDataMap[I];
if (!SD) {
SD = allocateScheduleDataChunks();
ScheduleDataMap[I] = SD;
SD->Inst = I;
}
assert(!isInSchedulingRegion(SD) &&
"new ScheduleData already in scheduling region");
SD->init(SchedulingRegionID, I);
if (I->mayReadOrWriteMemory() &&
(!isa<IntrinsicInst>(I) ||
(cast<IntrinsicInst>(I)->getIntrinsicID() != Intrinsic::sideeffect &&
cast<IntrinsicInst>(I)->getIntrinsicID() !=
Intrinsic::pseudoprobe))) {
// Update the linked list of memory accessing instructions.
if (CurrentLoadStore) {
CurrentLoadStore->NextLoadStore = SD;
} else {
FirstLoadStoreInRegion = SD;
}
CurrentLoadStore = SD;
}
}
if (NextLoadStore) {
if (CurrentLoadStore)
CurrentLoadStore->NextLoadStore = NextLoadStore;
} else {
LastLoadStoreInRegion = CurrentLoadStore;
}
}
void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
bool InsertInReadyList,
BoUpSLP *SLP) {
assert(SD->isSchedulingEntity());
SmallVector<ScheduleData *, 10> WorkList;
WorkList.push_back(SD);
while (!WorkList.empty()) {
ScheduleData *SD = WorkList.pop_back_val();
for (ScheduleData *BundleMember = SD; BundleMember;
BundleMember = BundleMember->NextInBundle) {
assert(isInSchedulingRegion(BundleMember));
if (BundleMember->hasValidDependencies())
continue;
LLVM_DEBUG(dbgs() << "SLP: update deps of " << *BundleMember
<< "\n");
BundleMember->Dependencies = 0;
BundleMember->resetUnscheduledDeps();
// Handle def-use chain dependencies.
if (BundleMember->OpValue != BundleMember->Inst) {
ScheduleData *UseSD = getScheduleData(BundleMember->Inst);
if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) {
BundleMember->Dependencies++;
ScheduleData *DestBundle = UseSD->FirstInBundle;
if (!DestBundle->IsScheduled)
BundleMember->incrementUnscheduledDeps(1);
if (!DestBundle->hasValidDependencies())
WorkList.push_back(DestBundle);
}
} else {
for (User *U : BundleMember->Inst->users()) {
assert(isa<Instruction>(U) &&
"user of instruction must be instruction");
ScheduleData *UseSD = getScheduleData(U);
if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) {
BundleMember->Dependencies++;
ScheduleData *DestBundle = UseSD->FirstInBundle;
if (!DestBundle->IsScheduled)
BundleMember->incrementUnscheduledDeps(1);
if (!DestBundle->hasValidDependencies())
WorkList.push_back(DestBundle);
}
}
}
// Handle the memory dependencies (if any).
ScheduleData *DepDest = BundleMember->NextLoadStore;
if (!DepDest)
continue;
Instruction *SrcInst = BundleMember->Inst;
assert(SrcInst->mayReadOrWriteMemory() &&
"NextLoadStore list for a non-memory-affecting bundle?");
MemoryLocation SrcLoc = getLocation(SrcInst);
bool SrcMayWrite = BundleMember->Inst->mayWriteToMemory();
unsigned numAliased = 0;
unsigned DistToSrc = 1;
for ( ; DepDest; DepDest = DepDest->NextLoadStore) {
assert(isInSchedulingRegion(DepDest));
// We have two limits to reduce the complexity:
// 1) AliasedCheckLimit: It's a small limit to reduce calls to
// SLP->isAliased (which is the expensive part in this loop).
// 2) MaxMemDepDistance: It's for very large blocks and it aborts
// the whole loop (even if the loop is fast, it's quadratic).
// It's important for the loop break condition (see below) to
// check this limit even between two read-only instructions.
if (DistToSrc >= MaxMemDepDistance ||
((SrcMayWrite || DepDest->Inst->mayWriteToMemory()) &&
(numAliased >= AliasedCheckLimit ||
SLP->isAliased(SrcLoc, SrcInst, DepDest->Inst)))) {
// We increment the counter only if the locations are aliased
// (instead of counting all alias checks). This gives a better
// balance between reduced runtime and accurate dependencies.
numAliased++;
DepDest->MemoryDependencies.push_back(BundleMember);
BundleMember->Dependencies++;
ScheduleData *DestBundle = DepDest->FirstInBundle;
if (!DestBundle->IsScheduled) {
BundleMember->incrementUnscheduledDeps(1);
}
if (!DestBundle->hasValidDependencies()) {
WorkList.push_back(DestBundle);
}
}
// Example, explaining the loop break condition: Let's assume our
// starting instruction is i0 and MaxMemDepDistance = 3.
//
// +--------v--v--v
// i0,i1,i2,i3,i4,i5,i6,i7,i8
// +--------^--^--^
//
// MaxMemDepDistance let us stop alias-checking at i3 and we add
// dependencies from i0 to i3,i4,.. (even if they are not aliased).
// Previously we already added dependencies from i3 to i6,i7,i8
// (because of MaxMemDepDistance). As we added a dependency from
// i0 to i3, we have transitive dependencies from i0 to i6,i7,i8
// and we can abort this loop at i6.
if (DistToSrc >= 2 * MaxMemDepDistance)
break;
DistToSrc++;
}
}
if (InsertInReadyList && SD->isReady()) {
ReadyInsts.push_back(SD);
LLVM_DEBUG(dbgs() << "SLP: gets ready on update: " << *SD->Inst
<< "\n");
}
}
}
void BoUpSLP::BlockScheduling::resetSchedule() {
assert(ScheduleStart &&
"tried to reset schedule on block which has not been scheduled");
for (Instruction *I = ScheduleStart; I != ScheduleEnd; I = I->getNextNode()) {
doForAllOpcodes(I, [&](ScheduleData *SD) {
assert(isInSchedulingRegion(SD) &&
"ScheduleData not in scheduling region");
SD->IsScheduled = false;
SD->resetUnscheduledDeps();
});
}
ReadyInsts.clear();
}
void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
if (!BS->ScheduleStart)
return;
LLVM_DEBUG(dbgs() << "SLP: schedule block " << BS->BB->getName() << "\n");
BS->resetSchedule();
// For the real scheduling we use a more sophisticated ready-list: it is
// sorted by the original instruction location. This lets the final schedule
// be as close as possible to the original instruction order.
struct ScheduleDataCompare {
bool operator()(ScheduleData *SD1, ScheduleData *SD2) const {
return SD2->SchedulingPriority < SD1->SchedulingPriority;
}
};
std::set<ScheduleData *, ScheduleDataCompare> ReadyInsts;
// Ensure that all dependency data is updated and fill the ready-list with
// initial instructions.
int Idx = 0;
int NumToSchedule = 0;
for (auto *I = BS->ScheduleStart; I != BS->ScheduleEnd;
I = I->getNextNode()) {
BS->doForAllOpcodes(I, [this, &Idx, &NumToSchedule, BS](ScheduleData *SD) {
assert((isVectorLikeInstWithConstOps(SD->Inst) ||
SD->isPartOfBundle() == (getTreeEntry(SD->Inst) != nullptr)) &&
"scheduler and vectorizer bundle mismatch");
SD->FirstInBundle->SchedulingPriority = Idx++;
if (SD->isSchedulingEntity()) {
BS->calculateDependencies(SD, false, this);
NumToSchedule++;
}
});
}
BS->initialFillReadyList(ReadyInsts);
Instruction *LastScheduledInst = BS->ScheduleEnd;
// Do the "real" scheduling.
while (!ReadyInsts.empty()) {
ScheduleData *picked = *ReadyInsts.begin();
ReadyInsts.erase(ReadyInsts.begin());
// Move the scheduled instruction(s) to their dedicated places, if not
// there yet.
for (ScheduleData *BundleMember = picked; BundleMember;
BundleMember = BundleMember->NextInBundle) {
Instruction *pickedInst = BundleMember->Inst;
if (pickedInst->getNextNode() != LastScheduledInst)
pickedInst->moveBefore(LastScheduledInst);
LastScheduledInst = pickedInst;
}
BS->schedule(picked, ReadyInsts);
NumToSchedule--;
}
assert(NumToSchedule == 0 && "could not schedule all instructions");
// Avoid duplicate scheduling of the block.
BS->ScheduleStart = nullptr;
}
unsigned BoUpSLP::getVectorElementSize(Value *V) {
// If V is a store, just return the width of the stored value (or value
// truncated just before storing) without traversing the expression tree.
// This is the common case.
if (auto *Store = dyn_cast<StoreInst>(V)) {
if (auto *Trunc = dyn_cast<TruncInst>(Store->getValueOperand()))
return DL->getTypeSizeInBits(Trunc->getSrcTy());
return DL->getTypeSizeInBits(Store->getValueOperand()->getType());
}
if (auto *IEI = dyn_cast<InsertElementInst>(V))
return getVectorElementSize(IEI->getOperand(1));
auto E = InstrElementSize.find(V);
if (E != InstrElementSize.end())
return E->second;
// If V is not a store, we can traverse the expression tree to find loads
// that feed it. The type of the loaded value may indicate a more suitable
// width than V's type. We want to base the vector element size on the width
// of memory operations where possible.
SmallVector<std::pair<Instruction *, BasicBlock *>, 16> Worklist;
SmallPtrSet<Instruction *, 16> Visited;
if (auto *I = dyn_cast<Instruction>(V)) {
Worklist.emplace_back(I, I->getParent());
Visited.insert(I);
}
// Traverse the expression tree in bottom-up order looking for loads. If we
// encounter an instruction we don't yet handle, we give up.
auto Width = 0u;
while (!Worklist.empty()) {
Instruction *I;
BasicBlock *Parent;
std::tie(I, Parent) = Worklist.pop_back_val();
// We should only be looking at scalar instructions here. If the current
// instruction has a vector type, skip.
auto *Ty = I->getType();
if (isa<VectorType>(Ty))
continue;
// If the current instruction is a load, extractelement, or extractvalue,
// update Width to reflect the width of the produced scalar value.
if (isa<LoadInst>(I) || isa<ExtractElementInst>(I) ||
isa<ExtractValueInst>(I))
Width = std::max<unsigned>(Width, DL->getTypeSizeInBits(Ty));
// Otherwise, we need to visit the operands of the instruction. We only
// handle the interesting cases from buildTree here. If an operand is an
// instruction we haven't yet visited and from the same basic block as the
// user or the use is a PHI node, we add it to the worklist.
else if (isa<PHINode>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) ||
isa<CmpInst>(I) || isa<SelectInst>(I) || isa<BinaryOperator>(I) ||
isa<UnaryOperator>(I)) {
for (Use &U : I->operands())
if (auto *J = dyn_cast<Instruction>(U.get()))
if (Visited.insert(J).second &&
(isa<PHINode>(I) || J->getParent() == Parent))
Worklist.emplace_back(J, J->getParent());
} else {
break;
}
}
// If we didn't encounter a memory access in the expression tree, or if we
// gave up for some reason, just return the width of V. Otherwise, return the
// maximum width we found.
if (!Width) {
if (auto *CI = dyn_cast<CmpInst>(V))
V = CI->getOperand(0);
Width = DL->getTypeSizeInBits(V->getType());
}
for (Instruction *I : Visited)
InstrElementSize[I] = Width;
return Width;
}
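// Examples for the element-size query above (hypothetical IR, single block):
// a store of (trunc i64 %x to i16) returns 16 immediately, while for the
// chain %l = load i8; %e = zext %l to i32; %s = add %e, 1 the bottom-up walk
// from %s reaches the i8 load and returns 8 instead of the 32 bits of %s's
// own type.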
// Determine if a value V in a vectorizable expression Expr can be demoted to a
// smaller type with a truncation. We collect the values that will be demoted
// in ToDemote and additional roots that require investigating in Roots.
static bool collectValuesToDemote(Value *V, SmallPtrSetImpl<Value *> &Expr,
SmallVectorImpl<Value *> &ToDemote,
SmallVectorImpl<Value *> &Roots) {
// We can always demote constants.
if (isa<Constant>(V)) {
ToDemote.push_back(V);
return true;
}
// If the value is not a single-use instruction that belongs to the
// expression, it cannot be demoted.
auto *I = dyn_cast<Instruction>(V);
if (!I || !I->hasOneUse() || !Expr.count(I))
return false;
switch (I->getOpcode()) {
// We can always demote truncations and extensions. Since truncations can
// seed additional demotion, we save the truncated value.
case Instruction::Trunc:
Roots.push_back(I->getOperand(0));
break;
case Instruction::ZExt:
case Instruction::SExt:
if (isa<ExtractElementInst>(I->getOperand(0)) ||
isa<InsertElementInst>(I->getOperand(0)))
return false;
break;
// We can demote certain binary operations if we can demote both of their
// operands.
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
if (!collectValuesToDemote(I->getOperand(0), Expr, ToDemote, Roots) ||
!collectValuesToDemote(I->getOperand(1), Expr, ToDemote, Roots))
return false;
break;
// We can demote selects if we can demote their true and false values.
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
if (!collectValuesToDemote(SI->getTrueValue(), Expr, ToDemote, Roots) ||
!collectValuesToDemote(SI->getFalseValue(), Expr, ToDemote, Roots))
return false;
break;
}
// We can demote phis if we can demote all their incoming operands. Note that
// we don't need to worry about cycles since we ensure single use above.
case Instruction::PHI: {
PHINode *PN = cast<PHINode>(I);
for (Value *IncValue : PN->incoming_values())
if (!collectValuesToDemote(IncValue, Expr, ToDemote, Roots))
return false;
break;
}
// Otherwise, conservatively give up.
default:
return false;
}
// Record the value that we can demote.
ToDemote.push_back(V);
return true;
}
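// Illustrative example (hypothetical IR, not part of the original source):
// in an expression such as
//   %a = add i32 %x, %y
//   %t = trunc i32 %a to i16
// the trunc itself can be demoted, and its operand %a is recorded in Roots
// so that the values feeding it can be investigated for demotion as well.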
void BoUpSLP::computeMinimumValueSizes() {
// If there are no external uses, the expression tree must be rooted by a
// store. We can't demote in-memory values, so there is nothing to do here.
if (ExternalUses.empty())
return;
// We only attempt to truncate integer expressions.
auto &TreeRoot = VectorizableTree[0]->Scalars;
auto *TreeRootIT = dyn_cast<IntegerType>(TreeRoot[0]->getType());
if (!TreeRootIT)
return;
// If the expression is not rooted by a store, these roots should have
// external uses. We will rely on InstCombine to rewrite the expression in
// the narrower type. However, InstCombine only rewrites single-use values.
// This means that if a tree entry other than a root is used externally, it
// must have multiple uses and InstCombine will not rewrite it. The code
// below ensures that only the roots are used externally.
SmallPtrSet<Value *, 32> Expr(TreeRoot.begin(), TreeRoot.end());
for (auto &EU : ExternalUses)
if (!Expr.erase(EU.Scalar))
return;
if (!Expr.empty())
return;
// Collect the scalar values of the vectorizable expression. We will use this
// context to determine which values can be demoted. If we see a truncation,
// we mark it as seeding another demotion.
for (auto &EntryPtr : VectorizableTree)
Expr.insert(EntryPtr->Scalars.begin(), EntryPtr->Scalars.end());
// Ensure the roots of the vectorizable tree don't form a cycle. They must
// have a single external user that is not in the vectorizable tree.
for (auto *Root : TreeRoot)
if (!Root->hasOneUse() || Expr.count(*Root->user_begin()))
return;
// Conservatively determine if we can actually truncate the roots of the
// expression. Collect the values that can be demoted in ToDemote and
// additional roots that require investigating in Roots.
SmallVector<Value *, 32> ToDemote;
SmallVector<Value *, 4> Roots;
for (auto *Root : TreeRoot)
if (!collectValuesToDemote(Root, Expr, ToDemote, Roots))
return;
// The maximum bit width required to represent all the values that can be
// demoted without loss of precision. It would be safe to truncate the roots
// of the expression to this width.
auto MaxBitWidth = 8u;
// We first check if all the bits of the roots are demanded. If they're not,
// we can truncate the roots to this narrower type.
for (auto *Root : TreeRoot) {
auto Mask = DB->getDemandedBits(cast<Instruction>(Root));
MaxBitWidth = std::max<unsigned>(
Mask.getBitWidth() - Mask.countLeadingZeros(), MaxBitWidth);
}
// True if the roots can be zero-extended back to their original type, rather
// than sign-extended. We know that if the leading bits are not demanded, we
// can safely zero-extend. So we initialize IsKnownPositive to True.
bool IsKnownPositive = true;
// If all the bits of the roots are demanded, we can try a little harder to
// compute a narrower type. This can happen, for example, if the roots are
// getelementptr indices. InstCombine promotes these indices to the pointer
// width. Thus, all their bits are technically demanded even though the
// address computation might be vectorized in a smaller type.
//
// We start by looking at each entry that can be demoted. We compute the
// maximum bit width required to store the scalar by using ValueTracking to
// compute the number of high-order bits we can truncate.
if (MaxBitWidth == DL->getTypeSizeInBits(TreeRoot[0]->getType()) &&
llvm::all_of(TreeRoot, [](Value *R) {
assert(R->hasOneUse() && "Root should have only one use!");
return isa<GetElementPtrInst>(R->user_back());
})) {
MaxBitWidth = 8u;
// Determine if the sign bit of all the roots is known to be zero. If not,
// IsKnownPositive is set to False.
IsKnownPositive = llvm::all_of(TreeRoot, [&](Value *R) {
KnownBits Known = computeKnownBits(R, *DL);
return Known.isNonNegative();
});
// Determine the maximum number of bits required to store the scalar
// values.
for (auto *Scalar : ToDemote) {
auto NumSignBits = ComputeNumSignBits(Scalar, *DL, 0, AC, nullptr, DT);
auto NumTypeBits = DL->getTypeSizeInBits(Scalar->getType());
MaxBitWidth = std::max<unsigned>(NumTypeBits - NumSignBits, MaxBitWidth);
}
// If we can't prove that the sign bit is zero, we must add one to the
// maximum bit width to account for the unknown sign bit. This preserves
// the existing sign bit so we can safely sign-extend the root back to the
// original type. Otherwise, if we know the sign bit is zero, we will
// zero-extend the root instead.
//
// FIXME: This is somewhat suboptimal, as there will be cases where adding
// one to the maximum bit width will yield a larger-than-necessary
// type. In general, we need to add an extra bit only if we can't
// prove that the upper bit of the original type is equal to the
// upper bit of the proposed smaller type. If these two bits are the
// same (either zero or one) we know that sign-extending from the
// smaller type will result in the same value. Here, since we can't
// yet prove this, we are just making the proposed smaller type
// larger to ensure correctness.
if (!IsKnownPositive)
++MaxBitWidth;
}
// Round MaxBitWidth up to the next power-of-two.
if (!isPowerOf2_64(MaxBitWidth))
MaxBitWidth = NextPowerOf2(MaxBitWidth);
// If the maximum bit width we compute is less than the width of the roots'
// type, we can proceed with the narrowing. Otherwise, do nothing.
if (MaxBitWidth >= TreeRootIT->getBitWidth())
return;
// If we can truncate the root, we must collect additional values that might
// be demoted as a result. That is, those seeded by truncations we will
// modify.
while (!Roots.empty())
collectValuesToDemote(Roots.pop_back_val(), Expr, ToDemote, Roots);
// Finally, map the values we can demote to the maximum bit width we computed.
for (auto *Scalar : ToDemote)
MinBWs[Scalar] = std::make_pair(MaxBitWidth, !IsKnownPositive);
}
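// Illustrative example (hypothetical, not from the original source): if
// DemandedBits shows that only the low 12 bits of each i32 root are demanded,
// MaxBitWidth becomes 12 and is rounded up to 16; since 16 is narrower than
// the 32-bit root type, every demotable scalar is mapped to a 16-bit minimum
// width in MinBWs, and the roots can later be zero-extended back because the
// leading bits were not demanded.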
namespace {
/// The SLPVectorizer Pass.
struct SLPVectorizer : public FunctionPass {
SLPVectorizerPass Impl;
/// Pass identification, replacement for typeid
static char ID;
explicit SLPVectorizer() : FunctionPass(ID) {
initializeSLPVectorizerPass(*PassRegistry::getPassRegistry());
}
bool doInitialization(Module &M) override { return false; }
bool runOnFunction(Function &F) override {
if (skipFunction(F))
return false;
auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto *DB = &getAnalysis<DemandedBitsWrapperPass>().getDemandedBits();
auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
return Impl.runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB, ORE);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
FunctionPass::getAnalysisUsage(AU);
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<DemandedBitsWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
AU.addRequired<InjectTLIMappingsLegacy>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<AAResultsWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
AU.setPreservesCFG();
}
};
} // end anonymous namespace
PreservedAnalyses SLPVectorizerPass::run(Function &F, FunctionAnalysisManager &AM) {
auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
auto *TTI = &AM.getResult<TargetIRAnalysis>(F);
auto *TLI = AM.getCachedResult<TargetLibraryAnalysis>(F);
auto *AA = &AM.getResult<AAManager>(F);
auto *LI = &AM.getResult<LoopAnalysis>(F);
auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
auto *AC = &AM.getResult<AssumptionAnalysis>(F);
auto *DB = &AM.getResult<DemandedBitsAnalysis>(F);
auto *ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
bool Changed = runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB, ORE);
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
return PA;
}
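// Usage note (not part of the original source; command lines shown for
// illustration only): the pass can be exercised directly with opt, e.g. via
// the new pass manager with
//   opt -passes=slp-vectorizer -S input.ll
// or via the legacy pass manager wrapper above with
//   opt -slp-vectorizer -S input.ll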
bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
TargetTransformInfo *TTI_,
TargetLibraryInfo *TLI_, AAResults *AA_,
LoopInfo *LI_, DominatorTree *DT_,
AssumptionCache *AC_, DemandedBits *DB_,
OptimizationRemarkEmitter *ORE_) {
if (!RunSLPVectorization)
return false;
SE = SE_;
TTI = TTI_;
TLI = TLI_;
AA = AA_;
LI = LI_;
DT = DT_;
AC = AC_;
DB = DB_;
DL = &F.getParent()->getDataLayout();
Stores.clear();
GEPs.clear();
bool Changed = false;
// If the target claims to have no vector registers don't attempt
// vectorization.
if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true))) {
LLVM_DEBUG(
dbgs() << "SLP: Didn't find any vector registers for target, abort.\n");
return false;
}
// Don't vectorize when the attribute NoImplicitFloat is used.
if (F.hasFnAttribute(Attribute::NoImplicitFloat))
return false;
LLVM_DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n");
// Use the bottom-up SLP vectorizer to construct chains that start with
// store instructions.
BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC, DB, DL, ORE_);
// A general note: the vectorizer must use BoUpSLP::eraseInstruction() to
// delete instructions.
// Update DFS numbers now so that we can use them for ordering.
DT->updateDFSNumbers();
// Scan the blocks in the function in post order.
for (auto BB : post_order(&F.getEntryBlock())) {
collectSeedInstructions(BB);
// Vectorize trees that end at stores.
if (!Stores.empty()) {
LLVM_DEBUG(dbgs() << "SLP: Found stores for " << Stores.size()
<< " underlying objects.\n");
Changed |= vectorizeStoreChains(R);
}
// Vectorize trees that end at reductions.
Changed |= vectorizeChainsInBlock(BB, R);
// Vectorize the index computations of getelementptr instructions. This
// is primarily intended to catch gather-like idioms ending at
// non-consecutive loads.
if (!GEPs.empty()) {
LLVM_DEBUG(dbgs() << "SLP: Found GEPs for " << GEPs.size()
<< " underlying objects.\n");
Changed |= vectorizeGEPIndices(BB, R);
}
}
if (Changed) {
R.optimizeGatherSequence();
LLVM_DEBUG(dbgs() << "SLP: vectorized \"" << F.getName() << "\"\n");
}
return Changed;
}
bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
unsigned Idx) {
LLVM_DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << Chain.size()
<< "\n");
const unsigned Sz = R.getVectorElementSize(Chain[0]);
const unsigned MinVF = R.getMinVecRegSize() / Sz;
unsigned VF = Chain.size();
if (!isPowerOf2_32(Sz) || !isPowerOf2_32(VF) || VF < 2 || VF < MinVF)
return false;
LLVM_DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << Idx
<< "\n");
R.buildTree(Chain);
if (R.isTreeTinyAndNotFullyVectorizable())
return false;
if (R.isLoadCombineCandidate())
return false;
R.reorderTopToBottom();
R.reorderBottomToTop();
R.buildExternalUses();
R.computeMinimumValueSizes();
InstructionCost Cost = R.getTreeCost();
LLVM_DEBUG(dbgs() << "SLP: Found cost = " << Cost << " for VF =" << VF << "\n");
if (Cost < -SLPCostThreshold) {
LLVM_DEBUG(dbgs() << "SLP: Decided to vectorize cost = " << Cost << "\n");
using namespace ore;
R.getORE()->emit(OptimizationRemark(SV_NAME, "StoresVectorized",
cast<StoreInst>(Chain[0]))
<< "Stores SLP vectorized with cost " << NV("Cost", Cost)
<< " and with tree size "
<< NV("TreeSize", R.getTreeSize()));
R.vectorizeTree();
return true;
}
return false;
}
bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
BoUpSLP &R) {
// We may run into multiple chains that merge into a single chain. We mark the
// stores that we vectorized so that we don't visit the same store twice.
BoUpSLP::ValueSet VectorizedStores;
bool Changed = false;
int E = Stores.size();
SmallBitVector Tails(E, false);
int MaxIter = MaxStoreLookup.getValue();
SmallVector<std::pair<int, int>, 16> ConsecutiveChain(
E, std::make_pair(E, INT_MAX));
SmallVector<SmallBitVector, 4> CheckedPairs(E, SmallBitVector(E, false));
int IterCnt;
auto &&FindConsecutiveAccess = [this, &Stores, &Tails, &IterCnt, MaxIter,
&CheckedPairs,
&ConsecutiveChain](int K, int Idx) {
if (IterCnt >= MaxIter)
return true;
if (CheckedPairs[Idx].test(K))
return ConsecutiveChain[K].second == 1 &&
ConsecutiveChain[K].first == Idx;
++IterCnt;
CheckedPairs[Idx].set(K);
CheckedPairs[K].set(Idx);
Optional<int> Diff = getPointersDiff(
Stores[K]->getValueOperand()->getType(), Stores[K]->getPointerOperand(),
Stores[Idx]->getValueOperand()->getType(),
Stores[Idx]->getPointerOperand(), *DL, *SE, /*StrictCheck=*/true);
if (!Diff || *Diff == 0)
return false;
int Val = *Diff;
if (Val < 0) {
if (ConsecutiveChain[Idx].second > -Val) {
Tails.set(K);
ConsecutiveChain[Idx] = std::make_pair(K, -Val);
}
return false;
}
if (ConsecutiveChain[K].second <= Val)
return false;
Tails.set(Idx);
ConsecutiveChain[K] = std::make_pair(Idx, Val);
return Val == 1;
};
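// Illustrative example (hypothetical IR, not from the original source): for
//   store i32 %a, i32* %p
//   %q = getelementptr inbounds i32, i32* %p, i64 1
//   store i32 %b, i32* %q
// getPointersDiff returns 1 for the pair, so the store to %q is recorded in
// ConsecutiveChain as the consecutive successor of the store to %p.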
// Do a quadratic search on all of the given stores in reverse order and find
// all of the pairs of stores that follow each other.
for (int Idx = E - 1; Idx >= 0; --Idx) {
// If a store has multiple consecutive store candidates, search according
// to the sequence: Idx-1, Idx+1, Idx-2, Idx+2, ...
// This is because pairing with the immediately succeeding or preceding
// candidate usually creates the best chance of finding an SLP
// vectorization opportunity.
const int MaxLookDepth = std::max(E - Idx, Idx + 1);
IterCnt = 0;
for (int Offset = 1, F = MaxLookDepth; Offset < F; ++Offset)
if ((Idx >= Offset && FindConsecutiveAccess(Idx - Offset, Idx)) ||
(Idx + Offset < E && FindConsecutiveAccess(Idx + Offset, Idx)))
break;
}
// Tracks if we tried to vectorize stores starting from the given tail
// already.
SmallBitVector TriedTails(E, false);
// For stores that start but don't end a link in the chain:
for (int Cnt = E; Cnt > 0; --Cnt) {
int I = Cnt - 1;
if (ConsecutiveChain[I].first == E || Tails.test(I))
continue;
// We found a store instr that starts a chain. Now follow the chain and try
// to vectorize it.
BoUpSLP::ValueList Operands;
// Collect the chain into a list.
while (I != E && !VectorizedStores.count(Stores[I])) {
Operands.push_back(Stores[I]);
Tails.set(I);
if (ConsecutiveChain[I].second != 1) {
// Mark the new end in the chain and go back, if required. It might be
// required if the original stores come in reversed order, for example.
if (ConsecutiveChain[I].first != E &&
Tails.test(ConsecutiveChain[I].first) && !TriedTails.test(I) &&
!VectorizedStores.count(Stores[ConsecutiveChain[I].first])) {
TriedTails.set(I);
Tails.reset(ConsecutiveChain[I].first);
if (Cnt < ConsecutiveChain[I].first + 2)
Cnt = ConsecutiveChain[I].first + 2;
}
break;
}
// Move to the next value in the chain.
I = ConsecutiveChain[I].first;
}
assert(!Operands.empty() && "Expected non-empty list of stores.");
unsigned MaxVecRegSize = R.getMaxVecRegSize();
unsigned EltSize = R.getVectorElementSize(Operands[0]);
unsigned MaxElts = llvm::PowerOf2Floor(MaxVecRegSize / EltSize);
unsigned MinVF = R.getMinVF(EltSize);
unsigned MaxVF = std::min(R.getMaximumVF(EltSize, Instruction::Store),
MaxElts);
// FIXME: Is division-by-2 the correct step? Should we assert that the
// register size is a power-of-2?
unsigned StartIdx = 0;
for (unsigned Size = MaxVF; Size >= MinVF; Size /= 2) {
for (unsigned Cnt = StartIdx, E = Operands.size(); Cnt + Size <= E;) {
ArrayRef<Value *> Slice = makeArrayRef(Operands).slice(Cnt, Size);
if (!VectorizedStores.count(Slice.front()) &&
!VectorizedStores.count(Slice.back()) &&
vectorizeStoreChain(Slice, R, Cnt)) {
// Mark the vectorized stores so that we don't vectorize them again.
VectorizedStores.insert(Slice.begin(), Slice.end());
Changed = true;
// If we vectorized the initial block, there is no need to try to
// vectorize it again.
if (Cnt == StartIdx)
StartIdx += Size;
Cnt += Size;
continue;
}
++Cnt;
}
// Check if the whole array was vectorized already - exit.
if (StartIdx >= Operands.size())
break;
}
}
return Changed;
}
void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) {
// Initialize the collections. We will make a single pass over the block.
Stores.clear();
GEPs.clear();
// Visit the store and getelementptr instructions in BB and organize them in
// Stores and GEPs according to the underlying objects of their pointer
// operands.
for (Instruction &I : *BB) {
// Ignore store instructions that are volatile or have a pointer operand
// that doesn't point to a scalar type.
if (auto *SI = dyn_cast<StoreInst>(&I)) {
if (!SI->isSimple())
continue;
if (!isValidElementType(SI->getValueOperand()->getType()))
continue;
Stores[getUnderlyingObject(SI->getPointerOperand())].push_back(SI);
}
// Ignore getelementptr instructions that have more than one index, a
// constant index, or a pointer operand that doesn't point to a scalar
// type.
else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
auto Idx = GEP->idx_begin()->get();
if (GEP->getNumIndices() > 1 || isa<Constant>(Idx))
continue;
if (!isValidElementType(Idx->getType()))
continue;
if (GEP->getType()->isVectorTy())
continue;
GEPs[GEP->getPointerOperand()].push_back(GEP);
}
}
}
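// Illustrative example (hypothetical IR, not part of the original source):
//   %g = getelementptr inbounds i32, i32* %base, i64 %i  ; collected in GEPs
//   %h = getelementptr inbounds i32, i32* %base, i64 4   ; skipped: constant index
// Simple stores such as "store i32 %v, i32* %p" are grouped in Stores by the
// underlying object of their pointer operand.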
bool SLPVectorizerPass::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
if (!A || !B)
return false;
+ if (isa<InsertElementInst>(A) || isa<InsertElementInst>(B))
+ return false;
Value *VL[] = {A, B};
return tryToVectorizeList(VL, R);
}
bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
bool LimitForRegisterSize) {
if (VL.size() < 2)
return false;
LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize a list of length = "
<< VL.size() << ".\n");
// Check that all of the parts are instructions of the same type;
// we permit an alternate opcode via InstructionsState.
InstructionsState S = getSameOpcode(VL);
if (!S.getOpcode())
return false;
Instruction *I0 = cast<Instruction>(S.OpValue);
// Make sure invalid types (including vector type) are rejected before
// determining vectorization factor for scalar instructions.
for (Value *V : VL) {
Type *Ty = V->getType();
if (!isa<InsertElementInst>(V) && !isValidElementType(Ty)) {
// NOTE: the following will give the user an internal LLVM type name,
// which may not be useful.
R.getORE()->emit([&]() {
std::string type_str;
llvm::raw_string_ostream rso(type_str);
Ty->print(rso);
return OptimizationRemarkMissed(SV_NAME, "UnsupportedType", I0)
<< "Cannot SLP vectorize list: type "
<< rso.str() + " is unsupported by vectorizer";
});
return false;
}
}
unsigned Sz = R.getVectorElementSize(I0);
unsigned MinVF = R.getMinVF(Sz);
unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
MaxVF = std::min(R.getMaximumVF(Sz, S.getOpcode()), MaxVF);
if (MaxVF < 2) {
R.getORE()->emit([&]() {
return OptimizationRemarkMissed(SV_NAME, "SmallVF", I0)
<< "Cannot SLP vectorize list: vectorization factor "
<< "less than 2 is not supported";
});
return false;
}
bool Changed = false;
bool CandidateFound = false;
InstructionCost MinCost = SLPCostThreshold.getValue();
Type *ScalarTy = VL[0]->getType();
if (auto *IE = dyn_cast<InsertElementInst>(VL[0]))
ScalarTy = IE->getOperand(1)->getType();
unsigned NextInst = 0, MaxInst = VL.size();
for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; VF /= 2) {
// No actual vectorization should happen if the number of parts is the same
// as the provided vectorization factor (i.e. the scalar type is used for
// the vector code during codegen).
auto *VecTy = FixedVectorType::get(ScalarTy, VF);
if (TTI->getNumberOfParts(VecTy) == VF)
continue;
for (unsigned I = NextInst; I < MaxInst; ++I) {
unsigned OpsWidth = 0;
if (I + VF > MaxInst)
OpsWidth = MaxInst - I;
else
OpsWidth = VF;
if (!isPowerOf2_32(OpsWidth))
continue;
if ((LimitForRegisterSize && OpsWidth < MaxVF) ||
(VF > MinVF && OpsWidth <= VF / 2) || (VF == MinVF && OpsWidth < 2))
break;
ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
// Check that a previous iteration of this loop did not delete the Value.
if (llvm::any_of(Ops, [&R](Value *V) {
auto *I = dyn_cast<Instruction>(V);
return I && R.isDeleted(I);
}))
continue;
LLVM_DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
<< "\n");
R.buildTree(Ops);
if (R.isTreeTinyAndNotFullyVectorizable())
continue;
R.reorderTopToBottom();
R.reorderBottomToTop(!isa<InsertElementInst>(Ops.front()));
R.buildExternalUses();
R.computeMinimumValueSizes();
InstructionCost Cost = R.getTreeCost();
CandidateFound = true;
MinCost = std::min(MinCost, Cost);
if (Cost < -SLPCostThreshold) {
LLVM_DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
R.getORE()->emit(OptimizationRemark(SV_NAME, "VectorizedList",
cast<Instruction>(Ops[0]))
<< "SLP vectorized with cost " << ore::NV("Cost", Cost)
<< " and with tree size "
<< ore::NV("TreeSize", R.getTreeSize()));
R.vectorizeTree();
// Move to the next bundle.
I += VF - 1;
NextInst = I + 1;
Changed = true;
}
}
}
if (!Changed && CandidateFound) {
R.getORE()->emit([&]() {
return OptimizationRemarkMissed(SV_NAME, "NotBeneficial", I0)
<< "List vectorization was possible but not beneficial with cost "
<< ore::NV("Cost", MinCost) << " >= "
<< ore::NV("Treshold", -SLPCostThreshold);
});
} else if (!Changed) {
R.getORE()->emit([&]() {
return OptimizationRemarkMissed(SV_NAME, "NotPossible", I0)
<< "Cannot SLP vectorize list: vectorization was impossible"
<< " with available vectorization factors";
});
}
return Changed;
}
bool SLPVectorizerPass::tryToVectorize(Instruction *I, BoUpSLP &R) {
if (!I)
return false;
if (!isa<BinaryOperator>(I) && !isa<CmpInst>(I))
return false;
Value *P = I->getParent();
// Vectorize in current basic block only.
auto *Op0 = dyn_cast<Instruction>(I->getOperand(0));
auto *Op1 = dyn_cast<Instruction>(I->getOperand(1));
if (!Op0 || !Op1 || Op0->getParent() != P || Op1->getParent() != P)
return false;
// Try to vectorize V.
if (tryToVectorizePair(Op0, Op1, R))
return true;
auto *A = dyn_cast<BinaryOperator>(Op0);
auto *B = dyn_cast<BinaryOperator>(Op1);
// Try to skip B.
if (B && B->hasOneUse()) {
auto *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
auto *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
if (B0 && B0->getParent() == P && tryToVectorizePair(A, B0, R))
return true;
if (B1 && B1->getParent() == P && tryToVectorizePair(A, B1, R))
return true;
}
// Try to skip A.
if (A && A->hasOneUse()) {
auto *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
auto *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
if (A0 && A0->getParent() == P && tryToVectorizePair(A0, B, R))
return true;
if (A1 && A1->getParent() == P && tryToVectorizePair(A1, B, R))
return true;
}
return false;
}
namespace {
/// Model horizontal reductions.
///
/// A horizontal reduction is a tree of reduction instructions that has values
/// that can be put into a vector as its leaves. For example:
///
/// mul mul mul mul
/// \ / \ /
/// + +
/// \ /
/// +
/// This tree has "mul" as its leaf values and "+" as its reduction
/// instructions. A reduction can feed into a store or a binary operation
/// feeding a phi.
/// ...
/// \ /
/// +
/// |
/// phi +=
///
/// Or:
/// ...
/// \ /
/// +
/// |
/// *p =
///
class HorizontalReduction {
using ReductionOpsType = SmallVector<Value *, 16>;
using ReductionOpsListType = SmallVector<ReductionOpsType, 2>;
ReductionOpsListType ReductionOps;
SmallVector<Value *, 32> ReducedVals;
// Use a MapVector to make the output stable.
MapVector<Instruction *, Value *> ExtraArgs;
WeakTrackingVH ReductionRoot;
/// The type of reduction operation.
RecurKind RdxKind;
const unsigned INVALID_OPERAND_INDEX = std::numeric_limits<unsigned>::max();
static bool isCmpSelMinMax(Instruction *I) {
return match(I, m_Select(m_Cmp(), m_Value(), m_Value())) &&
RecurrenceDescriptor::isMinMaxRecurrenceKind(getRdxKind(I));
}
// And/or are potentially poison-safe logical patterns like:
// select x, y, false
// select x, true, y
static bool isBoolLogicOp(Instruction *I) {
return match(I, m_LogicalAnd(m_Value(), m_Value())) ||
match(I, m_LogicalOr(m_Value(), m_Value()));
}
/// Checks if instruction is associative and can be vectorized.
static bool isVectorizable(RecurKind Kind, Instruction *I) {
if (Kind == RecurKind::None)
return false;
// Integer ops that map to select instructions or intrinsics are fine.
if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(Kind) ||
isBoolLogicOp(I))
return true;
if (Kind == RecurKind::FMax || Kind == RecurKind::FMin) {
// FP min/max are associative except for NaN and -0.0. We do not
// have to rule out -0.0 here because the intrinsic semantics do not
// specify a fixed result for it.
return I->getFastMathFlags().noNaNs();
}
return I->isAssociative();
}
static Value *getRdxOperand(Instruction *I, unsigned Index) {
// Poison-safe 'or' takes the form: select X, true, Y
// To make that work with the normal operand processing, we skip the
// true value operand.
// TODO: Change the code and data structures to handle this without a hack.
if (getRdxKind(I) == RecurKind::Or && isa<SelectInst>(I) && Index == 1)
return I->getOperand(2);
return I->getOperand(Index);
}
/// Checks if ParentStackElem.first should be marked as a reduction
/// operation with an extra argument or as an extra argument itself.
void markExtraArg(std::pair<Instruction *, unsigned> &ParentStackElem,
Value *ExtraArg) {
if (ExtraArgs.count(ParentStackElem.first)) {
ExtraArgs[ParentStackElem.first] = nullptr;
// We ran into something like:
// ParentStackElem.first = ExtraArgs[ParentStackElem.first] + ExtraArg.
// The whole ParentStackElem.first should be considered as an extra value
// in this case.
// Do not perform analysis of remaining operands of ParentStackElem.first
// instruction, this whole instruction is an extra argument.
ParentStackElem.second = INVALID_OPERAND_INDEX;
} else {
// We ran into something like:
// ParentStackElem.first += ... + ExtraArg + ...
ExtraArgs[ParentStackElem.first] = ExtraArg;
}
}
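// Illustrative example (hypothetical IR, not from the original source): for a
// reduction such as
//   %s0 = add i32 %a, %b
//   %s1 = add i32 %s0, %x      ; %x is not part of the reduction tree
// the value %x is recorded as an extra argument of %s1; if a second
// non-reduction operand were later found for %s1, the whole of %s1 would be
// treated as an extra value instead.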
/// Creates reduction operation with the current opcode.
static Value *createOp(IRBuilder<> &Builder, RecurKind Kind, Value *LHS,
Value *RHS, const Twine &Name, bool UseSelect) {
unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind);
switch (Kind) {
case RecurKind::Or:
if (UseSelect &&
LHS->getType() == CmpInst::makeCmpResultType(LHS->getType()))
return Builder.CreateSelect(LHS, Builder.getTrue(), RHS, Name);
return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS,
Name);
case RecurKind::And:
if (UseSelect &&
LHS->getType() == CmpInst::makeCmpResultType(LHS->getType()))
return Builder.CreateSelect(LHS, RHS, Builder.getFalse(), Name);
return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS,
Name);
case RecurKind::Add:
case RecurKind::Mul:
case RecurKind::Xor:
case RecurKind::FAdd:
case RecurKind::FMul:
return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS,
Name);
case RecurKind::FMax:
return Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, LHS, RHS);
case RecurKind::FMin:
return Builder.CreateBinaryIntrinsic(Intrinsic::minnum, LHS, RHS);
case RecurKind::SMax:
if (UseSelect) {
Value *Cmp = Builder.CreateICmpSGT(LHS, RHS, Name);
return Builder.CreateSelect(Cmp, LHS, RHS, Name);
}
return Builder.CreateBinaryIntrinsic(Intrinsic::smax, LHS, RHS);
case RecurKind::SMin:
if (UseSelect) {
Value *Cmp = Builder.CreateICmpSLT(LHS, RHS, Name);
return Builder.CreateSelect(Cmp, LHS, RHS, Name);
}
return Builder.CreateBinaryIntrinsic(Intrinsic::smin, LHS, RHS);
case RecurKind::UMax:
if (UseSelect) {
Value *Cmp = Builder.CreateICmpUGT(LHS, RHS, Name);
return Builder.CreateSelect(Cmp, LHS, RHS, Name);
}
return Builder.CreateBinaryIntrinsic(Intrinsic::umax, LHS, RHS);
case RecurKind::UMin:
if (UseSelect) {
Value *Cmp = Builder.CreateICmpULT(LHS, RHS, Name);
return Builder.CreateSelect(Cmp, LHS, RHS, Name);
}
return Builder.CreateBinaryIntrinsic(Intrinsic::umin, LHS, RHS);
default:
llvm_unreachable("Unknown reduction operation.");
}
}
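// For illustration (hypothetical IR, not part of the original source), a
// signed-max reduction step can be emitted in either of the two forms
// selected above:
//   %c = icmp sgt i32 %lhs, %rhs
//   %m = select i1 %c, i32 %lhs, i32 %rhs
// or, when UseSelect is false,
//   %m = call i32 @llvm.smax.i32(i32 %lhs, i32 %rhs)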
/// Creates reduction operation with the current opcode with the IR flags
/// from \p ReductionOps.
static Value *createOp(IRBuilder<> &Builder, RecurKind RdxKind, Value *LHS,
Value *RHS, const Twine &Name,
const ReductionOpsListType &ReductionOps) {
bool UseSelect = ReductionOps.size() == 2 ||
// Logical or/and.
(ReductionOps.size() == 1 &&
isa<SelectInst>(ReductionOps.front().front()));
assert((!UseSelect || ReductionOps.size() != 2 ||
isa<SelectInst>(ReductionOps[1][0])) &&
"Expected cmp + select pairs for reduction");
Value *Op = createOp(Builder, RdxKind, LHS, RHS, Name, UseSelect);
if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind)) {
if (auto *Sel = dyn_cast<SelectInst>(Op)) {
propagateIRFlags(Sel->getCondition(), ReductionOps[0]);
propagateIRFlags(Op, ReductionOps[1]);
return Op;
}
}
propagateIRFlags(Op, ReductionOps[0]);
return Op;
}
/// Creates reduction operation with the current opcode with the IR flags
/// from \p I.
static Value *createOp(IRBuilder<> &Builder, RecurKind RdxKind, Value *LHS,
Value *RHS, const Twine &Name, Instruction *I) {
auto *SelI = dyn_cast<SelectInst>(I);
Value *Op = createOp(Builder, RdxKind, LHS, RHS, Name, SelI != nullptr);
if (SelI && RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind)) {
if (auto *Sel = dyn_cast<SelectInst>(Op))
propagateIRFlags(Sel->getCondition(), SelI->getCondition());
}
propagateIRFlags(Op, I);
return Op;
}
static RecurKind getRdxKind(Instruction *I) {
assert(I && "Expected instruction for reduction matching");
if (match(I, m_Add(m_Value(), m_Value())))
return RecurKind::Add;
if (match(I, m_Mul(m_Value(), m_Value())))
return RecurKind::Mul;
if (match(I, m_And(m_Value(), m_Value())) ||
match(I, m_LogicalAnd(m_Value(), m_Value())))
return RecurKind::And;
if (match(I, m_Or(m_Value(), m_Value())) ||
match(I, m_LogicalOr(m_Value(), m_Value())))
return RecurKind::Or;
if (match(I, m_Xor(m_Value(), m_Value())))
return RecurKind::Xor;
if (match(I, m_FAdd(m_Value(), m_Value())))
return RecurKind::FAdd;
if (match(I, m_FMul(m_Value(), m_Value())))
return RecurKind::FMul;
if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value())))
return RecurKind::FMax;
if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_Value())))
return RecurKind::FMin;
// This matches either cmp+select or intrinsics. SLP is expected to handle
// either form.
// TODO: If we are canonicalizing to intrinsics, we can remove several
// special-case paths that deal with selects.
if (match(I, m_SMax(m_Value(), m_Value())))
return RecurKind::SMax;
if (match(I, m_SMin(m_Value(), m_Value())))
return RecurKind::SMin;
if (match(I, m_UMax(m_Value(), m_Value())))
return RecurKind::UMax;
if (match(I, m_UMin(m_Value(), m_Value())))
return RecurKind::UMin;
if (auto *Select = dyn_cast<SelectInst>(I)) {
// Try harder: look for min/max pattern based on instructions producing
// same values such as: select ((cmp Inst1, Inst2), Inst1, Inst2).
// During the intermediate stages of SLP, it's very common to have a
// pattern like this (since optimizeGatherSequence is run only once
// at the end):
// %1 = extractelement <2 x i32> %a, i32 0
// %2 = extractelement <2 x i32> %a, i32 1
// %cond = icmp sgt i32 %1, %2
// %3 = extractelement <2 x i32> %a, i32 0
// %4 = extractelement <2 x i32> %a, i32 1
// %select = select i1 %cond, i32 %3, i32 %4
CmpInst::Predicate Pred;
Instruction *L1;
Instruction *L2;
Value *LHS = Select->getTrueValue();
Value *RHS = Select->getFalseValue();
Value *Cond = Select->getCondition();
// TODO: Support inverse predicates.
if (match(Cond, m_Cmp(Pred, m_Specific(LHS), m_Instruction(L2)))) {
if (!isa<ExtractElementInst>(RHS) ||
!L2->isIdenticalTo(cast<Instruction>(RHS)))
return RecurKind::None;
} else if (match(Cond, m_Cmp(Pred, m_Instruction(L1), m_Specific(RHS)))) {
if (!isa<ExtractElementInst>(LHS) ||
!L1->isIdenticalTo(cast<Instruction>(LHS)))
return RecurKind::None;
} else {
if (!isa<ExtractElementInst>(LHS) || !isa<ExtractElementInst>(RHS))
return RecurKind::None;
if (!match(Cond, m_Cmp(Pred, m_Instruction(L1), m_Instruction(L2))) ||
!L1->isIdenticalTo(cast<Instruction>(LHS)) ||
!L2->isIdenticalTo(cast<Instruction>(RHS)))
return RecurKind::None;
}
switch (Pred) {
default:
return RecurKind::None;
case CmpInst::ICMP_SGT:
case CmpInst::ICMP_SGE:
return RecurKind::SMax;
case CmpInst::ICMP_SLT:
case CmpInst::ICMP_SLE:
return RecurKind::SMin;
case CmpInst::ICMP_UGT:
case CmpInst::ICMP_UGE:
return RecurKind::UMax;
case CmpInst::ICMP_ULT:
case CmpInst::ICMP_ULE:
return RecurKind::UMin;
}
}
return RecurKind::None;
}
/// Get the index of the first operand.
static unsigned getFirstOperandIndex(Instruction *I) {
return isCmpSelMinMax(I) ? 1 : 0;
}
/// Total number of operands in the reduction operation.
static unsigned getNumberOfOperands(Instruction *I) {
return isCmpSelMinMax(I) ? 3 : 2;
}
/// Checks if the instruction is in basic block \p BB.
/// For a cmp+sel min/max reduction check that both ops are in \p BB.
static bool hasSameParent(Instruction *I, BasicBlock *BB) {
if (isCmpSelMinMax(I) || (isBoolLogicOp(I) && isa<SelectInst>(I))) {
auto *Sel = cast<SelectInst>(I);
auto *Cmp = dyn_cast<Instruction>(Sel->getCondition());
return Sel->getParent() == BB && Cmp && Cmp->getParent() == BB;
}
return I->getParent() == BB;
}
/// Expected number of uses for reduction operations/reduced values.
static bool hasRequiredNumberOfUses(bool IsCmpSelMinMax, Instruction *I) {
if (IsCmpSelMinMax) {
// The SelectInst must be used twice, while the condition op must have a
// single use only.
if (auto *Sel = dyn_cast<SelectInst>(I))
return Sel->hasNUses(2) && Sel->getCondition()->hasOneUse();
return I->hasNUses(2);
}
// Arithmetic reduction operation must be used once only.
return I->hasOneUse();
}
/// Initializes the list of reduction operations.
void initReductionOps(Instruction *I) {
if (isCmpSelMinMax(I))
ReductionOps.assign(2, ReductionOpsType());
else
ReductionOps.assign(1, ReductionOpsType());
}
/// Add all reduction operations for the reduction instruction \p I.
void addReductionOps(Instruction *I) {
if (isCmpSelMinMax(I)) {
ReductionOps[0].emplace_back(cast<SelectInst>(I)->getCondition());
ReductionOps[1].emplace_back(I);
} else {
ReductionOps[0].emplace_back(I);
}
}
static Value *getLHS(RecurKind Kind, Instruction *I) {
if (Kind == RecurKind::None)
return nullptr;
return I->getOperand(getFirstOperandIndex(I));
}
static Value *getRHS(RecurKind Kind, Instruction *I) {
if (Kind == RecurKind::None)
return nullptr;
return I->getOperand(getFirstOperandIndex(I) + 1);
}
public:
HorizontalReduction() = default;
/// Try to find a reduction tree.
bool matchAssociativeReduction(PHINode *Phi, Instruction *Inst) {
assert((!Phi || is_contained(Phi->operands(), Inst)) &&
"Phi needs to use the binary operator");
assert((isa<BinaryOperator>(Inst) || isa<SelectInst>(Inst) ||
isa<IntrinsicInst>(Inst)) &&
"Expected binop, select, or intrinsic for reduction matching");
RdxKind = getRdxKind(Inst);
// We could have an initial reduction that is not an add.
// r *= v1 + v2 + v3 + v4
// In such a case start looking for a tree rooted in the first '+'.
if (Phi) {
if (getLHS(RdxKind, Inst) == Phi) {
Phi = nullptr;
Inst = dyn_cast<Instruction>(getRHS(RdxKind, Inst));
if (!Inst)
return false;
RdxKind = getRdxKind(Inst);
} else if (getRHS(RdxKind, Inst) == Phi) {
Phi = nullptr;
Inst = dyn_cast<Instruction>(getLHS(RdxKind, Inst));
if (!Inst)
return false;
RdxKind = getRdxKind(Inst);
}
}
if (!isVectorizable(RdxKind, Inst))
return false;
// Analyze "regular" integer/FP types for reductions - no target-specific
// types or pointers.
Type *Ty = Inst->getType();
if (!isValidElementType(Ty) || Ty->isPointerTy())
return false;
// Though the ultimate reduction may have multiple uses, its condition must
// have only a single use.
if (auto *Sel = dyn_cast<SelectInst>(Inst))
if (!Sel->getCondition()->hasOneUse())
return false;
ReductionRoot = Inst;
// The opcode for leaf values that we perform a reduction on.
// For example: load(x) + load(y) + load(z) + fptoui(w)
// The leaf opcode for 'w' does not match, so we don't include it as a
// potential candidate for the reduction.
unsigned LeafOpcode = 0;
// Post-order traverse the reduction tree starting at Inst. We only handle
// true trees containing binary operators or selects.
SmallVector<std::pair<Instruction *, unsigned>, 32> Stack;
Stack.push_back(std::make_pair(Inst, getFirstOperandIndex(Inst)));
initReductionOps(Inst);
while (!Stack.empty()) {
Instruction *TreeN = Stack.back().first;
unsigned EdgeToVisit = Stack.back().second++;
const RecurKind TreeRdxKind = getRdxKind(TreeN);
bool IsReducedValue = TreeRdxKind != RdxKind;
// Postorder visit.
if (IsReducedValue || EdgeToVisit >= getNumberOfOperands(TreeN)) {
if (IsReducedValue)
ReducedVals.push_back(TreeN);
else {
auto ExtraArgsIter = ExtraArgs.find(TreeN);
if (ExtraArgsIter != ExtraArgs.end() && !ExtraArgsIter->second) {
// Check if TreeN is an extra argument of its parent operation.
if (Stack.size() <= 1) {
// TreeN can't be an extra argument as it is a root reduction
// operation.
return false;
}
// Yes, TreeN is an extra argument, do not add it to a list of
// reduction operations.
// Stack[Stack.size() - 2] always points to the parent operation.
markExtraArg(Stack[Stack.size() - 2], TreeN);
ExtraArgs.erase(TreeN);
} else
addReductionOps(TreeN);
}
// Retract.
Stack.pop_back();
continue;
}
// Visit operands.
Value *EdgeVal = getRdxOperand(TreeN, EdgeToVisit);
auto *EdgeInst = dyn_cast<Instruction>(EdgeVal);
if (!EdgeInst) {
// Edge value is not a reduction instruction or a leaf instruction.
// (It may be a constant, function argument, or something else.)
markExtraArg(Stack.back(), EdgeVal);
continue;
}
RecurKind EdgeRdxKind = getRdxKind(EdgeInst);
// Continue the analysis if the next operand is a reduction operation or
// (possibly) a leaf value. If the leaf value opcode is not set yet, the
// first non-reduction operation we encounter is taken as the leaf opcode.
// Only handle trees in the current basic block.
// Each tree node needs to have minimal number of users except for the
// ultimate reduction.
const bool IsRdxInst = EdgeRdxKind == RdxKind;
if (EdgeInst != Phi && EdgeInst != Inst &&
hasSameParent(EdgeInst, Inst->getParent()) &&
hasRequiredNumberOfUses(isCmpSelMinMax(Inst), EdgeInst) &&
(!LeafOpcode || LeafOpcode == EdgeInst->getOpcode() || IsRdxInst)) {
if (IsRdxInst) {
// We need to be able to reassociate the reduction operations.
if (!isVectorizable(EdgeRdxKind, EdgeInst)) {
// I is an extra argument for TreeN (its parent operation).
markExtraArg(Stack.back(), EdgeInst);
continue;
}
} else if (!LeafOpcode) {
LeafOpcode = EdgeInst->getOpcode();
}
Stack.push_back(
std::make_pair(EdgeInst, getFirstOperandIndex(EdgeInst)));
continue;
}
// I is an extra argument for TreeN (its parent operation).
markExtraArg(Stack.back(), EdgeInst);
}
return true;
}
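// Illustrative example (hypothetical IR, not from the original source) of a
// chain this routine matches, with RdxKind == RecurKind::Add and %l0..%l3
// being, say, single-use loads that become the reduced values:
//   %r0 = add i32 %l0, %l1
//   %r1 = add i32 %r0, %l2
//   %r2 = add i32 %r1, %l3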
/// Attempt to vectorize the tree found by matchAssociativeReduction.
Value *tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) {
// If there are a sufficient number of reduction values, reduce
// to a nearby power-of-2. We can safely generate oversized
// vectors and rely on the backend to split them to legal sizes.
unsigned NumReducedVals = ReducedVals.size();
if (NumReducedVals < 4)
return nullptr;
// Intersect the fast-math-flags from all reduction operations.
FastMathFlags RdxFMF;
RdxFMF.set();
for (ReductionOpsType &RdxOp : ReductionOps) {
for (Value *RdxVal : RdxOp) {
if (auto *FPMO = dyn_cast<FPMathOperator>(RdxVal))
RdxFMF &= FPMO->getFastMathFlags();
}
}
IRBuilder<> Builder(cast<Instruction>(ReductionRoot));
Builder.setFastMathFlags(RdxFMF);
BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues;
// The same extra argument may be used several times, so log each attempt
// to use it.
for (const std::pair<Instruction *, Value *> &Pair : ExtraArgs) {
assert(Pair.first && "DebugLoc must be set.");
ExternallyUsedValues[Pair.second].push_back(Pair.first);
}
// The compare instruction of a min/max is the insertion point for new
// instructions and may be replaced with a new compare instruction.
auto getCmpForMinMaxReduction = [](Instruction *RdxRootInst) {
assert(isa<SelectInst>(RdxRootInst) &&
"Expected min/max reduction to have select root instruction");
Value *ScalarCond = cast<SelectInst>(RdxRootInst)->getCondition();
assert(isa<Instruction>(ScalarCond) &&
"Expected min/max reduction to have compare condition");
return cast<Instruction>(ScalarCond);
};
// The reduction root is used as the insertion point for new instructions,
// so set it as externally used to prevent it from being deleted.
ExternallyUsedValues[ReductionRoot];
SmallVector<Value *, 16> IgnoreList;
for (ReductionOpsType &RdxOp : ReductionOps)
IgnoreList.append(RdxOp.begin(), RdxOp.end());
unsigned ReduxWidth = PowerOf2Floor(NumReducedVals);
if (NumReducedVals > ReduxWidth) {
// In the loop below, we are building a tree based on a window of
// 'ReduxWidth' values.
// If the operands of those values have common traits (compare predicate,
// constant operand, etc), then we want to group those together to
// minimize the cost of the reduction.
// TODO: This should be extended to count common operands for
// compares and binops.
// Step 1: Count the number of times each compare predicate occurs.
SmallDenseMap<unsigned, unsigned> PredCountMap;
for (Value *RdxVal : ReducedVals) {
CmpInst::Predicate Pred;
if (match(RdxVal, m_Cmp(Pred, m_Value(), m_Value())))
++PredCountMap[Pred];
}
// Step 2: Sort the values so the most common predicates come first.
stable_sort(ReducedVals, [&PredCountMap](Value *A, Value *B) {
CmpInst::Predicate PredA, PredB;
if (match(A, m_Cmp(PredA, m_Value(), m_Value())) &&
match(B, m_Cmp(PredB, m_Value(), m_Value()))) {
return PredCountMap[PredA] > PredCountMap[PredB];
}
return false;
});
}
Value *VectorizedTree = nullptr;
unsigned i = 0;
while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
ArrayRef<Value *> VL(&ReducedVals[i], ReduxWidth);
V.buildTree(VL, IgnoreList);
if (V.isTreeTinyAndNotFullyVectorizable(/*ForReduction=*/true))
break;
if (V.isLoadCombineReductionCandidate(RdxKind))
break;
V.reorderTopToBottom();
V.reorderBottomToTop(/*IgnoreReorder=*/true);
V.buildExternalUses(ExternallyUsedValues);
// For a poison-safe boolean logic reduction, do not replace select
// instructions with logic ops. All reduced values will be frozen (see
// below) to prevent leaking poison.
if (isa<SelectInst>(ReductionRoot) &&
isBoolLogicOp(cast<Instruction>(ReductionRoot)) &&
NumReducedVals != ReduxWidth)
break;
V.computeMinimumValueSizes();
// Estimate cost.
InstructionCost TreeCost =
V.getTreeCost(makeArrayRef(&ReducedVals[i], ReduxWidth));
InstructionCost ReductionCost =
getReductionCost(TTI, ReducedVals[i], ReduxWidth, RdxFMF);
InstructionCost Cost = TreeCost + ReductionCost;
if (!Cost.isValid()) {
LLVM_DEBUG(dbgs() << "Encountered invalid baseline cost.\n");
return nullptr;
}
if (Cost >= -SLPCostThreshold) {
V.getORE()->emit([&]() {
return OptimizationRemarkMissed(SV_NAME, "HorSLPNotBeneficial",
cast<Instruction>(VL[0]))
<< "Vectorizing horizontal reduction is possible"
<< " but not beneficial with cost " << ore::NV("Cost", Cost)
<< " and threshold "
<< ore::NV("Threshold", -SLPCostThreshold);
});
break;
}
LLVM_DEBUG(dbgs() << "SLP: Vectorizing horizontal reduction at cost:"
<< Cost << ". (HorRdx)\n");
V.getORE()->emit([&]() {
return OptimizationRemark(SV_NAME, "VectorizedHorizontalReduction",
cast<Instruction>(VL[0]))
<< "Vectorized horizontal reduction with cost "
<< ore::NV("Cost", Cost) << " and with tree size "
<< ore::NV("TreeSize", V.getTreeSize());
});
// Vectorize a tree.
DebugLoc Loc = cast<Instruction>(ReducedVals[i])->getDebugLoc();
Value *VectorizedRoot = V.vectorizeTree(ExternallyUsedValues);
// Emit a reduction. If the root is a select (min/max idiom), the insert
// point is the compare condition of that select.
Instruction *RdxRootInst = cast<Instruction>(ReductionRoot);
if (isCmpSelMinMax(RdxRootInst))
Builder.SetInsertPoint(getCmpForMinMaxReduction(RdxRootInst));
else
Builder.SetInsertPoint(RdxRootInst);
// To prevent poison from leaking across what used to be sequential, safe,
// scalar boolean logic operations, the reduction operand must be frozen.
if (isa<SelectInst>(RdxRootInst) && isBoolLogicOp(RdxRootInst))
VectorizedRoot = Builder.CreateFreeze(VectorizedRoot);
Value *ReducedSubTree =
emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI);
if (!VectorizedTree) {
// Initialize the final value in the reduction.
VectorizedTree = ReducedSubTree;
} else {
// Update the final value in the reduction.
Builder.SetCurrentDebugLocation(Loc);
VectorizedTree = createOp(Builder, RdxKind, VectorizedTree,
ReducedSubTree, "op.rdx", ReductionOps);
}
i += ReduxWidth;
ReduxWidth = PowerOf2Floor(NumReducedVals - i);
}
if (VectorizedTree) {
// Finish the reduction.
for (; i < NumReducedVals; ++i) {
auto *I = cast<Instruction>(ReducedVals[i]);
Builder.SetCurrentDebugLocation(I->getDebugLoc());
VectorizedTree =
createOp(Builder, RdxKind, VectorizedTree, I, "", ReductionOps);
}
for (auto &Pair : ExternallyUsedValues) {
// Add each externally used value to the final reduction.
for (auto *I : Pair.second) {
Builder.SetCurrentDebugLocation(I->getDebugLoc());
VectorizedTree = createOp(Builder, RdxKind, VectorizedTree,
Pair.first, "op.extra", I);
}
}
ReductionRoot->replaceAllUsesWith(VectorizedTree);
// Mark all scalar reduction ops for deletion; they are replaced by the
// vector reductions.
V.eraseInstructions(IgnoreList);
}
return VectorizedTree;
}
unsigned numReductionValues() const { return ReducedVals.size(); }
private:
/// Calculate the cost of a reduction.
InstructionCost getReductionCost(TargetTransformInfo *TTI,
Value *FirstReducedVal, unsigned ReduxWidth,
FastMathFlags FMF) {
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
Type *ScalarTy = FirstReducedVal->getType();
FixedVectorType *VectorTy = FixedVectorType::get(ScalarTy, ReduxWidth);
InstructionCost VectorCost, ScalarCost;
switch (RdxKind) {
case RecurKind::Add:
case RecurKind::Mul:
case RecurKind::Or:
case RecurKind::And:
case RecurKind::Xor:
case RecurKind::FAdd:
case RecurKind::FMul: {
unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(RdxKind);
VectorCost =
TTI->getArithmeticReductionCost(RdxOpcode, VectorTy, FMF, CostKind);
ScalarCost = TTI->getArithmeticInstrCost(RdxOpcode, ScalarTy, CostKind);
break;
}
case RecurKind::FMax:
case RecurKind::FMin: {
auto *SclCondTy = CmpInst::makeCmpResultType(ScalarTy);
auto *VecCondTy = cast<VectorType>(CmpInst::makeCmpResultType(VectorTy));
VectorCost = TTI->getMinMaxReductionCost(VectorTy, VecCondTy,
/*IsUnsigned=*/false, CostKind);
CmpInst::Predicate RdxPred = getMinMaxReductionPredicate(RdxKind);
ScalarCost = TTI->getCmpSelInstrCost(Instruction::FCmp, ScalarTy,
SclCondTy, RdxPred, CostKind) +
TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
SclCondTy, RdxPred, CostKind);
break;
}
case RecurKind::SMax:
case RecurKind::SMin:
case RecurKind::UMax:
case RecurKind::UMin: {
auto *SclCondTy = CmpInst::makeCmpResultType(ScalarTy);
auto *VecCondTy = cast<VectorType>(CmpInst::makeCmpResultType(VectorTy));
bool IsUnsigned =
RdxKind == RecurKind::UMax || RdxKind == RecurKind::UMin;
VectorCost = TTI->getMinMaxReductionCost(VectorTy, VecCondTy, IsUnsigned,
CostKind);
CmpInst::Predicate RdxPred = getMinMaxReductionPredicate(RdxKind);
ScalarCost = TTI->getCmpSelInstrCost(Instruction::ICmp, ScalarTy,
SclCondTy, RdxPred, CostKind) +
TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
SclCondTy, RdxPred, CostKind);
break;
}
default:
llvm_unreachable("Expected arithmetic or min/max reduction operation");
}
// Scalar cost is repeated for N-1 elements.
ScalarCost *= (ReduxWidth - 1);
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << VectorCost - ScalarCost
<< " for reduction that starts with " << *FirstReducedVal
<< " (It is a splitting reduction)\n");
return VectorCost - ScalarCost;
}
/// Emit a horizontal reduction of the vectorized value.
Value *emitReduction(Value *VectorizedValue, IRBuilder<> &Builder,
unsigned ReduxWidth, const TargetTransformInfo *TTI) {
assert(VectorizedValue && "Need to have a vectorized tree node");
assert(isPowerOf2_32(ReduxWidth) &&
"We only handle power-of-two reductions for now");
assert(RdxKind != RecurKind::FMulAdd &&
"A call to the llvm.fmuladd intrinsic is not handled yet");
++NumVectorInstructions;
return createSimpleTargetReduction(Builder, TTI, VectorizedValue, RdxKind);
}
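// For illustration (hypothetical IR, not part of the original source),
// createSimpleTargetReduction for an add reduction of a <4 x i32> value
// typically produces a call such as
//   %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v)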
};
} // end anonymous namespace
static Optional<unsigned> getAggregateSize(Instruction *InsertInst) {
if (auto *IE = dyn_cast<InsertElementInst>(InsertInst))
return cast<FixedVectorType>(IE->getType())->getNumElements();
unsigned AggregateSize = 1;
auto *IV = cast<InsertValueInst>(InsertInst);
Type *CurrentType = IV->getType();
do {
if (auto *ST = dyn_cast<StructType>(CurrentType)) {
for (auto *Elt : ST->elements())
if (Elt != ST->getElementType(0)) // check homogeneity
return None;
AggregateSize *= ST->getNumElements();
CurrentType = ST->getElementType(0);
} else if (auto *AT = dyn_cast<ArrayType>(CurrentType)) {
AggregateSize *= AT->getNumElements();
CurrentType = AT->getElementType();
} else if (auto *VT = dyn_cast<FixedVectorType>(CurrentType)) {
AggregateSize *= VT->getNumElements();
return AggregateSize;
} else if (CurrentType->isSingleValueType()) {
return AggregateSize;
} else {
return None;
}
} while (true);
}
-static bool findBuildAggregate_rec(Instruction *LastInsertInst,
+static void findBuildAggregate_rec(Instruction *LastInsertInst,
TargetTransformInfo *TTI,
SmallVectorImpl<Value *> &BuildVectorOpds,
SmallVectorImpl<Value *> &InsertElts,
unsigned OperandOffset) {
do {
Value *InsertedOperand = LastInsertInst->getOperand(1);
- Optional<int> OperandIndex = getInsertIndex(LastInsertInst, OperandOffset);
+ Optional<unsigned> OperandIndex =
+ getInsertIndex(LastInsertInst, OperandOffset);
if (!OperandIndex)
- return false;
+ return;
if (isa<InsertElementInst>(InsertedOperand) ||
isa<InsertValueInst>(InsertedOperand)) {
- if (!findBuildAggregate_rec(cast<Instruction>(InsertedOperand), TTI,
- BuildVectorOpds, InsertElts, *OperandIndex))
- return false;
+ findBuildAggregate_rec(cast<Instruction>(InsertedOperand), TTI,
+ BuildVectorOpds, InsertElts, *OperandIndex);
+
} else {
BuildVectorOpds[*OperandIndex] = InsertedOperand;
InsertElts[*OperandIndex] = LastInsertInst;
}
LastInsertInst = dyn_cast<Instruction>(LastInsertInst->getOperand(0));
} while (LastInsertInst != nullptr &&
(isa<InsertValueInst>(LastInsertInst) ||
isa<InsertElementInst>(LastInsertInst)) &&
LastInsertInst->hasOneUse());
- return true;
}
/// Recognize construction of vectors like
/// %ra = insertelement <4 x float> poison, float %s0, i32 0
/// %rb = insertelement <4 x float> %ra, float %s1, i32 1
/// %rc = insertelement <4 x float> %rb, float %s2, i32 2
/// %rd = insertelement <4 x float> %rc, float %s3, i32 3
/// starting from the last insertelement or insertvalue instruction.
///
/// Also recognize homogeneous aggregates like {<2 x float>, <2 x float>},
/// {{float, float}, {float, float}}, [2 x {float, float}] and so on.
/// See llvm/test/Transforms/SLPVectorizer/X86/pr42022.ll for examples.
///
/// Assume LastInsertInst is of InsertElementInst or InsertValueInst type.
///
/// \return true if it matches.
static bool findBuildAggregate(Instruction *LastInsertInst,
TargetTransformInfo *TTI,
SmallVectorImpl<Value *> &BuildVectorOpds,
SmallVectorImpl<Value *> &InsertElts) {
assert((isa<InsertElementInst>(LastInsertInst) ||
isa<InsertValueInst>(LastInsertInst)) &&
"Expected insertelement or insertvalue instruction!");
assert((BuildVectorOpds.empty() && InsertElts.empty()) &&
"Expected empty result vectors!");
Optional<unsigned> AggregateSize = getAggregateSize(LastInsertInst);
if (!AggregateSize)
return false;
BuildVectorOpds.resize(*AggregateSize);
InsertElts.resize(*AggregateSize);
- if (findBuildAggregate_rec(LastInsertInst, TTI, BuildVectorOpds, InsertElts,
- 0)) {
- llvm::erase_value(BuildVectorOpds, nullptr);
- llvm::erase_value(InsertElts, nullptr);
- if (BuildVectorOpds.size() >= 2)
- return true;
- }
+ findBuildAggregate_rec(LastInsertInst, TTI, BuildVectorOpds, InsertElts, 0);
+ llvm::erase_value(BuildVectorOpds, nullptr);
+ llvm::erase_value(InsertElts, nullptr);
+ if (BuildVectorOpds.size() >= 2)
+ return true;
return false;
}
/// Try and get a reduction value from a phi node.
///
/// Given a phi node \p P in a block \p ParentBB, consider possible reductions
/// if they come from either \p ParentBB or a containing loop latch.
///
/// \returns A candidate reduction value if possible, or \code nullptr \endcode
/// if not possible.
static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
BasicBlock *ParentBB, LoopInfo *LI) {
// There are situations where the reduction value is not dominated by the
// reduction phi. Vectorizing such cases has been reported to cause
// miscompiles. See PR25787.
auto DominatedReduxValue = [&](Value *R) {
return isa<Instruction>(R) &&
DT->dominates(P->getParent(), cast<Instruction>(R)->getParent());
};
Value *Rdx = nullptr;
// Return the incoming value if it comes from the same BB as the phi node.
if (P->getIncomingBlock(0) == ParentBB) {
Rdx = P->getIncomingValue(0);
} else if (P->getIncomingBlock(1) == ParentBB) {
Rdx = P->getIncomingValue(1);
}
if (Rdx && DominatedReduxValue(Rdx))
return Rdx;
// Otherwise, check whether we have a loop latch to look at.
Loop *BBL = LI->getLoopFor(ParentBB);
if (!BBL)
return nullptr;
BasicBlock *BBLatch = BBL->getLoopLatch();
if (!BBLatch)
return nullptr;
// There is a loop latch; return the incoming value if it comes from
// there. This reduction pattern occasionally turns up.
if (P->getIncomingBlock(0) == BBLatch) {
Rdx = P->getIncomingValue(0);
} else if (P->getIncomingBlock(1) == BBLatch) {
Rdx = P->getIncomingValue(1);
}
if (Rdx && DominatedReduxValue(Rdx))
return Rdx;
return nullptr;
}
static bool matchRdxBop(Instruction *I, Value *&V0, Value *&V1) {
if (match(I, m_BinOp(m_Value(V0), m_Value(V1))))
return true;
if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(V0), m_Value(V1))))
return true;
if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(V0), m_Value(V1))))
return true;
if (match(I, m_Intrinsic<Intrinsic::smax>(m_Value(V0), m_Value(V1))))
return true;
if (match(I, m_Intrinsic<Intrinsic::smin>(m_Value(V0), m_Value(V1))))
return true;
if (match(I, m_Intrinsic<Intrinsic::umax>(m_Value(V0), m_Value(V1))))
return true;
if (match(I, m_Intrinsic<Intrinsic::umin>(m_Value(V0), m_Value(V1))))
return true;
return false;
}
/// Attempt to reduce a horizontal reduction.
/// If it is legal to match a horizontal reduction feeding the phi node \a P
/// with reduction operators \a Root (or one of its operands) in a basic block
/// \a BB, then check if it can be done. If horizontal reduction is not found
/// and root instruction is a binary operation, vectorization of the operands is
/// attempted.
/// \returns true if a horizontal reduction was matched and reduced, or the
/// operands of one of the binary instructions were vectorized.
/// \returns false if a horizontal reduction was not matched (or not possible)
/// or no vectorization of any binary operation feeding \a Root instruction was
/// performed.
static bool tryToVectorizeHorReductionOrInstOperands(
PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R,
TargetTransformInfo *TTI,
const function_ref<bool(Instruction *, BoUpSLP &)> Vectorize) {
if (!ShouldVectorizeHor)
return false;
if (!Root)
return false;
if (Root->getParent() != BB || isa<PHINode>(Root))
return false;
// Start the analysis from the Root instruction. If a horizontal reduction is
// found, try to vectorize it. If it is not a horizontal reduction, or
// vectorization is not possible or not effective, and the currently analyzed
// instruction is a binary operation, try to vectorize its operands using a
// pre-order DFS traversal. If the operands were not vectorized, repeat the
// same procedure considering each operand as a possible root of a
// horizontal reduction.
// Interrupt the process if the Root instruction itself was vectorized or all
// sub-trees no deeper than RecursionMaxDepth were analyzed/vectorized.
// Skip the analysis of CmpInsts: the compiler performs a post-analysis of
// CmpInsts, so we can skip extra attempts in
// tryToVectorizeHorReductionOrInstOperands and save compile time.
std::queue<std::pair<Instruction *, unsigned>> Stack;
Stack.emplace(Root, 0);
SmallPtrSet<Value *, 8> VisitedInstrs;
SmallVector<WeakTrackingVH> PostponedInsts;
bool Res = false;
auto &&TryToReduce = [TTI, &P, &R](Instruction *Inst, Value *&B0,
Value *&B1) -> Value * {
bool IsBinop = matchRdxBop(Inst, B0, B1);
bool IsSelect = match(Inst, m_Select(m_Value(), m_Value(), m_Value()));
if (IsBinop || IsSelect) {
HorizontalReduction HorRdx;
if (HorRdx.matchAssociativeReduction(P, Inst))
return HorRdx.tryToReduce(R, TTI);
}
return nullptr;
};
while (!Stack.empty()) {
Instruction *Inst;
unsigned Level;
std::tie(Inst, Level) = Stack.front();
Stack.pop();
// Do not try to analyze an instruction that has already been vectorized.
// This may happen when we vectorize instruction operands on a previous
// iteration while the Stack was populated before that happened.
if (R.isDeleted(Inst))
continue;
Value *B0 = nullptr, *B1 = nullptr;
if (Value *V = TryToReduce(Inst, B0, B1)) {
Res = true;
// Set P to nullptr to avoid re-analysis of phi node in
// matchAssociativeReduction function unless this is the root node.
P = nullptr;
if (auto *I = dyn_cast<Instruction>(V)) {
// Try to find another reduction.
Stack.emplace(I, Level);
continue;
}
} else {
bool IsBinop = B0 && B1;
if (P && IsBinop) {
Inst = dyn_cast<Instruction>(B0);
if (Inst == P)
Inst = dyn_cast<Instruction>(B1);
if (!Inst) {
// Set P to nullptr to avoid re-analysis of phi node in
// matchAssociativeReduction function unless this is the root node.
P = nullptr;
continue;
}
}
// Set P to nullptr to avoid re-analysis of phi node in
// matchAssociativeReduction function unless this is the root node.
P = nullptr;
// Do not try to vectorize CmpInst operands, this is done separately.
// Final attempt for binop args vectorization should happen after the loop
// to try to find reductions.
if (!isa<CmpInst>(Inst))
PostponedInsts.push_back(Inst);
}
// Try to vectorize operands.
// Continue the analysis only for instructions from the same basic block, to
// save compile time.
if (++Level < RecursionMaxDepth)
for (auto *Op : Inst->operand_values())
if (VisitedInstrs.insert(Op).second)
if (auto *I = dyn_cast<Instruction>(Op))
// Do not try to vectorize CmpInst operands, this is done
// separately.
if (!isa<PHINode>(I) && !isa<CmpInst>(I) && !R.isDeleted(I) &&
I->getParent() == BB)
Stack.emplace(I, Level);
}
// Try to vectorize binops where reductions were not found.
for (Value *V : PostponedInsts)
if (auto *Inst = dyn_cast<Instruction>(V))
if (!R.isDeleted(Inst))
Res |= Vectorize(Inst, R);
return Res;
}
bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V,
BasicBlock *BB, BoUpSLP &R,
TargetTransformInfo *TTI) {
auto *I = dyn_cast_or_null<Instruction>(V);
if (!I)
return false;
if (!isa<BinaryOperator>(I))
P = nullptr;
// Try to match and vectorize a horizontal reduction.
auto &&ExtraVectorization = [this](Instruction *I, BoUpSLP &R) -> bool {
return tryToVectorize(I, R);
};
return tryToVectorizeHorReductionOrInstOperands(P, I, BB, R, TTI,
ExtraVectorization);
}
bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI,
BasicBlock *BB, BoUpSLP &R) {
const DataLayout &DL = BB->getModule()->getDataLayout();
if (!R.canMapToVector(IVI->getType(), DL))
return false;
SmallVector<Value *, 16> BuildVectorOpds;
SmallVector<Value *, 16> BuildVectorInsts;
if (!findBuildAggregate(IVI, TTI, BuildVectorOpds, BuildVectorInsts))
return false;
LLVM_DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n");
// Aggregate value is unlikely to be processed in a vector register.
return tryToVectorizeList(BuildVectorOpds, R);
}
bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
BasicBlock *BB, BoUpSLP &R) {
SmallVector<Value *, 16> BuildVectorInsts;
SmallVector<Value *, 16> BuildVectorOpds;
SmallVector<int> Mask;
if (!findBuildAggregate(IEI, TTI, BuildVectorOpds, BuildVectorInsts) ||
(llvm::all_of(
BuildVectorOpds,
[](Value *V) { return isa<ExtractElementInst, UndefValue>(V); }) &&
isFixedVectorShuffle(BuildVectorOpds, Mask)))
return false;
LLVM_DEBUG(dbgs() << "SLP: array mappable to vector: " << *IEI << "\n");
return tryToVectorizeList(BuildVectorInsts, R);
}
template <typename T>
static bool
tryToVectorizeSequence(SmallVectorImpl<T *> &Incoming,
function_ref<unsigned(T *)> Limit,
function_ref<bool(T *, T *)> Comparator,
function_ref<bool(T *, T *)> AreCompatible,
function_ref<bool(ArrayRef<T *>, bool)> TryToVectorizeHelper,
bool LimitForRegisterSize) {
bool Changed = false;
// Sort by type, parent, operands.
stable_sort(Incoming, Comparator);
// Try to vectorize elements based on their type.
SmallVector<T *> Candidates;
for (auto *IncIt = Incoming.begin(), *E = Incoming.end(); IncIt != E;) {
// Look for the next elements with the same type, parent and operand
// kinds.
auto *SameTypeIt = IncIt;
while (SameTypeIt != E && AreCompatible(*SameTypeIt, *IncIt))
++SameTypeIt;
// Try to vectorize them.
unsigned NumElts = (SameTypeIt - IncIt);
LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize starting at nodes ("
<< NumElts << ")\n");
// The vectorization is a three-stage attempt:
// 1. Try to vectorize instructions with the same/alternate opcodes, limited
// to the size of the maximal register at first.
// 2. Try to vectorize the remaining instructions with the same type, if
// possible. This may produce better vectorization results than trying to
// vectorize only instructions with the same/alternate opcodes.
// 3. Make a final attempt to vectorize all instructions with the
// same/alternate ops only; this may result in some extra final
// vectorization.
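// Illustrative example (not from the source): with a 128-bit maximal register
// and i32 elements, Limit(*IncIt) is 4, so runs of fewer than 4 compatible
// values are pooled into Candidates and retried later without the
// register-size limit (stages 2 and 3 above).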
if (NumElts > 1 &&
TryToVectorizeHelper(makeArrayRef(IncIt, NumElts), LimitForRegisterSize)) {
// Success; start over because instructions might have been changed.
Changed = true;
} else if (NumElts < Limit(*IncIt) &&
(Candidates.empty() ||
Candidates.front()->getType() == (*IncIt)->getType())) {
Candidates.append(IncIt, std::next(IncIt, NumElts));
}
// Final attempt to vectorize instructions with the same types.
if (Candidates.size() > 1 &&
(SameTypeIt == E || (*SameTypeIt)->getType() != (*IncIt)->getType())) {
if (TryToVectorizeHelper(Candidates, /*LimitForRegisterSize=*/false)) {
// Success; start over because instructions might have been changed.
Changed = true;
} else if (LimitForRegisterSize) {
// Try to vectorize using small vectors.
for (auto *It = Candidates.begin(), *End = Candidates.end();
It != End;) {
auto *SameTypeIt = It;
while (SameTypeIt != End && AreCompatible(*SameTypeIt, *It))
++SameTypeIt;
unsigned NumElts = (SameTypeIt - It);
if (NumElts > 1 && TryToVectorizeHelper(makeArrayRef(It, NumElts),
/*LimitForRegisterSize=*/false))
Changed = true;
It = SameTypeIt;
}
}
Candidates.clear();
}
// Start over at the next instruction of a different type (or the end).
IncIt = SameTypeIt;
}
return Changed;
}
/// Compare two cmp instructions. If IsCompatibility is true, the function
/// returns true if the two cmps have the same/swapped predicates and most
/// compatible corresponding operands. If IsCompatibility is false, the
/// function implements a strict weak ordering relation between two cmp
/// instructions, returning true if the first instruction is "less" than the
/// second, i.e. its predicate is less than the predicate of the second or the
/// operand IDs are less than the operand IDs of the second cmp instruction.
template <bool IsCompatibility>
static bool compareCmp(Value *V, Value *V2,
function_ref<bool(Instruction *)> IsDeleted) {
auto *CI1 = cast<CmpInst>(V);
auto *CI2 = cast<CmpInst>(V2);
if (IsDeleted(CI2) || !isValidElementType(CI2->getType()))
return false;
if (CI1->getOperand(0)->getType()->getTypeID() <
CI2->getOperand(0)->getType()->getTypeID())
return !IsCompatibility;
if (CI1->getOperand(0)->getType()->getTypeID() >
CI2->getOperand(0)->getType()->getTypeID())
return false;
CmpInst::Predicate Pred1 = CI1->getPredicate();
CmpInst::Predicate Pred2 = CI2->getPredicate();
CmpInst::Predicate SwapPred1 = CmpInst::getSwappedPredicate(Pred1);
CmpInst::Predicate SwapPred2 = CmpInst::getSwappedPredicate(Pred2);
CmpInst::Predicate BasePred1 = std::min(Pred1, SwapPred1);
CmpInst::Predicate BasePred2 = std::min(Pred2, SwapPred2);
if (BasePred1 < BasePred2)
return !IsCompatibility;
if (BasePred1 > BasePred2)
return false;
// Compare operands.
bool LEPreds = Pred1 <= Pred2;
bool GEPreds = Pred1 >= Pred2;
for (int I = 0, E = CI1->getNumOperands(); I < E; ++I) {
auto *Op1 = CI1->getOperand(LEPreds ? I : E - I - 1);
auto *Op2 = CI2->getOperand(GEPreds ? I : E - I - 1);
if (Op1->getValueID() < Op2->getValueID())
return !IsCompatibility;
if (Op1->getValueID() > Op2->getValueID())
return false;
if (auto *I1 = dyn_cast<Instruction>(Op1))
if (auto *I2 = dyn_cast<Instruction>(Op2)) {
if (I1->getParent() != I2->getParent())
return false;
InstructionsState S = getSameOpcode({I1, I2});
if (S.getOpcode())
continue;
return false;
}
}
return IsCompatibility;
}
bool SLPVectorizerPass::vectorizeSimpleInstructions(
SmallVectorImpl<Instruction *> &Instructions, BasicBlock *BB, BoUpSLP &R,
bool AtTerminator) {
bool OpsChanged = false;
SmallVector<Instruction *, 4> PostponedCmps;
for (auto *I : reverse(Instructions)) {
if (R.isDeleted(I))
continue;
if (auto *LastInsertValue = dyn_cast<InsertValueInst>(I))
OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R);
else if (auto *LastInsertElem = dyn_cast<InsertElementInst>(I))
OpsChanged |= vectorizeInsertElementInst(LastInsertElem, BB, R);
else if (isa<CmpInst>(I))
PostponedCmps.push_back(I);
}
if (AtTerminator) {
// Try to find reductions first.
for (Instruction *I : PostponedCmps) {
if (R.isDeleted(I))
continue;
for (Value *Op : I->operands())
OpsChanged |= vectorizeRootInstruction(nullptr, Op, BB, R, TTI);
}
// Try to vectorize operands as vector bundles.
for (Instruction *I : PostponedCmps) {
if (R.isDeleted(I))
continue;
OpsChanged |= tryToVectorize(I, R);
}
// Try to vectorize list of compares.
// Sort by type, compare predicate, etc.
auto &&CompareSorter = [&R](Value *V, Value *V2) {
return compareCmp<false>(V, V2,
[&R](Instruction *I) { return R.isDeleted(I); });
};
auto &&AreCompatibleCompares = [&R](Value *V1, Value *V2) {
if (V1 == V2)
return true;
return compareCmp<true>(V1, V2,
[&R](Instruction *I) { return R.isDeleted(I); });
};
auto Limit = [&R](Value *V) {
unsigned EltSize = R.getVectorElementSize(V);
return std::max(2U, R.getMaxVecRegSize() / EltSize);
};
SmallVector<Value *> Vals(PostponedCmps.begin(), PostponedCmps.end());
OpsChanged |= tryToVectorizeSequence<Value>(
Vals, Limit, CompareSorter, AreCompatibleCompares,
[this, &R](ArrayRef<Value *> Candidates, bool LimitForRegisterSize) {
// Exclude possible reductions from other blocks.
bool ArePossiblyReducedInOtherBlock =
any_of(Candidates, [](Value *V) {
return any_of(V->users(), [V](User *U) {
return isa<SelectInst>(U) &&
cast<SelectInst>(U)->getParent() !=
cast<Instruction>(V)->getParent();
});
});
if (ArePossiblyReducedInOtherBlock)
return false;
return tryToVectorizeList(Candidates, R, LimitForRegisterSize);
},
/*LimitForRegisterSize=*/true);
Instructions.clear();
} else {
// Insert in reverse order since the PostponedCmps vector was filled in
// reverse order.
Instructions.assign(PostponedCmps.rbegin(), PostponedCmps.rend());
}
return OpsChanged;
}
bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
bool Changed = false;
SmallVector<Value *, 4> Incoming;
SmallPtrSet<Value *, 16> VisitedInstrs;
// Maps phi nodes to the non-phi nodes found in the use tree for each phi
// node. This makes it easier to identify the chains that can be vectorized
// most effectively.
DenseMap<Value *, SmallVector<Value *, 4>> PHIToOpcodes;
auto PHICompare = [this, &PHIToOpcodes](Value *V1, Value *V2) {
assert(isValidElementType(V1->getType()) &&
isValidElementType(V2->getType()) &&
"Expected vectorizable types only.");
// It is fine to compare type IDs here, since we expect only vectorizable
// types, like ints, floats and pointers; we don't care about other types.
if (V1->getType()->getTypeID() < V2->getType()->getTypeID())
return true;
if (V1->getType()->getTypeID() > V2->getType()->getTypeID())
return false;
ArrayRef<Value *> Opcodes1 = PHIToOpcodes[V1];
ArrayRef<Value *> Opcodes2 = PHIToOpcodes[V2];
if (Opcodes1.size() < Opcodes2.size())
return true;
if (Opcodes1.size() > Opcodes2.size())
return false;
Optional<bool> ConstOrder;
for (int I = 0, E = Opcodes1.size(); I < E; ++I) {
// Undefs are compatible with any other value.
if (isa<UndefValue>(Opcodes1[I]) || isa<UndefValue>(Opcodes2[I])) {
if (!ConstOrder)
ConstOrder =
!isa<UndefValue>(Opcodes1[I]) && isa<UndefValue>(Opcodes2[I]);
continue;
}
if (auto *I1 = dyn_cast<Instruction>(Opcodes1[I]))
if (auto *I2 = dyn_cast<Instruction>(Opcodes2[I])) {
DomTreeNodeBase<BasicBlock> *NodeI1 = DT->getNode(I1->getParent());
DomTreeNodeBase<BasicBlock> *NodeI2 = DT->getNode(I2->getParent());
if (!NodeI1)
return NodeI2 != nullptr;
if (!NodeI2)
return false;
assert((NodeI1 == NodeI2) ==
(NodeI1->getDFSNumIn() == NodeI2->getDFSNumIn()) &&
"Different nodes should have different DFS numbers");
if (NodeI1 != NodeI2)
return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
InstructionsState S = getSameOpcode({I1, I2});
if (S.getOpcode())
continue;
return I1->getOpcode() < I2->getOpcode();
}
if (isa<Constant>(Opcodes1[I]) && isa<Constant>(Opcodes2[I])) {
if (!ConstOrder)
ConstOrder = Opcodes1[I]->getValueID() < Opcodes2[I]->getValueID();
continue;
}
if (Opcodes1[I]->getValueID() < Opcodes2[I]->getValueID())
return true;
if (Opcodes1[I]->getValueID() > Opcodes2[I]->getValueID())
return false;
}
return ConstOrder && *ConstOrder;
};
auto AreCompatiblePHIs = [&PHIToOpcodes](Value *V1, Value *V2) {
if (V1 == V2)
return true;
if (V1->getType() != V2->getType())
return false;
ArrayRef<Value *> Opcodes1 = PHIToOpcodes[V1];
ArrayRef<Value *> Opcodes2 = PHIToOpcodes[V2];
if (Opcodes1.size() != Opcodes2.size())
return false;
for (int I = 0, E = Opcodes1.size(); I < E; ++I) {
// Undefs are compatible with any other value.
if (isa<UndefValue>(Opcodes1[I]) || isa<UndefValue>(Opcodes2[I]))
continue;
if (auto *I1 = dyn_cast<Instruction>(Opcodes1[I]))
if (auto *I2 = dyn_cast<Instruction>(Opcodes2[I])) {
if (I1->getParent() != I2->getParent())
return false;
InstructionsState S = getSameOpcode({I1, I2});
if (S.getOpcode())
continue;
return false;
}
if (isa<Constant>(Opcodes1[I]) && isa<Constant>(Opcodes2[I]))
continue;
if (Opcodes1[I]->getValueID() != Opcodes2[I]->getValueID())
return false;
}
return true;
};
auto Limit = [&R](Value *V) {
unsigned EltSize = R.getVectorElementSize(V);
return std::max(2U, R.getMaxVecRegSize() / EltSize);
};
bool HaveVectorizedPhiNodes = false;
do {
// Collect the incoming values from the PHIs.
Incoming.clear();
for (Instruction &I : *BB) {
PHINode *P = dyn_cast<PHINode>(&I);
if (!P)
break;
// No need to analyze deleted, vectorized and non-vectorizable
// instructions.
if (!VisitedInstrs.count(P) && !R.isDeleted(P) &&
isValidElementType(P->getType()))
Incoming.push_back(P);
}
// Find the corresponding non-phi nodes for better matching when trying to
// build the tree.
for (Value *V : Incoming) {
SmallVectorImpl<Value *> &Opcodes =
PHIToOpcodes.try_emplace(V).first->getSecond();
if (!Opcodes.empty())
continue;
SmallVector<Value *, 4> Nodes(1, V);
SmallPtrSet<Value *, 4> Visited;
while (!Nodes.empty()) {
auto *PHI = cast<PHINode>(Nodes.pop_back_val());
if (!Visited.insert(PHI).second)
continue;
for (Value *V : PHI->incoming_values()) {
if (auto *PHI1 = dyn_cast<PHINode>((V))) {
Nodes.push_back(PHI1);
continue;
}
Opcodes.emplace_back(V);
}
}
}
HaveVectorizedPhiNodes = tryToVectorizeSequence<Value>(
Incoming, Limit, PHICompare, AreCompatiblePHIs,
[this, &R](ArrayRef<Value *> Candidates, bool LimitForRegisterSize) {
return tryToVectorizeList(Candidates, R, LimitForRegisterSize);
},
/*LimitForRegisterSize=*/true);
Changed |= HaveVectorizedPhiNodes;
VisitedInstrs.insert(Incoming.begin(), Incoming.end());
} while (HaveVectorizedPhiNodes);
VisitedInstrs.clear();
SmallVector<Instruction *, 8> PostProcessInstructions;
SmallDenseSet<Instruction *, 4> KeyNodes;
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
// Skip instructions with scalable types. The number of elements is unknown
// at compile time for scalable types.
if (isa<ScalableVectorType>(it->getType()))
continue;
// Skip instructions marked for deletion.
if (R.isDeleted(&*it))
continue;
// We may go through BB multiple times, so skip the ones we have already checked.
if (!VisitedInstrs.insert(&*it).second) {
if (it->use_empty() && KeyNodes.contains(&*it) &&
vectorizeSimpleInstructions(PostProcessInstructions, BB, R,
it->isTerminator())) {
// We would like to start over since some instructions are deleted
// and the iterator may become an invalid value.
Changed = true;
it = BB->begin();
e = BB->end();
}
continue;
}
if (isa<DbgInfoIntrinsic>(it))
continue;
// Try to vectorize reductions that use PHINodes.
if (PHINode *P = dyn_cast<PHINode>(it)) {
// Check that the PHI is a reduction PHI.
if (P->getNumIncomingValues() == 2) {
// Try to match and vectorize a horizontal reduction.
if (vectorizeRootInstruction(P, getReductionValue(DT, P, BB, LI), BB, R,
TTI)) {
Changed = true;
it = BB->begin();
e = BB->end();
continue;
}
}
// Try to vectorize the incoming values of the PHI, to catch reductions
// that feed into PHIs.
for (unsigned I = 0, E = P->getNumIncomingValues(); I != E; I++) {
// Skip if the incoming block is the current BB for now. Also, bypass
// unreachable IR for efficiency and to avoid crashing.
// TODO: Collect the skipped incoming values and try to vectorize them
// after processing BB.
if (BB == P->getIncomingBlock(I) ||
!DT->isReachableFromEntry(P->getIncomingBlock(I)))
continue;
Changed |= vectorizeRootInstruction(nullptr, P->getIncomingValue(I),
P->getIncomingBlock(I), R, TTI);
}
continue;
}
// Ran into an instruction without users, such as a terminator, a store, or a
// function call with an ignored return value. Ignore unused instructions
// (based on the instruction type, except for CallInst and InvokeInst).
if (it->use_empty() && (it->getType()->isVoidTy() || isa<CallInst>(it) ||
isa<InvokeInst>(it))) {
KeyNodes.insert(&*it);
bool OpsChanged = false;
if (ShouldStartVectorizeHorAtStore || !isa<StoreInst>(it)) {
for (auto *V : it->operand_values()) {
// Try to match and vectorize a horizontal reduction.
OpsChanged |= vectorizeRootInstruction(nullptr, V, BB, R, TTI);
}
}
// Start vectorization of post-process list of instructions from the
// top-tree instructions to try to vectorize as many instructions as
// possible.
OpsChanged |= vectorizeSimpleInstructions(PostProcessInstructions, BB, R,
it->isTerminator());
if (OpsChanged) {
// We would like to start over since some instructions are deleted
// and the iterator may become an invalid value.
Changed = true;
it = BB->begin();
e = BB->end();
continue;
}
}
if (isa<InsertElementInst>(it) || isa<CmpInst>(it) ||
isa<InsertValueInst>(it))
PostProcessInstructions.push_back(&*it);
}
return Changed;
}
bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
auto Changed = false;
for (auto &Entry : GEPs) {
// If the getelementptr list has fewer than two elements, there's nothing
// to do.
if (Entry.second.size() < 2)
continue;
LLVM_DEBUG(dbgs() << "SLP: Analyzing a getelementptr list of length "
<< Entry.second.size() << ".\n");
// Process the GEP list in chunks suitable for the target's supported
// vector size. If a vector register can't hold 1 element, we are done. We
// are trying to vectorize the index computations, so the maximum number of
// elements is based on the size of the index expression, rather than the
// size of the GEP itself (the target's pointer size).
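// Illustrative example (not from the source): with a 128-bit vector register
// and 64-bit index expressions, MaxElts below is 2, so the GEP list is
// processed in chunks of two.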
unsigned MaxVecRegSize = R.getMaxVecRegSize();
unsigned EltSize = R.getVectorElementSize(*Entry.second[0]->idx_begin());
if (MaxVecRegSize < EltSize)
continue;
unsigned MaxElts = MaxVecRegSize / EltSize;
for (unsigned BI = 0, BE = Entry.second.size(); BI < BE; BI += MaxElts) {
auto Len = std::min<unsigned>(BE - BI, MaxElts);
ArrayRef<GetElementPtrInst *> GEPList(&Entry.second[BI], Len);
// Initialize a set of candidate getelementptrs. Note that we use a
// SetVector here to preserve program order. If the index computations
// are vectorizable and begin with loads, we want to minimize the chance
// of having to reorder them later.
SetVector<Value *> Candidates(GEPList.begin(), GEPList.end());
// Some of the candidates may have already been vectorized after we
// initially collected them. If so, they are marked as deleted, so remove
// them from the set of candidates.
Candidates.remove_if(
[&R](Value *I) { return R.isDeleted(cast<Instruction>(I)); });
// Remove from the set of candidates all pairs of getelementptrs with
// constant differences. Such getelementptrs are likely not good
// candidates for vectorization in a bottom-up phase since one can be
// computed from the other. We also ensure all candidate getelementptr
// indices are unique.
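// For example (illustrative only): two getelementptrs off the same base whose
// indices differ by a constant have a constant SCEV difference, so the loop
// below drops both of them; candidates that merely share the same index value
// lose only the duplicate.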
for (int I = 0, E = GEPList.size(); I < E && Candidates.size() > 1; ++I) {
auto *GEPI = GEPList[I];
if (!Candidates.count(GEPI))
continue;
auto *SCEVI = SE->getSCEV(GEPList[I]);
for (int J = I + 1; J < E && Candidates.size() > 1; ++J) {
auto *GEPJ = GEPList[J];
auto *SCEVJ = SE->getSCEV(GEPList[J]);
if (isa<SCEVConstant>(SE->getMinusSCEV(SCEVI, SCEVJ))) {
Candidates.remove(GEPI);
Candidates.remove(GEPJ);
} else if (GEPI->idx_begin()->get() == GEPJ->idx_begin()->get()) {
Candidates.remove(GEPJ);
}
}
}
// We break out of the above computation as soon as we know there are
// fewer than two candidates remaining.
if (Candidates.size() < 2)
continue;
// Add the single, non-constant index of each candidate to the bundle. We
// ensured the indices met these constraints when we originally collected
// the getelementptrs.
SmallVector<Value *, 16> Bundle(Candidates.size());
auto BundleIndex = 0u;
for (auto *V : Candidates) {
auto *GEP = cast<GetElementPtrInst>(V);
auto *GEPIdx = GEP->idx_begin()->get();
assert(GEP->getNumIndices() == 1 || !isa<Constant>(GEPIdx));
Bundle[BundleIndex++] = GEPIdx;
}
// Try and vectorize the indices. We are currently only interested in
// gather-like cases of the form:
//
// ... = g[a[0] - b[0]] + g[a[1] - b[1]] + ...
//
// where the loads of "a", the loads of "b", and the subtractions can be
// performed in parallel. It's likely that detecting this pattern in a
// bottom-up phase will be simpler and less costly than building a
// full-blown top-down phase beginning at the consecutive loads.
Changed |= tryToVectorizeList(Bundle, R);
}
}
return Changed;
}
bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
bool Changed = false;
// Sort by type, base pointers and value operands. Value operands must be
// compatible (have the same opcode, same parent), otherwise it is
// definitely not profitable to try to vectorize them.
auto &&StoreSorter = [this](StoreInst *V, StoreInst *V2) {
if (V->getPointerOperandType()->getTypeID() <
V2->getPointerOperandType()->getTypeID())
return true;
if (V->getPointerOperandType()->getTypeID() >
V2->getPointerOperandType()->getTypeID())
return false;
// UndefValues are compatible with all other values.
if (isa<UndefValue>(V->getValueOperand()) ||
isa<UndefValue>(V2->getValueOperand()))
return false;
if (auto *I1 = dyn_cast<Instruction>(V->getValueOperand()))
if (auto *I2 = dyn_cast<Instruction>(V2->getValueOperand())) {
DomTreeNodeBase<llvm::BasicBlock> *NodeI1 =
DT->getNode(I1->getParent());
DomTreeNodeBase<llvm::BasicBlock> *NodeI2 =
DT->getNode(I2->getParent());
assert(NodeI1 && "Should only process reachable instructions");
assert(NodeI2 && "Should only process reachable instructions");
assert((NodeI1 == NodeI2) ==
(NodeI1->getDFSNumIn() == NodeI2->getDFSNumIn()) &&
"Different nodes should have different DFS numbers");
if (NodeI1 != NodeI2)
return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
InstructionsState S = getSameOpcode({I1, I2});
if (S.getOpcode())
return false;
return I1->getOpcode() < I2->getOpcode();
}
if (isa<Constant>(V->getValueOperand()) &&
isa<Constant>(V2->getValueOperand()))
return false;
return V->getValueOperand()->getValueID() <
V2->getValueOperand()->getValueID();
};
auto &&AreCompatibleStores = [](StoreInst *V1, StoreInst *V2) {
if (V1 == V2)
return true;
if (V1->getPointerOperandType() != V2->getPointerOperandType())
return false;
// Undefs are compatible with any other value.
if (isa<UndefValue>(V1->getValueOperand()) ||
isa<UndefValue>(V2->getValueOperand()))
return true;
if (auto *I1 = dyn_cast<Instruction>(V1->getValueOperand()))
if (auto *I2 = dyn_cast<Instruction>(V2->getValueOperand())) {
if (I1->getParent() != I2->getParent())
return false;
InstructionsState S = getSameOpcode({I1, I2});
return S.getOpcode() > 0;
}
if (isa<Constant>(V1->getValueOperand()) &&
isa<Constant>(V2->getValueOperand()))
return true;
return V1->getValueOperand()->getValueID() ==
V2->getValueOperand()->getValueID();
};
auto Limit = [&R, this](StoreInst *SI) {
unsigned EltSize = DL->getTypeSizeInBits(SI->getValueOperand()->getType());
return R.getMinVF(EltSize);
};
// Attempt to sort and vectorize each of the store-groups.
for (auto &Pair : Stores) {
if (Pair.second.size() < 2)
continue;
LLVM_DEBUG(dbgs() << "SLP: Analyzing a store chain of length "
<< Pair.second.size() << ".\n");
if (!isValidElementType(Pair.second.front()->getValueOperand()->getType()))
continue;
Changed |= tryToVectorizeSequence<StoreInst>(
Pair.second, Limit, StoreSorter, AreCompatibleStores,
[this, &R](ArrayRef<StoreInst *> Candidates, bool) {
return vectorizeStores(Candidates, R);
},
/*LimitForRegisterSize=*/false);
}
return Changed;
}
char SLPVectorizer::ID = 0;
static const char lv_name[] = "SLP Vectorizer";
INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_DEPENDENCY(InjectTLIMappingsLegacy)
INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)
Pass *llvm::createSLPVectorizerPass() { return new SLPVectorizer(); }
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Writer.cpp b/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Writer.cpp
index cbd0e4261238..fcbfef96d860 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Writer.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Writer.cpp
@@ -1,457 +1,466 @@
//===- Writer.cpp ---------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "Writer.h"
#include "Object.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"
#include <cstddef>
#include <cstdint>
namespace llvm {
namespace objcopy {
namespace coff {
using namespace object;
using namespace COFF;
Error COFFWriter::finalizeRelocTargets() {
for (Section &Sec : Obj.getMutableSections()) {
for (Relocation &R : Sec.Relocs) {
const Symbol *Sym = Obj.findSymbol(R.Target);
if (Sym == nullptr)
return createStringError(object_error::invalid_symbol_index,
"relocation target '%s' (%zu) not found",
R.TargetName.str().c_str(), R.Target);
R.Reloc.SymbolTableIndex = Sym->RawIndex;
}
}
return Error::success();
}
Error COFFWriter::finalizeSymbolContents() {
for (Symbol &Sym : Obj.getMutableSymbols()) {
if (Sym.TargetSectionId <= 0) {
// Undefined, or a special kind of symbol. These negative values
// are stored in the SectionNumber field which is unsigned.
Sym.Sym.SectionNumber = static_cast<uint32_t>(Sym.TargetSectionId);
} else {
const Section *Sec = Obj.findSection(Sym.TargetSectionId);
if (Sec == nullptr)
return createStringError(object_error::invalid_symbol_index,
"symbol '%s' points to a removed section",
Sym.Name.str().c_str());
Sym.Sym.SectionNumber = Sec->Index;
if (Sym.Sym.NumberOfAuxSymbols == 1 &&
Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC) {
coff_aux_section_definition *SD =
reinterpret_cast<coff_aux_section_definition *>(
Sym.AuxData[0].Opaque);
uint32_t SDSectionNumber;
if (Sym.AssociativeComdatTargetSectionId == 0) {
// Not a comdat associative section; just set the Number field to
// the number of the section itself.
SDSectionNumber = Sec->Index;
} else {
Sec = Obj.findSection(Sym.AssociativeComdatTargetSectionId);
if (Sec == nullptr)
return createStringError(
object_error::invalid_symbol_index,
"symbol '%s' is associative to a removed section",
Sym.Name.str().c_str());
SDSectionNumber = Sec->Index;
}
// Update the section definition with the new section number.
SD->NumberLowPart = static_cast<uint16_t>(SDSectionNumber);
SD->NumberHighPart = static_cast<uint16_t>(SDSectionNumber >> 16);
}
}
// Check that we actually have AuxData to match the weak symbol target
// we want to set. Only >= 1 would be required, but only == 1 makes sense.
if (Sym.WeakTargetSymbolId && Sym.Sym.NumberOfAuxSymbols == 1) {
coff_aux_weak_external *WE =
reinterpret_cast<coff_aux_weak_external *>(Sym.AuxData[0].Opaque);
const Symbol *Target = Obj.findSymbol(*Sym.WeakTargetSymbolId);
if (Target == nullptr)
return createStringError(object_error::invalid_symbol_index,
"symbol '%s' is missing its weak target",
Sym.Name.str().c_str());
WE->TagIndex = Target->RawIndex;
}
}
return Error::success();
}
void COFFWriter::layoutSections() {
for (auto &S : Obj.getMutableSections()) {
if (S.Header.SizeOfRawData > 0)
S.Header.PointerToRawData = FileSize;
FileSize += S.Header.SizeOfRawData; // For executables, this is already
// aligned to FileAlignment.
if (S.Relocs.size() >= 0xffff) {
S.Header.Characteristics |= COFF::IMAGE_SCN_LNK_NRELOC_OVFL;
S.Header.NumberOfRelocations = 0xffff;
S.Header.PointerToRelocations = FileSize;
FileSize += sizeof(coff_relocation);
} else {
S.Header.NumberOfRelocations = S.Relocs.size();
S.Header.PointerToRelocations = S.Relocs.size() ? FileSize : 0;
}
FileSize += S.Relocs.size() * sizeof(coff_relocation);
FileSize = alignTo(FileSize, FileAlignment);
if (S.Header.Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA)
SizeOfInitializedData += S.Header.SizeOfRawData;
}
}
-size_t COFFWriter::finalizeStringTable() {
+Expected<size_t> COFFWriter::finalizeStringTable() {
for (const auto &S : Obj.getSections())
if (S.Name.size() > COFF::NameSize)
StrTabBuilder.add(S.Name);
for (const auto &S : Obj.getSymbols())
if (S.Name.size() > COFF::NameSize)
StrTabBuilder.add(S.Name);
StrTabBuilder.finalize();
for (auto &S : Obj.getMutableSections()) {
memset(S.Header.Name, 0, sizeof(S.Header.Name));
- if (S.Name.size() > COFF::NameSize) {
- snprintf(S.Header.Name, sizeof(S.Header.Name), "/%d",
- (int)StrTabBuilder.getOffset(S.Name));
- } else {
+ if (S.Name.size() <= COFF::NameSize) {
+ // Short names can go in the field directly.
memcpy(S.Header.Name, S.Name.data(), S.Name.size());
+ } else {
+ // Offset of the section name in the string table.
+ size_t Offset = StrTabBuilder.getOffset(S.Name);
+ if (!COFF::encodeSectionName(S.Header.Name, Offset))
+ return createStringError(object_error::invalid_section_index,
+ "COFF string table is greater than 64GB, "
+ "unable to encode section name offset");
}
}
for (auto &S : Obj.getMutableSymbols()) {
if (S.Name.size() > COFF::NameSize) {
S.Sym.Name.Offset.Zeroes = 0;
S.Sym.Name.Offset.Offset = StrTabBuilder.getOffset(S.Name);
} else {
strncpy(S.Sym.Name.ShortName, S.Name.data(), COFF::NameSize);
}
}
return StrTabBuilder.getSize();
}
template <class SymbolTy>
std::pair<size_t, size_t> COFFWriter::finalizeSymbolTable() {
size_t RawSymIndex = 0;
for (auto &S : Obj.getMutableSymbols()) {
// Symbols normally have NumberOfAuxSymbols set correctly all the time.
// For file symbols, we need to know the output file's symbol size to be
// able to calculate the number of slots it occupies.
if (!S.AuxFile.empty())
S.Sym.NumberOfAuxSymbols =
alignTo(S.AuxFile.size(), sizeof(SymbolTy)) / sizeof(SymbolTy);
S.RawIndex = RawSymIndex;
RawSymIndex += 1 + S.Sym.NumberOfAuxSymbols;
}
return std::make_pair(RawSymIndex * sizeof(SymbolTy), sizeof(SymbolTy));
}
Error COFFWriter::finalize(bool IsBigObj) {
size_t SymTabSize, SymbolSize;
std::tie(SymTabSize, SymbolSize) = IsBigObj
? finalizeSymbolTable<coff_symbol32>()
: finalizeSymbolTable<coff_symbol16>();
if (Error E = finalizeRelocTargets())
return E;
if (Error E = finalizeSymbolContents())
return E;
size_t SizeOfHeaders = 0;
FileAlignment = 1;
size_t PeHeaderSize = 0;
if (Obj.IsPE) {
Obj.DosHeader.AddressOfNewExeHeader =
sizeof(Obj.DosHeader) + Obj.DosStub.size();
SizeOfHeaders += Obj.DosHeader.AddressOfNewExeHeader + sizeof(PEMagic);
FileAlignment = Obj.PeHeader.FileAlignment;
Obj.PeHeader.NumberOfRvaAndSize = Obj.DataDirectories.size();
PeHeaderSize = Obj.Is64 ? sizeof(pe32plus_header) : sizeof(pe32_header);
SizeOfHeaders +=
PeHeaderSize + sizeof(data_directory) * Obj.DataDirectories.size();
}
Obj.CoffFileHeader.NumberOfSections = Obj.getSections().size();
SizeOfHeaders +=
IsBigObj ? sizeof(coff_bigobj_file_header) : sizeof(coff_file_header);
SizeOfHeaders += sizeof(coff_section) * Obj.getSections().size();
SizeOfHeaders = alignTo(SizeOfHeaders, FileAlignment);
Obj.CoffFileHeader.SizeOfOptionalHeader =
PeHeaderSize + sizeof(data_directory) * Obj.DataDirectories.size();
FileSize = SizeOfHeaders;
SizeOfInitializedData = 0;
layoutSections();
if (Obj.IsPE) {
Obj.PeHeader.SizeOfHeaders = SizeOfHeaders;
Obj.PeHeader.SizeOfInitializedData = SizeOfInitializedData;
if (!Obj.getSections().empty()) {
const Section &S = Obj.getSections().back();
Obj.PeHeader.SizeOfImage =
alignTo(S.Header.VirtualAddress + S.Header.VirtualSize,
Obj.PeHeader.SectionAlignment);
}
// If the PE header had a checksum, clear it, since it isn't valid
// any longer. (We don't calculate a new one.)
Obj.PeHeader.CheckSum = 0;
}
- size_t StrTabSize = finalizeStringTable();
+ Expected<size_t> StrTabSizeOrErr = finalizeStringTable();
+ if (!StrTabSizeOrErr)
+ return StrTabSizeOrErr.takeError();
+
+ size_t StrTabSize = *StrTabSizeOrErr;
size_t PointerToSymbolTable = FileSize;
// StrTabSize <= 4 is the size of an empty string table, only consisting
// of the length field.
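// (A COFF string table begins with a 4-byte size field that counts itself,
// so a size of 4 means no strings are present.)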
if (SymTabSize == 0 && StrTabSize <= 4 && Obj.IsPE) {
// For executables, don't point to the symbol table and skip writing
// the length field, if both the symbol and string tables are empty.
PointerToSymbolTable = 0;
StrTabSize = 0;
}
size_t NumRawSymbols = SymTabSize / SymbolSize;
Obj.CoffFileHeader.PointerToSymbolTable = PointerToSymbolTable;
Obj.CoffFileHeader.NumberOfSymbols = NumRawSymbols;
FileSize += SymTabSize + StrTabSize;
FileSize = alignTo(FileSize, FileAlignment);
return Error::success();
}
void COFFWriter::writeHeaders(bool IsBigObj) {
uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart());
if (Obj.IsPE) {
memcpy(Ptr, &Obj.DosHeader, sizeof(Obj.DosHeader));
Ptr += sizeof(Obj.DosHeader);
memcpy(Ptr, Obj.DosStub.data(), Obj.DosStub.size());
Ptr += Obj.DosStub.size();
memcpy(Ptr, PEMagic, sizeof(PEMagic));
Ptr += sizeof(PEMagic);
}
if (!IsBigObj) {
memcpy(Ptr, &Obj.CoffFileHeader, sizeof(Obj.CoffFileHeader));
Ptr += sizeof(Obj.CoffFileHeader);
} else {
// Generate a coff_bigobj_file_header, filling it in with the values
// from Obj.CoffFileHeader. All extra fields that don't exist in
// coff_file_header can be set to hardcoded values.
coff_bigobj_file_header BigObjHeader;
BigObjHeader.Sig1 = IMAGE_FILE_MACHINE_UNKNOWN;
BigObjHeader.Sig2 = 0xffff;
BigObjHeader.Version = BigObjHeader::MinBigObjectVersion;
BigObjHeader.Machine = Obj.CoffFileHeader.Machine;
BigObjHeader.TimeDateStamp = Obj.CoffFileHeader.TimeDateStamp;
memcpy(BigObjHeader.UUID, BigObjMagic, sizeof(BigObjMagic));
BigObjHeader.unused1 = 0;
BigObjHeader.unused2 = 0;
BigObjHeader.unused3 = 0;
BigObjHeader.unused4 = 0;
// The value in Obj.CoffFileHeader.NumberOfSections is truncated, thus
// get the original one instead.
BigObjHeader.NumberOfSections = Obj.getSections().size();
BigObjHeader.PointerToSymbolTable = Obj.CoffFileHeader.PointerToSymbolTable;
BigObjHeader.NumberOfSymbols = Obj.CoffFileHeader.NumberOfSymbols;
memcpy(Ptr, &BigObjHeader, sizeof(BigObjHeader));
Ptr += sizeof(BigObjHeader);
}
if (Obj.IsPE) {
if (Obj.Is64) {
memcpy(Ptr, &Obj.PeHeader, sizeof(Obj.PeHeader));
Ptr += sizeof(Obj.PeHeader);
} else {
pe32_header PeHeader;
copyPeHeader(PeHeader, Obj.PeHeader);
// The pe32plus_header (stored in Object) lacks the BaseOfData field.
PeHeader.BaseOfData = Obj.BaseOfData;
memcpy(Ptr, &PeHeader, sizeof(PeHeader));
Ptr += sizeof(PeHeader);
}
for (const auto &DD : Obj.DataDirectories) {
memcpy(Ptr, &DD, sizeof(DD));
Ptr += sizeof(DD);
}
}
for (const auto &S : Obj.getSections()) {
memcpy(Ptr, &S.Header, sizeof(S.Header));
Ptr += sizeof(S.Header);
}
}
void COFFWriter::writeSections() {
for (const auto &S : Obj.getSections()) {
uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart()) +
S.Header.PointerToRawData;
ArrayRef<uint8_t> Contents = S.getContents();
std::copy(Contents.begin(), Contents.end(), Ptr);
// For executable sections, pad the remainder of the raw data size with
// 0xcc, which is int3 on x86.
if ((S.Header.Characteristics & IMAGE_SCN_CNT_CODE) &&
S.Header.SizeOfRawData > Contents.size())
memset(Ptr + Contents.size(), 0xcc,
S.Header.SizeOfRawData - Contents.size());
Ptr += S.Header.SizeOfRawData;
if (S.Relocs.size() >= 0xffff) {
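// With IMAGE_SCN_LNK_NRELOC_OVFL set, the real relocation count (including
// this placeholder entry) is stored in the VirtualAddress field of the
// first relocation written below.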
object::coff_relocation R;
R.VirtualAddress = S.Relocs.size() + 1;
R.SymbolTableIndex = 0;
R.Type = 0;
memcpy(Ptr, &R, sizeof(R));
Ptr += sizeof(R);
}
for (const auto &R : S.Relocs) {
memcpy(Ptr, &R.Reloc, sizeof(R.Reloc));
Ptr += sizeof(R.Reloc);
}
}
}
template <class SymbolTy> void COFFWriter::writeSymbolStringTables() {
uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart()) +
Obj.CoffFileHeader.PointerToSymbolTable;
for (const auto &S : Obj.getSymbols()) {
// Convert symbols back to the right size, from coff_symbol32.
copySymbol<SymbolTy, coff_symbol32>(*reinterpret_cast<SymbolTy *>(Ptr),
S.Sym);
Ptr += sizeof(SymbolTy);
if (!S.AuxFile.empty()) {
// For file symbols, just write the string into the aux symbol slots,
// assuming that the unwritten parts are initialized to zero in the memory
// mapped file.
std::copy(S.AuxFile.begin(), S.AuxFile.end(), Ptr);
Ptr += S.Sym.NumberOfAuxSymbols * sizeof(SymbolTy);
} else {
// For other auxiliary symbols, write their opaque payload into one symbol
// table slot each. For big object files, the symbols are larger than the
// opaque auxiliary symbol struct and we leave padding at the end of each
// entry.
for (const AuxSymbol &AuxSym : S.AuxData) {
ArrayRef<uint8_t> Ref = AuxSym.getRef();
std::copy(Ref.begin(), Ref.end(), Ptr);
Ptr += sizeof(SymbolTy);
}
}
}
if (StrTabBuilder.getSize() > 4 || !Obj.IsPE) {
// Always write a string table in object files, even an empty one.
StrTabBuilder.write(Ptr);
Ptr += StrTabBuilder.getSize();
}
}
Error COFFWriter::write(bool IsBigObj) {
if (Error E = finalize(IsBigObj))
return E;
Buf = WritableMemoryBuffer::getNewMemBuffer(FileSize);
if (!Buf)
return createStringError(llvm::errc::not_enough_memory,
"failed to allocate memory buffer of " +
Twine::utohexstr(FileSize) + " bytes.");
writeHeaders(IsBigObj);
writeSections();
if (IsBigObj)
writeSymbolStringTables<coff_symbol32>();
else
writeSymbolStringTables<coff_symbol16>();
if (Obj.IsPE)
if (Error E = patchDebugDirectory())
return E;
// TODO: Implement direct writing to the output stream (without intermediate
// memory buffer Buf).
Out.write(Buf->getBufferStart(), Buf->getBufferSize());
return Error::success();
}
Expected<uint32_t> COFFWriter::virtualAddressToFileAddress(uint32_t RVA) {
for (const auto &S : Obj.getSections()) {
if (RVA >= S.Header.VirtualAddress &&
RVA < S.Header.VirtualAddress + S.Header.SizeOfRawData)
return S.Header.PointerToRawData + RVA - S.Header.VirtualAddress;
}
return createStringError(object_error::parse_failed,
"debug directory payload not found");
}
// Locate which sections contain the debug directories, iterate over all
// the debug_directory structs in there, and set the PointerToRawData field
// in all of them, according to their new physical location in the file.
Error COFFWriter::patchDebugDirectory() {
if (Obj.DataDirectories.size() <= DEBUG_DIRECTORY)
return Error::success();
const data_directory *Dir = &Obj.DataDirectories[DEBUG_DIRECTORY];
if (Dir->Size <= 0)
return Error::success();
for (const auto &S : Obj.getSections()) {
if (Dir->RelativeVirtualAddress >= S.Header.VirtualAddress &&
Dir->RelativeVirtualAddress <
S.Header.VirtualAddress + S.Header.SizeOfRawData) {
if (Dir->RelativeVirtualAddress + Dir->Size >
S.Header.VirtualAddress + S.Header.SizeOfRawData)
return createStringError(object_error::parse_failed,
"debug directory extends past end of section");
size_t Offset = Dir->RelativeVirtualAddress - S.Header.VirtualAddress;
uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart()) +
S.Header.PointerToRawData + Offset;
uint8_t *End = Ptr + Dir->Size;
while (Ptr < End) {
debug_directory *Debug = reinterpret_cast<debug_directory *>(Ptr);
if (Debug->PointerToRawData) {
if (Expected<uint32_t> FilePosOrErr =
virtualAddressToFileAddress(Debug->AddressOfRawData))
Debug->PointerToRawData = *FilePosOrErr;
else
return FilePosOrErr.takeError();
}
Ptr += sizeof(debug_directory);
Offset += sizeof(debug_directory);
}
// Debug directory found and patched, all done.
return Error::success();
}
}
return createStringError(object_error::parse_failed,
"debug directory not found");
}
Error COFFWriter::write() {
bool IsBigObj = Obj.getSections().size() > MaxNumberOfSections16;
if (IsBigObj && Obj.IsPE)
return createStringError(object_error::parse_failed,
"too many sections for executable");
return write(IsBigObj);
}
} // end namespace coff
} // end namespace objcopy
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Writer.h b/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Writer.h
index eed43b3e5814..5758aadb5439 100644
--- a/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Writer.h
+++ b/contrib/llvm-project/llvm/tools/llvm-objcopy/COFF/Writer.h
@@ -1,63 +1,63 @@
//===- Writer.h -------------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_OBJCOPY_COFF_WRITER_H
#define LLVM_TOOLS_OBJCOPY_COFF_WRITER_H
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cstddef>
#include <utility>
namespace llvm {
namespace objcopy {
namespace coff {
struct Object;
class COFFWriter {
Object &Obj;
std::unique_ptr<WritableMemoryBuffer> Buf;
raw_ostream &Out;
size_t FileSize;
size_t FileAlignment;
size_t SizeOfInitializedData;
StringTableBuilder StrTabBuilder;
template <class SymbolTy> std::pair<size_t, size_t> finalizeSymbolTable();
Error finalizeRelocTargets();
Error finalizeSymbolContents();
void layoutSections();
- size_t finalizeStringTable();
+ Expected<size_t> finalizeStringTable();
Error finalize(bool IsBigObj);
void writeHeaders(bool IsBigObj);
void writeSections();
template <class SymbolTy> void writeSymbolStringTables();
Error write(bool IsBigObj);
Error patchDebugDirectory();
Expected<uint32_t> virtualAddressToFileAddress(uint32_t RVA);
public:
virtual ~COFFWriter() {}
Error write();
COFFWriter(Object &Obj, raw_ostream &Out)
: Obj(Obj), Out(Out), StrTabBuilder(StringTableBuilder::WinCOFF) {}
};
} // end namespace coff
} // end namespace objcopy
} // end namespace llvm
#endif // LLVM_TOOLS_OBJCOPY_COFF_WRITER_H
diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/ELFDumper.cpp b/contrib/llvm-project/llvm/tools/llvm-readobj/ELFDumper.cpp
index 04a67225401f..3d43d1a72e7e 100644
--- a/contrib/llvm-project/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1,7361 +1,7362 @@
//===- ELFDumper.cpp - ELF-specific dumper --------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the ELF-specific dumper for llvm-readobj.
///
//===----------------------------------------------------------------------===//
#include "ARMEHABIPrinter.h"
#include "DwarfCFIEHPrinter.h"
#include "ObjDumper.h"
#include "StackMapPrinter.h"
#include "llvm-readobj.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MsgPackDocument.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ELF.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ELFTypes.h"
#include "llvm/Object/Error.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/RelocationResolver.h"
#include "llvm/Object/StackMapParser.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/ARMAttributeParser.h"
#include "llvm/Support/ARMBuildAttributes.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MSP430AttributeParser.h"
#include "llvm/Support/MSP430Attributes.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MipsABIFlags.h"
#include "llvm/Support/RISCVAttributeParser.h"
#include "llvm/Support/RISCVAttributes.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cinttypes>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <memory>
#include <string>
#include <system_error>
#include <vector>
using namespace llvm;
using namespace llvm::object;
using namespace ELF;
#define LLVM_READOBJ_ENUM_CASE(ns, enum) \
case ns::enum: \
return #enum;
#define ENUM_ENT(enum, altName) \
{ #enum, altName, ELF::enum }
#define ENUM_ENT_1(enum) \
{ #enum, #enum, ELF::enum }
namespace {
template <class ELFT> struct RelSymbol {
RelSymbol(const typename ELFT::Sym *S, StringRef N)
: Sym(S), Name(N.str()) {}
const typename ELFT::Sym *Sym;
std::string Name;
};
/// Represents a contiguous uniform range in the file. We cannot just create a
/// range directly because when creating one of these from the .dynamic table
/// the size, entity size and virtual address are different entries in arbitrary
/// order (DT_REL, DT_RELSZ, DT_RELENT for example).
struct DynRegionInfo {
DynRegionInfo(const Binary &Owner, const ObjDumper &D)
: Obj(&Owner), Dumper(&D) {}
DynRegionInfo(const Binary &Owner, const ObjDumper &D, const uint8_t *A,
uint64_t S, uint64_t ES)
: Addr(A), Size(S), EntSize(ES), Obj(&Owner), Dumper(&D) {}
/// Address in current address space.
const uint8_t *Addr = nullptr;
/// Size in bytes of the region.
uint64_t Size = 0;
/// Size of each entity in the region.
uint64_t EntSize = 0;
/// Owner object. Used for error reporting.
const Binary *Obj;
/// Dumper used for error reporting.
const ObjDumper *Dumper;
/// Error prefix. Used for error reporting to provide more information.
std::string Context;
/// Region size name. Used for error reporting.
StringRef SizePrintName = "size";
/// Entry size name. Used for error reporting. If this field is empty, errors
/// will not mention the entry size.
StringRef EntSizePrintName = "entry size";
template <typename Type> ArrayRef<Type> getAsArrayRef() const {
const Type *Start = reinterpret_cast<const Type *>(Addr);
if (!Start)
return {Start, Start};
const uint64_t Offset =
Addr - (const uint8_t *)Obj->getMemoryBufferRef().getBufferStart();
const uint64_t ObjSize = Obj->getMemoryBufferRef().getBufferSize();
if (Size > ObjSize - Offset) {
Dumper->reportUniqueWarning(
"unable to read data at 0x" + Twine::utohexstr(Offset) +
" of size 0x" + Twine::utohexstr(Size) + " (" + SizePrintName +
"): it goes past the end of the file of size 0x" +
Twine::utohexstr(ObjSize));
return {Start, Start};
}
if (EntSize == sizeof(Type) && (Size % EntSize == 0))
return {Start, Start + (Size / EntSize)};
std::string Msg;
if (!Context.empty())
Msg += Context + " has ";
Msg += ("invalid " + SizePrintName + " (0x" + Twine::utohexstr(Size) + ")")
.str();
if (!EntSizePrintName.empty())
Msg +=
(" or " + EntSizePrintName + " (0x" + Twine::utohexstr(EntSize) + ")")
.str();
Dumper->reportUniqueWarning(Msg);
return {Start, Start};
}
};
struct GroupMember {
StringRef Name;
uint64_t Index;
};
struct GroupSection {
StringRef Name;
std::string Signature;
uint64_t ShName;
uint64_t Index;
uint32_t Link;
uint32_t Info;
uint32_t Type;
std::vector<GroupMember> Members;
};
namespace {
struct NoteType {
uint32_t ID;
StringRef Name;
};
} // namespace
template <class ELFT> class Relocation {
public:
Relocation(const typename ELFT::Rel &R, bool IsMips64EL)
: Type(R.getType(IsMips64EL)), Symbol(R.getSymbol(IsMips64EL)),
Offset(R.r_offset), Info(R.r_info) {}
Relocation(const typename ELFT::Rela &R, bool IsMips64EL)
: Relocation((const typename ELFT::Rel &)R, IsMips64EL) {
Addend = R.r_addend;
}
uint32_t Type;
uint32_t Symbol;
typename ELFT::uint Offset;
typename ELFT::uint Info;
Optional<int64_t> Addend;
};
template <class ELFT> class MipsGOTParser;
template <typename ELFT> class ELFDumper : public ObjDumper {
LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
public:
ELFDumper(const object::ELFObjectFile<ELFT> &ObjF, ScopedPrinter &Writer);
void printUnwindInfo() override;
void printNeededLibraries() override;
void printHashTable() override;
void printGnuHashTable() override;
void printLoadName() override;
void printVersionInfo() override;
void printArchSpecificInfo() override;
void printStackMap() const override;
const object::ELFObjectFile<ELFT> &getElfObject() const { return ObjF; };
std::string describe(const Elf_Shdr &Sec) const;
unsigned getHashTableEntSize() const {
// EM_S390 and ELF::EM_ALPHA platforms use 8-byte entries in SHT_HASH
// sections. This violates the ELF specification.
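// (The generic ELF ABI defines SHT_HASH entries as 32-bit words even for
// 64-bit objects; the two targets checked below deviate from that.)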
if (Obj.getHeader().e_machine == ELF::EM_S390 ||
Obj.getHeader().e_machine == ELF::EM_ALPHA)
return 8;
return 4;
}
Elf_Dyn_Range dynamic_table() const {
// A valid .dynamic section contains an array of entries terminated
// with a DT_NULL entry. However, sometimes the section content may
// continue past the DT_NULL entry, so to dump the section correctly,
// we first find the end of the entries by iterating over them.
Elf_Dyn_Range Table = DynamicTable.template getAsArrayRef<Elf_Dyn>();
size_t Size = 0;
while (Size < Table.size())
if (Table[Size++].getTag() == DT_NULL)
break;
return Table.slice(0, Size);
}
Elf_Sym_Range dynamic_symbols() const {
if (!DynSymRegion)
return Elf_Sym_Range();
return DynSymRegion->template getAsArrayRef<Elf_Sym>();
}
const Elf_Shdr *findSectionByName(StringRef Name) const;
StringRef getDynamicStringTable() const { return DynamicStringTable; }
protected:
virtual void printVersionSymbolSection(const Elf_Shdr *Sec) = 0;
virtual void printVersionDefinitionSection(const Elf_Shdr *Sec) = 0;
virtual void printVersionDependencySection(const Elf_Shdr *Sec) = 0;
void
printDependentLibsHelper(function_ref<void(const Elf_Shdr &)> OnSectionStart,
function_ref<void(StringRef, uint64_t)> OnLibEntry);
virtual void printRelRelaReloc(const Relocation<ELFT> &R,
const RelSymbol<ELFT> &RelSym) = 0;
virtual void printRelrReloc(const Elf_Relr &R) = 0;
virtual void printDynamicRelocHeader(unsigned Type, StringRef Name,
const DynRegionInfo &Reg) {}
void printReloc(const Relocation<ELFT> &R, unsigned RelIndex,
const Elf_Shdr &Sec, const Elf_Shdr *SymTab);
void printDynamicReloc(const Relocation<ELFT> &R);
void printDynamicRelocationsHelper();
void printRelocationsHelper(const Elf_Shdr &Sec);
void forEachRelocationDo(
const Elf_Shdr &Sec, bool RawRelr,
llvm::function_ref<void(const Relocation<ELFT> &, unsigned,
const Elf_Shdr &, const Elf_Shdr *)>
RelRelaFn,
llvm::function_ref<void(const Elf_Relr &)> RelrFn);
virtual void printSymtabMessage(const Elf_Shdr *Symtab, size_t Offset,
bool NonVisibilityBitsUsed) const {};
virtual void printSymbol(const Elf_Sym &Symbol, unsigned SymIndex,
DataRegion<Elf_Word> ShndxTable,
Optional<StringRef> StrTable, bool IsDynamic,
bool NonVisibilityBitsUsed) const = 0;
virtual void printMipsABIFlags() = 0;
virtual void printMipsGOT(const MipsGOTParser<ELFT> &Parser) = 0;
virtual void printMipsPLT(const MipsGOTParser<ELFT> &Parser) = 0;
Expected<ArrayRef<Elf_Versym>>
getVersionTable(const Elf_Shdr &Sec, ArrayRef<Elf_Sym> *SymTab,
StringRef *StrTab, const Elf_Shdr **SymTabSec) const;
StringRef getPrintableSectionName(const Elf_Shdr &Sec) const;
std::vector<GroupSection> getGroups();
// Returns the function symbol indexes for the given address. Matches the
// symbol's section with FunctionSec when specified.
// Returns an empty vector if no function symbol can be found for the address
// or in case it is not defined in the specified section.
SmallVector<uint32_t>
getSymbolIndexesForFunctionAddress(uint64_t SymValue,
Optional<const Elf_Shdr *> FunctionSec);
bool printFunctionStackSize(uint64_t SymValue,
Optional<const Elf_Shdr *> FunctionSec,
const Elf_Shdr &StackSizeSec, DataExtractor Data,
uint64_t *Offset);
void printStackSize(const Relocation<ELFT> &R, const Elf_Shdr &RelocSec,
unsigned Ndx, const Elf_Shdr *SymTab,
const Elf_Shdr *FunctionSec, const Elf_Shdr &StackSizeSec,
const RelocationResolver &Resolver, DataExtractor Data);
virtual void printStackSizeEntry(uint64_t Size,
ArrayRef<std::string> FuncNames) = 0;
void printRelocatableStackSizes(std::function<void()> PrintHeader);
void printNonRelocatableStackSizes(std::function<void()> PrintHeader);
/// Retrieves sections with corresponding relocation sections based on
/// IsMatch.
void getSectionAndRelocations(
std::function<bool(const Elf_Shdr &)> IsMatch,
llvm::MapVector<const Elf_Shdr *, const Elf_Shdr *> &SecToRelocMap);
const object::ELFObjectFile<ELFT> &ObjF;
const ELFFile<ELFT> &Obj;
StringRef FileName;
Expected<DynRegionInfo> createDRI(uint64_t Offset, uint64_t Size,
uint64_t EntSize) {
if (Offset + Size < Offset || Offset + Size > Obj.getBufSize())
return createError("offset (0x" + Twine::utohexstr(Offset) +
") + size (0x" + Twine::utohexstr(Size) +
") is greater than the file size (0x" +
Twine::utohexstr(Obj.getBufSize()) + ")");
return DynRegionInfo(ObjF, *this, Obj.base() + Offset, Size, EntSize);
}
void printAttributes(unsigned, std::unique_ptr<ELFAttributeParser>,
support::endianness);
void printMipsReginfo();
void printMipsOptions();
std::pair<const Elf_Phdr *, const Elf_Shdr *> findDynamic();
void loadDynamicTable();
void parseDynamicTable();
Expected<StringRef> getSymbolVersion(const Elf_Sym &Sym,
bool &IsDefault) const;
Expected<SmallVector<Optional<VersionEntry>, 0> *> getVersionMap() const;
DynRegionInfo DynRelRegion;
DynRegionInfo DynRelaRegion;
DynRegionInfo DynRelrRegion;
DynRegionInfo DynPLTRelRegion;
Optional<DynRegionInfo> DynSymRegion;
DynRegionInfo DynSymTabShndxRegion;
DynRegionInfo DynamicTable;
StringRef DynamicStringTable;
const Elf_Hash *HashTable = nullptr;
const Elf_GnuHash *GnuHashTable = nullptr;
const Elf_Shdr *DotSymtabSec = nullptr;
const Elf_Shdr *DotDynsymSec = nullptr;
const Elf_Shdr *DotAddrsigSec = nullptr;
DenseMap<const Elf_Shdr *, ArrayRef<Elf_Word>> ShndxTables;
Optional<uint64_t> SONameOffset;
Optional<DenseMap<uint64_t, std::vector<uint32_t>>> AddressToIndexMap;
const Elf_Shdr *SymbolVersionSection = nullptr; // .gnu.version
const Elf_Shdr *SymbolVersionNeedSection = nullptr; // .gnu.version_r
const Elf_Shdr *SymbolVersionDefSection = nullptr; // .gnu.version_d
std::string getFullSymbolName(const Elf_Sym &Symbol, unsigned SymIndex,
DataRegion<Elf_Word> ShndxTable,
Optional<StringRef> StrTable,
bool IsDynamic) const;
Expected<unsigned>
getSymbolSectionIndex(const Elf_Sym &Symbol, unsigned SymIndex,
DataRegion<Elf_Word> ShndxTable) const;
Expected<StringRef> getSymbolSectionName(const Elf_Sym &Symbol,
unsigned SectionIndex) const;
std::string getStaticSymbolName(uint32_t Index) const;
StringRef getDynamicString(uint64_t Value) const;
void printSymbolsHelper(bool IsDynamic) const;
std::string getDynamicEntry(uint64_t Type, uint64_t Value) const;
Expected<RelSymbol<ELFT>> getRelocationTarget(const Relocation<ELFT> &R,
const Elf_Shdr *SymTab) const;
ArrayRef<Elf_Word> getShndxTable(const Elf_Shdr *Symtab) const;
private:
mutable SmallVector<Optional<VersionEntry>, 0> VersionMap;
};
template <class ELFT>
std::string ELFDumper<ELFT>::describe(const Elf_Shdr &Sec) const {
return ::describe(Obj, Sec);
}
namespace {
template <class ELFT> struct SymtabLink {
typename ELFT::SymRange Symbols;
StringRef StringTable;
const typename ELFT::Shdr *SymTab;
};
// Returns the linked symbol table, symbols and associated string table for a
// given section.
template <class ELFT>
Expected<SymtabLink<ELFT>> getLinkAsSymtab(const ELFFile<ELFT> &Obj,
const typename ELFT::Shdr &Sec,
unsigned ExpectedType) {
Expected<const typename ELFT::Shdr *> SymtabOrErr =
Obj.getSection(Sec.sh_link);
if (!SymtabOrErr)
return createError("invalid section linked to " + describe(Obj, Sec) +
": " + toString(SymtabOrErr.takeError()));
if ((*SymtabOrErr)->sh_type != ExpectedType)
return createError(
"invalid section linked to " + describe(Obj, Sec) + ": expected " +
object::getELFSectionTypeName(Obj.getHeader().e_machine, ExpectedType) +
", but got " +
object::getELFSectionTypeName(Obj.getHeader().e_machine,
(*SymtabOrErr)->sh_type));
Expected<StringRef> StrTabOrErr = Obj.getLinkAsStrtab(**SymtabOrErr);
if (!StrTabOrErr)
return createError(
"can't get a string table for the symbol table linked to " +
describe(Obj, Sec) + ": " + toString(StrTabOrErr.takeError()));
Expected<typename ELFT::SymRange> SymsOrErr = Obj.symbols(*SymtabOrErr);
if (!SymsOrErr)
return createError("unable to read symbols from the " + describe(Obj, Sec) +
": " + toString(SymsOrErr.takeError()));
return SymtabLink<ELFT>{*SymsOrErr, *StrTabOrErr, *SymtabOrErr};
}
} // namespace
template <class ELFT>
Expected<ArrayRef<typename ELFT::Versym>>
ELFDumper<ELFT>::getVersionTable(const Elf_Shdr &Sec, ArrayRef<Elf_Sym> *SymTab,
StringRef *StrTab,
const Elf_Shdr **SymTabSec) const {
assert((!SymTab && !StrTab && !SymTabSec) || (SymTab && StrTab && SymTabSec));
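// SHT_GNU_versym entries are 16-bit values, so the section data must be
// 2-byte aligned in memory before it can be viewed as an array of Elf_Versym.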
if (reinterpret_cast<uintptr_t>(Obj.base() + Sec.sh_offset) %
sizeof(uint16_t) !=
0)
return createError("the " + describe(Sec) + " is misaligned");
Expected<ArrayRef<Elf_Versym>> VersionsOrErr =
Obj.template getSectionContentsAsArray<Elf_Versym>(Sec);
if (!VersionsOrErr)
return createError("cannot read content of " + describe(Sec) + ": " +
toString(VersionsOrErr.takeError()));
Expected<SymtabLink<ELFT>> SymTabOrErr =
getLinkAsSymtab(Obj, Sec, SHT_DYNSYM);
if (!SymTabOrErr) {
reportUniqueWarning(SymTabOrErr.takeError());
return *VersionsOrErr;
}
if (SymTabOrErr->Symbols.size() != VersionsOrErr->size())
reportUniqueWarning(describe(Sec) + ": the number of entries (" +
Twine(VersionsOrErr->size()) +
") does not match the number of symbols (" +
Twine(SymTabOrErr->Symbols.size()) +
") in the symbol table with index " +
Twine(Sec.sh_link));
if (SymTab) {
*SymTab = SymTabOrErr->Symbols;
*StrTab = SymTabOrErr->StringTable;
*SymTabSec = SymTabOrErr->SymTab;
}
return *VersionsOrErr;
}
template <class ELFT>
void ELFDumper<ELFT>::printSymbolsHelper(bool IsDynamic) const {
Optional<StringRef> StrTable;
size_t Entries = 0;
Elf_Sym_Range Syms(nullptr, nullptr);
const Elf_Shdr *SymtabSec = IsDynamic ? DotDynsymSec : DotSymtabSec;
if (IsDynamic) {
StrTable = DynamicStringTable;
Syms = dynamic_symbols();
Entries = Syms.size();
} else if (DotSymtabSec) {
if (Expected<StringRef> StrTableOrErr =
Obj.getStringTableForSymtab(*DotSymtabSec))
StrTable = *StrTableOrErr;
else
reportUniqueWarning(
"unable to get the string table for the SHT_SYMTAB section: " +
toString(StrTableOrErr.takeError()));
if (Expected<Elf_Sym_Range> SymsOrErr = Obj.symbols(DotSymtabSec))
Syms = *SymsOrErr;
else
reportUniqueWarning(
"unable to read symbols from the SHT_SYMTAB section: " +
toString(SymsOrErr.takeError()));
Entries = DotSymtabSec->getEntityCount();
}
if (Syms.empty())
return;
// The st_other field has two logical parts: the low two bits hold the symbol
// visibility (STV_*) and the remaining bits hold platform-specific values.
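// For example (illustrative values): st_other == 0x82 keeps STV_HIDDEN (0x2)
// in the low bits and sets 0x80, a platform-specific bit (e.g.
// STO_AARCH64_VARIANT_PCS or STO_MIPS_MICROMIPS), so NonVisibilityBitsUsed
// becomes true below.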
bool NonVisibilityBitsUsed =
llvm::any_of(Syms, [](const Elf_Sym &S) { return S.st_other & ~0x3; });
DataRegion<Elf_Word> ShndxTable =
IsDynamic ? DataRegion<Elf_Word>(
(const Elf_Word *)this->DynSymTabShndxRegion.Addr,
this->getElfObject().getELFFile().end())
: DataRegion<Elf_Word>(this->getShndxTable(SymtabSec));
printSymtabMessage(SymtabSec, Entries, NonVisibilityBitsUsed);
for (const Elf_Sym &Sym : Syms)
printSymbol(Sym, &Sym - Syms.begin(), ShndxTable, StrTable, IsDynamic,
NonVisibilityBitsUsed);
}
template <typename ELFT> class GNUELFDumper : public ELFDumper<ELFT> {
formatted_raw_ostream &OS;
public:
LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
GNUELFDumper(const object::ELFObjectFile<ELFT> &ObjF, ScopedPrinter &Writer)
: ELFDumper<ELFT>(ObjF, Writer),
OS(static_cast<formatted_raw_ostream &>(Writer.getOStream())) {
assert(&this->W.getOStream() == &llvm::fouts());
}
void printFileSummary(StringRef FileStr, ObjectFile &Obj,
ArrayRef<std::string> InputFilenames,
const Archive *A) override;
void printFileHeaders() override;
void printGroupSections() override;
void printRelocations() override;
void printSectionHeaders() override;
void printSymbols(bool PrintSymbols, bool PrintDynamicSymbols) override;
void printHashSymbols() override;
void printSectionDetails() override;
void printDependentLibs() override;
void printDynamicTable() override;
void printDynamicRelocations() override;
void printSymtabMessage(const Elf_Shdr *Symtab, size_t Offset,
bool NonVisibilityBitsUsed) const override;
void printProgramHeaders(bool PrintProgramHeaders,
cl::boolOrDefault PrintSectionMapping) override;
void printVersionSymbolSection(const Elf_Shdr *Sec) override;
void printVersionDefinitionSection(const Elf_Shdr *Sec) override;
void printVersionDependencySection(const Elf_Shdr *Sec) override;
void printHashHistograms() override;
void printCGProfile() override;
void printBBAddrMaps() override;
void printAddrsig() override;
void printNotes() override;
void printELFLinkerOptions() override;
void printStackSizes() override;
private:
void printHashHistogram(const Elf_Hash &HashTable);
void printGnuHashHistogram(const Elf_GnuHash &GnuHashTable);
void printHashTableSymbols(const Elf_Hash &HashTable);
void printGnuHashTableSymbols(const Elf_GnuHash &GnuHashTable);
struct Field {
std::string Str;
unsigned Column;
Field(StringRef S, unsigned Col) : Str(std::string(S)), Column(Col) {}
Field(unsigned Col) : Column(Col) {}
};
template <typename T, typename TEnum>
std::string printFlags(T Value, ArrayRef<EnumEntry<TEnum>> EnumValues,
TEnum EnumMask1 = {}, TEnum EnumMask2 = {},
TEnum EnumMask3 = {}) const {
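// A flag whose value falls inside one of the EnumMaskN fields is treated as a
// multi-bit field value and is printed only when the masked portion of Value
// matches it exactly; ordinary single-bit flags are printed whenever all of
// their bits are set in Value.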
std::string Str;
for (const EnumEntry<TEnum> &Flag : EnumValues) {
if (Flag.Value == 0)
continue;
TEnum EnumMask{};
if (Flag.Value & EnumMask1)
EnumMask = EnumMask1;
else if (Flag.Value & EnumMask2)
EnumMask = EnumMask2;
else if (Flag.Value & EnumMask3)
EnumMask = EnumMask3;
bool IsEnum = (Flag.Value & EnumMask) != 0;
if ((!IsEnum && (Value & Flag.Value) == Flag.Value) ||
(IsEnum && (Value & EnumMask) == Flag.Value)) {
if (!Str.empty())
Str += ", ";
Str += Flag.AltName;
}
}
return Str;
}
formatted_raw_ostream &printField(struct Field F) const {
if (F.Column != 0)
OS.PadToColumn(F.Column);
OS << F.Str;
OS.flush();
return OS;
}
void printHashedSymbol(const Elf_Sym *Sym, unsigned SymIndex,
DataRegion<Elf_Word> ShndxTable, StringRef StrTable,
uint32_t Bucket);
void printRelrReloc(const Elf_Relr &R) override;
void printRelRelaReloc(const Relocation<ELFT> &R,
const RelSymbol<ELFT> &RelSym) override;
void printSymbol(const Elf_Sym &Symbol, unsigned SymIndex,
DataRegion<Elf_Word> ShndxTable,
Optional<StringRef> StrTable, bool IsDynamic,
bool NonVisibilityBitsUsed) const override;
void printDynamicRelocHeader(unsigned Type, StringRef Name,
const DynRegionInfo &Reg) override;
std::string getSymbolSectionNdx(const Elf_Sym &Symbol, unsigned SymIndex,
DataRegion<Elf_Word> ShndxTable) const;
void printProgramHeaders() override;
void printSectionMapping() override;
void printGNUVersionSectionProlog(const typename ELFT::Shdr &Sec,
const Twine &Label, unsigned EntriesNum);
void printStackSizeEntry(uint64_t Size,
ArrayRef<std::string> FuncNames) override;
void printMipsGOT(const MipsGOTParser<ELFT> &Parser) override;
void printMipsPLT(const MipsGOTParser<ELFT> &Parser) override;
void printMipsABIFlags() override;
};
template <typename ELFT> class LLVMELFDumper : public ELFDumper<ELFT> {
public:
LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
LLVMELFDumper(const object::ELFObjectFile<ELFT> &ObjF, ScopedPrinter &Writer)
: ELFDumper<ELFT>(ObjF, Writer), W(Writer) {}
void printFileHeaders() override;
void printGroupSections() override;
void printRelocations() override;
void printSectionHeaders() override;
void printSymbols(bool PrintSymbols, bool PrintDynamicSymbols) override;
void printDependentLibs() override;
void printDynamicTable() override;
void printDynamicRelocations() override;
void printProgramHeaders(bool PrintProgramHeaders,
cl::boolOrDefault PrintSectionMapping) override;
void printVersionSymbolSection(const Elf_Shdr *Sec) override;
void printVersionDefinitionSection(const Elf_Shdr *Sec) override;
void printVersionDependencySection(const Elf_Shdr *Sec) override;
void printHashHistograms() override;
void printCGProfile() override;
void printBBAddrMaps() override;
void printAddrsig() override;
void printNotes() override;
void printELFLinkerOptions() override;
void printStackSizes() override;
private:
void printRelrReloc(const Elf_Relr &R) override;
void printRelRelaReloc(const Relocation<ELFT> &R,
const RelSymbol<ELFT> &RelSym) override;
void printSymbolSection(const Elf_Sym &Symbol, unsigned SymIndex,
DataRegion<Elf_Word> ShndxTable) const;
void printSymbol(const Elf_Sym &Symbol, unsigned SymIndex,
DataRegion<Elf_Word> ShndxTable,
Optional<StringRef> StrTable, bool IsDynamic,
bool /*NonVisibilityBitsUsed*/) const override;
void printProgramHeaders() override;
void printSectionMapping() override {}
void printStackSizeEntry(uint64_t Size,
ArrayRef<std::string> FuncNames) override;
void printMipsGOT(const MipsGOTParser<ELFT> &Parser) override;
void printMipsPLT(const MipsGOTParser<ELFT> &Parser) override;
void printMipsABIFlags() override;
protected:
ScopedPrinter &W;
};
// JSONELFDumper shares most of the same implementation as LLVMELFDumper except
// it uses a JSONScopedPrinter.
template <typename ELFT> class JSONELFDumper : public LLVMELFDumper<ELFT> {
public:
LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
JSONELFDumper(const object::ELFObjectFile<ELFT> &ObjF, ScopedPrinter &Writer)
: LLVMELFDumper<ELFT>(ObjF, Writer) {}
void printFileSummary(StringRef FileStr, ObjectFile &Obj,
ArrayRef<std::string> InputFilenames,
const Archive *A) override;
private:
std::unique_ptr<DictScope> FileScope;
};
} // end anonymous namespace
namespace llvm {
template <class ELFT>
static std::unique_ptr<ObjDumper>
createELFDumper(const ELFObjectFile<ELFT> &Obj, ScopedPrinter &Writer) {
if (opts::Output == opts::GNU)
return std::make_unique<GNUELFDumper<ELFT>>(Obj, Writer);
else if (opts::Output == opts::JSON)
return std::make_unique<JSONELFDumper<ELFT>>(Obj, Writer);
return std::make_unique<LLVMELFDumper<ELFT>>(Obj, Writer);
}
std::unique_ptr<ObjDumper> createELFDumper(const object::ELFObjectFileBase &Obj,
ScopedPrinter &Writer) {
// Little-endian 32-bit
if (const ELF32LEObjectFile *ELFObj = dyn_cast<ELF32LEObjectFile>(&Obj))
return createELFDumper(*ELFObj, Writer);
// Big-endian 32-bit
if (const ELF32BEObjectFile *ELFObj = dyn_cast<ELF32BEObjectFile>(&Obj))
return createELFDumper(*ELFObj, Writer);
// Little-endian 64-bit
if (const ELF64LEObjectFile *ELFObj = dyn_cast<ELF64LEObjectFile>(&Obj))
return createELFDumper(*ELFObj, Writer);
// Big-endian 64-bit
return createELFDumper(*cast<ELF64BEObjectFile>(&Obj), Writer);
}
} // end namespace llvm
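// Illustrative use of the factory above (a hedged sketch; "Binary" stands for
// a hypothetical, already-opened object::Binary, and the real driver code in
// llvm-readobj may differ):
//
//   ScopedPrinter Writer(fouts());
//   if (const auto *ELFObj = dyn_cast<object::ELFObjectFileBase>(&Binary))
//     createELFDumper(*ELFObj, Writer)->printFileHeaders();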
template <class ELFT>
Expected<SmallVector<Optional<VersionEntry>, 0> *>
ELFDumper<ELFT>::getVersionMap() const {
// If the VersionMap has already been loaded or if there is no dynamic symtab
// or version table, there is nothing to do.
if (!VersionMap.empty() || !DynSymRegion || !SymbolVersionSection)
return &VersionMap;
Expected<SmallVector<Optional<VersionEntry>, 0>> MapOrErr =
Obj.loadVersionMap(SymbolVersionNeedSection, SymbolVersionDefSection);
if (MapOrErr)
VersionMap = *MapOrErr;
else
return MapOrErr.takeError();
return &VersionMap;
}
template <typename ELFT>
Expected<StringRef> ELFDumper<ELFT>::getSymbolVersion(const Elf_Sym &Sym,
bool &IsDefault) const {
// This is a dynamic symbol. Look in the GNU symbol version table.
if (!SymbolVersionSection) {
// No version table.
IsDefault = false;
return "";
}
assert(DynSymRegion && "DynSymRegion has not been initialised");
// Determine the position in the symbol table of this entry.
size_t EntryIndex = (reinterpret_cast<uintptr_t>(&Sym) -
reinterpret_cast<uintptr_t>(DynSymRegion->Addr)) /
sizeof(Elf_Sym);
// Get the corresponding version index entry.
Expected<const Elf_Versym *> EntryOrErr =
Obj.template getEntry<Elf_Versym>(*SymbolVersionSection, EntryIndex);
if (!EntryOrErr)
return EntryOrErr.takeError();
unsigned Version = (*EntryOrErr)->vs_index;
if (Version == VER_NDX_LOCAL || Version == VER_NDX_GLOBAL) {
IsDefault = false;
return "";
}
Expected<SmallVector<Optional<VersionEntry>, 0> *> MapOrErr =
getVersionMap();
if (!MapOrErr)
return MapOrErr.takeError();
return Obj.getSymbolVersionByIndex(Version, IsDefault, **MapOrErr,
Sym.st_shndx == ELF::SHN_UNDEF);
}
template <typename ELFT>
Expected<RelSymbol<ELFT>>
ELFDumper<ELFT>::getRelocationTarget(const Relocation<ELFT> &R,
const Elf_Shdr *SymTab) const {
if (R.Symbol == 0)
return RelSymbol<ELFT>(nullptr, "");
Expected<const Elf_Sym *> SymOrErr =
Obj.template getEntry<Elf_Sym>(*SymTab, R.Symbol);
if (!SymOrErr)
return createError("unable to read an entry with index " + Twine(R.Symbol) +
" from " + describe(*SymTab) + ": " +
toString(SymOrErr.takeError()));
const Elf_Sym *Sym = *SymOrErr;
if (!Sym)
return RelSymbol<ELFT>(nullptr, "");
Expected<StringRef> StrTableOrErr = Obj.getStringTableForSymtab(*SymTab);
if (!StrTableOrErr)
return StrTableOrErr.takeError();
const Elf_Sym *FirstSym =
cantFail(Obj.template getEntry<Elf_Sym>(*SymTab, 0));
std::string SymbolName =
getFullSymbolName(*Sym, Sym - FirstSym, getShndxTable(SymTab),
*StrTableOrErr, SymTab->sh_type == SHT_DYNSYM);
return RelSymbol<ELFT>(Sym, SymbolName);
}
template <typename ELFT>
ArrayRef<typename ELFT::Word>
ELFDumper<ELFT>::getShndxTable(const Elf_Shdr *Symtab) const {
if (Symtab) {
auto It = ShndxTables.find(Symtab);
if (It != ShndxTables.end())
return It->second;
}
return {};
}
static std::string maybeDemangle(StringRef Name) {
return opts::Demangle ? demangle(std::string(Name)) : Name.str();
}
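// With demangling requested (the --demangle option), a mangled name such as
// "_Z3foov" is printed as "foo()"; otherwise the raw name is returned
// unchanged.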
template <typename ELFT>
std::string ELFDumper<ELFT>::getStaticSymbolName(uint32_t Index) const {
auto Warn = [&](Error E) -> std::string {
reportUniqueWarning("unable to read the name of symbol with index " +
Twine(Index) + ": " + toString(std::move(E)));
return "<?>";
};
Expected<const typename ELFT::Sym *> SymOrErr =
Obj.getSymbol(DotSymtabSec, Index);
if (!SymOrErr)
return Warn(SymOrErr.takeError());
Expected<StringRef> StrTabOrErr = Obj.getStringTableForSymtab(*DotSymtabSec);
if (!StrTabOrErr)
return Warn(StrTabOrErr.takeError());
Expected<StringRef> NameOrErr = (*SymOrErr)->getName(*StrTabOrErr);
if (!NameOrErr)
return Warn(NameOrErr.takeError());
return maybeDemangle(*NameOrErr);
}
template <typename ELFT>
std::string ELFDumper<ELFT>::getFullSymbolName(const Elf_Sym &Symbol,
unsigned SymIndex,
DataRegion<Elf_Word> ShndxTable,
Optional<StringRef> StrTable,
bool IsDynamic) const {
if (!StrTable)
return "<?>";
std::string SymbolName;
if (Expected<StringRef> NameOrErr = Symbol.getName(*StrTable)) {
SymbolName = maybeDemangle(*NameOrErr);
} else {
reportUniqueWarning(NameOrErr.takeError());
return "<?>";
}
if (SymbolName.empty() && Symbol.getType() == ELF::STT_SECTION) {
Expected<unsigned> SectionIndex =
getSymbolSectionIndex(Symbol, SymIndex, ShndxTable);
if (!SectionIndex) {
reportUniqueWarning(SectionIndex.takeError());
return "<?>";
}
Expected<StringRef> NameOrErr = getSymbolSectionName(Symbol, *SectionIndex);
if (!NameOrErr) {
reportUniqueWarning(NameOrErr.takeError());
return ("<section " + Twine(*SectionIndex) + ">").str();
}
return std::string(*NameOrErr);
}
if (!IsDynamic)
return SymbolName;
bool IsDefault;
Expected<StringRef> VersionOrErr = getSymbolVersion(Symbol, IsDefault);
if (!VersionOrErr) {
reportUniqueWarning(VersionOrErr.takeError());
return SymbolName + "@<corrupt>";
}
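// Append the version in GNU style: "@@" marks the default version of a
// symbol, "@" a non-default one, e.g. "foo@@GLIBC_2.34" vs. "foo@GLIBC_2.2.5"
// (version names here are only illustrative).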
if (!VersionOrErr->empty()) {
SymbolName += (IsDefault ? "@@" : "@");
SymbolName += *VersionOrErr;
}
return SymbolName;
}
template <typename ELFT>
Expected<unsigned>
ELFDumper<ELFT>::getSymbolSectionIndex(const Elf_Sym &Symbol, unsigned SymIndex,
DataRegion<Elf_Word> ShndxTable) const {
unsigned Ndx = Symbol.st_shndx;
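// st_shndx values in [SHN_LORESERVE, SHN_HIRESERVE] are reserved and do not
// index the section header table; SHN_XINDEX means the real index is stored
// in the corresponding SHT_SYMTAB_SHNDX table entry.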
if (Ndx == SHN_XINDEX)
return object::getExtendedSymbolTableIndex<ELFT>(Symbol, SymIndex,
ShndxTable);
if (Ndx != SHN_UNDEF && Ndx < SHN_LORESERVE)
return Ndx;
auto CreateErr = [&](const Twine &Name, Optional<unsigned> Offset = None) {
std::string Desc;
if (Offset)
Desc = (Name + "+0x" + Twine::utohexstr(*Offset)).str();
else
Desc = Name.str();
return createError(
"unable to get section index for symbol with st_shndx = 0x" +
Twine::utohexstr(Ndx) + " (" + Desc + ")");
};
if (Ndx >= ELF::SHN_LOPROC && Ndx <= ELF::SHN_HIPROC)
return CreateErr("SHN_LOPROC", Ndx - ELF::SHN_LOPROC);
if (Ndx >= ELF::SHN_LOOS && Ndx <= ELF::SHN_HIOS)
return CreateErr("SHN_LOOS", Ndx - ELF::SHN_LOOS);
if (Ndx == ELF::SHN_UNDEF)
return CreateErr("SHN_UNDEF");
if (Ndx == ELF::SHN_ABS)
return CreateErr("SHN_ABS");
if (Ndx == ELF::SHN_COMMON)
return CreateErr("SHN_COMMON");
return CreateErr("SHN_LORESERVE", Ndx - SHN_LORESERVE);
}
template <typename ELFT>
Expected<StringRef>
ELFDumper<ELFT>::getSymbolSectionName(const Elf_Sym &Symbol,
unsigned SectionIndex) const {
Expected<const Elf_Shdr *> SecOrErr = Obj.getSection(SectionIndex);
if (!SecOrErr)
return SecOrErr.takeError();
return Obj.getSectionName(**SecOrErr);
}
template <class ELFO>
static const typename ELFO::Elf_Shdr *
findNotEmptySectionByAddress(const ELFO &Obj, StringRef FileName,
uint64_t Addr) {
for (const typename ELFO::Elf_Shdr &Shdr : cantFail(Obj.sections()))
if (Shdr.sh_addr == Addr && Shdr.sh_size > 0)
return &Shdr;
return nullptr;
}
const EnumEntry<unsigned> ElfClass[] = {
{"None", "none", ELF::ELFCLASSNONE},
{"32-bit", "ELF32", ELF::ELFCLASS32},
{"64-bit", "ELF64", ELF::ELFCLASS64},
};
const EnumEntry<unsigned> ElfDataEncoding[] = {
{"None", "none", ELF::ELFDATANONE},
{"LittleEndian", "2's complement, little endian", ELF::ELFDATA2LSB},
{"BigEndian", "2's complement, big endian", ELF::ELFDATA2MSB},
};
const EnumEntry<unsigned> ElfObjectFileType[] = {
{"None", "NONE (none)", ELF::ET_NONE},
{"Relocatable", "REL (Relocatable file)", ELF::ET_REL},
{"Executable", "EXEC (Executable file)", ELF::ET_EXEC},
{"SharedObject", "DYN (Shared object file)", ELF::ET_DYN},
{"Core", "CORE (Core file)", ELF::ET_CORE},
};
const EnumEntry<unsigned> ElfOSABI[] = {
{"SystemV", "UNIX - System V", ELF::ELFOSABI_NONE},
{"HPUX", "UNIX - HP-UX", ELF::ELFOSABI_HPUX},
{"NetBSD", "UNIX - NetBSD", ELF::ELFOSABI_NETBSD},
{"GNU/Linux", "UNIX - GNU", ELF::ELFOSABI_LINUX},
{"GNU/Hurd", "GNU/Hurd", ELF::ELFOSABI_HURD},
{"Solaris", "UNIX - Solaris", ELF::ELFOSABI_SOLARIS},
{"AIX", "UNIX - AIX", ELF::ELFOSABI_AIX},
{"IRIX", "UNIX - IRIX", ELF::ELFOSABI_IRIX},
{"FreeBSD", "UNIX - FreeBSD", ELF::ELFOSABI_FREEBSD},
{"TRU64", "UNIX - TRU64", ELF::ELFOSABI_TRU64},
{"Modesto", "Novell - Modesto", ELF::ELFOSABI_MODESTO},
{"OpenBSD", "UNIX - OpenBSD", ELF::ELFOSABI_OPENBSD},
{"OpenVMS", "VMS - OpenVMS", ELF::ELFOSABI_OPENVMS},
{"NSK", "HP - Non-Stop Kernel", ELF::ELFOSABI_NSK},
{"AROS", "AROS", ELF::ELFOSABI_AROS},
{"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS},
{"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI},
{"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}
};
const EnumEntry<unsigned> AMDGPUElfOSABI[] = {
{"AMDGPU_HSA", "AMDGPU - HSA", ELF::ELFOSABI_AMDGPU_HSA},
{"AMDGPU_PAL", "AMDGPU - PAL", ELF::ELFOSABI_AMDGPU_PAL},
{"AMDGPU_MESA3D", "AMDGPU - MESA3D", ELF::ELFOSABI_AMDGPU_MESA3D}
};
const EnumEntry<unsigned> ARMElfOSABI[] = {
{"ARM", "ARM", ELF::ELFOSABI_ARM}
};
const EnumEntry<unsigned> C6000ElfOSABI[] = {
{"C6000_ELFABI", "Bare-metal C6000", ELF::ELFOSABI_C6000_ELFABI},
{"C6000_LINUX", "Linux C6000", ELF::ELFOSABI_C6000_LINUX}
};
const EnumEntry<unsigned> ElfMachineType[] = {
ENUM_ENT(EM_NONE, "None"),
ENUM_ENT(EM_M32, "WE32100"),
ENUM_ENT(EM_SPARC, "Sparc"),
ENUM_ENT(EM_386, "Intel 80386"),
ENUM_ENT(EM_68K, "MC68000"),
ENUM_ENT(EM_88K, "MC88000"),
ENUM_ENT(EM_IAMCU, "EM_IAMCU"),
ENUM_ENT(EM_860, "Intel 80860"),
ENUM_ENT(EM_MIPS, "MIPS R3000"),
ENUM_ENT(EM_S370, "IBM System/370"),
ENUM_ENT(EM_MIPS_RS3_LE, "MIPS R3000 little-endian"),
ENUM_ENT(EM_PARISC, "HPPA"),
ENUM_ENT(EM_VPP500, "Fujitsu VPP500"),
ENUM_ENT(EM_SPARC32PLUS, "Sparc v8+"),
ENUM_ENT(EM_960, "Intel 80960"),
ENUM_ENT(EM_PPC, "PowerPC"),
ENUM_ENT(EM_PPC64, "PowerPC64"),
ENUM_ENT(EM_S390, "IBM S/390"),
ENUM_ENT(EM_SPU, "SPU"),
ENUM_ENT(EM_V800, "NEC V800 series"),
ENUM_ENT(EM_FR20, "Fujitsu FR20"),
ENUM_ENT(EM_RH32, "TRW RH-32"),
ENUM_ENT(EM_RCE, "Motorola RCE"),
ENUM_ENT(EM_ARM, "ARM"),
ENUM_ENT(EM_ALPHA, "EM_ALPHA"),
ENUM_ENT(EM_SH, "Hitachi SH"),
ENUM_ENT(EM_SPARCV9, "Sparc v9"),
ENUM_ENT(EM_TRICORE, "Siemens Tricore"),
ENUM_ENT(EM_ARC, "ARC"),
ENUM_ENT(EM_H8_300, "Hitachi H8/300"),
ENUM_ENT(EM_H8_300H, "Hitachi H8/300H"),
ENUM_ENT(EM_H8S, "Hitachi H8S"),
ENUM_ENT(EM_H8_500, "Hitachi H8/500"),
ENUM_ENT(EM_IA_64, "Intel IA-64"),
ENUM_ENT(EM_MIPS_X, "Stanford MIPS-X"),
ENUM_ENT(EM_COLDFIRE, "Motorola Coldfire"),
ENUM_ENT(EM_68HC12, "Motorola MC68HC12 Microcontroller"),
ENUM_ENT(EM_MMA, "Fujitsu Multimedia Accelerator"),
ENUM_ENT(EM_PCP, "Siemens PCP"),
ENUM_ENT(EM_NCPU, "Sony nCPU embedded RISC processor"),
ENUM_ENT(EM_NDR1, "Denso NDR1 microprocessor"),
ENUM_ENT(EM_STARCORE, "Motorola Star*Core processor"),
ENUM_ENT(EM_ME16, "Toyota ME16 processor"),
ENUM_ENT(EM_ST100, "STMicroelectronics ST100 processor"),
ENUM_ENT(EM_TINYJ, "Advanced Logic Corp. TinyJ embedded processor"),
ENUM_ENT(EM_X86_64, "Advanced Micro Devices X86-64"),
ENUM_ENT(EM_PDSP, "Sony DSP processor"),
ENUM_ENT(EM_PDP10, "Digital Equipment Corp. PDP-10"),
ENUM_ENT(EM_PDP11, "Digital Equipment Corp. PDP-11"),
ENUM_ENT(EM_FX66, "Siemens FX66 microcontroller"),
ENUM_ENT(EM_ST9PLUS, "STMicroelectronics ST9+ 8/16 bit microcontroller"),
ENUM_ENT(EM_ST7, "STMicroelectronics ST7 8-bit microcontroller"),
ENUM_ENT(EM_68HC16, "Motorola MC68HC16 Microcontroller"),
ENUM_ENT(EM_68HC11, "Motorola MC68HC11 Microcontroller"),
ENUM_ENT(EM_68HC08, "Motorola MC68HC08 Microcontroller"),
ENUM_ENT(EM_68HC05, "Motorola MC68HC05 Microcontroller"),
ENUM_ENT(EM_SVX, "Silicon Graphics SVx"),
ENUM_ENT(EM_ST19, "STMicroelectronics ST19 8-bit microcontroller"),
ENUM_ENT(EM_VAX, "Digital VAX"),
ENUM_ENT(EM_CRIS, "Axis Communications 32-bit embedded processor"),
ENUM_ENT(EM_JAVELIN, "Infineon Technologies 32-bit embedded cpu"),
ENUM_ENT(EM_FIREPATH, "Element 14 64-bit DSP processor"),
ENUM_ENT(EM_ZSP, "LSI Logic's 16-bit DSP processor"),
ENUM_ENT(EM_MMIX, "Donald Knuth's educational 64-bit processor"),
ENUM_ENT(EM_HUANY, "Harvard University's machine-independent object format"),
ENUM_ENT(EM_PRISM, "Vitesse Prism"),
ENUM_ENT(EM_AVR, "Atmel AVR 8-bit microcontroller"),
ENUM_ENT(EM_FR30, "Fujitsu FR30"),
ENUM_ENT(EM_D10V, "Mitsubishi D10V"),
ENUM_ENT(EM_D30V, "Mitsubishi D30V"),
ENUM_ENT(EM_V850, "NEC v850"),
ENUM_ENT(EM_M32R, "Renesas M32R (formerly Mitsubishi M32r)"),
ENUM_ENT(EM_MN10300, "Matsushita MN10300"),
ENUM_ENT(EM_MN10200, "Matsushita MN10200"),
ENUM_ENT(EM_PJ, "picoJava"),
ENUM_ENT(EM_OPENRISC, "OpenRISC 32-bit embedded processor"),
ENUM_ENT(EM_ARC_COMPACT, "EM_ARC_COMPACT"),
ENUM_ENT(EM_XTENSA, "Tensilica Xtensa Processor"),
ENUM_ENT(EM_VIDEOCORE, "Alphamosaic VideoCore processor"),
ENUM_ENT(EM_TMM_GPP, "Thompson Multimedia General Purpose Processor"),
ENUM_ENT(EM_NS32K, "National Semiconductor 32000 series"),
ENUM_ENT(EM_TPC, "Tenor Network TPC processor"),
ENUM_ENT(EM_SNP1K, "EM_SNP1K"),
ENUM_ENT(EM_ST200, "STMicroelectronics ST200 microcontroller"),
ENUM_ENT(EM_IP2K, "Ubicom IP2xxx 8-bit microcontrollers"),
ENUM_ENT(EM_MAX, "MAX Processor"),
ENUM_ENT(EM_CR, "National Semiconductor CompactRISC"),
ENUM_ENT(EM_F2MC16, "Fujitsu F2MC16"),
ENUM_ENT(EM_MSP430, "Texas Instruments msp430 microcontroller"),
ENUM_ENT(EM_BLACKFIN, "Analog Devices Blackfin"),
ENUM_ENT(EM_SE_C33, "S1C33 Family of Seiko Epson processors"),
ENUM_ENT(EM_SEP, "Sharp embedded microprocessor"),
ENUM_ENT(EM_ARCA, "Arca RISC microprocessor"),
ENUM_ENT(EM_UNICORE, "Unicore"),
ENUM_ENT(EM_EXCESS, "eXcess 16/32/64-bit configurable embedded CPU"),
ENUM_ENT(EM_DXP, "Icera Semiconductor Inc. Deep Execution Processor"),
ENUM_ENT(EM_ALTERA_NIOS2, "Altera Nios"),
ENUM_ENT(EM_CRX, "National Semiconductor CRX microprocessor"),
ENUM_ENT(EM_XGATE, "Motorola XGATE embedded processor"),
ENUM_ENT(EM_C166, "Infineon Technologies xc16x"),
ENUM_ENT(EM_M16C, "Renesas M16C"),
ENUM_ENT(EM_DSPIC30F, "Microchip Technology dsPIC30F Digital Signal Controller"),
ENUM_ENT(EM_CE, "Freescale Communication Engine RISC core"),
ENUM_ENT(EM_M32C, "Renesas M32C"),
ENUM_ENT(EM_TSK3000, "Altium TSK3000 core"),
ENUM_ENT(EM_RS08, "Freescale RS08 embedded processor"),
ENUM_ENT(EM_SHARC, "EM_SHARC"),
ENUM_ENT(EM_ECOG2, "Cyan Technology eCOG2 microprocessor"),
ENUM_ENT(EM_SCORE7, "SUNPLUS S+Core"),
ENUM_ENT(EM_DSP24, "New Japan Radio (NJR) 24-bit DSP Processor"),
ENUM_ENT(EM_VIDEOCORE3, "Broadcom VideoCore III processor"),
ENUM_ENT(EM_LATTICEMICO32, "Lattice Mico32"),
ENUM_ENT(EM_SE_C17, "Seiko Epson C17 family"),
ENUM_ENT(EM_TI_C6000, "Texas Instruments TMS320C6000 DSP family"),
ENUM_ENT(EM_TI_C2000, "Texas Instruments TMS320C2000 DSP family"),
ENUM_ENT(EM_TI_C5500, "Texas Instruments TMS320C55x DSP family"),
ENUM_ENT(EM_MMDSP_PLUS, "STMicroelectronics 64bit VLIW Data Signal Processor"),
ENUM_ENT(EM_CYPRESS_M8C, "Cypress M8C microprocessor"),
ENUM_ENT(EM_R32C, "Renesas R32C series microprocessors"),
ENUM_ENT(EM_TRIMEDIA, "NXP Semiconductors TriMedia architecture family"),
ENUM_ENT(EM_HEXAGON, "Qualcomm Hexagon"),
ENUM_ENT(EM_8051, "Intel 8051 and variants"),
ENUM_ENT(EM_STXP7X, "STMicroelectronics STxP7x family"),
ENUM_ENT(EM_NDS32, "Andes Technology compact code size embedded RISC processor family"),
ENUM_ENT(EM_ECOG1, "Cyan Technology eCOG1 microprocessor"),
// FIXME: The following EM_ECOG1X definition is dead code since EM_ECOG1X has
// the same value as EM_ECOG1.
ENUM_ENT(EM_ECOG1X, "Cyan Technology eCOG1X family"),
ENUM_ENT(EM_MAXQ30, "Dallas Semiconductor MAXQ30 Core microcontrollers"),
ENUM_ENT(EM_XIMO16, "New Japan Radio (NJR) 16-bit DSP Processor"),
ENUM_ENT(EM_MANIK, "M2000 Reconfigurable RISC Microprocessor"),
ENUM_ENT(EM_CRAYNV2, "Cray Inc. NV2 vector architecture"),
ENUM_ENT(EM_RX, "Renesas RX"),
ENUM_ENT(EM_METAG, "Imagination Technologies Meta processor architecture"),
ENUM_ENT(EM_MCST_ELBRUS, "MCST Elbrus general purpose hardware architecture"),
ENUM_ENT(EM_ECOG16, "Cyan Technology eCOG16 family"),
ENUM_ENT(EM_CR16, "National Semiconductor CompactRISC 16-bit processor"),
ENUM_ENT(EM_ETPU, "Freescale Extended Time Processing Unit"),
ENUM_ENT(EM_SLE9X, "Infineon Technologies SLE9X core"),
ENUM_ENT(EM_L10M, "EM_L10M"),
ENUM_ENT(EM_K10M, "EM_K10M"),
ENUM_ENT(EM_AARCH64, "AArch64"),
ENUM_ENT(EM_AVR32, "Atmel Corporation 32-bit microprocessor family"),
ENUM_ENT(EM_STM8, "STMicroelectronics STM8 8-bit microcontroller"),
ENUM_ENT(EM_TILE64, "Tilera TILE64 multicore architecture family"),
ENUM_ENT(EM_TILEPRO, "Tilera TILEPro multicore architecture family"),
ENUM_ENT(EM_MICROBLAZE, "Xilinx MicroBlaze 32-bit RISC soft processor core"),
ENUM_ENT(EM_CUDA, "NVIDIA CUDA architecture"),
ENUM_ENT(EM_TILEGX, "Tilera TILE-Gx multicore architecture family"),
ENUM_ENT(EM_CLOUDSHIELD, "EM_CLOUDSHIELD"),
ENUM_ENT(EM_COREA_1ST, "EM_COREA_1ST"),
ENUM_ENT(EM_COREA_2ND, "EM_COREA_2ND"),
ENUM_ENT(EM_ARC_COMPACT2, "EM_ARC_COMPACT2"),
ENUM_ENT(EM_OPEN8, "EM_OPEN8"),
ENUM_ENT(EM_RL78, "Renesas RL78"),
ENUM_ENT(EM_VIDEOCORE5, "Broadcom VideoCore V processor"),
ENUM_ENT(EM_78KOR, "EM_78KOR"),
ENUM_ENT(EM_56800EX, "EM_56800EX"),
ENUM_ENT(EM_AMDGPU, "EM_AMDGPU"),
ENUM_ENT(EM_RISCV, "RISC-V"),
ENUM_ENT(EM_LANAI, "EM_LANAI"),
ENUM_ENT(EM_BPF, "EM_BPF"),
ENUM_ENT(EM_VE, "NEC SX-Aurora Vector Engine"),
};
const EnumEntry<unsigned> ElfSymbolBindings[] = {
{"Local", "LOCAL", ELF::STB_LOCAL},
{"Global", "GLOBAL", ELF::STB_GLOBAL},
{"Weak", "WEAK", ELF::STB_WEAK},
{"Unique", "UNIQUE", ELF::STB_GNU_UNIQUE}};
const EnumEntry<unsigned> ElfSymbolVisibilities[] = {
{"DEFAULT", "DEFAULT", ELF::STV_DEFAULT},
{"INTERNAL", "INTERNAL", ELF::STV_INTERNAL},
{"HIDDEN", "HIDDEN", ELF::STV_HIDDEN},
{"PROTECTED", "PROTECTED", ELF::STV_PROTECTED}};
const EnumEntry<unsigned> AMDGPUSymbolTypes[] = {
{ "AMDGPU_HSA_KERNEL", ELF::STT_AMDGPU_HSA_KERNEL }
};
static const char *getGroupType(uint32_t Flag) {
if (Flag & ELF::GRP_COMDAT)
return "COMDAT";
else
return "(unknown)";
}
const EnumEntry<unsigned> ElfSectionFlags[] = {
ENUM_ENT(SHF_WRITE, "W"),
ENUM_ENT(SHF_ALLOC, "A"),
ENUM_ENT(SHF_EXECINSTR, "X"),
ENUM_ENT(SHF_MERGE, "M"),
ENUM_ENT(SHF_STRINGS, "S"),
ENUM_ENT(SHF_INFO_LINK, "I"),
ENUM_ENT(SHF_LINK_ORDER, "L"),
ENUM_ENT(SHF_OS_NONCONFORMING, "O"),
ENUM_ENT(SHF_GROUP, "G"),
ENUM_ENT(SHF_TLS, "T"),
ENUM_ENT(SHF_COMPRESSED, "C"),
ENUM_ENT(SHF_GNU_RETAIN, "R"),
ENUM_ENT(SHF_EXCLUDE, "E"),
};
const EnumEntry<unsigned> ElfXCoreSectionFlags[] = {
ENUM_ENT(XCORE_SHF_CP_SECTION, ""),
ENUM_ENT(XCORE_SHF_DP_SECTION, "")
};
const EnumEntry<unsigned> ElfARMSectionFlags[] = {
ENUM_ENT(SHF_ARM_PURECODE, "y")
};
const EnumEntry<unsigned> ElfHexagonSectionFlags[] = {
ENUM_ENT(SHF_HEX_GPREL, "")
};
const EnumEntry<unsigned> ElfMipsSectionFlags[] = {
ENUM_ENT(SHF_MIPS_NODUPES, ""),
ENUM_ENT(SHF_MIPS_NAMES, ""),
ENUM_ENT(SHF_MIPS_LOCAL, ""),
ENUM_ENT(SHF_MIPS_NOSTRIP, ""),
ENUM_ENT(SHF_MIPS_GPREL, ""),
ENUM_ENT(SHF_MIPS_MERGE, ""),
ENUM_ENT(SHF_MIPS_ADDR, ""),
ENUM_ENT(SHF_MIPS_STRING, "")
};
const EnumEntry<unsigned> ElfX86_64SectionFlags[] = {
ENUM_ENT(SHF_X86_64_LARGE, "l")
};
static std::vector<EnumEntry<unsigned>>
getSectionFlagsForTarget(unsigned EMachine) {
std::vector<EnumEntry<unsigned>> Ret(std::begin(ElfSectionFlags),
std::end(ElfSectionFlags));
switch (EMachine) {
case EM_ARM:
Ret.insert(Ret.end(), std::begin(ElfARMSectionFlags),
std::end(ElfARMSectionFlags));
break;
case EM_HEXAGON:
Ret.insert(Ret.end(), std::begin(ElfHexagonSectionFlags),
std::end(ElfHexagonSectionFlags));
break;
case EM_MIPS:
Ret.insert(Ret.end(), std::begin(ElfMipsSectionFlags),
std::end(ElfMipsSectionFlags));
break;
case EM_X86_64:
Ret.insert(Ret.end(), std::begin(ElfX86_64SectionFlags),
std::end(ElfX86_64SectionFlags));
break;
case EM_XCORE:
Ret.insert(Ret.end(), std::begin(ElfXCoreSectionFlags),
std::end(ElfXCoreSectionFlags));
break;
default:
break;
}
return Ret;
}
static std::string getGNUFlags(unsigned EMachine, uint64_t Flags) {
// Here we are trying to build the flags string in the same way as GNU does.
// It is not that straightforward. Imagine we have sh_flags == 0x90000000.
// SHF_EXCLUDE ("E") has a value of 0x80000000 and SHF_MASKPROC is 0xf0000000.
// GNU readelf will not print "E" or "Ep" in this case, but will print just
// "p". It only will print "E" when no other processor flag is set.
std::string Str;
bool HasUnknownFlag = false;
bool HasOSFlag = false;
bool HasProcFlag = false;
std::vector<EnumEntry<unsigned>> FlagsList =
getSectionFlagsForTarget(EMachine);
while (Flags) {
// Take the least significant bit as a flag.
uint64_t Flag = Flags & -Flags;
Flags -= Flag;
// Find the flag in the known flags list.
auto I = llvm::find_if(FlagsList, [=](const EnumEntry<unsigned> &E) {
// Flags with empty names are not printed in GNU style output.
return E.Value == Flag && !E.AltName.empty();
});
if (I != FlagsList.end()) {
Str += I->AltName;
continue;
}
// If we did not find a matching regular flag, then we are dealing with an
// OS-specific flag, a processor-specific flag, or an unknown flag.
if (Flag & ELF::SHF_MASKOS) {
HasOSFlag = true;
Flags &= ~ELF::SHF_MASKOS;
} else if (Flag & ELF::SHF_MASKPROC) {
HasProcFlag = true;
// Mask off all the processor-specific bits. This removes the SHF_EXCLUDE
// bit if set so that it doesn't also get printed.
Flags &= ~ELF::SHF_MASKPROC;
} else {
HasUnknownFlag = true;
}
}
// "o", "p" and "x" are printed last.
if (HasOSFlag)
Str += "o";
if (HasProcFlag)
Str += "p";
if (HasUnknownFlag)
Str += "x";
return Str;
}
static StringRef segmentTypeToString(unsigned Arch, unsigned Type) {
// Check potentially overlapping processor-specific program header types.
switch (Arch) {
case ELF::EM_ARM:
switch (Type) { LLVM_READOBJ_ENUM_CASE(ELF, PT_ARM_EXIDX); }
break;
case ELF::EM_MIPS:
case ELF::EM_MIPS_RS3_LE:
switch (Type) {
LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_REGINFO);
LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_RTPROC);
LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_OPTIONS);
LLVM_READOBJ_ENUM_CASE(ELF, PT_MIPS_ABIFLAGS);
}
break;
}
switch (Type) {
LLVM_READOBJ_ENUM_CASE(ELF, PT_NULL);
LLVM_READOBJ_ENUM_CASE(ELF, PT_LOAD);
LLVM_READOBJ_ENUM_CASE(ELF, PT_DYNAMIC);
LLVM_READOBJ_ENUM_CASE(ELF, PT_INTERP);
LLVM_READOBJ_ENUM_CASE(ELF, PT_NOTE);
LLVM_READOBJ_ENUM_CASE(ELF, PT_SHLIB);
LLVM_READOBJ_ENUM_CASE(ELF, PT_PHDR);
LLVM_READOBJ_ENUM_CASE(ELF, PT_TLS);
LLVM_READOBJ_ENUM_CASE(ELF, PT_GNU_EH_FRAME);
LLVM_READOBJ_ENUM_CASE(ELF, PT_SUNW_UNWIND);
LLVM_READOBJ_ENUM_CASE(ELF, PT_GNU_STACK);
LLVM_READOBJ_ENUM_CASE(ELF, PT_GNU_RELRO);
LLVM_READOBJ_ENUM_CASE(ELF, PT_GNU_PROPERTY);
LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_RANDOMIZE);
LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_WXNEEDED);
LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_BOOTDATA);
default:
return "";
}
}
static std::string getGNUPtType(unsigned Arch, unsigned Type) {
StringRef Seg = segmentTypeToString(Arch, Type);
if (Seg.empty())
return std::string("<unknown>: ") + to_string(format_hex(Type, 1));
// E.g. "PT_ARM_EXIDX" -> "EXIDX".
if (Seg.startswith("PT_ARM_"))
return Seg.drop_front(7).str();
// E.g. "PT_MIPS_REGINFO" -> "REGINFO".
if (Seg.startswith("PT_MIPS_"))
return Seg.drop_front(8).str();
// E.g. "PT_LOAD" -> "LOAD".
assert(Seg.startswith("PT_"));
return Seg.drop_front(3).str();
}
const EnumEntry<unsigned> ElfSegmentFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, PF_X),
LLVM_READOBJ_ENUM_ENT(ELF, PF_W),
LLVM_READOBJ_ENUM_ENT(ELF, PF_R)
};
const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
ENUM_ENT(EF_MIPS_NOREORDER, "noreorder"),
ENUM_ENT(EF_MIPS_PIC, "pic"),
ENUM_ENT(EF_MIPS_CPIC, "cpic"),
ENUM_ENT(EF_MIPS_ABI2, "abi2"),
ENUM_ENT(EF_MIPS_32BITMODE, "32bitmode"),
ENUM_ENT(EF_MIPS_FP64, "fp64"),
ENUM_ENT(EF_MIPS_NAN2008, "nan2008"),
ENUM_ENT(EF_MIPS_ABI_O32, "o32"),
ENUM_ENT(EF_MIPS_ABI_O64, "o64"),
ENUM_ENT(EF_MIPS_ABI_EABI32, "eabi32"),
ENUM_ENT(EF_MIPS_ABI_EABI64, "eabi64"),
ENUM_ENT(EF_MIPS_MACH_3900, "3900"),
ENUM_ENT(EF_MIPS_MACH_4010, "4010"),
ENUM_ENT(EF_MIPS_MACH_4100, "4100"),
ENUM_ENT(EF_MIPS_MACH_4650, "4650"),
ENUM_ENT(EF_MIPS_MACH_4120, "4120"),
ENUM_ENT(EF_MIPS_MACH_4111, "4111"),
ENUM_ENT(EF_MIPS_MACH_SB1, "sb1"),
ENUM_ENT(EF_MIPS_MACH_OCTEON, "octeon"),
ENUM_ENT(EF_MIPS_MACH_XLR, "xlr"),
ENUM_ENT(EF_MIPS_MACH_OCTEON2, "octeon2"),
ENUM_ENT(EF_MIPS_MACH_OCTEON3, "octeon3"),
ENUM_ENT(EF_MIPS_MACH_5400, "5400"),
ENUM_ENT(EF_MIPS_MACH_5900, "5900"),
ENUM_ENT(EF_MIPS_MACH_5500, "5500"),
ENUM_ENT(EF_MIPS_MACH_9000, "9000"),
ENUM_ENT(EF_MIPS_MACH_LS2E, "loongson-2e"),
ENUM_ENT(EF_MIPS_MACH_LS2F, "loongson-2f"),
ENUM_ENT(EF_MIPS_MACH_LS3A, "loongson-3a"),
ENUM_ENT(EF_MIPS_MICROMIPS, "micromips"),
ENUM_ENT(EF_MIPS_ARCH_ASE_M16, "mips16"),
ENUM_ENT(EF_MIPS_ARCH_ASE_MDMX, "mdmx"),
ENUM_ENT(EF_MIPS_ARCH_1, "mips1"),
ENUM_ENT(EF_MIPS_ARCH_2, "mips2"),
ENUM_ENT(EF_MIPS_ARCH_3, "mips3"),
ENUM_ENT(EF_MIPS_ARCH_4, "mips4"),
ENUM_ENT(EF_MIPS_ARCH_5, "mips5"),
ENUM_ENT(EF_MIPS_ARCH_32, "mips32"),
ENUM_ENT(EF_MIPS_ARCH_64, "mips64"),
ENUM_ENT(EF_MIPS_ARCH_32R2, "mips32r2"),
ENUM_ENT(EF_MIPS_ARCH_64R2, "mips64r2"),
ENUM_ENT(EF_MIPS_ARCH_32R6, "mips32r6"),
ENUM_ENT(EF_MIPS_ARCH_64R6, "mips64r6")
};
const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion3[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_NONE),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R600),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R630),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RS880),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RV670),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RV710),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RV730),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RV770),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_CEDAR),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_CYPRESS),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_JUNIPER),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_REDWOOD),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_SUMO),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_BARTS),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_CAICOS),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_CAYMAN),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_TURKS),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX600),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX601),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX602),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX700),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX701),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX702),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX703),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX704),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX705),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX801),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX802),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX803),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX805),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX810),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX900),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX902),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX904),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX906),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX908),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90A),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90C),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1013),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1030),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1031),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1032),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1033),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1034),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1035),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_V3),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_V3)
};
const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_NONE),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R600),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R630),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RS880),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RV670),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RV710),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RV730),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_RV770),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_CEDAR),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_CYPRESS),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_JUNIPER),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_REDWOOD),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_SUMO),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_BARTS),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_CAICOS),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_CAYMAN),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_TURKS),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX600),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX601),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX602),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX700),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX701),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX702),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX703),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX704),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX705),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX801),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX802),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX803),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX805),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX810),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX900),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX902),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX904),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX906),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX908),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90A),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90C),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1013),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1030),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1031),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1032),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1033),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1034),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1035),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_ANY_V4),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_OFF_V4),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_ON_V4),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_ANY_V4),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_OFF_V4),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_ON_V4)
};
const EnumEntry<unsigned> ElfHeaderRISCVFlags[] = {
ENUM_ENT(EF_RISCV_RVC, "RVC"),
ENUM_ENT(EF_RISCV_FLOAT_ABI_SINGLE, "single-float ABI"),
ENUM_ENT(EF_RISCV_FLOAT_ABI_DOUBLE, "double-float ABI"),
ENUM_ENT(EF_RISCV_FLOAT_ABI_QUAD, "quad-float ABI"),
ENUM_ENT(EF_RISCV_RVE, "RVE"),
ENUM_ENT(EF_RISCV_TSO, "TSO"),
};
const EnumEntry<unsigned> ElfHeaderAVRFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR1),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR2),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR25),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR3),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR31),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR35),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR4),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR5),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR51),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR6),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVRTINY),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_XMEGA1),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_XMEGA2),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_XMEGA3),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_XMEGA4),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_XMEGA5),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_XMEGA6),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_XMEGA7),
ENUM_ENT(EF_AVR_LINKRELAX_PREPARED, "relaxable"),
};
const EnumEntry<unsigned> ElfSymOtherFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, STV_INTERNAL),
LLVM_READOBJ_ENUM_ENT(ELF, STV_HIDDEN),
LLVM_READOBJ_ENUM_ENT(ELF, STV_PROTECTED)
};
const EnumEntry<unsigned> ElfMipsSymOtherFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_OPTIONAL),
LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_PLT),
LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_PIC),
LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_MICROMIPS)
};
const EnumEntry<unsigned> ElfAArch64SymOtherFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, STO_AARCH64_VARIANT_PCS)
};
const EnumEntry<unsigned> ElfMips16SymOtherFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_OPTIONAL),
LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_PLT),
LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_MIPS16)
};
const EnumEntry<unsigned> ElfRISCVSymOtherFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, STO_RISCV_VARIANT_CC)};
static const char *getElfMipsOptionsOdkType(unsigned Odk) {
switch (Odk) {
LLVM_READOBJ_ENUM_CASE(ELF, ODK_NULL);
LLVM_READOBJ_ENUM_CASE(ELF, ODK_REGINFO);
LLVM_READOBJ_ENUM_CASE(ELF, ODK_EXCEPTIONS);
LLVM_READOBJ_ENUM_CASE(ELF, ODK_PAD);
LLVM_READOBJ_ENUM_CASE(ELF, ODK_HWPATCH);
LLVM_READOBJ_ENUM_CASE(ELF, ODK_FILL);
LLVM_READOBJ_ENUM_CASE(ELF, ODK_TAGS);
LLVM_READOBJ_ENUM_CASE(ELF, ODK_HWAND);
LLVM_READOBJ_ENUM_CASE(ELF, ODK_HWOR);
LLVM_READOBJ_ENUM_CASE(ELF, ODK_GP_GROUP);
LLVM_READOBJ_ENUM_CASE(ELF, ODK_IDENT);
LLVM_READOBJ_ENUM_CASE(ELF, ODK_PAGESIZE);
default:
return "Unknown";
}
}
template <typename ELFT>
std::pair<const typename ELFT::Phdr *, const typename ELFT::Shdr *>
ELFDumper<ELFT>::findDynamic() {
// Try to locate the PT_DYNAMIC header.
const Elf_Phdr *DynamicPhdr = nullptr;
if (Expected<ArrayRef<Elf_Phdr>> PhdrsOrErr = Obj.program_headers()) {
for (const Elf_Phdr &Phdr : *PhdrsOrErr) {
if (Phdr.p_type != ELF::PT_DYNAMIC)
continue;
DynamicPhdr = &Phdr;
break;
}
} else {
reportUniqueWarning(
"unable to read program headers to locate the PT_DYNAMIC segment: " +
toString(PhdrsOrErr.takeError()));
}
// Try to locate the .dynamic section in the sections header table.
const Elf_Shdr *DynamicSec = nullptr;
for (const Elf_Shdr &Sec : cantFail(Obj.sections())) {
if (Sec.sh_type != ELF::SHT_DYNAMIC)
continue;
DynamicSec = &Sec;
break;
}
if (DynamicPhdr && ((DynamicPhdr->p_offset + DynamicPhdr->p_filesz >
ObjF.getMemoryBufferRef().getBufferSize()) ||
(DynamicPhdr->p_offset + DynamicPhdr->p_filesz <
DynamicPhdr->p_offset))) {
reportUniqueWarning(
"PT_DYNAMIC segment offset (0x" +
Twine::utohexstr(DynamicPhdr->p_offset) + ") + file size (0x" +
Twine::utohexstr(DynamicPhdr->p_filesz) +
") exceeds the size of the file (0x" +
Twine::utohexstr(ObjF.getMemoryBufferRef().getBufferSize()) + ")");
// Don't use the broken dynamic header.
DynamicPhdr = nullptr;
}
if (DynamicPhdr && DynamicSec) {
if (DynamicSec->sh_addr + DynamicSec->sh_size >
DynamicPhdr->p_vaddr + DynamicPhdr->p_memsz ||
DynamicSec->sh_addr < DynamicPhdr->p_vaddr)
reportUniqueWarning(describe(*DynamicSec) +
" is not contained within the "
"PT_DYNAMIC segment");
if (DynamicSec->sh_addr != DynamicPhdr->p_vaddr)
reportUniqueWarning(describe(*DynamicSec) + " is not at the start of the "
"PT_DYNAMIC segment");
}
return std::make_pair(DynamicPhdr, DynamicSec);
}
template <typename ELFT>
void ELFDumper<ELFT>::loadDynamicTable() {
const Elf_Phdr *DynamicPhdr;
const Elf_Shdr *DynamicSec;
std::tie(DynamicPhdr, DynamicSec) = findDynamic();
if (!DynamicPhdr && !DynamicSec)
return;
DynRegionInfo FromPhdr(ObjF, *this);
bool IsPhdrTableValid = false;
if (DynamicPhdr) {
// Use cantFail(), because p_offset/p_filesz fields of a PT_DYNAMIC are
// validated in findDynamic() and so createDRI() is not expected to fail.
FromPhdr = cantFail(createDRI(DynamicPhdr->p_offset, DynamicPhdr->p_filesz,
sizeof(Elf_Dyn)));
FromPhdr.SizePrintName = "PT_DYNAMIC size";
FromPhdr.EntSizePrintName = "";
IsPhdrTableValid = !FromPhdr.template getAsArrayRef<Elf_Dyn>().empty();
}
// Locate the dynamic table described in a section header.
// Ignore sh_entsize and use the expected value for entry size explicitly.
// This allows us to dump dynamic sections with a broken sh_entsize
// field.
DynRegionInfo FromSec(ObjF, *this);
bool IsSecTableValid = false;
if (DynamicSec) {
Expected<DynRegionInfo> RegOrErr =
createDRI(DynamicSec->sh_offset, DynamicSec->sh_size, sizeof(Elf_Dyn));
if (RegOrErr) {
FromSec = *RegOrErr;
FromSec.Context = describe(*DynamicSec);
FromSec.EntSizePrintName = "";
IsSecTableValid = !FromSec.template getAsArrayRef<Elf_Dyn>().empty();
} else {
reportUniqueWarning("unable to read the dynamic table from " +
describe(*DynamicSec) + ": " +
toString(RegOrErr.takeError()));
}
}
// When we only have information from one of the SHT_DYNAMIC section header or
// PT_DYNAMIC program header, just use that.
if (!DynamicPhdr || !DynamicSec) {
if ((DynamicPhdr && IsPhdrTableValid) || (DynamicSec && IsSecTableValid)) {
DynamicTable = DynamicPhdr ? FromPhdr : FromSec;
parseDynamicTable();
} else {
reportUniqueWarning("no valid dynamic table was found");
}
return;
}
// At this point we have tables found from the section header and from the
// dynamic segment. Usually they match, but we have to do sanity checks to
// verify that.
if (FromPhdr.Addr != FromSec.Addr)
reportUniqueWarning("SHT_DYNAMIC section header and PT_DYNAMIC "
"program header disagree about "
"the location of the dynamic table");
if (!IsPhdrTableValid && !IsSecTableValid) {
reportUniqueWarning("no valid dynamic table was found");
return;
}
// Information in the PT_DYNAMIC program header has priority over the
// information in a section header.
if (IsPhdrTableValid) {
if (!IsSecTableValid)
reportUniqueWarning(
"SHT_DYNAMIC dynamic table is invalid: PT_DYNAMIC will be used");
DynamicTable = FromPhdr;
} else {
reportUniqueWarning(
"PT_DYNAMIC dynamic table is invalid: SHT_DYNAMIC will be used");
DynamicTable = FromSec;
}
parseDynamicTable();
}
template <typename ELFT>
ELFDumper<ELFT>::ELFDumper(const object::ELFObjectFile<ELFT> &O,
ScopedPrinter &Writer)
: ObjDumper(Writer, O.getFileName()), ObjF(O), Obj(O.getELFFile()),
FileName(O.getFileName()), DynRelRegion(O, *this),
DynRelaRegion(O, *this), DynRelrRegion(O, *this),
DynPLTRelRegion(O, *this), DynSymTabShndxRegion(O, *this),
DynamicTable(O, *this) {
if (!O.IsContentValid())
return;
typename ELFT::ShdrRange Sections = cantFail(Obj.sections());
for (const Elf_Shdr &Sec : Sections) {
switch (Sec.sh_type) {
case ELF::SHT_SYMTAB:
if (!DotSymtabSec)
DotSymtabSec = &Sec;
break;
case ELF::SHT_DYNSYM:
if (!DotDynsymSec)
DotDynsymSec = &Sec;
if (!DynSymRegion) {
Expected<DynRegionInfo> RegOrErr =
createDRI(Sec.sh_offset, Sec.sh_size, Sec.sh_entsize);
if (RegOrErr) {
DynSymRegion = *RegOrErr;
DynSymRegion->Context = describe(Sec);
if (Expected<StringRef> E = Obj.getStringTableForSymtab(Sec))
DynamicStringTable = *E;
else
reportUniqueWarning("unable to get the string table for the " +
describe(Sec) + ": " + toString(E.takeError()));
} else {
reportUniqueWarning("unable to read dynamic symbols from " +
describe(Sec) + ": " +
toString(RegOrErr.takeError()));
}
}
break;
case ELF::SHT_SYMTAB_SHNDX: {
uint32_t SymtabNdx = Sec.sh_link;
if (SymtabNdx >= Sections.size()) {
reportUniqueWarning(
"unable to get the associated symbol table for " + describe(Sec) +
": sh_link (" + Twine(SymtabNdx) +
") is greater than or equal to the total number of sections (" +
Twine(Sections.size()) + ")");
continue;
}
if (Expected<ArrayRef<Elf_Word>> ShndxTableOrErr =
Obj.getSHNDXTable(Sec)) {
if (!ShndxTables.insert({&Sections[SymtabNdx], *ShndxTableOrErr})
.second)
reportUniqueWarning(
"multiple SHT_SYMTAB_SHNDX sections are linked to " +
describe(Sec));
} else {
reportUniqueWarning(ShndxTableOrErr.takeError());
}
break;
}
case ELF::SHT_GNU_versym:
if (!SymbolVersionSection)
SymbolVersionSection = &Sec;
break;
case ELF::SHT_GNU_verdef:
if (!SymbolVersionDefSection)
SymbolVersionDefSection = &Sec;
break;
case ELF::SHT_GNU_verneed:
if (!SymbolVersionNeedSection)
SymbolVersionNeedSection = &Sec;
break;
case ELF::SHT_LLVM_ADDRSIG:
if (!DotAddrsigSec)
DotAddrsigSec = &Sec;
break;
}
}
loadDynamicTable();
}
template <typename ELFT> void ELFDumper<ELFT>::parseDynamicTable() {
auto toMappedAddr = [&](uint64_t Tag, uint64_t VAddr) -> const uint8_t * {
auto MappedAddrOrError = Obj.toMappedAddr(VAddr, [&](const Twine &Msg) {
this->reportUniqueWarning(Msg);
return Error::success();
});
if (!MappedAddrOrError) {
this->reportUniqueWarning("unable to parse DT_" +
Obj.getDynamicTagAsString(Tag) + ": " +
llvm::toString(MappedAddrOrError.takeError()));
return nullptr;
}
return MappedAddrOrError.get();
};
const char *StringTableBegin = nullptr;
uint64_t StringTableSize = 0;
Optional<DynRegionInfo> DynSymFromTable;
for (const Elf_Dyn &Dyn : dynamic_table()) {
switch (Dyn.d_tag) {
case ELF::DT_HASH:
HashTable = reinterpret_cast<const Elf_Hash *>(
toMappedAddr(Dyn.getTag(), Dyn.getPtr()));
break;
case ELF::DT_GNU_HASH:
GnuHashTable = reinterpret_cast<const Elf_GnuHash *>(
toMappedAddr(Dyn.getTag(), Dyn.getPtr()));
break;
case ELF::DT_STRTAB:
StringTableBegin = reinterpret_cast<const char *>(
toMappedAddr(Dyn.getTag(), Dyn.getPtr()));
break;
case ELF::DT_STRSZ:
StringTableSize = Dyn.getVal();
break;
case ELF::DT_SYMTAB: {
// If we can't map the DT_SYMTAB value to an address (e.g. when there are
// no program headers), we ignore its value.
if (const uint8_t *VA = toMappedAddr(Dyn.getTag(), Dyn.getPtr())) {
DynSymFromTable.emplace(ObjF, *this);
DynSymFromTable->Addr = VA;
DynSymFromTable->EntSize = sizeof(Elf_Sym);
DynSymFromTable->EntSizePrintName = "";
}
break;
}
case ELF::DT_SYMENT: {
uint64_t Val = Dyn.getVal();
if (Val != sizeof(Elf_Sym))
this->reportUniqueWarning("DT_SYMENT value of 0x" +
Twine::utohexstr(Val) +
" is not the size of a symbol (0x" +
Twine::utohexstr(sizeof(Elf_Sym)) + ")");
break;
}
case ELF::DT_RELA:
DynRelaRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr());
break;
case ELF::DT_RELASZ:
DynRelaRegion.Size = Dyn.getVal();
DynRelaRegion.SizePrintName = "DT_RELASZ value";
break;
case ELF::DT_RELAENT:
DynRelaRegion.EntSize = Dyn.getVal();
DynRelaRegion.EntSizePrintName = "DT_RELAENT value";
break;
case ELF::DT_SONAME:
SONameOffset = Dyn.getVal();
break;
case ELF::DT_REL:
DynRelRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr());
break;
case ELF::DT_RELSZ:
DynRelRegion.Size = Dyn.getVal();
DynRelRegion.SizePrintName = "DT_RELSZ value";
break;
case ELF::DT_RELENT:
DynRelRegion.EntSize = Dyn.getVal();
DynRelRegion.EntSizePrintName = "DT_RELENT value";
break;
case ELF::DT_RELR:
case ELF::DT_ANDROID_RELR:
DynRelrRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr());
break;
case ELF::DT_RELRSZ:
case ELF::DT_ANDROID_RELRSZ:
DynRelrRegion.Size = Dyn.getVal();
DynRelrRegion.SizePrintName = Dyn.d_tag == ELF::DT_RELRSZ
? "DT_RELRSZ value"
: "DT_ANDROID_RELRSZ value";
break;
case ELF::DT_RELRENT:
case ELF::DT_ANDROID_RELRENT:
DynRelrRegion.EntSize = Dyn.getVal();
DynRelrRegion.EntSizePrintName = Dyn.d_tag == ELF::DT_RELRENT
? "DT_RELRENT value"
: "DT_ANDROID_RELRENT value";
break;
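// DT_PLTREL records whether the PLT relocations referenced by DT_JMPREL use
// Elf_Rel or Elf_Rela entries; the entry size chosen here is used later when
// DynPLTRelRegion is decoded.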
case ELF::DT_PLTREL:
if (Dyn.getVal() == DT_REL)
DynPLTRelRegion.EntSize = sizeof(Elf_Rel);
else if (Dyn.getVal() == DT_RELA)
DynPLTRelRegion.EntSize = sizeof(Elf_Rela);
else
reportUniqueWarning(Twine("unknown DT_PLTREL value of ") +
Twine((uint64_t)Dyn.getVal()));
DynPLTRelRegion.EntSizePrintName = "PLTREL entry size";
break;
case ELF::DT_JMPREL:
DynPLTRelRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr());
break;
case ELF::DT_PLTRELSZ:
DynPLTRelRegion.Size = Dyn.getVal();
DynPLTRelRegion.SizePrintName = "DT_PLTRELSZ value";
break;
case ELF::DT_SYMTAB_SHNDX:
DynSymTabShndxRegion.Addr = toMappedAddr(Dyn.getTag(), Dyn.getPtr());
DynSymTabShndxRegion.EntSize = sizeof(Elf_Word);
break;
}
}
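// The dynamic string table is only materialised after the whole dynamic table
// has been scanned: both DT_STRTAB and DT_STRSZ are needed, and the
// [Offset, Offset + DT_STRSZ) range must fit inside the file.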
if (StringTableBegin) {
const uint64_t FileSize = Obj.getBufSize();
const uint64_t Offset = (const uint8_t *)StringTableBegin - Obj.base();
if (StringTableSize > FileSize - Offset)
reportUniqueWarning(
"the dynamic string table at 0x" + Twine::utohexstr(Offset) +
" goes past the end of the file (0x" + Twine::utohexstr(FileSize) +
") with DT_STRSZ = 0x" + Twine::utohexstr(StringTableSize));
else
DynamicStringTable = StringRef(StringTableBegin, StringTableSize);
}
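// Only the standard SHT_HASH layout with 4-byte entries is handled below;
// tables that use non-standard 8-byte entries are skipped.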
const bool IsHashTableSupported = getHashTableEntSize() == 4;
if (DynSymRegion) {
// Often we find the information about the dynamic symbol table
// location in the SHT_DYNSYM section header. However, the value in
// DT_SYMTAB has priority, because it is used by dynamic loaders to
// locate .dynsym at runtime. The location we find in the section header
// and the location we find here should match.
if (DynSymFromTable && DynSymFromTable->Addr != DynSymRegion->Addr)
reportUniqueWarning(
createError("SHT_DYNSYM section header and DT_SYMTAB disagree about "
"the location of the dynamic symbol table"));
// According to the ELF gABI: "The number of symbol table entries should
// equal nchain". Check to see if the DT_HASH hash table nchain value
// conflicts with the number of symbols in the dynamic symbol table
// according to the section header.
if (HashTable && IsHashTableSupported) {
if (DynSymRegion->EntSize == 0)
reportUniqueWarning("SHT_DYNSYM section has sh_entsize == 0");
else if (HashTable->nchain != DynSymRegion->Size / DynSymRegion->EntSize)
reportUniqueWarning(
"hash table nchain (" + Twine(HashTable->nchain) +
") differs from symbol count derived from SHT_DYNSYM section "
"header (" +
Twine(DynSymRegion->Size / DynSymRegion->EntSize) + ")");
}
}
// Delay the creation of the actual dynamic symbol table until now, so that
// checks can always be made against the section header-based properties,
// without worrying about tag order.
if (DynSymFromTable) {
if (!DynSymRegion) {
DynSymRegion = DynSymFromTable;
} else {
DynSymRegion->Addr = DynSymFromTable->Addr;
DynSymRegion->EntSize = DynSymFromTable->EntSize;
DynSymRegion->EntSizePrintName = DynSymFromTable->EntSizePrintName;
}
}
// Derive the dynamic symbol table size from the DT_HASH hash table, if
// present.
if (HashTable && IsHashTableSupported && DynSymRegion) {
const uint64_t FileSize = Obj.getBufSize();
const uint64_t DerivedSize =
(uint64_t)HashTable->nchain * DynSymRegion->EntSize;
const uint64_t Offset = (const uint8_t *)DynSymRegion->Addr - Obj.base();
if (DerivedSize > FileSize - Offset)
reportUniqueWarning(
"the size (0x" + Twine::utohexstr(DerivedSize) +
") of the dynamic symbol table at 0x" + Twine::utohexstr(Offset) +
", derived from the hash table, goes past the end of the file (0x" +
Twine::utohexstr(FileSize) + ") and will be ignored");
else
DynSymRegion->Size = HashTable->nchain * DynSymRegion->EntSize;
}
}
template <typename ELFT> void ELFDumper<ELFT>::printVersionInfo() {
// Dump version symbol section.
printVersionSymbolSection(SymbolVersionSection);
// Dump version definition section.
printVersionDefinitionSection(SymbolVersionDefSection);
// Dump version dependency section.
printVersionDependencySection(SymbolVersionNeedSection);
}
#define LLVM_READOBJ_DT_FLAG_ENT(prefix, enum) \
{ #enum, prefix##_##enum }
const EnumEntry<unsigned> ElfDynamicDTFlags[] = {
LLVM_READOBJ_DT_FLAG_ENT(DF, ORIGIN),
LLVM_READOBJ_DT_FLAG_ENT(DF, SYMBOLIC),
LLVM_READOBJ_DT_FLAG_ENT(DF, TEXTREL),
LLVM_READOBJ_DT_FLAG_ENT(DF, BIND_NOW),
LLVM_READOBJ_DT_FLAG_ENT(DF, STATIC_TLS)
};
const EnumEntry<unsigned> ElfDynamicDTFlags1[] = {
LLVM_READOBJ_DT_FLAG_ENT(DF_1, NOW),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, GLOBAL),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, GROUP),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, NODELETE),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, LOADFLTR),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, INITFIRST),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, NOOPEN),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, ORIGIN),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, DIRECT),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, TRANS),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, INTERPOSE),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, NODEFLIB),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, NODUMP),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, CONFALT),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, ENDFILTEE),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, DISPRELDNE),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, DISPRELPND),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, NODIRECT),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, IGNMULDEF),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, NOKSYMS),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, NOHDR),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, EDITED),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, NORELOC),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, SYMINTPOSE),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, GLOBAUDIT),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, SINGLETON),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, PIE),
};
const EnumEntry<unsigned> ElfDynamicDTMipsFlags[] = {
LLVM_READOBJ_DT_FLAG_ENT(RHF, NONE),
LLVM_READOBJ_DT_FLAG_ENT(RHF, QUICKSTART),
LLVM_READOBJ_DT_FLAG_ENT(RHF, NOTPOT),
LLVM_READOBJ_DT_FLAG_ENT(RHS, NO_LIBRARY_REPLACEMENT),
LLVM_READOBJ_DT_FLAG_ENT(RHF, NO_MOVE),
LLVM_READOBJ_DT_FLAG_ENT(RHF, SGI_ONLY),
LLVM_READOBJ_DT_FLAG_ENT(RHF, GUARANTEE_INIT),
LLVM_READOBJ_DT_FLAG_ENT(RHF, DELTA_C_PLUS_PLUS),
LLVM_READOBJ_DT_FLAG_ENT(RHF, GUARANTEE_START_INIT),
LLVM_READOBJ_DT_FLAG_ENT(RHF, PIXIE),
LLVM_READOBJ_DT_FLAG_ENT(RHF, DEFAULT_DELAY_LOAD),
LLVM_READOBJ_DT_FLAG_ENT(RHF, REQUICKSTART),
LLVM_READOBJ_DT_FLAG_ENT(RHF, REQUICKSTARTED),
LLVM_READOBJ_DT_FLAG_ENT(RHF, CORD),
LLVM_READOBJ_DT_FLAG_ENT(RHF, NO_UNRES_UNDEF),
LLVM_READOBJ_DT_FLAG_ENT(RHF, RLD_ORDER_SAFE)
};
#undef LLVM_READOBJ_DT_FLAG_ENT
template <typename T, typename TFlag>
void printFlags(T Value, ArrayRef<EnumEntry<TFlag>> Flags, raw_ostream &OS) {
SmallVector<EnumEntry<TFlag>, 10> SetFlags;
for (const EnumEntry<TFlag> &Flag : Flags)
if (Flag.Value != 0 && (Value & Flag.Value) == Flag.Value)
SetFlags.push_back(Flag);
for (const EnumEntry<TFlag> &Flag : SetFlags)
OS << Flag.Name << " ";
}
template <class ELFT>
const typename ELFT::Shdr *
ELFDumper<ELFT>::findSectionByName(StringRef Name) const {
for (const Elf_Shdr &Shdr : cantFail(Obj.sections())) {
if (Expected<StringRef> NameOrErr = Obj.getSectionName(Shdr)) {
if (*NameOrErr == Name)
return &Shdr;
} else {
reportUniqueWarning("unable to read the name of " + describe(Shdr) +
": " + toString(NameOrErr.takeError()));
}
}
return nullptr;
}
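// Render the value of a dynamic table entry as the string shown in the
// output, applying machine-specific and tag-specific formatting.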
template <class ELFT>
std::string ELFDumper<ELFT>::getDynamicEntry(uint64_t Type,
uint64_t Value) const {
auto FormatHexValue = [](uint64_t V) {
std::string Str;
raw_string_ostream OS(Str);
const char *ConvChar =
(opts::Output == opts::GNU) ? "0x%" PRIx64 : "0x%" PRIX64;
OS << format(ConvChar, V);
return OS.str();
};
auto FormatFlags = [](uint64_t V,
llvm::ArrayRef<llvm::EnumEntry<unsigned int>> Array) {
std::string Str;
raw_string_ostream OS(Str);
printFlags(V, Array, OS);
return OS.str();
};
// Handle custom printing of architecture specific tags
switch (Obj.getHeader().e_machine) {
case EM_AARCH64:
switch (Type) {
case DT_AARCH64_BTI_PLT:
case DT_AARCH64_PAC_PLT:
case DT_AARCH64_VARIANT_PCS:
return std::to_string(Value);
default:
break;
}
break;
case EM_HEXAGON:
switch (Type) {
case DT_HEXAGON_VER:
return std::to_string(Value);
case DT_HEXAGON_SYMSZ:
case DT_HEXAGON_PLT:
return FormatHexValue(Value);
default:
break;
}
break;
case EM_MIPS:
switch (Type) {
case DT_MIPS_RLD_VERSION:
case DT_MIPS_LOCAL_GOTNO:
case DT_MIPS_SYMTABNO:
case DT_MIPS_UNREFEXTNO:
return std::to_string(Value);
case DT_MIPS_TIME_STAMP:
case DT_MIPS_ICHECKSUM:
case DT_MIPS_IVERSION:
case DT_MIPS_BASE_ADDRESS:
case DT_MIPS_MSYM:
case DT_MIPS_CONFLICT:
case DT_MIPS_LIBLIST:
case DT_MIPS_CONFLICTNO:
case DT_MIPS_LIBLISTNO:
case DT_MIPS_GOTSYM:
case DT_MIPS_HIPAGENO:
case DT_MIPS_RLD_MAP:
case DT_MIPS_DELTA_CLASS:
case DT_MIPS_DELTA_CLASS_NO:
case DT_MIPS_DELTA_INSTANCE:
case DT_MIPS_DELTA_RELOC:
case DT_MIPS_DELTA_RELOC_NO:
case DT_MIPS_DELTA_SYM:
case DT_MIPS_DELTA_SYM_NO:
case DT_MIPS_DELTA_CLASSSYM:
case DT_MIPS_DELTA_CLASSSYM_NO:
case DT_MIPS_CXX_FLAGS:
case DT_MIPS_PIXIE_INIT:
case DT_MIPS_SYMBOL_LIB:
case DT_MIPS_LOCALPAGE_GOTIDX:
case DT_MIPS_LOCAL_GOTIDX:
case DT_MIPS_HIDDEN_GOTIDX:
case DT_MIPS_PROTECTED_GOTIDX:
case DT_MIPS_OPTIONS:
case DT_MIPS_INTERFACE:
case DT_MIPS_DYNSTR_ALIGN:
case DT_MIPS_INTERFACE_SIZE:
case DT_MIPS_RLD_TEXT_RESOLVE_ADDR:
case DT_MIPS_PERF_SUFFIX:
case DT_MIPS_COMPACT_SIZE:
case DT_MIPS_GP_VALUE:
case DT_MIPS_AUX_DYNAMIC:
case DT_MIPS_PLTGOT:
case DT_MIPS_RWPLT:
case DT_MIPS_RLD_MAP_REL:
+ case DT_MIPS_XHASH:
return FormatHexValue(Value);
case DT_MIPS_FLAGS:
return FormatFlags(Value, makeArrayRef(ElfDynamicDTMipsFlags));
default:
break;
}
break;
default:
break;
}
switch (Type) {
case DT_PLTREL:
if (Value == DT_REL)
return "REL";
if (Value == DT_RELA)
return "RELA";
LLVM_FALLTHROUGH;
case DT_PLTGOT:
case DT_HASH:
case DT_STRTAB:
case DT_SYMTAB:
case DT_RELA:
case DT_INIT:
case DT_FINI:
case DT_REL:
case DT_JMPREL:
case DT_INIT_ARRAY:
case DT_FINI_ARRAY:
case DT_PREINIT_ARRAY:
case DT_DEBUG:
case DT_VERDEF:
case DT_VERNEED:
case DT_VERSYM:
case DT_GNU_HASH:
case DT_NULL:
return FormatHexValue(Value);
case DT_RELACOUNT:
case DT_RELCOUNT:
case DT_VERDEFNUM:
case DT_VERNEEDNUM:
return std::to_string(Value);
case DT_PLTRELSZ:
case DT_RELASZ:
case DT_RELAENT:
case DT_STRSZ:
case DT_SYMENT:
case DT_RELSZ:
case DT_RELENT:
case DT_INIT_ARRAYSZ:
case DT_FINI_ARRAYSZ:
case DT_PREINIT_ARRAYSZ:
case DT_RELRSZ:
case DT_RELRENT:
case DT_ANDROID_RELSZ:
case DT_ANDROID_RELASZ:
return std::to_string(Value) + " (bytes)";
case DT_NEEDED:
case DT_SONAME:
case DT_AUXILIARY:
case DT_USED:
case DT_FILTER:
case DT_RPATH:
case DT_RUNPATH: {
const std::map<uint64_t, const char *> TagNames = {
{DT_NEEDED, "Shared library"}, {DT_SONAME, "Library soname"},
{DT_AUXILIARY, "Auxiliary library"}, {DT_USED, "Not needed object"},
{DT_FILTER, "Filter library"}, {DT_RPATH, "Library rpath"},
{DT_RUNPATH, "Library runpath"},
};
return (Twine(TagNames.at(Type)) + ": [" + getDynamicString(Value) + "]")
.str();
}
case DT_FLAGS:
return FormatFlags(Value, makeArrayRef(ElfDynamicDTFlags));
case DT_FLAGS_1:
return FormatFlags(Value, makeArrayRef(ElfDynamicDTFlags1));
default:
return FormatHexValue(Value);
}
}
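// Fetch a string from the dynamic string table (DT_STRTAB). Returns "<?>" and
// reports a warning when the table or the requested offset is invalid.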
template <class ELFT>
StringRef ELFDumper<ELFT>::getDynamicString(uint64_t Value) const {
if (DynamicStringTable.empty() && !DynamicStringTable.data()) {
reportUniqueWarning("string table was not found");
return "<?>";
}
auto WarnAndReturn = [this](const Twine &Msg, uint64_t Offset) {
reportUniqueWarning("string table at offset 0x" + Twine::utohexstr(Offset) +
Msg);
return "<?>";
};
const uint64_t FileSize = Obj.getBufSize();
const uint64_t Offset =
(const uint8_t *)DynamicStringTable.data() - Obj.base();
if (DynamicStringTable.size() > FileSize - Offset)
return WarnAndReturn(" with size 0x" +
Twine::utohexstr(DynamicStringTable.size()) +
" goes past the end of the file (0x" +
Twine::utohexstr(FileSize) + ")",
Offset);
if (Value >= DynamicStringTable.size())
return WarnAndReturn(
": unable to read the string at 0x" + Twine::utohexstr(Offset + Value) +
": it goes past the end of the table (0x" +
Twine::utohexstr(Offset + DynamicStringTable.size()) + ")",
Offset);
if (DynamicStringTable.back() != '\0')
return WarnAndReturn(": unable to read the string at 0x" +
Twine::utohexstr(Offset + Value) +
": the string table is not null-terminated",
Offset);
return DynamicStringTable.data() + Value;
}
template <class ELFT> void ELFDumper<ELFT>::printUnwindInfo() {
DwarfCFIEH::PrinterContext<ELFT> Ctx(W, ObjF);
Ctx.printUnwindInformation();
}
// The namespace is needed to fix the compilation with GCC older than 7.0.
namespace {
template <> void ELFDumper<ELF32LE>::printUnwindInfo() {
if (Obj.getHeader().e_machine == EM_ARM) {
ARM::EHABI::PrinterContext<ELF32LE> Ctx(W, Obj, ObjF.getFileName(),
DotSymtabSec);
Ctx.PrintUnwindInformation();
}
DwarfCFIEH::PrinterContext<ELF32LE> Ctx(W, ObjF);
Ctx.printUnwindInformation();
}
} // namespace
template <class ELFT> void ELFDumper<ELFT>::printNeededLibraries() {
ListScope D(W, "NeededLibraries");
std::vector<StringRef> Libs;
for (const auto &Entry : dynamic_table())
if (Entry.d_tag == ELF::DT_NEEDED)
Libs.push_back(getDynamicString(Entry.d_un.d_val));
llvm::sort(Libs);
for (StringRef L : Libs)
W.startLine() << L << "\n";
}
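// Verify that a SHT_HASH table's header and its bucket/chain arrays are fully
// contained within the file before they are dereferenced.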
template <class ELFT>
static Error checkHashTable(const ELFDumper<ELFT> &Dumper,
const typename ELFT::Hash *H,
bool *IsHeaderValid = nullptr) {
const ELFFile<ELFT> &Obj = Dumper.getElfObject().getELFFile();
const uint64_t SecOffset = (const uint8_t *)H - Obj.base();
if (Dumper.getHashTableEntSize() == 8) {
auto It = llvm::find_if(ElfMachineType, [&](const EnumEntry<unsigned> &E) {
return E.Value == Obj.getHeader().e_machine;
});
if (IsHeaderValid)
*IsHeaderValid = false;
return createError("the hash table at 0x" + Twine::utohexstr(SecOffset) +
" is not supported: it contains non-standard 8 "
"byte entries on " +
It->AltName + " platform");
}
auto MakeError = [&](const Twine &Msg = "") {
return createError("the hash table at offset 0x" +
Twine::utohexstr(SecOffset) +
" goes past the end of the file (0x" +
Twine::utohexstr(Obj.getBufSize()) + ")" + Msg);
};
// Each SHT_HASH section starts with two 32-bit fields: nbucket and nchain.
const unsigned HeaderSize = 2 * sizeof(typename ELFT::Word);
if (IsHeaderValid)
*IsHeaderValid = Obj.getBufSize() - SecOffset >= HeaderSize;
if (Obj.getBufSize() - SecOffset < HeaderSize)
return MakeError();
if (Obj.getBufSize() - SecOffset - HeaderSize <
((uint64_t)H->nbucket + H->nchain) * sizeof(typename ELFT::Word))
return MakeError(", nbucket = " + Twine(H->nbucket) +
", nchain = " + Twine(H->nchain));
return Error::success();
}
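// The equivalent check for SHT_GNU_HASH: the fixed 16-byte header, the Bloom
// filter words and the bucket array must not run past the end of the file.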
template <class ELFT>
static Error checkGNUHashTable(const ELFFile<ELFT> &Obj,
const typename ELFT::GnuHash *GnuHashTable,
bool *IsHeaderValid = nullptr) {
const uint8_t *TableData = reinterpret_cast<const uint8_t *>(GnuHashTable);
assert(TableData >= Obj.base() && TableData < Obj.base() + Obj.getBufSize() &&
"GnuHashTable must always point to a location inside the file");
uint64_t TableOffset = TableData - Obj.base();
if (IsHeaderValid)
*IsHeaderValid = TableOffset + /*Header size:*/ 16 < Obj.getBufSize();
if (TableOffset + 16 + (uint64_t)GnuHashTable->nbuckets * 4 +
(uint64_t)GnuHashTable->maskwords * sizeof(typename ELFT::Off) >=
Obj.getBufSize())
return createError("unable to dump the SHT_GNU_HASH "
"section at 0x" +
Twine::utohexstr(TableOffset) +
": it goes past the end of the file");
return Error::success();
}
template <typename ELFT> void ELFDumper<ELFT>::printHashTable() {
DictScope D(W, "HashTable");
if (!HashTable)
return;
bool IsHeaderValid;
Error Err = checkHashTable(*this, HashTable, &IsHeaderValid);
if (IsHeaderValid) {
W.printNumber("Num Buckets", HashTable->nbucket);
W.printNumber("Num Chains", HashTable->nchain);
}
if (Err) {
reportUniqueWarning(std::move(Err));
return;
}
W.printList("Buckets", HashTable->buckets());
W.printList("Chains", HashTable->chains());
}
template <class ELFT>
static Expected<ArrayRef<typename ELFT::Word>>
getGnuHashTableChains(Optional<DynRegionInfo> DynSymRegion,
const typename ELFT::GnuHash *GnuHashTable) {
if (!DynSymRegion)
return createError("no dynamic symbol table found");
ArrayRef<typename ELFT::Sym> DynSymTable =
DynSymRegion->template getAsArrayRef<typename ELFT::Sym>();
size_t NumSyms = DynSymTable.size();
if (!NumSyms)
return createError("the dynamic symbol table is empty");
if (GnuHashTable->symndx < NumSyms)
return GnuHashTable->values(NumSyms);
// A normal empty GNU hash table section produced by a linker might have
// symndx set to the number of dynamic symbols + 1 (for the zero symbol)
// and have dummy null values in the Bloom filter and in the buckets
// vector (or no values at all). It happens because the value of symndx is not
// important for dynamic loaders when the GNU hash table is empty. They just
// skip the whole object during symbol lookup. In such cases, the symndx value
// is irrelevant and we should not report a warning.
ArrayRef<typename ELFT::Word> Buckets = GnuHashTable->buckets();
if (!llvm::all_of(Buckets, [](typename ELFT::Word V) { return V == 0; }))
return createError(
"the first hashed symbol index (" + Twine(GnuHashTable->symndx) +
") is greater than or equal to the number of dynamic symbols (" +
Twine(NumSyms) + ")");
// The chain array would have a non-positive length here (dynamic symbol
// count minus symndx), so return an empty array instead.
return ArrayRef<typename ELFT::Word>();
}
template <typename ELFT>
void ELFDumper<ELFT>::printGnuHashTable() {
DictScope D(W, "GnuHashTable");
if (!GnuHashTable)
return;
bool IsHeaderValid;
Error Err = checkGNUHashTable<ELFT>(Obj, GnuHashTable, &IsHeaderValid);
if (IsHeaderValid) {
W.printNumber("Num Buckets", GnuHashTable->nbuckets);
W.printNumber("First Hashed Symbol Index", GnuHashTable->symndx);
W.printNumber("Num Mask Words", GnuHashTable->maskwords);
W.printNumber("Shift Count", GnuHashTable->shift2);
}
if (Err) {
reportUniqueWarning(std::move(Err));
return;
}
ArrayRef<typename ELFT::Off> BloomFilter = GnuHashTable->filter();
W.printHexList("Bloom Filter", BloomFilter);
ArrayRef<Elf_Word> Buckets = GnuHashTable->buckets();
W.printList("Buckets", Buckets);
Expected<ArrayRef<Elf_Word>> Chains =
getGnuHashTableChains<ELFT>(DynSymRegion, GnuHashTable);
if (!Chains) {
reportUniqueWarning("unable to dump 'Values' for the SHT_GNU_HASH "
"section: " +
toString(Chains.takeError()));
return;
}
W.printHexList("Values", *Chains);
}
template <typename ELFT> void ELFDumper<ELFT>::printLoadName() {
StringRef SOName = "<Not found>";
if (SONameOffset)
SOName = getDynamicString(*SONameOffset);
W.printString("LoadName", SOName);
}
template <class ELFT> void ELFDumper<ELFT>::printArchSpecificInfo() {
switch (Obj.getHeader().e_machine) {
case EM_ARM:
if (Obj.isLE())
printAttributes(ELF::SHT_ARM_ATTRIBUTES,
std::make_unique<ARMAttributeParser>(&W),
support::little);
else
reportUniqueWarning("attribute printing not implemented for big-endian "
"ARM objects");
break;
case EM_RISCV:
if (Obj.isLE())
printAttributes(ELF::SHT_RISCV_ATTRIBUTES,
std::make_unique<RISCVAttributeParser>(&W),
support::little);
else
reportUniqueWarning("attribute printing not implemented for big-endian "
"RISC-V objects");
break;
case EM_MSP430:
printAttributes(ELF::SHT_MSP430_ATTRIBUTES,
std::make_unique<MSP430AttributeParser>(&W),
support::little);
break;
case EM_MIPS: {
printMipsABIFlags();
printMipsOptions();
printMipsReginfo();
MipsGOTParser<ELFT> Parser(*this);
if (Error E = Parser.findGOT(dynamic_table(), dynamic_symbols()))
reportUniqueWarning(std::move(E));
else if (!Parser.isGotEmpty())
printMipsGOT(Parser);
if (Error E = Parser.findPLT(dynamic_table()))
reportUniqueWarning(std::move(E));
else if (!Parser.isPltEmpty())
printMipsPLT(Parser);
break;
}
default:
break;
}
}
template <class ELFT>
void ELFDumper<ELFT>::printAttributes(
unsigned AttrShType, std::unique_ptr<ELFAttributeParser> AttrParser,
support::endianness Endianness) {
assert((AttrShType != ELF::SHT_NULL) && AttrParser &&
"Incomplete ELF attribute implementation");
DictScope BA(W, "BuildAttributes");
for (const Elf_Shdr &Sec : cantFail(Obj.sections())) {
if (Sec.sh_type != AttrShType)
continue;
ArrayRef<uint8_t> Contents;
if (Expected<ArrayRef<uint8_t>> ContentOrErr =
Obj.getSectionContents(Sec)) {
Contents = *ContentOrErr;
if (Contents.empty()) {
reportUniqueWarning("the " + describe(Sec) + " is empty");
continue;
}
} else {
reportUniqueWarning("unable to read the content of the " + describe(Sec) +
": " + toString(ContentOrErr.takeError()));
continue;
}
W.printHex("FormatVersion", Contents[0]);
if (Error E = AttrParser->parse(Contents, Endianness))
reportUniqueWarning("unable to dump attributes from the " +
describe(Sec) + ": " + toString(std::move(E)));
}
}
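// MipsGOTParser locates and decodes the MIPS Global Offset Table and PLT,
// using the dynamic table for dynamic objects and the .got section for
// objects without a dynamic table.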
namespace {
template <class ELFT> class MipsGOTParser {
public:
LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
using Entry = typename ELFT::Addr;
using Entries = ArrayRef<Entry>;
const bool IsStatic;
const ELFFile<ELFT> &Obj;
const ELFDumper<ELFT> &Dumper;
MipsGOTParser(const ELFDumper<ELFT> &D);
Error findGOT(Elf_Dyn_Range DynTable, Elf_Sym_Range DynSyms);
Error findPLT(Elf_Dyn_Range DynTable);
bool isGotEmpty() const { return GotEntries.empty(); }
bool isPltEmpty() const { return PltEntries.empty(); }
uint64_t getGp() const;
const Entry *getGotLazyResolver() const;
const Entry *getGotModulePointer() const;
const Entry *getPltLazyResolver() const;
const Entry *getPltModulePointer() const;
Entries getLocalEntries() const;
Entries getGlobalEntries() const;
Entries getOtherEntries() const;
Entries getPltEntries() const;
uint64_t getGotAddress(const Entry * E) const;
int64_t getGotOffset(const Entry * E) const;
const Elf_Sym *getGotSym(const Entry *E) const;
uint64_t getPltAddress(const Entry * E) const;
const Elf_Sym *getPltSym(const Entry *E) const;
StringRef getPltStrTable() const { return PltStrTable; }
const Elf_Shdr *getPltSymTable() const { return PltSymTable; }
private:
const Elf_Shdr *GotSec;
size_t LocalNum;
size_t GlobalNum;
const Elf_Shdr *PltSec;
const Elf_Shdr *PltRelSec;
const Elf_Shdr *PltSymTable;
StringRef FileName;
Elf_Sym_Range GotDynSyms;
StringRef PltStrTable;
Entries GotEntries;
Entries PltEntries;
};
} // end anonymous namespace
template <class ELFT>
MipsGOTParser<ELFT>::MipsGOTParser(const ELFDumper<ELFT> &D)
: IsStatic(D.dynamic_table().empty()), Obj(D.getElfObject().getELFFile()),
Dumper(D), GotSec(nullptr), LocalNum(0), GlobalNum(0), PltSec(nullptr),
PltRelSec(nullptr), PltSymTable(nullptr),
FileName(D.getElfObject().getFileName()) {}
template <class ELFT>
Error MipsGOTParser<ELFT>::findGOT(Elf_Dyn_Range DynTable,
Elf_Sym_Range DynSyms) {
// See "Global Offset Table" in Chapter 5 of the following document
// for a detailed description of the GOT:
// ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf
// Find the static GOT section.
if (IsStatic) {
GotSec = Dumper.findSectionByName(".got");
if (!GotSec)
return Error::success();
ArrayRef<uint8_t> Content =
unwrapOrError(FileName, Obj.getSectionContents(*GotSec));
GotEntries = Entries(reinterpret_cast<const Entry *>(Content.data()),
Content.size() / sizeof(Entry));
LocalNum = GotEntries.size();
return Error::success();
}
// Lookup dynamic table tags which define the GOT layout.
Optional<uint64_t> DtPltGot;
Optional<uint64_t> DtLocalGotNum;
Optional<uint64_t> DtGotSym;
for (const auto &Entry : DynTable) {
switch (Entry.getTag()) {
case ELF::DT_PLTGOT:
DtPltGot = Entry.getVal();
break;
case ELF::DT_MIPS_LOCAL_GOTNO:
DtLocalGotNum = Entry.getVal();
break;
case ELF::DT_MIPS_GOTSYM:
DtGotSym = Entry.getVal();
break;
}
}
if (!DtPltGot && !DtLocalGotNum && !DtGotSym)
return Error::success();
if (!DtPltGot)
return createError("cannot find PLTGOT dynamic tag");
if (!DtLocalGotNum)
return createError("cannot find MIPS_LOCAL_GOTNO dynamic tag");
if (!DtGotSym)
return createError("cannot find MIPS_GOTSYM dynamic tag");
size_t DynSymTotal = DynSyms.size();
if (*DtGotSym > DynSymTotal)
return createError("DT_MIPS_GOTSYM value (" + Twine(*DtGotSym) +
") exceeds the number of dynamic symbols (" +
Twine(DynSymTotal) + ")");
GotSec = findNotEmptySectionByAddress(Obj, FileName, *DtPltGot);
if (!GotSec)
return createError("there is no non-empty GOT section at 0x" +
Twine::utohexstr(*DtPltGot));
LocalNum = *DtLocalGotNum;
GlobalNum = DynSymTotal - *DtGotSym;
ArrayRef<uint8_t> Content =
unwrapOrError(FileName, Obj.getSectionContents(*GotSec));
GotEntries = Entries(reinterpret_cast<const Entry *>(Content.data()),
Content.size() / sizeof(Entry));
GotDynSyms = DynSyms.drop_front(*DtGotSym);
return Error::success();
}
template <class ELFT>
Error MipsGOTParser<ELFT>::findPLT(Elf_Dyn_Range DynTable) {
// Lookup dynamic table tags which define the PLT layout.
Optional<uint64_t> DtMipsPltGot;
Optional<uint64_t> DtJmpRel;
for (const auto &Entry : DynTable) {
switch (Entry.getTag()) {
case ELF::DT_MIPS_PLTGOT:
DtMipsPltGot = Entry.getVal();
break;
case ELF::DT_JMPREL:
DtJmpRel = Entry.getVal();
break;
}
}
if (!DtMipsPltGot && !DtJmpRel)
return Error::success();
// Find PLT section.
if (!DtMipsPltGot)
return createError("cannot find MIPS_PLTGOT dynamic tag");
if (!DtJmpRel)
return createError("cannot find JMPREL dynamic tag");
PltSec = findNotEmptySectionByAddress(Obj, FileName, *DtMipsPltGot);
if (!PltSec)
return createError("there is no non-empty PLTGOT section at 0x" +
Twine::utohexstr(*DtMipsPltGot));
PltRelSec = findNotEmptySectionByAddress(Obj, FileName, *DtJmpRel);
if (!PltRelSec)
return createError("there is no non-empty RELPLT section at 0x" +
Twine::utohexstr(*DtJmpRel));
if (Expected<ArrayRef<uint8_t>> PltContentOrErr =
Obj.getSectionContents(*PltSec))
PltEntries =
Entries(reinterpret_cast<const Entry *>(PltContentOrErr->data()),
PltContentOrErr->size() / sizeof(Entry));
else
return createError("unable to read PLTGOT section content: " +
toString(PltContentOrErr.takeError()));
if (Expected<const Elf_Shdr *> PltSymTableOrErr =
Obj.getSection(PltRelSec->sh_link))
PltSymTable = *PltSymTableOrErr;
else
return createError("unable to get a symbol table linked to the " +
describe(Obj, *PltRelSec) + ": " +
toString(PltSymTableOrErr.takeError()));
if (Expected<StringRef> StrTabOrErr =
Obj.getStringTableForSymtab(*PltSymTable))
PltStrTable = *StrTabOrErr;
else
return createError("unable to get a string table for the " +
describe(Obj, *PltSymTable) + ": " +
toString(StrTabOrErr.takeError()));
return Error::success();
}
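// By MIPS ABI convention the gp register points 0x7ff0 bytes past the start
// of the GOT, so that signed 16-bit offsets can reach entries on either side.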
template <class ELFT> uint64_t MipsGOTParser<ELFT>::getGp() const {
return GotSec->sh_addr + 0x7ff0;
}
template <class ELFT>
const typename MipsGOTParser<ELFT>::Entry *
MipsGOTParser<ELFT>::getGotLazyResolver() const {
return LocalNum > 0 ? &GotEntries[0] : nullptr;
}
template <class ELFT>
const typename MipsGOTParser<ELFT>::Entry *
MipsGOTParser<ELFT>::getGotModulePointer() const {
if (LocalNum < 2)
return nullptr;
const Entry &E = GotEntries[1];
if ((E >> (sizeof(Entry) * 8 - 1)) == 0)
return nullptr;
return &E;
}
template <class ELFT>
typename MipsGOTParser<ELFT>::Entries
MipsGOTParser<ELFT>::getLocalEntries() const {
size_t Skip = getGotModulePointer() ? 2 : 1;
if (LocalNum - Skip <= 0)
return Entries();
return GotEntries.slice(Skip, LocalNum - Skip);
}
template <class ELFT>
typename MipsGOTParser<ELFT>::Entries
MipsGOTParser<ELFT>::getGlobalEntries() const {
if (GlobalNum == 0)
return Entries();
return GotEntries.slice(LocalNum, GlobalNum);
}
template <class ELFT>
typename MipsGOTParser<ELFT>::Entries
MipsGOTParser<ELFT>::getOtherEntries() const {
size_t OtherNum = GotEntries.size() - LocalNum - GlobalNum;
if (OtherNum == 0)
return Entries();
return GotEntries.slice(LocalNum + GlobalNum, OtherNum);
}
template <class ELFT>
uint64_t MipsGOTParser<ELFT>::getGotAddress(const Entry *E) const {
int64_t Offset = std::distance(GotEntries.data(), E) * sizeof(Entry);
return GotSec->sh_addr + Offset;
}
template <class ELFT>
int64_t MipsGOTParser<ELFT>::getGotOffset(const Entry *E) const {
int64_t Offset = std::distance(GotEntries.data(), E) * sizeof(Entry);
return Offset - 0x7ff0;
}
template <class ELFT>
const typename MipsGOTParser<ELFT>::Elf_Sym *
MipsGOTParser<ELFT>::getGotSym(const Entry *E) const {
int64_t Offset = std::distance(GotEntries.data(), E);
return &GotDynSyms[Offset - LocalNum];
}
template <class ELFT>
const typename MipsGOTParser<ELFT>::Entry *
MipsGOTParser<ELFT>::getPltLazyResolver() const {
return PltEntries.empty() ? nullptr : &PltEntries[0];
}
template <class ELFT>
const typename MipsGOTParser<ELFT>::Entry *
MipsGOTParser<ELFT>::getPltModulePointer() const {
return PltEntries.size() < 2 ? nullptr : &PltEntries[1];
}
template <class ELFT>
typename MipsGOTParser<ELFT>::Entries
MipsGOTParser<ELFT>::getPltEntries() const {
if (PltEntries.size() <= 2)
return Entries();
return PltEntries.slice(2, PltEntries.size() - 2);
}
template <class ELFT>
uint64_t MipsGOTParser<ELFT>::getPltAddress(const Entry *E) const {
int64_t Offset = std::distance(PltEntries.data(), E) * sizeof(Entry);
return PltSec->sh_addr + Offset;
}
template <class ELFT>
const typename MipsGOTParser<ELFT>::Elf_Sym *
MipsGOTParser<ELFT>::getPltSym(const Entry *E) const {
int64_t Offset = std::distance(getPltEntries().data(), E);
if (PltRelSec->sh_type == ELF::SHT_REL) {
Elf_Rel_Range Rels = unwrapOrError(FileName, Obj.rels(*PltRelSec));
return unwrapOrError(FileName,
Obj.getRelocationSymbol(Rels[Offset], PltSymTable));
} else {
Elf_Rela_Range Rels = unwrapOrError(FileName, Obj.relas(*PltRelSec));
return unwrapOrError(FileName,
Obj.getRelocationSymbol(Rels[Offset], PltSymTable));
}
}
const EnumEntry<unsigned> ElfMipsISAExtType[] = {
{"None", Mips::AFL_EXT_NONE},
{"Broadcom SB-1", Mips::AFL_EXT_SB1},
{"Cavium Networks Octeon", Mips::AFL_EXT_OCTEON},
{"Cavium Networks Octeon2", Mips::AFL_EXT_OCTEON2},
{"Cavium Networks OcteonP", Mips::AFL_EXT_OCTEONP},
{"Cavium Networks Octeon3", Mips::AFL_EXT_OCTEON3},
{"LSI R4010", Mips::AFL_EXT_4010},
{"Loongson 2E", Mips::AFL_EXT_LOONGSON_2E},
{"Loongson 2F", Mips::AFL_EXT_LOONGSON_2F},
{"Loongson 3A", Mips::AFL_EXT_LOONGSON_3A},
{"MIPS R4650", Mips::AFL_EXT_4650},
{"MIPS R5900", Mips::AFL_EXT_5900},
{"MIPS R10000", Mips::AFL_EXT_10000},
{"NEC VR4100", Mips::AFL_EXT_4100},
{"NEC VR4111/VR4181", Mips::AFL_EXT_4111},
{"NEC VR4120", Mips::AFL_EXT_4120},
{"NEC VR5400", Mips::AFL_EXT_5400},
{"NEC VR5500", Mips::AFL_EXT_5500},
{"RMI Xlr", Mips::AFL_EXT_XLR},
{"Toshiba R3900", Mips::AFL_EXT_3900}
};
const EnumEntry<unsigned> ElfMipsASEFlags[] = {
{"DSP", Mips::AFL_ASE_DSP},
{"DSPR2", Mips::AFL_ASE_DSPR2},
{"Enhanced VA Scheme", Mips::AFL_ASE_EVA},
{"MCU", Mips::AFL_ASE_MCU},
{"MDMX", Mips::AFL_ASE_MDMX},
{"MIPS-3D", Mips::AFL_ASE_MIPS3D},
{"MT", Mips::AFL_ASE_MT},
{"SmartMIPS", Mips::AFL_ASE_SMARTMIPS},
{"VZ", Mips::AFL_ASE_VIRT},
{"MSA", Mips::AFL_ASE_MSA},
{"MIPS16", Mips::AFL_ASE_MIPS16},
{"microMIPS", Mips::AFL_ASE_MICROMIPS},
{"XPA", Mips::AFL_ASE_XPA},
{"CRC", Mips::AFL_ASE_CRC},
{"GINV", Mips::AFL_ASE_GINV},
};
const EnumEntry<unsigned> ElfMipsFpABIType[] = {
{"Hard or soft float", Mips::Val_GNU_MIPS_ABI_FP_ANY},
{"Hard float (double precision)", Mips::Val_GNU_MIPS_ABI_FP_DOUBLE},
{"Hard float (single precision)", Mips::Val_GNU_MIPS_ABI_FP_SINGLE},
{"Soft float", Mips::Val_GNU_MIPS_ABI_FP_SOFT},
{"Hard float (MIPS32r2 64-bit FPU 12 callee-saved)",
Mips::Val_GNU_MIPS_ABI_FP_OLD_64},
{"Hard float (32-bit CPU, Any FPU)", Mips::Val_GNU_MIPS_ABI_FP_XX},
{"Hard float (32-bit CPU, 64-bit FPU)", Mips::Val_GNU_MIPS_ABI_FP_64},
{"Hard float compat (32-bit CPU, 64-bit FPU)",
Mips::Val_GNU_MIPS_ABI_FP_64A}
};
static const EnumEntry<unsigned> ElfMipsFlags1[] {
{"ODDSPREG", Mips::AFL_FLAGS1_ODDSPREG},
};
static int getMipsRegisterSize(uint8_t Flag) {
switch (Flag) {
case Mips::AFL_REG_NONE:
return 0;
case Mips::AFL_REG_32:
return 32;
case Mips::AFL_REG_64:
return 64;
case Mips::AFL_REG_128:
return 128;
default:
return -1;
}
}
template <class ELFT>
static void printMipsReginfoData(ScopedPrinter &W,
const Elf_Mips_RegInfo<ELFT> &Reginfo) {
W.printHex("GP", Reginfo.ri_gp_value);
W.printHex("General Mask", Reginfo.ri_gprmask);
W.printHex("Co-Proc Mask0", Reginfo.ri_cprmask[0]);
W.printHex("Co-Proc Mask1", Reginfo.ri_cprmask[1]);
W.printHex("Co-Proc Mask2", Reginfo.ri_cprmask[2]);
W.printHex("Co-Proc Mask3", Reginfo.ri_cprmask[3]);
}
template <class ELFT> void ELFDumper<ELFT>::printMipsReginfo() {
const Elf_Shdr *RegInfoSec = findSectionByName(".reginfo");
if (!RegInfoSec) {
W.startLine() << "There is no .reginfo section in the file.\n";
return;
}
Expected<ArrayRef<uint8_t>> ContentsOrErr =
Obj.getSectionContents(*RegInfoSec);
if (!ContentsOrErr) {
this->reportUniqueWarning(
"unable to read the content of the .reginfo section (" +
describe(*RegInfoSec) + "): " + toString(ContentsOrErr.takeError()));
return;
}
if (ContentsOrErr->size() < sizeof(Elf_Mips_RegInfo<ELFT>)) {
this->reportUniqueWarning("the .reginfo section has an invalid size (0x" +
Twine::utohexstr(ContentsOrErr->size()) + ")");
return;
}
DictScope GS(W, "MIPS RegInfo");
printMipsReginfoData(W, *reinterpret_cast<const Elf_Mips_RegInfo<ELFT> *>(
ContentsOrErr->data()));
}
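// Read a single descriptor from the .MIPS.options section and advance SecData
// past it. Only ODK_REGINFO descriptors are dumped in full.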
template <class ELFT>
static Expected<const Elf_Mips_Options<ELFT> *>
readMipsOptions(const uint8_t *SecBegin, ArrayRef<uint8_t> &SecData,
bool &IsSupported) {
if (SecData.size() < sizeof(Elf_Mips_Options<ELFT>))
return createError("the .MIPS.options section has an invalid size (0x" +
Twine::utohexstr(SecData.size()) + ")");
const Elf_Mips_Options<ELFT> *O =
reinterpret_cast<const Elf_Mips_Options<ELFT> *>(SecData.data());
const uint8_t Size = O->size;
if (Size > SecData.size()) {
const uint64_t Offset = SecData.data() - SecBegin;
const uint64_t SecSize = Offset + SecData.size();
return createError("a descriptor of size 0x" + Twine::utohexstr(Size) +
" at offset 0x" + Twine::utohexstr(Offset) +
" goes past the end of the .MIPS.options "
"section of size 0x" +
Twine::utohexstr(SecSize));
}
IsSupported = O->kind == ODK_REGINFO;
const size_t ExpectedSize =
sizeof(Elf_Mips_Options<ELFT>) + sizeof(Elf_Mips_RegInfo<ELFT>);
if (IsSupported)
if (Size < ExpectedSize)
return createError(
"a .MIPS.options entry of kind " +
Twine(getElfMipsOptionsOdkType(O->kind)) +
" has an invalid size (0x" + Twine::utohexstr(Size) +
"), the expected size is 0x" + Twine::utohexstr(ExpectedSize));
SecData = SecData.drop_front(Size);
return O;
}
template <class ELFT> void ELFDumper<ELFT>::printMipsOptions() {
const Elf_Shdr *MipsOpts = findSectionByName(".MIPS.options");
if (!MipsOpts) {
W.startLine() << "There is no .MIPS.options section in the file.\n";
return;
}
DictScope GS(W, "MIPS Options");
ArrayRef<uint8_t> Data =
unwrapOrError(ObjF.getFileName(), Obj.getSectionContents(*MipsOpts));
const uint8_t *const SecBegin = Data.begin();
while (!Data.empty()) {
bool IsSupported;
Expected<const Elf_Mips_Options<ELFT> *> OptsOrErr =
readMipsOptions<ELFT>(SecBegin, Data, IsSupported);
if (!OptsOrErr) {
reportUniqueWarning(OptsOrErr.takeError());
break;
}
unsigned Kind = (*OptsOrErr)->kind;
const char *Type = getElfMipsOptionsOdkType(Kind);
if (!IsSupported) {
W.startLine() << "Unsupported MIPS options tag: " << Type << " (" << Kind
<< ")\n";
continue;
}
DictScope GS(W, Type);
if (Kind == ODK_REGINFO)
printMipsReginfoData(W, (*OptsOrErr)->getRegInfo());
else
llvm_unreachable("unexpected .MIPS.options section descriptor kind");
}
}
template <class ELFT> void ELFDumper<ELFT>::printStackMap() const {
const Elf_Shdr *StackMapSection = findSectionByName(".llvm_stackmaps");
if (!StackMapSection)
return;
auto Warn = [&](Error &&E) {
this->reportUniqueWarning("unable to read the stack map from " +
describe(*StackMapSection) + ": " +
toString(std::move(E)));
};
Expected<ArrayRef<uint8_t>> ContentOrErr =
Obj.getSectionContents(*StackMapSection);
if (!ContentOrErr) {
Warn(ContentOrErr.takeError());
return;
}
if (Error E = StackMapParser<ELFT::TargetEndianness>::validateHeader(
*ContentOrErr)) {
Warn(std::move(E));
return;
}
prettyPrintStackMap(W, StackMapParser<ELFT::TargetEndianness>(*ContentOrErr));
}
template <class ELFT>
void ELFDumper<ELFT>::printReloc(const Relocation<ELFT> &R, unsigned RelIndex,
const Elf_Shdr &Sec, const Elf_Shdr *SymTab) {
Expected<RelSymbol<ELFT>> Target = getRelocationTarget(R, SymTab);
if (!Target)
reportUniqueWarning("unable to print relocation " + Twine(RelIndex) +
" in " + describe(Sec) + ": " +
toString(Target.takeError()));
else
printRelRelaReloc(R, *Target);
}
static inline void printFields(formatted_raw_ostream &OS, StringRef Str1,
StringRef Str2) {
OS.PadToColumn(2u);
OS << Str1;
OS.PadToColumn(37u);
OS << Str2 << "\n";
OS.flush();
}
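// When e_shnum is zero, the real number of section headers is stored in the
// sh_size field of the initial (index 0) section header.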
template <class ELFT>
static std::string getSectionHeadersNumString(const ELFFile<ELFT> &Obj,
StringRef FileName) {
const typename ELFT::Ehdr &ElfHeader = Obj.getHeader();
if (ElfHeader.e_shnum != 0)
return to_string(ElfHeader.e_shnum);
Expected<ArrayRef<typename ELFT::Shdr>> ArrOrErr = Obj.sections();
if (!ArrOrErr) {
// In this case we can ignore an error, because we have already reported a
// warning about the broken section header table earlier.
consumeError(ArrOrErr.takeError());
return "<?>";
}
if (ArrOrErr->empty())
return "0";
return "0 (" + to_string((*ArrOrErr)[0].sh_size) + ")";
}
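// Likewise, an e_shstrndx of SHN_XINDEX means the real index of the section
// header string table is stored in the sh_link field of the initial section
// header.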
template <class ELFT>
static std::string getSectionHeaderTableIndexString(const ELFFile<ELFT> &Obj,
StringRef FileName) {
const typename ELFT::Ehdr &ElfHeader = Obj.getHeader();
if (ElfHeader.e_shstrndx != SHN_XINDEX)
return to_string(ElfHeader.e_shstrndx);
Expected<ArrayRef<typename ELFT::Shdr>> ArrOrErr = Obj.sections();
if (!ArrOrErr) {
// In this case we can ignore an error, because we have already reported a
// warning about the broken section header table earlier.
consumeError(ArrOrErr.takeError());
return "<?>";
}
if (ArrOrErr->empty())
return "65535 (corrupt: out of range)";
return to_string(ElfHeader.e_shstrndx) + " (" +
to_string((*ArrOrErr)[0].sh_link) + ")";
}
static const EnumEntry<unsigned> *getObjectFileEnumEntry(unsigned Type) {
auto It = llvm::find_if(ElfObjectFileType, [&](const EnumEntry<unsigned> &E) {
return E.Value == Type;
});
if (It != makeArrayRef(ElfObjectFileType).end())
return It;
return nullptr;
}
template <class ELFT>
void GNUELFDumper<ELFT>::printFileSummary(StringRef FileStr, ObjectFile &Obj,
ArrayRef<std::string> InputFilenames,
const Archive *A) {
if (InputFilenames.size() > 1 || A) {
this->W.startLine() << "\n";
this->W.printString("File", FileStr);
}
}
template <class ELFT> void GNUELFDumper<ELFT>::printFileHeaders() {
const Elf_Ehdr &e = this->Obj.getHeader();
OS << "ELF Header:\n";
OS << " Magic: ";
std::string Str;
for (int i = 0; i < ELF::EI_NIDENT; i++)
OS << format(" %02x", static_cast<int>(e.e_ident[i]));
OS << "\n";
Str = enumToString(e.e_ident[ELF::EI_CLASS], makeArrayRef(ElfClass));
printFields(OS, "Class:", Str);
Str = enumToString(e.e_ident[ELF::EI_DATA], makeArrayRef(ElfDataEncoding));
printFields(OS, "Data:", Str);
OS.PadToColumn(2u);
OS << "Version:";
OS.PadToColumn(37u);
OS << to_hexString(e.e_ident[ELF::EI_VERSION]);
if (e.e_version == ELF::EV_CURRENT)
OS << " (current)";
OS << "\n";
Str = enumToString(e.e_ident[ELF::EI_OSABI], makeArrayRef(ElfOSABI));
printFields(OS, "OS/ABI:", Str);
printFields(OS,
"ABI Version:", std::to_string(e.e_ident[ELF::EI_ABIVERSION]));
if (const EnumEntry<unsigned> *E = getObjectFileEnumEntry(e.e_type)) {
Str = E->AltName.str();
} else {
if (e.e_type >= ET_LOPROC)
Str = "Processor Specific: (" + to_hexString(e.e_type, false) + ")";
else if (e.e_type >= ET_LOOS)
Str = "OS Specific: (" + to_hexString(e.e_type, false) + ")";
else
Str = "<unknown>: " + to_hexString(e.e_type, false);
}
printFields(OS, "Type:", Str);
Str = enumToString(e.e_machine, makeArrayRef(ElfMachineType));
printFields(OS, "Machine:", Str);
Str = "0x" + to_hexString(e.e_version);
printFields(OS, "Version:", Str);
Str = "0x" + to_hexString(e.e_entry);
printFields(OS, "Entry point address:", Str);
Str = to_string(e.e_phoff) + " (bytes into file)";
printFields(OS, "Start of program headers:", Str);
Str = to_string(e.e_shoff) + " (bytes into file)";
printFields(OS, "Start of section headers:", Str);
std::string ElfFlags;
if (e.e_machine == EM_MIPS)
ElfFlags =
printFlags(e.e_flags, makeArrayRef(ElfHeaderMipsFlags),
unsigned(ELF::EF_MIPS_ARCH), unsigned(ELF::EF_MIPS_ABI),
unsigned(ELF::EF_MIPS_MACH));
else if (e.e_machine == EM_RISCV)
ElfFlags = printFlags(e.e_flags, makeArrayRef(ElfHeaderRISCVFlags));
else if (e.e_machine == EM_AVR)
ElfFlags = printFlags(e.e_flags, makeArrayRef(ElfHeaderAVRFlags),
unsigned(ELF::EF_AVR_ARCH_MASK));
Str = "0x" + to_hexString(e.e_flags);
if (!ElfFlags.empty())
Str = Str + ", " + ElfFlags;
printFields(OS, "Flags:", Str);
Str = to_string(e.e_ehsize) + " (bytes)";
printFields(OS, "Size of this header:", Str);
Str = to_string(e.e_phentsize) + " (bytes)";
printFields(OS, "Size of program headers:", Str);
Str = to_string(e.e_phnum);
printFields(OS, "Number of program headers:", Str);
Str = to_string(e.e_shentsize) + " (bytes)";
printFields(OS, "Size of section headers:", Str);
Str = getSectionHeadersNumString(this->Obj, this->FileName);
printFields(OS, "Number of section headers:", Str);
Str = getSectionHeaderTableIndexString(this->Obj, this->FileName);
printFields(OS, "Section header string table index:", Str);
}
template <class ELFT> std::vector<GroupSection> ELFDumper<ELFT>::getGroups() {
auto GetSignature = [&](const Elf_Sym &Sym, unsigned SymNdx,
const Elf_Shdr &Symtab) -> StringRef {
Expected<StringRef> StrTableOrErr = Obj.getStringTableForSymtab(Symtab);
if (!StrTableOrErr) {
reportUniqueWarning("unable to get the string table for " +
describe(Symtab) + ": " +
toString(StrTableOrErr.takeError()));
return "<?>";
}
StringRef Strings = *StrTableOrErr;
if (Sym.st_name >= Strings.size()) {
reportUniqueWarning("unable to get the name of the symbol with index " +
Twine(SymNdx) + ": st_name (0x" +
Twine::utohexstr(Sym.st_name) +
") is past the end of the string table of size 0x" +
Twine::utohexstr(Strings.size()));
return "<?>";
}
return StrTableOrErr->data() + Sym.st_name;
};
std::vector<GroupSection> Ret;
uint64_t I = 0;
for (const Elf_Shdr &Sec : cantFail(Obj.sections())) {
++I;
if (Sec.sh_type != ELF::SHT_GROUP)
continue;
StringRef Signature = "<?>";
if (Expected<const Elf_Shdr *> SymtabOrErr = Obj.getSection(Sec.sh_link)) {
if (Expected<const Elf_Sym *> SymOrErr =
Obj.template getEntry<Elf_Sym>(**SymtabOrErr, Sec.sh_info))
Signature = GetSignature(**SymOrErr, Sec.sh_info, **SymtabOrErr);
else
reportUniqueWarning("unable to get the signature symbol for " +
describe(Sec) + ": " +
toString(SymOrErr.takeError()));
} else {
reportUniqueWarning("unable to get the symbol table for " +
describe(Sec) + ": " +
toString(SymtabOrErr.takeError()));
}
ArrayRef<Elf_Word> Data;
if (Expected<ArrayRef<Elf_Word>> ContentsOrErr =
Obj.template getSectionContentsAsArray<Elf_Word>(Sec)) {
if (ContentsOrErr->empty())
reportUniqueWarning("unable to read the section group flag from the " +
describe(Sec) + ": the section is empty");
else
Data = *ContentsOrErr;
} else {
reportUniqueWarning("unable to get the content of the " + describe(Sec) +
": " + toString(ContentsOrErr.takeError()));
}
Ret.push_back({getPrintableSectionName(Sec),
maybeDemangle(Signature),
Sec.sh_name,
I - 1,
Sec.sh_link,
Sec.sh_info,
Data.empty() ? Elf_Word(0) : Data[0],
{}});
if (Data.empty())
continue;
std::vector<GroupMember> &GM = Ret.back().Members;
for (uint32_t Ndx : Data.slice(1)) {
if (Expected<const Elf_Shdr *> SecOrErr = Obj.getSection(Ndx)) {
GM.push_back({getPrintableSectionName(**SecOrErr), Ndx});
} else {
reportUniqueWarning("unable to get the section with index " +
Twine(Ndx) + " when dumping the " + describe(Sec) +
": " + toString(SecOrErr.takeError()));
GM.push_back({"<?>", Ndx});
}
}
}
return Ret;
}
static DenseMap<uint64_t, const GroupSection *>
mapSectionsToGroups(ArrayRef<GroupSection> Groups) {
DenseMap<uint64_t, const GroupSection *> Ret;
for (const GroupSection &G : Groups)
for (const GroupMember &GM : G.Members)
Ret.insert({GM.Index, &G});
return Ret;
}
template <class ELFT> void GNUELFDumper<ELFT>::printGroupSections() {
std::vector<GroupSection> V = this->getGroups();
DenseMap<uint64_t, const GroupSection *> Map = mapSectionsToGroups(V);
for (const GroupSection &G : V) {
OS << "\n"
<< getGroupType(G.Type) << " group section ["
<< format_decimal(G.Index, 5) << "] `" << G.Name << "' [" << G.Signature
<< "] contains " << G.Members.size() << " sections:\n"
<< " [Index] Name\n";
for (const GroupMember &GM : G.Members) {
const GroupSection *MainGroup = Map[GM.Index];
if (MainGroup != &G)
this->reportUniqueWarning(
"section with index " + Twine(GM.Index) +
", included in the group section with index " +
Twine(MainGroup->Index) +
", was also found in the group section with index " +
Twine(G.Index));
OS << " [" << format_decimal(GM.Index, 5) << "] " << GM.Name << "\n";
}
}
if (V.empty())
OS << "There are no section groups in this file.\n";
}
template <class ELFT>
void GNUELFDumper<ELFT>::printRelrReloc(const Elf_Relr &R) {
OS << to_string(format_hex_no_prefix(R, ELFT::Is64Bits ? 16 : 8)) << "\n";
}
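// Print a single REL/RELA entry in GNU readelf's column layout: offset, info,
// relocation type, symbol value, symbol name and an optional addend.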
template <class ELFT>
void GNUELFDumper<ELFT>::printRelRelaReloc(const Relocation<ELFT> &R,
const RelSymbol<ELFT> &RelSym) {
// The first two fields are bit-width dependent. The rest are fixed width.
unsigned Bias = ELFT::Is64Bits ? 8 : 0;
Field Fields[5] = {0, 10 + Bias, 19 + 2 * Bias, 42 + 2 * Bias, 53 + 2 * Bias};
unsigned Width = ELFT::Is64Bits ? 16 : 8;
Fields[0].Str = to_string(format_hex_no_prefix(R.Offset, Width));
Fields[1].Str = to_string(format_hex_no_prefix(R.Info, Width));
SmallString<32> RelocName;
this->Obj.getRelocationTypeName(R.Type, RelocName);
Fields[2].Str = RelocName.c_str();
if (RelSym.Sym)
Fields[3].Str =
to_string(format_hex_no_prefix(RelSym.Sym->getValue(), Width));
Fields[4].Str = std::string(RelSym.Name);
for (const Field &F : Fields)
printField(F);
std::string Addend;
if (Optional<int64_t> A = R.Addend) {
int64_t RelAddend = *A;
if (!RelSym.Name.empty()) {
if (RelAddend < 0) {
Addend = " - ";
RelAddend = std::abs(RelAddend);
} else {
Addend = " + ";
}
}
Addend += to_hexString(RelAddend, false);
}
OS << Addend << "\n";
}
template <class ELFT>
static void printRelocHeaderFields(formatted_raw_ostream &OS, unsigned SType) {
bool IsRela = SType == ELF::SHT_RELA || SType == ELF::SHT_ANDROID_RELA;
bool IsRelr = SType == ELF::SHT_RELR || SType == ELF::SHT_ANDROID_RELR;
if (ELFT::Is64Bits)
OS << " ";
else
OS << " ";
if (IsRelr && opts::RawRelr)
OS << "Data ";
else
OS << "Offset";
if (ELFT::Is64Bits)
OS << " Info Type"
<< " Symbol's Value Symbol's Name";
else
OS << " Info Type Sym. Value Symbol's Name";
if (IsRela)
OS << " + Addend";
OS << "\n";
}
template <class ELFT>
void GNUELFDumper<ELFT>::printDynamicRelocHeader(unsigned Type, StringRef Name,
const DynRegionInfo &Reg) {
uint64_t Offset = Reg.Addr - this->Obj.base();
OS << "\n'" << Name.str().c_str() << "' relocation section at offset 0x"
<< to_hexString(Offset, false) << " contains " << Reg.Size << " bytes:\n";
printRelocHeaderFields<ELFT>(OS, Type);
}
template <class ELFT>
static bool isRelocationSec(const typename ELFT::Shdr &Sec) {
return Sec.sh_type == ELF::SHT_REL || Sec.sh_type == ELF::SHT_RELA ||
Sec.sh_type == ELF::SHT_RELR || Sec.sh_type == ELF::SHT_ANDROID_REL ||
Sec.sh_type == ELF::SHT_ANDROID_RELA ||
Sec.sh_type == ELF::SHT_ANDROID_RELR;
}
template <class ELFT> void GNUELFDumper<ELFT>::printRelocations() {
auto GetEntriesNum = [&](const Elf_Shdr &Sec) -> Expected<size_t> {
// Android's packed relocation section needs to be unpacked first
// to get the actual number of entries.
if (Sec.sh_type == ELF::SHT_ANDROID_REL ||
Sec.sh_type == ELF::SHT_ANDROID_RELA) {
Expected<std::vector<typename ELFT::Rela>> RelasOrErr =
this->Obj.android_relas(Sec);
if (!RelasOrErr)
return RelasOrErr.takeError();
return RelasOrErr->size();
}
if (!opts::RawRelr && (Sec.sh_type == ELF::SHT_RELR ||
Sec.sh_type == ELF::SHT_ANDROID_RELR)) {
Expected<Elf_Relr_Range> RelrsOrErr = this->Obj.relrs(Sec);
if (!RelrsOrErr)
return RelrsOrErr.takeError();
return this->Obj.decode_relrs(*RelrsOrErr).size();
}
return Sec.getEntityCount();
};
bool HasRelocSections = false;
for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) {
if (!isRelocationSec<ELFT>(Sec))
continue;
HasRelocSections = true;
std::string EntriesNum = "<?>";
if (Expected<size_t> NumOrErr = GetEntriesNum(Sec))
EntriesNum = std::to_string(*NumOrErr);
else
this->reportUniqueWarning("unable to get the number of relocations in " +
this->describe(Sec) + ": " +
toString(NumOrErr.takeError()));
uintX_t Offset = Sec.sh_offset;
StringRef Name = this->getPrintableSectionName(Sec);
OS << "\nRelocation section '" << Name << "' at offset 0x"
<< to_hexString(Offset, false) << " contains " << EntriesNum
<< " entries:\n";
printRelocHeaderFields<ELFT>(OS, Sec.sh_type);
this->printRelocationsHelper(Sec);
}
if (!HasRelocSections)
OS << "\nThere are no relocations in this file.\n";
}
// Return a string describing the offset of a section type from any one of the
// ranges [SHT_LOOS, SHT_HIOS], [SHT_LOPROC, SHT_HIPROC] or
// [SHT_LOUSER, SHT_HIUSER]. If 'Type' does not fall within any of those
// ranges, the hex value of the type is returned, followed by '<unknown>'.
static std::string getSectionTypeOffsetString(unsigned Type) {
if (Type >= SHT_LOOS && Type <= SHT_HIOS)
return "LOOS+0x" + to_hexString(Type - SHT_LOOS);
else if (Type >= SHT_LOPROC && Type <= SHT_HIPROC)
return "LOPROC+0x" + to_hexString(Type - SHT_LOPROC);
else if (Type >= SHT_LOUSER && Type <= SHT_HIUSER)
return "LOUSER+0x" + to_hexString(Type - SHT_LOUSER);
return "0x" + to_hexString(Type) + ": <unknown>";
}
static std::string getSectionTypeString(unsigned Machine, unsigned Type) {
StringRef Name = getELFSectionTypeName(Machine, Type);
// Handle SHT_GNU_* type names.
if (Name.startswith("SHT_GNU_")) {
if (Name == "SHT_GNU_HASH")
return "GNU_HASH";
// E.g. SHT_GNU_verneed -> VERNEED.
return Name.drop_front(8).upper();
}
if (Name == "SHT_SYMTAB_SHNDX")
return "SYMTAB SECTION INDICES";
if (Name.startswith("SHT_"))
return Name.drop_front(4).str();
return getSectionTypeOffsetString(Type);
}
static void printSectionDescription(formatted_raw_ostream &OS,
unsigned EMachine) {
OS << "Key to Flags:\n";
OS << " W (write), A (alloc), X (execute), M (merge), S (strings), I "
"(info),\n";
OS << " L (link order), O (extra OS processing required), G (group), T "
"(TLS),\n";
OS << " C (compressed), x (unknown), o (OS specific), E (exclude),\n";
OS << " R (retain)";
if (EMachine == EM_X86_64)
OS << ", l (large)";
else if (EMachine == EM_ARM)
OS << ", y (purecode)";
OS << ", p (processor specific)\n";
}
template <class ELFT> void GNUELFDumper<ELFT>::printSectionHeaders() {
unsigned Bias = ELFT::Is64Bits ? 0 : 8;
ArrayRef<Elf_Shdr> Sections = cantFail(this->Obj.sections());
OS << "There are " << to_string(Sections.size())
<< " section headers, starting at offset "
<< "0x" << to_hexString(this->Obj.getHeader().e_shoff, false) << ":\n\n";
OS << "Section Headers:\n";
Field Fields[11] = {
{"[Nr]", 2}, {"Name", 7}, {"Type", 25},
{"Address", 41}, {"Off", 58 - Bias}, {"Size", 65 - Bias},
{"ES", 72 - Bias}, {"Flg", 75 - Bias}, {"Lk", 79 - Bias},
{"Inf", 82 - Bias}, {"Al", 86 - Bias}};
for (const Field &F : Fields)
printField(F);
OS << "\n";
StringRef SecStrTable;
if (Expected<StringRef> SecStrTableOrErr =
this->Obj.getSectionStringTable(Sections, this->WarningHandler))
SecStrTable = *SecStrTableOrErr;
else
this->reportUniqueWarning(SecStrTableOrErr.takeError());
size_t SectionIndex = 0;
for (const Elf_Shdr &Sec : Sections) {
Fields[0].Str = to_string(SectionIndex);
if (SecStrTable.empty())
Fields[1].Str = "<no-strings>";
else
Fields[1].Str = std::string(unwrapOrError<StringRef>(
this->FileName, this->Obj.getSectionName(Sec, SecStrTable)));
Fields[2].Str =
getSectionTypeString(this->Obj.getHeader().e_machine, Sec.sh_type);
Fields[3].Str =
to_string(format_hex_no_prefix(Sec.sh_addr, ELFT::Is64Bits ? 16 : 8));
Fields[4].Str = to_string(format_hex_no_prefix(Sec.sh_offset, 6));
Fields[5].Str = to_string(format_hex_no_prefix(Sec.sh_size, 6));
Fields[6].Str = to_string(format_hex_no_prefix(Sec.sh_entsize, 2));
Fields[7].Str = getGNUFlags(this->Obj.getHeader().e_machine, Sec.sh_flags);
Fields[8].Str = to_string(Sec.sh_link);
Fields[9].Str = to_string(Sec.sh_info);
Fields[10].Str = to_string(Sec.sh_addralign);
OS.PadToColumn(Fields[0].Column);
OS << "[" << right_justify(Fields[0].Str, 2) << "]";
for (int i = 1; i < 7; i++)
printField(Fields[i]);
OS.PadToColumn(Fields[7].Column);
OS << right_justify(Fields[7].Str, 3);
OS.PadToColumn(Fields[8].Column);
OS << right_justify(Fields[8].Str, 2);
OS.PadToColumn(Fields[9].Column);
OS << right_justify(Fields[9].Str, 3);
OS.PadToColumn(Fields[10].Column);
OS << right_justify(Fields[10].Str, 2);
OS << "\n";
++SectionIndex;
}
printSectionDescription(OS, this->Obj.getHeader().e_machine);
}
template <class ELFT>
void GNUELFDumper<ELFT>::printSymtabMessage(const Elf_Shdr *Symtab,
size_t Entries,
bool NonVisibilityBitsUsed) const {
StringRef Name;
if (Symtab)
Name = this->getPrintableSectionName(*Symtab);
if (!Name.empty())
OS << "\nSymbol table '" << Name << "'";
else
OS << "\nSymbol table for image";
OS << " contains " << Entries << " entries:\n";
if (ELFT::Is64Bits)
OS << " Num: Value Size Type Bind Vis";
else
OS << " Num: Value Size Type Bind Vis";
if (NonVisibilityBitsUsed)
OS << " ";
OS << " Ndx Name\n";
}
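// Translate a symbol's st_shndx into the short string shown in the Ndx
// column: UND/ABS/COM, a resolved SHN_XINDEX value, a reserved-range marker,
// or a plain section index.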
template <class ELFT>
std::string
GNUELFDumper<ELFT>::getSymbolSectionNdx(const Elf_Sym &Symbol,
unsigned SymIndex,
DataRegion<Elf_Word> ShndxTable) const {
unsigned SectionIndex = Symbol.st_shndx;
switch (SectionIndex) {
case ELF::SHN_UNDEF:
return "UND";
case ELF::SHN_ABS:
return "ABS";
case ELF::SHN_COMMON:
return "COM";
case ELF::SHN_XINDEX: {
Expected<uint32_t> IndexOrErr =
object::getExtendedSymbolTableIndex<ELFT>(Symbol, SymIndex, ShndxTable);
if (!IndexOrErr) {
assert(Symbol.st_shndx == SHN_XINDEX &&
"getExtendedSymbolTableIndex should only fail due to an invalid "
"SHT_SYMTAB_SHNDX table/reference");
this->reportUniqueWarning(IndexOrErr.takeError());
return "RSV[0xffff]";
}
return to_string(format_decimal(*IndexOrErr, 3));
}
default:
// Check whether the index falls in one of the reserved ranges:
// Processor specific
if (SectionIndex >= ELF::SHN_LOPROC && SectionIndex <= ELF::SHN_HIPROC)
return std::string("PRC[0x") +
to_string(format_hex_no_prefix(SectionIndex, 4)) + "]";
// OS specific
if (SectionIndex >= ELF::SHN_LOOS && SectionIndex <= ELF::SHN_HIOS)
return std::string("OS[0x") +
to_string(format_hex_no_prefix(SectionIndex, 4)) + "]";
// Architecture reserved:
if (SectionIndex >= ELF::SHN_LORESERVE &&
SectionIndex <= ELF::SHN_HIRESERVE)
return std::string("RSV[0x") +
to_string(format_hex_no_prefix(SectionIndex, 4)) + "]";
// A normal section with an index
return to_string(format_decimal(SectionIndex, 3));
}
}
template <class ELFT>
void GNUELFDumper<ELFT>::printSymbol(const Elf_Sym &Symbol, unsigned SymIndex,
DataRegion<Elf_Word> ShndxTable,
Optional<StringRef> StrTable,
bool IsDynamic,
bool NonVisibilityBitsUsed) const {
unsigned Bias = ELFT::Is64Bits ? 8 : 0;
Field Fields[8] = {0, 8, 17 + Bias, 23 + Bias,
31 + Bias, 38 + Bias, 48 + Bias, 51 + Bias};
Fields[0].Str = to_string(format_decimal(SymIndex, 6)) + ":";
Fields[1].Str =
to_string(format_hex_no_prefix(Symbol.st_value, ELFT::Is64Bits ? 16 : 8));
Fields[2].Str = to_string(format_decimal(Symbol.st_size, 5));
unsigned char SymbolType = Symbol.getType();
if (this->Obj.getHeader().e_machine == ELF::EM_AMDGPU &&
SymbolType >= ELF::STT_LOOS && SymbolType < ELF::STT_HIOS)
Fields[3].Str = enumToString(SymbolType, makeArrayRef(AMDGPUSymbolTypes));
else
Fields[3].Str = enumToString(SymbolType, makeArrayRef(ElfSymbolTypes));
Fields[4].Str =
enumToString(Symbol.getBinding(), makeArrayRef(ElfSymbolBindings));
Fields[5].Str =
enumToString(Symbol.getVisibility(), makeArrayRef(ElfSymbolVisibilities));
if (Symbol.st_other & ~0x3) {
if (this->Obj.getHeader().e_machine == ELF::EM_AARCH64) {
uint8_t Other = Symbol.st_other & ~0x3;
if (Other & STO_AARCH64_VARIANT_PCS) {
Other &= ~STO_AARCH64_VARIANT_PCS;
Fields[5].Str += " [VARIANT_PCS";
if (Other != 0)
Fields[5].Str.append(" | " + to_hexString(Other, false));
Fields[5].Str.append("]");
}
} else if (this->Obj.getHeader().e_machine == ELF::EM_RISCV) {
uint8_t Other = Symbol.st_other & ~0x3;
if (Other & STO_RISCV_VARIANT_CC) {
Other &= ~STO_RISCV_VARIANT_CC;
Fields[5].Str += " [VARIANT_CC";
if (Other != 0)
Fields[5].Str.append(" | " + to_hexString(Other, false));
Fields[5].Str.append("]");
}
} else {
Fields[5].Str +=
" [<other: " + to_string(format_hex(Symbol.st_other, 2)) + ">]";
}
}
Fields[6].Column += NonVisibilityBitsUsed ? 13 : 0;
Fields[6].Str = getSymbolSectionNdx(Symbol, SymIndex, ShndxTable);
Fields[7].Str = this->getFullSymbolName(Symbol, SymIndex, ShndxTable,
StrTable, IsDynamic);
for (const Field &Entry : Fields)
printField(Entry);
OS << "\n";
}
template <class ELFT>
void GNUELFDumper<ELFT>::printHashedSymbol(const Elf_Sym *Symbol,
unsigned SymIndex,
DataRegion<Elf_Word> ShndxTable,
StringRef StrTable,
uint32_t Bucket) {
unsigned Bias = ELFT::Is64Bits ? 8 : 0;
Field Fields[9] = {0, 6, 11, 20 + Bias, 25 + Bias,
34 + Bias, 41 + Bias, 49 + Bias, 53 + Bias};
Fields[0].Str = to_string(format_decimal(SymIndex, 5));
Fields[1].Str = to_string(format_decimal(Bucket, 3)) + ":";
Fields[2].Str = to_string(
format_hex_no_prefix(Symbol->st_value, ELFT::Is64Bits ? 16 : 8));
Fields[3].Str = to_string(format_decimal(Symbol->st_size, 5));
unsigned char SymbolType = Symbol->getType();
if (this->Obj.getHeader().e_machine == ELF::EM_AMDGPU &&
SymbolType >= ELF::STT_LOOS && SymbolType < ELF::STT_HIOS)
Fields[4].Str = enumToString(SymbolType, makeArrayRef(AMDGPUSymbolTypes));
else
Fields[4].Str = enumToString(SymbolType, makeArrayRef(ElfSymbolTypes));
Fields[5].Str =
enumToString(Symbol->getBinding(), makeArrayRef(ElfSymbolBindings));
Fields[6].Str = enumToString(Symbol->getVisibility(),
makeArrayRef(ElfSymbolVisibilities));
Fields[7].Str = getSymbolSectionNdx(*Symbol, SymIndex, ShndxTable);
Fields[8].Str =
this->getFullSymbolName(*Symbol, SymIndex, ShndxTable, StrTable, true);
for (const Field &Entry : Fields)
printField(Entry);
OS << "\n";
}
template <class ELFT>
void GNUELFDumper<ELFT>::printSymbols(bool PrintSymbols,
bool PrintDynamicSymbols) {
if (!PrintSymbols && !PrintDynamicSymbols)
return;
// GNU readelf prints both the .dynsym and .symtab with --symbols.
this->printSymbolsHelper(true);
if (PrintSymbols)
this->printSymbolsHelper(false);
}
template <class ELFT>
void GNUELFDumper<ELFT>::printHashTableSymbols(const Elf_Hash &SysVHash) {
if (this->DynamicStringTable.empty())
return;
if (ELFT::Is64Bits)
OS << " Num Buc: Value Size Type Bind Vis Ndx Name";
else
OS << " Num Buc: Value Size Type Bind Vis Ndx Name";
OS << "\n";
Elf_Sym_Range DynSyms = this->dynamic_symbols();
const Elf_Sym *FirstSym = DynSyms.empty() ? nullptr : &DynSyms[0];
if (!FirstSym) {
this->reportUniqueWarning(
Twine("unable to print symbols for the .hash table: the "
"dynamic symbol table ") +
(this->DynSymRegion ? "is empty" : "was not found"));
return;
}
DataRegion<Elf_Word> ShndxTable(
(const Elf_Word *)this->DynSymTabShndxRegion.Addr, this->Obj.end());
auto Buckets = SysVHash.buckets();
auto Chains = SysVHash.chains();
for (uint32_t Buc = 0; Buc < SysVHash.nbucket; Buc++) {
if (Buckets[Buc] == ELF::STN_UNDEF)
continue;
BitVector Visited(SysVHash.nchain);
for (uint32_t Ch = Buckets[Buc]; Ch < SysVHash.nchain; Ch = Chains[Ch]) {
if (Ch == ELF::STN_UNDEF)
break;
if (Visited[Ch]) {
this->reportUniqueWarning(".hash section is invalid: bucket " +
Twine(Ch) +
": a cycle was detected in the linked chain");
break;
}
printHashedSymbol(FirstSym + Ch, Ch, ShndxTable, this->DynamicStringTable,
Buc);
Visited[Ch] = true;
}
}
}
template <class ELFT>
void GNUELFDumper<ELFT>::printGnuHashTableSymbols(const Elf_GnuHash &GnuHash) {
if (this->DynamicStringTable.empty())
return;
Elf_Sym_Range DynSyms = this->dynamic_symbols();
const Elf_Sym *FirstSym = DynSyms.empty() ? nullptr : &DynSyms[0];
if (!FirstSym) {
this->reportUniqueWarning(
Twine("unable to print symbols for the .gnu.hash table: the "
"dynamic symbol table ") +
(this->DynSymRegion ? "is empty" : "was not found"));
return;
}
auto GetSymbol = [&](uint64_t SymIndex,
uint64_t SymsTotal) -> const Elf_Sym * {
if (SymIndex >= SymsTotal) {
this->reportUniqueWarning(
"unable to print hashed symbol with index " + Twine(SymIndex) +
", which is greater than or equal to the number of dynamic symbols "
"(" +
Twine::utohexstr(SymsTotal) + ")");
return nullptr;
}
return FirstSym + SymIndex;
};
Expected<ArrayRef<Elf_Word>> ValuesOrErr =
getGnuHashTableChains<ELFT>(this->DynSymRegion, &GnuHash);
ArrayRef<Elf_Word> Values;
if (!ValuesOrErr)
this->reportUniqueWarning("unable to get hash values for the SHT_GNU_HASH "
"section: " +
toString(ValuesOrErr.takeError()));
else
Values = *ValuesOrErr;
DataRegion<Elf_Word> ShndxTable(
(const Elf_Word *)this->DynSymTabShndxRegion.Addr, this->Obj.end());
ArrayRef<Elf_Word> Buckets = GnuHash.buckets();
for (uint32_t Buc = 0; Buc < GnuHash.nbuckets; Buc++) {
if (Buckets[Buc] == ELF::STN_UNDEF)
continue;
uint32_t Index = Buckets[Buc];
// Print whole chain.
while (true) {
uint32_t SymIndex = Index++;
if (const Elf_Sym *Sym = GetSymbol(SymIndex, DynSyms.size()))
printHashedSymbol(Sym, SymIndex, ShndxTable, this->DynamicStringTable,
Buc);
else
break;
if (SymIndex < GnuHash.symndx) {
this->reportUniqueWarning(
"unable to read the hash value for symbol with index " +
Twine(SymIndex) +
", which is less than the index of the first hashed symbol (" +
Twine(GnuHash.symndx) + ")");
break;
}
// Chain ends at symbol with stopper bit.
if ((Values[SymIndex - GnuHash.symndx] & 1) == 1)
break;
}
}
}
template <class ELFT> void GNUELFDumper<ELFT>::printHashSymbols() {
if (this->HashTable) {
OS << "\n Symbol table of .hash for image:\n";
if (Error E = checkHashTable<ELFT>(*this, this->HashTable))
this->reportUniqueWarning(std::move(E));
else
printHashTableSymbols(*this->HashTable);
}
// Try printing the .gnu.hash table.
if (this->GnuHashTable) {
OS << "\n Symbol table of .gnu.hash for image:\n";
if (ELFT::Is64Bits)
OS << " Num Buc: Value Size Type Bind Vis Ndx Name";
else
OS << " Num Buc: Value Size Type Bind Vis Ndx Name";
OS << "\n";
if (Error E = checkGNUHashTable<ELFT>(this->Obj, this->GnuHashTable))
this->reportUniqueWarning(std::move(E));
else
printGnuHashTableSymbols(*this->GnuHashTable);
}
}
template <class ELFT> void GNUELFDumper<ELFT>::printSectionDetails() {
ArrayRef<Elf_Shdr> Sections = cantFail(this->Obj.sections());
OS << "There are " << to_string(Sections.size())
<< " section headers, starting at offset "
<< "0x" << to_hexString(this->Obj.getHeader().e_shoff, false) << ":\n\n";
OS << "Section Headers:\n";
auto PrintFields = [&](ArrayRef<Field> V) {
for (const Field &F : V)
printField(F);
OS << "\n";
};
PrintFields({{"[Nr]", 2}, {"Name", 7}});
constexpr bool Is64 = ELFT::Is64Bits;
PrintFields({{"Type", 7},
{Is64 ? "Address" : "Addr", 23},
{"Off", Is64 ? 40 : 32},
{"Size", Is64 ? 47 : 39},
{"ES", Is64 ? 54 : 46},
{"Lk", Is64 ? 59 : 51},
{"Inf", Is64 ? 62 : 54},
{"Al", Is64 ? 66 : 57}});
PrintFields({{"Flags", 7}});
StringRef SecStrTable;
if (Expected<StringRef> SecStrTableOrErr =
this->Obj.getSectionStringTable(Sections, this->WarningHandler))
SecStrTable = *SecStrTableOrErr;
else
this->reportUniqueWarning(SecStrTableOrErr.takeError());
size_t SectionIndex = 0;
const unsigned AddrSize = Is64 ? 16 : 8;
for (const Elf_Shdr &S : Sections) {
StringRef Name = "<?>";
if (Expected<StringRef> NameOrErr =
this->Obj.getSectionName(S, SecStrTable))
Name = *NameOrErr;
else
this->reportUniqueWarning(NameOrErr.takeError());
OS.PadToColumn(2);
OS << "[" << right_justify(to_string(SectionIndex), 2) << "]";
PrintFields({{Name, 7}});
PrintFields(
{{getSectionTypeString(this->Obj.getHeader().e_machine, S.sh_type), 7},
{to_string(format_hex_no_prefix(S.sh_addr, AddrSize)), 23},
{to_string(format_hex_no_prefix(S.sh_offset, 6)), Is64 ? 39 : 32},
{to_string(format_hex_no_prefix(S.sh_size, 6)), Is64 ? 47 : 39},
{to_string(format_hex_no_prefix(S.sh_entsize, 2)), Is64 ? 54 : 46},
{to_string(S.sh_link), Is64 ? 59 : 51},
{to_string(S.sh_info), Is64 ? 63 : 55},
{to_string(S.sh_addralign), Is64 ? 66 : 58}});
OS.PadToColumn(7);
OS << "[" << to_string(format_hex_no_prefix(S.sh_flags, AddrSize)) << "]: ";
DenseMap<unsigned, StringRef> FlagToName = {
{SHF_WRITE, "WRITE"}, {SHF_ALLOC, "ALLOC"},
{SHF_EXECINSTR, "EXEC"}, {SHF_MERGE, "MERGE"},
{SHF_STRINGS, "STRINGS"}, {SHF_INFO_LINK, "INFO LINK"},
{SHF_LINK_ORDER, "LINK ORDER"}, {SHF_OS_NONCONFORMING, "OS NONCONF"},
{SHF_GROUP, "GROUP"}, {SHF_TLS, "TLS"},
{SHF_COMPRESSED, "COMPRESSED"}, {SHF_EXCLUDE, "EXCLUDE"}};
uint64_t Flags = S.sh_flags;
uint64_t UnknownFlags = 0;
ListSeparator LS;
while (Flags) {
// Take the least significant bit as a flag.
uint64_t Flag = Flags & -Flags;
Flags -= Flag;
auto It = FlagToName.find(Flag);
if (It != FlagToName.end())
OS << LS << It->second;
else
UnknownFlags |= Flag;
}
auto PrintUnknownFlags = [&](uint64_t Mask, StringRef Name) {
uint64_t FlagsToPrint = UnknownFlags & Mask;
if (!FlagsToPrint)
return;
OS << LS << Name << " ("
<< to_string(format_hex_no_prefix(FlagsToPrint, AddrSize)) << ")";
UnknownFlags &= ~Mask;
};
PrintUnknownFlags(SHF_MASKOS, "OS");
PrintUnknownFlags(SHF_MASKPROC, "PROC");
PrintUnknownFlags(uint64_t(-1), "UNKNOWN");
OS << "\n";
++SectionIndex;
}
}
static inline std::string printPhdrFlags(unsigned Flag) {
std::string Str;
Str = (Flag & PF_R) ? "R" : " ";
Str += (Flag & PF_W) ? "W" : " ";
Str += (Flag & PF_X) ? "E" : " ";
return Str;
}
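// For example, printPhdrFlags(PF_R | PF_X) yields "R E" and
// printPhdrFlags(PF_R | PF_W) yields "RW "; as in GNU readelf, the executable
// bit is rendered as 'E' and unset bits keep their column as a space.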
template <class ELFT>
static bool checkTLSSections(const typename ELFT::Phdr &Phdr,
const typename ELFT::Shdr &Sec) {
if (Sec.sh_flags & ELF::SHF_TLS) {
// .tbss must only be shown in the PT_TLS segment.
if (Sec.sh_type == ELF::SHT_NOBITS)
return Phdr.p_type == ELF::PT_TLS;
// SHF_TLS sections are only shown in PT_TLS, PT_LOAD or PT_GNU_RELRO
// segments.
return (Phdr.p_type == ELF::PT_TLS) || (Phdr.p_type == ELF::PT_LOAD) ||
(Phdr.p_type == ELF::PT_GNU_RELRO);
}
// PT_TLS must only have SHF_TLS sections.
return Phdr.p_type != ELF::PT_TLS;
}
template <class ELFT>
static bool checkOffsets(const typename ELFT::Phdr &Phdr,
const typename ELFT::Shdr &Sec) {
// SHT_NOBITS sections don't need to have an offset inside the segment.
if (Sec.sh_type == ELF::SHT_NOBITS)
return true;
if (Sec.sh_offset < Phdr.p_offset)
return false;
// Only non-empty sections can be at the end of a segment.
if (Sec.sh_size == 0)
return (Sec.sh_offset + 1 <= Phdr.p_offset + Phdr.p_filesz);
return Sec.sh_offset + Sec.sh_size <= Phdr.p_offset + Phdr.p_filesz;
}
// Check that an allocatable section belongs to a virtual address
// space of a segment.
template <class ELFT>
static bool checkVMA(const typename ELFT::Phdr &Phdr,
const typename ELFT::Shdr &Sec) {
if (!(Sec.sh_flags & ELF::SHF_ALLOC))
return true;
if (Sec.sh_addr < Phdr.p_vaddr)
return false;
bool IsTbss =
(Sec.sh_type == ELF::SHT_NOBITS) && ((Sec.sh_flags & ELF::SHF_TLS) != 0);
// .tbss is special: it only has memory in PT_TLS and has NOBITS properties.
bool IsTbssInNonTLS = IsTbss && Phdr.p_type != ELF::PT_TLS;
// Only non-empty sections can be at the end of a segment.
if (Sec.sh_size == 0 || IsTbssInNonTLS)
return Sec.sh_addr + 1 <= Phdr.p_vaddr + Phdr.p_memsz;
return Sec.sh_addr + Sec.sh_size <= Phdr.p_vaddr + Phdr.p_memsz;
}
template <class ELFT>
static bool checkPTDynamic(const typename ELFT::Phdr &Phdr,
const typename ELFT::Shdr &Sec) {
if (Phdr.p_type != ELF::PT_DYNAMIC || Phdr.p_memsz == 0 || Sec.sh_size != 0)
return true;
// We get here when we have an empty section. Only non-empty sections can be
// at the start or at the end of PT_DYNAMIC.
// Check whether the section lies within the Phdr, both by file offset and by VMA.
bool CheckOffset = (Sec.sh_type == ELF::SHT_NOBITS) ||
(Sec.sh_offset > Phdr.p_offset &&
Sec.sh_offset < Phdr.p_offset + Phdr.p_filesz);
bool CheckVA = !(Sec.sh_flags & ELF::SHF_ALLOC) ||
(Sec.sh_addr > Phdr.p_vaddr && Sec.sh_addr < Phdr.p_memsz);
return CheckOffset && CheckVA;
}
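// Taken together, the four checks above decide whether a section is listed
// under a segment in the "Section to Segment mapping" output. For example
// (hypothetical layout): a .tbss section (SHT_NOBITS + SHF_TLS) is listed only
// under PT_TLS, while an ordinary .bss (SHT_NOBITS, no SHF_TLS) skips the
// file-offset check and is listed under any non-PT_TLS segment whose
// [p_vaddr, p_vaddr + p_memsz) range fully contains it.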
template <class ELFT>
void GNUELFDumper<ELFT>::printProgramHeaders(
bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) {
if (PrintProgramHeaders)
printProgramHeaders();
// Display the section mapping along with the program headers, unless
// -section-mapping is explicitly set to false.
if (PrintSectionMapping != cl::BOU_FALSE)
printSectionMapping();
}
template <class ELFT> void GNUELFDumper<ELFT>::printProgramHeaders() {
unsigned Bias = ELFT::Is64Bits ? 8 : 0;
const Elf_Ehdr &Header = this->Obj.getHeader();
Field Fields[8] = {2, 17, 26, 37 + Bias,
48 + Bias, 56 + Bias, 64 + Bias, 68 + Bias};
OS << "\nElf file type is "
<< enumToString(Header.e_type, makeArrayRef(ElfObjectFileType)) << "\n"
<< "Entry point " << format_hex(Header.e_entry, 3) << "\n"
<< "There are " << Header.e_phnum << " program headers,"
<< " starting at offset " << Header.e_phoff << "\n\n"
<< "Program Headers:\n";
if (ELFT::Is64Bits)
OS << " Type Offset VirtAddr PhysAddr "
<< " FileSiz MemSiz Flg Align\n";
else
OS << " Type Offset VirtAddr PhysAddr FileSiz "
<< "MemSiz Flg Align\n";
unsigned Width = ELFT::Is64Bits ? 18 : 10;
unsigned SizeWidth = ELFT::Is64Bits ? 8 : 7;
Expected<ArrayRef<Elf_Phdr>> PhdrsOrErr = this->Obj.program_headers();
if (!PhdrsOrErr) {
this->reportUniqueWarning("unable to dump program headers: " +
toString(PhdrsOrErr.takeError()));
return;
}
for (const Elf_Phdr &Phdr : *PhdrsOrErr) {
Fields[0].Str = getGNUPtType(Header.e_machine, Phdr.p_type);
Fields[1].Str = to_string(format_hex(Phdr.p_offset, 8));
Fields[2].Str = to_string(format_hex(Phdr.p_vaddr, Width));
Fields[3].Str = to_string(format_hex(Phdr.p_paddr, Width));
Fields[4].Str = to_string(format_hex(Phdr.p_filesz, SizeWidth));
Fields[5].Str = to_string(format_hex(Phdr.p_memsz, SizeWidth));
Fields[6].Str = printPhdrFlags(Phdr.p_flags);
Fields[7].Str = to_string(format_hex(Phdr.p_align, 1));
for (const Field &F : Fields)
printField(F);
if (Phdr.p_type == ELF::PT_INTERP) {
OS << "\n";
auto ReportBadInterp = [&](const Twine &Msg) {
this->reportUniqueWarning(
"unable to read program interpreter name at offset 0x" +
Twine::utohexstr(Phdr.p_offset) + ": " + Msg);
};
if (Phdr.p_offset >= this->Obj.getBufSize()) {
ReportBadInterp("it goes past the end of the file (0x" +
Twine::utohexstr(this->Obj.getBufSize()) + ")");
continue;
}
const char *Data =
reinterpret_cast<const char *>(this->Obj.base()) + Phdr.p_offset;
size_t MaxSize = this->Obj.getBufSize() - Phdr.p_offset;
size_t Len = strnlen(Data, MaxSize);
if (Len == MaxSize) {
ReportBadInterp("it is not null-terminated");
continue;
}
OS << " [Requesting program interpreter: ";
OS << StringRef(Data, Len) << "]";
}
OS << "\n";
}
}
template <class ELFT> void GNUELFDumper<ELFT>::printSectionMapping() {
OS << "\n Section to Segment mapping:\n Segment Sections...\n";
DenseSet<const Elf_Shdr *> BelongsToSegment;
int Phnum = 0;
Expected<ArrayRef<Elf_Phdr>> PhdrsOrErr = this->Obj.program_headers();
if (!PhdrsOrErr) {
this->reportUniqueWarning(
"can't read program headers to build section to segment mapping: " +
toString(PhdrsOrErr.takeError()));
return;
}
for (const Elf_Phdr &Phdr : *PhdrsOrErr) {
std::string Sections;
OS << format(" %2.2d ", Phnum++);
// Check if each section is in a segment and then print mapping.
for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) {
if (Sec.sh_type == ELF::SHT_NULL)
continue;
// readelf additionally makes sure it does not print zero-sized sections at
// the end of segments, checks both the start and the end of a section against
// PT_DYNAMIC, and only shows .tbss in the PT_TLS segment.
if (checkTLSSections<ELFT>(Phdr, Sec) && checkOffsets<ELFT>(Phdr, Sec) &&
checkVMA<ELFT>(Phdr, Sec) && checkPTDynamic<ELFT>(Phdr, Sec)) {
Sections +=
unwrapOrError(this->FileName, this->Obj.getSectionName(Sec)).str() +
" ";
BelongsToSegment.insert(&Sec);
}
}
OS << Sections << "\n";
OS.flush();
}
// Display sections that do not belong to a segment.
std::string Sections;
for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) {
if (BelongsToSegment.find(&Sec) == BelongsToSegment.end())
Sections +=
unwrapOrError(this->FileName, this->Obj.getSectionName(Sec)).str() +
' ';
}
if (!Sections.empty()) {
OS << " None " << Sections << '\n';
OS.flush();
}
}
namespace {
template <class ELFT>
RelSymbol<ELFT> getSymbolForReloc(const ELFDumper<ELFT> &Dumper,
const Relocation<ELFT> &Reloc) {
using Elf_Sym = typename ELFT::Sym;
auto WarnAndReturn = [&](const Elf_Sym *Sym,
const Twine &Reason) -> RelSymbol<ELFT> {
Dumper.reportUniqueWarning(
"unable to get name of the dynamic symbol with index " +
Twine(Reloc.Symbol) + ": " + Reason);
return {Sym, "<corrupt>"};
};
ArrayRef<Elf_Sym> Symbols = Dumper.dynamic_symbols();
const Elf_Sym *FirstSym = Symbols.begin();
if (!FirstSym)
return WarnAndReturn(nullptr, "no dynamic symbol table found");
// We might have an object without a section header table. In this case the
// size of Symbols is zero, because there is no way to know the size of the
// dynamic symbol table. We should allow this case and not print a warning.
if (!Symbols.empty() && Reloc.Symbol >= Symbols.size())
return WarnAndReturn(
nullptr,
"index is greater than or equal to the number of dynamic symbols (" +
Twine(Symbols.size()) + ")");
const ELFFile<ELFT> &Obj = Dumper.getElfObject().getELFFile();
const uint64_t FileSize = Obj.getBufSize();
const uint64_t SymOffset = ((const uint8_t *)FirstSym - Obj.base()) +
(uint64_t)Reloc.Symbol * sizeof(Elf_Sym);
if (SymOffset + sizeof(Elf_Sym) > FileSize)
return WarnAndReturn(nullptr, "symbol at 0x" + Twine::utohexstr(SymOffset) +
" goes past the end of the file (0x" +
Twine::utohexstr(FileSize) + ")");
const Elf_Sym *Sym = FirstSym + Reloc.Symbol;
Expected<StringRef> ErrOrName = Sym->getName(Dumper.getDynamicStringTable());
if (!ErrOrName)
return WarnAndReturn(Sym, toString(ErrOrName.takeError()));
return {Sym == FirstSym ? nullptr : Sym, maybeDemangle(*ErrOrName)};
}
} // namespace
template <class ELFT>
static size_t getMaxDynamicTagSize(const ELFFile<ELFT> &Obj,
typename ELFT::DynRange Tags) {
size_t Max = 0;
for (const typename ELFT::Dyn &Dyn : Tags)
Max = std::max(Max, Obj.getDynamicTagAsString(Dyn.d_tag).size());
return Max;
}
template <class ELFT> void GNUELFDumper<ELFT>::printDynamicTable() {
Elf_Dyn_Range Table = this->dynamic_table();
if (Table.empty())
return;
OS << "Dynamic section at offset "
<< format_hex(reinterpret_cast<const uint8_t *>(this->DynamicTable.Addr) -
this->Obj.base(),
1)
<< " contains " << Table.size() << " entries:\n";
// The type name is surrounded with round brackets, hence add 2.
size_t MaxTagSize = getMaxDynamicTagSize(this->Obj, Table) + 2;
// The "Name/Value" column should be indented from the "Type" column by N
// spaces, where N = MaxTagSize - length of "Type" (4) + trailing
// space (1) = 3.
OS << " Tag" + std::string(ELFT::Is64Bits ? 16 : 8, ' ') + "Type"
<< std::string(MaxTagSize - 3, ' ') << "Name/Value\n";
std::string ValueFmt = " %-" + std::to_string(MaxTagSize) + "s ";
for (auto Entry : Table) {
uintX_t Tag = Entry.getTag();
std::string Type =
std::string("(") + this->Obj.getDynamicTagAsString(Tag) + ")";
std::string Value = this->getDynamicEntry(Tag, Entry.getVal());
OS << " " << format_hex(Tag, ELFT::Is64Bits ? 18 : 10)
<< format(ValueFmt.c_str(), Type.c_str()) << Value << "\n";
}
}
template <class ELFT> void GNUELFDumper<ELFT>::printDynamicRelocations() {
this->printDynamicRelocationsHelper();
}
template <class ELFT>
void ELFDumper<ELFT>::printDynamicReloc(const Relocation<ELFT> &R) {
printRelRelaReloc(R, getSymbolForReloc(*this, R));
}
template <class ELFT>
void ELFDumper<ELFT>::printRelocationsHelper(const Elf_Shdr &Sec) {
this->forEachRelocationDo(
Sec, opts::RawRelr,
[&](const Relocation<ELFT> &R, unsigned Ndx, const Elf_Shdr &Sec,
const Elf_Shdr *SymTab) { printReloc(R, Ndx, Sec, SymTab); },
[&](const Elf_Relr &R) { printRelrReloc(R); });
}
template <class ELFT> void ELFDumper<ELFT>::printDynamicRelocationsHelper() {
const bool IsMips64EL = this->Obj.isMips64EL();
if (this->DynRelaRegion.Size > 0) {
printDynamicRelocHeader(ELF::SHT_RELA, "RELA", this->DynRelaRegion);
for (const Elf_Rela &Rela :
this->DynRelaRegion.template getAsArrayRef<Elf_Rela>())
printDynamicReloc(Relocation<ELFT>(Rela, IsMips64EL));
}
if (this->DynRelRegion.Size > 0) {
printDynamicRelocHeader(ELF::SHT_REL, "REL", this->DynRelRegion);
for (const Elf_Rel &Rel :
this->DynRelRegion.template getAsArrayRef<Elf_Rel>())
printDynamicReloc(Relocation<ELFT>(Rel, IsMips64EL));
}
if (this->DynRelrRegion.Size > 0) {
printDynamicRelocHeader(ELF::SHT_REL, "RELR", this->DynRelrRegion);
Elf_Relr_Range Relrs =
this->DynRelrRegion.template getAsArrayRef<Elf_Relr>();
for (const Elf_Rel &Rel : Obj.decode_relrs(Relrs))
printDynamicReloc(Relocation<ELFT>(Rel, IsMips64EL));
}
if (this->DynPLTRelRegion.Size) {
if (this->DynPLTRelRegion.EntSize == sizeof(Elf_Rela)) {
printDynamicRelocHeader(ELF::SHT_RELA, "PLT", this->DynPLTRelRegion);
for (const Elf_Rela &Rela :
this->DynPLTRelRegion.template getAsArrayRef<Elf_Rela>())
printDynamicReloc(Relocation<ELFT>(Rela, IsMips64EL));
} else {
printDynamicRelocHeader(ELF::SHT_REL, "PLT", this->DynPLTRelRegion);
for (const Elf_Rel &Rel :
this->DynPLTRelRegion.template getAsArrayRef<Elf_Rel>())
printDynamicReloc(Relocation<ELFT>(Rel, IsMips64EL));
}
}
}
template <class ELFT>
void GNUELFDumper<ELFT>::printGNUVersionSectionProlog(
const typename ELFT::Shdr &Sec, const Twine &Label, unsigned EntriesNum) {
// Don't inline the SecName, because it might report a warning to stderr and
// corrupt the output.
StringRef SecName = this->getPrintableSectionName(Sec);
OS << Label << " section '" << SecName << "' "
<< "contains " << EntriesNum << " entries:\n";
StringRef LinkedSecName = "<corrupt>";
if (Expected<const typename ELFT::Shdr *> LinkedSecOrErr =
this->Obj.getSection(Sec.sh_link))
LinkedSecName = this->getPrintableSectionName(**LinkedSecOrErr);
else
this->reportUniqueWarning("invalid section linked to " +
this->describe(Sec) + ": " +
toString(LinkedSecOrErr.takeError()));
OS << " Addr: " << format_hex_no_prefix(Sec.sh_addr, 16)
<< " Offset: " << format_hex(Sec.sh_offset, 8)
<< " Link: " << Sec.sh_link << " (" << LinkedSecName << ")\n";
}
template <class ELFT>
void GNUELFDumper<ELFT>::printVersionSymbolSection(const Elf_Shdr *Sec) {
if (!Sec)
return;
printGNUVersionSectionProlog(*Sec, "Version symbols",
Sec->sh_size / sizeof(Elf_Versym));
Expected<ArrayRef<Elf_Versym>> VerTableOrErr =
this->getVersionTable(*Sec, /*SymTab=*/nullptr,
/*StrTab=*/nullptr, /*SymTabSec=*/nullptr);
if (!VerTableOrErr) {
this->reportUniqueWarning(VerTableOrErr.takeError());
return;
}
SmallVector<Optional<VersionEntry>, 0> *VersionMap = nullptr;
if (Expected<SmallVector<Optional<VersionEntry>, 0> *> MapOrErr =
this->getVersionMap())
VersionMap = *MapOrErr;
else
this->reportUniqueWarning(MapOrErr.takeError());
ArrayRef<Elf_Versym> VerTable = *VerTableOrErr;
std::vector<StringRef> Versions;
for (size_t I = 0, E = VerTable.size(); I < E; ++I) {
unsigned Ndx = VerTable[I].vs_index;
if (Ndx == VER_NDX_LOCAL || Ndx == VER_NDX_GLOBAL) {
Versions.emplace_back(Ndx == VER_NDX_LOCAL ? "*local*" : "*global*");
continue;
}
if (!VersionMap) {
Versions.emplace_back("<corrupt>");
continue;
}
bool IsDefault;
Expected<StringRef> NameOrErr = this->Obj.getSymbolVersionByIndex(
Ndx, IsDefault, *VersionMap, /*IsSymHidden=*/None);
if (!NameOrErr) {
this->reportUniqueWarning("unable to get a version for entry " +
Twine(I) + " of " + this->describe(*Sec) +
": " + toString(NameOrErr.takeError()));
Versions.emplace_back("<corrupt>");
continue;
}
Versions.emplace_back(*NameOrErr);
}
// readelf prints 4 entries per line.
uint64_t Entries = VerTable.size();
for (uint64_t VersymRow = 0; VersymRow < Entries; VersymRow += 4) {
OS << " " << format_hex_no_prefix(VersymRow, 3) << ":";
for (uint64_t I = 0; (I < 4) && (I + VersymRow) < Entries; ++I) {
unsigned Ndx = VerTable[VersymRow + I].vs_index;
OS << format("%4x%c", Ndx & VERSYM_VERSION,
Ndx & VERSYM_HIDDEN ? 'h' : ' ');
OS << left_justify("(" + std::string(Versions[VersymRow + I]) + ")", 13);
}
OS << '\n';
}
OS << '\n';
}
static std::string versionFlagToString(unsigned Flags) {
if (Flags == 0)
return "none";
std::string Ret;
auto AddFlag = [&Ret, &Flags](unsigned Flag, StringRef Name) {
if (!(Flags & Flag))
return;
if (!Ret.empty())
Ret += " | ";
Ret += Name;
Flags &= ~Flag;
};
AddFlag(VER_FLG_BASE, "BASE");
AddFlag(VER_FLG_WEAK, "WEAK");
AddFlag(VER_FLG_INFO, "INFO");
AddFlag(~0, "<unknown>");
return Ret;
}
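// For example, versionFlagToString(0) returns "none",
// versionFlagToString(VER_FLG_BASE | VER_FLG_WEAK) returns "BASE | WEAK", and
// any bits outside the three known flags collapse into a single trailing
// "<unknown>" entry.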
template <class ELFT>
void GNUELFDumper<ELFT>::printVersionDefinitionSection(const Elf_Shdr *Sec) {
if (!Sec)
return;
printGNUVersionSectionProlog(*Sec, "Version definition", Sec->sh_info);
Expected<std::vector<VerDef>> V = this->Obj.getVersionDefinitions(*Sec);
if (!V) {
this->reportUniqueWarning(V.takeError());
return;
}
for (const VerDef &Def : *V) {
OS << format(" 0x%04x: Rev: %u Flags: %s Index: %u Cnt: %u Name: %s\n",
Def.Offset, Def.Version,
versionFlagToString(Def.Flags).c_str(), Def.Ndx, Def.Cnt,
Def.Name.data());
unsigned I = 0;
for (const VerdAux &Aux : Def.AuxV)
OS << format(" 0x%04x: Parent %u: %s\n", Aux.Offset, ++I,
Aux.Name.data());
}
OS << '\n';
}
template <class ELFT>
void GNUELFDumper<ELFT>::printVersionDependencySection(const Elf_Shdr *Sec) {
if (!Sec)
return;
unsigned VerneedNum = Sec->sh_info;
printGNUVersionSectionProlog(*Sec, "Version needs", VerneedNum);
Expected<std::vector<VerNeed>> V =
this->Obj.getVersionDependencies(*Sec, this->WarningHandler);
if (!V) {
this->reportUniqueWarning(V.takeError());
return;
}
for (const VerNeed &VN : *V) {
OS << format(" 0x%04x: Version: %u File: %s Cnt: %u\n", VN.Offset,
VN.Version, VN.File.data(), VN.Cnt);
for (const VernAux &Aux : VN.AuxV)
OS << format(" 0x%04x: Name: %s Flags: %s Version: %u\n", Aux.Offset,
Aux.Name.data(), versionFlagToString(Aux.Flags).c_str(),
Aux.Other);
}
OS << '\n';
}
template <class ELFT>
void GNUELFDumper<ELFT>::printHashHistogram(const Elf_Hash &HashTable) {
size_t NBucket = HashTable.nbucket;
size_t NChain = HashTable.nchain;
ArrayRef<Elf_Word> Buckets = HashTable.buckets();
ArrayRef<Elf_Word> Chains = HashTable.chains();
size_t TotalSyms = 0;
// Even a well-formed hash table has chains of length 0, so the histogram
// always has at least one row (MaxChain starts at 1).
size_t MaxChain = 1;
size_t CumulativeNonZero = 0;
if (NChain == 0 || NBucket == 0)
return;
std::vector<size_t> ChainLen(NBucket, 0);
// Go over all buckets and note the chain length of each bucket (total
// unique chain lengths).
for (size_t B = 0; B < NBucket; B++) {
BitVector Visited(NChain);
for (size_t C = Buckets[B]; C < NChain; C = Chains[C]) {
if (C == ELF::STN_UNDEF)
break;
if (Visited[C]) {
this->reportUniqueWarning(".hash section is invalid: bucket " +
Twine(C) +
": a cycle was detected in the linked chain");
break;
}
Visited[C] = true;
if (MaxChain <= ++ChainLen[B])
MaxChain++;
}
TotalSyms += ChainLen[B];
}
if (!TotalSyms)
return;
std::vector<size_t> Count(MaxChain, 0);
// Count the number of buckets with each chain length.
for (size_t B = 0; B < NBucket; B++)
++Count[ChainLen[B]];
// Print the number of buckets with each chain length and their cumulative
// coverage of the symbols.
OS << "Histogram for bucket list length (total of " << NBucket
<< " buckets)\n"
<< " Length Number % of total Coverage\n";
for (size_t I = 0; I < MaxChain; I++) {
CumulativeNonZero += Count[I] * I;
OS << format("%7lu %-10lu (%5.1f%%) %5.1f%%\n", I, Count[I],
(Count[I] * 100.0) / NBucket,
(CumulativeNonZero * 100.0) / TotalSyms);
}
}
template <class ELFT>
void GNUELFDumper<ELFT>::printGnuHashHistogram(
const Elf_GnuHash &GnuHashTable) {
Expected<ArrayRef<Elf_Word>> ChainsOrErr =
getGnuHashTableChains<ELFT>(this->DynSymRegion, &GnuHashTable);
if (!ChainsOrErr) {
this->reportUniqueWarning("unable to print the GNU hash table histogram: " +
toString(ChainsOrErr.takeError()));
return;
}
ArrayRef<Elf_Word> Chains = *ChainsOrErr;
size_t Symndx = GnuHashTable.symndx;
size_t TotalSyms = 0;
size_t MaxChain = 1;
size_t CumulativeNonZero = 0;
size_t NBucket = GnuHashTable.nbuckets;
if (Chains.empty() || NBucket == 0)
return;
ArrayRef<Elf_Word> Buckets = GnuHashTable.buckets();
std::vector<size_t> ChainLen(NBucket, 0);
for (size_t B = 0; B < NBucket; B++) {
if (!Buckets[B])
continue;
size_t Len = 1;
for (size_t C = Buckets[B] - Symndx;
C < Chains.size() && (Chains[C] & 1) == 0; C++)
if (MaxChain < ++Len)
MaxChain++;
ChainLen[B] = Len;
TotalSyms += Len;
}
MaxChain++;
if (!TotalSyms)
return;
std::vector<size_t> Count(MaxChain, 0);
for (size_t B = 0; B < NBucket; B++)
++Count[ChainLen[B]];
// Print the number of buckets with each chain length and their cumulative
// coverage of the symbols.
OS << "Histogram for `.gnu.hash' bucket list length (total of " << NBucket
<< " buckets)\n"
<< " Length Number % of total Coverage\n";
for (size_t I = 0; I < MaxChain; I++) {
CumulativeNonZero += Count[I] * I;
OS << format("%7lu %-10lu (%5.1f%%) %5.1f%%\n", I, Count[I],
(Count[I] * 100.0) / NBucket,
(CumulativeNonZero * 100.0) / TotalSyms);
}
}
// The hash histogram shows how efficient the hash is for the dynamic symbol
// table. For each chain length it reports the number of hash buckets with a
// chain of that length, both as an absolute count and as a percentage of all
// buckets, together with the cumulative coverage of symbols for buckets up to
// that length.
template <class ELFT> void GNUELFDumper<ELFT>::printHashHistograms() {
// Print histogram for the .hash section.
if (this->HashTable) {
if (Error E = checkHashTable<ELFT>(*this, this->HashTable))
this->reportUniqueWarning(std::move(E));
else
printHashHistogram(*this->HashTable);
}
// Print histogram for the .gnu.hash section.
if (this->GnuHashTable) {
if (Error E = checkGNUHashTable<ELFT>(this->Obj, this->GnuHashTable))
this->reportUniqueWarning(std::move(E));
else
printGnuHashHistogram(*this->GnuHashTable);
}
}
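// Worked example (invented numbers): with 8 buckets whose chain lengths are
// {0,0,1,1,1,1,2,2}, TotalSyms is 8 and the .hash histogram rows become
//   Length Number  % of total  Coverage
//        0      2      25.0%      0.0%
//        1      4      50.0%     50.0%
//        2      2      25.0%    100.0%
// i.e. each row counts the buckets whose chain has exactly that length, and
// Coverage is the cumulative share of all hashed symbols reachable from
// buckets of that length or shorter.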
template <class ELFT> void GNUELFDumper<ELFT>::printCGProfile() {
OS << "GNUStyle::printCGProfile not implemented\n";
}
template <class ELFT> void GNUELFDumper<ELFT>::printBBAddrMaps() {
OS << "GNUStyle::printBBAddrMaps not implemented\n";
}
static Expected<std::vector<uint64_t>> toULEB128Array(ArrayRef<uint8_t> Data) {
std::vector<uint64_t> Ret;
const uint8_t *Cur = Data.begin();
const uint8_t *End = Data.end();
while (Cur != End) {
unsigned Size;
const char *Err;
Ret.push_back(decodeULEB128(Cur, &Size, End, &Err));
if (Err)
return createError(Err);
Cur += Size;
}
return Ret;
}
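// For example, ULEB128 stores 7 bits per byte, least significant group first,
// with the high bit set on every byte except the last, so the bytes
// 0xE5 0x8E 0x26 decode to 0x65 + (0x0E << 7) + (0x26 << 14) = 624485; an
// .llvm_addrsig section containing just those three bytes yields a single
// entry, the symbol index 624485.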
template <class ELFT>
static Expected<std::vector<uint64_t>>
decodeAddrsigSection(const ELFFile<ELFT> &Obj, const typename ELFT::Shdr &Sec) {
Expected<ArrayRef<uint8_t>> ContentsOrErr = Obj.getSectionContents(Sec);
if (!ContentsOrErr)
return ContentsOrErr.takeError();
if (Expected<std::vector<uint64_t>> SymsOrErr =
toULEB128Array(*ContentsOrErr))
return *SymsOrErr;
else
return createError("unable to decode " + describe(Obj, Sec) + ": " +
toString(SymsOrErr.takeError()));
}
template <class ELFT> void GNUELFDumper<ELFT>::printAddrsig() {
if (!this->DotAddrsigSec)
return;
Expected<std::vector<uint64_t>> SymsOrErr =
decodeAddrsigSection(this->Obj, *this->DotAddrsigSec);
if (!SymsOrErr) {
this->reportUniqueWarning(SymsOrErr.takeError());
return;
}
StringRef Name = this->getPrintableSectionName(*this->DotAddrsigSec);
OS << "\nAddress-significant symbols section '" << Name << "'"
<< " contains " << SymsOrErr->size() << " entries:\n";
OS << " Num: Name\n";
Field Fields[2] = {0, 8};
size_t SymIndex = 0;
for (uint64_t Sym : *SymsOrErr) {
Fields[0].Str = to_string(format_decimal(++SymIndex, 6)) + ":";
Fields[1].Str = this->getStaticSymbolName(Sym);
for (const Field &Entry : Fields)
printField(Entry);
OS << "\n";
}
}
template <typename ELFT>
static std::string getGNUProperty(uint32_t Type, uint32_t DataSize,
ArrayRef<uint8_t> Data) {
std::string str;
raw_string_ostream OS(str);
uint32_t PrData;
auto DumpBit = [&](uint32_t Flag, StringRef Name) {
if (PrData & Flag) {
PrData &= ~Flag;
OS << Name;
if (PrData)
OS << ", ";
}
};
switch (Type) {
default:
OS << format("<application-specific type 0x%x>", Type);
return OS.str();
case GNU_PROPERTY_STACK_SIZE: {
OS << "stack size: ";
if (DataSize == sizeof(typename ELFT::uint))
OS << formatv("{0:x}",
(uint64_t)(*(const typename ELFT::Addr *)Data.data()));
else
OS << format("<corrupt length: 0x%x>", DataSize);
return OS.str();
}
case GNU_PROPERTY_NO_COPY_ON_PROTECTED:
OS << "no copy on protected";
if (DataSize)
OS << format(" <corrupt length: 0x%x>", DataSize);
return OS.str();
case GNU_PROPERTY_AARCH64_FEATURE_1_AND:
case GNU_PROPERTY_X86_FEATURE_1_AND:
OS << ((Type == GNU_PROPERTY_AARCH64_FEATURE_1_AND) ? "aarch64 feature: "
: "x86 feature: ");
if (DataSize != 4) {
OS << format("<corrupt length: 0x%x>", DataSize);
return OS.str();
}
PrData = support::endian::read32<ELFT::TargetEndianness>(Data.data());
if (PrData == 0) {
OS << "<None>";
return OS.str();
}
if (Type == GNU_PROPERTY_AARCH64_FEATURE_1_AND) {
DumpBit(GNU_PROPERTY_AARCH64_FEATURE_1_BTI, "BTI");
DumpBit(GNU_PROPERTY_AARCH64_FEATURE_1_PAC, "PAC");
} else {
DumpBit(GNU_PROPERTY_X86_FEATURE_1_IBT, "IBT");
DumpBit(GNU_PROPERTY_X86_FEATURE_1_SHSTK, "SHSTK");
}
if (PrData)
OS << format("<unknown flags: 0x%x>", PrData);
return OS.str();
case GNU_PROPERTY_X86_FEATURE_2_NEEDED:
case GNU_PROPERTY_X86_FEATURE_2_USED:
OS << "x86 feature "
<< (Type == GNU_PROPERTY_X86_FEATURE_2_NEEDED ? "needed: " : "used: ");
if (DataSize != 4) {
OS << format("<corrupt length: 0x%x>", DataSize);
return OS.str();
}
PrData = support::endian::read32<ELFT::TargetEndianness>(Data.data());
if (PrData == 0) {
OS << "<None>";
return OS.str();
}
DumpBit(GNU_PROPERTY_X86_FEATURE_2_X86, "x86");
DumpBit(GNU_PROPERTY_X86_FEATURE_2_X87, "x87");
DumpBit(GNU_PROPERTY_X86_FEATURE_2_MMX, "MMX");
DumpBit(GNU_PROPERTY_X86_FEATURE_2_XMM, "XMM");
DumpBit(GNU_PROPERTY_X86_FEATURE_2_YMM, "YMM");
DumpBit(GNU_PROPERTY_X86_FEATURE_2_ZMM, "ZMM");
DumpBit(GNU_PROPERTY_X86_FEATURE_2_FXSR, "FXSR");
DumpBit(GNU_PROPERTY_X86_FEATURE_2_XSAVE, "XSAVE");
DumpBit(GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT, "XSAVEOPT");
DumpBit(GNU_PROPERTY_X86_FEATURE_2_XSAVEC, "XSAVEC");
if (PrData)
OS << format("<unknown flags: 0x%x>", PrData);
return OS.str();
case GNU_PROPERTY_X86_ISA_1_NEEDED:
case GNU_PROPERTY_X86_ISA_1_USED:
OS << "x86 ISA "
<< (Type == GNU_PROPERTY_X86_ISA_1_NEEDED ? "needed: " : "used: ");
if (DataSize != 4) {
OS << format("<corrupt length: 0x%x>", DataSize);
return OS.str();
}
PrData = support::endian::read32<ELFT::TargetEndianness>(Data.data());
if (PrData == 0) {
OS << "<None>";
return OS.str();
}
DumpBit(GNU_PROPERTY_X86_ISA_1_BASELINE, "x86-64-baseline");
DumpBit(GNU_PROPERTY_X86_ISA_1_V2, "x86-64-v2");
DumpBit(GNU_PROPERTY_X86_ISA_1_V3, "x86-64-v3");
DumpBit(GNU_PROPERTY_X86_ISA_1_V4, "x86-64-v4");
if (PrData)
OS << format("<unknown flags: 0x%x>", PrData);
return OS.str();
}
}
template <typename ELFT>
static SmallVector<std::string, 4> getGNUPropertyList(ArrayRef<uint8_t> Arr) {
using Elf_Word = typename ELFT::Word;
SmallVector<std::string, 4> Properties;
while (Arr.size() >= 8) {
uint32_t Type = *reinterpret_cast<const Elf_Word *>(Arr.data());
uint32_t DataSize = *reinterpret_cast<const Elf_Word *>(Arr.data() + 4);
Arr = Arr.drop_front(8);
// Take padding size into account if present.
uint64_t PaddedSize = alignTo(DataSize, sizeof(typename ELFT::uint));
std::string str;
raw_string_ostream OS(str);
if (Arr.size() < PaddedSize) {
OS << format("<corrupt type (0x%x) datasz: 0x%x>", Type, DataSize);
Properties.push_back(OS.str());
break;
}
Properties.push_back(
getGNUProperty<ELFT>(Type, DataSize, Arr.take_front(PaddedSize)));
Arr = Arr.drop_front(PaddedSize);
}
if (!Arr.empty())
Properties.push_back("<corrupted GNU_PROPERTY_TYPE_0>");
return Properties;
}
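// Layout sketch: every record in a GNU_PROPERTY_TYPE_0 note descriptor is
//   [4-byte pr_type][4-byte pr_datasz][pr_datasz bytes of data, padded to the
//   target word size]
// so on a 64-bit target a 4-byte feature bitmask occupies an 8-byte header
// plus 8 padded data bytes. The loop above drops the header, consumes
// PaddedSize bytes per record, and flags any leftover bytes as a corrupted
// GNU_PROPERTY_TYPE_0.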
struct GNUAbiTag {
std::string OSName;
std::string ABI;
bool IsValid;
};
template <typename ELFT> static GNUAbiTag getGNUAbiTag(ArrayRef<uint8_t> Desc) {
typedef typename ELFT::Word Elf_Word;
ArrayRef<Elf_Word> Words(reinterpret_cast<const Elf_Word *>(Desc.begin()),
reinterpret_cast<const Elf_Word *>(Desc.end()));
if (Words.size() < 4)
return {"", "", /*IsValid=*/false};
static const char *OSNames[] = {
"Linux", "Hurd", "Solaris", "FreeBSD", "NetBSD", "Syllable", "NaCl",
};
StringRef OSName = "Unknown";
if (Words[0] < array_lengthof(OSNames))
OSName = OSNames[Words[0]];
uint32_t Major = Words[1], Minor = Words[2], Patch = Words[3];
std::string str;
raw_string_ostream ABI(str);
ABI << Major << "." << Minor << "." << Patch;
return {std::string(OSName), ABI.str(), /*IsValid=*/true};
}
static std::string getGNUBuildId(ArrayRef<uint8_t> Desc) {
std::string str;
raw_string_ostream OS(str);
for (uint8_t B : Desc)
OS << format_hex_no_prefix(B, 2);
return OS.str();
}
static StringRef getDescAsStringRef(ArrayRef<uint8_t> Desc) {
return StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
}
template <typename ELFT>
static bool printGNUNote(raw_ostream &OS, uint32_t NoteType,
ArrayRef<uint8_t> Desc) {
// Return true if we were able to pretty-print the note, false otherwise.
switch (NoteType) {
default:
return false;
case ELF::NT_GNU_ABI_TAG: {
const GNUAbiTag &AbiTag = getGNUAbiTag<ELFT>(Desc);
if (!AbiTag.IsValid)
OS << " <corrupt GNU_ABI_TAG>";
else
OS << " OS: " << AbiTag.OSName << ", ABI: " << AbiTag.ABI;
break;
}
case ELF::NT_GNU_BUILD_ID: {
OS << " Build ID: " << getGNUBuildId(Desc);
break;
}
case ELF::NT_GNU_GOLD_VERSION:
OS << " Version: " << getDescAsStringRef(Desc);
break;
case ELF::NT_GNU_PROPERTY_TYPE_0:
OS << " Properties:";
for (const std::string &Property : getGNUPropertyList<ELFT>(Desc))
OS << " " << Property << "\n";
break;
}
OS << '\n';
return true;
}
template <typename ELFT>
static bool printLLVMOMPOFFLOADNote(raw_ostream &OS, uint32_t NoteType,
ArrayRef<uint8_t> Desc) {
switch (NoteType) {
default:
return false;
case ELF::NT_LLVM_OPENMP_OFFLOAD_VERSION:
OS << " Version: " << getDescAsStringRef(Desc);
break;
case ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER:
OS << " Producer: " << getDescAsStringRef(Desc);
break;
case ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER_VERSION:
OS << " Producer version: " << getDescAsStringRef(Desc);
break;
}
OS << '\n';
return true;
}
const EnumEntry<unsigned> FreeBSDFeatureCtlFlags[] = {
{"ASLR_DISABLE", NT_FREEBSD_FCTL_ASLR_DISABLE},
{"PROTMAX_DISABLE", NT_FREEBSD_FCTL_PROTMAX_DISABLE},
{"STKGAP_DISABLE", NT_FREEBSD_FCTL_STKGAP_DISABLE},
{"WXNEEDED", NT_FREEBSD_FCTL_WXNEEDED},
{"LA48", NT_FREEBSD_FCTL_LA48},
{"ASG_DISABLE", NT_FREEBSD_FCTL_ASG_DISABLE},
};
struct FreeBSDNote {
std::string Type;
std::string Value;
};
template <typename ELFT>
static Optional<FreeBSDNote>
getFreeBSDNote(uint32_t NoteType, ArrayRef<uint8_t> Desc, bool IsCore) {
if (IsCore)
return None; // No pretty-printing yet.
switch (NoteType) {
case ELF::NT_FREEBSD_ABI_TAG:
if (Desc.size() != 4)
return None;
return FreeBSDNote{
"ABI tag",
utostr(support::endian::read32<ELFT::TargetEndianness>(Desc.data()))};
case ELF::NT_FREEBSD_ARCH_TAG:
return FreeBSDNote{"Arch tag", toStringRef(Desc).str()};
case ELF::NT_FREEBSD_FEATURE_CTL: {
if (Desc.size() != 4)
return None;
unsigned Value =
support::endian::read32<ELFT::TargetEndianness>(Desc.data());
std::string FlagsStr;
raw_string_ostream OS(FlagsStr);
printFlags(Value, makeArrayRef(FreeBSDFeatureCtlFlags), OS);
if (OS.str().empty())
OS << "0x" << utohexstr(Value);
else
OS << "(0x" << utohexstr(Value) << ")";
return FreeBSDNote{"Feature flags", OS.str()};
}
default:
return None;
}
}
struct AMDNote {
std::string Type;
std::string Value;
};
template <typename ELFT>
static AMDNote getAMDNote(uint32_t NoteType, ArrayRef<uint8_t> Desc) {
switch (NoteType) {
default:
return {"", ""};
case ELF::NT_AMD_HSA_CODE_OBJECT_VERSION: {
struct CodeObjectVersion {
uint32_t MajorVersion;
uint32_t MinorVersion;
};
if (Desc.size() != sizeof(CodeObjectVersion))
return {"AMD HSA Code Object Version",
"Invalid AMD HSA Code Object Version"};
std::string VersionString;
raw_string_ostream StrOS(VersionString);
auto Version = reinterpret_cast<const CodeObjectVersion *>(Desc.data());
StrOS << "[Major: " << Version->MajorVersion
<< ", Minor: " << Version->MinorVersion << "]";
return {"AMD HSA Code Object Version", VersionString};
}
case ELF::NT_AMD_HSA_HSAIL: {
struct HSAILProperties {
uint32_t HSAILMajorVersion;
uint32_t HSAILMinorVersion;
uint8_t Profile;
uint8_t MachineModel;
uint8_t DefaultFloatRound;
};
if (Desc.size() != sizeof(HSAILProperties))
return {"AMD HSA HSAIL Properties", "Invalid AMD HSA HSAIL Properties"};
auto Properties = reinterpret_cast<const HSAILProperties *>(Desc.data());
std::string HSAILPropertiesString;
raw_string_ostream StrOS(HSAILPropertiesString);
StrOS << "[HSAIL Major: " << Properties->HSAILMajorVersion
<< ", HSAIL Minor: " << Properties->HSAILMinorVersion
<< ", Profile: " << uint32_t(Properties->Profile)
<< ", Machine Model: " << uint32_t(Properties->MachineModel)
<< ", Default Float Round: "
<< uint32_t(Properties->DefaultFloatRound) << "]";
return {"AMD HSA HSAIL Properties", HSAILPropertiesString};
}
case ELF::NT_AMD_HSA_ISA_VERSION: {
struct IsaVersion {
uint16_t VendorNameSize;
uint16_t ArchitectureNameSize;
uint32_t Major;
uint32_t Minor;
uint32_t Stepping;
};
if (Desc.size() < sizeof(IsaVersion))
return {"AMD HSA ISA Version", "Invalid AMD HSA ISA Version"};
auto Isa = reinterpret_cast<const IsaVersion *>(Desc.data());
if (Desc.size() < sizeof(IsaVersion) +
Isa->VendorNameSize + Isa->ArchitectureNameSize ||
Isa->VendorNameSize == 0 || Isa->ArchitectureNameSize == 0)
return {"AMD HSA ISA Version", "Invalid AMD HSA ISA Version"};
std::string IsaString;
raw_string_ostream StrOS(IsaString);
StrOS << "[Vendor: "
<< StringRef((const char*)Desc.data() + sizeof(IsaVersion), Isa->VendorNameSize - 1)
<< ", Architecture: "
<< StringRef((const char*)Desc.data() + sizeof(IsaVersion) + Isa->VendorNameSize,
Isa->ArchitectureNameSize - 1)
<< ", Major: " << Isa->Major << ", Minor: " << Isa->Minor
<< ", Stepping: " << Isa->Stepping << "]";
return {"AMD HSA ISA Version", IsaString};
}
case ELF::NT_AMD_HSA_METADATA: {
if (Desc.size() == 0)
return {"AMD HSA Metadata", ""};
return {
"AMD HSA Metadata",
std::string(reinterpret_cast<const char *>(Desc.data()), Desc.size() - 1)};
}
case ELF::NT_AMD_HSA_ISA_NAME: {
if (Desc.size() == 0)
return {"AMD HSA ISA Name", ""};
return {
"AMD HSA ISA Name",
std::string(reinterpret_cast<const char *>(Desc.data()), Desc.size())};
}
case ELF::NT_AMD_PAL_METADATA: {
struct PALMetadata {
uint32_t Key;
uint32_t Value;
};
if (Desc.size() % sizeof(PALMetadata) != 0)
return {"AMD PAL Metadata", "Invalid AMD PAL Metadata"};
auto Isa = reinterpret_cast<const PALMetadata *>(Desc.data());
std::string MetadataString;
raw_string_ostream StrOS(MetadataString);
for (size_t I = 0, E = Desc.size() / sizeof(PALMetadata); I < E; ++I) {
StrOS << "[" << Isa[I].Key << ": " << Isa[I].Value << "]";
}
return {"AMD PAL Metadata", MetadataString};
}
}
}
struct AMDGPUNote {
std::string Type;
std::string Value;
};
template <typename ELFT>
static AMDGPUNote getAMDGPUNote(uint32_t NoteType, ArrayRef<uint8_t> Desc) {
switch (NoteType) {
default:
return {"", ""};
case ELF::NT_AMDGPU_METADATA: {
StringRef MsgPackString =
StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
msgpack::Document MsgPackDoc;
if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
return {"", ""};
AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
std::string MetadataString;
if (!Verifier.verify(MsgPackDoc.getRoot()))
MetadataString = "Invalid AMDGPU Metadata\n";
raw_string_ostream StrOS(MetadataString);
if (MsgPackDoc.getRoot().isScalar()) {
// TODO: passing a scalar root to toYAML() asserts:
// (PolymorphicTraits<T>::getKind(Val) != NodeKind::Scalar &&
// "plain scalar documents are not supported")
// To avoid this crash we print the raw data instead.
return {"", ""};
}
MsgPackDoc.toYAML(StrOS);
return {"AMDGPU Metadata", StrOS.str()};
}
}
}
struct CoreFileMapping {
uint64_t Start, End, Offset;
StringRef Filename;
};
struct CoreNote {
uint64_t PageSize;
std::vector<CoreFileMapping> Mappings;
};
static Expected<CoreNote> readCoreNote(DataExtractor Desc) {
// Expected format of the NT_FILE note description:
// 1. # of file mappings (call it N)
// 2. Page size
// 3. N (start, end, offset) triples
// 4. N packed filenames (null delimited)
// Each field is an Elf_Addr, except for filenames which are char* strings.
CoreNote Ret;
const int Bytes = Desc.getAddressSize();
if (!Desc.isValidOffsetForAddress(2))
return createError("the note of size 0x" + Twine::utohexstr(Desc.size()) +
" is too short, expected at least 0x" +
Twine::utohexstr(Bytes * 2));
if (Desc.getData().back() != 0)
return createError("the note is not NUL terminated");
uint64_t DescOffset = 0;
uint64_t FileCount = Desc.getAddress(&DescOffset);
Ret.PageSize = Desc.getAddress(&DescOffset);
if (!Desc.isValidOffsetForAddress(3 * FileCount * Bytes))
return createError("unable to read file mappings (found " +
Twine(FileCount) + "): the note of size 0x" +
Twine::utohexstr(Desc.size()) + " is too short");
uint64_t FilenamesOffset = 0;
DataExtractor Filenames(
Desc.getData().drop_front(DescOffset + 3 * FileCount * Bytes),
Desc.isLittleEndian(), Desc.getAddressSize());
Ret.Mappings.resize(FileCount);
size_t I = 0;
for (CoreFileMapping &Mapping : Ret.Mappings) {
++I;
if (!Filenames.isValidOffsetForDataOfSize(FilenamesOffset, 1))
return createError(
"unable to read the file name for the mapping with index " +
Twine(I) + ": the note of size 0x" + Twine::utohexstr(Desc.size()) +
" is truncated");
Mapping.Start = Desc.getAddress(&DescOffset);
Mapping.End = Desc.getAddress(&DescOffset);
Mapping.Offset = Desc.getAddress(&DescOffset);
Mapping.Filename = Filenames.getCStrRef(&FilenamesOffset);
}
return Ret;
}
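// Layout sketch (hypothetical 64-bit core with two mappings): the NT_FILE
// descriptor would be
//   [count=2][pagesize]
//   [start0][end0][pgoff0][start1][end1][pgoff1]
//   "/bin/app\0/lib/libfoo.so\0"
// with every numeric field an 8-byte Elf_Addr and the packed, NUL-terminated
// file names following the address triples.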
template <typename ELFT>
static void printCoreNote(raw_ostream &OS, const CoreNote &Note) {
// Length of "0x<address>" string.
const int FieldWidth = ELFT::Is64Bits ? 18 : 10;
OS << " Page size: " << format_decimal(Note.PageSize, 0) << '\n';
OS << " " << right_justify("Start", FieldWidth) << " "
<< right_justify("End", FieldWidth) << " "
<< right_justify("Page Offset", FieldWidth) << '\n';
for (const CoreFileMapping &Mapping : Note.Mappings) {
OS << " " << format_hex(Mapping.Start, FieldWidth) << " "
<< format_hex(Mapping.End, FieldWidth) << " "
<< format_hex(Mapping.Offset, FieldWidth) << "\n "
<< Mapping.Filename << '\n';
}
}
const NoteType GenericNoteTypes[] = {
{ELF::NT_VERSION, "NT_VERSION (version)"},
{ELF::NT_ARCH, "NT_ARCH (architecture)"},
{ELF::NT_GNU_BUILD_ATTRIBUTE_OPEN, "OPEN"},
{ELF::NT_GNU_BUILD_ATTRIBUTE_FUNC, "func"},
};
const NoteType GNUNoteTypes[] = {
{ELF::NT_GNU_ABI_TAG, "NT_GNU_ABI_TAG (ABI version tag)"},
{ELF::NT_GNU_HWCAP, "NT_GNU_HWCAP (DSO-supplied software HWCAP info)"},
{ELF::NT_GNU_BUILD_ID, "NT_GNU_BUILD_ID (unique build ID bitstring)"},
{ELF::NT_GNU_GOLD_VERSION, "NT_GNU_GOLD_VERSION (gold version)"},
{ELF::NT_GNU_PROPERTY_TYPE_0, "NT_GNU_PROPERTY_TYPE_0 (property note)"},
};
const NoteType FreeBSDCoreNoteTypes[] = {
{ELF::NT_FREEBSD_THRMISC, "NT_THRMISC (thrmisc structure)"},
{ELF::NT_FREEBSD_PROCSTAT_PROC, "NT_PROCSTAT_PROC (proc data)"},
{ELF::NT_FREEBSD_PROCSTAT_FILES, "NT_PROCSTAT_FILES (files data)"},
{ELF::NT_FREEBSD_PROCSTAT_VMMAP, "NT_PROCSTAT_VMMAP (vmmap data)"},
{ELF::NT_FREEBSD_PROCSTAT_GROUPS, "NT_PROCSTAT_GROUPS (groups data)"},
{ELF::NT_FREEBSD_PROCSTAT_UMASK, "NT_PROCSTAT_UMASK (umask data)"},
{ELF::NT_FREEBSD_PROCSTAT_RLIMIT, "NT_PROCSTAT_RLIMIT (rlimit data)"},
{ELF::NT_FREEBSD_PROCSTAT_OSREL, "NT_PROCSTAT_OSREL (osreldate data)"},
{ELF::NT_FREEBSD_PROCSTAT_PSSTRINGS,
"NT_PROCSTAT_PSSTRINGS (ps_strings data)"},
{ELF::NT_FREEBSD_PROCSTAT_AUXV, "NT_PROCSTAT_AUXV (auxv data)"},
};
const NoteType FreeBSDNoteTypes[] = {
{ELF::NT_FREEBSD_ABI_TAG, "NT_FREEBSD_ABI_TAG (ABI version tag)"},
{ELF::NT_FREEBSD_NOINIT_TAG, "NT_FREEBSD_NOINIT_TAG (no .init tag)"},
{ELF::NT_FREEBSD_ARCH_TAG, "NT_FREEBSD_ARCH_TAG (architecture tag)"},
{ELF::NT_FREEBSD_FEATURE_CTL,
"NT_FREEBSD_FEATURE_CTL (FreeBSD feature control)"},
};
const NoteType NetBSDCoreNoteTypes[] = {
{ELF::NT_NETBSDCORE_PROCINFO,
"NT_NETBSDCORE_PROCINFO (procinfo structure)"},
{ELF::NT_NETBSDCORE_AUXV, "NT_NETBSDCORE_AUXV (ELF auxiliary vector data)"},
{ELF::NT_NETBSDCORE_LWPSTATUS, "PT_LWPSTATUS (ptrace_lwpstatus structure)"},
};
const NoteType OpenBSDCoreNoteTypes[] = {
{ELF::NT_OPENBSD_PROCINFO, "NT_OPENBSD_PROCINFO (procinfo structure)"},
{ELF::NT_OPENBSD_AUXV, "NT_OPENBSD_AUXV (ELF auxiliary vector data)"},
{ELF::NT_OPENBSD_REGS, "NT_OPENBSD_REGS (regular registers)"},
{ELF::NT_OPENBSD_FPREGS, "NT_OPENBSD_FPREGS (floating point registers)"},
{ELF::NT_OPENBSD_WCOOKIE, "NT_OPENBSD_WCOOKIE (window cookie)"},
};
const NoteType AMDNoteTypes[] = {
{ELF::NT_AMD_HSA_CODE_OBJECT_VERSION,
"NT_AMD_HSA_CODE_OBJECT_VERSION (AMD HSA Code Object Version)"},
{ELF::NT_AMD_HSA_HSAIL, "NT_AMD_HSA_HSAIL (AMD HSA HSAIL Properties)"},
{ELF::NT_AMD_HSA_ISA_VERSION, "NT_AMD_HSA_ISA_VERSION (AMD HSA ISA Version)"},
{ELF::NT_AMD_HSA_METADATA, "NT_AMD_HSA_METADATA (AMD HSA Metadata)"},
{ELF::NT_AMD_HSA_ISA_NAME, "NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name)"},
{ELF::NT_AMD_PAL_METADATA, "NT_AMD_PAL_METADATA (AMD PAL Metadata)"},
};
const NoteType AMDGPUNoteTypes[] = {
{ELF::NT_AMDGPU_METADATA, "NT_AMDGPU_METADATA (AMDGPU Metadata)"},
};
const NoteType LLVMOMPOFFLOADNoteTypes[] = {
{ELF::NT_LLVM_OPENMP_OFFLOAD_VERSION,
"NT_LLVM_OPENMP_OFFLOAD_VERSION (image format version)"},
{ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER,
"NT_LLVM_OPENMP_OFFLOAD_PRODUCER (producing toolchain)"},
{ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER_VERSION,
"NT_LLVM_OPENMP_OFFLOAD_PRODUCER_VERSION (producing toolchain version)"},
};
const NoteType CoreNoteTypes[] = {
{ELF::NT_PRSTATUS, "NT_PRSTATUS (prstatus structure)"},
{ELF::NT_FPREGSET, "NT_FPREGSET (floating point registers)"},
{ELF::NT_PRPSINFO, "NT_PRPSINFO (prpsinfo structure)"},
{ELF::NT_TASKSTRUCT, "NT_TASKSTRUCT (task structure)"},
{ELF::NT_AUXV, "NT_AUXV (auxiliary vector)"},
{ELF::NT_PSTATUS, "NT_PSTATUS (pstatus structure)"},
{ELF::NT_FPREGS, "NT_FPREGS (floating point registers)"},
{ELF::NT_PSINFO, "NT_PSINFO (psinfo structure)"},
{ELF::NT_LWPSTATUS, "NT_LWPSTATUS (lwpstatus_t structure)"},
{ELF::NT_LWPSINFO, "NT_LWPSINFO (lwpsinfo_t structure)"},
{ELF::NT_WIN32PSTATUS, "NT_WIN32PSTATUS (win32_pstatus structure)"},
{ELF::NT_PPC_VMX, "NT_PPC_VMX (ppc Altivec registers)"},
{ELF::NT_PPC_VSX, "NT_PPC_VSX (ppc VSX registers)"},
{ELF::NT_PPC_TAR, "NT_PPC_TAR (ppc TAR register)"},
{ELF::NT_PPC_PPR, "NT_PPC_PPR (ppc PPR register)"},
{ELF::NT_PPC_DSCR, "NT_PPC_DSCR (ppc DSCR register)"},
{ELF::NT_PPC_EBB, "NT_PPC_EBB (ppc EBB registers)"},
{ELF::NT_PPC_PMU, "NT_PPC_PMU (ppc PMU registers)"},
{ELF::NT_PPC_TM_CGPR, "NT_PPC_TM_CGPR (ppc checkpointed GPR registers)"},
{ELF::NT_PPC_TM_CFPR,
"NT_PPC_TM_CFPR (ppc checkpointed floating point registers)"},
{ELF::NT_PPC_TM_CVMX,
"NT_PPC_TM_CVMX (ppc checkpointed Altivec registers)"},
{ELF::NT_PPC_TM_CVSX, "NT_PPC_TM_CVSX (ppc checkpointed VSX registers)"},
{ELF::NT_PPC_TM_SPR, "NT_PPC_TM_SPR (ppc TM special purpose registers)"},
{ELF::NT_PPC_TM_CTAR, "NT_PPC_TM_CTAR (ppc checkpointed TAR register)"},
{ELF::NT_PPC_TM_CPPR, "NT_PPC_TM_CPPR (ppc checkpointed PPR register)"},
{ELF::NT_PPC_TM_CDSCR, "NT_PPC_TM_CDSCR (ppc checkpointed DSCR register)"},
{ELF::NT_386_TLS, "NT_386_TLS (x86 TLS information)"},
{ELF::NT_386_IOPERM, "NT_386_IOPERM (x86 I/O permissions)"},
{ELF::NT_X86_XSTATE, "NT_X86_XSTATE (x86 XSAVE extended state)"},
{ELF::NT_S390_HIGH_GPRS, "NT_S390_HIGH_GPRS (s390 upper register halves)"},
{ELF::NT_S390_TIMER, "NT_S390_TIMER (s390 timer register)"},
{ELF::NT_S390_TODCMP, "NT_S390_TODCMP (s390 TOD comparator register)"},
{ELF::NT_S390_TODPREG, "NT_S390_TODPREG (s390 TOD programmable register)"},
{ELF::NT_S390_CTRS, "NT_S390_CTRS (s390 control registers)"},
{ELF::NT_S390_PREFIX, "NT_S390_PREFIX (s390 prefix register)"},
{ELF::NT_S390_LAST_BREAK,
"NT_S390_LAST_BREAK (s390 last breaking event address)"},
{ELF::NT_S390_SYSTEM_CALL,
"NT_S390_SYSTEM_CALL (s390 system call restart data)"},
{ELF::NT_S390_TDB, "NT_S390_TDB (s390 transaction diagnostic block)"},
{ELF::NT_S390_VXRS_LOW,
"NT_S390_VXRS_LOW (s390 vector registers 0-15 upper half)"},
{ELF::NT_S390_VXRS_HIGH, "NT_S390_VXRS_HIGH (s390 vector registers 16-31)"},
{ELF::NT_S390_GS_CB, "NT_S390_GS_CB (s390 guarded-storage registers)"},
{ELF::NT_S390_GS_BC,
"NT_S390_GS_BC (s390 guarded-storage broadcast control)"},
{ELF::NT_ARM_VFP, "NT_ARM_VFP (arm VFP registers)"},
{ELF::NT_ARM_TLS, "NT_ARM_TLS (AArch TLS registers)"},
{ELF::NT_ARM_HW_BREAK,
"NT_ARM_HW_BREAK (AArch hardware breakpoint registers)"},
{ELF::NT_ARM_HW_WATCH,
"NT_ARM_HW_WATCH (AArch hardware watchpoint registers)"},
{ELF::NT_FILE, "NT_FILE (mapped files)"},
{ELF::NT_PRXFPREG, "NT_PRXFPREG (user_xfpregs structure)"},
{ELF::NT_SIGINFO, "NT_SIGINFO (siginfo_t data)"},
};
template <class ELFT>
StringRef getNoteTypeName(const typename ELFT::Note &Note, unsigned ELFType) {
uint32_t Type = Note.getType();
auto FindNote = [&](ArrayRef<NoteType> V) -> StringRef {
for (const NoteType &N : V)
if (N.ID == Type)
return N.Name;
return "";
};
StringRef Name = Note.getName();
if (Name == "GNU")
return FindNote(GNUNoteTypes);
if (Name == "FreeBSD") {
if (ELFType == ELF::ET_CORE) {
// FreeBSD also places the generic core notes in the FreeBSD namespace.
StringRef Result = FindNote(FreeBSDCoreNoteTypes);
if (!Result.empty())
return Result;
return FindNote(CoreNoteTypes);
} else {
return FindNote(FreeBSDNoteTypes);
}
}
if (ELFType == ELF::ET_CORE && Name.startswith("NetBSD-CORE")) {
StringRef Result = FindNote(NetBSDCoreNoteTypes);
if (!Result.empty())
return Result;
return FindNote(CoreNoteTypes);
}
if (ELFType == ELF::ET_CORE && Name.startswith("OpenBSD")) {
// OpenBSD also places the generic core notes in the OpenBSD namespace.
StringRef Result = FindNote(OpenBSDCoreNoteTypes);
if (!Result.empty())
return Result;
return FindNote(CoreNoteTypes);
}
if (Name == "AMD")
return FindNote(AMDNoteTypes);
if (Name == "AMDGPU")
return FindNote(AMDGPUNoteTypes);
if (Name == "LLVMOMPOFFLOAD")
return FindNote(LLVMOMPOFFLOADNoteTypes);
if (ELFType == ELF::ET_CORE)
return FindNote(CoreNoteTypes);
return FindNote(GenericNoteTypes);
}
template <class ELFT>
static void printNotesHelper(
const ELFDumper<ELFT> &Dumper,
llvm::function_ref<void(Optional<StringRef>, typename ELFT::Off,
typename ELFT::Addr)>
StartNotesFn,
llvm::function_ref<Error(const typename ELFT::Note &, bool)> ProcessNoteFn,
llvm::function_ref<void()> FinishNotesFn) {
const ELFFile<ELFT> &Obj = Dumper.getElfObject().getELFFile();
bool IsCoreFile = Obj.getHeader().e_type == ELF::ET_CORE;
ArrayRef<typename ELFT::Shdr> Sections = cantFail(Obj.sections());
if (!IsCoreFile && !Sections.empty()) {
for (const typename ELFT::Shdr &S : Sections) {
if (S.sh_type != SHT_NOTE)
continue;
StartNotesFn(expectedToOptional(Obj.getSectionName(S)), S.sh_offset,
S.sh_size);
Error Err = Error::success();
size_t I = 0;
for (const typename ELFT::Note Note : Obj.notes(S, Err)) {
if (Error E = ProcessNoteFn(Note, IsCoreFile))
Dumper.reportUniqueWarning(
"unable to read note with index " + Twine(I) + " from the " +
describe(Obj, S) + ": " + toString(std::move(E)));
++I;
}
if (Err)
Dumper.reportUniqueWarning("unable to read notes from the " +
describe(Obj, S) + ": " +
toString(std::move(Err)));
FinishNotesFn();
}
return;
}
Expected<ArrayRef<typename ELFT::Phdr>> PhdrsOrErr = Obj.program_headers();
if (!PhdrsOrErr) {
Dumper.reportUniqueWarning(
"unable to read program headers to locate the PT_NOTE segment: " +
toString(PhdrsOrErr.takeError()));
return;
}
for (size_t I = 0, E = (*PhdrsOrErr).size(); I != E; ++I) {
const typename ELFT::Phdr &P = (*PhdrsOrErr)[I];
if (P.p_type != PT_NOTE)
continue;
StartNotesFn(/*SecName=*/None, P.p_offset, P.p_filesz);
Error Err = Error::success();
size_t Index = 0;
for (const typename ELFT::Note Note : Obj.notes(P, Err)) {
if (Error E = ProcessNoteFn(Note, IsCoreFile))
Dumper.reportUniqueWarning("unable to read note with index " +
Twine(Index) +
" from the PT_NOTE segment with index " +
Twine(I) + ": " + toString(std::move(E)));
++Index;
}
if (Err)
Dumper.reportUniqueWarning(
"unable to read notes from the PT_NOTE segment with index " +
Twine(I) + ": " + toString(std::move(Err)));
FinishNotesFn();
}
}
template <class ELFT> void GNUELFDumper<ELFT>::printNotes() {
bool IsFirstHeader = true;
auto PrintHeader = [&](Optional<StringRef> SecName,
const typename ELFT::Off Offset,
const typename ELFT::Addr Size) {
// Print a newline between notes sections to match GNU readelf.
if (!IsFirstHeader) {
OS << '\n';
} else {
IsFirstHeader = false;
}
OS << "Displaying notes found ";
if (SecName)
OS << "in: " << *SecName << "\n";
else
OS << "at file offset " << format_hex(Offset, 10) << " with length "
<< format_hex(Size, 10) << ":\n";
OS << " Owner Data size \tDescription\n";
};
auto ProcessNote = [&](const Elf_Note &Note, bool IsCore) -> Error {
StringRef Name = Note.getName();
ArrayRef<uint8_t> Descriptor = Note.getDesc();
Elf_Word Type = Note.getType();
// Print the note owner/type.
OS << " " << left_justify(Name, 20) << ' '
<< format_hex(Descriptor.size(), 10) << '\t';
StringRef NoteType =
getNoteTypeName<ELFT>(Note, this->Obj.getHeader().e_type);
if (!NoteType.empty())
OS << NoteType << '\n';
else
OS << "Unknown note type: (" << format_hex(Type, 10) << ")\n";
// Print the description, or fall back to printing raw bytes for unknown
// owners or when we fail to pretty-print the contents.
if (Name == "GNU") {
if (printGNUNote<ELFT>(OS, Type, Descriptor))
return Error::success();
} else if (Name == "FreeBSD") {
if (Optional<FreeBSDNote> N =
getFreeBSDNote<ELFT>(Type, Descriptor, IsCore)) {
OS << " " << N->Type << ": " << N->Value << '\n';
return Error::success();
}
} else if (Name == "AMD") {
const AMDNote N = getAMDNote<ELFT>(Type, Descriptor);
if (!N.Type.empty()) {
OS << " " << N.Type << ":\n " << N.Value << '\n';
return Error::success();
}
} else if (Name == "AMDGPU") {
const AMDGPUNote N = getAMDGPUNote<ELFT>(Type, Descriptor);
if (!N.Type.empty()) {
OS << " " << N.Type << ":\n " << N.Value << '\n';
return Error::success();
}
} else if (Name == "LLVMOMPOFFLOAD") {
if (printLLVMOMPOFFLOADNote<ELFT>(OS, Type, Descriptor))
return Error::success();
} else if (Name == "CORE") {
if (Type == ELF::NT_FILE) {
DataExtractor DescExtractor(Descriptor,
ELFT::TargetEndianness == support::little,
sizeof(Elf_Addr));
if (Expected<CoreNote> NoteOrErr = readCoreNote(DescExtractor)) {
printCoreNote<ELFT>(OS, *NoteOrErr);
return Error::success();
} else {
return NoteOrErr.takeError();
}
}
}
if (!Descriptor.empty()) {
OS << " description data:";
for (uint8_t B : Descriptor)
OS << " " << format("%02x", B);
OS << '\n';
}
return Error::success();
};
printNotesHelper(*this, PrintHeader, ProcessNote, []() {});
}
template <class ELFT> void GNUELFDumper<ELFT>::printELFLinkerOptions() {
OS << "printELFLinkerOptions not implemented!\n";
}
template <class ELFT>
void ELFDumper<ELFT>::printDependentLibsHelper(
function_ref<void(const Elf_Shdr &)> OnSectionStart,
function_ref<void(StringRef, uint64_t)> OnLibEntry) {
auto Warn = [this](unsigned SecNdx, StringRef Msg) {
this->reportUniqueWarning("SHT_LLVM_DEPENDENT_LIBRARIES section at index " +
Twine(SecNdx) + " is broken: " + Msg);
};
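// Section index counter; starts at -1 so the first increment in the loop
// below yields index 0.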
unsigned I = -1;
for (const Elf_Shdr &Shdr : cantFail(Obj.sections())) {
++I;
if (Shdr.sh_type != ELF::SHT_LLVM_DEPENDENT_LIBRARIES)
continue;
OnSectionStart(Shdr);
Expected<ArrayRef<uint8_t>> ContentsOrErr = Obj.getSectionContents(Shdr);
if (!ContentsOrErr) {
Warn(I, toString(ContentsOrErr.takeError()));
continue;
}
ArrayRef<uint8_t> Contents = *ContentsOrErr;
if (!Contents.empty() && Contents.back() != 0) {
Warn(I, "the content is not null-terminated");
continue;
}
for (const uint8_t *I = Contents.begin(), *E = Contents.end(); I < E;) {
StringRef Lib((const char *)I);
OnLibEntry(Lib, I - Contents.begin());
I += Lib.size() + 1;
}
}
}
template <class ELFT>
void ELFDumper<ELFT>::forEachRelocationDo(
const Elf_Shdr &Sec, bool RawRelr,
llvm::function_ref<void(const Relocation<ELFT> &, unsigned,
const Elf_Shdr &, const Elf_Shdr *)>
RelRelaFn,
llvm::function_ref<void(const Elf_Relr &)> RelrFn) {
auto Warn = [&](Error &&E,
const Twine &Prefix = "unable to read relocations from") {
this->reportUniqueWarning(Prefix + " " + describe(Sec) + ": " +
toString(std::move(E)));
};
// SHT_RELR/SHT_ANDROID_RELR sections do not have an associated symbol table.
// For them we should not treat the value of the sh_link field as an index of
// a symbol table.
const Elf_Shdr *SymTab;
if (Sec.sh_type != ELF::SHT_RELR && Sec.sh_type != ELF::SHT_ANDROID_RELR) {
Expected<const Elf_Shdr *> SymTabOrErr = Obj.getSection(Sec.sh_link);
if (!SymTabOrErr) {
Warn(SymTabOrErr.takeError(), "unable to locate a symbol table for");
return;
}
SymTab = *SymTabOrErr;
}
unsigned RelNdx = 0;
const bool IsMips64EL = this->Obj.isMips64EL();
switch (Sec.sh_type) {
case ELF::SHT_REL:
if (Expected<Elf_Rel_Range> RangeOrErr = Obj.rels(Sec)) {
for (const Elf_Rel &R : *RangeOrErr)
RelRelaFn(Relocation<ELFT>(R, IsMips64EL), RelNdx++, Sec, SymTab);
} else {
Warn(RangeOrErr.takeError());
}
break;
case ELF::SHT_RELA:
if (Expected<Elf_Rela_Range> RangeOrErr = Obj.relas(Sec)) {
for (const Elf_Rela &R : *RangeOrErr)
RelRelaFn(Relocation<ELFT>(R, IsMips64EL), RelNdx++, Sec, SymTab);
} else {
Warn(RangeOrErr.takeError());
}
break;
case ELF::SHT_RELR:
case ELF::SHT_ANDROID_RELR: {
Expected<Elf_Relr_Range> RangeOrErr = Obj.relrs(Sec);
if (!RangeOrErr) {
Warn(RangeOrErr.takeError());
break;
}
if (RawRelr) {
for (const Elf_Relr &R : *RangeOrErr)
RelrFn(R);
break;
}
for (const Elf_Rel &R : Obj.decode_relrs(*RangeOrErr))
RelRelaFn(Relocation<ELFT>(R, IsMips64EL), RelNdx++, Sec,
/*SymTab=*/nullptr);
break;
}
case ELF::SHT_ANDROID_REL:
case ELF::SHT_ANDROID_RELA:
if (Expected<std::vector<Elf_Rela>> RelasOrErr = Obj.android_relas(Sec)) {
for (const Elf_Rela &R : *RelasOrErr)
RelRelaFn(Relocation<ELFT>(R, IsMips64EL), RelNdx++, Sec, SymTab);
} else {
Warn(RelasOrErr.takeError());
}
break;
}
}
template <class ELFT>
StringRef ELFDumper<ELFT>::getPrintableSectionName(const Elf_Shdr &Sec) const {
StringRef Name = "<?>";
if (Expected<StringRef> SecNameOrErr =
Obj.getSectionName(Sec, this->WarningHandler))
Name = *SecNameOrErr;
else
this->reportUniqueWarning("unable to get the name of " + describe(Sec) +
": " + toString(SecNameOrErr.takeError()));
return Name;
}
template <class ELFT> void GNUELFDumper<ELFT>::printDependentLibs() {
bool SectionStarted = false;
struct NameOffset {
StringRef Name;
uint64_t Offset;
};
std::vector<NameOffset> SecEntries;
NameOffset Current;
auto PrintSection = [&]() {
OS << "Dependent libraries section " << Current.Name << " at offset "
<< format_hex(Current.Offset, 1) << " contains " << SecEntries.size()
<< " entries:\n";
for (NameOffset Entry : SecEntries)
OS << " [" << format("%6" PRIx64, Entry.Offset) << "] " << Entry.Name
<< "\n";
OS << "\n";
SecEntries.clear();
};
auto OnSectionStart = [&](const Elf_Shdr &Shdr) {
if (SectionStarted)
PrintSection();
SectionStarted = true;
Current.Offset = Shdr.sh_offset;
Current.Name = this->getPrintableSectionName(Shdr);
};
auto OnLibEntry = [&](StringRef Lib, uint64_t Offset) {
SecEntries.push_back(NameOffset{Lib, Offset});
};
this->printDependentLibsHelper(OnSectionStart, OnLibEntry);
if (SectionStarted)
PrintSection();
}
template <class ELFT>
SmallVector<uint32_t> ELFDumper<ELFT>::getSymbolIndexesForFunctionAddress(
uint64_t SymValue, Optional<const Elf_Shdr *> FunctionSec) {
SmallVector<uint32_t> SymbolIndexes;
if (!this->AddressToIndexMap.hasValue()) {
// Populate the address-to-index map upon the first invocation of this
// function.
this->AddressToIndexMap.emplace();
if (this->DotSymtabSec) {
if (Expected<Elf_Sym_Range> SymsOrError =
Obj.symbols(this->DotSymtabSec)) {
uint32_t Index = (uint32_t)-1;
for (const Elf_Sym &Sym : *SymsOrError) {
++Index;
if (Sym.st_shndx == ELF::SHN_UNDEF || Sym.getType() != ELF::STT_FUNC)
continue;
Expected<uint64_t> SymAddrOrErr =
ObjF.toSymbolRef(this->DotSymtabSec, Index).getAddress();
if (!SymAddrOrErr) {
std::string Name = this->getStaticSymbolName(Index);
reportUniqueWarning("unable to get address of symbol '" + Name +
"': " + toString(SymAddrOrErr.takeError()));
return SymbolIndexes;
}
(*this->AddressToIndexMap)[*SymAddrOrErr].push_back(Index);
}
} else {
reportUniqueWarning("unable to read the symbol table: " +
toString(SymsOrError.takeError()));
}
}
}
auto Symbols = this->AddressToIndexMap->find(SymValue);
if (Symbols == this->AddressToIndexMap->end())
return SymbolIndexes;
for (uint32_t Index : Symbols->second) {
// Check if the symbol is in the right section. FunctionSec == None
// means "any section".
if (FunctionSec) {
const Elf_Sym &Sym = *cantFail(Obj.getSymbol(this->DotSymtabSec, Index));
if (Expected<const Elf_Shdr *> SecOrErr =
Obj.getSection(Sym, this->DotSymtabSec,
this->getShndxTable(this->DotSymtabSec))) {
if (*FunctionSec != *SecOrErr)
continue;
} else {
std::string Name = this->getStaticSymbolName(Index);
// Note: it is currently impossible to trigger this error, so it is
// untested.
reportUniqueWarning("unable to get section of symbol '" + Name +
"': " + toString(SecOrErr.takeError()));
return SymbolIndexes;
}
}
SymbolIndexes.push_back(Index);
}
return SymbolIndexes;
}
template <class ELFT>
bool ELFDumper<ELFT>::printFunctionStackSize(
uint64_t SymValue, Optional<const Elf_Shdr *> FunctionSec,
const Elf_Shdr &StackSizeSec, DataExtractor Data, uint64_t *Offset) {
SmallVector<uint32_t> FuncSymIndexes =
this->getSymbolIndexesForFunctionAddress(SymValue, FunctionSec);
if (FuncSymIndexes.empty())
reportUniqueWarning(
"could not identify function symbol for stack size entry in " +
describe(StackSizeSec));
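// Keep going: the entry is still printed below, with "?" as the function
// name.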
// Extract the size. The expectation is that Offset is pointing to the right
// place, i.e. past the function address.
Error Err = Error::success();
uint64_t StackSize = Data.getULEB128(Offset, &Err);
if (Err) {
reportUniqueWarning("could not extract a valid stack size from " +
describe(StackSizeSec) + ": " +
toString(std::move(Err)));
return false;
}
if (FuncSymIndexes.empty()) {
printStackSizeEntry(StackSize, {"?"});
} else {
SmallVector<std::string> FuncSymNames;
for (uint32_t Index : FuncSymIndexes)
FuncSymNames.push_back(this->getStaticSymbolName(Index));
printStackSizeEntry(StackSize, FuncSymNames);
}
return true;
}
template <class ELFT>
void GNUELFDumper<ELFT>::printStackSizeEntry(uint64_t Size,
ArrayRef<std::string> FuncNames) {
OS.PadToColumn(2);
OS << format_decimal(Size, 11);
OS.PadToColumn(18);
OS << join(FuncNames.begin(), FuncNames.end(), ", ") << "\n";
}
template <class ELFT>
void ELFDumper<ELFT>::printStackSize(const Relocation<ELFT> &R,
const Elf_Shdr &RelocSec, unsigned Ndx,
const Elf_Shdr *SymTab,
const Elf_Shdr *FunctionSec,
const Elf_Shdr &StackSizeSec,
const RelocationResolver &Resolver,
DataExtractor Data) {
// This function ignores potentially erroneous input, unless it is directly
// related to stack size reporting.
const Elf_Sym *Sym = nullptr;
Expected<RelSymbol<ELFT>> TargetOrErr = this->getRelocationTarget(R, SymTab);
if (!TargetOrErr)
reportUniqueWarning("unable to get the target of relocation with index " +
Twine(Ndx) + " in " + describe(RelocSec) + ": " +
toString(TargetOrErr.takeError()));
else
Sym = TargetOrErr->Sym;
uint64_t RelocSymValue = 0;
if (Sym) {
Expected<const Elf_Shdr *> SectionOrErr =
this->Obj.getSection(*Sym, SymTab, this->getShndxTable(SymTab));
if (!SectionOrErr) {
reportUniqueWarning(
"cannot identify the section for relocation symbol '" +
(*TargetOrErr).Name + "': " + toString(SectionOrErr.takeError()));
} else if (*SectionOrErr != FunctionSec) {
reportUniqueWarning("relocation symbol '" + (*TargetOrErr).Name +
"' is not in the expected section");
// Pretend that the symbol is in the correct section and report its
// stack size anyway.
FunctionSec = *SectionOrErr;
}
RelocSymValue = Sym->st_value;
}
uint64_t Offset = R.Offset;
if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(Elf_Addr) + 1)) {
reportUniqueWarning("found invalid relocation offset (0x" +
Twine::utohexstr(Offset) + ") into " +
describe(StackSizeSec) +
" while trying to extract a stack size entry");
return;
}
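// Apply the relocation to the bytes at Offset to recover the address of the
// function this stack size entry describes.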
uint64_t SymValue =
Resolver(R.Type, Offset, RelocSymValue, Data.getAddress(&Offset),
R.Addend.getValueOr(0));
this->printFunctionStackSize(SymValue, FunctionSec, StackSizeSec, Data,
&Offset);
}
template <class ELFT>
void ELFDumper<ELFT>::printNonRelocatableStackSizes(
std::function<void()> PrintHeader) {
// This function ignores potentially erroneous input, unless it is directly
// related to stack size reporting.
for (const Elf_Shdr &Sec : cantFail(Obj.sections())) {
if (this->getPrintableSectionName(Sec) != ".stack_sizes")
continue;
PrintHeader();
ArrayRef<uint8_t> Contents =
unwrapOrError(this->FileName, Obj.getSectionContents(Sec));
DataExtractor Data(Contents, Obj.isLE(), sizeof(Elf_Addr));
uint64_t Offset = 0;
while (Offset < Contents.size()) {
// The function address is followed by a ULEB representing the stack
// size. Check for an extra byte before we try to process the entry.
if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(Elf_Addr) + 1)) {
reportUniqueWarning(
describe(Sec) +
" ended while trying to extract a stack size entry");
break;
}
uint64_t SymValue = Data.getAddress(&Offset);
if (!printFunctionStackSize(SymValue, /*FunctionSec=*/None, Sec, Data,
&Offset))
break;
}
}
}
template <class ELFT>
void ELFDumper<ELFT>::getSectionAndRelocations(
std::function<bool(const Elf_Shdr &)> IsMatch,
llvm::MapVector<const Elf_Shdr *, const Elf_Shdr *> &SecToRelocMap) {
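// Record each matching section, then map SHT_REL/SHT_RELA sections back to
// the matching sections they relocate (via sh_info).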
for (const Elf_Shdr &Sec : cantFail(Obj.sections())) {
if (IsMatch(Sec))
if (SecToRelocMap.insert(std::make_pair(&Sec, (const Elf_Shdr *)nullptr))
.second)
continue;
if (Sec.sh_type != ELF::SHT_RELA && Sec.sh_type != ELF::SHT_REL)
continue;
Expected<const Elf_Shdr *> RelSecOrErr = Obj.getSection(Sec.sh_info);
if (!RelSecOrErr) {
reportUniqueWarning(describe(Sec) +
": failed to get a relocated section: " +
toString(RelSecOrErr.takeError()));
continue;
}
const Elf_Shdr *ContentsSec = *RelSecOrErr;
if (IsMatch(*ContentsSec))
SecToRelocMap[ContentsSec] = &Sec;
}
}
template <class ELFT>
void ELFDumper<ELFT>::printRelocatableStackSizes(
std::function<void()> PrintHeader) {
// Build a map between stack size sections and their corresponding relocation
// sections.
llvm::MapVector<const Elf_Shdr *, const Elf_Shdr *> StackSizeRelocMap;
auto IsMatch = [&](const Elf_Shdr &Sec) -> bool {
StringRef SectionName;
if (Expected<StringRef> NameOrErr = Obj.getSectionName(Sec))
SectionName = *NameOrErr;
else
consumeError(NameOrErr.takeError());
return SectionName == ".stack_sizes";
};
getSectionAndRelocations(IsMatch, StackSizeRelocMap);
for (const auto &StackSizeMapEntry : StackSizeRelocMap) {
PrintHeader();
const Elf_Shdr *StackSizesELFSec = StackSizeMapEntry.first;
const Elf_Shdr *RelocSec = StackSizeMapEntry.second;
// Warn about stack size sections without a relocation section.
if (!RelocSec) {
reportWarning(createError(".stack_sizes (" + describe(*StackSizesELFSec) +
") does not have a corresponding "
"relocation section"),
FileName);
continue;
}
// A .stack_sizes section header's sh_link field is supposed to point
// to the section that contains the functions whose stack sizes are
// described in it.
const Elf_Shdr *FunctionSec = unwrapOrError(
this->FileName, Obj.getSection(StackSizesELFSec->sh_link));
SupportsRelocation IsSupportedFn;
RelocationResolver Resolver;
std::tie(IsSupportedFn, Resolver) = getRelocationResolver(this->ObjF);
ArrayRef<uint8_t> Contents =
unwrapOrError(this->FileName, Obj.getSectionContents(*StackSizesELFSec));
DataExtractor Data(Contents, Obj.isLE(), sizeof(Elf_Addr));
forEachRelocationDo(
*RelocSec, /*RawRelr=*/false,
[&](const Relocation<ELFT> &R, unsigned Ndx, const Elf_Shdr &Sec,
const Elf_Shdr *SymTab) {
if (!IsSupportedFn || !IsSupportedFn(R.Type)) {
reportUniqueWarning(
describe(*RelocSec) +
" contains an unsupported relocation with index " + Twine(Ndx) +
": " + Obj.getRelocationTypeName(R.Type));
return;
}
this->printStackSize(R, *RelocSec, Ndx, SymTab, FunctionSec,
*StackSizesELFSec, Resolver, Data);
},
[](const Elf_Relr &) {
llvm_unreachable("can't get here, because we only support "
"SHT_REL/SHT_RELA sections");
});
}
}
template <class ELFT>
void GNUELFDumper<ELFT>::printStackSizes() {
bool HeaderHasBeenPrinted = false;
auto PrintHeader = [&]() {
if (HeaderHasBeenPrinted)
return;
OS << "\nStack Sizes:\n";
OS.PadToColumn(9);
OS << "Size";
OS.PadToColumn(18);
OS << "Functions\n";
HeaderHasBeenPrinted = true;
};
// Relocatable objects need their .stack_sizes relocations resolved first;
// for all other objects, process the .stack_sizes contents directly.
if (this->Obj.getHeader().e_type == ELF::ET_REL)
this->printRelocatableStackSizes(PrintHeader);
else
this->printNonRelocatableStackSizes(PrintHeader);
}
template <class ELFT>
void GNUELFDumper<ELFT>::printMipsGOT(const MipsGOTParser<ELFT> &Parser) {
size_t Bias = ELFT::Is64Bits ? 8 : 0;
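// 64-bit GOT entries print with eight extra hex digits; widen the columns
// accordingly.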
auto PrintEntry = [&](const Elf_Addr *E, StringRef Purpose) {
OS.PadToColumn(2);
OS << format_hex_no_prefix(Parser.getGotAddress(E), 8 + Bias);
OS.PadToColumn(11 + Bias);
OS << format_decimal(Parser.getGotOffset(E), 6) << "(gp)";
OS.PadToColumn(22 + Bias);
OS << format_hex_no_prefix(*E, 8 + Bias);
OS.PadToColumn(31 + 2 * Bias);
OS << Purpose << "\n";
};
OS << (Parser.IsStatic ? "Static GOT:\n" : "Primary GOT:\n");
OS << " Canonical gp value: "
<< format_hex_no_prefix(Parser.getGp(), 8 + Bias) << "\n\n";
OS << " Reserved entries:\n";
if (ELFT::Is64Bits)
OS << " Address Access Initial Purpose\n";
else
OS << " Address Access Initial Purpose\n";
PrintEntry(Parser.getGotLazyResolver(), "Lazy resolver");
if (Parser.getGotModulePointer())
PrintEntry(Parser.getGotModulePointer(), "Module pointer (GNU extension)");
if (!Parser.getLocalEntries().empty()) {
OS << "\n";
OS << " Local entries:\n";
if (ELFT::Is64Bits)
OS << " Address Access Initial\n";
else
OS << " Address Access Initial\n";
for (auto &E : Parser.getLocalEntries())
PrintEntry(&E, "");
}
if (Parser.IsStatic)
return;
if (!Parser.getGlobalEntries().empty()) {
OS << "\n";
OS << " Global entries:\n";
if (ELFT::Is64Bits)
OS << " Address Access Initial Sym.Val."
<< " Type Ndx Name\n";
else
OS << " Address Access Initial Sym.Val. Type Ndx Name\n";
DataRegion<Elf_Word> ShndxTable(
(const Elf_Word *)this->DynSymTabShndxRegion.Addr, this->Obj.end());
for (auto &E : Parser.getGlobalEntries()) {
const Elf_Sym &Sym = *Parser.getGotSym(&E);
const Elf_Sym &FirstSym = this->dynamic_symbols()[0];
std::string SymName = this->getFullSymbolName(
Sym, &Sym - &FirstSym, ShndxTable, this->DynamicStringTable, false);
OS.PadToColumn(2);
OS << to_string(format_hex_no_prefix(Parser.getGotAddress(&E), 8 + Bias));
OS.PadToColumn(11 + Bias);
OS << to_string(format_decimal(Parser.getGotOffset(&E), 6)) + "(gp)";
OS.PadToColumn(22 + Bias);
OS << to_string(format_hex_no_prefix(E, 8 + Bias));
OS.PadToColumn(31 + 2 * Bias);
OS << to_string(format_hex_no_prefix(Sym.st_value, 8 + Bias));
OS.PadToColumn(40 + 3 * Bias);
OS << enumToString(Sym.getType(), makeArrayRef(ElfSymbolTypes));
OS.PadToColumn(48 + 3 * Bias);
OS << getSymbolSectionNdx(Sym, &Sym - this->dynamic_symbols().begin(),
ShndxTable);
OS.PadToColumn(52 + 3 * Bias);
OS << SymName << "\n";
}
}
if (!Parser.getOtherEntries().empty())
OS << "\n Number of TLS and multi-GOT entries "
<< Parser.getOtherEntries().size() << "\n";
}
template <class ELFT>
void GNUELFDumper<ELFT>::printMipsPLT(const MipsGOTParser<ELFT> &Parser) {
size_t Bias = ELFT::Is64Bits ? 8 : 0;
auto PrintEntry = [&](const Elf_Addr *E, StringRef Purpose) {
OS.PadToColumn(2);
OS << format_hex_no_prefix(Parser.getPltAddress(E), 8 + Bias);
OS.PadToColumn(11 + Bias);
OS << format_hex_no_prefix(*E, 8 + Bias);
OS.PadToColumn(20 + 2 * Bias);
OS << Purpose << "\n";
};
OS << "PLT GOT:\n\n";
OS << " Reserved entries:\n";
OS << " Address Initial Purpose\n";
PrintEntry(Parser.getPltLazyResolver(), "PLT lazy resolver");
if (Parser.getPltModulePointer())
PrintEntry(Parser.getPltModulePointer(), "Module pointer");
if (!Parser.getPltEntries().empty()) {
OS << "\n";
OS << " Entries:\n";
OS << " Address Initial Sym.Val. Type Ndx Name\n";
DataRegion<Elf_Word> ShndxTable(
(const Elf_Word *)this->DynSymTabShndxRegion.Addr, this->Obj.end());
for (auto &E : Parser.getPltEntries()) {
const Elf_Sym &Sym = *Parser.getPltSym(&E);
const Elf_Sym &FirstSym = *cantFail(
this->Obj.template getEntry<Elf_Sym>(*Parser.getPltSymTable(), 0));
std::string SymName = this->getFullSymbolName(
Sym, &Sym - &FirstSym, ShndxTable, this->DynamicStringTable, false);
OS.PadToColumn(2);
OS << to_string(format_hex_no_prefix(Parser.getPltAddress(&E), 8 + Bias));
OS.PadToColumn(11 + Bias);
OS << to_string(format_hex_no_prefix(E, 8 + Bias));
OS.PadToColumn(20 + 2 * Bias);
OS << to_string(format_hex_no_prefix(Sym.st_value, 8 + Bias));
OS.PadToColumn(29 + 3 * Bias);
OS << enumToString(Sym.getType(), makeArrayRef(ElfSymbolTypes));
OS.PadToColumn(37 + 3 * Bias);
OS << getSymbolSectionNdx(Sym, &Sym - this->dynamic_symbols().begin(),
ShndxTable);
OS.PadToColumn(41 + 3 * Bias);
OS << SymName << "\n";
}
}
}
template <class ELFT>
Expected<const Elf_Mips_ABIFlags<ELFT> *>
getMipsAbiFlagsSection(const ELFDumper<ELFT> &Dumper) {
const typename ELFT::Shdr *Sec = Dumper.findSectionByName(".MIPS.abiflags");
if (Sec == nullptr)
return nullptr;
constexpr StringRef ErrPrefix = "unable to read the .MIPS.abiflags section: ";
Expected<ArrayRef<uint8_t>> DataOrErr =
Dumper.getElfObject().getELFFile().getSectionContents(*Sec);
if (!DataOrErr)
return createError(ErrPrefix + toString(DataOrErr.takeError()));
if (DataOrErr->size() != sizeof(Elf_Mips_ABIFlags<ELFT>))
return createError(ErrPrefix + "it has a wrong size (" +
Twine(DataOrErr->size()) + ")");
return reinterpret_cast<const Elf_Mips_ABIFlags<ELFT> *>(DataOrErr->data());
}
template <class ELFT> void GNUELFDumper<ELFT>::printMipsABIFlags() {
const Elf_Mips_ABIFlags<ELFT> *Flags = nullptr;
if (Expected<const Elf_Mips_ABIFlags<ELFT> *> SecOrErr =
getMipsAbiFlagsSection(*this))
Flags = *SecOrErr;
else
this->reportUniqueWarning(SecOrErr.takeError());
if (!Flags)
return;
OS << "MIPS ABI Flags Version: " << Flags->version << "\n\n";
OS << "ISA: MIPS" << int(Flags->isa_level);
if (Flags->isa_rev > 1)
OS << "r" << int(Flags->isa_rev);
OS << "\n";
OS << "GPR size: " << getMipsRegisterSize(Flags->gpr_size) << "\n";
OS << "CPR1 size: " << getMipsRegisterSize(Flags->cpr1_size) << "\n";
OS << "CPR2 size: " << getMipsRegisterSize(Flags->cpr2_size) << "\n";
OS << "FP ABI: "
<< enumToString(Flags->fp_abi, makeArrayRef(ElfMipsFpABIType)) << "\n";
OS << "ISA Extension: "
<< enumToString(Flags->isa_ext, makeArrayRef(ElfMipsISAExtType)) << "\n";
if (Flags->ases == 0)
OS << "ASEs: None\n";
else
// FIXME: Print each flag on a separate line.
OS << "ASEs: " << printFlags(Flags->ases, makeArrayRef(ElfMipsASEFlags))
<< "\n";
OS << "FLAGS 1: " << format_hex_no_prefix(Flags->flags1, 8, false) << "\n";
OS << "FLAGS 2: " << format_hex_no_prefix(Flags->flags2, 8, false) << "\n";
OS << "\n";
}
template <class ELFT> void LLVMELFDumper<ELFT>::printFileHeaders() {
const Elf_Ehdr &E = this->Obj.getHeader();
{
DictScope D(W, "ElfHeader");
{
DictScope D(W, "Ident");
W.printBinary("Magic", makeArrayRef(E.e_ident).slice(ELF::EI_MAG0, 4));
W.printEnum("Class", E.e_ident[ELF::EI_CLASS], makeArrayRef(ElfClass));
W.printEnum("DataEncoding", E.e_ident[ELF::EI_DATA],
makeArrayRef(ElfDataEncoding));
W.printNumber("FileVersion", E.e_ident[ELF::EI_VERSION]);
auto OSABI = makeArrayRef(ElfOSABI);
if (E.e_ident[ELF::EI_OSABI] >= ELF::ELFOSABI_FIRST_ARCH &&
E.e_ident[ELF::EI_OSABI] <= ELF::ELFOSABI_LAST_ARCH) {
switch (E.e_machine) {
case ELF::EM_AMDGPU:
OSABI = makeArrayRef(AMDGPUElfOSABI);
break;
case ELF::EM_ARM:
OSABI = makeArrayRef(ARMElfOSABI);
break;
case ELF::EM_TI_C6000:
OSABI = makeArrayRef(C6000ElfOSABI);
break;
}
}
W.printEnum("OS/ABI", E.e_ident[ELF::EI_OSABI], OSABI);
W.printNumber("ABIVersion", E.e_ident[ELF::EI_ABIVERSION]);
W.printBinary("Unused", makeArrayRef(E.e_ident).slice(ELF::EI_PAD));
}
std::string TypeStr;
if (const EnumEntry<unsigned> *Ent = getObjectFileEnumEntry(E.e_type)) {
TypeStr = Ent->Name.str();
} else {
if (E.e_type >= ET_LOPROC)
TypeStr = "Processor Specific";
else if (E.e_type >= ET_LOOS)
TypeStr = "OS Specific";
else
TypeStr = "Unknown";
}
W.printString("Type", TypeStr + " (0x" + to_hexString(E.e_type) + ")");
W.printEnum("Machine", E.e_machine, makeArrayRef(ElfMachineType));
W.printNumber("Version", E.e_version);
W.printHex("Entry", E.e_entry);
W.printHex("ProgramHeaderOffset", E.e_phoff);
W.printHex("SectionHeaderOffset", E.e_shoff);
if (E.e_machine == EM_MIPS)
W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderMipsFlags),
unsigned(ELF::EF_MIPS_ARCH), unsigned(ELF::EF_MIPS_ABI),
unsigned(ELF::EF_MIPS_MACH));
else if (E.e_machine == EM_AMDGPU) {
switch (E.e_ident[ELF::EI_ABIVERSION]) {
default:
W.printHex("Flags", E.e_flags);
break;
case 0:
// ELFOSABI_AMDGPU_PAL, ELFOSABI_AMDGPU_MESA3D support *_V3 flags.
LLVM_FALLTHROUGH;
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
W.printFlags("Flags", E.e_flags,
makeArrayRef(ElfHeaderAMDGPUFlagsABIVersion3),
unsigned(ELF::EF_AMDGPU_MACH));
break;
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
W.printFlags("Flags", E.e_flags,
makeArrayRef(ElfHeaderAMDGPUFlagsABIVersion4),
unsigned(ELF::EF_AMDGPU_MACH),
unsigned(ELF::EF_AMDGPU_FEATURE_XNACK_V4),
unsigned(ELF::EF_AMDGPU_FEATURE_SRAMECC_V4));
break;
}
} else if (E.e_machine == EM_RISCV)
W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderRISCVFlags));
else if (E.e_machine == EM_AVR)
W.printFlags("Flags", E.e_flags, makeArrayRef(ElfHeaderAVRFlags),
unsigned(ELF::EF_AVR_ARCH_MASK));
else
W.printFlags("Flags", E.e_flags);
W.printNumber("HeaderSize", E.e_ehsize);
W.printNumber("ProgramHeaderEntrySize", E.e_phentsize);
W.printNumber("ProgramHeaderCount", E.e_phnum);
W.printNumber("SectionHeaderEntrySize", E.e_shentsize);
W.printString("SectionHeaderCount",
getSectionHeadersNumString(this->Obj, this->FileName));
W.printString("StringTableSectionIndex",
getSectionHeaderTableIndexString(this->Obj, this->FileName));
}
}
template <class ELFT> void LLVMELFDumper<ELFT>::printGroupSections() {
DictScope Lists(W, "Groups");
std::vector<GroupSection> V = this->getGroups();
DenseMap<uint64_t, const GroupSection *> Map = mapSectionsToGroups(V);
for (const GroupSection &G : V) {
DictScope D(W, "Group");
W.printNumber("Name", G.Name, G.ShName);
W.printNumber("Index", G.Index);
W.printNumber("Link", G.Link);
W.printNumber("Info", G.Info);
W.printHex("Type", getGroupType(G.Type), G.Type);
W.startLine() << "Signature: " << G.Signature << "\n";
ListScope L(W, "Section(s) in group");
for (const GroupMember &GM : G.Members) {
const GroupSection *MainGroup = Map[GM.Index];
if (MainGroup != &G)
this->reportUniqueWarning(
"section with index " + Twine(GM.Index) +
", included in the group section with index " +
Twine(MainGroup->Index) +
", was also found in the group section with index " +
Twine(G.Index));
W.startLine() << GM.Name << " (" << GM.Index << ")\n";
}
}
if (V.empty())
W.startLine() << "There are no group sections in the file.\n";
}
template <class ELFT> void LLVMELFDumper<ELFT>::printRelocations() {
ListScope D(W, "Relocations");
for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) {
if (!isRelocationSec<ELFT>(Sec))
continue;
StringRef Name = this->getPrintableSectionName(Sec);
unsigned SecNdx = &Sec - &cantFail(this->Obj.sections()).front();
W.startLine() << "Section (" << SecNdx << ") " << Name << " {\n";
W.indent();
this->printRelocationsHelper(Sec);
W.unindent();
W.startLine() << "}\n";
}
}
template <class ELFT>
void LLVMELFDumper<ELFT>::printRelrReloc(const Elf_Relr &R) {
W.startLine() << W.hex(R) << "\n";
}
template <class ELFT>
void LLVMELFDumper<ELFT>::printRelRelaReloc(const Relocation<ELFT> &R,
const RelSymbol<ELFT> &RelSym) {
StringRef SymbolName = RelSym.Name;
SmallString<32> RelocName;
this->Obj.getRelocationTypeName(R.Type, RelocName);
if (opts::ExpandRelocs) {
DictScope Group(W, "Relocation");
W.printHex("Offset", R.Offset);
W.printNumber("Type", RelocName, R.Type);
W.printNumber("Symbol", !SymbolName.empty() ? SymbolName : "-", R.Symbol);
if (R.Addend)
W.printHex("Addend", (uintX_t)*R.Addend);
} else {
raw_ostream &OS = W.startLine();
OS << W.hex(R.Offset) << " " << RelocName << " "
<< (!SymbolName.empty() ? SymbolName : "-");
if (R.Addend)
OS << " " << W.hex((uintX_t)*R.Addend);
OS << "\n";
}
}
template <class ELFT> void LLVMELFDumper<ELFT>::printSectionHeaders() {
ListScope SectionsD(W, "Sections");
int SectionIndex = -1;
std::vector<EnumEntry<unsigned>> FlagsList =
getSectionFlagsForTarget(this->Obj.getHeader().e_machine);
for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) {
DictScope SectionD(W, "Section");
W.printNumber("Index", ++SectionIndex);
W.printNumber("Name", this->getPrintableSectionName(Sec), Sec.sh_name);
W.printHex("Type",
object::getELFSectionTypeName(this->Obj.getHeader().e_machine,
Sec.sh_type),
Sec.sh_type);
W.printFlags("Flags", Sec.sh_flags, makeArrayRef(FlagsList));
W.printHex("Address", Sec.sh_addr);
W.printHex("Offset", Sec.sh_offset);
W.printNumber("Size", Sec.sh_size);
W.printNumber("Link", Sec.sh_link);
W.printNumber("Info", Sec.sh_info);
W.printNumber("AddressAlignment", Sec.sh_addralign);
W.printNumber("EntrySize", Sec.sh_entsize);
if (opts::SectionRelocations) {
ListScope D(W, "Relocations");
this->printRelocationsHelper(Sec);
}
if (opts::SectionSymbols) {
ListScope D(W, "Symbols");
if (this->DotSymtabSec) {
StringRef StrTable = unwrapOrError(
this->FileName,
this->Obj.getStringTableForSymtab(*this->DotSymtabSec));
ArrayRef<Elf_Word> ShndxTable = this->getShndxTable(this->DotSymtabSec);
typename ELFT::SymRange Symbols = unwrapOrError(
this->FileName, this->Obj.symbols(this->DotSymtabSec));
for (const Elf_Sym &Sym : Symbols) {
const Elf_Shdr *SymSec = unwrapOrError(
this->FileName,
this->Obj.getSection(Sym, this->DotSymtabSec, ShndxTable));
if (SymSec == &Sec)
printSymbol(Sym, &Sym - &Symbols[0], ShndxTable, StrTable, false,
false);
}
}
}
if (opts::SectionData && Sec.sh_type != ELF::SHT_NOBITS) {
ArrayRef<uint8_t> Data =
unwrapOrError(this->FileName, this->Obj.getSectionContents(Sec));
W.printBinaryBlock(
"SectionData",
StringRef(reinterpret_cast<const char *>(Data.data()), Data.size()));
}
}
}
template <class ELFT>
void LLVMELFDumper<ELFT>::printSymbolSection(
const Elf_Sym &Symbol, unsigned SymIndex,
DataRegion<Elf_Word> ShndxTable) const {
auto GetSectionSpecialType = [&]() -> Optional<StringRef> {
if (Symbol.isUndefined())
return StringRef("Undefined");
if (Symbol.isProcessorSpecific())
return StringRef("Processor Specific");
if (Symbol.isOSSpecific())
return StringRef("Operating System Specific");
if (Symbol.isAbsolute())
return StringRef("Absolute");
if (Symbol.isCommon())
return StringRef("Common");
if (Symbol.isReserved() && Symbol.st_shndx != SHN_XINDEX)
return StringRef("Reserved");
return None;
};
if (Optional<StringRef> Type = GetSectionSpecialType()) {
W.printHex("Section", *Type, Symbol.st_shndx);
return;
}
Expected<unsigned> SectionIndex =
this->getSymbolSectionIndex(Symbol, SymIndex, ShndxTable);
if (!SectionIndex) {
assert(Symbol.st_shndx == SHN_XINDEX &&
"getSymbolSectionIndex should only fail due to an invalid "
"SHT_SYMTAB_SHNDX table/reference");
this->reportUniqueWarning(SectionIndex.takeError());
W.printHex("Section", "Reserved", SHN_XINDEX);
return;
}
Expected<StringRef> SectionName =
this->getSymbolSectionName(Symbol, *SectionIndex);
if (!SectionName) {
// Don't report an invalid section name if the section headers are missing.
// In such situations, all sections will be "invalid".
if (!this->ObjF.sections().empty())
this->reportUniqueWarning(SectionName.takeError());
else
consumeError(SectionName.takeError());
W.printHex("Section", "<?>", *SectionIndex);
} else {
W.printHex("Section", *SectionName, *SectionIndex);
}
}
template <class ELFT>
void LLVMELFDumper<ELFT>::printSymbol(const Elf_Sym &Symbol, unsigned SymIndex,
DataRegion<Elf_Word> ShndxTable,
Optional<StringRef> StrTable,
bool IsDynamic,
bool /*NonVisibilityBitsUsed*/) const {
std::string FullSymbolName = this->getFullSymbolName(
Symbol, SymIndex, ShndxTable, StrTable, IsDynamic);
unsigned char SymbolType = Symbol.getType();
DictScope D(W, "Symbol");
W.printNumber("Name", FullSymbolName, Symbol.st_name);
W.printHex("Value", Symbol.st_value);
W.printNumber("Size", Symbol.st_size);
W.printEnum("Binding", Symbol.getBinding(), makeArrayRef(ElfSymbolBindings));
if (this->Obj.getHeader().e_machine == ELF::EM_AMDGPU &&
SymbolType >= ELF::STT_LOOS && SymbolType < ELF::STT_HIOS)
W.printEnum("Type", SymbolType, makeArrayRef(AMDGPUSymbolTypes));
else
W.printEnum("Type", SymbolType, makeArrayRef(ElfSymbolTypes));
if (Symbol.st_other == 0)
// The st_other field is usually zero; do not pollute the output with a
// flags enumeration in that case.
W.printNumber("Other", 0);
else {
std::vector<EnumEntry<unsigned>> SymOtherFlags(std::begin(ElfSymOtherFlags),
std::end(ElfSymOtherFlags));
if (this->Obj.getHeader().e_machine == EM_MIPS) {
// Someone in their infinite wisdom decided to make the STO_MIPS_MIPS16
// flag overlap with the other STO_MIPS_xxx flags, so consider the two
// cases separately.
if ((Symbol.st_other & STO_MIPS_MIPS16) == STO_MIPS_MIPS16)
SymOtherFlags.insert(SymOtherFlags.end(),
std::begin(ElfMips16SymOtherFlags),
std::end(ElfMips16SymOtherFlags));
else
SymOtherFlags.insert(SymOtherFlags.end(),
std::begin(ElfMipsSymOtherFlags),
std::end(ElfMipsSymOtherFlags));
} else if (this->Obj.getHeader().e_machine == EM_AARCH64) {
SymOtherFlags.insert(SymOtherFlags.end(),
std::begin(ElfAArch64SymOtherFlags),
std::end(ElfAArch64SymOtherFlags));
} else if (this->Obj.getHeader().e_machine == EM_RISCV) {
SymOtherFlags.insert(SymOtherFlags.end(),
std::begin(ElfRISCVSymOtherFlags),
std::end(ElfRISCVSymOtherFlags));
}
W.printFlags("Other", Symbol.st_other, makeArrayRef(SymOtherFlags), 0x3u);
}
printSymbolSection(Symbol, SymIndex, ShndxTable);
}
template <class ELFT>
void LLVMELFDumper<ELFT>::printSymbols(bool PrintSymbols,
bool PrintDynamicSymbols) {
if (PrintSymbols) {
ListScope Group(W, "Symbols");
this->printSymbolsHelper(false);
}
if (PrintDynamicSymbols) {
ListScope Group(W, "DynamicSymbols");
this->printSymbolsHelper(true);
}
}
template <class ELFT> void LLVMELFDumper<ELFT>::printDynamicTable() {
Elf_Dyn_Range Table = this->dynamic_table();
if (Table.empty())
return;
W.startLine() << "DynamicSection [ (" << Table.size() << " entries)\n";
size_t MaxTagSize = getMaxDynamicTagSize(this->Obj, Table);
// The "Name/Value" column should be indented from the "Type" column by N
// spaces, where N = MaxTagSize - length of "Type" (4) + trailing
// space (1) = -3.
W.startLine() << " Tag" << std::string(ELFT::Is64Bits ? 16 : 8, ' ')
<< "Type" << std::string(MaxTagSize - 3, ' ') << "Name/Value\n";
std::string ValueFmt = "%-" + std::to_string(MaxTagSize) + "s ";
for (auto Entry : Table) {
uintX_t Tag = Entry.getTag();
std::string Value = this->getDynamicEntry(Tag, Entry.getVal());
W.startLine() << " " << format_hex(Tag, ELFT::Is64Bits ? 18 : 10, true)
<< " "
<< format(ValueFmt.c_str(),
this->Obj.getDynamicTagAsString(Tag).c_str())
<< Value << "\n";
}
W.startLine() << "]\n";
}
template <class ELFT> void LLVMELFDumper<ELFT>::printDynamicRelocations() {
W.startLine() << "Dynamic Relocations {\n";
W.indent();
this->printDynamicRelocationsHelper();
W.unindent();
W.startLine() << "}\n";
}
template <class ELFT>
void LLVMELFDumper<ELFT>::printProgramHeaders(
bool PrintProgramHeaders, cl::boolOrDefault PrintSectionMapping) {
if (PrintProgramHeaders)
printProgramHeaders();
if (PrintSectionMapping == cl::BOU_TRUE)
printSectionMapping();
}
template <class ELFT> void LLVMELFDumper<ELFT>::printProgramHeaders() {
ListScope L(W, "ProgramHeaders");
Expected<ArrayRef<Elf_Phdr>> PhdrsOrErr = this->Obj.program_headers();
if (!PhdrsOrErr) {
this->reportUniqueWarning("unable to dump program headers: " +
toString(PhdrsOrErr.takeError()));
return;
}
for (const Elf_Phdr &Phdr : *PhdrsOrErr) {
DictScope P(W, "ProgramHeader");
StringRef Type =
segmentTypeToString(this->Obj.getHeader().e_machine, Phdr.p_type);
W.printHex("Type", Type.empty() ? "Unknown" : Type, Phdr.p_type);
W.printHex("Offset", Phdr.p_offset);
W.printHex("VirtualAddress", Phdr.p_vaddr);
W.printHex("PhysicalAddress", Phdr.p_paddr);
W.printNumber("FileSize", Phdr.p_filesz);
W.printNumber("MemSize", Phdr.p_memsz);
W.printFlags("Flags", Phdr.p_flags, makeArrayRef(ElfSegmentFlags));
W.printNumber("Alignment", Phdr.p_align);
}
}
template <class ELFT>
void LLVMELFDumper<ELFT>::printVersionSymbolSection(const Elf_Shdr *Sec) {
ListScope SS(W, "VersionSymbols");
if (!Sec)
return;
StringRef StrTable;
ArrayRef<Elf_Sym> Syms;
const Elf_Shdr *SymTabSec;
Expected<ArrayRef<Elf_Versym>> VerTableOrErr =
this->getVersionTable(*Sec, &Syms, &StrTable, &SymTabSec);
if (!VerTableOrErr) {
this->reportUniqueWarning(VerTableOrErr.takeError());
return;
}
if (StrTable.empty() || Syms.empty() || Syms.size() != VerTableOrErr->size())
return;
ArrayRef<Elf_Word> ShNdxTable = this->getShndxTable(SymTabSec);
for (size_t I = 0, E = Syms.size(); I < E; ++I) {
DictScope S(W, "Symbol");
W.printNumber("Version", (*VerTableOrErr)[I].vs_index & VERSYM_VERSION);
W.printString("Name",
this->getFullSymbolName(Syms[I], I, ShNdxTable, StrTable,
/*IsDynamic=*/true));
}
}
const EnumEntry<unsigned> SymVersionFlags[] = {
{"Base", "BASE", VER_FLG_BASE},
{"Weak", "WEAK", VER_FLG_WEAK},
{"Info", "INFO", VER_FLG_INFO}};
template <class ELFT>
void LLVMELFDumper<ELFT>::printVersionDefinitionSection(const Elf_Shdr *Sec) {
ListScope SD(W, "VersionDefinitions");
if (!Sec)
return;
Expected<std::vector<VerDef>> V = this->Obj.getVersionDefinitions(*Sec);
if (!V) {
this->reportUniqueWarning(V.takeError());
return;
}
for (const VerDef &D : *V) {
DictScope Def(W, "Definition");
W.printNumber("Version", D.Version);
W.printFlags("Flags", D.Flags, makeArrayRef(SymVersionFlags));
W.printNumber("Index", D.Ndx);
W.printNumber("Hash", D.Hash);
W.printString("Name", D.Name.c_str());
W.printList(
"Predecessors", D.AuxV,
[](raw_ostream &OS, const VerdAux &Aux) { OS << Aux.Name.c_str(); });
}
}
template <class ELFT>
void LLVMELFDumper<ELFT>::printVersionDependencySection(const Elf_Shdr *Sec) {
ListScope SD(W, "VersionRequirements");
if (!Sec)
return;
Expected<std::vector<VerNeed>> V =
this->Obj.getVersionDependencies(*Sec, this->WarningHandler);
if (!V) {
this->reportUniqueWarning(V.takeError());
return;
}
for (const VerNeed &VN : *V) {
DictScope Entry(W, "Dependency");
W.printNumber("Version", VN.Version);
W.printNumber("Count", VN.Cnt);
W.printString("FileName", VN.File.c_str());
ListScope L(W, "Entries");
for (const VernAux &Aux : VN.AuxV) {
DictScope Entry(W, "Entry");
W.printNumber("Hash", Aux.Hash);
W.printFlags("Flags", Aux.Flags, makeArrayRef(SymVersionFlags));
W.printNumber("Index", Aux.Other);
W.printString("Name", Aux.Name.c_str());
}
}
}
template <class ELFT> void LLVMELFDumper<ELFT>::printHashHistograms() {
W.startLine() << "Hash Histogram not implemented!\n";
}
// Returns true and populates SymbolIndices if the rel/rela section exists;
// otherwise returns false.
template <class ELFT>
static bool getSymbolIndices(const typename ELFT::Shdr *CGRelSection,
const ELFFile<ELFT> &Obj,
const LLVMELFDumper<ELFT> *Dumper,
SmallVector<uint32_t, 128> &SymbolIndices) {
if (!CGRelSection) {
Dumper->reportUniqueWarning(
"relocation section for a call graph section doesn't exist");
return false;
}
if (CGRelSection->sh_type == SHT_REL) {
typename ELFT::RelRange CGProfileRel;
Expected<typename ELFT::RelRange> CGProfileRelOrError =
Obj.rels(*CGRelSection);
if (!CGProfileRelOrError) {
Dumper->reportUniqueWarning("unable to load relocations for "
"SHT_LLVM_CALL_GRAPH_PROFILE section: " +
toString(CGProfileRelOrError.takeError()));
return false;
}
CGProfileRel = *CGProfileRelOrError;
for (const typename ELFT::Rel &Rel : CGProfileRel)
SymbolIndices.push_back(Rel.getSymbol(Obj.isMips64EL()));
} else {
// MC unconditionally produces SHT_REL, but GNU strip/objcopy may convert
// the format to SHT_RELA
// (https://sourceware.org/bugzilla/show_bug.cgi?id=28035)
typename ELFT::RelaRange CGProfileRela;
Expected<typename ELFT::RelaRange> CGProfileRelaOrError =
Obj.relas(*CGRelSection);
if (!CGProfileRelaOrError) {
Dumper->reportUniqueWarning("unable to load relocations for "
"SHT_LLVM_CALL_GRAPH_PROFILE section: " +
toString(CGProfileRelaOrError.takeError()));
return false;
}
CGProfileRela = *CGProfileRelaOrError;
for (const typename ELFT::Rela &Rela : CGProfileRela)
SymbolIndices.push_back(Rela.getSymbol(Obj.isMips64EL()));
}
return true;
}
template <class ELFT> void LLVMELFDumper<ELFT>::printCGProfile() {
llvm::MapVector<const Elf_Shdr *, const Elf_Shdr *> SecToRelocMap;
auto IsMatch = [](const Elf_Shdr &Sec) -> bool {
return Sec.sh_type == ELF::SHT_LLVM_CALL_GRAPH_PROFILE;
};
this->getSectionAndRelocations(IsMatch, SecToRelocMap);
for (const auto &CGMapEntry : SecToRelocMap) {
const Elf_Shdr *CGSection = CGMapEntry.first;
const Elf_Shdr *CGRelSection = CGMapEntry.second;
Expected<ArrayRef<Elf_CGProfile>> CGProfileOrErr =
this->Obj.template getSectionContentsAsArray<Elf_CGProfile>(*CGSection);
if (!CGProfileOrErr) {
this->reportUniqueWarning(
"unable to load the SHT_LLVM_CALL_GRAPH_PROFILE section: " +
toString(CGProfileOrErr.takeError()));
return;
}
SmallVector<uint32_t, 128> SymbolIndices;
bool UseReloc =
getSymbolIndices<ELFT>(CGRelSection, this->Obj, this, SymbolIndices);
if (UseReloc && SymbolIndices.size() != CGProfileOrErr->size() * 2) {
this->reportUniqueWarning(
"number of from/to pairs does not match number of frequencies");
UseReloc = false;
}
ListScope L(W, "CGProfile");
for (uint32_t I = 0, Size = CGProfileOrErr->size(); I != Size; ++I) {
const Elf_CGProfile &CGPE = (*CGProfileOrErr)[I];
DictScope D(W, "CGProfileEntry");
if (UseReloc) {
uint32_t From = SymbolIndices[I * 2];
uint32_t To = SymbolIndices[I * 2 + 1];
W.printNumber("From", this->getStaticSymbolName(From), From);
W.printNumber("To", this->getStaticSymbolName(To), To);
}
W.printNumber("Weight", CGPE.cgp_weight);
}
}
}
template <class ELFT> void LLVMELFDumper<ELFT>::printBBAddrMaps() {
bool IsRelocatable = this->Obj.getHeader().e_type == ELF::ET_REL;
for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) {
if (Sec.sh_type != SHT_LLVM_BB_ADDR_MAP)
continue;
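// For relocatable objects, sh_link identifies the text section whose
// addresses this SHT_LLVM_BB_ADDR_MAP section describes.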
Optional<const Elf_Shdr *> FunctionSec = None;
if (IsRelocatable)
FunctionSec =
unwrapOrError(this->FileName, this->Obj.getSection(Sec.sh_link));
ListScope L(W, "BBAddrMap");
Expected<std::vector<BBAddrMap>> BBAddrMapOrErr =
this->Obj.decodeBBAddrMap(Sec);
if (!BBAddrMapOrErr) {
this->reportUniqueWarning("unable to dump " + this->describe(Sec) + ": " +
toString(BBAddrMapOrErr.takeError()));
continue;
}
for (const BBAddrMap &AM : *BBAddrMapOrErr) {
DictScope D(W, "Function");
W.printHex("At", AM.Addr);
SmallVector<uint32_t> FuncSymIndex =
this->getSymbolIndexesForFunctionAddress(AM.Addr, FunctionSec);
std::string FuncName = "<?>";
if (FuncSymIndex.empty())
this->reportUniqueWarning(
"could not identify function symbol for address (0x" +
Twine::utohexstr(AM.Addr) + ") in " + this->describe(Sec));
else
FuncName = this->getStaticSymbolName(FuncSymIndex.front());
W.printString("Name", FuncName);
ListScope L(W, "BB entries");
for (const BBAddrMap::BBEntry &BBE : AM.BBEntries) {
DictScope L(W);
W.printHex("Offset", BBE.Offset);
W.printHex("Size", BBE.Size);
W.printBoolean("HasReturn", BBE.HasReturn);
W.printBoolean("HasTailCall", BBE.HasTailCall);
W.printBoolean("IsEHPad", BBE.IsEHPad);
W.printBoolean("CanFallThrough", BBE.CanFallThrough);
}
}
}
}
template <class ELFT> void LLVMELFDumper<ELFT>::printAddrsig() {
ListScope L(W, "Addrsig");
if (!this->DotAddrsigSec)
return;
Expected<std::vector<uint64_t>> SymsOrErr =
decodeAddrsigSection(this->Obj, *this->DotAddrsigSec);
if (!SymsOrErr) {
this->reportUniqueWarning(SymsOrErr.takeError());
return;
}
for (uint64_t Sym : *SymsOrErr)
W.printNumber("Sym", this->getStaticSymbolName(Sym), Sym);
}
template <typename ELFT>
static bool printGNUNoteLLVMStyle(uint32_t NoteType, ArrayRef<uint8_t> Desc,
ScopedPrinter &W) {
// Return true if we were able to pretty-print the note, false otherwise.
switch (NoteType) {
default:
return false;
case ELF::NT_GNU_ABI_TAG: {
const GNUAbiTag &AbiTag = getGNUAbiTag<ELFT>(Desc);
if (!AbiTag.IsValid) {
W.printString("ABI", "<corrupt GNU_ABI_TAG>");
return false;
} else {
W.printString("OS", AbiTag.OSName);
W.printString("ABI", AbiTag.ABI);
}
break;
}
case ELF::NT_GNU_BUILD_ID: {
W.printString("Build ID", getGNUBuildId(Desc));
break;
}
case ELF::NT_GNU_GOLD_VERSION:
W.printString("Version", getDescAsStringRef(Desc));
break;
case ELF::NT_GNU_PROPERTY_TYPE_0:
ListScope D(W, "Property");
for (const std::string &Property : getGNUPropertyList<ELFT>(Desc))
W.printString(Property);
break;
}
return true;
}
template <typename ELFT>
static bool printLLVMOMPOFFLOADNoteLLVMStyle(uint32_t NoteType,
ArrayRef<uint8_t> Desc,
ScopedPrinter &W) {
switch (NoteType) {
default:
return false;
case ELF::NT_LLVM_OPENMP_OFFLOAD_VERSION:
W.printString("Version", getDescAsStringRef(Desc));
break;
case ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER:
W.printString("Producer", getDescAsStringRef(Desc));
break;
case ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER_VERSION:
W.printString("Producer version", getDescAsStringRef(Desc));
break;
}
return true;
}
static void printCoreNoteLLVMStyle(const CoreNote &Note, ScopedPrinter &W) {
W.printNumber("Page Size", Note.PageSize);
for (const CoreFileMapping &Mapping : Note.Mappings) {
ListScope D(W, "Mapping");
W.printHex("Start", Mapping.Start);
W.printHex("End", Mapping.End);
W.printHex("Offset", Mapping.Offset);
W.printString("Filename", Mapping.Filename);
}
}
template <class ELFT> void LLVMELFDumper<ELFT>::printNotes() {
ListScope L(W, "Notes");
std::unique_ptr<DictScope> NoteScope;
auto StartNotes = [&](Optional<StringRef> SecName,
const typename ELFT::Off Offset,
const typename ELFT::Addr Size) {
NoteScope = std::make_unique<DictScope>(W, "NoteSection");
W.printString("Name", SecName ? *SecName : "<?>");
W.printHex("Offset", Offset);
W.printHex("Size", Size);
};
auto EndNotes = [&] { NoteScope.reset(); };
auto ProcessNote = [&](const Elf_Note &Note, bool IsCore) -> Error {
DictScope D2(W, "Note");
StringRef Name = Note.getName();
ArrayRef<uint8_t> Descriptor = Note.getDesc();
Elf_Word Type = Note.getType();
// Print the note owner/type.
W.printString("Owner", Name);
W.printHex("Data size", Descriptor.size());
StringRef NoteType =
getNoteTypeName<ELFT>(Note, this->Obj.getHeader().e_type);
if (!NoteType.empty())
W.printString("Type", NoteType);
else
W.printString("Type",
"Unknown (" + to_string(format_hex(Type, 10)) + ")");
// Print the description, or fall back to printing raw bytes for unknown
// owners or if we fail to pretty-print the contents.
if (Name == "GNU") {
if (printGNUNoteLLVMStyle<ELFT>(Type, Descriptor, W))
return Error::success();
} else if (Name == "FreeBSD") {
if (Optional<FreeBSDNote> N =
getFreeBSDNote<ELFT>(Type, Descriptor, IsCore)) {
W.printString(N->Type, N->Value);
return Error::success();
}
} else if (Name == "AMD") {
const AMDNote N = getAMDNote<ELFT>(Type, Descriptor);
if (!N.Type.empty()) {
W.printString(N.Type, N.Value);
return Error::success();
}
} else if (Name == "AMDGPU") {
const AMDGPUNote N = getAMDGPUNote<ELFT>(Type, Descriptor);
if (!N.Type.empty()) {
W.printString(N.Type, N.Value);
return Error::success();
}
} else if (Name == "LLVMOMPOFFLOAD") {
if (printLLVMOMPOFFLOADNoteLLVMStyle<ELFT>(Type, Descriptor, W))
return Error::success();
} else if (Name == "CORE") {
if (Type == ELF::NT_FILE) {
DataExtractor DescExtractor(Descriptor,
ELFT::TargetEndianness == support::little,
sizeof(Elf_Addr));
if (Expected<CoreNote> N = readCoreNote(DescExtractor)) {
printCoreNoteLLVMStyle(*N, W);
return Error::success();
} else {
return N.takeError();
}
}
}
if (!Descriptor.empty()) {
W.printBinaryBlock("Description data", Descriptor);
}
return Error::success();
};
printNotesHelper(*this, StartNotes, ProcessNote, EndNotes);
}
template <class ELFT> void LLVMELFDumper<ELFT>::printELFLinkerOptions() {
ListScope L(W, "LinkerOptions");
unsigned I = -1;
for (const Elf_Shdr &Shdr : cantFail(this->Obj.sections())) {
++I;
if (Shdr.sh_type != ELF::SHT_LLVM_LINKER_OPTIONS)
continue;
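// The section content is a sequence of null-terminated key/value string
// pairs; validate the terminator and the pairing before printing.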
Expected<ArrayRef<uint8_t>> ContentsOrErr =
this->Obj.getSectionContents(Shdr);
if (!ContentsOrErr) {
this->reportUniqueWarning("unable to read the content of the "
"SHT_LLVM_LINKER_OPTIONS section: " +
toString(ContentsOrErr.takeError()));
continue;
}
if (ContentsOrErr->empty())
continue;
if (ContentsOrErr->back() != 0) {
this->reportUniqueWarning("SHT_LLVM_LINKER_OPTIONS section at index " +
Twine(I) +
" is broken: the "
"content is not null-terminated");
continue;
}
SmallVector<StringRef, 16> Strings;
toStringRef(ContentsOrErr->drop_back()).split(Strings, '\0');
if (Strings.size() % 2 != 0) {
this->reportUniqueWarning(
"SHT_LLVM_LINKER_OPTIONS section at index " + Twine(I) +
" is broken: an incomplete "
"key-value pair was found. The last possible key was: \"" +
Strings.back() + "\"");
continue;
}
for (size_t I = 0; I < Strings.size(); I += 2)
W.printString(Strings[I], Strings[I + 1]);
}
}
template <class ELFT> void LLVMELFDumper<ELFT>::printDependentLibs() {
ListScope L(W, "DependentLibs");
this->printDependentLibsHelper(
[](const Elf_Shdr &) {},
[this](StringRef Lib, uint64_t) { W.printString(Lib); });
}
template <class ELFT> void LLVMELFDumper<ELFT>::printStackSizes() {
ListScope L(W, "StackSizes");
if (this->Obj.getHeader().e_type == ELF::ET_REL)
this->printRelocatableStackSizes([]() {});
else
this->printNonRelocatableStackSizes([]() {});
}
template <class ELFT>
void LLVMELFDumper<ELFT>::printStackSizeEntry(uint64_t Size,
ArrayRef<std::string> FuncNames) {
DictScope D(W, "Entry");
W.printList("Functions", FuncNames);
W.printHex("Size", Size);
}
template <class ELFT>
void LLVMELFDumper<ELFT>::printMipsGOT(const MipsGOTParser<ELFT> &Parser) {
auto PrintEntry = [&](const Elf_Addr *E) {
W.printHex("Address", Parser.getGotAddress(E));
W.printNumber("Access", Parser.getGotOffset(E));
W.printHex("Initial", *E);
};
DictScope GS(W, Parser.IsStatic ? "Static GOT" : "Primary GOT");
W.printHex("Canonical gp value", Parser.getGp());
{
ListScope RS(W, "Reserved entries");
{
DictScope D(W, "Entry");
PrintEntry(Parser.getGotLazyResolver());
W.printString("Purpose", StringRef("Lazy resolver"));
}
if (Parser.getGotModulePointer()) {
DictScope D(W, "Entry");
PrintEntry(Parser.getGotModulePointer());
W.printString("Purpose", StringRef("Module pointer (GNU extension)"));
}
}
{
ListScope LS(W, "Local entries");
for (auto &E : Parser.getLocalEntries()) {
DictScope D(W, "Entry");
PrintEntry(&E);
}
}
if (Parser.IsStatic)
return;
{
ListScope GS(W, "Global entries");
for (auto &E : Parser.getGlobalEntries()) {
DictScope D(W, "Entry");
PrintEntry(&E);
const Elf_Sym &Sym = *Parser.getGotSym(&E);
W.printHex("Value", Sym.st_value);
W.printEnum("Type", Sym.getType(), makeArrayRef(ElfSymbolTypes));
const unsigned SymIndex = &Sym - this->dynamic_symbols().begin();
DataRegion<Elf_Word> ShndxTable(
(const Elf_Word *)this->DynSymTabShndxRegion.Addr, this->Obj.end());
printSymbolSection(Sym, SymIndex, ShndxTable);
std::string SymName = this->getFullSymbolName(
Sym, SymIndex, ShndxTable, this->DynamicStringTable, true);
W.printNumber("Name", SymName, Sym.st_name);
}
}
W.printNumber("Number of TLS and multi-GOT entries",
uint64_t(Parser.getOtherEntries().size()));
}
template <class ELFT>
void LLVMELFDumper<ELFT>::printMipsPLT(const MipsGOTParser<ELFT> &Parser) {
auto PrintEntry = [&](const Elf_Addr *E) {
W.printHex("Address", Parser.getPltAddress(E));
W.printHex("Initial", *E);
};
DictScope GS(W, "PLT GOT");
{
ListScope RS(W, "Reserved entries");
{
DictScope D(W, "Entry");
PrintEntry(Parser.getPltLazyResolver());
W.printString("Purpose", StringRef("PLT lazy resolver"));
}
if (auto E = Parser.getPltModulePointer()) {
DictScope D(W, "Entry");
PrintEntry(E);
W.printString("Purpose", StringRef("Module pointer"));
}
}
{
ListScope LS(W, "Entries");
DataRegion<Elf_Word> ShndxTable(
(const Elf_Word *)this->DynSymTabShndxRegion.Addr, this->Obj.end());
for (auto &E : Parser.getPltEntries()) {
DictScope D(W, "Entry");
PrintEntry(&E);
const Elf_Sym &Sym = *Parser.getPltSym(&E);
W.printHex("Value", Sym.st_value);
W.printEnum("Type", Sym.getType(), makeArrayRef(ElfSymbolTypes));
printSymbolSection(Sym, &Sym - this->dynamic_symbols().begin(),
ShndxTable);
const Elf_Sym *FirstSym = cantFail(
this->Obj.template getEntry<Elf_Sym>(*Parser.getPltSymTable(), 0));
std::string SymName = this->getFullSymbolName(
Sym, &Sym - FirstSym, ShndxTable, Parser.getPltStrTable(), true);
W.printNumber("Name", SymName, Sym.st_name);
}
}
}
template <class ELFT> void LLVMELFDumper<ELFT>::printMipsABIFlags() {
const Elf_Mips_ABIFlags<ELFT> *Flags;
if (Expected<const Elf_Mips_ABIFlags<ELFT> *> SecOrErr =
getMipsAbiFlagsSection(*this)) {
Flags = *SecOrErr;
if (!Flags) {
W.startLine() << "There is no .MIPS.abiflags section in the file.\n";
return;
}
} else {
this->reportUniqueWarning(SecOrErr.takeError());
return;
}
raw_ostream &OS = W.getOStream();
DictScope GS(W, "MIPS ABI Flags");
W.printNumber("Version", Flags->version);
W.startLine() << "ISA: ";
if (Flags->isa_rev <= 1)
OS << format("MIPS%u", Flags->isa_level);
else
OS << format("MIPS%ur%u", Flags->isa_level, Flags->isa_rev);
OS << "\n";
W.printEnum("ISA Extension", Flags->isa_ext, makeArrayRef(ElfMipsISAExtType));
W.printFlags("ASEs", Flags->ases, makeArrayRef(ElfMipsASEFlags));
W.printEnum("FP ABI", Flags->fp_abi, makeArrayRef(ElfMipsFpABIType));
W.printNumber("GPR size", getMipsRegisterSize(Flags->gpr_size));
W.printNumber("CPR1 size", getMipsRegisterSize(Flags->cpr1_size));
W.printNumber("CPR2 size", getMipsRegisterSize(Flags->cpr2_size));
W.printFlags("Flags 1", Flags->flags1, makeArrayRef(ElfMipsFlags1));
W.printHex("Flags 2", Flags->flags2);
}
template <class ELFT>
void JSONELFDumper<ELFT>::printFileSummary(StringRef FileStr, ObjectFile &Obj,
ArrayRef<std::string> InputFilenames,
const Archive *A) {
FileScope = std::make_unique<DictScope>(this->W, FileStr);
DictScope D(this->W, "FileSummary");
this->W.printString("File", FileStr);
this->W.printString("Format", Obj.getFileFormatName());
this->W.printString("Arch", Triple::getArchTypeName(Obj.getArch()));
this->W.printString(
"AddressSize",
std::string(formatv("{0}bit", 8 * Obj.getBytesInAddress())));
this->printLoadName();
}
diff --git a/contrib/llvm-project/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp b/contrib/llvm-project/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
index 21339a3f8f3d..893d8a55c895 100644
--- a/contrib/llvm-project/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
+++ b/contrib/llvm-project/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
@@ -1,1049 +1,1049 @@
//===-- llvm-rtdyld.cpp - MCJIT Testing Tool ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a testing tool for use with the MC-JIT LLVM components.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringMap.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/ExecutionEngine/RuntimeDyldChecker.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/SymbolSize.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/MSVCErrorWorkarounds.h"
#include "llvm/Support/Memory.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include <future>
#include <list>
using namespace llvm;
using namespace llvm::object;
static cl::OptionCategory RTDyldCategory("RTDyld Options");
static cl::list<std::string> InputFileList(cl::Positional, cl::ZeroOrMore,
cl::desc("<input files>"),
cl::cat(RTDyldCategory));
enum ActionType {
AC_Execute,
AC_PrintObjectLineInfo,
AC_PrintLineInfo,
AC_PrintDebugLineInfo,
AC_Verify
};
static cl::opt<ActionType> Action(
cl::desc("Action to perform:"), cl::init(AC_Execute),
cl::values(
clEnumValN(AC_Execute, "execute",
"Load, link, and execute the inputs."),
clEnumValN(AC_PrintLineInfo, "printline",
"Load, link, and print line information for each function."),
clEnumValN(AC_PrintDebugLineInfo, "printdebugline",
"Load, link, and print line information for each function "
"using the debug object"),
clEnumValN(AC_PrintObjectLineInfo, "printobjline",
"Like -printlineinfo but does not load the object first"),
clEnumValN(AC_Verify, "verify",
"Load, link and verify the resulting memory image.")),
cl::cat(RTDyldCategory));
static cl::opt<std::string>
EntryPoint("entry", cl::desc("Function to call as entry point."),
cl::init("_main"), cl::cat(RTDyldCategory));
static cl::list<std::string> Dylibs("dylib", cl::desc("Add library."),
cl::ZeroOrMore, cl::cat(RTDyldCategory));
static cl::list<std::string> InputArgv("args", cl::Positional,
cl::desc("<program arguments>..."),
cl::ZeroOrMore, cl::PositionalEatsArgs,
cl::cat(RTDyldCategory));
static cl::opt<std::string>
TripleName("triple", cl::desc("Target triple for disassembler"),
cl::cat(RTDyldCategory));
static cl::opt<std::string>
MCPU("mcpu",
cl::desc("Target a specific cpu type (-mcpu=help for details)"),
cl::value_desc("cpu-name"), cl::init(""), cl::cat(RTDyldCategory));
static cl::list<std::string>
CheckFiles("check",
cl::desc("File containing RuntimeDyld verifier checks."),
cl::ZeroOrMore, cl::cat(RTDyldCategory));
static cl::opt<uint64_t>
PreallocMemory("preallocate",
cl::desc("Allocate memory upfront rather than on-demand"),
cl::init(0), cl::cat(RTDyldCategory));
static cl::opt<uint64_t> TargetAddrStart(
"target-addr-start",
cl::desc("For -verify only: start of phony target address "
"range."),
cl::init(4096), // Start at "page 1" - no allocating at "null".
cl::Hidden, cl::cat(RTDyldCategory));
static cl::opt<uint64_t> TargetAddrEnd(
"target-addr-end",
cl::desc("For -verify only: end of phony target address range."),
cl::init(~0ULL), cl::Hidden, cl::cat(RTDyldCategory));
static cl::opt<uint64_t> TargetSectionSep(
"target-section-sep",
cl::desc("For -verify only: Separation between sections in "
"phony target address space."),
cl::init(0), cl::Hidden, cl::cat(RTDyldCategory));
static cl::list<std::string>
SpecificSectionMappings("map-section",
cl::desc("For -verify only: Map a section to a "
"specific address."),
cl::ZeroOrMore, cl::Hidden,
cl::cat(RTDyldCategory));
static cl::list<std::string> DummySymbolMappings(
"dummy-extern",
cl::desc("For -verify only: Inject a symbol into the extern "
"symbol table."),
cl::ZeroOrMore, cl::Hidden, cl::cat(RTDyldCategory));
static cl::opt<bool> PrintAllocationRequests(
"print-alloc-requests",
cl::desc("Print allocation requests made to the memory "
"manager by RuntimeDyld"),
cl::Hidden, cl::cat(RTDyldCategory));
static cl::opt<bool> ShowTimes("show-times",
cl::desc("Show times for llvm-rtdyld phases"),
cl::init(false), cl::cat(RTDyldCategory));
ExitOnError ExitOnErr;
struct RTDyldTimers {
TimerGroup RTDyldTG{"llvm-rtdyld timers", "timers for llvm-rtdyld phases"};
Timer LoadObjectsTimer{"load", "time to load/add object files", RTDyldTG};
Timer LinkTimer{"link", "time to link object files", RTDyldTG};
Timer RunTimer{"run", "time to execute jitlink'd code", RTDyldTG};
};
std::unique_ptr<RTDyldTimers> Timers;
/* *** */
using SectionIDMap = StringMap<unsigned>;
using FileToSectionIDMap = StringMap<SectionIDMap>;
void dumpFileToSectionIDMap(const FileToSectionIDMap &FileToSecIDMap) {
for (const auto &KV : FileToSecIDMap) {
llvm::dbgs() << "In " << KV.first() << "\n";
for (auto &KV2 : KV.second)
llvm::dbgs() << " \"" << KV2.first() << "\" -> " << KV2.second << "\n";
}
}
Expected<unsigned> getSectionId(const FileToSectionIDMap &FileToSecIDMap,
StringRef FileName, StringRef SectionName) {
auto I = FileToSecIDMap.find(FileName);
if (I == FileToSecIDMap.end())
return make_error<StringError>("No file named " + FileName,
inconvertibleErrorCode());
auto &SectionIDs = I->second;
auto J = SectionIDs.find(SectionName);
if (J == SectionIDs.end())
return make_error<StringError>("No section named \"" + SectionName +
"\" in file " + FileName,
inconvertibleErrorCode());
return J->second;
}
// A trivial memory manager that doesn't do anything fancy, just uses the
// support library allocation routines directly.
class TrivialMemoryManager : public RTDyldMemoryManager {
public:
struct SectionInfo {
SectionInfo(StringRef Name, sys::MemoryBlock MB, unsigned SectionID)
: Name(std::string(Name)), MB(std::move(MB)), SectionID(SectionID) {}
std::string Name;
sys::MemoryBlock MB;
unsigned SectionID = ~0U;
};
SmallVector<SectionInfo, 16> FunctionMemory;
SmallVector<SectionInfo, 16> DataMemory;
uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
unsigned SectionID,
StringRef SectionName) override;
uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
unsigned SectionID, StringRef SectionName,
bool IsReadOnly) override;
TrivialMemoryManager::TLSSection
allocateTLSSection(uintptr_t Size, unsigned Alignment, unsigned SectionID,
StringRef SectionName) override;
/// If non-null, records subsequent Name -> SectionID mappings.
void setSectionIDsMap(SectionIDMap *SecIDMap) {
this->SecIDMap = SecIDMap;
}
void *getPointerToNamedFunction(const std::string &Name,
bool AbortOnFailure = true) override {
return nullptr;
}
bool finalizeMemory(std::string *ErrMsg) override { return false; }
void addDummySymbol(const std::string &Name, uint64_t Addr) {
DummyExterns[Name] = Addr;
}
JITSymbol findSymbol(const std::string &Name) override {
auto I = DummyExterns.find(Name);
if (I != DummyExterns.end())
return JITSymbol(I->second, JITSymbolFlags::Exported);
if (auto Sym = RTDyldMemoryManager::findSymbol(Name))
return Sym;
else if (auto Err = Sym.takeError())
ExitOnErr(std::move(Err));
else
ExitOnErr(make_error<StringError>("Could not find definition for \"" +
Name + "\"",
inconvertibleErrorCode()));
llvm_unreachable("Should have returned or exited by now");
}
void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr,
size_t Size) override {}
void deregisterEHFrames() override {}
void preallocateSlab(uint64_t Size) {
std::error_code EC;
sys::MemoryBlock MB =
sys::Memory::allocateMappedMemory(Size, nullptr,
sys::Memory::MF_READ |
sys::Memory::MF_WRITE,
EC);
if (!MB.base())
report_fatal_error(Twine("Can't allocate enough memory: ") +
EC.message());
PreallocSlab = MB;
UsePreallocation = true;
SlabSize = Size;
}
uint8_t *allocateFromSlab(uintptr_t Size, unsigned Alignment, bool isCode,
StringRef SectionName, unsigned SectionID) {
Size = alignTo(Size, Alignment);
if (CurrentSlabOffset + Size > SlabSize)
report_fatal_error("Can't allocate enough memory. Tune --preallocate");
uintptr_t OldSlabOffset = CurrentSlabOffset;
sys::MemoryBlock MB((void *)OldSlabOffset, Size);
if (isCode)
FunctionMemory.push_back(SectionInfo(SectionName, MB, SectionID));
else
DataMemory.push_back(SectionInfo(SectionName, MB, SectionID));
CurrentSlabOffset += Size;
return (uint8_t*)OldSlabOffset;
}
private:
std::map<std::string, uint64_t> DummyExterns;
sys::MemoryBlock PreallocSlab;
bool UsePreallocation = false;
uintptr_t SlabSize = 0;
uintptr_t CurrentSlabOffset = 0;
SectionIDMap *SecIDMap = nullptr;
-#if defined(__x86_64__) && defined(__ELF__)
+#if defined(__x86_64__) && defined(__ELF__) && defined(__linux__)
unsigned UsedTLSStorage = 0;
#endif
};
uint8_t *TrivialMemoryManager::allocateCodeSection(uintptr_t Size,
unsigned Alignment,
unsigned SectionID,
StringRef SectionName) {
if (PrintAllocationRequests)
outs() << "allocateCodeSection(Size = " << Size << ", Alignment = "
<< Alignment << ", SectionName = " << SectionName << ")\n";
if (SecIDMap)
(*SecIDMap)[SectionName] = SectionID;
if (UsePreallocation)
return allocateFromSlab(Size, Alignment, true /* isCode */,
SectionName, SectionID);
std::error_code EC;
sys::MemoryBlock MB =
sys::Memory::allocateMappedMemory(Size, nullptr,
sys::Memory::MF_READ |
sys::Memory::MF_WRITE,
EC);
if (!MB.base())
report_fatal_error(Twine("MemoryManager allocation failed: ") +
EC.message());
FunctionMemory.push_back(SectionInfo(SectionName, MB, SectionID));
return (uint8_t*)MB.base();
}
uint8_t *TrivialMemoryManager::allocateDataSection(uintptr_t Size,
unsigned Alignment,
unsigned SectionID,
StringRef SectionName,
bool IsReadOnly) {
if (PrintAllocationRequests)
outs() << "allocateDataSection(Size = " << Size << ", Alignment = "
<< Alignment << ", SectionName = " << SectionName << ")\n";
if (SecIDMap)
(*SecIDMap)[SectionName] = SectionID;
if (UsePreallocation)
return allocateFromSlab(Size, Alignment, false /* isCode */, SectionName,
SectionID);
std::error_code EC;
sys::MemoryBlock MB =
sys::Memory::allocateMappedMemory(Size, nullptr,
sys::Memory::MF_READ |
sys::Memory::MF_WRITE,
EC);
if (!MB.base())
report_fatal_error(Twine("MemoryManager allocation failed: ") +
EC.message());
DataMemory.push_back(SectionInfo(SectionName, MB, SectionID));
return (uint8_t*)MB.base();
}
// In case the execution needs TLS storage, we define a very small TLS memory
// area here that will be used in allocateTLSSection().
-#if defined(__x86_64__) && defined(__ELF__)
+#if defined(__x86_64__) && defined(__ELF__) && defined(__linux__)
extern "C" {
alignas(16) __attribute__((visibility("hidden"), tls_model("initial-exec"),
used)) thread_local char LLVMRTDyldTLSSpace[16];
}
#endif
TrivialMemoryManager::TLSSection
TrivialMemoryManager::allocateTLSSection(uintptr_t Size, unsigned Alignment,
unsigned SectionID,
StringRef SectionName) {
-#if defined(__x86_64__) && defined(__ELF__)
+#if defined(__x86_64__) && defined(__ELF__) && defined(__linux__)
if (Size + UsedTLSStorage > sizeof(LLVMRTDyldTLSSpace)) {
return {};
}
// Get the offset of the TLSSpace in the TLS block by using a tpoff
// relocation here.
int64_t TLSOffset;
asm("leaq LLVMRTDyldTLSSpace@tpoff, %0" : "=r"(TLSOffset));
TLSSection Section;
// We use the storage directly as the initialization image. This means that
// when a new thread is spawned after this allocation, it will not be
initialized correctly. As a result, llvm-rtdyld only supports TLS in a
// single thread.
Section.InitializationImage =
reinterpret_cast<uint8_t *>(LLVMRTDyldTLSSpace + UsedTLSStorage);
Section.Offset = TLSOffset + UsedTLSStorage;
UsedTLSStorage += Size;
return Section;
#else
return {};
#endif
}
static const char *ProgramName;
static void ErrorAndExit(const Twine &Msg) {
errs() << ProgramName << ": error: " << Msg << "\n";
exit(1);
}
static void loadDylibs() {
for (const std::string &Dylib : Dylibs) {
if (!sys::fs::is_regular_file(Dylib))
report_fatal_error(Twine("Dylib not found: '") + Dylib + "'.");
std::string ErrMsg;
if (sys::DynamicLibrary::LoadLibraryPermanently(Dylib.c_str(), &ErrMsg))
report_fatal_error(Twine("Error loading '") + Dylib + "': " + ErrMsg);
}
}
/* *** */
static int printLineInfoForInput(bool LoadObjects, bool UseDebugObj) {
assert(LoadObjects || !UseDebugObj);
// Load any dylibs requested on the command line.
loadDylibs();
// If we don't have any input files, read from stdin.
if (!InputFileList.size())
InputFileList.push_back("-");
for (auto &File : InputFileList) {
// Instantiate a dynamic linker.
TrivialMemoryManager MemMgr;
RuntimeDyld Dyld(MemMgr, MemMgr);
// Load the input memory buffer.
ErrorOr<std::unique_ptr<MemoryBuffer>> InputBuffer =
MemoryBuffer::getFileOrSTDIN(File);
if (std::error_code EC = InputBuffer.getError())
ErrorAndExit("unable to read input: '" + EC.message() + "'");
Expected<std::unique_ptr<ObjectFile>> MaybeObj(
ObjectFile::createObjectFile((*InputBuffer)->getMemBufferRef()));
if (!MaybeObj) {
std::string Buf;
raw_string_ostream OS(Buf);
logAllUnhandledErrors(MaybeObj.takeError(), OS);
OS.flush();
ErrorAndExit("unable to create object file: '" + Buf + "'");
}
ObjectFile &Obj = **MaybeObj;
OwningBinary<ObjectFile> DebugObj;
std::unique_ptr<RuntimeDyld::LoadedObjectInfo> LoadedObjInfo = nullptr;
ObjectFile *SymbolObj = &Obj;
if (LoadObjects) {
// Load the object file
LoadedObjInfo =
Dyld.loadObject(Obj);
if (Dyld.hasError())
ErrorAndExit(Dyld.getErrorString());
// Resolve all the relocations we can.
Dyld.resolveRelocations();
if (UseDebugObj) {
DebugObj = LoadedObjInfo->getObjectForDebug(Obj);
SymbolObj = DebugObj.getBinary();
LoadedObjInfo.reset();
}
}
std::unique_ptr<DIContext> Context = DWARFContext::create(
*SymbolObj, DWARFContext::ProcessDebugRelocations::Process,
LoadedObjInfo.get());
std::vector<std::pair<SymbolRef, uint64_t>> SymAddr =
object::computeSymbolSizes(*SymbolObj);
// Use symbol info to iterate functions in the object.
for (const auto &P : SymAddr) {
object::SymbolRef Sym = P.first;
Expected<SymbolRef::Type> TypeOrErr = Sym.getType();
if (!TypeOrErr) {
// TODO: Actually report errors helpfully.
consumeError(TypeOrErr.takeError());
continue;
}
SymbolRef::Type Type = *TypeOrErr;
if (Type == object::SymbolRef::ST_Function) {
Expected<StringRef> Name = Sym.getName();
if (!Name) {
// TODO: Actually report errors helpfully.
consumeError(Name.takeError());
continue;
}
Expected<uint64_t> AddrOrErr = Sym.getAddress();
if (!AddrOrErr) {
// TODO: Actually report errors helpfully.
consumeError(AddrOrErr.takeError());
continue;
}
uint64_t Addr = *AddrOrErr;
object::SectionedAddress Address;
uint64_t Size = P.second;
// If we're not using the debug object, compute the address of the
// symbol in memory (rather than that in the unrelocated object file)
// and use that to query the DWARFContext.
if (!UseDebugObj && LoadObjects) {
auto SecOrErr = Sym.getSection();
if (!SecOrErr) {
// TODO: Actually report errors helpfully.
consumeError(SecOrErr.takeError());
continue;
}
object::section_iterator Sec = *SecOrErr;
Address.SectionIndex = Sec->getIndex();
uint64_t SectionLoadAddress =
LoadedObjInfo->getSectionLoadAddress(*Sec);
if (SectionLoadAddress != 0)
Addr += SectionLoadAddress - Sec->getAddress();
} else if (auto SecOrErr = Sym.getSection())
Address.SectionIndex = SecOrErr.get()->getIndex();
outs() << "Function: " << *Name << ", Size = " << Size
<< ", Addr = " << Addr << "\n";
Address.Address = Addr;
DILineInfoTable Lines =
Context->getLineInfoForAddressRange(Address, Size);
for (auto &D : Lines) {
outs() << " Line info @ " << D.first - Addr << ": "
<< D.second.FileName << ", line:" << D.second.Line << "\n";
}
}
}
}
return 0;
}
static void doPreallocation(TrivialMemoryManager &MemMgr) {
// Allocate a slab of memory upfront, if required. This is used if
// we want to test small code models.
if (static_cast<intptr_t>(PreallocMemory) < 0)
report_fatal_error("Pre-allocated bytes of memory must be a positive integer.");
// FIXME: Limit the amount of memory that can be preallocated?
if (PreallocMemory != 0)
MemMgr.preallocateSlab(PreallocMemory);
}
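// Load and link all input objects in-process, then jump to the entry point
// (_main by default; see -entry). Values given after -args are passed through
// as the target program's argv. Example invocation (file and argument names
// are illustrative only):
//   llvm-rtdyld main.o helpers.o -args input.txt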
static int executeInput() {
// Load any dylibs requested on the command line.
loadDylibs();
// Instantiate a dynamic linker.
TrivialMemoryManager MemMgr;
doPreallocation(MemMgr);
RuntimeDyld Dyld(MemMgr, MemMgr);
// If we don't have any input files, read from stdin.
if (!InputFileList.size())
InputFileList.push_back("-");
{
TimeRegion TR(Timers ? &Timers->LoadObjectsTimer : nullptr);
for (auto &File : InputFileList) {
// Load the input memory buffer.
ErrorOr<std::unique_ptr<MemoryBuffer>> InputBuffer =
MemoryBuffer::getFileOrSTDIN(File);
if (std::error_code EC = InputBuffer.getError())
ErrorAndExit("unable to read input: '" + EC.message() + "'");
Expected<std::unique_ptr<ObjectFile>> MaybeObj(
ObjectFile::createObjectFile((*InputBuffer)->getMemBufferRef()));
if (!MaybeObj) {
std::string Buf;
raw_string_ostream OS(Buf);
logAllUnhandledErrors(MaybeObj.takeError(), OS);
OS.flush();
ErrorAndExit("unable to create object file: '" + Buf + "'");
}
ObjectFile &Obj = **MaybeObj;
// Load the object file
Dyld.loadObject(Obj);
if (Dyld.hasError()) {
ErrorAndExit(Dyld.getErrorString());
}
}
}
{
TimeRegion TR(Timers ? &Timers->LinkTimer : nullptr);
// Resolve all the relocations we can.
// FIXME: Error out if there are unresolved relocations.
Dyld.resolveRelocations();
}
// Get the address of the entry point (_main by default).
void *MainAddress = Dyld.getSymbolLocalAddress(EntryPoint);
if (!MainAddress)
ErrorAndExit("no definition for '" + EntryPoint + "'");
// Invalidate the instruction cache for each loaded function.
for (auto &FM : MemMgr.FunctionMemory) {
auto &FM_MB = FM.MB;
// Make sure the memory is executable.
// setExecutable will call InvalidateInstructionCache.
if (auto EC = sys::Memory::protectMappedMemory(FM_MB,
sys::Memory::MF_READ |
sys::Memory::MF_EXEC))
ErrorAndExit("unable to mark function executable: '" + EC.message() +
"'");
}
// Dispatch to _main().
errs() << "loaded '" << EntryPoint << "' at: " << (void*)MainAddress << "\n";
int (*Main)(int, const char**) =
(int(*)(int,const char**)) uintptr_t(MainAddress);
std::vector<const char *> Argv;
// Use the name of the first input object module as argv[0] for the target.
Argv.push_back(InputFileList[0].data());
for (auto &Arg : InputArgv)
Argv.push_back(Arg.data());
Argv.push_back(nullptr);
int Result = 0;
{
TimeRegion TR(Timers ? &Timers->RunTimer : nullptr);
Result = Main(Argv.size() - 1, Argv.data());
}
return Result;
}
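// Run every RuntimeDyldChecker rule file passed via -check against the linked
// image. Rules are the lines prefixed with "# rtdyld-check:" in those files.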
static int checkAllExpressions(RuntimeDyldChecker &Checker) {
for (const auto& CheckerFileName : CheckFiles) {
ErrorOr<std::unique_ptr<MemoryBuffer>> CheckerFileBuf =
MemoryBuffer::getFileOrSTDIN(CheckerFileName);
if (std::error_code EC = CheckerFileBuf.getError())
ErrorAndExit("unable to read input '" + CheckerFileName + "': " +
EC.message());
if (!Checker.checkAllRulesInBuffer("# rtdyld-check:",
CheckerFileBuf.get().get()))
ErrorAndExit("some checks in '" + CheckerFileName + "' failed");
}
return 0;
}
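// Apply any -map-section requests. Each mapping has the form
// '<file name>,<section name>=<addr>', for example (names illustrative only):
//   -map-section=foo.o,.text=0x10000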
void applySpecificSectionMappings(RuntimeDyld &Dyld,
const FileToSectionIDMap &FileToSecIDMap) {
for (StringRef Mapping : SpecificSectionMappings) {
size_t EqualsIdx = Mapping.find_first_of("=");
std::string SectionIDStr = std::string(Mapping.substr(0, EqualsIdx));
size_t ComaIdx = Mapping.find_first_of(",");
if (ComaIdx == StringRef::npos)
report_fatal_error("Invalid section specification '" + Mapping +
"'. Should be '<file name>,<section name>=<addr>'");
std::string FileName = SectionIDStr.substr(0, ComaIdx);
std::string SectionName = SectionIDStr.substr(ComaIdx + 1);
unsigned SectionID =
ExitOnErr(getSectionId(FileToSecIDMap, FileName, SectionName));
auto* OldAddr = Dyld.getSectionContent(SectionID).data();
std::string NewAddrStr = std::string(Mapping.substr(EqualsIdx + 1));
uint64_t NewAddr;
if (StringRef(NewAddrStr).getAsInteger(0, NewAddr))
report_fatal_error("Invalid section address in mapping '" + Mapping +
"'.");
Dyld.mapSectionAddress(OldAddr, NewAddr);
}
}
// Scatter sections in all directions!
// Remaps section addresses for -verify mode. The following command line options
// can be used to customize the layout of the memory within the phony target's
// address space:
// -target-addr-start <s> -- Specify where the phony target address range starts.
// -target-addr-end <e> -- Specify where the phony target address range ends.
// -target-section-sep <d> -- Specify how big a gap should be left between the
// end of one section and the start of the next.
// Defaults to zero. Set to something big
// (e.g. 1 << 32) to stress-test stubs, GOTs, etc.
//
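// Example -verify invocation exercising these options (file and check names
// are illustrative only):
//   llvm-rtdyld -verify -triple=x86_64-unknown-freebsd -check=foo.rtdyld \
//       -target-addr-start=0x10000 -target-section-sep=0x100000000 foo.o
//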
static void remapSectionsAndSymbols(const llvm::Triple &TargetTriple,
RuntimeDyld &Dyld,
TrivialMemoryManager &MemMgr) {
// Set up a work list (section addr/size pairs).
typedef std::list<const TrivialMemoryManager::SectionInfo*> WorklistT;
WorklistT Worklist;
for (const auto& CodeSection : MemMgr.FunctionMemory)
Worklist.push_back(&CodeSection);
for (const auto& DataSection : MemMgr.DataMemory)
Worklist.push_back(&DataSection);
// Keep an "already allocated" mapping of section target addresses to sizes.
// Sections whose address mappings aren't specified on the command line will
be allocated around the explicitly mapped sections while maintaining the
// minimum separation.
std::map<uint64_t, uint64_t> AlreadyAllocated;
// Move the previously applied mappings (whether explicitly specified on the
// command line, or implicitly set by RuntimeDyld) into the already-allocated
// map.
for (WorklistT::iterator I = Worklist.begin(), E = Worklist.end();
I != E;) {
WorklistT::iterator Tmp = I;
++I;
auto LoadAddr = Dyld.getSectionLoadAddress((*Tmp)->SectionID);
if (LoadAddr != static_cast<uint64_t>(
reinterpret_cast<uintptr_t>((*Tmp)->MB.base()))) {
// A section will have a LoadAddr of 0 if it wasn't loaded for whatever
// reason (e.g. zero byte COFF sections). Don't include those sections in
// the allocation map.
if (LoadAddr != 0)
AlreadyAllocated[LoadAddr] = (*Tmp)->MB.allocatedSize();
Worklist.erase(Tmp);
}
}
// If the -target-addr-end option wasn't explicitly passed, then set it to a
// sensible default based on the target triple.
if (TargetAddrEnd.getNumOccurrences() == 0) {
if (TargetTriple.isArch16Bit())
TargetAddrEnd = (1ULL << 16) - 1;
else if (TargetTriple.isArch32Bit())
TargetAddrEnd = (1ULL << 32) - 1;
// TargetAddrEnd already has a sensible default for 64-bit systems, so
// there's nothing to do in the 64-bit case.
}
// Process any elements remaining in the worklist.
while (!Worklist.empty()) {
auto *CurEntry = Worklist.front();
Worklist.pop_front();
uint64_t NextSectionAddr = TargetAddrStart;
for (const auto &Alloc : AlreadyAllocated)
if (NextSectionAddr + CurEntry->MB.allocatedSize() + TargetSectionSep <=
Alloc.first)
break;
else
NextSectionAddr = Alloc.first + Alloc.second + TargetSectionSep;
Dyld.mapSectionAddress(CurEntry->MB.base(), NextSectionAddr);
AlreadyAllocated[NextSectionAddr] = CurEntry->MB.allocatedSize();
}
// Add dummy symbols to the memory manager.
for (const auto &Mapping : DummySymbolMappings) {
size_t EqualsIdx = Mapping.find_first_of('=');
if (EqualsIdx == StringRef::npos)
report_fatal_error(Twine("Invalid dummy symbol specification '") +
Mapping + "'. Should be '<symbol name>=<addr>'");
std::string Symbol = Mapping.substr(0, EqualsIdx);
std::string AddrStr = Mapping.substr(EqualsIdx + 1);
uint64_t Addr;
if (StringRef(AddrStr).getAsInteger(0, Addr))
report_fatal_error(Twine("Invalid symbol mapping '") + Mapping + "'.");
MemMgr.addDummySymbol(Symbol, Addr);
}
}
// Load and link the objects specified on the command line, but do not execute
// anything. Instead, attach a RuntimeDyldChecker instance and call it to
// verify the correctness of the linked memory.
static int linkAndVerify() {
// Check for missing triple.
if (TripleName == "")
ErrorAndExit("-triple required when running in -verify mode.");
// Look up the target and build the disassembler.
Triple TheTriple(Triple::normalize(TripleName));
std::string ErrorStr;
const Target *TheTarget =
TargetRegistry::lookupTarget("", TheTriple, ErrorStr);
if (!TheTarget)
ErrorAndExit("Error accessing target '" + TripleName + "': " + ErrorStr);
TripleName = TheTriple.getTriple();
std::unique_ptr<MCSubtargetInfo> STI(
TheTarget->createMCSubtargetInfo(TripleName, MCPU, ""));
if (!STI)
ErrorAndExit("Unable to create subtarget info!");
std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName));
if (!MRI)
ErrorAndExit("Unable to create target register info!");
MCTargetOptions MCOptions;
std::unique_ptr<MCAsmInfo> MAI(
TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
if (!MAI)
ErrorAndExit("Unable to create target asm info!");
MCContext Ctx(Triple(TripleName), MAI.get(), MRI.get(), STI.get());
std::unique_ptr<MCDisassembler> Disassembler(
TheTarget->createMCDisassembler(*STI, Ctx));
if (!Disassembler)
ErrorAndExit("Unable to create disassembler!");
std::unique_ptr<MCInstrInfo> MII(TheTarget->createMCInstrInfo());
if (!MII)
ErrorAndExit("Unable to create target instruction info!");
std::unique_ptr<MCInstPrinter> InstPrinter(
TheTarget->createMCInstPrinter(Triple(TripleName), 0, *MAI, *MII, *MRI));
// Load any dylibs requested on the command line.
loadDylibs();
// Instantiate a dynamic linker.
TrivialMemoryManager MemMgr;
doPreallocation(MemMgr);
struct StubID {
unsigned SectionID;
uint32_t Offset;
};
using StubInfos = StringMap<StubID>;
using StubContainers = StringMap<StubInfos>;
StubContainers StubMap;
RuntimeDyld Dyld(MemMgr, MemMgr);
Dyld.setProcessAllSections(true);
Dyld.setNotifyStubEmitted([&StubMap](StringRef FilePath,
StringRef SectionName,
StringRef SymbolName, unsigned SectionID,
uint32_t StubOffset) {
std::string ContainerName =
(sys::path::filename(FilePath) + "/" + SectionName).str();
StubMap[ContainerName][SymbolName] = {SectionID, StubOffset};
});
auto GetSymbolInfo =
[&Dyld, &MemMgr](
StringRef Symbol) -> Expected<RuntimeDyldChecker::MemoryRegionInfo> {
RuntimeDyldChecker::MemoryRegionInfo SymInfo;
// First get the target address.
if (auto InternalSymbol = Dyld.getSymbol(Symbol))
SymInfo.setTargetAddress(InternalSymbol.getAddress());
else {
// Symbol not found in RuntimeDyld. Fall back to external lookup.
#ifdef _MSC_VER
using ExpectedLookupResult =
MSVCPExpected<JITSymbolResolver::LookupResult>;
#else
using ExpectedLookupResult = Expected<JITSymbolResolver::LookupResult>;
#endif
auto ResultP = std::make_shared<std::promise<ExpectedLookupResult>>();
auto ResultF = ResultP->get_future();
MemMgr.lookup(JITSymbolResolver::LookupSet({Symbol}),
[=](Expected<JITSymbolResolver::LookupResult> Result) {
ResultP->set_value(std::move(Result));
});
auto Result = ResultF.get();
if (!Result)
return Result.takeError();
auto I = Result->find(Symbol);
assert(I != Result->end() &&
"Expected symbol address if no error occurred");
SymInfo.setTargetAddress(I->second.getAddress());
}
// Now find the symbol content if possible (otherwise leave content as a
// default-constructed StringRef).
if (auto *SymAddr = Dyld.getSymbolLocalAddress(Symbol)) {
unsigned SectionID = Dyld.getSymbolSectionID(Symbol);
if (SectionID != ~0U) {
char *CSymAddr = static_cast<char *>(SymAddr);
StringRef SecContent = Dyld.getSectionContent(SectionID);
uint64_t SymSize = SecContent.size() - (CSymAddr - SecContent.data());
SymInfo.setContent(ArrayRef<char>(CSymAddr, SymSize));
}
}
return SymInfo;
};
auto IsSymbolValid = [&Dyld, GetSymbolInfo](StringRef Symbol) {
if (Dyld.getSymbol(Symbol))
return true;
auto SymInfo = GetSymbolInfo(Symbol);
if (!SymInfo) {
logAllUnhandledErrors(SymInfo.takeError(), errs(), "RTDyldChecker: ");
return false;
}
return SymInfo->getTargetAddress() != 0;
};
FileToSectionIDMap FileToSecIDMap;
auto GetSectionInfo = [&Dyld, &FileToSecIDMap](StringRef FileName,
StringRef SectionName)
-> Expected<RuntimeDyldChecker::MemoryRegionInfo> {
auto SectionID = getSectionId(FileToSecIDMap, FileName, SectionName);
if (!SectionID)
return SectionID.takeError();
RuntimeDyldChecker::MemoryRegionInfo SecInfo;
SecInfo.setTargetAddress(Dyld.getSectionLoadAddress(*SectionID));
StringRef SecContent = Dyld.getSectionContent(*SectionID);
SecInfo.setContent(ArrayRef<char>(SecContent.data(), SecContent.size()));
return SecInfo;
};
auto GetStubInfo = [&Dyld, &StubMap](StringRef StubContainer,
StringRef SymbolName)
-> Expected<RuntimeDyldChecker::MemoryRegionInfo> {
if (!StubMap.count(StubContainer))
return make_error<StringError>("Stub container not found: " +
StubContainer,
inconvertibleErrorCode());
if (!StubMap[StubContainer].count(SymbolName))
return make_error<StringError>("Symbol name " + SymbolName +
" in stub container " + StubContainer,
inconvertibleErrorCode());
auto &SI = StubMap[StubContainer][SymbolName];
RuntimeDyldChecker::MemoryRegionInfo StubMemInfo;
StubMemInfo.setTargetAddress(Dyld.getSectionLoadAddress(SI.SectionID) +
SI.Offset);
StringRef SecContent =
Dyld.getSectionContent(SI.SectionID).substr(SI.Offset);
StubMemInfo.setContent(
ArrayRef<char>(SecContent.data(), SecContent.size()));
return StubMemInfo;
};
// We will initialize this below once we have the first object file and can
// know the endianness.
std::unique_ptr<RuntimeDyldChecker> Checker;
// If we don't have any input files, read from stdin.
if (!InputFileList.size())
InputFileList.push_back("-");
for (auto &InputFile : InputFileList) {
// Load the input memory buffer.
ErrorOr<std::unique_ptr<MemoryBuffer>> InputBuffer =
MemoryBuffer::getFileOrSTDIN(InputFile);
if (std::error_code EC = InputBuffer.getError())
ErrorAndExit("unable to read input: '" + EC.message() + "'");
Expected<std::unique_ptr<ObjectFile>> MaybeObj(
ObjectFile::createObjectFile((*InputBuffer)->getMemBufferRef()));
if (!MaybeObj) {
std::string Buf;
raw_string_ostream OS(Buf);
logAllUnhandledErrors(MaybeObj.takeError(), OS);
OS.flush();
ErrorAndExit("unable to create object file: '" + Buf + "'");
}
ObjectFile &Obj = **MaybeObj;
if (!Checker)
Checker = std::make_unique<RuntimeDyldChecker>(
IsSymbolValid, GetSymbolInfo, GetSectionInfo, GetStubInfo,
GetStubInfo, Obj.isLittleEndian() ? support::little : support::big,
Disassembler.get(), InstPrinter.get(), dbgs());
auto FileName = sys::path::filename(InputFile);
MemMgr.setSectionIDsMap(&FileToSecIDMap[FileName]);
// Load the object file
Dyld.loadObject(Obj);
if (Dyld.hasError()) {
ErrorAndExit(Dyld.getErrorString());
}
}
// Re-map the section addresses into the phony target address space and add
// dummy symbols.
applySpecificSectionMappings(Dyld, FileToSecIDMap);
remapSectionsAndSymbols(TheTriple, Dyld, MemMgr);
// Resolve all the relocations we can.
Dyld.resolveRelocations();
// Register EH frames.
Dyld.registerEHFrames();
int ErrorCode = checkAllExpressions(*Checker);
if (Dyld.hasError())
ErrorAndExit("RTDyld reported an error applying relocations:\n " +
Dyld.getErrorString());
return ErrorCode;
}
int main(int argc, char **argv) {
InitLLVM X(argc, argv);
ProgramName = argv[0];
llvm::InitializeAllTargetInfos();
llvm::InitializeAllTargetMCs();
llvm::InitializeAllDisassemblers();
cl::HideUnrelatedOptions({&RTDyldCategory, &getColorCategory()});
cl::ParseCommandLineOptions(argc, argv, "llvm MC-JIT tool\n");
ExitOnErr.setBanner(std::string(argv[0]) + ": ");
Timers = ShowTimes ? std::make_unique<RTDyldTimers>() : nullptr;
int Result = 0;
switch (Action) {
case AC_Execute:
Result = executeInput();
break;
case AC_PrintDebugLineInfo:
Result =
printLineInfoForInput(/* LoadObjects */ true, /* UseDebugObj */ true);
break;
case AC_PrintLineInfo:
Result =
printLineInfoForInput(/* LoadObjects */ true, /* UseDebugObj */ false);
break;
case AC_PrintObjectLineInfo:
Result =
printLineInfoForInput(/* LoadObjects */ false, /* UseDebugObj */ false);
break;
case AC_Verify:
Result = linkAndVerify();
break;
}
return Result;
}
diff --git a/lib/clang/include/VCSVersion.inc b/lib/clang/include/VCSVersion.inc
index 10533dfb19d6..073becaaedab 100644
--- a/lib/clang/include/VCSVersion.inc
+++ b/lib/clang/include/VCSVersion.inc
@@ -1,10 +1,10 @@
// $FreeBSD$
-#define LLVM_REVISION "llvmorg-14.0.0-rc1-74-g4dc3cb8e3255"
+#define LLVM_REVISION "llvmorg-14.0.0-rc2-12-g09546e1b5103"
#define LLVM_REPOSITORY "https://github.com/llvm/llvm-project.git"
-#define CLANG_REVISION "llvmorg-14.0.0-rc1-74-g4dc3cb8e3255"
+#define CLANG_REVISION "llvmorg-14.0.0-rc2-12-g09546e1b5103"
#define CLANG_REPOSITORY "https://github.com/llvm/llvm-project.git"
-#define LLDB_REVISION "llvmorg-14.0.0-rc1-74-g4dc3cb8e3255"
+#define LLDB_REVISION "llvmorg-14.0.0-rc2-12-g09546e1b5103"
#define LLDB_REPOSITORY "https://github.com/llvm/llvm-project.git"
diff --git a/lib/clang/include/lld/Common/Version.inc b/lib/clang/include/lld/Common/Version.inc
index 6956f2bb2c45..10526b338fbc 100644
--- a/lib/clang/include/lld/Common/Version.inc
+++ b/lib/clang/include/lld/Common/Version.inc
@@ -1,4 +1,4 @@
// Local identifier in __FreeBSD_version style
#define LLD_FREEBSD_VERSION 1400003
-#define LLD_VERSION_STRING "14.0.0 (FreeBSD llvmorg-14.0.0-rc1-74-g4dc3cb8e3255-" __XSTRING(LLD_FREEBSD_VERSION) ")"
+#define LLD_VERSION_STRING "14.0.0 (FreeBSD llvmorg-14.0.0-rc2-12-g09546e1b5103-" __XSTRING(LLD_FREEBSD_VERSION) ")"
diff --git a/lib/clang/include/llvm/Support/VCSRevision.h b/lib/clang/include/llvm/Support/VCSRevision.h
index 4c259487d494..7444d5650fab 100644
--- a/lib/clang/include/llvm/Support/VCSRevision.h
+++ b/lib/clang/include/llvm/Support/VCSRevision.h
@@ -1,3 +1,3 @@
/* $FreeBSD$ */
-#define LLVM_REVISION "llvmorg-14.0.0-rc1-74-g4dc3cb8e3255"
+#define LLVM_REVISION "llvmorg-14.0.0-rc2-12-g09546e1b5103"
#define LLVM_REPOSITORY "https://github.com/llvm/llvm-project.git"
diff --git a/lib/clang/libllvm/Makefile b/lib/clang/libllvm/Makefile
index 51c4b2d2938d..31cf33e0f9bd 100644
--- a/lib/clang/libllvm/Makefile
+++ b/lib/clang/libllvm/Makefile
@@ -1,2031 +1,2032 @@
# $FreeBSD$
.include <src.opts.mk>
.include "../llvm.pre.mk"
LIB= llvm
INTERNALLIB=
CFLAGS+= -I${.OBJDIR}
.if ${MK_LLVM_TARGET_AARCH64} == "no" && ${MK_LLVM_TARGET_ARM} == "no" && \
${MK_LLVM_TARGET_BPF} == "no" && ${MK_LLVM_TARGET_MIPS} == "no" && \
${MK_LLVM_TARGET_POWERPC} == "no" && ${MK_LLVM_TARGET_RISCV} == "no" && \
${MK_LLVM_TARGET_X86} == "no"
.error Please enable at least one of: MK_LLVM_TARGET_AARCH64,\
MK_LLVM_TARGET_ARM, MK_LLVM_TARGET_BPF, MK_LLVM_TARGET_MIPS, \
MK_LLVM_TARGET_POWERPC, MK_LLVM_TARGET_RISCV, or MK_LLVM_TARGET_X86
.endif
.for arch in AArch64 ARM BPF Mips PowerPC RISCV X86
. if ${MK_LLVM_TARGET_${arch:tu}} != "no"
CFLAGS+= -I${LLVM_SRCS}/lib/Target/${arch}
. endif
.endfor
SRCDIR= llvm/lib
# Explanation of different SRCS variants below:
# SRCS_MIN: always required, even for bootstrap
# SRCS_MIW: required for world stage (after cross-tools)
# SRCS_EXT: required for MK_CLANG_EXTRAS
# SRCS_EXL: required for MK_CLANG_EXTRAS and MK_LLD
# SRCS_FUL: required for MK_CLANG_FULL
# SRCS_LLD: required for MK_LLD
# SRCS_XDB: required for MK_CLANG_EXTRAS and MK_LLDB
# SRCS_XDL: required for MK_CLANG_EXTRAS, MK_LLD and MK_LLDB
# SRCS_XDW: required for MK_CLANG_EXTRAS and MK_LLDB in world stage
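# Illustrative sketch only; the actual conditionals appear further down this
# Makefile. Each list is folded into SRCS when its corresponding knob is
# enabled, roughly:
#   SRCS+=	${SRCS_MIN}
#   .if ${MK_CLANG_EXTRAS} != "no"
#   SRCS+=	${SRCS_EXT}
#   .endif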
SRCS_MIN+= Analysis/AliasAnalysis.cpp
SRCS_MIN+= Analysis/AliasAnalysisEvaluator.cpp
SRCS_MIN+= Analysis/AliasAnalysisSummary.cpp
SRCS_MIN+= Analysis/AliasSetTracker.cpp
SRCS_EXT+= Analysis/Analysis.cpp
SRCS_MIN+= Analysis/AssumeBundleQueries.cpp
SRCS_MIN+= Analysis/AssumptionCache.cpp
SRCS_MIN+= Analysis/BasicAliasAnalysis.cpp
SRCS_MIN+= Analysis/BlockFrequencyInfo.cpp
SRCS_MIN+= Analysis/BlockFrequencyInfoImpl.cpp
SRCS_MIN+= Analysis/BranchProbabilityInfo.cpp
SRCS_MIN+= Analysis/CFG.cpp
SRCS_MIN+= Analysis/CFGPrinter.cpp
SRCS_MIN+= Analysis/CFLAndersAliasAnalysis.cpp
SRCS_MIN+= Analysis/CFLSteensAliasAnalysis.cpp
SRCS_MIN+= Analysis/CGSCCPassManager.cpp
SRCS_MIN+= Analysis/CallGraph.cpp
SRCS_MIN+= Analysis/CallGraphSCCPass.cpp
SRCS_MIN+= Analysis/CallPrinter.cpp
SRCS_MIN+= Analysis/CaptureTracking.cpp
SRCS_MIN+= Analysis/CmpInstAnalysis.cpp
SRCS_MIN+= Analysis/CodeMetrics.cpp
SRCS_MIN+= Analysis/ConstantFolding.cpp
SRCS_MIN+= Analysis/ConstraintSystem.cpp
SRCS_MIN+= Analysis/CostModel.cpp
SRCS_MIN+= Analysis/CycleAnalysis.cpp
SRCS_MIN+= Analysis/DDG.cpp
SRCS_MIN+= Analysis/DDGPrinter.cpp
SRCS_MIN+= Analysis/Delinearization.cpp
SRCS_MIN+= Analysis/DemandedBits.cpp
SRCS_MIN+= Analysis/DependenceAnalysis.cpp
SRCS_MIN+= Analysis/DependenceGraphBuilder.cpp
SRCS_MIN+= Analysis/DivergenceAnalysis.cpp
SRCS_MIN+= Analysis/DomPrinter.cpp
SRCS_MIN+= Analysis/DomTreeUpdater.cpp
SRCS_MIN+= Analysis/DominanceFrontier.cpp
SRCS_MIN+= Analysis/EHPersonalities.cpp
SRCS_MIN+= Analysis/FunctionPropertiesAnalysis.cpp
SRCS_MIN+= Analysis/GlobalsModRef.cpp
SRCS_MIN+= Analysis/GuardUtils.cpp
SRCS_MIN+= Analysis/HeatUtils.cpp
SRCS_MIN+= Analysis/IRSimilarityIdentifier.cpp
SRCS_MIN+= Analysis/IVDescriptors.cpp
SRCS_MIN+= Analysis/IVUsers.cpp
SRCS_MIN+= Analysis/ImportedFunctionsInliningStatistics.cpp
SRCS_MIN+= Analysis/IndirectCallPromotionAnalysis.cpp
SRCS_MIN+= Analysis/InlineAdvisor.cpp
SRCS_MIN+= Analysis/InlineCost.cpp
SRCS_MIN+= Analysis/InlineSizeEstimatorAnalysis.cpp
SRCS_MIN+= Analysis/InstCount.cpp
SRCS_MIN+= Analysis/InstructionPrecedenceTracking.cpp
SRCS_MIN+= Analysis/InstructionSimplify.cpp
SRCS_MIN+= Analysis/Interval.cpp
SRCS_MIN+= Analysis/IntervalPartition.cpp
SRCS_MIN+= Analysis/LazyBlockFrequencyInfo.cpp
SRCS_MIN+= Analysis/LazyBranchProbabilityInfo.cpp
SRCS_MIN+= Analysis/LazyCallGraph.cpp
SRCS_MIN+= Analysis/LazyValueInfo.cpp
SRCS_MIN+= Analysis/LegacyDivergenceAnalysis.cpp
SRCS_MIN+= Analysis/Lint.cpp
SRCS_MIN+= Analysis/Loads.cpp
SRCS_MIN+= Analysis/LoopAccessAnalysis.cpp
SRCS_MIN+= Analysis/LoopAnalysisManager.cpp
SRCS_MIN+= Analysis/LoopCacheAnalysis.cpp
SRCS_MIN+= Analysis/LoopInfo.cpp
SRCS_MIN+= Analysis/LoopNestAnalysis.cpp
SRCS_MIN+= Analysis/LoopPass.cpp
SRCS_MIN+= Analysis/LoopUnrollAnalyzer.cpp
SRCS_MIN+= Analysis/MemDepPrinter.cpp
SRCS_MIN+= Analysis/MemDerefPrinter.cpp
SRCS_MIN+= Analysis/MemoryBuiltins.cpp
SRCS_MIN+= Analysis/MemoryDependenceAnalysis.cpp
SRCS_MIN+= Analysis/MemoryLocation.cpp
SRCS_MIN+= Analysis/MemorySSA.cpp
SRCS_MIN+= Analysis/MemorySSAUpdater.cpp
SRCS_MIN+= Analysis/ModuleDebugInfoPrinter.cpp
SRCS_MIN+= Analysis/ModuleSummaryAnalysis.cpp
SRCS_MIN+= Analysis/MustExecute.cpp
SRCS_MIN+= Analysis/ObjCARCAliasAnalysis.cpp
SRCS_MIN+= Analysis/ObjCARCAnalysisUtils.cpp
SRCS_MIN+= Analysis/ObjCARCInstKind.cpp
SRCS_MIN+= Analysis/OptimizationRemarkEmitter.cpp
SRCS_MIN+= Analysis/OverflowInstAnalysis.cpp
SRCS_MIN+= Analysis/PHITransAddr.cpp
SRCS_MIN+= Analysis/PhiValues.cpp
SRCS_MIN+= Analysis/PostDominators.cpp
SRCS_MIN+= Analysis/ProfileSummaryInfo.cpp
SRCS_MIN+= Analysis/PtrUseVisitor.cpp
SRCS_MIN+= Analysis/RegionInfo.cpp
SRCS_MIN+= Analysis/RegionPass.cpp
SRCS_MIN+= Analysis/RegionPrinter.cpp
SRCS_MIN+= Analysis/ReplayInlineAdvisor.cpp
SRCS_MIN+= Analysis/ScalarEvolution.cpp
SRCS_MIN+= Analysis/ScalarEvolutionAliasAnalysis.cpp
SRCS_MIN+= Analysis/ScalarEvolutionDivision.cpp
SRCS_MIN+= Analysis/ScalarEvolutionNormalization.cpp
SRCS_MIN+= Analysis/ScopedNoAliasAA.cpp
SRCS_MIN+= Analysis/StackLifetime.cpp
SRCS_MIN+= Analysis/StackSafetyAnalysis.cpp
SRCS_MIN+= Analysis/SyncDependenceAnalysis.cpp
SRCS_MIN+= Analysis/SyntheticCountsUtils.cpp
SRCS_MIN+= Analysis/TargetLibraryInfo.cpp
SRCS_MIN+= Analysis/TargetTransformInfo.cpp
SRCS_MIN+= Analysis/TypeBasedAliasAnalysis.cpp
SRCS_MIN+= Analysis/TypeMetadataUtils.cpp
SRCS_MIN+= Analysis/VFABIDemangling.cpp
SRCS_MIN+= Analysis/ValueLattice.cpp
SRCS_MIN+= Analysis/ValueLatticeUtils.cpp
SRCS_MIN+= Analysis/ValueTracking.cpp
SRCS_MIN+= Analysis/VectorUtils.cpp
SRCS_MIN+= AsmParser/LLLexer.cpp
SRCS_MIN+= AsmParser/LLParser.cpp
SRCS_MIN+= AsmParser/Parser.cpp
SRCS_MIN+= BinaryFormat/AMDGPUMetadataVerifier.cpp
+SRCS_MIN+= BinaryFormat/COFF.cpp
SRCS_MIN+= BinaryFormat/Dwarf.cpp
SRCS_MIN+= BinaryFormat/Magic.cpp
SRCS_MIN+= BinaryFormat/MachO.cpp
SRCS_MIN+= BinaryFormat/MsgPackDocument.cpp
SRCS_MIN+= BinaryFormat/MsgPackDocumentYAML.cpp
SRCS_MIN+= BinaryFormat/MsgPackReader.cpp
SRCS_MIN+= BinaryFormat/MsgPackWriter.cpp
SRCS_MIN+= BinaryFormat/Wasm.cpp
SRCS_MIN+= BinaryFormat/XCOFF.cpp
SRCS_MIN+= Bitcode/Reader/BitReader.cpp
SRCS_EXT+= Bitcode/Reader/BitcodeAnalyzer.cpp
SRCS_MIN+= Bitcode/Reader/BitcodeReader.cpp
SRCS_MIN+= Bitcode/Reader/MetadataLoader.cpp
SRCS_MIN+= Bitcode/Reader/ValueList.cpp
SRCS_MIN+= Bitcode/Writer/BitcodeWriter.cpp
SRCS_MIN+= Bitcode/Writer/BitcodeWriterPass.cpp
SRCS_MIN+= Bitcode/Writer/ValueEnumerator.cpp
SRCS_MIN+= Bitstream/Reader/BitstreamReader.cpp
SRCS_MIN+= CodeGen/AggressiveAntiDepBreaker.cpp
SRCS_MIN+= CodeGen/AllocationOrder.cpp
SRCS_MIN+= CodeGen/Analysis.cpp
SRCS_MIN+= CodeGen/AsmPrinter/AIXException.cpp
SRCS_MIN+= CodeGen/AsmPrinter/ARMException.cpp
SRCS_MIN+= CodeGen/AsmPrinter/AccelTable.cpp
SRCS_MIN+= CodeGen/AsmPrinter/AddressPool.cpp
SRCS_MIN+= CodeGen/AsmPrinter/AsmPrinter.cpp
SRCS_MIN+= CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
SRCS_MIN+= CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
SRCS_MIN+= CodeGen/AsmPrinter/CodeViewDebug.cpp
SRCS_MIN+= CodeGen/AsmPrinter/DIE.cpp
SRCS_MIN+= CodeGen/AsmPrinter/DIEHash.cpp
SRCS_MIN+= CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
SRCS_MIN+= CodeGen/AsmPrinter/DebugHandlerBase.cpp
SRCS_MIN+= CodeGen/AsmPrinter/DebugLocStream.cpp
SRCS_MIN+= CodeGen/AsmPrinter/DwarfCFIException.cpp
SRCS_MIN+= CodeGen/AsmPrinter/DwarfCompileUnit.cpp
SRCS_MIN+= CodeGen/AsmPrinter/DwarfDebug.cpp
SRCS_MIN+= CodeGen/AsmPrinter/DwarfExpression.cpp
SRCS_MIN+= CodeGen/AsmPrinter/DwarfFile.cpp
SRCS_MIN+= CodeGen/AsmPrinter/DwarfStringPool.cpp
SRCS_MIN+= CodeGen/AsmPrinter/DwarfUnit.cpp
SRCS_MIN+= CodeGen/AsmPrinter/EHStreamer.cpp
SRCS_EXT+= CodeGen/AsmPrinter/ErlangGCPrinter.cpp
SRCS_MIN+= CodeGen/AsmPrinter/OcamlGCPrinter.cpp
SRCS_MIN+= CodeGen/AsmPrinter/PseudoProbePrinter.cpp
SRCS_MIN+= CodeGen/AsmPrinter/WasmException.cpp
SRCS_MIN+= CodeGen/AsmPrinter/WinCFGuard.cpp
SRCS_MIN+= CodeGen/AsmPrinter/WinException.cpp
SRCS_MIN+= CodeGen/AtomicExpandPass.cpp
SRCS_MIN+= CodeGen/BasicBlockSections.cpp
SRCS_MIN+= CodeGen/BasicTargetTransformInfo.cpp
SRCS_MIN+= CodeGen/BranchFolding.cpp
SRCS_MIN+= CodeGen/BranchRelaxation.cpp
SRCS_MIN+= CodeGen/BreakFalseDeps.cpp
SRCS_MIN+= CodeGen/CFGuardLongjmp.cpp
SRCS_MIN+= CodeGen/CFIInstrInserter.cpp
SRCS_MIN+= CodeGen/CalcSpillWeights.cpp
SRCS_MIN+= CodeGen/CallingConvLower.cpp
SRCS_MIN+= CodeGen/CodeGen.cpp
SRCS_MIN+= CodeGen/CodeGenCommonISel.cpp
SRCS_MIN+= CodeGen/CodeGenPrepare.cpp
SRCS_EXL+= CodeGen/CommandFlags.cpp
SRCS_MIN+= CodeGen/CriticalAntiDepBreaker.cpp
SRCS_MIN+= CodeGen/DFAPacketizer.cpp
SRCS_MIN+= CodeGen/DeadMachineInstructionElim.cpp
SRCS_MIN+= CodeGen/DetectDeadLanes.cpp
SRCS_MIN+= CodeGen/DwarfEHPrepare.cpp
SRCS_MIN+= CodeGen/EHContGuardCatchret.cpp
SRCS_MIN+= CodeGen/EarlyIfConversion.cpp
SRCS_MIN+= CodeGen/EdgeBundles.cpp
SRCS_MIN+= CodeGen/ExecutionDomainFix.cpp
SRCS_MIN+= CodeGen/ExpandMemCmp.cpp
SRCS_MIN+= CodeGen/ExpandPostRAPseudos.cpp
SRCS_MIN+= CodeGen/ExpandReductions.cpp
SRCS_MIN+= CodeGen/ExpandVectorPredication.cpp
SRCS_MIN+= CodeGen/FEntryInserter.cpp
SRCS_MIN+= CodeGen/FaultMaps.cpp
SRCS_MIN+= CodeGen/FinalizeISel.cpp
SRCS_MIN+= CodeGen/FixupStatepointCallerSaved.cpp
SRCS_MIN+= CodeGen/FuncletLayout.cpp
SRCS_MIN+= CodeGen/GCMetadata.cpp
SRCS_MIN+= CodeGen/GCMetadataPrinter.cpp
SRCS_MIN+= CodeGen/GCRootLowering.cpp
SRCS_MIN+= CodeGen/GlobalISel/CSEInfo.cpp
SRCS_MIN+= CodeGen/GlobalISel/CSEMIRBuilder.cpp
SRCS_MIN+= CodeGen/GlobalISel/Combiner.cpp
SRCS_MIN+= CodeGen/GlobalISel/CombinerHelper.cpp
SRCS_MIN+= CodeGen/GlobalISel/CallLowering.cpp
SRCS_MIN+= CodeGen/GlobalISel/GISelChangeObserver.cpp
SRCS_MIN+= CodeGen/GlobalISel/GISelKnownBits.cpp
SRCS_MIN+= CodeGen/GlobalISel/GlobalISel.cpp
SRCS_MIN+= CodeGen/GlobalISel/IRTranslator.cpp
SRCS_MIN+= CodeGen/GlobalISel/InlineAsmLowering.cpp
SRCS_MIN+= CodeGen/GlobalISel/InstructionSelect.cpp
SRCS_MIN+= CodeGen/GlobalISel/InstructionSelector.cpp
SRCS_MIN+= CodeGen/GlobalISel/LegacyLegalizerInfo.cpp
SRCS_MIN+= CodeGen/GlobalISel/LegalityPredicates.cpp
SRCS_MIN+= CodeGen/GlobalISel/LegalizeMutations.cpp
SRCS_MIN+= CodeGen/GlobalISel/Legalizer.cpp
SRCS_MIN+= CodeGen/GlobalISel/LegalizerHelper.cpp
SRCS_MIN+= CodeGen/GlobalISel/LegalizerInfo.cpp
SRCS_MIN+= CodeGen/GlobalISel/LoadStoreOpt.cpp
SRCS_MIN+= CodeGen/GlobalISel/Localizer.cpp
SRCS_MIN+= CodeGen/GlobalISel/LostDebugLocObserver.cpp
SRCS_MIN+= CodeGen/GlobalISel/MachineIRBuilder.cpp
SRCS_MIN+= CodeGen/GlobalISel/RegBankSelect.cpp
SRCS_MIN+= CodeGen/GlobalISel/RegisterBank.cpp
SRCS_MIN+= CodeGen/GlobalISel/RegisterBankInfo.cpp
SRCS_MIN+= CodeGen/GlobalISel/Utils.cpp
SRCS_MIN+= CodeGen/GlobalMerge.cpp
SRCS_MIN+= CodeGen/HardwareLoops.cpp
SRCS_MIN+= CodeGen/IfConversion.cpp
SRCS_MIN+= CodeGen/ImplicitNullChecks.cpp
SRCS_MIN+= CodeGen/IndirectBrExpandPass.cpp
SRCS_MIN+= CodeGen/InlineSpiller.cpp
SRCS_MIN+= CodeGen/InterferenceCache.cpp
SRCS_MIN+= CodeGen/InterleavedAccessPass.cpp
SRCS_MIN+= CodeGen/InterleavedLoadCombinePass.cpp
SRCS_MIN+= CodeGen/IntrinsicLowering.cpp
SRCS_MIN+= CodeGen/LLVMTargetMachine.cpp
SRCS_MIN+= CodeGen/LatencyPriorityQueue.cpp
SRCS_MIN+= CodeGen/LazyMachineBlockFrequencyInfo.cpp
SRCS_MIN+= CodeGen/LexicalScopes.cpp
SRCS_MIN+= CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
SRCS_MIN+= CodeGen/LiveDebugValues/LiveDebugValues.cpp
SRCS_MIN+= CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
SRCS_MIN+= CodeGen/LiveDebugVariables.cpp
SRCS_MIN+= CodeGen/LiveInterval.cpp
SRCS_MIN+= CodeGen/LiveIntervalCalc.cpp
SRCS_MIN+= CodeGen/LiveIntervalUnion.cpp
SRCS_MIN+= CodeGen/LiveIntervals.cpp
SRCS_MIN+= CodeGen/LivePhysRegs.cpp
SRCS_MIN+= CodeGen/LiveRangeCalc.cpp
SRCS_MIN+= CodeGen/LiveRangeEdit.cpp
SRCS_MIN+= CodeGen/LiveRangeShrink.cpp
SRCS_MIN+= CodeGen/LiveRegMatrix.cpp
SRCS_MIN+= CodeGen/LiveRegUnits.cpp
SRCS_MIN+= CodeGen/LiveStacks.cpp
SRCS_MIN+= CodeGen/LiveVariables.cpp
SRCS_MIN+= CodeGen/LocalStackSlotAllocation.cpp
SRCS_MIN+= CodeGen/LoopTraversal.cpp
SRCS_MIN+= CodeGen/LowLevelType.cpp
SRCS_MIN+= CodeGen/LowerEmuTLS.cpp
SRCS_MIN+= CodeGen/MBFIWrapper.cpp
SRCS_MIN+= CodeGen/MIRCanonicalizerPass.cpp
SRCS_MIN+= CodeGen/MIRFSDiscriminator.cpp
SRCS_MIN+= CodeGen/MIRNamerPass.cpp
SRCS_EXT+= CodeGen/MIRParser/MILexer.cpp
SRCS_EXT+= CodeGen/MIRParser/MIParser.cpp
SRCS_EXT+= CodeGen/MIRParser/MIRParser.cpp
SRCS_MIN+= CodeGen/MIRPrinter.cpp
SRCS_MIN+= CodeGen/MIRPrintingPass.cpp
SRCS_MIN+= CodeGen/MIRSampleProfile.cpp
SRCS_MIN+= CodeGen/MIRVRegNamerUtils.cpp
SRCS_MIN+= CodeGen/MLRegallocEvictAdvisor.cpp
SRCS_MIN+= CodeGen/MachineBasicBlock.cpp
SRCS_MIN+= CodeGen/MachineBlockFrequencyInfo.cpp
SRCS_MIN+= CodeGen/MachineBlockPlacement.cpp
SRCS_MIN+= CodeGen/MachineBranchProbabilityInfo.cpp
SRCS_MIN+= CodeGen/MachineCSE.cpp
SRCS_MIN+= CodeGen/MachineCheckDebugify.cpp
SRCS_MIN+= CodeGen/MachineCombiner.cpp
SRCS_MIN+= CodeGen/MachineCopyPropagation.cpp
SRCS_MIN+= CodeGen/MachineCycleAnalysis.cpp
SRCS_MIN+= CodeGen/MachineDebugify.cpp
SRCS_MIN+= CodeGen/MachineDominanceFrontier.cpp
SRCS_MIN+= CodeGen/MachineDominators.cpp
SRCS_MIN+= CodeGen/MachineFrameInfo.cpp
SRCS_MIN+= CodeGen/MachineFunction.cpp
SRCS_MIN+= CodeGen/MachineFunctionPass.cpp
SRCS_MIN+= CodeGen/MachineFunctionPrinterPass.cpp
SRCS_MIN+= CodeGen/MachineFunctionSplitter.cpp
SRCS_MIN+= CodeGen/MachineInstr.cpp
SRCS_MIN+= CodeGen/MachineInstrBundle.cpp
SRCS_MIN+= CodeGen/MachineLICM.cpp
SRCS_MIN+= CodeGen/MachineLoopInfo.cpp
SRCS_MIN+= CodeGen/MachineLoopUtils.cpp
SRCS_MIN+= CodeGen/MachineModuleInfo.cpp
SRCS_MIN+= CodeGen/MachineModuleInfoImpls.cpp
SRCS_MIN+= CodeGen/MachineModuleSlotTracker.cpp
SRCS_MIN+= CodeGen/MachineOperand.cpp
SRCS_MIN+= CodeGen/MachineOptimizationRemarkEmitter.cpp
SRCS_MIN+= CodeGen/MachineOutliner.cpp
SRCS_MIN+= CodeGen/MachinePipeliner.cpp
SRCS_MIN+= CodeGen/MachinePostDominators.cpp
SRCS_MIN+= CodeGen/MachineRegionInfo.cpp
SRCS_MIN+= CodeGen/MachineRegisterInfo.cpp
SRCS_MIN+= CodeGen/MachineSSAContext.cpp
SRCS_MIN+= CodeGen/MachineSSAUpdater.cpp
SRCS_MIN+= CodeGen/MachineScheduler.cpp
SRCS_MIN+= CodeGen/MachineSink.cpp
SRCS_MIN+= CodeGen/MachineSizeOpts.cpp
SRCS_MIN+= CodeGen/MachineStableHash.cpp
SRCS_MIN+= CodeGen/MachineStripDebug.cpp
SRCS_MIN+= CodeGen/MachineTraceMetrics.cpp
SRCS_MIN+= CodeGen/MachineVerifier.cpp
SRCS_MIN+= CodeGen/MacroFusion.cpp
SRCS_MIN+= CodeGen/ModuloSchedule.cpp
SRCS_MIN+= CodeGen/MultiHazardRecognizer.cpp
SRCS_MIN+= CodeGen/OptimizePHIs.cpp
SRCS_MIN+= CodeGen/PHIElimination.cpp
SRCS_MIN+= CodeGen/PHIEliminationUtils.cpp
SRCS_MIN+= CodeGen/ParallelCG.cpp
SRCS_MIN+= CodeGen/PatchableFunction.cpp
SRCS_MIN+= CodeGen/PeepholeOptimizer.cpp
SRCS_MIN+= CodeGen/PostRAHazardRecognizer.cpp
SRCS_MIN+= CodeGen/PostRASchedulerList.cpp
SRCS_MIN+= CodeGen/PreISelIntrinsicLowering.cpp
SRCS_MIN+= CodeGen/ProcessImplicitDefs.cpp
SRCS_MIN+= CodeGen/PrologEpilogInserter.cpp
SRCS_MIN+= CodeGen/PseudoProbeInserter.cpp
SRCS_MIN+= CodeGen/PseudoSourceValue.cpp
SRCS_MIN+= CodeGen/ReachingDefAnalysis.cpp
SRCS_MIN+= CodeGen/ReplaceWithVeclib.cpp
SRCS_MIN+= CodeGen/RDFGraph.cpp
SRCS_MIN+= CodeGen/RDFLiveness.cpp
SRCS_MIN+= CodeGen/RDFRegisters.cpp
SRCS_MIN+= CodeGen/RegAllocBase.cpp
SRCS_MIN+= CodeGen/RegAllocBasic.cpp
SRCS_MIN+= CodeGen/RegAllocEvictionAdvisor.cpp
SRCS_MIN+= CodeGen/RegAllocFast.cpp
SRCS_MIN+= CodeGen/RegAllocGreedy.cpp
SRCS_MIN+= CodeGen/RegAllocPBQP.cpp
SRCS_MIN+= CodeGen/RegUsageInfoCollector.cpp
SRCS_MIN+= CodeGen/RegUsageInfoPropagate.cpp
SRCS_MIN+= CodeGen/RegisterClassInfo.cpp
SRCS_MIN+= CodeGen/RegisterCoalescer.cpp
SRCS_MIN+= CodeGen/RegisterPressure.cpp
SRCS_MIN+= CodeGen/RegisterScavenging.cpp
SRCS_MIN+= CodeGen/RegisterUsageInfo.cpp
SRCS_MIN+= CodeGen/RemoveRedundantDebugValues.cpp
SRCS_MIN+= CodeGen/RenameIndependentSubregs.cpp
SRCS_MIN+= CodeGen/ResetMachineFunctionPass.cpp
SRCS_MIN+= CodeGen/SafeStack.cpp
SRCS_MIN+= CodeGen/SafeStackLayout.cpp
SRCS_MIN+= CodeGen/ScheduleDAG.cpp
SRCS_MIN+= CodeGen/ScheduleDAGInstrs.cpp
SRCS_MIN+= CodeGen/ScheduleDAGPrinter.cpp
SRCS_MIN+= CodeGen/ScoreboardHazardRecognizer.cpp
SRCS_MIN+= CodeGen/SelectionDAG/DAGCombiner.cpp
SRCS_MIN+= CodeGen/SelectionDAG/FastISel.cpp
SRCS_MIN+= CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
SRCS_MIN+= CodeGen/SelectionDAG/InstrEmitter.cpp
SRCS_MIN+= CodeGen/SelectionDAG/LegalizeDAG.cpp
SRCS_MIN+= CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
SRCS_MIN+= CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
SRCS_MIN+= CodeGen/SelectionDAG/LegalizeTypes.cpp
SRCS_MIN+= CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
SRCS_MIN+= CodeGen/SelectionDAG/LegalizeVectorOps.cpp
SRCS_MIN+= CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
SRCS_MIN+= CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
SRCS_MIN+= CodeGen/SelectionDAG/ScheduleDAGFast.cpp
SRCS_MIN+= CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
SRCS_MIN+= CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
SRCS_MIN+= CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
SRCS_MIN+= CodeGen/SelectionDAG/SelectionDAG.cpp
SRCS_MIN+= CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
SRCS_MIN+= CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
SRCS_MIN+= CodeGen/SelectionDAG/SelectionDAGDumper.cpp
SRCS_MIN+= CodeGen/SelectionDAG/SelectionDAGISel.cpp
SRCS_MIN+= CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
SRCS_MIN+= CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
SRCS_MIN+= CodeGen/SelectionDAG/StatepointLowering.cpp
SRCS_MIN+= CodeGen/SelectionDAG/TargetLowering.cpp
SRCS_MIN+= CodeGen/ShadowStackGCLowering.cpp
SRCS_MIN+= CodeGen/ShrinkWrap.cpp
SRCS_MIN+= CodeGen/SjLjEHPrepare.cpp
SRCS_MIN+= CodeGen/SlotIndexes.cpp
SRCS_MIN+= CodeGen/SpillPlacement.cpp
SRCS_MIN+= CodeGen/SplitKit.cpp
SRCS_MIN+= CodeGen/StackColoring.cpp
SRCS_MIN+= CodeGen/StackMapLivenessAnalysis.cpp
SRCS_MIN+= CodeGen/StackMaps.cpp
SRCS_MIN+= CodeGen/StackProtector.cpp
SRCS_MIN+= CodeGen/StackSlotColoring.cpp
SRCS_MIN+= CodeGen/SwiftErrorValueTracking.cpp
SRCS_MIN+= CodeGen/SwitchLoweringUtils.cpp
SRCS_MIN+= CodeGen/TailDuplication.cpp
SRCS_MIN+= CodeGen/TailDuplicator.cpp
SRCS_MIN+= CodeGen/TargetFrameLoweringImpl.cpp
SRCS_MIN+= CodeGen/TargetInstrInfo.cpp
SRCS_MIN+= CodeGen/TargetLoweringBase.cpp
SRCS_MIN+= CodeGen/TargetLoweringObjectFileImpl.cpp
SRCS_MIN+= CodeGen/TargetOptionsImpl.cpp
SRCS_MIN+= CodeGen/TargetPassConfig.cpp
SRCS_MIN+= CodeGen/TargetRegisterInfo.cpp
SRCS_MIN+= CodeGen/TargetSchedule.cpp
SRCS_MIN+= CodeGen/TargetSubtargetInfo.cpp
SRCS_MIN+= CodeGen/TwoAddressInstructionPass.cpp
SRCS_MIN+= CodeGen/TypePromotion.cpp
SRCS_MIN+= CodeGen/UnreachableBlockElim.cpp
SRCS_MIN+= CodeGen/ValueTypes.cpp
SRCS_MIN+= CodeGen/VirtRegMap.cpp
SRCS_MIN+= CodeGen/WasmEHPrepare.cpp
SRCS_MIN+= CodeGen/WinEHPrepare.cpp
SRCS_MIN+= CodeGen/XRayInstrumentation.cpp
SRCS_EXT+= DWP/DWP.cpp
SRCS_EXT+= DWP/DWPError.cpp
SRCS_EXT+= DebugInfo/CodeView/AppendingTypeTableBuilder.cpp
SRCS_MIN+= DebugInfo/CodeView/CVSymbolVisitor.cpp
SRCS_MIN+= DebugInfo/CodeView/CVTypeVisitor.cpp
SRCS_MIN+= DebugInfo/CodeView/CodeViewError.cpp
SRCS_MIN+= DebugInfo/CodeView/CodeViewRecordIO.cpp
SRCS_MIN+= DebugInfo/CodeView/ContinuationRecordBuilder.cpp
SRCS_MIN+= DebugInfo/CodeView/DebugChecksumsSubsection.cpp
SRCS_EXT+= DebugInfo/CodeView/DebugCrossExSubsection.cpp
SRCS_EXT+= DebugInfo/CodeView/DebugCrossImpSubsection.cpp
SRCS_MIN+= DebugInfo/CodeView/DebugFrameDataSubsection.cpp
SRCS_MIN+= DebugInfo/CodeView/DebugInlineeLinesSubsection.cpp
SRCS_MIN+= DebugInfo/CodeView/DebugLinesSubsection.cpp
SRCS_MIN+= DebugInfo/CodeView/DebugStringTableSubsection.cpp
SRCS_MIW+= DebugInfo/CodeView/DebugSubsection.cpp
SRCS_EXT+= DebugInfo/CodeView/DebugSubsectionRecord.cpp
SRCS_EXT+= DebugInfo/CodeView/DebugSubsectionVisitor.cpp
SRCS_EXT+= DebugInfo/CodeView/DebugSymbolRVASubsection.cpp
SRCS_EXT+= DebugInfo/CodeView/DebugSymbolsSubsection.cpp
SRCS_MIN+= DebugInfo/CodeView/EnumTables.cpp
SRCS_MIN+= DebugInfo/CodeView/Formatters.cpp
SRCS_MIN+= DebugInfo/CodeView/GlobalTypeTableBuilder.cpp
SRCS_MIN+= DebugInfo/CodeView/LazyRandomTypeCollection.cpp
SRCS_MIN+= DebugInfo/CodeView/Line.cpp
SRCS_MIN+= DebugInfo/CodeView/MergingTypeTableBuilder.cpp
SRCS_MIN+= DebugInfo/CodeView/RecordName.cpp
SRCS_MIN+= DebugInfo/CodeView/RecordSerialization.cpp
SRCS_MIN+= DebugInfo/CodeView/SimpleTypeSerializer.cpp
SRCS_EXT+= DebugInfo/CodeView/StringsAndChecksums.cpp
SRCS_MIN+= DebugInfo/CodeView/SymbolDumper.cpp
SRCS_MIN+= DebugInfo/CodeView/SymbolRecordMapping.cpp
SRCS_EXT+= DebugInfo/CodeView/SymbolSerializer.cpp
SRCS_MIN+= DebugInfo/CodeView/TypeDumpVisitor.cpp
SRCS_MIN+= DebugInfo/CodeView/TypeHashing.cpp
SRCS_MIN+= DebugInfo/CodeView/TypeIndex.cpp
SRCS_MIN+= DebugInfo/CodeView/TypeIndexDiscovery.cpp
SRCS_EXT+= DebugInfo/CodeView/TypeRecordHelpers.cpp
SRCS_MIN+= DebugInfo/CodeView/TypeRecordMapping.cpp
SRCS_MIN+= DebugInfo/CodeView/TypeStreamMerger.cpp
SRCS_MIN+= DebugInfo/CodeView/TypeTableCollection.cpp
SRCS_MIW+= DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
SRCS_MIW+= DebugInfo/DWARF/DWARFAcceleratorTable.cpp
SRCS_MIW+= DebugInfo/DWARF/DWARFAddressRange.cpp
SRCS_MIW+= DebugInfo/DWARF/DWARFCompileUnit.cpp
SRCS_MIW+= DebugInfo/DWARF/DWARFContext.cpp
SRCS_MIW+= DebugInfo/DWARF/DWARFDataExtractor.cpp
SRCS_MIW+= DebugInfo/DWARF/DWARFDebugAbbrev.cpp
SRCS_MIW+= DebugInfo/DWARF/DWARFDebugAddr.cpp
SRCS_MIW+= DebugInfo/DWARF/DWARFDebugArangeSet.cpp
SRCS_MIW+= DebugInfo/DWARF/DWARFDebugAranges.cpp
SRCS_MIW+= DebugInfo/DWARF/DWARFDebugFrame.cpp
SRCS_MIW+= DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
SRCS_MIW+= DebugInfo/DWARF/DWARFDebugLine.cpp
SRCS_MIW+= DebugInfo/DWARF/DWARFDebugLoc.cpp
SRCS_MIW+= DebugInfo/DWARF/DWARFDebugMacro.cpp
SRCS_MIW+= DebugInfo/DWARF/DWARFDebugPubTable.cpp
SRCS_MIW+= DebugInfo/DWARF/DWARFDebugRangeList.cpp
SRCS_MIW+= DebugInfo/DWARF/DWARFDebugRnglists.cpp
SRCS_MIW+= DebugInfo/DWARF/DWARFDie.cpp
SRCS_MIN+= DebugInfo/DWARF/DWARFExpression.cpp
SRCS_MIW+= DebugInfo/DWARF/DWARFFormValue.cpp
SRCS_MIW+= DebugInfo/DWARF/DWARFGdbIndex.cpp
SRCS_MIW+= DebugInfo/DWARF/DWARFListTable.cpp
SRCS_MIW+= DebugInfo/DWARF/DWARFTypeUnit.cpp
SRCS_MIW+= DebugInfo/DWARF/DWARFUnit.cpp
SRCS_MIW+= DebugInfo/DWARF/DWARFUnitIndex.cpp
SRCS_MIW+= DebugInfo/DWARF/DWARFVerifier.cpp
SRCS_MIN+= DebugInfo/MSF/MSFBuilder.cpp
SRCS_MIN+= DebugInfo/MSF/MSFCommon.cpp
SRCS_EXT+= DebugInfo/MSF/MSFError.cpp
SRCS_MIN+= DebugInfo/MSF/MappedBlockStream.cpp
SRCS_EXT+= DebugInfo/PDB/GenericError.cpp
SRCS_EXT+= DebugInfo/PDB/IPDBSourceFile.cpp
SRCS_EXT+= DebugInfo/PDB/Native/DbiModuleDescriptor.cpp
SRCS_EXT+= DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
SRCS_EXT+= DebugInfo/PDB/Native/DbiModuleList.cpp
SRCS_EXT+= DebugInfo/PDB/Native/DbiStream.cpp
SRCS_EXT+= DebugInfo/PDB/Native/DbiStreamBuilder.cpp
SRCS_EXT+= DebugInfo/PDB/Native/EnumTables.cpp
SRCS_EXT+= DebugInfo/PDB/Native/GSIStreamBuilder.cpp
SRCS_EXT+= DebugInfo/PDB/Native/GlobalsStream.cpp
SRCS_EXT+= DebugInfo/PDB/Native/Hash.cpp
SRCS_EXT+= DebugInfo/PDB/Native/HashTable.cpp
SRCS_EXT+= DebugInfo/PDB/Native/InfoStream.cpp
SRCS_EXT+= DebugInfo/PDB/Native/InfoStreamBuilder.cpp
SRCS_EXT+= DebugInfo/PDB/Native/InjectedSourceStream.cpp
SRCS_EXT+= DebugInfo/PDB/Native/ModuleDebugStream.cpp
SRCS_EXT+= DebugInfo/PDB/Native/NamedStreamMap.cpp
SRCS_EXT+= DebugInfo/PDB/Native/NativeCompilandSymbol.cpp
SRCS_EXT+= DebugInfo/PDB/Native/NativeEnumGlobals.cpp
SRCS_EXT+= DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp
SRCS_EXT+= DebugInfo/PDB/Native/NativeEnumLineNumbers.cpp
SRCS_EXT+= DebugInfo/PDB/Native/NativeEnumModules.cpp
SRCS_EXT+= DebugInfo/PDB/Native/NativeEnumSymbols.cpp
SRCS_EXT+= DebugInfo/PDB/Native/NativeEnumTypes.cpp
SRCS_EXT+= DebugInfo/PDB/Native/NativeExeSymbol.cpp
SRCS_EXT+= DebugInfo/PDB/Native/NativeFunctionSymbol.cpp
SRCS_EXT+= DebugInfo/PDB/Native/NativeInlineSiteSymbol.cpp
SRCS_EXT+= DebugInfo/PDB/Native/NativeLineNumber.cpp
SRCS_EXT+= DebugInfo/PDB/Native/NativePublicSymbol.cpp
SRCS_EXT+= DebugInfo/PDB/Native/NativeRawSymbol.cpp
SRCS_EXT+= DebugInfo/PDB/Native/NativeSession.cpp
SRCS_EXT+= DebugInfo/PDB/Native/NativeSourceFile.cpp
SRCS_EXT+= DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp
SRCS_EXT+= DebugInfo/PDB/Native/NativeTypeArray.cpp
SRCS_EXT+= DebugInfo/PDB/Native/NativeTypeBuiltin.cpp
SRCS_EXT+= DebugInfo/PDB/Native/NativeTypeEnum.cpp
SRCS_EXT+= DebugInfo/PDB/Native/NativeTypeFunctionSig.cpp
SRCS_EXT+= DebugInfo/PDB/Native/NativeTypePointer.cpp
SRCS_EXT+= DebugInfo/PDB/Native/NativeTypeTypedef.cpp
SRCS_EXT+= DebugInfo/PDB/Native/NativeTypeUDT.cpp
SRCS_EXT+= DebugInfo/PDB/Native/NativeTypeVTShape.cpp
SRCS_EXT+= DebugInfo/PDB/Native/PDBFile.cpp
SRCS_EXT+= DebugInfo/PDB/Native/PDBFileBuilder.cpp
SRCS_EXT+= DebugInfo/PDB/Native/PDBStringTable.cpp
SRCS_EXT+= DebugInfo/PDB/Native/PDBStringTableBuilder.cpp
SRCS_EXT+= DebugInfo/PDB/Native/PublicsStream.cpp
SRCS_EXT+= DebugInfo/PDB/Native/RawError.cpp
SRCS_EXT+= DebugInfo/PDB/Native/SymbolCache.cpp
SRCS_EXT+= DebugInfo/PDB/Native/SymbolStream.cpp
SRCS_EXT+= DebugInfo/PDB/Native/TpiHashing.cpp
SRCS_EXT+= DebugInfo/PDB/Native/TpiStream.cpp
SRCS_EXT+= DebugInfo/PDB/Native/TpiStreamBuilder.cpp
SRCS_EXT+= DebugInfo/PDB/PDB.cpp
SRCS_EXT+= DebugInfo/PDB/PDBContext.cpp
SRCS_EXT+= DebugInfo/PDB/PDBExtras.cpp
SRCS_EXT+= DebugInfo/PDB/PDBInterfaceAnchors.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymDumper.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbol.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolAnnotation.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolBlock.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolCompiland.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolCompilandDetails.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolCompilandEnv.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolCustom.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolData.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolExe.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolFunc.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolLabel.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolPublicSymbol.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolThunk.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolTypeArray.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolTypeCustom.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolTypeDimension.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolTypeEnum.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolTypeFriend.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolTypeFunctionSig.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolTypeManaged.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolTypePointer.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolTypeTypedef.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolTypeUDT.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolTypeVTable.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolUnknown.cpp
SRCS_EXT+= DebugInfo/PDB/PDBSymbolUsingNamespace.cpp
SRCS_EXT+= DebugInfo/PDB/UDTLayout.cpp
SRCS_MIW+= DebugInfo/Symbolize/DIFetcher.cpp
SRCS_MIW+= DebugInfo/Symbolize/DIPrinter.cpp
SRCS_MIW+= DebugInfo/Symbolize/SymbolizableObjectFile.cpp
SRCS_MIW+= DebugInfo/Symbolize/Symbolize.cpp
SRCS_MIW+= Debuginfod/DIFetcher.cpp
SRCS_MIW+= Debuginfod/Debuginfod.cpp
SRCS_MIW+= Debuginfod/HTTPClient.cpp
SRCS_MIN+= Demangle/DLangDemangle.cpp
SRCS_MIN+= Demangle/Demangle.cpp
SRCS_MIN+= Demangle/ItaniumDemangle.cpp
SRCS_MIN+= Demangle/MicrosoftDemangle.cpp
SRCS_MIN+= Demangle/MicrosoftDemangleNodes.cpp
SRCS_MIN+= Demangle/RustDemangle.cpp
SRCS_XDB+= ExecutionEngine/ExecutionEngine.cpp
SRCS_XDB+= ExecutionEngine/ExecutionEngineBindings.cpp
SRCS_XDB+= ExecutionEngine/GDBRegistrationListener.cpp
SRCS_XDB+= ExecutionEngine/Interpreter/Execution.cpp
SRCS_XDB+= ExecutionEngine/Interpreter/ExternalFunctions.cpp
SRCS_XDB+= ExecutionEngine/Interpreter/Interpreter.cpp
SRCS_EXT+= ExecutionEngine/JITLink/EHFrameSupport.cpp
SRCS_EXT+= ExecutionEngine/JITLink/ELF.cpp
SRCS_EXT+= ExecutionEngine/JITLink/ELF_aarch64.cpp
SRCS_EXT+= ExecutionEngine/JITLink/ELF_riscv.cpp
SRCS_EXT+= ExecutionEngine/JITLink/ELF_x86_64.cpp
SRCS_EXT+= ExecutionEngine/JITLink/ELFLinkGraphBuilder.cpp
SRCS_EXT+= ExecutionEngine/JITLink/JITLink.cpp
SRCS_EXT+= ExecutionEngine/JITLink/JITLinkGeneric.cpp
SRCS_EXT+= ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
SRCS_EXT+= ExecutionEngine/JITLink/MachO.cpp
SRCS_EXT+= ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
SRCS_EXT+= ExecutionEngine/JITLink/MachO_arm64.cpp
SRCS_EXT+= ExecutionEngine/JITLink/MachO_x86_64.cpp
SRCS_EXT+= ExecutionEngine/JITLink/MemoryFlags.cpp
SRCS_EXT+= ExecutionEngine/JITLink/aarch64.cpp
SRCS_EXT+= ExecutionEngine/JITLink/riscv.cpp
SRCS_EXT+= ExecutionEngine/JITLink/x86_64.cpp
SRCS_XDB+= ExecutionEngine/MCJIT/MCJIT.cpp
SRCS_EXT+= ExecutionEngine/Orc/CompileOnDemandLayer.cpp
SRCS_EXT+= ExecutionEngine/Orc/CompileUtils.cpp
SRCS_EXT+= ExecutionEngine/Orc/Core.cpp
SRCS_EXT+= ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp
SRCS_EXT+= ExecutionEngine/Orc/DebugUtils.cpp
SRCS_EXT+= ExecutionEngine/Orc/ELFNixPlatform.cpp
SRCS_EXT+= ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp
SRCS_EXT+= ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp
SRCS_EXT+= ExecutionEngine/Orc/EPCGenericDylibManager.cpp
SRCS_EXT+= ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp
SRCS_EXT+= ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp
SRCS_EXT+= ExecutionEngine/Orc/ExecutionUtils.cpp
SRCS_EXT+= ExecutionEngine/Orc/ExecutorProcessControl.cpp
SRCS_EXT+= ExecutionEngine/Orc/IRCompileLayer.cpp
SRCS_EXT+= ExecutionEngine/Orc/IRTransformLayer.cpp
SRCS_EXT+= ExecutionEngine/Orc/IndirectionUtils.cpp
SRCS_EXT+= ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
SRCS_EXT+= ExecutionEngine/Orc/LLJIT.cpp
SRCS_EXT+= ExecutionEngine/Orc/Layer.cpp
SRCS_EXT+= ExecutionEngine/Orc/LazyReexports.cpp
SRCS_EXT+= ExecutionEngine/Orc/MachOPlatform.cpp
SRCS_EXT+= ExecutionEngine/Orc/Mangling.cpp
SRCS_EXT+= ExecutionEngine/Orc/ObjectFileInterface.cpp
SRCS_EXT+= ExecutionEngine/Orc/ObjectLinkingLayer.cpp
SRCS_EXT+= ExecutionEngine/Orc/ObjectTransformLayer.cpp
SRCS_EXT+= ExecutionEngine/Orc/OrcABISupport.cpp
SRCS_EXT+= ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
SRCS_EXT+= ExecutionEngine/Orc/Shared/AllocationActions.cpp
SRCS_EXT+= ExecutionEngine/Orc/Shared/OrcError.cpp
SRCS_EXT+= ExecutionEngine/Orc/Shared/OrcRTBridge.cpp
SRCS_EXT+= ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.cpp
SRCS_EXT+= ExecutionEngine/Orc/SimpleRemoteEPC.cpp
SRCS_EXT+= ExecutionEngine/Orc/Speculation.cpp
SRCS_XDB+= ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp
SRCS_EXT+= ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.cpp
SRCS_EXT+= ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.cpp
SRCS_EXT+= ExecutionEngine/Orc/TaskDispatch.cpp
SRCS_EXT+= ExecutionEngine/Orc/ThreadSafeModule.cpp
SRCS_XDB+= ExecutionEngine/RuntimeDyld/JITSymbol.cpp
SRCS_XDB+= ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
SRCS_XDB+= ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
SRCS_XDB+= ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
SRCS_XDB+= ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
SRCS_XDB+= ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
SRCS_XDB+= ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
SRCS_XDB+= ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp
SRCS_XDB+= ExecutionEngine/SectionMemoryManager.cpp
SRCS_XDB+= ExecutionEngine/TargetSelect.cpp
SRCS_MIN+= Frontend/OpenMP/OMP.cpp
SRCS_MIN+= Frontend/OpenMP/OMPContext.cpp
SRCS_MIN+= Frontend/OpenMP/OMPIRBuilder.cpp
SRCS_MIN+= IR/AbstractCallSite.cpp
SRCS_MIN+= IR/AsmWriter.cpp
SRCS_MIN+= IR/Assumptions.cpp
SRCS_MIN+= IR/Attributes.cpp
SRCS_MIN+= IR/AutoUpgrade.cpp
SRCS_MIN+= IR/BasicBlock.cpp
SRCS_EXT+= IR/BuiltinGCs.cpp
SRCS_MIN+= IR/Comdat.cpp
SRCS_MIN+= IR/ConstantFold.cpp
SRCS_MIN+= IR/ConstantRange.cpp
SRCS_MIN+= IR/Constants.cpp
SRCS_MIN+= IR/Core.cpp
SRCS_MIN+= IR/DIBuilder.cpp
SRCS_MIN+= IR/DataLayout.cpp
SRCS_MIN+= IR/DebugInfo.cpp
SRCS_MIN+= IR/DebugInfoMetadata.cpp
SRCS_MIN+= IR/DebugLoc.cpp
SRCS_MIN+= IR/DiagnosticHandler.cpp
SRCS_MIN+= IR/DiagnosticInfo.cpp
SRCS_MIN+= IR/DiagnosticPrinter.cpp
SRCS_MIN+= IR/Dominators.cpp
SRCS_MIN+= IR/FPEnv.cpp
SRCS_MIN+= IR/Function.cpp
SRCS_MIN+= IR/GCStrategy.cpp
SRCS_MIN+= IR/GVMaterializer.cpp
SRCS_MIN+= IR/Globals.cpp
SRCS_MIN+= IR/IRBuilder.cpp
SRCS_MIN+= IR/IRPrintingPasses.cpp
SRCS_MIN+= IR/InlineAsm.cpp
SRCS_MIN+= IR/Instruction.cpp
SRCS_MIN+= IR/Instructions.cpp
SRCS_MIN+= IR/IntrinsicInst.cpp
SRCS_MIN+= IR/LLVMContext.cpp
SRCS_MIN+= IR/LLVMContextImpl.cpp
SRCS_MIN+= IR/LLVMRemarkStreamer.cpp
SRCS_MIN+= IR/LegacyPassManager.cpp
SRCS_MIN+= IR/MDBuilder.cpp
SRCS_MIN+= IR/Mangler.cpp
SRCS_MIN+= IR/Metadata.cpp
SRCS_MIN+= IR/Module.cpp
SRCS_MIN+= IR/ModuleSummaryIndex.cpp
SRCS_MIN+= IR/Operator.cpp
SRCS_MIN+= IR/OptBisect.cpp
SRCS_MIN+= IR/Pass.cpp
SRCS_MIN+= IR/PassInstrumentation.cpp
SRCS_MIN+= IR/PassManager.cpp
SRCS_MIN+= IR/PassRegistry.cpp
SRCS_MIN+= IR/PassTimingInfo.cpp
SRCS_MIN+= IR/PrintPasses.cpp
SRCS_MIN+= IR/ProfileSummary.cpp
SRCS_MIN+= IR/PseudoProbe.cpp
SRCS_MIN+= IR/ReplaceConstant.cpp
SRCS_MIN+= IR/SSAContext.cpp
SRCS_MIN+= IR/SafepointIRVerifier.cpp
SRCS_MIN+= IR/Statepoint.cpp
SRCS_MIN+= IR/Type.cpp
SRCS_MIN+= IR/TypeFinder.cpp
SRCS_MIN+= IR/Use.cpp
SRCS_MIN+= IR/User.cpp
SRCS_MIN+= IR/Value.cpp
SRCS_MIN+= IR/ValueSymbolTable.cpp
SRCS_MIN+= IR/Verifier.cpp
SRCS_MIN+= IRReader/IRReader.cpp
SRCS_MIN+= LTO/LTO.cpp
SRCS_MIN+= LTO/LTOBackend.cpp
SRCS_EXL+= LTO/LTOCodeGenerator.cpp
SRCS_EXL+= LTO/LTOModule.cpp
SRCS_EXL+= LTO/SummaryBasedOptimizations.cpp
SRCS_EXL+= LTO/ThinLTOCodeGenerator.cpp
SRCS_MIN+= LTO/UpdateCompilerUsed.cpp
# Only needed for clangd/clang-query, uncomment once we build those.
# SRCS_XDW+= LineEditor/LineEditor.cpp
SRCS_MIN+= Linker/IRMover.cpp
SRCS_MIN+= Linker/LinkModules.cpp
SRCS_MIN+= MC/ConstantPools.cpp
SRCS_MIN+= MC/ELFObjectWriter.cpp
SRCS_MIN+= MC/MCAsmBackend.cpp
SRCS_MIN+= MC/MCAsmInfo.cpp
SRCS_MIN+= MC/MCAsmInfoCOFF.cpp
SRCS_MIN+= MC/MCAsmInfoDarwin.cpp
SRCS_MIN+= MC/MCAsmInfoELF.cpp
SRCS_MIN+= MC/MCAsmInfoXCOFF.cpp
SRCS_MIN+= MC/MCAsmMacro.cpp
SRCS_MIN+= MC/MCAsmStreamer.cpp
SRCS_MIN+= MC/MCAssembler.cpp
SRCS_MIN+= MC/MCCodeEmitter.cpp
SRCS_MIN+= MC/MCCodeView.cpp
SRCS_MIN+= MC/MCContext.cpp
SRCS_XDL+= MC/MCDisassembler/Disassembler.cpp
SRCS_XDW+= MC/MCDisassembler/MCDisassembler.cpp
SRCS_XDW+= MC/MCDisassembler/MCExternalSymbolizer.cpp
SRCS_MIN+= MC/MCDisassembler/MCRelocationInfo.cpp
SRCS_XDW+= MC/MCDisassembler/MCSymbolizer.cpp
SRCS_MIN+= MC/MCDwarf.cpp
SRCS_MIN+= MC/MCELFObjectTargetWriter.cpp
SRCS_MIN+= MC/MCELFStreamer.cpp
SRCS_MIN+= MC/MCExpr.cpp
SRCS_MIN+= MC/MCFragment.cpp
SRCS_MIN+= MC/MCInst.cpp
SRCS_MIN+= MC/MCInstPrinter.cpp
SRCS_MIN+= MC/MCInstrAnalysis.cpp
SRCS_MIN+= MC/MCInstrDesc.cpp
SRCS_MIN+= MC/MCInstrInfo.cpp
SRCS_MIN+= MC/MCLinkerOptimizationHint.cpp
SRCS_MIN+= MC/MCMachOStreamer.cpp
SRCS_MIN+= MC/MCMachObjectTargetWriter.cpp
SRCS_MIN+= MC/MCNullStreamer.cpp
SRCS_MIN+= MC/MCObjectFileInfo.cpp
SRCS_MIN+= MC/MCObjectStreamer.cpp
SRCS_MIN+= MC/MCObjectWriter.cpp
SRCS_MIN+= MC/MCParser/AsmLexer.cpp
SRCS_MIN+= MC/MCParser/AsmParser.cpp
SRCS_MIN+= MC/MCParser/COFFAsmParser.cpp
SRCS_MIN+= MC/MCParser/DarwinAsmParser.cpp
SRCS_MIN+= MC/MCParser/ELFAsmParser.cpp
SRCS_MIN+= MC/MCParser/GOFFAsmParser.cpp
SRCS_MIN+= MC/MCParser/MCAsmLexer.cpp
SRCS_MIN+= MC/MCParser/MCAsmParser.cpp
SRCS_MIN+= MC/MCParser/MCAsmParserExtension.cpp
SRCS_MIN+= MC/MCParser/MCTargetAsmParser.cpp
SRCS_MIN+= MC/MCParser/WasmAsmParser.cpp
SRCS_MIN+= MC/MCParser/XCOFFAsmParser.cpp
SRCS_MIN+= MC/MCPseudoProbe.cpp
SRCS_MIN+= MC/MCRegisterInfo.cpp
SRCS_MIN+= MC/MCSchedule.cpp
SRCS_MIN+= MC/MCSection.cpp
SRCS_MIN+= MC/MCSectionCOFF.cpp
SRCS_MIN+= MC/MCSectionELF.cpp
SRCS_MIN+= MC/MCSectionMachO.cpp
SRCS_MIN+= MC/MCSectionWasm.cpp
SRCS_MIN+= MC/MCSectionXCOFF.cpp
SRCS_MIN+= MC/MCStreamer.cpp
SRCS_MIN+= MC/MCSubtargetInfo.cpp
SRCS_MIN+= MC/MCSymbol.cpp
SRCS_MIN+= MC/MCSymbolELF.cpp
SRCS_MIN+= MC/MCSymbolXCOFF.cpp
SRCS_MIN+= MC/MCTargetOptions.cpp
SRCS_MIN+= MC/MCTargetOptionsCommandFlags.cpp
SRCS_MIN+= MC/MCValue.cpp
SRCS_MIN+= MC/MCWasmStreamer.cpp
SRCS_MIN+= MC/MCWin64EH.cpp
SRCS_MIN+= MC/MCWinCOFFStreamer.cpp
SRCS_MIN+= MC/MCWinEH.cpp
SRCS_MIN+= MC/MCXCOFFStreamer.cpp
SRCS_MIN+= MC/MCXCOFFObjectTargetWriter.cpp
SRCS_MIN+= MC/MachObjectWriter.cpp
SRCS_MIN+= MC/StringTableBuilder.cpp
SRCS_MIN+= MC/SubtargetFeature.cpp
SRCS_MIN+= MC/TargetRegistry.cpp
SRCS_MIN+= MC/WasmObjectWriter.cpp
SRCS_MIN+= MC/WinCOFFObjectWriter.cpp
SRCS_MIN+= MC/XCOFFObjectWriter.cpp
SRCS_EXT+= MCA/CodeEmitter.cpp
SRCS_EXT+= MCA/Context.cpp
SRCS_EXT+= MCA/CustomBehaviour.cpp
SRCS_EXT+= MCA/HWEventListener.cpp
SRCS_EXT+= MCA/HardwareUnits/HardwareUnit.cpp
SRCS_EXT+= MCA/HardwareUnits/LSUnit.cpp
SRCS_EXT+= MCA/HardwareUnits/RegisterFile.cpp
SRCS_EXT+= MCA/HardwareUnits/ResourceManager.cpp
SRCS_EXT+= MCA/HardwareUnits/RetireControlUnit.cpp
SRCS_EXT+= MCA/HardwareUnits/Scheduler.cpp
SRCS_EXT+= MCA/InstrBuilder.cpp
SRCS_EXT+= MCA/Instruction.cpp
SRCS_EXT+= MCA/Pipeline.cpp
SRCS_EXT+= MCA/Stages/DispatchStage.cpp
SRCS_EXT+= MCA/Stages/EntryStage.cpp
SRCS_EXT+= MCA/Stages/ExecuteStage.cpp
SRCS_EXT+= MCA/Stages/InOrderIssueStage.cpp
SRCS_EXT+= MCA/Stages/InstructionTables.cpp
SRCS_EXT+= MCA/Stages/MicroOpQueueStage.cpp
SRCS_EXT+= MCA/Stages/RetireStage.cpp
SRCS_EXT+= MCA/Stages/Stage.cpp
SRCS_EXT+= MCA/Support.cpp
SRCS_EXT+= MCA/View.cpp
SRCS_MIN+= Object/Archive.cpp
SRCS_MIN+= Object/ArchiveWriter.cpp
SRCS_MIN+= Object/Binary.cpp
SRCS_MIN+= Object/COFFImportFile.cpp
SRCS_MIW+= Object/COFFModuleDefinition.cpp
SRCS_MIN+= Object/COFFObjectFile.cpp
SRCS_MIN+= Object/Decompressor.cpp
SRCS_MIN+= Object/ELF.cpp
SRCS_MIN+= Object/ELFObjectFile.cpp
SRCS_MIN+= Object/Error.cpp
SRCS_MIW+= Object/FaultMapParser.cpp
SRCS_MIN+= Object/IRObjectFile.cpp
SRCS_MIN+= Object/IRSymtab.cpp
SRCS_MIN+= Object/MachOObjectFile.cpp
SRCS_MIW+= Object/MachOUniversal.cpp
SRCS_MIW+= Object/MachOUniversalWriter.cpp
SRCS_MIW+= Object/Minidump.cpp
SRCS_MIN+= Object/ModuleSymbolTable.cpp
SRCS_EXT+= Object/Object.cpp
SRCS_MIN+= Object/ObjectFile.cpp
SRCS_MIN+= Object/RecordStreamer.cpp
SRCS_MIW+= Object/RelocationResolver.cpp
SRCS_MIW+= Object/SymbolSize.cpp
SRCS_MIN+= Object/SymbolicFile.cpp
SRCS_MIW+= Object/TapiFile.cpp
SRCS_MIW+= Object/TapiUniversal.cpp
SRCS_MIN+= Object/WasmObjectFile.cpp
SRCS_MIW+= Object/WindowsMachineFlag.cpp
SRCS_MIN+= Object/WindowsResource.cpp
SRCS_MIN+= Object/XCOFFObjectFile.cpp
SRCS_MIN+= ObjectYAML/COFFYAML.cpp
SRCS_EXT+= ObjectYAML/CodeViewYAMLDebugSections.cpp
SRCS_EXT+= ObjectYAML/CodeViewYAMLSymbols.cpp
SRCS_EXT+= ObjectYAML/CodeViewYAMLTypes.cpp
SRCS_MIN+= ObjectYAML/DWARFYAML.cpp
SRCS_MIN+= ObjectYAML/ELFYAML.cpp
SRCS_MIN+= ObjectYAML/MachOYAML.cpp
SRCS_EXT+= ObjectYAML/YAML.cpp
SRCS_MIN+= Option/Arg.cpp
SRCS_MIN+= Option/ArgList.cpp
SRCS_MIN+= Option/OptTable.cpp
SRCS_MIN+= Option/Option.cpp
SRCS_MIN+= Passes/OptimizationLevel.cpp
SRCS_MIN+= Passes/PassBuilder.cpp
SRCS_MIN+= Passes/PassBuilderPipelines.cpp
SRCS_MIN+= Passes/PassPlugin.cpp
SRCS_MIN+= Passes/StandardInstrumentations.cpp
SRCS_MIN+= ProfileData/Coverage/CoverageMapping.cpp
SRCS_MIN+= ProfileData/Coverage/CoverageMappingReader.cpp
SRCS_MIN+= ProfileData/Coverage/CoverageMappingWriter.cpp
SRCS_MIN+= ProfileData/GCOV.cpp
SRCS_MIN+= ProfileData/InstrProf.cpp
SRCS_MIN+= ProfileData/InstrProfCorrelator.cpp
SRCS_MIN+= ProfileData/InstrProfReader.cpp
SRCS_MIN+= ProfileData/InstrProfWriter.cpp
SRCS_MIN+= ProfileData/ProfileSummaryBuilder.cpp
SRCS_MIW+= ProfileData/RawMemProfReader.cpp
SRCS_MIN+= ProfileData/SampleProf.cpp
SRCS_MIN+= ProfileData/SampleProfReader.cpp
SRCS_MIN+= ProfileData/SampleProfWriter.cpp
SRCS_MIN+= Remarks/BitstreamRemarkParser.cpp
SRCS_MIN+= Remarks/BitstreamRemarkSerializer.cpp
SRCS_MIN+= Remarks/RemarkFormat.cpp
SRCS_MIN+= Remarks/RemarkParser.cpp
SRCS_MIN+= Remarks/RemarkSerializer.cpp
SRCS_MIN+= Remarks/RemarkStreamer.cpp
SRCS_MIN+= Remarks/RemarkStringTable.cpp
SRCS_MIN+= Remarks/YAMLRemarkParser.cpp
SRCS_MIN+= Remarks/YAMLRemarkSerializer.cpp
SRCS_MIN+= Support/AArch64TargetParser.cpp
SRCS_MIN+= Support/ABIBreak.cpp
SRCS_MIN+= Support/APFixedPoint.cpp
SRCS_MIN+= Support/APFloat.cpp
SRCS_MIN+= Support/APInt.cpp
SRCS_MIN+= Support/APSInt.cpp
SRCS_MIN+= Support/ARMAttributeParser.cpp
SRCS_MIN+= Support/ARMBuildAttrs.cpp
SRCS_MIN+= Support/ARMTargetParser.cpp
SRCS_MIN+= Support/ARMWinEH.cpp
SRCS_MIN+= Support/Allocator.cpp
SRCS_MIN+= Support/BinaryStreamError.cpp
SRCS_MIN+= Support/BinaryStreamReader.cpp
SRCS_MIN+= Support/BinaryStreamRef.cpp
SRCS_MIN+= Support/BinaryStreamWriter.cpp
SRCS_MIN+= Support/BlockFrequency.cpp
SRCS_MIN+= Support/BranchProbability.cpp
SRCS_MIN+= Support/BuryPointer.cpp
SRCS_MIN+= Support/CachePruning.cpp
SRCS_MIW+= Support/Caching.cpp
SRCS_MIW+= Support/COM.cpp
SRCS_MIN+= Support/CRC.cpp
SRCS_MIN+= Support/Chrono.cpp
SRCS_MIN+= Support/CodeGenCoverage.cpp
SRCS_MIN+= Support/CommandLine.cpp
SRCS_MIN+= Support/Compression.cpp
SRCS_MIN+= Support/ConvertUTF.cpp
SRCS_MIN+= Support/ConvertUTFWrapper.cpp
SRCS_MIN+= Support/CrashRecoveryContext.cpp
SRCS_MIN+= Support/DAGDeltaAlgorithm.cpp
SRCS_MIN+= Support/DJB.cpp
SRCS_MIN+= Support/DataExtractor.cpp
SRCS_MIN+= Support/Debug.cpp
SRCS_MIN+= Support/DebugCounter.cpp
SRCS_MIN+= Support/DeltaAlgorithm.cpp
SRCS_MIN+= Support/DivisionByConstantInfo.cpp
SRCS_MIN+= Support/DynamicLibrary.cpp
SRCS_MIN+= Support/ELFAttributeParser.cpp
SRCS_MIN+= Support/ELFAttributes.cpp
SRCS_MIN+= Support/Errno.cpp
SRCS_MIN+= Support/Error.cpp
SRCS_MIN+= Support/ErrorHandling.cpp
SRCS_MIN+= Support/ExtensibleRTTI.cpp
SRCS_MIN+= Support/FileCollector.cpp
SRCS_MIW+= Support/FileOutputBuffer.cpp
SRCS_MIN+= Support/FileUtilities.cpp
SRCS_MIN+= Support/FoldingSet.cpp
SRCS_MIN+= Support/FormatVariadic.cpp
SRCS_MIN+= Support/FormattedStream.cpp
SRCS_MIN+= Support/GlobPattern.cpp
SRCS_MIN+= Support/GraphWriter.cpp
SRCS_MIN+= Support/Hashing.cpp
SRCS_MIN+= Support/Host.cpp
SRCS_MIN+= Support/InitLLVM.cpp
SRCS_MIN+= Support/InstructionCost.cpp
SRCS_MIN+= Support/IntEqClasses.cpp
SRCS_MIN+= Support/IntervalMap.cpp
SRCS_MIN+= Support/ItaniumManglingCanonicalizer.cpp
SRCS_MIN+= Support/JSON.cpp
SRCS_MIN+= Support/KnownBits.cpp
SRCS_MIN+= Support/LEB128.cpp
SRCS_MIN+= Support/LineIterator.cpp
SRCS_MIN+= Support/Locale.cpp
SRCS_MIN+= Support/LockFileManager.cpp
SRCS_MIN+= Support/LowLevelType.cpp
SRCS_MIN+= Support/MD5.cpp
SRCS_MIW+= Support/MSP430AttributeParser.cpp
SRCS_MIW+= Support/MSP430Attributes.cpp
SRCS_MIN+= Support/ManagedStatic.cpp
SRCS_MIN+= Support/MathExtras.cpp
SRCS_MIN+= Support/MemAlloc.cpp
SRCS_MIW+= Support/Memory.cpp
SRCS_MIN+= Support/MemoryBuffer.cpp
SRCS_MIN+= Support/MemoryBufferRef.cpp
SRCS_MIN+= Support/NativeFormatting.cpp
SRCS_MIN+= Support/OptimizedStructLayout.cpp
SRCS_MIN+= Support/Optional.cpp
SRCS_EXL+= Support/Parallel.cpp
SRCS_MIN+= Support/Path.cpp
SRCS_MIN+= Support/PluginLoader.cpp
SRCS_MIN+= Support/PrettyStackTrace.cpp
SRCS_MIN+= Support/Process.cpp
SRCS_MIN+= Support/Program.cpp
SRCS_MIN+= Support/RISCVAttributeParser.cpp
SRCS_MIN+= Support/RISCVAttributes.cpp
SRCS_MIN+= Support/RISCVISAInfo.cpp
SRCS_MIN+= Support/RWMutex.cpp
SRCS_MIN+= Support/RandomNumberGenerator.cpp
SRCS_MIN+= Support/Regex.cpp
SRCS_MIN+= Support/SHA1.cpp
SRCS_MIN+= Support/SHA256.cpp
SRCS_MIN+= Support/ScaledNumber.cpp
SRCS_MIN+= Support/ScopedPrinter.cpp
SRCS_MIN+= Support/Signals.cpp
SRCS_MIN+= Support/Signposts.cpp
SRCS_MIN+= Support/SmallPtrSet.cpp
SRCS_MIN+= Support/SmallVector.cpp
SRCS_MIN+= Support/SourceMgr.cpp
SRCS_MIN+= Support/SpecialCaseList.cpp
SRCS_MIN+= Support/Statistic.cpp
SRCS_MIN+= Support/StringExtras.cpp
SRCS_MIN+= Support/StringMap.cpp
SRCS_MIN+= Support/StringRef.cpp
SRCS_MIN+= Support/StringSaver.cpp
SRCS_MIN+= Support/SuffixTree.cpp
SRCS_MIN+= Support/SymbolRemappingReader.cpp
SRCS_EXT+= Support/SystemUtils.cpp
SRCS_LLD+= Support/TarWriter.cpp
SRCS_MIN+= Support/TargetParser.cpp
SRCS_MIN+= Support/ThreadLocal.cpp
SRCS_MIW+= Support/ThreadPool.cpp
SRCS_MIN+= Support/Threading.cpp
SRCS_MIN+= Support/TimeProfiler.cpp
SRCS_MIN+= Support/Timer.cpp
SRCS_MIN+= Support/ToolOutputFile.cpp
SRCS_MIN+= Support/TrigramIndex.cpp
SRCS_MIN+= Support/Triple.cpp
SRCS_MIN+= Support/Twine.cpp
SRCS_MIN+= Support/TypeSize.cpp
SRCS_MIN+= Support/Unicode.cpp
SRCS_MIN+= Support/UnicodeCaseFold.cpp
SRCS_MIN+= Support/Valgrind.cpp
SRCS_MIN+= Support/VirtualFileSystem.cpp
SRCS_MIN+= Support/VersionTuple.cpp
SRCS_MIN+= Support/Watchdog.cpp
SRCS_MIN+= Support/WithColor.cpp
SRCS_MIN+= Support/X86TargetParser.cpp
SRCS_MIN+= Support/YAMLParser.cpp
SRCS_MIN+= Support/YAMLTraits.cpp
SRCS_FUL+= Support/Z3Solver.cpp
SRCS_MIN+= Support/circular_raw_ostream.cpp
SRCS_MIN+= Support/raw_os_ostream.cpp
SRCS_MIN+= Support/raw_ostream.cpp
SRCS_MIN+= Support/regcomp.c
SRCS_MIN+= Support/regerror.c
SRCS_MIN+= Support/regexec.c
SRCS_MIN+= Support/regfree.c
SRCS_MIN+= Support/regstrlcpy.c
SRCS_MIN+= Support/xxhash.cpp
SRCS_MIN+= TableGen/DetailedRecordsBackend.cpp
SRCS_MIN+= TableGen/Error.cpp
SRCS_MIN+= TableGen/JSONBackend.cpp
SRCS_MIN+= TableGen/Main.cpp
SRCS_MIN+= TableGen/Record.cpp
SRCS_MIN+= TableGen/SetTheory.cpp
SRCS_MIN+= TableGen/StringMatcher.cpp
SRCS_MIN+= TableGen/TGLexer.cpp
SRCS_MIN+= TableGen/TGParser.cpp
SRCS_MIN+= TableGen/TableGenBackend.cpp
.if ${MK_LLVM_TARGET_AARCH64} != "no"
SRCS_MIN+= Target/AArch64/AArch64A53Fix835769.cpp
SRCS_MIN+= Target/AArch64/AArch64A57FPLoadBalancing.cpp
SRCS_MIN+= Target/AArch64/AArch64AdvSIMDScalarPass.cpp
SRCS_MIN+= Target/AArch64/AArch64AsmPrinter.cpp
SRCS_MIN+= Target/AArch64/AArch64BranchTargets.cpp
SRCS_MIN+= Target/AArch64/AArch64CallingConvention.cpp
SRCS_MIN+= Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
SRCS_MIN+= Target/AArch64/AArch64CollectLOH.cpp
SRCS_MIN+= Target/AArch64/AArch64CompressJumpTables.cpp
SRCS_MIN+= Target/AArch64/AArch64CondBrTuning.cpp
SRCS_MIN+= Target/AArch64/AArch64ConditionOptimizer.cpp
SRCS_MIN+= Target/AArch64/AArch64ConditionalCompares.cpp
SRCS_MIN+= Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
SRCS_MIN+= Target/AArch64/AArch64ExpandImm.cpp
SRCS_MIN+= Target/AArch64/AArch64ExpandPseudoInsts.cpp
SRCS_MIN+= Target/AArch64/AArch64FalkorHWPFFix.cpp
SRCS_MIN+= Target/AArch64/AArch64FastISel.cpp
SRCS_MIN+= Target/AArch64/AArch64FrameLowering.cpp
SRCS_MIN+= Target/AArch64/AArch64ISelDAGToDAG.cpp
SRCS_MIN+= Target/AArch64/AArch64ISelLowering.cpp
SRCS_MIN+= Target/AArch64/AArch64InstrInfo.cpp
SRCS_MIN+= Target/AArch64/AArch64LoadStoreOptimizer.cpp
SRCS_MIN+= Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
SRCS_MIN+= Target/AArch64/AArch64MCInstLower.cpp
SRCS_MIN+= Target/AArch64/AArch64MIPeepholeOpt.cpp
SRCS_MIN+= Target/AArch64/AArch64MachineFunctionInfo.cpp
SRCS_MIN+= Target/AArch64/AArch64MacroFusion.cpp
SRCS_MIN+= Target/AArch64/AArch64PBQPRegAlloc.cpp
SRCS_MIN+= Target/AArch64/AArch64PromoteConstant.cpp
SRCS_MIN+= Target/AArch64/AArch64RedundantCopyElimination.cpp
SRCS_MIN+= Target/AArch64/AArch64RegisterInfo.cpp
SRCS_MIN+= Target/AArch64/AArch64SIMDInstrOpt.cpp
SRCS_MIN+= Target/AArch64/AArch64SLSHardening.cpp
SRCS_MIN+= Target/AArch64/AArch64SelectionDAGInfo.cpp
SRCS_MIN+= Target/AArch64/AArch64SpeculationHardening.cpp
SRCS_MIN+= Target/AArch64/AArch64StackTagging.cpp
SRCS_MIN+= Target/AArch64/AArch64StackTaggingPreRA.cpp
SRCS_MIN+= Target/AArch64/AArch64StorePairSuppress.cpp
SRCS_MIN+= Target/AArch64/AArch64Subtarget.cpp
SRCS_MIN+= Target/AArch64/AArch64TargetMachine.cpp
SRCS_MIN+= Target/AArch64/AArch64TargetObjectFile.cpp
SRCS_MIN+= Target/AArch64/AArch64TargetTransformInfo.cpp
SRCS_MIN+= Target/AArch64/AsmParser/AArch64AsmParser.cpp
SRCS_XDW+= Target/AArch64/Disassembler/AArch64Disassembler.cpp
SRCS_XDW+= Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
SRCS_MIN+= Target/AArch64/GISel/AArch64CallLowering.cpp
SRCS_MIN+= Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
SRCS_MIN+= Target/AArch64/GISel/AArch64InstructionSelector.cpp
SRCS_MIN+= Target/AArch64/GISel/AArch64LegalizerInfo.cpp
SRCS_MIN+= Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
SRCS_MIN+= Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
SRCS_MIN+= Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
SRCS_MIN+= Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
SRCS_MIN+= Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
SRCS_MIN+= Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
SRCS_MIN+= Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
SRCS_MIN+= Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
SRCS_MIN+= Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
SRCS_MIN+= Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
SRCS_MIN+= Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
SRCS_MIN+= Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
SRCS_MIN+= Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
SRCS_MIN+= Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
SRCS_MIN+= Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
SRCS_MIN+= Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
SRCS_MIN+= Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp
SRCS_MIN+= Target/AArch64/MCTargetDesc/AArch64WinCOFFStreamer.cpp
SRCS_MIN+= Target/AArch64/SVEIntrinsicOpts.cpp
SRCS_MIN+= Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
SRCS_MIN+= Target/AArch64/Utils/AArch64BaseInfo.cpp
.endif # MK_LLVM_TARGET_AARCH64
.if ${MK_LLVM_TARGET_ARM} != "no"
SRCS_MIN+= Target/ARM/A15SDOptimizer.cpp
SRCS_MIN+= Target/ARM/ARMAsmPrinter.cpp
SRCS_MIN+= Target/ARM/ARMBaseInstrInfo.cpp
SRCS_MIN+= Target/ARM/ARMBaseRegisterInfo.cpp
SRCS_MIN+= Target/ARM/ARMBasicBlockInfo.cpp
SRCS_MIN+= Target/ARM/ARMBlockPlacement.cpp
SRCS_MIN+= Target/ARM/ARMBranchTargets.cpp
SRCS_MIN+= Target/ARM/ARMCallLowering.cpp
SRCS_MIN+= Target/ARM/ARMCallingConv.cpp
SRCS_MIN+= Target/ARM/ARMConstantIslandPass.cpp
SRCS_MIN+= Target/ARM/ARMConstantPoolValue.cpp
SRCS_MIN+= Target/ARM/ARMExpandPseudoInsts.cpp
SRCS_MIN+= Target/ARM/ARMFastISel.cpp
SRCS_MIN+= Target/ARM/ARMFrameLowering.cpp
SRCS_MIN+= Target/ARM/ARMHazardRecognizer.cpp
SRCS_MIN+= Target/ARM/ARMISelDAGToDAG.cpp
SRCS_MIN+= Target/ARM/ARMISelLowering.cpp
SRCS_MIN+= Target/ARM/ARMInstrInfo.cpp
SRCS_MIN+= Target/ARM/ARMInstructionSelector.cpp
SRCS_MIN+= Target/ARM/ARMLegalizerInfo.cpp
SRCS_MIN+= Target/ARM/ARMLoadStoreOptimizer.cpp
SRCS_MIN+= Target/ARM/ARMLowOverheadLoops.cpp
SRCS_MIN+= Target/ARM/ARMMCInstLower.cpp
SRCS_MIN+= Target/ARM/ARMMachineFunctionInfo.cpp
SRCS_MIN+= Target/ARM/ARMMacroFusion.cpp
SRCS_MIN+= Target/ARM/ARMOptimizeBarriersPass.cpp
SRCS_MIN+= Target/ARM/ARMParallelDSP.cpp
SRCS_MIN+= Target/ARM/ARMRegisterBankInfo.cpp
SRCS_MIN+= Target/ARM/ARMRegisterInfo.cpp
SRCS_MIN+= Target/ARM/ARMSLSHardening.cpp
SRCS_MIN+= Target/ARM/ARMSelectionDAGInfo.cpp
SRCS_MIN+= Target/ARM/ARMSubtarget.cpp
SRCS_MIN+= Target/ARM/ARMTargetMachine.cpp
SRCS_MIN+= Target/ARM/ARMTargetObjectFile.cpp
SRCS_MIN+= Target/ARM/ARMTargetTransformInfo.cpp
SRCS_MIN+= Target/ARM/AsmParser/ARMAsmParser.cpp
SRCS_MIN+= Target/ARM/Disassembler/ARMDisassembler.cpp
SRCS_MIN+= Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
SRCS_MIN+= Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
SRCS_MIN+= Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
SRCS_MIN+= Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
SRCS_MIN+= Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
SRCS_MIN+= Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
SRCS_MIN+= Target/ARM/MCTargetDesc/ARMMCExpr.cpp
SRCS_MIN+= Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
SRCS_MIN+= Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
SRCS_MIN+= Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
SRCS_MIN+= Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
SRCS_MIN+= Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
SRCS_MIN+= Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
SRCS_MIN+= Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
SRCS_MIN+= Target/ARM/MLxExpansionPass.cpp
SRCS_MIN+= Target/ARM/MVEGatherScatterLowering.cpp
SRCS_MIN+= Target/ARM/MVELaneInterleavingPass.cpp
SRCS_MIN+= Target/ARM/MVETPAndVPTOptimisationsPass.cpp
SRCS_MIN+= Target/ARM/MVETailPredication.cpp
SRCS_MIN+= Target/ARM/MVEVPTBlockPass.cpp
SRCS_MIN+= Target/ARM/TargetInfo/ARMTargetInfo.cpp
SRCS_MIN+= Target/ARM/Thumb1FrameLowering.cpp
SRCS_MIN+= Target/ARM/Thumb1InstrInfo.cpp
SRCS_MIN+= Target/ARM/Thumb2ITBlockPass.cpp
SRCS_MIN+= Target/ARM/Thumb2InstrInfo.cpp
SRCS_MIN+= Target/ARM/Thumb2SizeReduction.cpp
SRCS_MIN+= Target/ARM/ThumbRegisterInfo.cpp
SRCS_MIN+= Target/ARM/Utils/ARMBaseInfo.cpp
.endif # MK_LLVM_TARGET_ARM
.if ${MK_LLVM_TARGET_BPF} != "no"
SRCS_MIN+= Target/BPF/AsmParser/BPFAsmParser.cpp
SRCS_MIN+= Target/BPF/BPFAbstractMemberAccess.cpp
SRCS_MIN+= Target/BPF/BPFAdjustOpt.cpp
SRCS_MIN+= Target/BPF/BPFAsmPrinter.cpp
SRCS_MIN+= Target/BPF/BPFCheckAndAdjustIR.cpp
SRCS_MIN+= Target/BPF/BPFFrameLowering.cpp
SRCS_MIN+= Target/BPF/BPFIRPeephole.cpp
SRCS_MIN+= Target/BPF/BPFISelDAGToDAG.cpp
SRCS_MIN+= Target/BPF/BPFISelLowering.cpp
SRCS_MIN+= Target/BPF/BPFInstrInfo.cpp
SRCS_MIN+= Target/BPF/BPFMCInstLower.cpp
SRCS_MIN+= Target/BPF/BPFMIChecking.cpp
SRCS_MIN+= Target/BPF/BPFMIPeephole.cpp
SRCS_MIN+= Target/BPF/BPFMISimplifyPatchable.cpp
SRCS_MIN+= Target/BPF/BPFPreserveDIType.cpp
SRCS_MIN+= Target/BPF/BPFRegisterInfo.cpp
SRCS_MIN+= Target/BPF/BPFSelectionDAGInfo.cpp
SRCS_MIN+= Target/BPF/BPFSubtarget.cpp
SRCS_MIN+= Target/BPF/BPFTargetMachine.cpp
SRCS_MIN+= Target/BPF/BTFDebug.cpp
SRCS_MIN+= Target/BPF/Disassembler/BPFDisassembler.cpp
SRCS_MIN+= Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
SRCS_MIN+= Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
SRCS_MIN+= Target/BPF/MCTargetDesc/BPFInstPrinter.cpp
SRCS_MIN+= Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
SRCS_MIN+= Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
SRCS_MIN+= Target/BPF/TargetInfo/BPFTargetInfo.cpp
.endif # MK_LLVM_TARGET_BPF
.if ${MK_LLVM_TARGET_MIPS} != "no"
SRCS_MIN+= Target/Mips/AsmParser/MipsAsmParser.cpp
SRCS_XDW+= Target/Mips/Disassembler/MipsDisassembler.cpp
SRCS_MIN+= Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp
SRCS_MIN+= Target/Mips/MCTargetDesc/MipsABIInfo.cpp
SRCS_MIN+= Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
SRCS_MIN+= Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
SRCS_MIN+= Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
SRCS_MIN+= Target/Mips/MCTargetDesc/MipsInstPrinter.cpp
SRCS_MIN+= Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
SRCS_MIN+= Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
SRCS_MIN+= Target/Mips/MCTargetDesc/MipsMCExpr.cpp
SRCS_MIN+= Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
SRCS_MIN+= Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
SRCS_MIN+= Target/Mips/MCTargetDesc/MipsOptionRecord.cpp
SRCS_MIN+= Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
SRCS_MIN+= Target/Mips/MicroMipsSizeReduction.cpp
SRCS_MIN+= Target/Mips/Mips16FrameLowering.cpp
SRCS_MIN+= Target/Mips/Mips16HardFloat.cpp
SRCS_MIN+= Target/Mips/Mips16HardFloatInfo.cpp
SRCS_MIN+= Target/Mips/Mips16ISelDAGToDAG.cpp
SRCS_MIN+= Target/Mips/Mips16ISelLowering.cpp
SRCS_MIN+= Target/Mips/Mips16InstrInfo.cpp
SRCS_MIN+= Target/Mips/Mips16RegisterInfo.cpp
SRCS_MIN+= Target/Mips/MipsAnalyzeImmediate.cpp
SRCS_MIN+= Target/Mips/MipsAsmPrinter.cpp
SRCS_MIN+= Target/Mips/MipsBranchExpansion.cpp
SRCS_MIN+= Target/Mips/MipsCCState.cpp
SRCS_MIN+= Target/Mips/MipsCallLowering.cpp
SRCS_MIN+= Target/Mips/MipsConstantIslandPass.cpp
SRCS_MIN+= Target/Mips/MipsDelaySlotFiller.cpp
SRCS_MIN+= Target/Mips/MipsExpandPseudo.cpp
SRCS_MIN+= Target/Mips/MipsFastISel.cpp
SRCS_MIN+= Target/Mips/MipsFrameLowering.cpp
SRCS_MIN+= Target/Mips/MipsISelDAGToDAG.cpp
SRCS_MIN+= Target/Mips/MipsISelLowering.cpp
SRCS_MIN+= Target/Mips/MipsInstrInfo.cpp
SRCS_MIN+= Target/Mips/MipsInstructionSelector.cpp
SRCS_MIN+= Target/Mips/MipsLegalizerInfo.cpp
SRCS_MIN+= Target/Mips/MipsMCInstLower.cpp
SRCS_MIN+= Target/Mips/MipsMachineFunction.cpp
SRCS_MIN+= Target/Mips/MipsModuleISelDAGToDAG.cpp
SRCS_MIN+= Target/Mips/MipsOptimizePICCall.cpp
SRCS_MIN+= Target/Mips/MipsOs16.cpp
SRCS_MIN+= Target/Mips/MipsPreLegalizerCombiner.cpp
SRCS_MIN+= Target/Mips/MipsRegisterBankInfo.cpp
SRCS_MIN+= Target/Mips/MipsRegisterInfo.cpp
SRCS_MIN+= Target/Mips/MipsSEFrameLowering.cpp
SRCS_MIN+= Target/Mips/MipsSEISelDAGToDAG.cpp
SRCS_MIN+= Target/Mips/MipsSEISelLowering.cpp
SRCS_MIN+= Target/Mips/MipsSEInstrInfo.cpp
SRCS_MIN+= Target/Mips/MipsSERegisterInfo.cpp
SRCS_MIN+= Target/Mips/MipsSubtarget.cpp
SRCS_MIN+= Target/Mips/MipsTargetMachine.cpp
SRCS_MIN+= Target/Mips/MipsTargetObjectFile.cpp
SRCS_MIN+= Target/Mips/TargetInfo/MipsTargetInfo.cpp
.endif # MK_LLVM_TARGET_MIPS
.if ${MK_LLVM_TARGET_POWERPC} != "no"
SRCS_MIN+= Target/PowerPC/AsmParser/PPCAsmParser.cpp
SRCS_MIN+= Target/PowerPC/Disassembler/PPCDisassembler.cpp
SRCS_MIN+= Target/PowerPC/GISel/PPCCallLowering.cpp
SRCS_MIN+= Target/PowerPC/GISel/PPCInstructionSelector.cpp
SRCS_MIN+= Target/PowerPC/GISel/PPCLegalizerInfo.cpp
SRCS_MIN+= Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
SRCS_MIN+= Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
SRCS_MIN+= Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
SRCS_MIN+= Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
SRCS_MIN+= Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
SRCS_MIN+= Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
SRCS_MIN+= Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
SRCS_MIN+= Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
SRCS_MIN+= Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
SRCS_MIN+= Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
SRCS_MIN+= Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
SRCS_MIN+= Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp
SRCS_MIN+= Target/PowerPC/PPCAsmPrinter.cpp
SRCS_MIN+= Target/PowerPC/PPCBoolRetToInt.cpp
SRCS_MIN+= Target/PowerPC/PPCBranchCoalescing.cpp
SRCS_MIN+= Target/PowerPC/PPCBranchSelector.cpp
SRCS_MIN+= Target/PowerPC/PPCCCState.cpp
SRCS_MIN+= Target/PowerPC/PPCCTRLoops.cpp
SRCS_MIN+= Target/PowerPC/PPCCallingConv.cpp
SRCS_MIN+= Target/PowerPC/PPCEarlyReturn.cpp
SRCS_MIN+= Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
SRCS_MIN+= Target/PowerPC/PPCExpandISEL.cpp
SRCS_MIN+= Target/PowerPC/PPCFastISel.cpp
SRCS_MIN+= Target/PowerPC/PPCFrameLowering.cpp
SRCS_MIN+= Target/PowerPC/PPCHazardRecognizers.cpp
SRCS_MIN+= Target/PowerPC/PPCISelDAGToDAG.cpp
SRCS_MIN+= Target/PowerPC/PPCISelLowering.cpp
SRCS_MIN+= Target/PowerPC/PPCInstrInfo.cpp
SRCS_MIN+= Target/PowerPC/PPCLoopInstrFormPrep.cpp
SRCS_MIN+= Target/PowerPC/PPCLowerMASSVEntries.cpp
SRCS_MIN+= Target/PowerPC/PPCMacroFusion.cpp
SRCS_MIN+= Target/PowerPC/PPCMCInstLower.cpp
SRCS_MIN+= Target/PowerPC/PPCMIPeephole.cpp
SRCS_MIN+= Target/PowerPC/PPCMachineFunctionInfo.cpp
SRCS_MIN+= Target/PowerPC/PPCMachineScheduler.cpp
SRCS_MIN+= Target/PowerPC/PPCPreEmitPeephole.cpp
SRCS_MIN+= Target/PowerPC/PPCReduceCRLogicals.cpp
SRCS_MIN+= Target/PowerPC/PPCRegisterInfo.cpp
SRCS_MIN+= Target/PowerPC/PPCSubtarget.cpp
SRCS_MIN+= Target/PowerPC/PPCTLSDynamicCall.cpp
SRCS_MIN+= Target/PowerPC/PPCTOCRegDeps.cpp
SRCS_MIN+= Target/PowerPC/PPCTargetMachine.cpp
SRCS_MIN+= Target/PowerPC/PPCTargetObjectFile.cpp
SRCS_MIN+= Target/PowerPC/PPCTargetTransformInfo.cpp
SRCS_MIN+= Target/PowerPC/PPCVSXCopy.cpp
SRCS_MIN+= Target/PowerPC/PPCVSXFMAMutate.cpp
SRCS_MIN+= Target/PowerPC/PPCVSXSwapRemoval.cpp
SRCS_MIN+= Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
.endif # MK_LLVM_TARGET_POWERPC
.if ${MK_LLVM_TARGET_RISCV} != "no"
SRCS_MIN+= Target/RISCV/AsmParser/RISCVAsmParser.cpp
SRCS_MIN+= Target/RISCV/Disassembler/RISCVDisassembler.cpp
SRCS_MIN+= Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
SRCS_MIN+= Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
SRCS_MIN+= Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
SRCS_MIN+= Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
SRCS_MIN+= Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
SRCS_MIN+= Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
SRCS_MIN+= Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
SRCS_MIN+= Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
SRCS_MIN+= Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
SRCS_MIN+= Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp
SRCS_MIN+= Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
SRCS_MIN+= Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
SRCS_MIN+= Target/RISCV/RISCVAsmPrinter.cpp
SRCS_MIN+= Target/RISCV/RISCVCallLowering.cpp
SRCS_MIN+= Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
SRCS_MIN+= Target/RISCV/RISCVExpandPseudoInsts.cpp
SRCS_MIN+= Target/RISCV/RISCVFrameLowering.cpp
SRCS_MIN+= Target/RISCV/RISCVGatherScatterLowering.cpp
SRCS_MIN+= Target/RISCV/RISCVInsertVSETVLI.cpp
SRCS_MIN+= Target/RISCV/RISCVInstrInfo.cpp
SRCS_MIN+= Target/RISCV/RISCVInstructionSelector.cpp
SRCS_MIN+= Target/RISCV/RISCVISelDAGToDAG.cpp
SRCS_MIN+= Target/RISCV/RISCVISelLowering.cpp
SRCS_MIN+= Target/RISCV/RISCVLegalizerInfo.cpp
SRCS_MIN+= Target/RISCV/RISCVMCInstLower.cpp
SRCS_MIN+= Target/RISCV/RISCVMergeBaseOffset.cpp
SRCS_MIN+= Target/RISCV/RISCVRegisterBankInfo.cpp
SRCS_MIN+= Target/RISCV/RISCVRegisterInfo.cpp
SRCS_MIN+= Target/RISCV/RISCVSExtWRemoval.cpp
SRCS_MIN+= Target/RISCV/RISCVSubtarget.cpp
SRCS_MIN+= Target/RISCV/RISCVTargetMachine.cpp
SRCS_MIN+= Target/RISCV/RISCVTargetObjectFile.cpp
SRCS_MIN+= Target/RISCV/RISCVTargetTransformInfo.cpp
SRCS_MIN+= Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
.endif # MK_LLVM_TARGET_RISCV
SRCS_MIN+= Target/Target.cpp
SRCS_MIN+= Target/TargetLoweringObjectFile.cpp
SRCS_MIN+= Target/TargetMachine.cpp
SRCS_MIN+= Target/TargetMachineC.cpp
.if ${MK_LLVM_TARGET_X86} != "no"
SRCS_MIN+= Target/X86/AsmParser/X86AsmParser.cpp
SRCS_XDW+= Target/X86/Disassembler/X86Disassembler.cpp
SRCS_MIN+= Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
SRCS_MIN+= Target/X86/MCTargetDesc/X86AsmBackend.cpp
SRCS_MIN+= Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
SRCS_MIN+= Target/X86/MCTargetDesc/X86InstComments.cpp
SRCS_MIN+= Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
SRCS_MIN+= Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
SRCS_MIN+= Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
SRCS_MIN+= Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
SRCS_MIN+= Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
SRCS_MIN+= Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
SRCS_MIN+= Target/X86/MCTargetDesc/X86ShuffleDecode.cpp
SRCS_MIN+= Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
SRCS_MIN+= Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
SRCS_MIN+= Target/X86/MCTargetDesc/X86WinCOFFTargetStreamer.cpp
SRCS_MIN+= Target/X86/TargetInfo/X86TargetInfo.cpp
SRCS_MIN+= Target/X86/X86AsmPrinter.cpp
SRCS_MIN+= Target/X86/X86AvoidStoreForwardingBlocks.cpp
SRCS_MIN+= Target/X86/X86AvoidTrailingCall.cpp
SRCS_MIN+= Target/X86/X86CallFrameOptimization.cpp
SRCS_MIN+= Target/X86/X86CallLowering.cpp
SRCS_MIN+= Target/X86/X86CallingConv.cpp
SRCS_MIN+= Target/X86/X86CmovConversion.cpp
SRCS_MIN+= Target/X86/X86DiscriminateMemOps.cpp
SRCS_MIN+= Target/X86/X86DomainReassignment.cpp
SRCS_MIN+= Target/X86/X86DynAllocaExpander.cpp
SRCS_MIN+= Target/X86/X86EvexToVex.cpp
SRCS_MIN+= Target/X86/X86ExpandPseudo.cpp
SRCS_MIN+= Target/X86/X86FastISel.cpp
SRCS_MIN+= Target/X86/X86FastTileConfig.cpp
SRCS_MIN+= Target/X86/X86FixupBWInsts.cpp
SRCS_MIN+= Target/X86/X86FixupLEAs.cpp
SRCS_MIN+= Target/X86/X86FixupSetCC.cpp
SRCS_MIN+= Target/X86/X86FlagsCopyLowering.cpp
SRCS_MIN+= Target/X86/X86FloatingPoint.cpp
SRCS_MIN+= Target/X86/X86FrameLowering.cpp
SRCS_MIN+= Target/X86/X86ISelDAGToDAG.cpp
SRCS_MIN+= Target/X86/X86ISelLowering.cpp
SRCS_MIN+= Target/X86/X86IndirectBranchTracking.cpp
SRCS_MIN+= Target/X86/X86IndirectThunks.cpp
SRCS_MIN+= Target/X86/X86InsertPrefetch.cpp
SRCS_MIN+= Target/X86/X86InsertWait.cpp
SRCS_MIN+= Target/X86/X86InstCombineIntrinsic.cpp
SRCS_MIN+= Target/X86/X86InstrFMA3Info.cpp
SRCS_MIN+= Target/X86/X86InstrFoldTables.cpp
SRCS_MIN+= Target/X86/X86InstrInfo.cpp
SRCS_MIN+= Target/X86/X86InstructionSelector.cpp
SRCS_MIN+= Target/X86/X86InterleavedAccess.cpp
SRCS_MIN+= Target/X86/X86LegalizerInfo.cpp
SRCS_MIN+= Target/X86/X86LoadValueInjectionLoadHardening.cpp
SRCS_MIN+= Target/X86/X86LoadValueInjectionRetHardening.cpp
SRCS_MIN+= Target/X86/X86LowerAMXIntrinsics.cpp
SRCS_MIN+= Target/X86/X86LowerAMXType.cpp
SRCS_MIN+= Target/X86/X86LowerTileCopy.cpp
SRCS_MIN+= Target/X86/X86MCInstLower.cpp
SRCS_MIN+= Target/X86/X86MachineFunctionInfo.cpp
SRCS_MIN+= Target/X86/X86MacroFusion.cpp
SRCS_MIN+= Target/X86/X86OptimizeLEAs.cpp
SRCS_MIN+= Target/X86/X86PadShortFunction.cpp
SRCS_MIN+= Target/X86/X86PartialReduction.cpp
SRCS_MIN+= Target/X86/X86PreAMXConfig.cpp
SRCS_MIN+= Target/X86/X86PreTileConfig.cpp
SRCS_MIN+= Target/X86/X86RegisterBankInfo.cpp
SRCS_MIN+= Target/X86/X86RegisterInfo.cpp
SRCS_MIN+= Target/X86/X86SelectionDAGInfo.cpp
SRCS_MIN+= Target/X86/X86ShuffleDecodeConstantPool.cpp
SRCS_MIN+= Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp
SRCS_MIN+= Target/X86/X86SpeculativeLoadHardening.cpp
SRCS_MIN+= Target/X86/X86Subtarget.cpp
SRCS_MIN+= Target/X86/X86TargetMachine.cpp
SRCS_MIN+= Target/X86/X86TargetObjectFile.cpp
SRCS_MIN+= Target/X86/X86TargetTransformInfo.cpp
SRCS_MIN+= Target/X86/X86TileConfig.cpp
SRCS_MIN+= Target/X86/X86VZeroUpper.cpp
SRCS_MIN+= Target/X86/X86WinEHState.cpp
.endif # MK_LLVM_TARGET_X86
SRCS_MIW+= TextAPI/Architecture.cpp
SRCS_MIW+= TextAPI/ArchitectureSet.cpp
SRCS_MIW+= TextAPI/InterfaceFile.cpp
SRCS_MIW+= TextAPI/PackedVersion.cpp
SRCS_MIW+= TextAPI/Platform.cpp
SRCS_MIW+= TextAPI/Target.cpp
SRCS_MIW+= TextAPI/TextStub.cpp
SRCS_MIW+= TextAPI/TextStubCommon.cpp
SRCS_MIN+= ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
SRCS_MIW+= ToolDrivers/llvm-lib/LibDriver.cpp
SRCS_MIN+= Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
SRCS_MIN+= Transforms/AggressiveInstCombine/TruncInstCombine.cpp
SRCS_MIN+= Transforms/CFGuard/CFGuard.cpp
SRCS_MIN+= Transforms/Coroutines/CoroCleanup.cpp
SRCS_MIN+= Transforms/Coroutines/CoroEarly.cpp
SRCS_MIN+= Transforms/Coroutines/CoroElide.cpp
SRCS_MIN+= Transforms/Coroutines/CoroFrame.cpp
SRCS_MIN+= Transforms/Coroutines/CoroSplit.cpp
SRCS_MIN+= Transforms/Coroutines/Coroutines.cpp
SRCS_MIN+= Transforms/IPO/AlwaysInliner.cpp
SRCS_MIN+= Transforms/IPO/Annotation2Metadata.cpp
SRCS_MIN+= Transforms/IPO/ArgumentPromotion.cpp
SRCS_MIN+= Transforms/IPO/Attributor.cpp
SRCS_MIN+= Transforms/IPO/AttributorAttributes.cpp
SRCS_MIN+= Transforms/IPO/BarrierNoopPass.cpp
SRCS_MIN+= Transforms/IPO/BlockExtractor.cpp
SRCS_MIN+= Transforms/IPO/CalledValuePropagation.cpp
SRCS_MIN+= Transforms/IPO/ConstantMerge.cpp
SRCS_MIN+= Transforms/IPO/CrossDSOCFI.cpp
SRCS_MIN+= Transforms/IPO/DeadArgumentElimination.cpp
SRCS_MIN+= Transforms/IPO/ElimAvailExtern.cpp
SRCS_MIN+= Transforms/IPO/ExtractGV.cpp
SRCS_MIN+= Transforms/IPO/ForceFunctionAttrs.cpp
SRCS_MIN+= Transforms/IPO/FunctionAttrs.cpp
SRCS_MIN+= Transforms/IPO/FunctionImport.cpp
SRCS_MIN+= Transforms/IPO/FunctionSpecialization.cpp
SRCS_MIN+= Transforms/IPO/GlobalDCE.cpp
SRCS_MIN+= Transforms/IPO/GlobalOpt.cpp
SRCS_MIN+= Transforms/IPO/GlobalSplit.cpp
SRCS_MIN+= Transforms/IPO/HotColdSplitting.cpp
SRCS_EXT+= Transforms/IPO/IPO.cpp
SRCS_MIN+= Transforms/IPO/IROutliner.cpp
SRCS_MIN+= Transforms/IPO/InferFunctionAttrs.cpp
SRCS_MIN+= Transforms/IPO/InlineSimple.cpp
SRCS_MIN+= Transforms/IPO/Inliner.cpp
SRCS_MIN+= Transforms/IPO/Internalize.cpp
SRCS_MIN+= Transforms/IPO/LoopExtractor.cpp
SRCS_MIN+= Transforms/IPO/LowerTypeTests.cpp
SRCS_MIN+= Transforms/IPO/MergeFunctions.cpp
SRCS_MIN+= Transforms/IPO/ModuleInliner.cpp
SRCS_MIN+= Transforms/IPO/OpenMPOpt.cpp
SRCS_MIN+= Transforms/IPO/PartialInlining.cpp
SRCS_MIN+= Transforms/IPO/PassManagerBuilder.cpp
SRCS_MIN+= Transforms/IPO/PruneEH.cpp
SRCS_MIN+= Transforms/IPO/SCCP.cpp
SRCS_MIN+= Transforms/IPO/SampleContextTracker.cpp
SRCS_MIN+= Transforms/IPO/SampleProfile.cpp
SRCS_MIN+= Transforms/IPO/SampleProfileProbe.cpp
SRCS_MIN+= Transforms/IPO/StripDeadPrototypes.cpp
SRCS_MIN+= Transforms/IPO/StripSymbols.cpp
SRCS_MIN+= Transforms/IPO/SyntheticCountsPropagation.cpp
SRCS_MIN+= Transforms/IPO/ThinLTOBitcodeWriter.cpp
SRCS_MIN+= Transforms/IPO/WholeProgramDevirt.cpp
SRCS_MIN+= Transforms/InstCombine/InstCombineAddSub.cpp
SRCS_MIN+= Transforms/InstCombine/InstCombineAndOrXor.cpp
SRCS_MIN+= Transforms/InstCombine/InstCombineAtomicRMW.cpp
SRCS_MIN+= Transforms/InstCombine/InstCombineCalls.cpp
SRCS_MIN+= Transforms/InstCombine/InstCombineCasts.cpp
SRCS_MIN+= Transforms/InstCombine/InstCombineCompares.cpp
SRCS_MIN+= Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
SRCS_MIN+= Transforms/InstCombine/InstCombineMulDivRem.cpp
SRCS_MIN+= Transforms/InstCombine/InstCombineNegator.cpp
SRCS_MIN+= Transforms/InstCombine/InstCombinePHI.cpp
SRCS_MIN+= Transforms/InstCombine/InstCombineSelect.cpp
SRCS_MIN+= Transforms/InstCombine/InstCombineShifts.cpp
SRCS_MIN+= Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
SRCS_MIN+= Transforms/InstCombine/InstCombineVectorOps.cpp
SRCS_MIN+= Transforms/InstCombine/InstructionCombining.cpp
SRCS_MIN+= Transforms/Instrumentation/AddressSanitizer.cpp
SRCS_MIN+= Transforms/Instrumentation/BoundsChecking.cpp
SRCS_MIN+= Transforms/Instrumentation/CGProfile.cpp
SRCS_MIN+= Transforms/Instrumentation/ControlHeightReduction.cpp
SRCS_MIN+= Transforms/Instrumentation/DataFlowSanitizer.cpp
SRCS_MIN+= Transforms/Instrumentation/GCOVProfiling.cpp
SRCS_MIN+= Transforms/Instrumentation/HWAddressSanitizer.cpp
SRCS_MIN+= Transforms/Instrumentation/IndirectCallPromotion.cpp
SRCS_MIN+= Transforms/Instrumentation/InstrOrderFile.cpp
SRCS_MIN+= Transforms/Instrumentation/InstrProfiling.cpp
SRCS_MIN+= Transforms/Instrumentation/Instrumentation.cpp
SRCS_MIN+= Transforms/Instrumentation/MemProfiler.cpp
SRCS_MIN+= Transforms/Instrumentation/MemorySanitizer.cpp
SRCS_MIN+= Transforms/Instrumentation/PGOInstrumentation.cpp
SRCS_MIN+= Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
SRCS_MIN+= Transforms/Instrumentation/PoisonChecking.cpp
SRCS_MIN+= Transforms/Instrumentation/SanitizerCoverage.cpp
SRCS_MIN+= Transforms/Instrumentation/ThreadSanitizer.cpp
SRCS_MIN+= Transforms/Instrumentation/ValueProfileCollector.cpp
SRCS_MIN+= Transforms/ObjCARC/DependencyAnalysis.cpp
SRCS_MIN+= Transforms/ObjCARC/ObjCARC.cpp
SRCS_MIN+= Transforms/ObjCARC/ObjCARCAPElim.cpp
SRCS_MIN+= Transforms/ObjCARC/ObjCARCContract.cpp
SRCS_MIN+= Transforms/ObjCARC/ObjCARCExpand.cpp
SRCS_MIN+= Transforms/ObjCARC/ObjCARCOpts.cpp
SRCS_MIN+= Transforms/ObjCARC/ProvenanceAnalysis.cpp
SRCS_MIN+= Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
SRCS_MIN+= Transforms/ObjCARC/PtrState.cpp
SRCS_MIN+= Transforms/Scalar/ADCE.cpp
SRCS_MIN+= Transforms/Scalar/AlignmentFromAssumptions.cpp
SRCS_MIN+= Transforms/Scalar/AnnotationRemarks.cpp
SRCS_MIN+= Transforms/Scalar/BDCE.cpp
SRCS_MIN+= Transforms/Scalar/CallSiteSplitting.cpp
SRCS_MIN+= Transforms/Scalar/ConstantHoisting.cpp
SRCS_MIN+= Transforms/Scalar/ConstraintElimination.cpp
SRCS_MIN+= Transforms/Scalar/CorrelatedValuePropagation.cpp
SRCS_MIN+= Transforms/Scalar/DCE.cpp
SRCS_MIN+= Transforms/Scalar/DFAJumpThreading.cpp
SRCS_MIN+= Transforms/Scalar/DeadStoreElimination.cpp
SRCS_MIN+= Transforms/Scalar/DivRemPairs.cpp
SRCS_MIN+= Transforms/Scalar/EarlyCSE.cpp
SRCS_MIN+= Transforms/Scalar/FlattenCFGPass.cpp
SRCS_MIN+= Transforms/Scalar/Float2Int.cpp
SRCS_MIN+= Transforms/Scalar/GVN.cpp
SRCS_MIN+= Transforms/Scalar/GVNHoist.cpp
SRCS_MIN+= Transforms/Scalar/GVNSink.cpp
SRCS_MIN+= Transforms/Scalar/GuardWidening.cpp
SRCS_MIN+= Transforms/Scalar/IVUsersPrinter.cpp
SRCS_MIN+= Transforms/Scalar/IndVarSimplify.cpp
SRCS_MIN+= Transforms/Scalar/InductiveRangeCheckElimination.cpp
SRCS_MIN+= Transforms/Scalar/InferAddressSpaces.cpp
SRCS_MIN+= Transforms/Scalar/InstSimplifyPass.cpp
SRCS_MIN+= Transforms/Scalar/JumpThreading.cpp
SRCS_MIN+= Transforms/Scalar/LICM.cpp
SRCS_MIN+= Transforms/Scalar/LoopAccessAnalysisPrinter.cpp
SRCS_MIN+= Transforms/Scalar/LoopBoundSplit.cpp
SRCS_MIN+= Transforms/Scalar/LoopDataPrefetch.cpp
SRCS_MIN+= Transforms/Scalar/LoopDeletion.cpp
SRCS_MIN+= Transforms/Scalar/LoopDistribute.cpp
SRCS_MIN+= Transforms/Scalar/LoopFlatten.cpp
SRCS_MIN+= Transforms/Scalar/LoopFuse.cpp
SRCS_MIN+= Transforms/Scalar/LoopIdiomRecognize.cpp
SRCS_MIN+= Transforms/Scalar/LoopInstSimplify.cpp
SRCS_MIN+= Transforms/Scalar/LoopInterchange.cpp
SRCS_MIN+= Transforms/Scalar/LoopLoadElimination.cpp
SRCS_MIN+= Transforms/Scalar/LoopPassManager.cpp
SRCS_MIN+= Transforms/Scalar/LoopPredication.cpp
SRCS_MIN+= Transforms/Scalar/LoopRerollPass.cpp
SRCS_MIN+= Transforms/Scalar/LoopRotation.cpp
SRCS_MIN+= Transforms/Scalar/LoopSimplifyCFG.cpp
SRCS_MIN+= Transforms/Scalar/LoopSink.cpp
SRCS_MIN+= Transforms/Scalar/LoopStrengthReduce.cpp
SRCS_MIN+= Transforms/Scalar/LoopUnrollPass.cpp
SRCS_MIN+= Transforms/Scalar/LoopUnrollAndJamPass.cpp
SRCS_MIN+= Transforms/Scalar/LoopUnswitch.cpp
SRCS_MIN+= Transforms/Scalar/LoopVersioningLICM.cpp
SRCS_MIN+= Transforms/Scalar/LowerAtomic.cpp
SRCS_MIN+= Transforms/Scalar/LowerConstantIntrinsics.cpp
SRCS_MIN+= Transforms/Scalar/LowerExpectIntrinsic.cpp
SRCS_MIN+= Transforms/Scalar/LowerGuardIntrinsic.cpp
SRCS_MIN+= Transforms/Scalar/LowerMatrixIntrinsics.cpp
SRCS_MIN+= Transforms/Scalar/LowerWidenableCondition.cpp
SRCS_MIN+= Transforms/Scalar/MakeGuardsExplicit.cpp
SRCS_MIN+= Transforms/Scalar/MemCpyOptimizer.cpp
SRCS_MIN+= Transforms/Scalar/MergeICmps.cpp
SRCS_MIN+= Transforms/Scalar/MergedLoadStoreMotion.cpp
SRCS_MIN+= Transforms/Scalar/NaryReassociate.cpp
SRCS_MIN+= Transforms/Scalar/NewGVN.cpp
SRCS_MIN+= Transforms/Scalar/PartiallyInlineLibCalls.cpp
SRCS_MIN+= Transforms/Scalar/PlaceSafepoints.cpp
SRCS_MIN+= Transforms/Scalar/Reassociate.cpp
SRCS_MIN+= Transforms/Scalar/Reg2Mem.cpp
SRCS_MIN+= Transforms/Scalar/RewriteStatepointsForGC.cpp
SRCS_MIN+= Transforms/Scalar/SCCP.cpp
SRCS_MIN+= Transforms/Scalar/SROA.cpp
SRCS_EXT+= Transforms/Scalar/Scalar.cpp
SRCS_MIN+= Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
SRCS_MIN+= Transforms/Scalar/Scalarizer.cpp
SRCS_MIN+= Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
SRCS_MIN+= Transforms/Scalar/SimpleLoopUnswitch.cpp
SRCS_MIN+= Transforms/Scalar/SimplifyCFGPass.cpp
SRCS_MIN+= Transforms/Scalar/Sink.cpp
SRCS_MIN+= Transforms/Scalar/SpeculativeExecution.cpp
SRCS_MIN+= Transforms/Scalar/StraightLineStrengthReduce.cpp
SRCS_MIN+= Transforms/Scalar/StructurizeCFG.cpp
SRCS_MIN+= Transforms/Scalar/TailRecursionElimination.cpp
SRCS_MIN+= Transforms/Scalar/WarnMissedTransforms.cpp
SRCS_MIN+= Transforms/Utils/AMDGPUEmitPrintf.cpp
SRCS_MIN+= Transforms/Utils/ASanStackFrameLayout.cpp
SRCS_MIN+= Transforms/Utils/AddDiscriminators.cpp
SRCS_MIN+= Transforms/Utils/AssumeBundleBuilder.cpp
SRCS_MIN+= Transforms/Utils/BasicBlockUtils.cpp
SRCS_MIN+= Transforms/Utils/BreakCriticalEdges.cpp
SRCS_MIN+= Transforms/Utils/BuildLibCalls.cpp
SRCS_MIN+= Transforms/Utils/BypassSlowDivision.cpp
SRCS_MIN+= Transforms/Utils/CallGraphUpdater.cpp
SRCS_MIN+= Transforms/Utils/CallPromotionUtils.cpp
SRCS_MIN+= Transforms/Utils/CanonicalizeAliases.cpp
SRCS_MIN+= Transforms/Utils/CanonicalizeFreezeInLoops.cpp
SRCS_MIN+= Transforms/Utils/CloneFunction.cpp
SRCS_MIN+= Transforms/Utils/CloneModule.cpp
SRCS_MIN+= Transforms/Utils/CodeExtractor.cpp
SRCS_MIN+= Transforms/Utils/CodeLayout.cpp
SRCS_MIN+= Transforms/Utils/CodeMoverUtils.cpp
SRCS_MIN+= Transforms/Utils/CtorUtils.cpp
SRCS_MIN+= Transforms/Utils/Debugify.cpp
SRCS_MIN+= Transforms/Utils/DemoteRegToStack.cpp
SRCS_MIN+= Transforms/Utils/EntryExitInstrumenter.cpp
SRCS_MIN+= Transforms/Utils/EscapeEnumerator.cpp
SRCS_MIN+= Transforms/Utils/Evaluator.cpp
SRCS_MIN+= Transforms/Utils/FixIrreducible.cpp
SRCS_MIN+= Transforms/Utils/FlattenCFG.cpp
SRCS_MIN+= Transforms/Utils/FunctionComparator.cpp
SRCS_MIN+= Transforms/Utils/FunctionImportUtils.cpp
SRCS_MIN+= Transforms/Utils/GlobalStatus.cpp
SRCS_MIN+= Transforms/Utils/GuardUtils.cpp
SRCS_MIN+= Transforms/Utils/HelloWorld.cpp
SRCS_MIN+= Transforms/Utils/InjectTLIMappings.cpp
SRCS_MIN+= Transforms/Utils/InlineFunction.cpp
SRCS_MIN+= Transforms/Utils/InstructionNamer.cpp
SRCS_MIN+= Transforms/Utils/IntegerDivision.cpp
SRCS_MIN+= Transforms/Utils/LCSSA.cpp
SRCS_MIN+= Transforms/Utils/LibCallsShrinkWrap.cpp
SRCS_MIN+= Transforms/Utils/Local.cpp
SRCS_MIN+= Transforms/Utils/LoopPeel.cpp
SRCS_MIN+= Transforms/Utils/LoopSimplify.cpp
SRCS_MIN+= Transforms/Utils/LoopRotationUtils.cpp
SRCS_MIN+= Transforms/Utils/LoopUnroll.cpp
SRCS_MIN+= Transforms/Utils/LoopUnrollAndJam.cpp
SRCS_MIN+= Transforms/Utils/LoopUnrollRuntime.cpp
SRCS_MIN+= Transforms/Utils/LoopUtils.cpp
SRCS_MIN+= Transforms/Utils/LoopVersioning.cpp
SRCS_MIN+= Transforms/Utils/LowerInvoke.cpp
SRCS_MIN+= Transforms/Utils/LowerSwitch.cpp
SRCS_MIN+= Transforms/Utils/MatrixUtils.cpp
SRCS_MIN+= Transforms/Utils/Mem2Reg.cpp
SRCS_MIN+= Transforms/Utils/MetaRenamer.cpp
SRCS_MIN+= Transforms/Utils/MemoryOpRemark.cpp
SRCS_MIN+= Transforms/Utils/ModuleUtils.cpp
SRCS_MIN+= Transforms/Utils/NameAnonGlobals.cpp
SRCS_MIN+= Transforms/Utils/PredicateInfo.cpp
SRCS_MIN+= Transforms/Utils/PromoteMemoryToRegister.cpp
SRCS_MIN+= Transforms/Utils/RelLookupTableConverter.cpp
SRCS_MIN+= Transforms/Utils/SCCPSolver.cpp
SRCS_MIN+= Transforms/Utils/SSAUpdater.cpp
SRCS_MIN+= Transforms/Utils/SSAUpdaterBulk.cpp
SRCS_MIN+= Transforms/Utils/SanitizerStats.cpp
SRCS_MIN+= Transforms/Utils/ScalarEvolutionExpander.cpp
SRCS_MIN+= Transforms/Utils/SampleProfileInference.cpp
SRCS_MIN+= Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
SRCS_MIN+= Transforms/Utils/SimplifyCFG.cpp
SRCS_MIN+= Transforms/Utils/SimplifyIndVar.cpp
SRCS_MIN+= Transforms/Utils/SimplifyLibCalls.cpp
SRCS_MIN+= Transforms/Utils/SizeOpts.cpp
SRCS_MIN+= Transforms/Utils/SplitModule.cpp
SRCS_MIN+= Transforms/Utils/StripGCRelocates.cpp
SRCS_MIN+= Transforms/Utils/StripNonLineTableDebugInfo.cpp
SRCS_MIN+= Transforms/Utils/SymbolRewriter.cpp
SRCS_MIN+= Transforms/Utils/UnifyFunctionExitNodes.cpp
SRCS_MIN+= Transforms/Utils/UnifyLoopExits.cpp
SRCS_EXT+= Transforms/Utils/Utils.cpp
SRCS_MIN+= Transforms/Utils/VNCoercion.cpp
SRCS_MIN+= Transforms/Utils/ValueMapper.cpp
SRCS_MIN+= Transforms/Vectorize/LoadStoreVectorizer.cpp
SRCS_MIN+= Transforms/Vectorize/LoopVectorizationLegality.cpp
SRCS_MIN+= Transforms/Vectorize/LoopVectorize.cpp
SRCS_MIN+= Transforms/Vectorize/SLPVectorizer.cpp
SRCS_MIN+= Transforms/Vectorize/VPlan.cpp
SRCS_MIN+= Transforms/Vectorize/VPlanHCFGBuilder.cpp
SRCS_MIN+= Transforms/Vectorize/VPlanPredicator.cpp
SRCS_MIN+= Transforms/Vectorize/VPlanTransforms.cpp
SRCS_MIN+= Transforms/Vectorize/VPlanVerifier.cpp
SRCS_MIN+= Transforms/Vectorize/VectorCombine.cpp
SRCS_EXT+= Transforms/Vectorize/Vectorize.cpp
SRCS_EXT+= XRay/BlockIndexer.cpp
SRCS_EXT+= XRay/BlockVerifier.cpp
SRCS_EXT+= XRay/FDRRecordProducer.cpp
SRCS_EXT+= XRay/FDRRecords.cpp
SRCS_EXT+= XRay/FDRTraceExpander.cpp
SRCS_EXT+= XRay/FileHeaderReader.cpp
SRCS_EXT+= XRay/InstrumentationMap.cpp
SRCS_EXT+= XRay/LogBuilderConsumer.cpp
SRCS_EXT+= XRay/RecordInitializer.cpp
SRCS_EXT+= XRay/Trace.cpp
SRCS_ALL+= ${SRCS_MIN}
.if !defined(TOOLS_PREFIX) || ${MK_LLD_BOOTSTRAP} != "no"
SRCS_ALL+= ${SRCS_MIW}
.endif
.if ${MK_CLANG_EXTRAS} != "no"
SRCS_ALL+= ${SRCS_EXT}
.endif
.if ${MK_CLANG_FULL} != "no"
SRCS_ALL+= ${SRCS_FUL}
.endif
.if ${MK_CLANG_EXTRAS} != "no" || ${MK_LLD} != "no" || \
(defined(TOOLS_PREFIX) && ${MK_LLD_BOOTSTRAP} != "no")
SRCS_ALL+= ${SRCS_EXL}
.endif
.if ${MK_LLD} != "no" || \
(defined(TOOLS_PREFIX) && ${MK_LLD_BOOTSTRAP} != "no")
SRCS_ALL+= ${SRCS_LLD}
.endif
.if ${MK_CLANG_EXTRAS} != "no" || ${MK_LLDB} != "no"
SRCS_ALL+= ${SRCS_XDB}
.endif
.if ${MK_CLANG_EXTRAS} != "no" || ${MK_LLDB} != "no" || ${MK_LLD} != "no" || \
(defined(TOOLS_PREFIX) && ${MK_LLD_BOOTSTRAP} != "no")
SRCS_ALL+= ${SRCS_XDL}
.endif
.if ${MK_CLANG_EXTRAS} != "no" || ${MK_LLDB} != "no" || !defined(TOOLS_PREFIX)
SRCS_ALL+= ${SRCS_XDW}
.endif
SRCS+= ${GENSRCS}
SRCS+= ${SRCS_ALL:O}
llvm/Frontend/OpenMP/OMP.h.inc: ${LLVM_SRCS}/include/llvm/Frontend/OpenMP/OMP.td
${LLVM_TBLGEN} --gen-directive-decl \
-I ${LLVM_SRCS}/include -d ${.TARGET}.d -o ${.TARGET} \
${LLVM_SRCS}/include/llvm/Frontend/OpenMP/OMP.td
TGHDRS+= llvm/Frontend/OpenMP/OMP.h.inc
llvm/Frontend/OpenMP/OMP.inc: ${LLVM_SRCS}/include/llvm/Frontend/OpenMP/OMP.td
${LLVM_TBLGEN} --gen-directive-impl \
-I ${LLVM_SRCS}/include -d ${.TARGET}.d -o ${.TARGET} \
${LLVM_SRCS}/include/llvm/Frontend/OpenMP/OMP.td
TGHDRS+= llvm/Frontend/OpenMP/OMP.inc
OMP.cpp: ${LLVM_SRCS}/include/llvm/Frontend/OpenMP/OMP.td
${LLVM_TBLGEN} --gen-directive-impl \
-I ${LLVM_SRCS}/include -d ${.TARGET}.d -o ${.TARGET} \
${LLVM_SRCS}/include/llvm/Frontend/OpenMP/OMP.td
GENSRCS+= OMP.cpp
llvm/IR/Attributes.inc: ${LLVM_SRCS}/include/llvm/IR/Attributes.td
${LLVM_TBLGEN} -gen-attrs \
-I ${LLVM_SRCS}/include -d ${.TARGET}.d -o ${.TARGET} \
${LLVM_SRCS}/include/llvm/IR/Attributes.td
TGHDRS+= llvm/IR/Attributes.inc
llvm/IR/IntrinsicEnums.inc: ${LLVM_SRCS}/include/llvm/IR/Intrinsics.td
${LLVM_TBLGEN} -gen-intrinsic-enums \
-I ${LLVM_SRCS}/include -d ${.TARGET}.d -o ${.TARGET} \
${LLVM_SRCS}/include/llvm/IR/Intrinsics.td
TGHDRS+= llvm/IR/IntrinsicEnums.inc
llvm/IR/IntrinsicImpl.inc: ${LLVM_SRCS}/include/llvm/IR/Intrinsics.td
${LLVM_TBLGEN} -gen-intrinsic-impl \
-I ${LLVM_SRCS}/include -d ${.TARGET}.d -o ${.TARGET} \
${LLVM_SRCS}/include/llvm/IR/Intrinsics.td
TGHDRS+= llvm/IR/IntrinsicImpl.inc
.for arch in \
AArch64/aarch64 AMDGPU/amdgcn ARM/arm BPF/bpf Hexagon/hexagon \
Mips/mips NVPTX/nvvm PowerPC/ppc R600/r600 RISCV/riscv S390/s390 \
VE/ve WebAssembly/wasm X86/x86 XCore/xcore
llvm/IR/Intrinsics${arch:H}.h: ${LLVM_SRCS}/include/llvm/IR/Intrinsics.td
${LLVM_TBLGEN} -gen-intrinsic-enums -intrinsic-prefix=${arch:T} \
-I ${LLVM_SRCS}/include -d ${.TARGET}.d -o ${.TARGET} \
${LLVM_SRCS}/include/llvm/IR/Intrinsics.td
TGHDRS+= llvm/IR/Intrinsics${arch:H}.h
.endfor
llvm-lib/Options.inc: ${LLVM_SRCS}/lib/ToolDrivers/llvm-lib/Options.td
${LLVM_TBLGEN} -gen-opt-parser-defs \
-I ${LLVM_SRCS}/include -d ${.TARGET}.d -o ${.TARGET} \
${LLVM_SRCS}/lib/ToolDrivers/llvm-lib/Options.td
TGHDRS+= llvm-lib/Options.inc
CFLAGS.LibDriver.cpp+= -I${.OBJDIR}/llvm-lib
llvm-dlltool/Options.inc: ${LLVM_SRCS}/lib/ToolDrivers/llvm-dlltool/Options.td
${LLVM_TBLGEN} -gen-opt-parser-defs \
-I ${LLVM_SRCS}/include -d ${.TARGET}.d -o ${.TARGET} \
${LLVM_SRCS}/lib/ToolDrivers/llvm-dlltool/Options.td
TGHDRS+= llvm-dlltool/Options.inc
CFLAGS.DlltoolDriver.cpp+= -I${.OBJDIR}/llvm-dlltool
beforebuild:
# 20170724 remove stale Options.inc file, of which there are two different
# versions after upstream r308421, one for llvm-lib, one for llvm-dlltool
.for f in Options.inc
.if exists(${f}) || exists(${f}.d)
@echo Removing stale generated ${f} files
@rm -f ${f} ${f}.d
.endif
.endfor
# Note: some rules are superfluous, not every combination is valid.
.for arch in \
AArch64/AArch64 ARM/ARM BPF/BPF Mips/Mips PowerPC/PPC RISCV/RISCV \
X86/X86
. for hdr in \
AsmMatcher/-gen-asm-matcher \
AsmWriter/-gen-asm-writer \
AsmWriter1/-gen-asm-writer,-asmwriternum=1 \
CallingConv/-gen-callingconv \
CodeEmitter/-gen-emitter \
CompressInstEmitter/-gen-compress-inst-emitter \
DAGISel/-gen-dag-isel \
DisassemblerTables/-gen-disassembler \
EVEX2VEXTables/-gen-x86-EVEX2VEX-tables \
FastISel/-gen-fast-isel \
GlobalISel/-gen-global-isel \
InstrInfo/-gen-instr-info \
MCCodeEmitter/-gen-emitter \
MCPseudoLowering/-gen-pseudo-lowering \
O0PreLegalizeGICombiner/-gen-global-isel-combiner,-combiners=${arch:H}O0PreLegalizerCombinerHelper \
PostLegalizeGICombiner/-gen-global-isel-combiner,-combiners=${arch:H}PostLegalizerCombinerHelper \
PostLegalizeGILowering/-gen-global-isel-combiner,-combiners=${arch:H}PostLegalizerLoweringHelper \
PreLegalizeGICombiner/-gen-global-isel-combiner,-combiners=${arch:H}PreLegalizerCombinerHelper \
RegisterBank/-gen-register-bank \
RegisterInfo/-gen-register-info \
SearchableTables/-gen-searchable-tables \
SubtargetInfo/-gen-subtarget \
SystemOperands/-gen-searchable-tables \
SystemRegister/-gen-searchable-tables
${arch:T}Gen${hdr:H}.inc: ${LLVM_SRCS}/lib/Target/${arch:H}/${arch:T}.td
${LLVM_TBLGEN} ${hdr:T:C/,/ /g} \
-I ${LLVM_SRCS}/include -I ${LLVM_SRCS}/lib/Target/${arch:H} \
-d ${.TARGET}.d -o ${.TARGET} \
${LLVM_SRCS}/lib/Target/${arch:H}/${arch:T}.td
. endfor
.endfor
.if ${MK_LLVM_TARGET_AARCH64} != "no"
TGHDRS+= AArch64GenAsmMatcher.inc
TGHDRS+= AArch64GenAsmWriter.inc
TGHDRS+= AArch64GenAsmWriter1.inc
TGHDRS+= AArch64GenCallingConv.inc
TGHDRS+= AArch64GenDAGISel.inc
TGHDRS+= AArch64GenDisassemblerTables.inc
TGHDRS+= AArch64GenFastISel.inc
TGHDRS+= AArch64GenGlobalISel.inc
TGHDRS+= AArch64GenInstrInfo.inc
TGHDRS+= AArch64GenMCCodeEmitter.inc
TGHDRS+= AArch64GenMCPseudoLowering.inc
TGHDRS+= AArch64GenO0PreLegalizeGICombiner.inc
TGHDRS+= AArch64GenPostLegalizeGICombiner.inc
TGHDRS+= AArch64GenPostLegalizeGILowering.inc
TGHDRS+= AArch64GenPreLegalizeGICombiner.inc
TGHDRS+= AArch64GenRegisterBank.inc
TGHDRS+= AArch64GenRegisterInfo.inc
TGHDRS+= AArch64GenSubtargetInfo.inc
TGHDRS+= AArch64GenSystemOperands.inc
.endif # MK_LLVM_TARGET_AARCH64
.if ${MK_LLVM_TARGET_ARM} != "no"
TGHDRS+= ARMGenAsmMatcher.inc
TGHDRS+= ARMGenAsmWriter.inc
TGHDRS+= ARMGenCallingConv.inc
TGHDRS+= ARMGenDAGISel.inc
TGHDRS+= ARMGenDisassemblerTables.inc
TGHDRS+= ARMGenFastISel.inc
TGHDRS+= ARMGenGlobalISel.inc
TGHDRS+= ARMGenInstrInfo.inc
TGHDRS+= ARMGenMCCodeEmitter.inc
TGHDRS+= ARMGenMCPseudoLowering.inc
TGHDRS+= ARMGenRegisterBank.inc
TGHDRS+= ARMGenRegisterInfo.inc
TGHDRS+= ARMGenSubtargetInfo.inc
TGHDRS+= ARMGenSystemRegister.inc
.endif # MK_LLVM_TARGET_ARM
.if ${MK_LLVM_TARGET_BPF} != "no"
TGHDRS+= BPFGenAsmMatcher.inc
TGHDRS+= BPFGenAsmWriter.inc
TGHDRS+= BPFGenCallingConv.inc
TGHDRS+= BPFGenDAGISel.inc
TGHDRS+= BPFGenDisassemblerTables.inc
TGHDRS+= BPFGenInstrInfo.inc
TGHDRS+= BPFGenMCCodeEmitter.inc
TGHDRS+= BPFGenRegisterInfo.inc
TGHDRS+= BPFGenSubtargetInfo.inc
.endif # MK_LLVM_TARGET_BPF
.if ${MK_LLVM_TARGET_MIPS} != "no"
TGHDRS+= MipsGenAsmMatcher.inc
TGHDRS+= MipsGenAsmWriter.inc
TGHDRS+= MipsGenCallingConv.inc
TGHDRS+= MipsGenDAGISel.inc
TGHDRS+= MipsGenDisassemblerTables.inc
TGHDRS+= MipsGenFastISel.inc
TGHDRS+= MipsGenGlobalISel.inc
TGHDRS+= MipsGenInstrInfo.inc
TGHDRS+= MipsGenMCCodeEmitter.inc
TGHDRS+= MipsGenMCPseudoLowering.inc
TGHDRS+= MipsGenRegisterBank.inc
TGHDRS+= MipsGenRegisterInfo.inc
TGHDRS+= MipsGenSubtargetInfo.inc
.endif # MK_LLVM_TARGET_MIPS
.if ${MK_LLVM_TARGET_POWERPC} != "no"
TGHDRS+= PPCGenAsmMatcher.inc
TGHDRS+= PPCGenAsmWriter.inc
TGHDRS+= PPCGenCallingConv.inc
TGHDRS+= PPCGenDAGISel.inc
TGHDRS+= PPCGenDisassemblerTables.inc
TGHDRS+= PPCGenFastISel.inc
TGHDRS+= PPCGenGlobalISel.inc
TGHDRS+= PPCGenInstrInfo.inc
TGHDRS+= PPCGenMCCodeEmitter.inc
TGHDRS+= PPCGenRegisterBank.inc
TGHDRS+= PPCGenRegisterInfo.inc
TGHDRS+= PPCGenSubtargetInfo.inc
.endif # MK_LLVM_TARGET_POWERPC
.if ${MK_LLVM_TARGET_RISCV} != "no"
TGHDRS+= RISCVGenAsmMatcher.inc
TGHDRS+= RISCVGenAsmWriter.inc
TGHDRS+= RISCVGenCallingConv.inc
TGHDRS+= RISCVGenCompressInstEmitter.inc
TGHDRS+= RISCVGenDAGISel.inc
TGHDRS+= RISCVGenDisassemblerTables.inc
TGHDRS+= RISCVGenGlobalISel.inc
TGHDRS+= RISCVGenInstrInfo.inc
TGHDRS+= RISCVGenMCCodeEmitter.inc
TGHDRS+= RISCVGenMCPseudoLowering.inc
TGHDRS+= RISCVGenRegisterBank.inc
TGHDRS+= RISCVGenRegisterInfo.inc
TGHDRS+= RISCVGenSearchableTables.inc
TGHDRS+= RISCVGenSubtargetInfo.inc
TGHDRS+= RISCVGenSystemOperands.inc
.endif # MK_LLVM_TARGET_RISCV
.if ${MK_LLVM_TARGET_X86} != "no"
TGHDRS+= X86GenAsmMatcher.inc
TGHDRS+= X86GenAsmWriter.inc
TGHDRS+= X86GenAsmWriter1.inc
TGHDRS+= X86GenCallingConv.inc
TGHDRS+= X86GenDAGISel.inc
TGHDRS+= X86GenDisassemblerTables.inc
TGHDRS+= X86GenEVEX2VEXTables.inc
TGHDRS+= X86GenFastISel.inc
TGHDRS+= X86GenGlobalISel.inc
TGHDRS+= X86GenInstrInfo.inc
TGHDRS+= X86GenRegisterBank.inc
TGHDRS+= X86GenRegisterInfo.inc
TGHDRS+= X86GenSubtargetInfo.inc
.endif # MK_LLVM_TARGET_X86
DEPENDFILES+= ${TGHDRS:C/$/.d/}
DPSRCS+= ${TGHDRS}
CLEANFILES+= ${TGHDRS} ${TGHDRS:C/$/.d/}
CLEANFILES+= ${GENSRCS} ${GENSRCS:C/$/.d/}
.include "../llvm.build.mk"
.include <bsd.lib.mk>
diff --git a/lib/libclang_rt/asan_static/Makefile b/lib/libclang_rt/asan_static/Makefile
index 6f0af674dbdd..cc4d160a9b99 100644
--- a/lib/libclang_rt/asan_static/Makefile
+++ b/lib/libclang_rt/asan_static/Makefile
@@ -1,10 +1,10 @@
# $FreeBSD$
.include <bsd.init.mk>
-LIB= clang_rt.asan-static-${CRTARCH}
+LIB= clang_rt.asan_static-${CRTARCH}
SRCS+= asan/asan_rtl_static.cpp
SRCS+= asan/asan_rtl_x86_64.S
.include <bsd.lib.mk>
