diff --git a/contrib/llvm-project/clang/lib/Basic/Targets/X86.cpp b/contrib/llvm-project/clang/lib/Basic/Targets/X86.cpp index 694a8095e336..c5ad1c7d2c2e 100644 --- a/contrib/llvm-project/clang/lib/Basic/Targets/X86.cpp +++ b/contrib/llvm-project/clang/lib/Basic/Targets/X86.cpp @@ -1,1508 +1,1509 @@ //===--- X86.cpp - Implement X86 target feature support -------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements X86 TargetInfo objects. // //===----------------------------------------------------------------------===// #include "X86.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/TargetBuiltins.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/X86TargetParser.h" namespace clang { namespace targets { const Builtin::Info BuiltinInfoX86[] = { #define BUILTIN(ID, TYPE, ATTRS) \ {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, #define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANGS, FEATURE) \ {#ID, TYPE, ATTRS, HEADER, LANGS, FEATURE}, #include "clang/Basic/BuiltinsX86.def" #define BUILTIN(ID, TYPE, ATTRS) \ {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, #define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANGS, FEATURE) \ {#ID, TYPE, ATTRS, HEADER, LANGS, FEATURE}, #include "clang/Basic/BuiltinsX86_64.def" }; static const char *const GCCRegNames[] = { "ax", "dx", "cx", "bx", "si", "di", "bp", "sp", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)", "argp", "flags", "fpcr", "fpsr", "dirflag", "frame", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15", "xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23", "xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31", "ymm16", "ymm17", "ymm18", "ymm19", "ymm20", "ymm21", "ymm22", "ymm23", "ymm24", "ymm25", "ymm26", "ymm27", "ymm28", "ymm29", "ymm30", "ymm31", "zmm0", "zmm1", "zmm2", "zmm3", "zmm4", "zmm5", "zmm6", "zmm7", "zmm8", "zmm9", "zmm10", "zmm11", "zmm12", "zmm13", "zmm14", "zmm15", "zmm16", "zmm17", "zmm18", "zmm19", "zmm20", "zmm21", "zmm22", "zmm23", "zmm24", "zmm25", "zmm26", "zmm27", "zmm28", "zmm29", "zmm30", "zmm31", "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7", "cr0", "cr2", "cr3", "cr4", "cr8", "dr0", "dr1", "dr2", "dr3", "dr6", "dr7", "bnd0", "bnd1", "bnd2", "bnd3", "tmm0", "tmm1", "tmm2", "tmm3", "tmm4", "tmm5", "tmm6", "tmm7", }; const TargetInfo::AddlRegName AddlRegNames[] = { {{"al", "ah", "eax", "rax"}, 0}, {{"bl", "bh", "ebx", "rbx"}, 3}, {{"cl", "ch", "ecx", "rcx"}, 2}, {{"dl", "dh", "edx", "rdx"}, 1}, {{"esi", "rsi"}, 4}, {{"edi", "rdi"}, 5}, {{"esp", "rsp"}, 7}, {{"ebp", "rbp"}, 6}, {{"r8d", "r8w", "r8b"}, 38}, {{"r9d", "r9w", 
"r9b"}, 39}, {{"r10d", "r10w", "r10b"}, 40}, {{"r11d", "r11w", "r11b"}, 41}, {{"r12d", "r12w", "r12b"}, 42}, {{"r13d", "r13w", "r13b"}, 43}, {{"r14d", "r14w", "r14b"}, 44}, {{"r15d", "r15w", "r15b"}, 45}, }; } // namespace targets } // namespace clang using namespace clang; using namespace clang::targets; bool X86TargetInfo::setFPMath(StringRef Name) { if (Name == "387") { FPMath = FP_387; return true; } if (Name == "sse") { FPMath = FP_SSE; return true; } return false; } bool X86TargetInfo::initFeatureMap( llvm::StringMap &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector &FeaturesVec) const { // FIXME: This *really* should not be here. // X86_64 always has SSE2. if (getTriple().getArch() == llvm::Triple::x86_64) setFeatureEnabled(Features, "sse2", true); using namespace llvm::X86; SmallVector CPUFeatures; getFeaturesForCPU(CPU, CPUFeatures); for (auto &F : CPUFeatures) setFeatureEnabled(Features, F, true); if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec)) return false; // Can't do this earlier because we need to be able to explicitly enable // or disable these features and the things that they depend upon. // Enable popcnt if sse4.2 is enabled and popcnt is not explicitly disabled. auto I = Features.find("sse4.2"); if (I != Features.end() && I->getValue() && llvm::find(FeaturesVec, "-popcnt") == FeaturesVec.end()) Features["popcnt"] = true; // Additionally, if SSE is enabled and mmx is not explicitly disabled, // then enable MMX. I = Features.find("sse"); if (I != Features.end() && I->getValue() && llvm::find(FeaturesVec, "-mmx") == FeaturesVec.end()) Features["mmx"] = true; // Enable xsave if avx is enabled and xsave is not explicitly disabled. I = Features.find("avx"); if (I != Features.end() && I->getValue() && llvm::find(FeaturesVec, "-xsave") == FeaturesVec.end()) Features["xsave"] = true; return true; } void X86TargetInfo::setFeatureEnabled(llvm::StringMap &Features, StringRef Name, bool Enabled) const { if (Name == "sse4") { // We can get here via the __target__ attribute since that's not controlled // via the -msse4/-mno-sse4 command line alias. Handle this the same way // here - turn on the sse4.2 if enabled, turn off the sse4.1 level if // disabled. if (Enabled) Name = "sse4.2"; else Name = "sse4.1"; } Features[Name] = Enabled; llvm::X86::updateImpliedFeatures(Name, Enabled, Features); } /// handleTargetFeatures - Perform initialization based on the user /// configured set of features. 
bool X86TargetInfo::handleTargetFeatures(std::vector &Features, DiagnosticsEngine &Diags) { for (const auto &Feature : Features) { if (Feature[0] != '+') continue; if (Feature == "+aes") { HasAES = true; } else if (Feature == "+vaes") { HasVAES = true; } else if (Feature == "+pclmul") { HasPCLMUL = true; } else if (Feature == "+vpclmulqdq") { HasVPCLMULQDQ = true; } else if (Feature == "+lzcnt") { HasLZCNT = true; } else if (Feature == "+rdrnd") { HasRDRND = true; } else if (Feature == "+fsgsbase") { HasFSGSBASE = true; } else if (Feature == "+bmi") { HasBMI = true; } else if (Feature == "+bmi2") { HasBMI2 = true; } else if (Feature == "+popcnt") { HasPOPCNT = true; } else if (Feature == "+rtm") { HasRTM = true; } else if (Feature == "+prfchw") { HasPRFCHW = true; } else if (Feature == "+rdseed") { HasRDSEED = true; } else if (Feature == "+adx") { HasADX = true; } else if (Feature == "+tbm") { HasTBM = true; } else if (Feature == "+lwp") { HasLWP = true; } else if (Feature == "+fma") { HasFMA = true; } else if (Feature == "+f16c") { HasF16C = true; } else if (Feature == "+gfni") { HasGFNI = true; } else if (Feature == "+avx512cd") { HasAVX512CD = true; } else if (Feature == "+avx512vpopcntdq") { HasAVX512VPOPCNTDQ = true; } else if (Feature == "+avx512vnni") { HasAVX512VNNI = true; } else if (Feature == "+avx512bf16") { HasAVX512BF16 = true; } else if (Feature == "+avx512er") { HasAVX512ER = true; } else if (Feature == "+avx512pf") { HasAVX512PF = true; } else if (Feature == "+avx512dq") { HasAVX512DQ = true; } else if (Feature == "+avx512bitalg") { HasAVX512BITALG = true; } else if (Feature == "+avx512bw") { HasAVX512BW = true; } else if (Feature == "+avx512vl") { HasAVX512VL = true; } else if (Feature == "+avx512vbmi") { HasAVX512VBMI = true; } else if (Feature == "+avx512vbmi2") { HasAVX512VBMI2 = true; } else if (Feature == "+avx512ifma") { HasAVX512IFMA = true; } else if (Feature == "+avx512vp2intersect") { HasAVX512VP2INTERSECT = true; } else if (Feature == "+sha") { HasSHA = true; } else if (Feature == "+shstk") { HasSHSTK = true; } else if (Feature == "+movbe") { HasMOVBE = true; } else if (Feature == "+sgx") { HasSGX = true; } else if (Feature == "+cx8") { HasCX8 = true; } else if (Feature == "+cx16") { HasCX16 = true; } else if (Feature == "+fxsr") { HasFXSR = true; } else if (Feature == "+xsave") { HasXSAVE = true; } else if (Feature == "+xsaveopt") { HasXSAVEOPT = true; } else if (Feature == "+xsavec") { HasXSAVEC = true; } else if (Feature == "+xsaves") { HasXSAVES = true; } else if (Feature == "+mwaitx") { HasMWAITX = true; } else if (Feature == "+pku") { HasPKU = true; } else if (Feature == "+clflushopt") { HasCLFLUSHOPT = true; } else if (Feature == "+clwb") { HasCLWB = true; } else if (Feature == "+wbnoinvd") { HasWBNOINVD = true; } else if (Feature == "+prefetchwt1") { HasPREFETCHWT1 = true; } else if (Feature == "+clzero") { HasCLZERO = true; } else if (Feature == "+cldemote") { HasCLDEMOTE = true; } else if (Feature == "+rdpid") { HasRDPID = true; } else if (Feature == "+kl") { HasKL = true; } else if (Feature == "+widekl") { HasWIDEKL = true; } else if (Feature == "+retpoline-external-thunk") { HasRetpolineExternalThunk = true; } else if (Feature == "+sahf") { HasLAHFSAHF = true; } else if (Feature == "+waitpkg") { HasWAITPKG = true; } else if (Feature == "+movdiri") { HasMOVDIRI = true; } else if (Feature == "+movdir64b") { HasMOVDIR64B = true; } else if (Feature == "+pconfig") { HasPCONFIG = true; } else if (Feature == "+ptwrite") { HasPTWRITE = true; } else if 
(Feature == "+invpcid") { HasINVPCID = true; } else if (Feature == "+enqcmd") { HasENQCMD = true; } else if (Feature == "+hreset") { HasHRESET = true; } else if (Feature == "+amx-bf16") { HasAMXBF16 = true; } else if (Feature == "+amx-int8") { HasAMXINT8 = true; } else if (Feature == "+amx-tile") { HasAMXTILE = true; } else if (Feature == "+avxvnni") { HasAVXVNNI = true; } else if (Feature == "+serialize") { HasSERIALIZE = true; } else if (Feature == "+tsxldtrk") { HasTSXLDTRK = true; } else if (Feature == "+uintr") { HasUINTR = true; } X86SSEEnum Level = llvm::StringSwitch(Feature) .Case("+avx512f", AVX512F) .Case("+avx2", AVX2) .Case("+avx", AVX) .Case("+sse4.2", SSE42) .Case("+sse4.1", SSE41) .Case("+ssse3", SSSE3) .Case("+sse3", SSE3) .Case("+sse2", SSE2) .Case("+sse", SSE1) .Default(NoSSE); SSELevel = std::max(SSELevel, Level); MMX3DNowEnum ThreeDNowLevel = llvm::StringSwitch(Feature) .Case("+3dnowa", AMD3DNowAthlon) .Case("+3dnow", AMD3DNow) .Case("+mmx", MMX) .Default(NoMMX3DNow); MMX3DNowLevel = std::max(MMX3DNowLevel, ThreeDNowLevel); XOPEnum XLevel = llvm::StringSwitch(Feature) .Case("+xop", XOP) .Case("+fma4", FMA4) .Case("+sse4a", SSE4A) .Default(NoXOP); XOPLevel = std::max(XOPLevel, XLevel); } // LLVM doesn't have a separate switch for fpmath, so only accept it if it // matches the selected sse level. if ((FPMath == FP_SSE && SSELevel < SSE1) || (FPMath == FP_387 && SSELevel >= SSE1)) { Diags.Report(diag::err_target_unsupported_fpmath) << (FPMath == FP_SSE ? "sse" : "387"); return false; } SimdDefaultAlign = hasFeature("avx512f") ? 512 : hasFeature("avx") ? 256 : 128; return true; } /// X86TargetInfo::getTargetDefines - Return the set of the X86-specific macro /// definitions for this particular subtarget. void X86TargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { // Inline assembly supports X86 flag outputs. Builder.defineMacro("__GCC_ASM_FLAG_OUTPUTS__"); std::string CodeModel = getTargetOpts().CodeModel; if (CodeModel == "default") CodeModel = "small"; Builder.defineMacro("__code_model_" + CodeModel + "__"); // Target identification. if (getTriple().getArch() == llvm::Triple::x86_64) { Builder.defineMacro("__amd64__"); Builder.defineMacro("__amd64"); Builder.defineMacro("__x86_64"); Builder.defineMacro("__x86_64__"); if (getTriple().getArchName() == "x86_64h") { Builder.defineMacro("__x86_64h"); Builder.defineMacro("__x86_64h__"); } } else { DefineStd(Builder, "i386", Opts); } Builder.defineMacro("__SEG_GS"); Builder.defineMacro("__SEG_FS"); Builder.defineMacro("__seg_gs", "__attribute__((address_space(256)))"); Builder.defineMacro("__seg_fs", "__attribute__((address_space(257)))"); // Subtarget options. // FIXME: We are hard-coding the tune parameters based on the CPU, but they // truly should be based on -mtune options. using namespace llvm::X86; switch (CPU) { case CK_None: break; case CK_i386: // The rest are coming from the i386 define above. 
Builder.defineMacro("__tune_i386__"); break; case CK_i486: case CK_WinChipC6: case CK_WinChip2: case CK_C3: defineCPUMacros(Builder, "i486"); break; case CK_PentiumMMX: Builder.defineMacro("__pentium_mmx__"); Builder.defineMacro("__tune_pentium_mmx__"); LLVM_FALLTHROUGH; case CK_i586: case CK_Pentium: defineCPUMacros(Builder, "i586"); defineCPUMacros(Builder, "pentium"); break; case CK_Pentium3: case CK_PentiumM: Builder.defineMacro("__tune_pentium3__"); LLVM_FALLTHROUGH; case CK_Pentium2: case CK_C3_2: Builder.defineMacro("__tune_pentium2__"); LLVM_FALLTHROUGH; case CK_PentiumPro: case CK_i686: defineCPUMacros(Builder, "i686"); defineCPUMacros(Builder, "pentiumpro"); break; case CK_Pentium4: defineCPUMacros(Builder, "pentium4"); break; case CK_Yonah: case CK_Prescott: case CK_Nocona: defineCPUMacros(Builder, "nocona"); break; case CK_Core2: case CK_Penryn: defineCPUMacros(Builder, "core2"); break; case CK_Bonnell: defineCPUMacros(Builder, "atom"); break; case CK_Silvermont: defineCPUMacros(Builder, "slm"); break; case CK_Goldmont: defineCPUMacros(Builder, "goldmont"); break; case CK_GoldmontPlus: defineCPUMacros(Builder, "goldmont_plus"); break; case CK_Tremont: defineCPUMacros(Builder, "tremont"); break; case CK_Nehalem: case CK_Westmere: case CK_SandyBridge: case CK_IvyBridge: case CK_Haswell: case CK_Broadwell: case CK_SkylakeClient: case CK_SkylakeServer: case CK_Cascadelake: case CK_Cooperlake: case CK_Cannonlake: case CK_IcelakeClient: case CK_IcelakeServer: case CK_Tigerlake: case CK_SapphireRapids: case CK_Alderlake: // FIXME: Historically, we defined this legacy name, it would be nice to // remove it at some point. We've never exposed fine-grained names for // recent primary x86 CPUs, and we should keep it that way. defineCPUMacros(Builder, "corei7"); break; case CK_KNL: defineCPUMacros(Builder, "knl"); break; case CK_KNM: break; case CK_Lakemont: defineCPUMacros(Builder, "i586", /*Tuning*/false); defineCPUMacros(Builder, "pentium", /*Tuning*/false); Builder.defineMacro("__tune_lakemont__"); break; case CK_K6_2: Builder.defineMacro("__k6_2__"); Builder.defineMacro("__tune_k6_2__"); LLVM_FALLTHROUGH; case CK_K6_3: if (CPU != CK_K6_2) { // In case of fallthrough // FIXME: GCC may be enabling these in cases where some other k6 // architecture is specified but -m3dnow is explicitly provided. The // exact semantics need to be determined and emulated here. 
Builder.defineMacro("__k6_3__"); Builder.defineMacro("__tune_k6_3__"); } LLVM_FALLTHROUGH; case CK_K6: defineCPUMacros(Builder, "k6"); break; case CK_Athlon: case CK_AthlonXP: defineCPUMacros(Builder, "athlon"); if (SSELevel != NoSSE) { Builder.defineMacro("__athlon_sse__"); Builder.defineMacro("__tune_athlon_sse__"); } break; case CK_K8: case CK_K8SSE3: case CK_x86_64: + defineCPUMacros(Builder, "k8"); + break; case CK_x86_64_v2: case CK_x86_64_v3: case CK_x86_64_v4: - defineCPUMacros(Builder, "k8"); break; case CK_AMDFAM10: defineCPUMacros(Builder, "amdfam10"); break; case CK_BTVER1: defineCPUMacros(Builder, "btver1"); break; case CK_BTVER2: defineCPUMacros(Builder, "btver2"); break; case CK_BDVER1: defineCPUMacros(Builder, "bdver1"); break; case CK_BDVER2: defineCPUMacros(Builder, "bdver2"); break; case CK_BDVER3: defineCPUMacros(Builder, "bdver3"); break; case CK_BDVER4: defineCPUMacros(Builder, "bdver4"); break; case CK_ZNVER1: defineCPUMacros(Builder, "znver1"); break; case CK_ZNVER2: defineCPUMacros(Builder, "znver2"); break; case CK_ZNVER3: defineCPUMacros(Builder, "znver3"); break; case CK_Geode: defineCPUMacros(Builder, "geode"); break; } // Target properties. Builder.defineMacro("__REGISTER_PREFIX__", ""); // Define __NO_MATH_INLINES on linux/x86 so that we don't get inline // functions in glibc header files that use FP Stack inline asm which the // backend can't deal with (PR879). Builder.defineMacro("__NO_MATH_INLINES"); if (HasAES) Builder.defineMacro("__AES__"); if (HasVAES) Builder.defineMacro("__VAES__"); if (HasPCLMUL) Builder.defineMacro("__PCLMUL__"); if (HasVPCLMULQDQ) Builder.defineMacro("__VPCLMULQDQ__"); // Note, in 32-bit mode, GCC does not define the macro if -mno-sahf. In LLVM, // the feature flag only applies to 64-bit mode. 
if (HasLAHFSAHF || getTriple().getArch() == llvm::Triple::x86) Builder.defineMacro("__LAHF_SAHF__"); if (HasLZCNT) Builder.defineMacro("__LZCNT__"); if (HasRDRND) Builder.defineMacro("__RDRND__"); if (HasFSGSBASE) Builder.defineMacro("__FSGSBASE__"); if (HasBMI) Builder.defineMacro("__BMI__"); if (HasBMI2) Builder.defineMacro("__BMI2__"); if (HasPOPCNT) Builder.defineMacro("__POPCNT__"); if (HasRTM) Builder.defineMacro("__RTM__"); if (HasPRFCHW) Builder.defineMacro("__PRFCHW__"); if (HasRDSEED) Builder.defineMacro("__RDSEED__"); if (HasADX) Builder.defineMacro("__ADX__"); if (HasTBM) Builder.defineMacro("__TBM__"); if (HasLWP) Builder.defineMacro("__LWP__"); if (HasMWAITX) Builder.defineMacro("__MWAITX__"); if (HasMOVBE) Builder.defineMacro("__MOVBE__"); switch (XOPLevel) { case XOP: Builder.defineMacro("__XOP__"); LLVM_FALLTHROUGH; case FMA4: Builder.defineMacro("__FMA4__"); LLVM_FALLTHROUGH; case SSE4A: Builder.defineMacro("__SSE4A__"); LLVM_FALLTHROUGH; case NoXOP: break; } if (HasFMA) Builder.defineMacro("__FMA__"); if (HasF16C) Builder.defineMacro("__F16C__"); if (HasGFNI) Builder.defineMacro("__GFNI__"); if (HasAVX512CD) Builder.defineMacro("__AVX512CD__"); if (HasAVX512VPOPCNTDQ) Builder.defineMacro("__AVX512VPOPCNTDQ__"); if (HasAVX512VNNI) Builder.defineMacro("__AVX512VNNI__"); if (HasAVX512BF16) Builder.defineMacro("__AVX512BF16__"); if (HasAVX512ER) Builder.defineMacro("__AVX512ER__"); if (HasAVX512PF) Builder.defineMacro("__AVX512PF__"); if (HasAVX512DQ) Builder.defineMacro("__AVX512DQ__"); if (HasAVX512BITALG) Builder.defineMacro("__AVX512BITALG__"); if (HasAVX512BW) Builder.defineMacro("__AVX512BW__"); if (HasAVX512VL) Builder.defineMacro("__AVX512VL__"); if (HasAVX512VBMI) Builder.defineMacro("__AVX512VBMI__"); if (HasAVX512VBMI2) Builder.defineMacro("__AVX512VBMI2__"); if (HasAVX512IFMA) Builder.defineMacro("__AVX512IFMA__"); if (HasAVX512VP2INTERSECT) Builder.defineMacro("__AVX512VP2INTERSECT__"); if (HasSHA) Builder.defineMacro("__SHA__"); if (HasFXSR) Builder.defineMacro("__FXSR__"); if (HasXSAVE) Builder.defineMacro("__XSAVE__"); if (HasXSAVEOPT) Builder.defineMacro("__XSAVEOPT__"); if (HasXSAVEC) Builder.defineMacro("__XSAVEC__"); if (HasXSAVES) Builder.defineMacro("__XSAVES__"); if (HasPKU) Builder.defineMacro("__PKU__"); if (HasCLFLUSHOPT) Builder.defineMacro("__CLFLUSHOPT__"); if (HasCLWB) Builder.defineMacro("__CLWB__"); if (HasWBNOINVD) Builder.defineMacro("__WBNOINVD__"); if (HasSHSTK) Builder.defineMacro("__SHSTK__"); if (HasSGX) Builder.defineMacro("__SGX__"); if (HasPREFETCHWT1) Builder.defineMacro("__PREFETCHWT1__"); if (HasCLZERO) Builder.defineMacro("__CLZERO__"); if (HasKL) Builder.defineMacro("__KL__"); if (HasWIDEKL) Builder.defineMacro("__WIDEKL__"); if (HasRDPID) Builder.defineMacro("__RDPID__"); if (HasCLDEMOTE) Builder.defineMacro("__CLDEMOTE__"); if (HasWAITPKG) Builder.defineMacro("__WAITPKG__"); if (HasMOVDIRI) Builder.defineMacro("__MOVDIRI__"); if (HasMOVDIR64B) Builder.defineMacro("__MOVDIR64B__"); if (HasPCONFIG) Builder.defineMacro("__PCONFIG__"); if (HasPTWRITE) Builder.defineMacro("__PTWRITE__"); if (HasINVPCID) Builder.defineMacro("__INVPCID__"); if (HasENQCMD) Builder.defineMacro("__ENQCMD__"); if (HasHRESET) Builder.defineMacro("__HRESET__"); if (HasAMXTILE) Builder.defineMacro("__AMXTILE__"); if (HasAMXINT8) Builder.defineMacro("__AMXINT8__"); if (HasAMXBF16) Builder.defineMacro("__AMXBF16__"); if (HasAVXVNNI) Builder.defineMacro("__AVXVNNI__"); if (HasSERIALIZE) Builder.defineMacro("__SERIALIZE__"); if (HasTSXLDTRK) 
Builder.defineMacro("__TSXLDTRK__"); if (HasUINTR) Builder.defineMacro("__UINTR__"); // Each case falls through to the previous one here. switch (SSELevel) { case AVX512F: Builder.defineMacro("__AVX512F__"); LLVM_FALLTHROUGH; case AVX2: Builder.defineMacro("__AVX2__"); LLVM_FALLTHROUGH; case AVX: Builder.defineMacro("__AVX__"); LLVM_FALLTHROUGH; case SSE42: Builder.defineMacro("__SSE4_2__"); LLVM_FALLTHROUGH; case SSE41: Builder.defineMacro("__SSE4_1__"); LLVM_FALLTHROUGH; case SSSE3: Builder.defineMacro("__SSSE3__"); LLVM_FALLTHROUGH; case SSE3: Builder.defineMacro("__SSE3__"); LLVM_FALLTHROUGH; case SSE2: Builder.defineMacro("__SSE2__"); Builder.defineMacro("__SSE2_MATH__"); // -mfp-math=sse always implied. LLVM_FALLTHROUGH; case SSE1: Builder.defineMacro("__SSE__"); Builder.defineMacro("__SSE_MATH__"); // -mfp-math=sse always implied. LLVM_FALLTHROUGH; case NoSSE: break; } if (Opts.MicrosoftExt && getTriple().getArch() == llvm::Triple::x86) { switch (SSELevel) { case AVX512F: case AVX2: case AVX: case SSE42: case SSE41: case SSSE3: case SSE3: case SSE2: Builder.defineMacro("_M_IX86_FP", Twine(2)); break; case SSE1: Builder.defineMacro("_M_IX86_FP", Twine(1)); break; default: Builder.defineMacro("_M_IX86_FP", Twine(0)); break; } } // Each case falls through to the previous one here. switch (MMX3DNowLevel) { case AMD3DNowAthlon: Builder.defineMacro("__3dNOW_A__"); LLVM_FALLTHROUGH; case AMD3DNow: Builder.defineMacro("__3dNOW__"); LLVM_FALLTHROUGH; case MMX: Builder.defineMacro("__MMX__"); LLVM_FALLTHROUGH; case NoMMX3DNow: break; } if (CPU >= CK_i486 || CPU == CK_None) { Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4"); } if (HasCX8) Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8"); if (HasCX16 && getTriple().getArch() == llvm::Triple::x86_64) Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16"); if (HasFloat128) Builder.defineMacro("__SIZEOF_FLOAT128__", "16"); } bool X86TargetInfo::isValidFeatureName(StringRef Name) const { return llvm::StringSwitch(Name) .Case("3dnow", true) .Case("3dnowa", true) .Case("adx", true) .Case("aes", true) .Case("amx-bf16", true) .Case("amx-int8", true) .Case("amx-tile", true) .Case("avx", true) .Case("avx2", true) .Case("avx512f", true) .Case("avx512cd", true) .Case("avx512vpopcntdq", true) .Case("avx512vnni", true) .Case("avx512bf16", true) .Case("avx512er", true) .Case("avx512pf", true) .Case("avx512dq", true) .Case("avx512bitalg", true) .Case("avx512bw", true) .Case("avx512vl", true) .Case("avx512vbmi", true) .Case("avx512vbmi2", true) .Case("avx512ifma", true) .Case("avx512vp2intersect", true) .Case("avxvnni", true) .Case("bmi", true) .Case("bmi2", true) .Case("cldemote", true) .Case("clflushopt", true) .Case("clwb", true) .Case("clzero", true) .Case("cx16", true) .Case("enqcmd", true) .Case("f16c", true) .Case("fma", true) .Case("fma4", true) .Case("fsgsbase", true) .Case("fxsr", true) .Case("gfni", true) .Case("hreset", true) .Case("invpcid", true) .Case("kl", true) .Case("widekl", true) .Case("lwp", true) .Case("lzcnt", true) .Case("mmx", true) .Case("movbe", true) .Case("movdiri", true) .Case("movdir64b", true) .Case("mwaitx", true) .Case("pclmul", true) .Case("pconfig", true) .Case("pku", true) .Case("popcnt", true) .Case("prefetchwt1", true) .Case("prfchw", true) .Case("ptwrite", true) .Case("rdpid", true) .Case("rdrnd", true) .Case("rdseed", true) .Case("rtm", true) .Case("sahf", true) .Case("serialize", 
true) .Case("sgx", true) .Case("sha", true) .Case("shstk", true) .Case("sse", true) .Case("sse2", true) .Case("sse3", true) .Case("ssse3", true) .Case("sse4", true) .Case("sse4.1", true) .Case("sse4.2", true) .Case("sse4a", true) .Case("tbm", true) .Case("tsxldtrk", true) .Case("uintr", true) .Case("vaes", true) .Case("vpclmulqdq", true) .Case("wbnoinvd", true) .Case("waitpkg", true) .Case("x87", true) .Case("xop", true) .Case("xsave", true) .Case("xsavec", true) .Case("xsaves", true) .Case("xsaveopt", true) .Default(false); } bool X86TargetInfo::hasFeature(StringRef Feature) const { return llvm::StringSwitch(Feature) .Case("adx", HasADX) .Case("aes", HasAES) .Case("amx-bf16", HasAMXBF16) .Case("amx-int8", HasAMXINT8) .Case("amx-tile", HasAMXTILE) .Case("avxvnni", HasAVXVNNI) .Case("avx", SSELevel >= AVX) .Case("avx2", SSELevel >= AVX2) .Case("avx512f", SSELevel >= AVX512F) .Case("avx512cd", HasAVX512CD) .Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ) .Case("avx512vnni", HasAVX512VNNI) .Case("avx512bf16", HasAVX512BF16) .Case("avx512er", HasAVX512ER) .Case("avx512pf", HasAVX512PF) .Case("avx512dq", HasAVX512DQ) .Case("avx512bitalg", HasAVX512BITALG) .Case("avx512bw", HasAVX512BW) .Case("avx512vl", HasAVX512VL) .Case("avx512vbmi", HasAVX512VBMI) .Case("avx512vbmi2", HasAVX512VBMI2) .Case("avx512ifma", HasAVX512IFMA) .Case("avx512vp2intersect", HasAVX512VP2INTERSECT) .Case("bmi", HasBMI) .Case("bmi2", HasBMI2) .Case("cldemote", HasCLDEMOTE) .Case("clflushopt", HasCLFLUSHOPT) .Case("clwb", HasCLWB) .Case("clzero", HasCLZERO) .Case("cx8", HasCX8) .Case("cx16", HasCX16) .Case("enqcmd", HasENQCMD) .Case("f16c", HasF16C) .Case("fma", HasFMA) .Case("fma4", XOPLevel >= FMA4) .Case("fsgsbase", HasFSGSBASE) .Case("fxsr", HasFXSR) .Case("gfni", HasGFNI) .Case("hreset", HasHRESET) .Case("invpcid", HasINVPCID) .Case("kl", HasKL) .Case("widekl", HasWIDEKL) .Case("lwp", HasLWP) .Case("lzcnt", HasLZCNT) .Case("mm3dnow", MMX3DNowLevel >= AMD3DNow) .Case("mm3dnowa", MMX3DNowLevel >= AMD3DNowAthlon) .Case("mmx", MMX3DNowLevel >= MMX) .Case("movbe", HasMOVBE) .Case("movdiri", HasMOVDIRI) .Case("movdir64b", HasMOVDIR64B) .Case("mwaitx", HasMWAITX) .Case("pclmul", HasPCLMUL) .Case("pconfig", HasPCONFIG) .Case("pku", HasPKU) .Case("popcnt", HasPOPCNT) .Case("prefetchwt1", HasPREFETCHWT1) .Case("prfchw", HasPRFCHW) .Case("ptwrite", HasPTWRITE) .Case("rdpid", HasRDPID) .Case("rdrnd", HasRDRND) .Case("rdseed", HasRDSEED) .Case("retpoline-external-thunk", HasRetpolineExternalThunk) .Case("rtm", HasRTM) .Case("sahf", HasLAHFSAHF) .Case("serialize", HasSERIALIZE) .Case("sgx", HasSGX) .Case("sha", HasSHA) .Case("shstk", HasSHSTK) .Case("sse", SSELevel >= SSE1) .Case("sse2", SSELevel >= SSE2) .Case("sse3", SSELevel >= SSE3) .Case("ssse3", SSELevel >= SSSE3) .Case("sse4.1", SSELevel >= SSE41) .Case("sse4.2", SSELevel >= SSE42) .Case("sse4a", XOPLevel >= SSE4A) .Case("tbm", HasTBM) .Case("tsxldtrk", HasTSXLDTRK) .Case("uintr", HasUINTR) .Case("vaes", HasVAES) .Case("vpclmulqdq", HasVPCLMULQDQ) .Case("wbnoinvd", HasWBNOINVD) .Case("waitpkg", HasWAITPKG) .Case("x86", true) .Case("x86_32", getTriple().getArch() == llvm::Triple::x86) .Case("x86_64", getTriple().getArch() == llvm::Triple::x86_64) .Case("xop", XOPLevel >= XOP) .Case("xsave", HasXSAVE) .Case("xsavec", HasXSAVEC) .Case("xsaves", HasXSAVES) .Case("xsaveopt", HasXSAVEOPT) .Default(false); } // We can't use a generic validation scheme for the features accepted here // versus subtarget features accepted in the target attribute because the // bitfield structure that's 
initialized in the runtime only supports the // below currently rather than the full range of subtarget features. (See // X86TargetInfo::hasFeature for a somewhat comprehensive list). bool X86TargetInfo::validateCpuSupports(StringRef FeatureStr) const { return llvm::StringSwitch(FeatureStr) #define X86_FEATURE_COMPAT(ENUM, STR) .Case(STR, true) #include "llvm/Support/X86TargetParser.def" .Default(false); } static llvm::X86::ProcessorFeatures getFeature(StringRef Name) { return llvm::StringSwitch(Name) #define X86_FEATURE_COMPAT(ENUM, STR) .Case(STR, llvm::X86::FEATURE_##ENUM) #include "llvm/Support/X86TargetParser.def" ; // Note, this function should only be used after ensuring the value is // correct, so it asserts if the value is out of range. } static unsigned getFeaturePriority(llvm::X86::ProcessorFeatures Feat) { enum class FeatPriority { #define FEATURE(FEAT) FEAT, #include "clang/Basic/X86Target.def" }; switch (Feat) { #define FEATURE(FEAT) \ case llvm::X86::FEAT: \ return static_cast(FeatPriority::FEAT); #include "clang/Basic/X86Target.def" default: llvm_unreachable("No Feature Priority for non-CPUSupports Features"); } } unsigned X86TargetInfo::multiVersionSortPriority(StringRef Name) const { // Valid CPUs have a 'key feature' that compares just better than its key // feature. using namespace llvm::X86; CPUKind Kind = parseArchX86(Name); if (Kind != CK_None) { ProcessorFeatures KeyFeature = getKeyFeature(Kind); return (getFeaturePriority(KeyFeature) << 1) + 1; } // Now we know we have a feature, so get its priority and shift it a few so // that we have sufficient room for the CPUs (above). return getFeaturePriority(getFeature(Name)) << 1; } bool X86TargetInfo::validateCPUSpecificCPUDispatch(StringRef Name) const { return llvm::StringSwitch(Name) #define CPU_SPECIFIC(NAME, MANGLING, FEATURES) .Case(NAME, true) #define CPU_SPECIFIC_ALIAS(NEW_NAME, NAME) .Case(NEW_NAME, true) #include "clang/Basic/X86Target.def" .Default(false); } static StringRef CPUSpecificCPUDispatchNameDealias(StringRef Name) { return llvm::StringSwitch(Name) #define CPU_SPECIFIC_ALIAS(NEW_NAME, NAME) .Case(NEW_NAME, NAME) #include "clang/Basic/X86Target.def" .Default(Name); } char X86TargetInfo::CPUSpecificManglingCharacter(StringRef Name) const { return llvm::StringSwitch(CPUSpecificCPUDispatchNameDealias(Name)) #define CPU_SPECIFIC(NAME, MANGLING, FEATURES) .Case(NAME, MANGLING) #include "clang/Basic/X86Target.def" .Default(0); } void X86TargetInfo::getCPUSpecificCPUDispatchFeatures( StringRef Name, llvm::SmallVectorImpl &Features) const { StringRef WholeList = llvm::StringSwitch(CPUSpecificCPUDispatchNameDealias(Name)) #define CPU_SPECIFIC(NAME, MANGLING, FEATURES) .Case(NAME, FEATURES) #include "clang/Basic/X86Target.def" .Default(""); WholeList.split(Features, ',', /*MaxSplit=*/-1, /*KeepEmpty=*/false); } // We can't use a generic validation scheme for the cpus accepted here // versus subtarget cpus accepted in the target attribute because the // variables intitialized by the runtime only support the below currently // rather than the full range of cpus. 
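// A standalone sketch (not part of this file or patch) of the user-facing side
// of validateCpuSupports() above and validateCpuIs() below: they restrict the
// strings accepted by the __builtin_cpu_supports()/__builtin_cpu_is() builtins
// to the names listed in X86TargetParser.def. The particular strings used here
// ("avx2", "amd", "znver2") are just examples of accepted values.
#include <cstdio>

int main() {
  __builtin_cpu_init();
  if (__builtin_cpu_supports("avx2"))
    std::puts("avx2 is available at run time");
  if (__builtin_cpu_is("amd"))          // vendor name
    std::puts("running on an AMD CPU");
  if (__builtin_cpu_is("znver2"))       // CPU (sub)type name
    std::puts("running on Zen 2");
  return 0;
}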
bool X86TargetInfo::validateCpuIs(StringRef FeatureStr) const { return llvm::StringSwitch(FeatureStr) #define X86_VENDOR(ENUM, STRING) .Case(STRING, true) #define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) .Case(ALIAS, true) #define X86_CPU_TYPE(ENUM, STR) .Case(STR, true) #define X86_CPU_SUBTYPE(ENUM, STR) .Case(STR, true) #include "llvm/Support/X86TargetParser.def" .Default(false); } static unsigned matchAsmCCConstraint(const char *&Name) { auto RV = llvm::StringSwitch(Name) .Case("@cca", 4) .Case("@ccae", 5) .Case("@ccb", 4) .Case("@ccbe", 5) .Case("@ccc", 4) .Case("@cce", 4) .Case("@ccz", 4) .Case("@ccg", 4) .Case("@ccge", 5) .Case("@ccl", 4) .Case("@ccle", 5) .Case("@ccna", 5) .Case("@ccnae", 6) .Case("@ccnb", 5) .Case("@ccnbe", 6) .Case("@ccnc", 5) .Case("@ccne", 5) .Case("@ccnz", 5) .Case("@ccng", 5) .Case("@ccnge", 6) .Case("@ccnl", 5) .Case("@ccnle", 6) .Case("@ccno", 5) .Case("@ccnp", 5) .Case("@ccns", 5) .Case("@cco", 4) .Case("@ccp", 4) .Case("@ccs", 4) .Default(0); return RV; } bool X86TargetInfo::validateAsmConstraint( const char *&Name, TargetInfo::ConstraintInfo &Info) const { switch (*Name) { default: return false; // Constant constraints. case 'e': // 32-bit signed integer constant for use with sign-extending x86_64 // instructions. case 'Z': // 32-bit unsigned integer constant for use with zero-extending // x86_64 instructions. case 's': Info.setRequiresImmediate(); return true; case 'I': Info.setRequiresImmediate(0, 31); return true; case 'J': Info.setRequiresImmediate(0, 63); return true; case 'K': Info.setRequiresImmediate(-128, 127); return true; case 'L': Info.setRequiresImmediate({int(0xff), int(0xffff), int(0xffffffff)}); return true; case 'M': Info.setRequiresImmediate(0, 3); return true; case 'N': Info.setRequiresImmediate(0, 255); return true; case 'O': Info.setRequiresImmediate(0, 127); return true; // Register constraints. case 'Y': // 'Y' is the first character for several 2-character constraints. // Shift the pointer to the second character of the constraint. Name++; switch (*Name) { default: return false; case 'z': // First SSE register. case '2': case 't': // Any SSE register, when SSE2 is enabled. case 'i': // Any SSE register, when SSE2 and inter-unit moves enabled. case 'm': // Any MMX register, when inter-unit moves enabled. case 'k': // AVX512 arch mask registers: k1-k7. Info.setAllowsRegister(); return true; } case 'f': // Any x87 floating point stack register. // Constraint 'f' cannot be used for output operands. if (Info.ConstraintStr[0] == '=') return false; Info.setAllowsRegister(); return true; case 'a': // eax. case 'b': // ebx. case 'c': // ecx. case 'd': // edx. case 'S': // esi. case 'D': // edi. case 'A': // edx:eax. case 't': // Top of floating point stack. case 'u': // Second from top of floating point stack. case 'q': // Any register accessible as [r]l: a, b, c, and d. case 'y': // Any MMX register. case 'v': // Any {X,Y,Z}MM register (Arch & context dependent) case 'x': // Any SSE register. case 'k': // Any AVX512 mask register (same as Yk, additionally allows k0 // for intermideate k reg operations). case 'Q': // Any register accessible as [r]h: a, b, c, and d. case 'R': // "Legacy" registers: ax, bx, cx, dx, di, si, sp, bp. case 'l': // "Index" registers: any general register that can be used as an // index in a base+index memory access. Info.setAllowsRegister(); return true; // Floating point constant constraints. case 'C': // SSE floating point constant. case 'G': // x87 floating point constant. return true; case '@': // CC condition changes. 
if (auto Len = matchAsmCCConstraint(Name)) { Name += Len - 1; Info.setAllowsRegister(); return true; } return false; } } // Below is based on the following information: // +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ // | Processor Name | Cache Line Size (Bytes) | Source | // +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ // | i386 | 64 | https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf | // | i486 | 16 | "four doublewords" (doubleword = 32 bits, 4 bits * 32 bits = 16 bytes) https://en.wikichip.org/w/images/d/d3/i486_MICROPROCESSOR_HARDWARE_REFERENCE_MANUAL_%281990%29.pdf and http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.126.4216&rep=rep1&type=pdf (page 29) | // | i586/Pentium MMX | 32 | https://www.7-cpu.com/cpu/P-MMX.html | // | i686/Pentium | 32 | https://www.7-cpu.com/cpu/P6.html | // | Netburst/Pentium4 | 64 | https://www.7-cpu.com/cpu/P4-180.html | // | Atom | 64 | https://www.7-cpu.com/cpu/Atom.html | // | Westmere | 64 | https://en.wikichip.org/wiki/intel/microarchitectures/sandy_bridge_(client) "Cache Architecture" | // | Sandy Bridge | 64 | https://en.wikipedia.org/wiki/Sandy_Bridge and https://www.7-cpu.com/cpu/SandyBridge.html | // | Ivy Bridge | 64 | https://blog.stuffedcow.net/2013/01/ivb-cache-replacement/ and https://www.7-cpu.com/cpu/IvyBridge.html | // | Haswell | 64 | https://www.7-cpu.com/cpu/Haswell.html | // | Boadwell | 64 | https://www.7-cpu.com/cpu/Broadwell.html | // | Skylake (including skylake-avx512) | 64 | https://www.nas.nasa.gov/hecc/support/kb/skylake-processors_550.html "Cache Hierarchy" | // | Cascade Lake | 64 | https://www.nas.nasa.gov/hecc/support/kb/cascade-lake-processors_579.html "Cache Hierarchy" | // | Skylake | 64 | https://en.wikichip.org/wiki/intel/microarchitectures/kaby_lake "Memory Hierarchy" | // | Ice Lake | 64 | https://www.7-cpu.com/cpu/Ice_Lake.html | // | Knights Landing | 64 | https://software.intel.com/en-us/articles/intel-xeon-phi-processor-7200-family-memory-management-optimizations "The Intel® Xeon Phi™ Processor Architecture" | // | Knights Mill | 64 | https://software.intel.com/sites/default/files/managed/9e/bc/64-ia-32-architectures-optimization-manual.pdf?countrylabel=Colombia "2.5.5.2 L1 DCache " | // +------------------------------------+-------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+ Optional X86TargetInfo::getCPUCacheLineSize() const { using namespace llvm::X86; switch (CPU) { // i386 case CK_i386: // i486 case CK_i486: case CK_WinChipC6: case CK_WinChip2: case CK_C3: // Lakemont case CK_Lakemont: return 16; // i586 case CK_i586: case CK_Pentium: case CK_PentiumMMX: // i686 case CK_PentiumPro: case CK_i686: case CK_Pentium2: case CK_Pentium3: case CK_PentiumM: case CK_C3_2: // K6 case CK_K6: case CK_K6_2: case CK_K6_3: // Geode case CK_Geode: return 32; // Netburst case CK_Pentium4: case CK_Prescott: case CK_Nocona: // Atom case CK_Bonnell: case CK_Silvermont: case CK_Goldmont: case CK_GoldmontPlus: case CK_Tremont: case CK_Westmere: case CK_SandyBridge: case 
CK_IvyBridge: case CK_Haswell: case CK_Broadwell: case CK_SkylakeClient: case CK_SkylakeServer: case CK_Cascadelake: case CK_Nehalem: case CK_Cooperlake: case CK_Cannonlake: case CK_Tigerlake: case CK_SapphireRapids: case CK_IcelakeClient: case CK_IcelakeServer: case CK_Alderlake: case CK_KNL: case CK_KNM: // K7 case CK_Athlon: case CK_AthlonXP: // K8 case CK_K8: case CK_K8SSE3: case CK_AMDFAM10: // Bobcat case CK_BTVER1: case CK_BTVER2: // Bulldozer case CK_BDVER1: case CK_BDVER2: case CK_BDVER3: case CK_BDVER4: // Zen case CK_ZNVER1: case CK_ZNVER2: case CK_ZNVER3: // Deprecated case CK_x86_64: case CK_x86_64_v2: case CK_x86_64_v3: case CK_x86_64_v4: case CK_Yonah: case CK_Penryn: case CK_Core2: return 64; // The following currently have unknown cache line sizes (but they are probably all 64): // Core case CK_None: return None; } llvm_unreachable("Unknown CPU kind"); } bool X86TargetInfo::validateOutputSize(const llvm::StringMap &FeatureMap, StringRef Constraint, unsigned Size) const { // Strip off constraint modifiers. while (Constraint[0] == '=' || Constraint[0] == '+' || Constraint[0] == '&') Constraint = Constraint.substr(1); return validateOperandSize(FeatureMap, Constraint, Size); } bool X86TargetInfo::validateInputSize(const llvm::StringMap &FeatureMap, StringRef Constraint, unsigned Size) const { return validateOperandSize(FeatureMap, Constraint, Size); } bool X86TargetInfo::validateOperandSize(const llvm::StringMap &FeatureMap, StringRef Constraint, unsigned Size) const { switch (Constraint[0]) { default: break; case 'k': // Registers k0-k7 (AVX512) size limit is 64 bit. case 'y': return Size <= 64; case 'f': case 't': case 'u': return Size <= 128; case 'Y': // 'Y' is the first character for several 2-character constraints. switch (Constraint[1]) { default: return false; case 'm': // 'Ym' is synonymous with 'y'. case 'k': return Size <= 64; case 'z': // XMM0/YMM/ZMM0 if (FeatureMap.lookup("avx512f")) // ZMM0 can be used if target supports AVX512F. return Size <= 512U; else if (FeatureMap.lookup("avx")) // YMM0 can be used if target supports AVX. return Size <= 256U; else if (FeatureMap.lookup("sse")) return Size <= 128U; return false; case 'i': case 't': case '2': // 'Yi','Yt','Y2' are synonymous with 'x' when SSE2 is enabled. if (SSELevel < SSE2) return false; break; } break; case 'v': case 'x': if (FeatureMap.lookup("avx512f")) // 512-bit zmm registers can be used if target supports AVX512F. return Size <= 512U; else if (FeatureMap.lookup("avx")) // 256-bit ymm registers can be used if target supports AVX. return Size <= 256U; return Size <= 128U; } return true; } std::string X86TargetInfo::convertConstraint(const char *&Constraint) const { switch (*Constraint) { case '@': if (auto Len = matchAsmCCConstraint(Constraint)) { std::string Converted = "{" + std::string(Constraint, Len) + "}"; Constraint += Len - 1; return Converted; } return std::string(1, *Constraint); case 'a': return std::string("{ax}"); case 'b': return std::string("{bx}"); case 'c': return std::string("{cx}"); case 'd': return std::string("{dx}"); case 'S': return std::string("{si}"); case 'D': return std::string("{di}"); case 'p': // address return std::string("im"); case 't': // top of floating point stack. return std::string("{st}"); case 'u': // second from top of floating point stack. return std::string("{st(1)}"); // second from top of floating point stack. 
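// A standalone sketch (not part of this file or patch) showing two of the
// constraints handled above from the user's side: 'x' (any SSE register,
// size-checked by validateOperandSize(); 128 bits here, so no AVX required)
// and the "=@cc<cond>" flag outputs matched by matchAsmCCConstraint() and
// advertised via __GCC_ASM_FLAG_OUTPUTS__. Requires a compiler with GCC-style
// extended asm targeting x86-64.
#include <cstdio>
#include <xmmintrin.h>

int main() {
  __m128 a = _mm_set1_ps(1.0f), b = _mm_set1_ps(2.0f);
  asm("addps %1, %0" : "+x"(a) : "x"(b));  // 'x': any SSE register

  int x = 3, y = 3;
  bool equal;
  asm("cmp %2, %1" : "=@ccz"(equal) : "r"(x), "r"(y));  // ZF as a direct output

  std::printf("lane0 = %f, equal = %d\n", _mm_cvtss_f32(a), (int)equal);
  return 0;
}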
case 'Y': switch (Constraint[1]) { default: // Break from inner switch and fall through (copy single char), // continue parsing after copying the current constraint into // the return string. break; case 'k': case 'm': case 'i': case 't': case 'z': case '2': // "^" hints llvm that this is a 2 letter constraint. // "Constraint++" is used to promote the string iterator // to the next constraint. return std::string("^") + std::string(Constraint++, 2); } LLVM_FALLTHROUGH; default: return std::string(1, *Constraint); } } void X86TargetInfo::fillValidCPUList(SmallVectorImpl &Values) const { bool Only64Bit = getTriple().getArch() != llvm::Triple::x86; llvm::X86::fillValidCPUArchList(Values, Only64Bit); } void X86TargetInfo::fillValidTuneCPUList(SmallVectorImpl &Values) const { llvm::X86::fillValidTuneCPUList(Values); } ArrayRef X86TargetInfo::getGCCRegNames() const { return llvm::makeArrayRef(GCCRegNames); } ArrayRef X86TargetInfo::getGCCAddlRegNames() const { return llvm::makeArrayRef(AddlRegNames); } ArrayRef X86_32TargetInfo::getTargetBuiltins() const { return llvm::makeArrayRef(BuiltinInfoX86, clang::X86::LastX86CommonBuiltin - Builtin::FirstTSBuiltin + 1); } ArrayRef X86_64TargetInfo::getTargetBuiltins() const { return llvm::makeArrayRef(BuiltinInfoX86, X86::LastTSBuiltin - Builtin::FirstTSBuiltin); } diff --git a/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc b/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc index 7f181258eab5..b7da65987557 100644 --- a/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc +++ b/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_ioctl.inc @@ -1,613 +1,604 @@ //===-- sanitizer_common_interceptors_ioctl.inc -----------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Ioctl handling in common sanitizer interceptors. //===----------------------------------------------------------------------===// #if !SANITIZER_NETBSD #include "sanitizer_flags.h" struct ioctl_desc { unsigned req; // FIXME: support read+write arguments. Currently READWRITE and WRITE do the // same thing. // XXX: The declarations below may use WRITE instead of READWRITE, unless // explicitly noted. enum { NONE, READ, WRITE, READWRITE, CUSTOM } type : 3; unsigned size : 29; const char* name; }; const unsigned ioctl_table_max = 500; static ioctl_desc ioctl_table[ioctl_table_max]; static unsigned ioctl_table_size = 0; // This can not be declared as a global, because references to struct_*_sz // require a global initializer. And this table must be available before global // initializers are run. 
static void ioctl_table_fill() { #define _(rq, tp, sz) \ if (IOCTL_##rq != IOCTL_NOT_PRESENT) { \ CHECK(ioctl_table_size < ioctl_table_max); \ ioctl_table[ioctl_table_size].req = IOCTL_##rq; \ ioctl_table[ioctl_table_size].type = ioctl_desc::tp; \ ioctl_table[ioctl_table_size].size = sz; \ ioctl_table[ioctl_table_size].name = #rq; \ ++ioctl_table_size; \ } _(FIOASYNC, READ, sizeof(int)); _(FIOCLEX, NONE, 0); _(FIOGETOWN, WRITE, sizeof(int)); _(FIONBIO, READ, sizeof(int)); _(FIONCLEX, NONE, 0); _(FIOSETOWN, READ, sizeof(int)); _(SIOCATMARK, WRITE, sizeof(int)); _(SIOCGIFCONF, CUSTOM, 0); _(SIOCGPGRP, WRITE, sizeof(int)); _(SIOCSPGRP, READ, sizeof(int)); #if !SANITIZER_SOLARIS _(TIOCCONS, NONE, 0); #endif _(TIOCEXCL, NONE, 0); _(TIOCGETD, WRITE, sizeof(int)); _(TIOCGPGRP, WRITE, pid_t_sz); _(TIOCGWINSZ, WRITE, struct_winsize_sz); _(TIOCMBIC, READ, sizeof(int)); _(TIOCMBIS, READ, sizeof(int)); _(TIOCMGET, WRITE, sizeof(int)); _(TIOCMSET, READ, sizeof(int)); _(TIOCNOTTY, NONE, 0); _(TIOCNXCL, NONE, 0); _(TIOCOUTQ, WRITE, sizeof(int)); _(TIOCPKT, READ, sizeof(int)); _(TIOCSCTTY, NONE, 0); _(TIOCSETD, READ, sizeof(int)); _(TIOCSPGRP, READ, pid_t_sz); _(TIOCSTI, READ, sizeof(char)); _(TIOCSWINSZ, READ, struct_winsize_sz); #if !SANITIZER_IOS _(SIOCADDMULTI, READ, struct_ifreq_sz); _(SIOCDELMULTI, READ, struct_ifreq_sz); _(SIOCGIFADDR, WRITE, struct_ifreq_sz); _(SIOCGIFBRDADDR, WRITE, struct_ifreq_sz); _(SIOCGIFDSTADDR, WRITE, struct_ifreq_sz); _(SIOCGIFFLAGS, WRITE, struct_ifreq_sz); _(SIOCGIFMETRIC, WRITE, struct_ifreq_sz); _(SIOCGIFMTU, WRITE, struct_ifreq_sz); _(SIOCGIFNETMASK, WRITE, struct_ifreq_sz); _(SIOCSIFADDR, READ, struct_ifreq_sz); _(SIOCSIFBRDADDR, READ, struct_ifreq_sz); _(SIOCSIFDSTADDR, READ, struct_ifreq_sz); _(SIOCSIFFLAGS, READ, struct_ifreq_sz); _(SIOCSIFMETRIC, READ, struct_ifreq_sz); _(SIOCSIFMTU, READ, struct_ifreq_sz); _(SIOCSIFNETMASK, READ, struct_ifreq_sz); #endif #if (SANITIZER_LINUX && !SANITIZER_ANDROID) _(SIOCGETSGCNT, WRITE, struct_sioc_sg_req_sz); _(SIOCGETVIFCNT, WRITE, struct_sioc_vif_req_sz); #endif #if SANITIZER_LINUX // Conflicting request ids. 
// _(CDROMAUDIOBUFSIZ, NONE, 0); // _(SNDCTL_TMR_CONTINUE, NONE, 0); // _(SNDCTL_TMR_START, NONE, 0); // _(SNDCTL_TMR_STOP, NONE, 0); // _(SOUND_MIXER_READ_LOUD, WRITE, sizeof(int)); // same as ...READ_ENHANCE // _(SOUND_MIXER_READ_MUTE, WRITE, sizeof(int)); // same as ...READ_ENHANCE // _(SOUND_MIXER_WRITE_LOUD, WRITE, sizeof(int)); // same as ...WRITE_ENHANCE // _(SOUND_MIXER_WRITE_MUTE, WRITE, sizeof(int)); // same as ...WRITE_ENHANCE _(BLKFLSBUF, NONE, 0); _(BLKGETSIZE, WRITE, sizeof(uptr)); _(BLKRAGET, WRITE, sizeof(int)); _(BLKRASET, NONE, 0); _(BLKROGET, WRITE, sizeof(int)); _(BLKROSET, READ, sizeof(int)); _(BLKRRPART, NONE, 0); _(CDROMEJECT, NONE, 0); _(CDROMEJECT_SW, NONE, 0); _(CDROMMULTISESSION, WRITE, struct_cdrom_multisession_sz); _(CDROMPAUSE, NONE, 0); _(CDROMPLAYMSF, READ, struct_cdrom_msf_sz); _(CDROMPLAYTRKIND, READ, struct_cdrom_ti_sz); _(CDROMREADAUDIO, READ, struct_cdrom_read_audio_sz); _(CDROMREADCOOKED, READ, struct_cdrom_msf_sz); _(CDROMREADMODE1, READ, struct_cdrom_msf_sz); _(CDROMREADMODE2, READ, struct_cdrom_msf_sz); _(CDROMREADRAW, READ, struct_cdrom_msf_sz); _(CDROMREADTOCENTRY, WRITE, struct_cdrom_tocentry_sz); _(CDROMREADTOCHDR, WRITE, struct_cdrom_tochdr_sz); _(CDROMRESET, NONE, 0); _(CDROMRESUME, NONE, 0); _(CDROMSEEK, READ, struct_cdrom_msf_sz); _(CDROMSTART, NONE, 0); _(CDROMSTOP, NONE, 0); _(CDROMSUBCHNL, WRITE, struct_cdrom_subchnl_sz); _(CDROMVOLCTRL, READ, struct_cdrom_volctrl_sz); _(CDROMVOLREAD, WRITE, struct_cdrom_volctrl_sz); _(CDROM_GET_UPC, WRITE, 8); _(EVIOCGABS, WRITE, struct_input_absinfo_sz); // fixup _(EVIOCGBIT, WRITE, struct_input_id_sz); // fixup _(EVIOCGEFFECTS, WRITE, sizeof(int)); _(EVIOCGID, WRITE, struct_input_id_sz); _(EVIOCGKEY, WRITE, 0); _(EVIOCGKEYCODE, WRITE, sizeof(int) * 2); _(EVIOCGLED, WRITE, 0); _(EVIOCGNAME, WRITE, 0); _(EVIOCGPHYS, WRITE, 0); _(EVIOCGRAB, READ, sizeof(int)); _(EVIOCGREP, WRITE, sizeof(int) * 2); _(EVIOCGSND, WRITE, 0); _(EVIOCGSW, WRITE, 0); _(EVIOCGUNIQ, WRITE, 0); _(EVIOCGVERSION, WRITE, sizeof(int)); _(EVIOCRMFF, READ, sizeof(int)); _(EVIOCSABS, READ, struct_input_absinfo_sz); // fixup _(EVIOCSFF, READ, struct_ff_effect_sz); _(EVIOCSKEYCODE, READ, sizeof(int) * 2); _(EVIOCSREP, READ, sizeof(int) * 2); _(FDCLRPRM, NONE, 0); _(FDDEFPRM, READ, struct_floppy_struct_sz); _(FDFLUSH, NONE, 0); _(FDFMTBEG, NONE, 0); _(FDFMTEND, NONE, 0); _(FDFMTTRK, READ, struct_format_descr_sz); _(FDGETDRVPRM, WRITE, struct_floppy_drive_params_sz); _(FDGETDRVSTAT, WRITE, struct_floppy_drive_struct_sz); _(FDGETDRVTYP, WRITE, 16); _(FDGETFDCSTAT, WRITE, struct_floppy_fdc_state_sz); _(FDGETMAXERRS, WRITE, struct_floppy_max_errors_sz); _(FDGETPRM, WRITE, struct_floppy_struct_sz); _(FDMSGOFF, NONE, 0); _(FDMSGON, NONE, 0); _(FDPOLLDRVSTAT, WRITE, struct_floppy_drive_struct_sz); _(FDRAWCMD, WRITE, struct_floppy_raw_cmd_sz); _(FDRESET, NONE, 0); _(FDSETDRVPRM, READ, struct_floppy_drive_params_sz); _(FDSETEMSGTRESH, NONE, 0); _(FDSETMAXERRS, READ, struct_floppy_max_errors_sz); _(FDSETPRM, READ, struct_floppy_struct_sz); _(FDTWADDLE, NONE, 0); _(FDWERRORCLR, NONE, 0); _(FDWERRORGET, WRITE, struct_floppy_write_errors_sz); _(HDIO_DRIVE_CMD, WRITE, sizeof(int)); _(HDIO_GETGEO, WRITE, struct_hd_geometry_sz); _(HDIO_GET_32BIT, WRITE, sizeof(int)); _(HDIO_GET_DMA, WRITE, sizeof(int)); _(HDIO_GET_IDENTITY, WRITE, struct_hd_driveid_sz); _(HDIO_GET_KEEPSETTINGS, WRITE, sizeof(int)); _(HDIO_GET_MULTCOUNT, WRITE, sizeof(int)); _(HDIO_GET_NOWERR, WRITE, sizeof(int)); _(HDIO_GET_UNMASKINTR, WRITE, sizeof(int)); _(HDIO_SET_32BIT, NONE, 0); 
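// A standalone sketch (not part of this table or patch): requests that are
// missing from this table can still be handled generically, because the Linux
// _IOC encoding stores direction, type, number and payload size in the request
// id itself; that is what ioctl_decode() further below recovers through
// IOC_DIR/IOC_TYPE/IOC_SIZE. The EVIOCGVERSION-style id built here is only an
// illustration; the expected values assume Linux/x86-64.
#include <cstdio>
#include <sys/ioctl.h>  // on Linux this pulls in _IOR and the _IOC_* accessors

int main() {
  unsigned req = _IOR('E', 0x01, int);  // same encoding shape as EVIOCGVERSION
  std::printf("dir=%u type='%c' nr=%u size=%u\n",
              _IOC_DIR(req), (char)_IOC_TYPE(req), _IOC_NR(req),
              _IOC_SIZE(req));
  return 0;  // expected: dir=2 (_IOC_READ) type='E' nr=1 size=4
}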
_(HDIO_SET_DMA, NONE, 0); _(HDIO_SET_KEEPSETTINGS, NONE, 0); _(HDIO_SET_MULTCOUNT, NONE, 0); _(HDIO_SET_NOWERR, NONE, 0); _(HDIO_SET_UNMASKINTR, NONE, 0); _(MTIOCGET, WRITE, struct_mtget_sz); _(MTIOCPOS, WRITE, struct_mtpos_sz); _(MTIOCTOP, READ, struct_mtop_sz); _(PPPIOCGASYNCMAP, WRITE, sizeof(int)); _(PPPIOCGDEBUG, WRITE, sizeof(int)); _(PPPIOCGFLAGS, WRITE, sizeof(int)); _(PPPIOCGUNIT, WRITE, sizeof(int)); _(PPPIOCGXASYNCMAP, WRITE, sizeof(int) * 8); _(PPPIOCSASYNCMAP, READ, sizeof(int)); _(PPPIOCSDEBUG, READ, sizeof(int)); _(PPPIOCSFLAGS, READ, sizeof(int)); _(PPPIOCSMAXCID, READ, sizeof(int)); _(PPPIOCSMRU, READ, sizeof(int)); _(PPPIOCSXASYNCMAP, READ, sizeof(int) * 8); _(SIOCADDRT, READ, struct_rtentry_sz); _(SIOCDARP, READ, struct_arpreq_sz); _(SIOCDELRT, READ, struct_rtentry_sz); _(SIOCDRARP, READ, struct_arpreq_sz); _(SIOCGARP, WRITE, struct_arpreq_sz); _(SIOCGIFENCAP, WRITE, sizeof(int)); _(SIOCGIFHWADDR, WRITE, struct_ifreq_sz); _(SIOCGIFMAP, WRITE, struct_ifreq_sz); _(SIOCGIFMEM, WRITE, struct_ifreq_sz); _(SIOCGIFNAME, NONE, 0); _(SIOCGIFSLAVE, NONE, 0); _(SIOCGRARP, WRITE, struct_arpreq_sz); _(SIOCGSTAMP, WRITE, timeval_sz); _(SIOCSARP, READ, struct_arpreq_sz); _(SIOCSIFENCAP, READ, sizeof(int)); _(SIOCSIFHWADDR, READ, struct_ifreq_sz); _(SIOCSIFLINK, NONE, 0); _(SIOCSIFMAP, READ, struct_ifreq_sz); _(SIOCSIFMEM, READ, struct_ifreq_sz); _(SIOCSIFSLAVE, NONE, 0); _(SIOCSRARP, READ, struct_arpreq_sz); _(SNDCTL_COPR_HALT, WRITE, struct_copr_debug_buf_sz); _(SNDCTL_COPR_LOAD, READ, struct_copr_buffer_sz); _(SNDCTL_COPR_RCODE, WRITE, struct_copr_debug_buf_sz); _(SNDCTL_COPR_RCVMSG, WRITE, struct_copr_msg_sz); _(SNDCTL_COPR_RDATA, WRITE, struct_copr_debug_buf_sz); _(SNDCTL_COPR_RESET, NONE, 0); _(SNDCTL_COPR_RUN, WRITE, struct_copr_debug_buf_sz); _(SNDCTL_COPR_SENDMSG, READ, struct_copr_msg_sz); _(SNDCTL_COPR_WCODE, READ, struct_copr_debug_buf_sz); _(SNDCTL_COPR_WDATA, READ, struct_copr_debug_buf_sz); _(SNDCTL_DSP_GETBLKSIZE, WRITE, sizeof(int)); _(SNDCTL_DSP_GETFMTS, WRITE, sizeof(int)); _(SNDCTL_DSP_NONBLOCK, NONE, 0); _(SNDCTL_DSP_POST, NONE, 0); _(SNDCTL_DSP_RESET, NONE, 0); _(SNDCTL_DSP_SETFMT, WRITE, sizeof(int)); _(SNDCTL_DSP_SETFRAGMENT, WRITE, sizeof(int)); _(SNDCTL_DSP_SPEED, WRITE, sizeof(int)); _(SNDCTL_DSP_STEREO, WRITE, sizeof(int)); _(SNDCTL_DSP_SUBDIVIDE, WRITE, sizeof(int)); _(SNDCTL_DSP_SYNC, NONE, 0); _(SNDCTL_FM_4OP_ENABLE, READ, sizeof(int)); _(SNDCTL_FM_LOAD_INSTR, READ, struct_sbi_instrument_sz); _(SNDCTL_MIDI_INFO, WRITE, struct_midi_info_sz); _(SNDCTL_MIDI_PRETIME, WRITE, sizeof(int)); _(SNDCTL_SEQ_CTRLRATE, WRITE, sizeof(int)); _(SNDCTL_SEQ_GETINCOUNT, WRITE, sizeof(int)); _(SNDCTL_SEQ_GETOUTCOUNT, WRITE, sizeof(int)); _(SNDCTL_SEQ_NRMIDIS, WRITE, sizeof(int)); _(SNDCTL_SEQ_NRSYNTHS, WRITE, sizeof(int)); _(SNDCTL_SEQ_OUTOFBAND, READ, struct_seq_event_rec_sz); _(SNDCTL_SEQ_PANIC, NONE, 0); _(SNDCTL_SEQ_PERCMODE, NONE, 0); _(SNDCTL_SEQ_RESET, NONE, 0); _(SNDCTL_SEQ_RESETSAMPLES, READ, sizeof(int)); _(SNDCTL_SEQ_SYNC, NONE, 0); _(SNDCTL_SEQ_TESTMIDI, READ, sizeof(int)); _(SNDCTL_SEQ_THRESHOLD, READ, sizeof(int)); _(SNDCTL_SYNTH_INFO, WRITE, struct_synth_info_sz); _(SNDCTL_SYNTH_MEMAVL, WRITE, sizeof(int)); _(SNDCTL_TMR_METRONOME, READ, sizeof(int)); _(SNDCTL_TMR_SELECT, WRITE, sizeof(int)); _(SNDCTL_TMR_SOURCE, WRITE, sizeof(int)); _(SNDCTL_TMR_TEMPO, WRITE, sizeof(int)); _(SNDCTL_TMR_TIMEBASE, WRITE, sizeof(int)); _(SOUND_MIXER_READ_ALTPCM, WRITE, sizeof(int)); _(SOUND_MIXER_READ_BASS, WRITE, sizeof(int)); _(SOUND_MIXER_READ_CAPS, WRITE, sizeof(int)); 
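// A standalone sketch (not part of this table or patch): what a READ entry
// such as _(TIOCSWINSZ, READ, struct_winsize_sz) above means for user code.
// ioctl_common_pre() below checks that many bytes of the argument before the
// call, so MSan reports the first, uninitialized call here and stays quiet on
// the second, initialized one (behavior described for MSan; build flags such
// as -fsanitize=memory are assumed, not taken from this diff).
#include <cstring>
#include <sys/ioctl.h>
#include <unistd.h>

int main() {
  struct winsize ws;                       // deliberately left uninitialized
  ioctl(STDOUT_FILENO, TIOCSWINSZ, &ws);   // MSan: use-of-uninitialized-value
  std::memset(&ws, 0, sizeof(ws));
  ioctl(STDOUT_FILENO, TIOCSWINSZ, &ws);   // fully initialized: no report
  return 0;
}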
_(SOUND_MIXER_READ_CD, WRITE, sizeof(int)); _(SOUND_MIXER_READ_DEVMASK, WRITE, sizeof(int)); _(SOUND_MIXER_READ_ENHANCE, WRITE, sizeof(int)); _(SOUND_MIXER_READ_IGAIN, WRITE, sizeof(int)); _(SOUND_MIXER_READ_IMIX, WRITE, sizeof(int)); _(SOUND_MIXER_READ_LINE, WRITE, sizeof(int)); _(SOUND_MIXER_READ_LINE1, WRITE, sizeof(int)); _(SOUND_MIXER_READ_LINE2, WRITE, sizeof(int)); _(SOUND_MIXER_READ_LINE3, WRITE, sizeof(int)); _(SOUND_MIXER_READ_MIC, WRITE, sizeof(int)); _(SOUND_MIXER_READ_OGAIN, WRITE, sizeof(int)); _(SOUND_MIXER_READ_PCM, WRITE, sizeof(int)); _(SOUND_MIXER_READ_RECLEV, WRITE, sizeof(int)); _(SOUND_MIXER_READ_RECMASK, WRITE, sizeof(int)); _(SOUND_MIXER_READ_RECSRC, WRITE, sizeof(int)); _(SOUND_MIXER_READ_SPEAKER, WRITE, sizeof(int)); _(SOUND_MIXER_READ_STEREODEVS, WRITE, sizeof(int)); _(SOUND_MIXER_READ_SYNTH, WRITE, sizeof(int)); _(SOUND_MIXER_READ_TREBLE, WRITE, sizeof(int)); _(SOUND_MIXER_READ_VOLUME, WRITE, sizeof(int)); _(SOUND_MIXER_WRITE_ALTPCM, WRITE, sizeof(int)); _(SOUND_MIXER_WRITE_BASS, WRITE, sizeof(int)); _(SOUND_MIXER_WRITE_CD, WRITE, sizeof(int)); _(SOUND_MIXER_WRITE_ENHANCE, WRITE, sizeof(int)); _(SOUND_MIXER_WRITE_IGAIN, WRITE, sizeof(int)); _(SOUND_MIXER_WRITE_IMIX, WRITE, sizeof(int)); _(SOUND_MIXER_WRITE_LINE, WRITE, sizeof(int)); _(SOUND_MIXER_WRITE_LINE1, WRITE, sizeof(int)); _(SOUND_MIXER_WRITE_LINE2, WRITE, sizeof(int)); _(SOUND_MIXER_WRITE_LINE3, WRITE, sizeof(int)); _(SOUND_MIXER_WRITE_MIC, WRITE, sizeof(int)); _(SOUND_MIXER_WRITE_OGAIN, WRITE, sizeof(int)); _(SOUND_MIXER_WRITE_PCM, WRITE, sizeof(int)); _(SOUND_MIXER_WRITE_RECLEV, WRITE, sizeof(int)); _(SOUND_MIXER_WRITE_RECSRC, WRITE, sizeof(int)); _(SOUND_MIXER_WRITE_SPEAKER, WRITE, sizeof(int)); _(SOUND_MIXER_WRITE_SYNTH, WRITE, sizeof(int)); _(SOUND_MIXER_WRITE_TREBLE, WRITE, sizeof(int)); _(SOUND_MIXER_WRITE_VOLUME, WRITE, sizeof(int)); _(SOUND_PCM_READ_BITS, WRITE, sizeof(int)); _(SOUND_PCM_READ_CHANNELS, WRITE, sizeof(int)); _(SOUND_PCM_READ_FILTER, WRITE, sizeof(int)); _(SOUND_PCM_READ_RATE, WRITE, sizeof(int)); _(SOUND_PCM_WRITE_CHANNELS, WRITE, sizeof(int)); _(SOUND_PCM_WRITE_FILTER, WRITE, sizeof(int)); _(TCFLSH, NONE, 0); #if SANITIZER_GLIBC _(TCGETA, WRITE, struct_termio_sz); #endif _(TCGETS, WRITE, struct_termios_sz); _(TCSBRK, NONE, 0); _(TCSBRKP, NONE, 0); #if SANITIZER_GLIBC _(TCSETA, READ, struct_termio_sz); _(TCSETAF, READ, struct_termio_sz); _(TCSETAW, READ, struct_termio_sz); #endif _(TCSETS, READ, struct_termios_sz); _(TCSETSF, READ, struct_termios_sz); _(TCSETSW, READ, struct_termios_sz); _(TCXONC, NONE, 0); _(TIOCGLCKTRMIOS, WRITE, struct_termios_sz); _(TIOCGSOFTCAR, WRITE, sizeof(int)); _(TIOCINQ, WRITE, sizeof(int)); _(TIOCLINUX, READ, sizeof(char)); _(TIOCSERCONFIG, NONE, 0); _(TIOCSERGETLSR, WRITE, sizeof(int)); _(TIOCSERGWILD, WRITE, sizeof(int)); _(TIOCSERSWILD, READ, sizeof(int)); _(TIOCSLCKTRMIOS, READ, struct_termios_sz); _(TIOCSSOFTCAR, READ, sizeof(int)); _(VT_ACTIVATE, NONE, 0); _(VT_DISALLOCATE, NONE, 0); _(VT_GETMODE, WRITE, struct_vt_mode_sz); _(VT_GETSTATE, WRITE, struct_vt_stat_sz); _(VT_OPENQRY, WRITE, sizeof(int)); _(VT_RELDISP, NONE, 0); _(VT_RESIZE, READ, struct_vt_sizes_sz); _(VT_RESIZEX, READ, struct_vt_consize_sz); _(VT_SENDSIG, NONE, 0); _(VT_SETMODE, READ, struct_vt_mode_sz); _(VT_WAITACTIVE, NONE, 0); #endif #if SANITIZER_GLIBC // _(SIOCDEVPLIP, WRITE, struct_ifreq_sz); // the same as EQL_ENSLAVE - _(CYGETDEFTHRESH, WRITE, sizeof(int)); - _(CYGETDEFTIMEOUT, WRITE, sizeof(int)); - _(CYGETMON, WRITE, struct_cyclades_monitor_sz); - _(CYGETTHRESH, WRITE, 
sizeof(int)); - _(CYGETTIMEOUT, WRITE, sizeof(int)); - _(CYSETDEFTHRESH, NONE, 0); - _(CYSETDEFTIMEOUT, NONE, 0); - _(CYSETTHRESH, NONE, 0); - _(CYSETTIMEOUT, NONE, 0); _(EQL_EMANCIPATE, WRITE, struct_ifreq_sz); _(EQL_ENSLAVE, WRITE, struct_ifreq_sz); _(EQL_GETMASTRCFG, WRITE, struct_ifreq_sz); _(EQL_GETSLAVECFG, WRITE, struct_ifreq_sz); _(EQL_SETMASTRCFG, WRITE, struct_ifreq_sz); _(EQL_SETSLAVECFG, WRITE, struct_ifreq_sz); _(EVIOCGKEYCODE_V2, WRITE, struct_input_keymap_entry_sz); _(EVIOCGPROP, WRITE, 0); _(EVIOCSKEYCODE_V2, READ, struct_input_keymap_entry_sz); _(FS_IOC_GETFLAGS, WRITE, sizeof(int)); _(FS_IOC_GETVERSION, WRITE, sizeof(int)); _(FS_IOC_SETFLAGS, READ, sizeof(int)); _(FS_IOC_SETVERSION, READ, sizeof(int)); _(GIO_CMAP, WRITE, 48); _(GIO_FONT, WRITE, 8192); _(GIO_SCRNMAP, WRITE, e_tabsz); _(GIO_UNIMAP, WRITE, struct_unimapdesc_sz); _(GIO_UNISCRNMAP, WRITE, sizeof(short) * e_tabsz); _(KDADDIO, NONE, 0); _(KDDELIO, NONE, 0); _(KDDISABIO, NONE, 0); _(KDENABIO, NONE, 0); _(KDGETKEYCODE, WRITE, struct_kbkeycode_sz); _(KDGETLED, WRITE, 1); _(KDGETMODE, WRITE, sizeof(int)); _(KDGKBDIACR, WRITE, struct_kbdiacrs_sz); _(KDGKBENT, WRITE, struct_kbentry_sz); _(KDGKBLED, WRITE, sizeof(int)); _(KDGKBMETA, WRITE, sizeof(int)); _(KDGKBMODE, WRITE, sizeof(int)); _(KDGKBSENT, WRITE, struct_kbsentry_sz); _(KDGKBTYPE, WRITE, 1); _(KDMAPDISP, NONE, 0); _(KDMKTONE, NONE, 0); _(KDSETKEYCODE, READ, struct_kbkeycode_sz); _(KDSETLED, NONE, 0); _(KDSETMODE, NONE, 0); _(KDSIGACCEPT, NONE, 0); _(KDSKBDIACR, READ, struct_kbdiacrs_sz); _(KDSKBENT, READ, struct_kbentry_sz); _(KDSKBLED, NONE, 0); _(KDSKBMETA, NONE, 0); _(KDSKBMODE, NONE, 0); _(KDSKBSENT, READ, struct_kbsentry_sz); _(KDUNMAPDISP, NONE, 0); _(KIOCSOUND, NONE, 0); _(LPABORT, NONE, 0); _(LPABORTOPEN, NONE, 0); _(LPCAREFUL, NONE, 0); _(LPCHAR, NONE, 0); _(LPGETIRQ, WRITE, sizeof(int)); _(LPGETSTATUS, WRITE, sizeof(int)); _(LPRESET, NONE, 0); _(LPSETIRQ, NONE, 0); _(LPTIME, NONE, 0); _(LPWAIT, NONE, 0); _(MTIOCGETCONFIG, WRITE, struct_mtconfiginfo_sz); _(MTIOCSETCONFIG, READ, struct_mtconfiginfo_sz); _(PIO_CMAP, NONE, 0); _(PIO_FONT, READ, 8192); _(PIO_SCRNMAP, READ, e_tabsz); _(PIO_UNIMAP, READ, struct_unimapdesc_sz); _(PIO_UNIMAPCLR, READ, struct_unimapinit_sz); _(PIO_UNISCRNMAP, READ, sizeof(short) * e_tabsz); _(SCSI_IOCTL_PROBE_HOST, READ, sizeof(int)); _(SCSI_IOCTL_TAGGED_DISABLE, NONE, 0); _(SCSI_IOCTL_TAGGED_ENABLE, NONE, 0); _(SNDCTL_DSP_GETISPACE, WRITE, struct_audio_buf_info_sz); _(SNDCTL_DSP_GETOSPACE, WRITE, struct_audio_buf_info_sz); _(TIOCGSERIAL, WRITE, struct_serial_struct_sz); _(TIOCSERGETMULTI, WRITE, struct_serial_multiport_struct_sz); _(TIOCSERSETMULTI, READ, struct_serial_multiport_struct_sz); _(TIOCSSERIAL, READ, struct_serial_struct_sz); // The following ioctl requests are shared between AX25, IPX, netrom and // mrouted. 
// _(SIOCAIPXITFCRT, READ, sizeof(char)); // _(SIOCAX25GETUID, READ, struct_sockaddr_ax25_sz); // _(SIOCNRGETPARMS, WRITE, struct_nr_parms_struct_sz); // _(SIOCAIPXPRISLT, READ, sizeof(char)); // _(SIOCNRSETPARMS, READ, struct_nr_parms_struct_sz); // _(SIOCAX25ADDUID, READ, struct_sockaddr_ax25_sz); // _(SIOCNRDECOBS, NONE, 0); // _(SIOCAX25DELUID, READ, struct_sockaddr_ax25_sz); // _(SIOCIPXCFGDATA, WRITE, struct_ipx_config_data_sz); // _(SIOCAX25NOUID, READ, sizeof(int)); // _(SIOCNRRTCTL, READ, sizeof(int)); // _(SIOCAX25DIGCTL, READ, sizeof(int)); // _(SIOCAX25GETPARMS, WRITE, struct_ax25_parms_struct_sz); // _(SIOCAX25SETPARMS, READ, struct_ax25_parms_struct_sz); #endif #undef _ } static bool ioctl_initialized = false; struct ioctl_desc_compare { bool operator()(const ioctl_desc& left, const ioctl_desc& right) const { return left.req < right.req; } }; static void ioctl_init() { ioctl_table_fill(); Sort(ioctl_table, ioctl_table_size, ioctl_desc_compare()); bool bad = false; for (unsigned i = 0; i < ioctl_table_size - 1; ++i) { if (ioctl_table[i].req >= ioctl_table[i + 1].req) { Printf("Duplicate or unsorted ioctl request id %x >= %x (%s vs %s)\n", ioctl_table[i].req, ioctl_table[i + 1].req, ioctl_table[i].name, ioctl_table[i + 1].name); bad = true; } } if (bad) Die(); ioctl_initialized = true; } // Handle the most evil ioctls that encode argument value as part of request id. static unsigned ioctl_request_fixup(unsigned req) { #if SANITIZER_LINUX // Strip size and event number. const unsigned kEviocgbitMask = (IOC_SIZEMASK << IOC_SIZESHIFT) | EVIOC_EV_MAX; if ((req & ~kEviocgbitMask) == IOCTL_EVIOCGBIT) return IOCTL_EVIOCGBIT; // Strip absolute axis number. if ((req & ~EVIOC_ABS_MAX) == IOCTL_EVIOCGABS) return IOCTL_EVIOCGABS; if ((req & ~EVIOC_ABS_MAX) == IOCTL_EVIOCSABS) return IOCTL_EVIOCSABS; #endif return req; } static const ioctl_desc *ioctl_table_lookup(unsigned req) { int left = 0; int right = ioctl_table_size; while (left < right) { int mid = (left + right) / 2; if (ioctl_table[mid].req < req) left = mid + 1; else right = mid; } if (left == right && ioctl_table[left].req == req) return ioctl_table + left; else return nullptr; } static bool ioctl_decode(unsigned req, ioctl_desc *desc) { CHECK(desc); desc->req = req; desc->name = ""; desc->size = IOC_SIZE(req); // Sanity check. if (desc->size > 0xFFFF) return false; unsigned dir = IOC_DIR(req); switch (dir) { case IOC_NONE: desc->type = ioctl_desc::NONE; break; case IOC_READ | IOC_WRITE: desc->type = ioctl_desc::READWRITE; break; case IOC_READ: desc->type = ioctl_desc::WRITE; break; case IOC_WRITE: desc->type = ioctl_desc::READ; break; default: return false; } // Size can be 0 iff type is NONE. if ((desc->type == IOC_NONE) != (desc->size == 0)) return false; // Sanity check. if (IOC_TYPE(req) == 0) return false; return true; } static const ioctl_desc *ioctl_lookup(unsigned req) { req = ioctl_request_fixup(req); const ioctl_desc *desc = ioctl_table_lookup(req); if (desc) return desc; // Try stripping access size from the request id. desc = ioctl_table_lookup(req & ~(IOC_SIZEMASK << IOC_SIZESHIFT)); // Sanity check: requests that encode access size are either read or write and // have size of 0 in the table. 
if (desc && desc->size == 0 && (desc->type == ioctl_desc::READWRITE || desc->type == ioctl_desc::WRITE || desc->type == ioctl_desc::READ)) return desc; return nullptr; } static void ioctl_common_pre(void *ctx, const ioctl_desc *desc, int d, unsigned request, void *arg) { if (desc->type == ioctl_desc::READ || desc->type == ioctl_desc::READWRITE) { unsigned size = desc->size ? desc->size : IOC_SIZE(request); COMMON_INTERCEPTOR_READ_RANGE(ctx, arg, size); } if (desc->type != ioctl_desc::CUSTOM) return; if (request == IOCTL_SIOCGIFCONF) { struct __sanitizer_ifconf *ifc = (__sanitizer_ifconf *)arg; COMMON_INTERCEPTOR_READ_RANGE(ctx, (char*)&ifc->ifc_len, sizeof(ifc->ifc_len)); } } static void ioctl_common_post(void *ctx, const ioctl_desc *desc, int res, int d, unsigned request, void *arg) { if (desc->type == ioctl_desc::WRITE || desc->type == ioctl_desc::READWRITE) { // FIXME: add verbose output unsigned size = desc->size ? desc->size : IOC_SIZE(request); COMMON_INTERCEPTOR_WRITE_RANGE(ctx, arg, size); } if (desc->type != ioctl_desc::CUSTOM) return; if (request == IOCTL_SIOCGIFCONF) { struct __sanitizer_ifconf *ifc = (__sanitizer_ifconf *)arg; COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ifc->ifc_ifcu.ifcu_req, ifc->ifc_len); } } #endif diff --git a/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp b/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp index 12dd39e674ac..7abaeb880bf3 100644 --- a/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp +++ b/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.cpp @@ -1,1298 +1,1287 @@ //===-- sanitizer_platform_limits_posix.cpp -------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file is a part of Sanitizer common code. // // Sizes and layouts of platform-specific POSIX data structures. //===----------------------------------------------------------------------===// #if defined(__linux__) || defined(__APPLE__) // Tests in this file assume that off_t-dependent data structures match the // libc ABI. For example, struct dirent here is what readdir() function (as // exported from libc) returns, and not the user-facing "dirent", which // depends on _FILE_OFFSET_BITS setting. // To get this "true" dirent definition, we undefine _FILE_OFFSET_BITS below. #undef _FILE_OFFSET_BITS #endif // Must go after undef _FILE_OFFSET_BITS. #include "sanitizer_platform.h" #if SANITIZER_LINUX || SANITIZER_MAC // Must go after undef _FILE_OFFSET_BITS. 
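Editorial aside on the ioctl_decode() and ioctl_request_fixup() helpers above, not part of the patch: on Linux an ioctl request number packs a direction, a payload size, a type ("magic") character and a command number into a single 32-bit value, which is also why the COMPILER_CHECKs later in this diff pin IOC_DIR/IOC_SIZE/IOC_TYPE/IOC_NR against the kernel's _IOC_* macros. A minimal standalone sketch, assuming a Linux host; the struct demo_args payload and the DEMO_GET request are made up for illustration:

  #include <linux/ioctl.h>  // _IOR, _IOC_DIR, _IOC_TYPE, _IOC_NR, _IOC_SIZE
  #include <cstdio>

  struct demo_args { char buf[16]; };               // hypothetical payload
  #define DEMO_GET _IOR('T', 42, struct demo_args)  // hypothetical request

  int main() {
    unsigned req = DEMO_GET;
    // _IOC_READ means the caller reads data, i.e. the kernel writes the
    // argument buffer back to user space; that is why ioctl_decode() above
    // maps IOC_READ to ioctl_desc::WRITE and IOC_WRITE to ioctl_desc::READ.
    std::printf("dir=%u type=%c nr=%u size=%u\n", _IOC_DIR(req),
                (char)_IOC_TYPE(req), _IOC_NR(req), _IOC_SIZE(req));
    return 0;
  }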
#include "sanitizer_glibc_version.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if !SANITIZER_MAC #include #endif #if !SANITIZER_IOS #include #endif #if !SANITIZER_ANDROID #include #include #include #endif #if SANITIZER_LINUX #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #endif #if SANITIZER_IOS #undef IOC_DIRMASK #endif #if SANITIZER_LINUX # include # include #if defined(__mips64) || defined(__aarch64__) || defined(__arm__) || \ SANITIZER_RISCV64 # include # ifdef __arm__ typedef struct user_fpregs elf_fpregset_t; # define ARM_VFPREGS_SIZE_ASAN (32 * 8 /*fpregs*/ + 4 /*fpscr*/) # if !defined(ARM_VFPREGS_SIZE) # define ARM_VFPREGS_SIZE ARM_VFPREGS_SIZE_ASAN # endif # endif # endif # include #endif #if !SANITIZER_ANDROID #include #include #include #endif #if SANITIZER_LINUX #if SANITIZER_GLIBC #include #include #include #include #include #include #if HAVE_RPC_XDR_H # include #endif #include #else #include #include #include #endif // SANITIZER_GLIBC #if SANITIZER_ANDROID #include #else #include #include #include #include #include #include #include #if defined(__mips64) # include #endif #include -#include #include #include #include #include #include #include #include #include #include #include #endif // SANITIZER_ANDROID #include #include #include #include #else #include #endif // SANITIZER_LINUX #if SANITIZER_MAC #include #include #include #endif // Include these after system headers to avoid name clashes and ambiguities. #include "sanitizer_internal_defs.h" #include "sanitizer_platform_limits_posix.h" namespace __sanitizer { unsigned struct_utsname_sz = sizeof(struct utsname); unsigned struct_stat_sz = sizeof(struct stat); #if !SANITIZER_IOS && !(SANITIZER_MAC && TARGET_CPU_ARM64) unsigned struct_stat64_sz = sizeof(struct stat64); #endif // !SANITIZER_IOS && !(SANITIZER_MAC && TARGET_CPU_ARM64) unsigned struct_rusage_sz = sizeof(struct rusage); unsigned struct_tm_sz = sizeof(struct tm); unsigned struct_passwd_sz = sizeof(struct passwd); unsigned struct_group_sz = sizeof(struct group); unsigned siginfo_t_sz = sizeof(siginfo_t); unsigned struct_sigaction_sz = sizeof(struct sigaction); unsigned struct_stack_t_sz = sizeof(stack_t); unsigned struct_itimerval_sz = sizeof(struct itimerval); unsigned pthread_t_sz = sizeof(pthread_t); unsigned pthread_mutex_t_sz = sizeof(pthread_mutex_t); unsigned pthread_cond_t_sz = sizeof(pthread_cond_t); unsigned pid_t_sz = sizeof(pid_t); unsigned timeval_sz = sizeof(timeval); unsigned uid_t_sz = sizeof(uid_t); unsigned gid_t_sz = sizeof(gid_t); unsigned mbstate_t_sz = sizeof(mbstate_t); unsigned sigset_t_sz = sizeof(sigset_t); unsigned struct_timezone_sz = sizeof(struct timezone); unsigned struct_tms_sz = sizeof(struct tms); unsigned struct_sigevent_sz = sizeof(struct sigevent); unsigned struct_sched_param_sz = sizeof(struct sched_param); unsigned struct_regex_sz = sizeof(regex_t); unsigned struct_regmatch_sz = sizeof(regmatch_t); #if (SANITIZER_MAC && !TARGET_CPU_ARM64) && !SANITIZER_IOS unsigned struct_statfs64_sz = sizeof(struct statfs64); #endif // (SANITIZER_MAC && !TARGET_CPU_ARM64) && !SANITIZER_IOS #if SANITIZER_GLIBC || SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_MAC unsigned struct_fstab_sz = sizeof(struct fstab); #endif // SANITIZER_GLIBC || SANITIZER_FREEBSD || 
SANITIZER_NETBSD || SANITIZER_MAC #if !SANITIZER_ANDROID unsigned struct_statfs_sz = sizeof(struct statfs); unsigned struct_sockaddr_sz = sizeof(struct sockaddr); unsigned ucontext_t_sz = sizeof(ucontext_t); #endif // !SANITIZER_ANDROID #if SANITIZER_LINUX unsigned struct_epoll_event_sz = sizeof(struct epoll_event); unsigned struct_sysinfo_sz = sizeof(struct sysinfo); unsigned __user_cap_header_struct_sz = sizeof(struct __user_cap_header_struct); unsigned __user_cap_data_struct_sz = sizeof(struct __user_cap_data_struct); unsigned struct_new_utsname_sz = sizeof(struct new_utsname); unsigned struct_old_utsname_sz = sizeof(struct old_utsname); unsigned struct_oldold_utsname_sz = sizeof(struct oldold_utsname); #endif // SANITIZER_LINUX #if SANITIZER_LINUX unsigned struct_rlimit_sz = sizeof(struct rlimit); unsigned struct_timespec_sz = sizeof(struct timespec); unsigned struct_utimbuf_sz = sizeof(struct utimbuf); unsigned struct_itimerspec_sz = sizeof(struct itimerspec); #endif // SANITIZER_LINUX #if SANITIZER_LINUX && !SANITIZER_ANDROID // Use pre-computed size of struct ustat to avoid which // has been removed from glibc 2.28. #if defined(__aarch64__) || defined(__s390x__) || defined(__mips64) || \ defined(__powerpc64__) || defined(__arch64__) || defined(__sparcv9) || \ defined(__x86_64__) || SANITIZER_RISCV64 #define SIZEOF_STRUCT_USTAT 32 #elif defined(__arm__) || defined(__i386__) || defined(__mips__) \ || defined(__powerpc__) || defined(__s390__) || defined(__sparc__) #define SIZEOF_STRUCT_USTAT 20 #else #error Unknown size of struct ustat #endif unsigned struct_ustat_sz = SIZEOF_STRUCT_USTAT; unsigned struct_rlimit64_sz = sizeof(struct rlimit64); unsigned struct_statvfs64_sz = sizeof(struct statvfs64); unsigned struct_crypt_data_sz = sizeof(struct crypt_data); #endif // SANITIZER_LINUX && !SANITIZER_ANDROID #if SANITIZER_LINUX && !SANITIZER_ANDROID unsigned struct_timex_sz = sizeof(struct timex); unsigned struct_msqid_ds_sz = sizeof(struct msqid_ds); unsigned struct_mq_attr_sz = sizeof(struct mq_attr); unsigned struct_statvfs_sz = sizeof(struct statvfs); #endif // SANITIZER_LINUX && !SANITIZER_ANDROID const uptr sig_ign = (uptr)SIG_IGN; const uptr sig_dfl = (uptr)SIG_DFL; const uptr sig_err = (uptr)SIG_ERR; const uptr sa_siginfo = (uptr)SA_SIGINFO; #if SANITIZER_LINUX int e_tabsz = (int)E_TABSZ; #endif #if SANITIZER_LINUX && !SANITIZER_ANDROID unsigned struct_shminfo_sz = sizeof(struct shminfo); unsigned struct_shm_info_sz = sizeof(struct shm_info); int shmctl_ipc_stat = (int)IPC_STAT; int shmctl_ipc_info = (int)IPC_INFO; int shmctl_shm_info = (int)SHM_INFO; int shmctl_shm_stat = (int)SHM_STAT; #endif #if !SANITIZER_MAC && !SANITIZER_FREEBSD unsigned struct_utmp_sz = sizeof(struct utmp); #endif #if !SANITIZER_ANDROID unsigned struct_utmpx_sz = sizeof(struct utmpx); #endif int map_fixed = MAP_FIXED; int af_inet = (int)AF_INET; int af_inet6 = (int)AF_INET6; uptr __sanitizer_in_addr_sz(int af) { if (af == AF_INET) return sizeof(struct in_addr); else if (af == AF_INET6) return sizeof(struct in6_addr); else return 0; } #if SANITIZER_LINUX unsigned struct_ElfW_Phdr_sz = sizeof(ElfW(Phdr)); #elif SANITIZER_FREEBSD unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr); #endif #if SANITIZER_GLIBC int glob_nomatch = GLOB_NOMATCH; int glob_altdirfunc = GLOB_ALTDIRFUNC; #endif #if SANITIZER_LINUX && !SANITIZER_ANDROID && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \ defined(__s390__) || SANITIZER_RISCV64) #if 
defined(__mips64) || defined(__powerpc64__) || defined(__arm__) unsigned struct_user_regs_struct_sz = sizeof(struct pt_regs); unsigned struct_user_fpregs_struct_sz = sizeof(elf_fpregset_t); #elif SANITIZER_RISCV64 unsigned struct_user_regs_struct_sz = sizeof(struct user_regs_struct); unsigned struct_user_fpregs_struct_sz = sizeof(struct __riscv_q_ext_state); #elif defined(__aarch64__) unsigned struct_user_regs_struct_sz = sizeof(struct user_pt_regs); unsigned struct_user_fpregs_struct_sz = sizeof(struct user_fpsimd_state); #elif defined(__s390__) unsigned struct_user_regs_struct_sz = sizeof(struct _user_regs_struct); unsigned struct_user_fpregs_struct_sz = sizeof(struct _user_fpregs_struct); #else unsigned struct_user_regs_struct_sz = sizeof(struct user_regs_struct); unsigned struct_user_fpregs_struct_sz = sizeof(struct user_fpregs_struct); #endif // __mips64 || __powerpc64__ || __aarch64__ #if defined(__x86_64) || defined(__mips64) || defined(__powerpc64__) || \ defined(__aarch64__) || defined(__arm__) || defined(__s390__) || \ SANITIZER_RISCV64 unsigned struct_user_fpxregs_struct_sz = 0; #else unsigned struct_user_fpxregs_struct_sz = sizeof(struct user_fpxregs_struct); #endif // __x86_64 || __mips64 || __powerpc64__ || __aarch64__ || __arm__ // || __s390__ #ifdef __arm__ unsigned struct_user_vfpregs_struct_sz = ARM_VFPREGS_SIZE; #else unsigned struct_user_vfpregs_struct_sz = 0; #endif int ptrace_peektext = PTRACE_PEEKTEXT; int ptrace_peekdata = PTRACE_PEEKDATA; int ptrace_peekuser = PTRACE_PEEKUSER; #if (defined(PTRACE_GETREGS) && defined(PTRACE_SETREGS)) || \ (defined(PT_GETREGS) && defined(PT_SETREGS)) int ptrace_getregs = PTRACE_GETREGS; int ptrace_setregs = PTRACE_SETREGS; #else int ptrace_getregs = -1; int ptrace_setregs = -1; #endif #if (defined(PTRACE_GETFPREGS) && defined(PTRACE_SETFPREGS)) || \ (defined(PT_GETFPREGS) && defined(PT_SETFPREGS)) int ptrace_getfpregs = PTRACE_GETFPREGS; int ptrace_setfpregs = PTRACE_SETFPREGS; #else int ptrace_getfpregs = -1; int ptrace_setfpregs = -1; #endif #if (defined(PTRACE_GETFPXREGS) && defined(PTRACE_SETFPXREGS)) || \ (defined(PT_GETFPXREGS) && defined(PT_SETFPXREGS)) int ptrace_getfpxregs = PTRACE_GETFPXREGS; int ptrace_setfpxregs = PTRACE_SETFPXREGS; #else int ptrace_getfpxregs = -1; int ptrace_setfpxregs = -1; #endif // PTRACE_GETFPXREGS/PTRACE_SETFPXREGS #if defined(PTRACE_GETVFPREGS) && defined(PTRACE_SETVFPREGS) int ptrace_getvfpregs = PTRACE_GETVFPREGS; int ptrace_setvfpregs = PTRACE_SETVFPREGS; #else int ptrace_getvfpregs = -1; int ptrace_setvfpregs = -1; #endif int ptrace_geteventmsg = PTRACE_GETEVENTMSG; #if (defined(PTRACE_GETSIGINFO) && defined(PTRACE_SETSIGINFO)) || \ (defined(PT_GETSIGINFO) && defined(PT_SETSIGINFO)) int ptrace_getsiginfo = PTRACE_GETSIGINFO; int ptrace_setsiginfo = PTRACE_SETSIGINFO; #else int ptrace_getsiginfo = -1; int ptrace_setsiginfo = -1; #endif // PTRACE_GETSIGINFO/PTRACE_SETSIGINFO #if defined(PTRACE_GETREGSET) && defined(PTRACE_SETREGSET) int ptrace_getregset = PTRACE_GETREGSET; int ptrace_setregset = PTRACE_SETREGSET; #else int ptrace_getregset = -1; int ptrace_setregset = -1; #endif // PTRACE_GETREGSET/PTRACE_SETREGSET #endif unsigned path_max = PATH_MAX; // ioctl arguments unsigned struct_ifreq_sz = sizeof(struct ifreq); unsigned struct_termios_sz = sizeof(struct termios); unsigned struct_winsize_sz = sizeof(struct winsize); #if SANITIZER_LINUX unsigned struct_arpreq_sz = sizeof(struct arpreq); unsigned struct_cdrom_msf_sz = sizeof(struct cdrom_msf); unsigned struct_cdrom_multisession_sz = 
sizeof(struct cdrom_multisession); unsigned struct_cdrom_read_audio_sz = sizeof(struct cdrom_read_audio); unsigned struct_cdrom_subchnl_sz = sizeof(struct cdrom_subchnl); unsigned struct_cdrom_ti_sz = sizeof(struct cdrom_ti); unsigned struct_cdrom_tocentry_sz = sizeof(struct cdrom_tocentry); unsigned struct_cdrom_tochdr_sz = sizeof(struct cdrom_tochdr); unsigned struct_cdrom_volctrl_sz = sizeof(struct cdrom_volctrl); unsigned struct_ff_effect_sz = sizeof(struct ff_effect); unsigned struct_floppy_drive_params_sz = sizeof(struct floppy_drive_params); unsigned struct_floppy_drive_struct_sz = sizeof(struct floppy_drive_struct); unsigned struct_floppy_fdc_state_sz = sizeof(struct floppy_fdc_state); unsigned struct_floppy_max_errors_sz = sizeof(struct floppy_max_errors); unsigned struct_floppy_raw_cmd_sz = sizeof(struct floppy_raw_cmd); unsigned struct_floppy_struct_sz = sizeof(struct floppy_struct); unsigned struct_floppy_write_errors_sz = sizeof(struct floppy_write_errors); unsigned struct_format_descr_sz = sizeof(struct format_descr); unsigned struct_hd_driveid_sz = sizeof(struct hd_driveid); unsigned struct_hd_geometry_sz = sizeof(struct hd_geometry); unsigned struct_input_absinfo_sz = sizeof(struct input_absinfo); unsigned struct_input_id_sz = sizeof(struct input_id); unsigned struct_mtpos_sz = sizeof(struct mtpos); unsigned struct_rtentry_sz = sizeof(struct rtentry); #if SANITIZER_GLIBC || SANITIZER_ANDROID unsigned struct_termio_sz = sizeof(struct termio); #endif unsigned struct_vt_consize_sz = sizeof(struct vt_consize); unsigned struct_vt_sizes_sz = sizeof(struct vt_sizes); unsigned struct_vt_stat_sz = sizeof(struct vt_stat); #endif // SANITIZER_LINUX #if SANITIZER_LINUX #if SOUND_VERSION >= 0x040000 unsigned struct_copr_buffer_sz = 0; unsigned struct_copr_debug_buf_sz = 0; unsigned struct_copr_msg_sz = 0; #else unsigned struct_copr_buffer_sz = sizeof(struct copr_buffer); unsigned struct_copr_debug_buf_sz = sizeof(struct copr_debug_buf); unsigned struct_copr_msg_sz = sizeof(struct copr_msg); #endif unsigned struct_midi_info_sz = sizeof(struct midi_info); unsigned struct_mtget_sz = sizeof(struct mtget); unsigned struct_mtop_sz = sizeof(struct mtop); unsigned struct_sbi_instrument_sz = sizeof(struct sbi_instrument); unsigned struct_seq_event_rec_sz = sizeof(struct seq_event_rec); unsigned struct_synth_info_sz = sizeof(struct synth_info); unsigned struct_vt_mode_sz = sizeof(struct vt_mode); #endif // SANITIZER_LINUX #if SANITIZER_GLIBC unsigned struct_ax25_parms_struct_sz = sizeof(struct ax25_parms_struct); - unsigned struct_cyclades_monitor_sz = sizeof(struct cyclades_monitor); #if EV_VERSION > (0x010000) unsigned struct_input_keymap_entry_sz = sizeof(struct input_keymap_entry); #else unsigned struct_input_keymap_entry_sz = 0; #endif unsigned struct_ipx_config_data_sz = sizeof(struct ipx_config_data); unsigned struct_kbdiacrs_sz = sizeof(struct kbdiacrs); unsigned struct_kbentry_sz = sizeof(struct kbentry); unsigned struct_kbkeycode_sz = sizeof(struct kbkeycode); unsigned struct_kbsentry_sz = sizeof(struct kbsentry); unsigned struct_mtconfiginfo_sz = sizeof(struct mtconfiginfo); unsigned struct_nr_parms_struct_sz = sizeof(struct nr_parms_struct); unsigned struct_scc_modem_sz = sizeof(struct scc_modem); unsigned struct_scc_stat_sz = sizeof(struct scc_stat); unsigned struct_serial_multiport_struct_sz = sizeof(struct serial_multiport_struct); unsigned struct_serial_struct_sz = sizeof(struct serial_struct); unsigned struct_sockaddr_ax25_sz = sizeof(struct sockaddr_ax25); unsigned 
struct_unimapdesc_sz = sizeof(struct unimapdesc); unsigned struct_unimapinit_sz = sizeof(struct unimapinit); unsigned struct_audio_buf_info_sz = sizeof(struct audio_buf_info); unsigned struct_ppp_stats_sz = sizeof(struct ppp_stats); #endif // SANITIZER_GLIBC #if !SANITIZER_ANDROID && !SANITIZER_MAC unsigned struct_sioc_sg_req_sz = sizeof(struct sioc_sg_req); unsigned struct_sioc_vif_req_sz = sizeof(struct sioc_vif_req); #endif const unsigned long __sanitizer_bufsiz = BUFSIZ; const unsigned IOCTL_NOT_PRESENT = 0; unsigned IOCTL_FIOASYNC = FIOASYNC; unsigned IOCTL_FIOCLEX = FIOCLEX; unsigned IOCTL_FIOGETOWN = FIOGETOWN; unsigned IOCTL_FIONBIO = FIONBIO; unsigned IOCTL_FIONCLEX = FIONCLEX; unsigned IOCTL_FIOSETOWN = FIOSETOWN; unsigned IOCTL_SIOCADDMULTI = SIOCADDMULTI; unsigned IOCTL_SIOCATMARK = SIOCATMARK; unsigned IOCTL_SIOCDELMULTI = SIOCDELMULTI; unsigned IOCTL_SIOCGIFADDR = SIOCGIFADDR; unsigned IOCTL_SIOCGIFBRDADDR = SIOCGIFBRDADDR; unsigned IOCTL_SIOCGIFCONF = SIOCGIFCONF; unsigned IOCTL_SIOCGIFDSTADDR = SIOCGIFDSTADDR; unsigned IOCTL_SIOCGIFFLAGS = SIOCGIFFLAGS; unsigned IOCTL_SIOCGIFMETRIC = SIOCGIFMETRIC; unsigned IOCTL_SIOCGIFMTU = SIOCGIFMTU; unsigned IOCTL_SIOCGIFNETMASK = SIOCGIFNETMASK; unsigned IOCTL_SIOCGPGRP = SIOCGPGRP; unsigned IOCTL_SIOCSIFADDR = SIOCSIFADDR; unsigned IOCTL_SIOCSIFBRDADDR = SIOCSIFBRDADDR; unsigned IOCTL_SIOCSIFDSTADDR = SIOCSIFDSTADDR; unsigned IOCTL_SIOCSIFFLAGS = SIOCSIFFLAGS; unsigned IOCTL_SIOCSIFMETRIC = SIOCSIFMETRIC; unsigned IOCTL_SIOCSIFMTU = SIOCSIFMTU; unsigned IOCTL_SIOCSIFNETMASK = SIOCSIFNETMASK; unsigned IOCTL_SIOCSPGRP = SIOCSPGRP; unsigned IOCTL_TIOCCONS = TIOCCONS; unsigned IOCTL_TIOCEXCL = TIOCEXCL; unsigned IOCTL_TIOCGETD = TIOCGETD; unsigned IOCTL_TIOCGPGRP = TIOCGPGRP; unsigned IOCTL_TIOCGWINSZ = TIOCGWINSZ; unsigned IOCTL_TIOCMBIC = TIOCMBIC; unsigned IOCTL_TIOCMBIS = TIOCMBIS; unsigned IOCTL_TIOCMGET = TIOCMGET; unsigned IOCTL_TIOCMSET = TIOCMSET; unsigned IOCTL_TIOCNOTTY = TIOCNOTTY; unsigned IOCTL_TIOCNXCL = TIOCNXCL; unsigned IOCTL_TIOCOUTQ = TIOCOUTQ; unsigned IOCTL_TIOCPKT = TIOCPKT; unsigned IOCTL_TIOCSCTTY = TIOCSCTTY; unsigned IOCTL_TIOCSETD = TIOCSETD; unsigned IOCTL_TIOCSPGRP = TIOCSPGRP; unsigned IOCTL_TIOCSTI = TIOCSTI; unsigned IOCTL_TIOCSWINSZ = TIOCSWINSZ; #if SANITIZER_LINUX && !SANITIZER_ANDROID unsigned IOCTL_SIOCGETSGCNT = SIOCGETSGCNT; unsigned IOCTL_SIOCGETVIFCNT = SIOCGETVIFCNT; #endif #if SANITIZER_LINUX unsigned IOCTL_EVIOCGABS = EVIOCGABS(0); unsigned IOCTL_EVIOCGBIT = EVIOCGBIT(0, 0); unsigned IOCTL_EVIOCGEFFECTS = EVIOCGEFFECTS; unsigned IOCTL_EVIOCGID = EVIOCGID; unsigned IOCTL_EVIOCGKEY = EVIOCGKEY(0); unsigned IOCTL_EVIOCGKEYCODE = EVIOCGKEYCODE; unsigned IOCTL_EVIOCGLED = EVIOCGLED(0); unsigned IOCTL_EVIOCGNAME = EVIOCGNAME(0); unsigned IOCTL_EVIOCGPHYS = EVIOCGPHYS(0); unsigned IOCTL_EVIOCGRAB = EVIOCGRAB; unsigned IOCTL_EVIOCGREP = EVIOCGREP; unsigned IOCTL_EVIOCGSND = EVIOCGSND(0); unsigned IOCTL_EVIOCGSW = EVIOCGSW(0); unsigned IOCTL_EVIOCGUNIQ = EVIOCGUNIQ(0); unsigned IOCTL_EVIOCGVERSION = EVIOCGVERSION; unsigned IOCTL_EVIOCRMFF = EVIOCRMFF; unsigned IOCTL_EVIOCSABS = EVIOCSABS(0); unsigned IOCTL_EVIOCSFF = EVIOCSFF; unsigned IOCTL_EVIOCSKEYCODE = EVIOCSKEYCODE; unsigned IOCTL_EVIOCSREP = EVIOCSREP; unsigned IOCTL_BLKFLSBUF = BLKFLSBUF; unsigned IOCTL_BLKGETSIZE = BLKGETSIZE; unsigned IOCTL_BLKRAGET = BLKRAGET; unsigned IOCTL_BLKRASET = BLKRASET; unsigned IOCTL_BLKROGET = BLKROGET; unsigned IOCTL_BLKROSET = BLKROSET; unsigned IOCTL_BLKRRPART = BLKRRPART; unsigned IOCTL_CDROMAUDIOBUFSIZ = 
CDROMAUDIOBUFSIZ; unsigned IOCTL_CDROMEJECT = CDROMEJECT; unsigned IOCTL_CDROMEJECT_SW = CDROMEJECT_SW; unsigned IOCTL_CDROMMULTISESSION = CDROMMULTISESSION; unsigned IOCTL_CDROMPAUSE = CDROMPAUSE; unsigned IOCTL_CDROMPLAYMSF = CDROMPLAYMSF; unsigned IOCTL_CDROMPLAYTRKIND = CDROMPLAYTRKIND; unsigned IOCTL_CDROMREADAUDIO = CDROMREADAUDIO; unsigned IOCTL_CDROMREADCOOKED = CDROMREADCOOKED; unsigned IOCTL_CDROMREADMODE1 = CDROMREADMODE1; unsigned IOCTL_CDROMREADMODE2 = CDROMREADMODE2; unsigned IOCTL_CDROMREADRAW = CDROMREADRAW; unsigned IOCTL_CDROMREADTOCENTRY = CDROMREADTOCENTRY; unsigned IOCTL_CDROMREADTOCHDR = CDROMREADTOCHDR; unsigned IOCTL_CDROMRESET = CDROMRESET; unsigned IOCTL_CDROMRESUME = CDROMRESUME; unsigned IOCTL_CDROMSEEK = CDROMSEEK; unsigned IOCTL_CDROMSTART = CDROMSTART; unsigned IOCTL_CDROMSTOP = CDROMSTOP; unsigned IOCTL_CDROMSUBCHNL = CDROMSUBCHNL; unsigned IOCTL_CDROMVOLCTRL = CDROMVOLCTRL; unsigned IOCTL_CDROMVOLREAD = CDROMVOLREAD; unsigned IOCTL_CDROM_GET_UPC = CDROM_GET_UPC; unsigned IOCTL_FDCLRPRM = FDCLRPRM; unsigned IOCTL_FDDEFPRM = FDDEFPRM; unsigned IOCTL_FDFLUSH = FDFLUSH; unsigned IOCTL_FDFMTBEG = FDFMTBEG; unsigned IOCTL_FDFMTEND = FDFMTEND; unsigned IOCTL_FDFMTTRK = FDFMTTRK; unsigned IOCTL_FDGETDRVPRM = FDGETDRVPRM; unsigned IOCTL_FDGETDRVSTAT = FDGETDRVSTAT; unsigned IOCTL_FDGETDRVTYP = FDGETDRVTYP; unsigned IOCTL_FDGETFDCSTAT = FDGETFDCSTAT; unsigned IOCTL_FDGETMAXERRS = FDGETMAXERRS; unsigned IOCTL_FDGETPRM = FDGETPRM; unsigned IOCTL_FDMSGOFF = FDMSGOFF; unsigned IOCTL_FDMSGON = FDMSGON; unsigned IOCTL_FDPOLLDRVSTAT = FDPOLLDRVSTAT; unsigned IOCTL_FDRAWCMD = FDRAWCMD; unsigned IOCTL_FDRESET = FDRESET; unsigned IOCTL_FDSETDRVPRM = FDSETDRVPRM; unsigned IOCTL_FDSETEMSGTRESH = FDSETEMSGTRESH; unsigned IOCTL_FDSETMAXERRS = FDSETMAXERRS; unsigned IOCTL_FDSETPRM = FDSETPRM; unsigned IOCTL_FDTWADDLE = FDTWADDLE; unsigned IOCTL_FDWERRORCLR = FDWERRORCLR; unsigned IOCTL_FDWERRORGET = FDWERRORGET; unsigned IOCTL_HDIO_DRIVE_CMD = HDIO_DRIVE_CMD; unsigned IOCTL_HDIO_GETGEO = HDIO_GETGEO; unsigned IOCTL_HDIO_GET_32BIT = HDIO_GET_32BIT; unsigned IOCTL_HDIO_GET_DMA = HDIO_GET_DMA; unsigned IOCTL_HDIO_GET_IDENTITY = HDIO_GET_IDENTITY; unsigned IOCTL_HDIO_GET_KEEPSETTINGS = HDIO_GET_KEEPSETTINGS; unsigned IOCTL_HDIO_GET_MULTCOUNT = HDIO_GET_MULTCOUNT; unsigned IOCTL_HDIO_GET_NOWERR = HDIO_GET_NOWERR; unsigned IOCTL_HDIO_GET_UNMASKINTR = HDIO_GET_UNMASKINTR; unsigned IOCTL_HDIO_SET_32BIT = HDIO_SET_32BIT; unsigned IOCTL_HDIO_SET_DMA = HDIO_SET_DMA; unsigned IOCTL_HDIO_SET_KEEPSETTINGS = HDIO_SET_KEEPSETTINGS; unsigned IOCTL_HDIO_SET_MULTCOUNT = HDIO_SET_MULTCOUNT; unsigned IOCTL_HDIO_SET_NOWERR = HDIO_SET_NOWERR; unsigned IOCTL_HDIO_SET_UNMASKINTR = HDIO_SET_UNMASKINTR; unsigned IOCTL_MTIOCPOS = MTIOCPOS; unsigned IOCTL_PPPIOCGASYNCMAP = PPPIOCGASYNCMAP; unsigned IOCTL_PPPIOCGDEBUG = PPPIOCGDEBUG; unsigned IOCTL_PPPIOCGFLAGS = PPPIOCGFLAGS; unsigned IOCTL_PPPIOCGUNIT = PPPIOCGUNIT; unsigned IOCTL_PPPIOCGXASYNCMAP = PPPIOCGXASYNCMAP; unsigned IOCTL_PPPIOCSASYNCMAP = PPPIOCSASYNCMAP; unsigned IOCTL_PPPIOCSDEBUG = PPPIOCSDEBUG; unsigned IOCTL_PPPIOCSFLAGS = PPPIOCSFLAGS; unsigned IOCTL_PPPIOCSMAXCID = PPPIOCSMAXCID; unsigned IOCTL_PPPIOCSMRU = PPPIOCSMRU; unsigned IOCTL_PPPIOCSXASYNCMAP = PPPIOCSXASYNCMAP; unsigned IOCTL_SIOCADDRT = SIOCADDRT; unsigned IOCTL_SIOCDARP = SIOCDARP; unsigned IOCTL_SIOCDELRT = SIOCDELRT; unsigned IOCTL_SIOCDRARP = SIOCDRARP; unsigned IOCTL_SIOCGARP = SIOCGARP; unsigned IOCTL_SIOCGIFENCAP = SIOCGIFENCAP; unsigned IOCTL_SIOCGIFHWADDR = SIOCGIFHWADDR; 
unsigned IOCTL_SIOCGIFMAP = SIOCGIFMAP; unsigned IOCTL_SIOCGIFMEM = SIOCGIFMEM; unsigned IOCTL_SIOCGIFNAME = SIOCGIFNAME; unsigned IOCTL_SIOCGIFSLAVE = SIOCGIFSLAVE; unsigned IOCTL_SIOCGRARP = SIOCGRARP; unsigned IOCTL_SIOCGSTAMP = SIOCGSTAMP; unsigned IOCTL_SIOCSARP = SIOCSARP; unsigned IOCTL_SIOCSIFENCAP = SIOCSIFENCAP; unsigned IOCTL_SIOCSIFHWADDR = SIOCSIFHWADDR; unsigned IOCTL_SIOCSIFLINK = SIOCSIFLINK; unsigned IOCTL_SIOCSIFMAP = SIOCSIFMAP; unsigned IOCTL_SIOCSIFMEM = SIOCSIFMEM; unsigned IOCTL_SIOCSIFSLAVE = SIOCSIFSLAVE; unsigned IOCTL_SIOCSRARP = SIOCSRARP; # if SOUND_VERSION >= 0x040000 unsigned IOCTL_SNDCTL_COPR_HALT = IOCTL_NOT_PRESENT; unsigned IOCTL_SNDCTL_COPR_LOAD = IOCTL_NOT_PRESENT; unsigned IOCTL_SNDCTL_COPR_RCODE = IOCTL_NOT_PRESENT; unsigned IOCTL_SNDCTL_COPR_RCVMSG = IOCTL_NOT_PRESENT; unsigned IOCTL_SNDCTL_COPR_RDATA = IOCTL_NOT_PRESENT; unsigned IOCTL_SNDCTL_COPR_RESET = IOCTL_NOT_PRESENT; unsigned IOCTL_SNDCTL_COPR_RUN = IOCTL_NOT_PRESENT; unsigned IOCTL_SNDCTL_COPR_SENDMSG = IOCTL_NOT_PRESENT; unsigned IOCTL_SNDCTL_COPR_WCODE = IOCTL_NOT_PRESENT; unsigned IOCTL_SNDCTL_COPR_WDATA = IOCTL_NOT_PRESENT; unsigned IOCTL_SOUND_PCM_READ_BITS = IOCTL_NOT_PRESENT; unsigned IOCTL_SOUND_PCM_READ_CHANNELS = IOCTL_NOT_PRESENT; unsigned IOCTL_SOUND_PCM_READ_FILTER = IOCTL_NOT_PRESENT; unsigned IOCTL_SOUND_PCM_READ_RATE = IOCTL_NOT_PRESENT; unsigned IOCTL_SOUND_PCM_WRITE_CHANNELS = IOCTL_NOT_PRESENT; unsigned IOCTL_SOUND_PCM_WRITE_FILTER = IOCTL_NOT_PRESENT; # else // SOUND_VERSION unsigned IOCTL_SNDCTL_COPR_HALT = SNDCTL_COPR_HALT; unsigned IOCTL_SNDCTL_COPR_LOAD = SNDCTL_COPR_LOAD; unsigned IOCTL_SNDCTL_COPR_RCODE = SNDCTL_COPR_RCODE; unsigned IOCTL_SNDCTL_COPR_RCVMSG = SNDCTL_COPR_RCVMSG; unsigned IOCTL_SNDCTL_COPR_RDATA = SNDCTL_COPR_RDATA; unsigned IOCTL_SNDCTL_COPR_RESET = SNDCTL_COPR_RESET; unsigned IOCTL_SNDCTL_COPR_RUN = SNDCTL_COPR_RUN; unsigned IOCTL_SNDCTL_COPR_SENDMSG = SNDCTL_COPR_SENDMSG; unsigned IOCTL_SNDCTL_COPR_WCODE = SNDCTL_COPR_WCODE; unsigned IOCTL_SNDCTL_COPR_WDATA = SNDCTL_COPR_WDATA; unsigned IOCTL_SOUND_PCM_READ_BITS = SOUND_PCM_READ_BITS; unsigned IOCTL_SOUND_PCM_READ_CHANNELS = SOUND_PCM_READ_CHANNELS; unsigned IOCTL_SOUND_PCM_READ_FILTER = SOUND_PCM_READ_FILTER; unsigned IOCTL_SOUND_PCM_READ_RATE = SOUND_PCM_READ_RATE; unsigned IOCTL_SOUND_PCM_WRITE_CHANNELS = SOUND_PCM_WRITE_CHANNELS; unsigned IOCTL_SOUND_PCM_WRITE_FILTER = SOUND_PCM_WRITE_FILTER; #endif // SOUND_VERSION unsigned IOCTL_TCFLSH = TCFLSH; unsigned IOCTL_TCGETA = TCGETA; unsigned IOCTL_TCGETS = TCGETS; unsigned IOCTL_TCSBRK = TCSBRK; unsigned IOCTL_TCSBRKP = TCSBRKP; unsigned IOCTL_TCSETA = TCSETA; unsigned IOCTL_TCSETAF = TCSETAF; unsigned IOCTL_TCSETAW = TCSETAW; unsigned IOCTL_TCSETS = TCSETS; unsigned IOCTL_TCSETSF = TCSETSF; unsigned IOCTL_TCSETSW = TCSETSW; unsigned IOCTL_TCXONC = TCXONC; unsigned IOCTL_TIOCGLCKTRMIOS = TIOCGLCKTRMIOS; unsigned IOCTL_TIOCGSOFTCAR = TIOCGSOFTCAR; unsigned IOCTL_TIOCINQ = TIOCINQ; unsigned IOCTL_TIOCLINUX = TIOCLINUX; unsigned IOCTL_TIOCSERCONFIG = TIOCSERCONFIG; unsigned IOCTL_TIOCSERGETLSR = TIOCSERGETLSR; unsigned IOCTL_TIOCSERGWILD = TIOCSERGWILD; unsigned IOCTL_TIOCSERSWILD = TIOCSERSWILD; unsigned IOCTL_TIOCSLCKTRMIOS = TIOCSLCKTRMIOS; unsigned IOCTL_TIOCSSOFTCAR = TIOCSSOFTCAR; unsigned IOCTL_VT_DISALLOCATE = VT_DISALLOCATE; unsigned IOCTL_VT_GETSTATE = VT_GETSTATE; unsigned IOCTL_VT_RESIZE = VT_RESIZE; unsigned IOCTL_VT_RESIZEX = VT_RESIZEX; unsigned IOCTL_VT_SENDSIG = VT_SENDSIG; unsigned IOCTL_MTIOCGET = MTIOCGET; unsigned IOCTL_MTIOCTOP 
= MTIOCTOP; unsigned IOCTL_SNDCTL_DSP_GETBLKSIZE = SNDCTL_DSP_GETBLKSIZE; unsigned IOCTL_SNDCTL_DSP_GETFMTS = SNDCTL_DSP_GETFMTS; unsigned IOCTL_SNDCTL_DSP_NONBLOCK = SNDCTL_DSP_NONBLOCK; unsigned IOCTL_SNDCTL_DSP_POST = SNDCTL_DSP_POST; unsigned IOCTL_SNDCTL_DSP_RESET = SNDCTL_DSP_RESET; unsigned IOCTL_SNDCTL_DSP_SETFMT = SNDCTL_DSP_SETFMT; unsigned IOCTL_SNDCTL_DSP_SETFRAGMENT = SNDCTL_DSP_SETFRAGMENT; unsigned IOCTL_SNDCTL_DSP_SPEED = SNDCTL_DSP_SPEED; unsigned IOCTL_SNDCTL_DSP_STEREO = SNDCTL_DSP_STEREO; unsigned IOCTL_SNDCTL_DSP_SUBDIVIDE = SNDCTL_DSP_SUBDIVIDE; unsigned IOCTL_SNDCTL_DSP_SYNC = SNDCTL_DSP_SYNC; unsigned IOCTL_SNDCTL_FM_4OP_ENABLE = SNDCTL_FM_4OP_ENABLE; unsigned IOCTL_SNDCTL_FM_LOAD_INSTR = SNDCTL_FM_LOAD_INSTR; unsigned IOCTL_SNDCTL_MIDI_INFO = SNDCTL_MIDI_INFO; unsigned IOCTL_SNDCTL_MIDI_PRETIME = SNDCTL_MIDI_PRETIME; unsigned IOCTL_SNDCTL_SEQ_CTRLRATE = SNDCTL_SEQ_CTRLRATE; unsigned IOCTL_SNDCTL_SEQ_GETINCOUNT = SNDCTL_SEQ_GETINCOUNT; unsigned IOCTL_SNDCTL_SEQ_GETOUTCOUNT = SNDCTL_SEQ_GETOUTCOUNT; unsigned IOCTL_SNDCTL_SEQ_NRMIDIS = SNDCTL_SEQ_NRMIDIS; unsigned IOCTL_SNDCTL_SEQ_NRSYNTHS = SNDCTL_SEQ_NRSYNTHS; unsigned IOCTL_SNDCTL_SEQ_OUTOFBAND = SNDCTL_SEQ_OUTOFBAND; unsigned IOCTL_SNDCTL_SEQ_PANIC = SNDCTL_SEQ_PANIC; unsigned IOCTL_SNDCTL_SEQ_PERCMODE = SNDCTL_SEQ_PERCMODE; unsigned IOCTL_SNDCTL_SEQ_RESET = SNDCTL_SEQ_RESET; unsigned IOCTL_SNDCTL_SEQ_RESETSAMPLES = SNDCTL_SEQ_RESETSAMPLES; unsigned IOCTL_SNDCTL_SEQ_SYNC = SNDCTL_SEQ_SYNC; unsigned IOCTL_SNDCTL_SEQ_TESTMIDI = SNDCTL_SEQ_TESTMIDI; unsigned IOCTL_SNDCTL_SEQ_THRESHOLD = SNDCTL_SEQ_THRESHOLD; unsigned IOCTL_SNDCTL_SYNTH_INFO = SNDCTL_SYNTH_INFO; unsigned IOCTL_SNDCTL_SYNTH_MEMAVL = SNDCTL_SYNTH_MEMAVL; unsigned IOCTL_SNDCTL_TMR_CONTINUE = SNDCTL_TMR_CONTINUE; unsigned IOCTL_SNDCTL_TMR_METRONOME = SNDCTL_TMR_METRONOME; unsigned IOCTL_SNDCTL_TMR_SELECT = SNDCTL_TMR_SELECT; unsigned IOCTL_SNDCTL_TMR_SOURCE = SNDCTL_TMR_SOURCE; unsigned IOCTL_SNDCTL_TMR_START = SNDCTL_TMR_START; unsigned IOCTL_SNDCTL_TMR_STOP = SNDCTL_TMR_STOP; unsigned IOCTL_SNDCTL_TMR_TEMPO = SNDCTL_TMR_TEMPO; unsigned IOCTL_SNDCTL_TMR_TIMEBASE = SNDCTL_TMR_TIMEBASE; unsigned IOCTL_SOUND_MIXER_READ_ALTPCM = SOUND_MIXER_READ_ALTPCM; unsigned IOCTL_SOUND_MIXER_READ_BASS = SOUND_MIXER_READ_BASS; unsigned IOCTL_SOUND_MIXER_READ_CAPS = SOUND_MIXER_READ_CAPS; unsigned IOCTL_SOUND_MIXER_READ_CD = SOUND_MIXER_READ_CD; unsigned IOCTL_SOUND_MIXER_READ_DEVMASK = SOUND_MIXER_READ_DEVMASK; unsigned IOCTL_SOUND_MIXER_READ_ENHANCE = SOUND_MIXER_READ_ENHANCE; unsigned IOCTL_SOUND_MIXER_READ_IGAIN = SOUND_MIXER_READ_IGAIN; unsigned IOCTL_SOUND_MIXER_READ_IMIX = SOUND_MIXER_READ_IMIX; unsigned IOCTL_SOUND_MIXER_READ_LINE = SOUND_MIXER_READ_LINE; unsigned IOCTL_SOUND_MIXER_READ_LINE1 = SOUND_MIXER_READ_LINE1; unsigned IOCTL_SOUND_MIXER_READ_LINE2 = SOUND_MIXER_READ_LINE2; unsigned IOCTL_SOUND_MIXER_READ_LINE3 = SOUND_MIXER_READ_LINE3; unsigned IOCTL_SOUND_MIXER_READ_LOUD = SOUND_MIXER_READ_LOUD; unsigned IOCTL_SOUND_MIXER_READ_MIC = SOUND_MIXER_READ_MIC; unsigned IOCTL_SOUND_MIXER_READ_MUTE = SOUND_MIXER_READ_MUTE; unsigned IOCTL_SOUND_MIXER_READ_OGAIN = SOUND_MIXER_READ_OGAIN; unsigned IOCTL_SOUND_MIXER_READ_PCM = SOUND_MIXER_READ_PCM; unsigned IOCTL_SOUND_MIXER_READ_RECLEV = SOUND_MIXER_READ_RECLEV; unsigned IOCTL_SOUND_MIXER_READ_RECMASK = SOUND_MIXER_READ_RECMASK; unsigned IOCTL_SOUND_MIXER_READ_RECSRC = SOUND_MIXER_READ_RECSRC; unsigned IOCTL_SOUND_MIXER_READ_SPEAKER = SOUND_MIXER_READ_SPEAKER; unsigned IOCTL_SOUND_MIXER_READ_STEREODEVS = 
SOUND_MIXER_READ_STEREODEVS; unsigned IOCTL_SOUND_MIXER_READ_SYNTH = SOUND_MIXER_READ_SYNTH; unsigned IOCTL_SOUND_MIXER_READ_TREBLE = SOUND_MIXER_READ_TREBLE; unsigned IOCTL_SOUND_MIXER_READ_VOLUME = SOUND_MIXER_READ_VOLUME; unsigned IOCTL_SOUND_MIXER_WRITE_ALTPCM = SOUND_MIXER_WRITE_ALTPCM; unsigned IOCTL_SOUND_MIXER_WRITE_BASS = SOUND_MIXER_WRITE_BASS; unsigned IOCTL_SOUND_MIXER_WRITE_CD = SOUND_MIXER_WRITE_CD; unsigned IOCTL_SOUND_MIXER_WRITE_ENHANCE = SOUND_MIXER_WRITE_ENHANCE; unsigned IOCTL_SOUND_MIXER_WRITE_IGAIN = SOUND_MIXER_WRITE_IGAIN; unsigned IOCTL_SOUND_MIXER_WRITE_IMIX = SOUND_MIXER_WRITE_IMIX; unsigned IOCTL_SOUND_MIXER_WRITE_LINE = SOUND_MIXER_WRITE_LINE; unsigned IOCTL_SOUND_MIXER_WRITE_LINE1 = SOUND_MIXER_WRITE_LINE1; unsigned IOCTL_SOUND_MIXER_WRITE_LINE2 = SOUND_MIXER_WRITE_LINE2; unsigned IOCTL_SOUND_MIXER_WRITE_LINE3 = SOUND_MIXER_WRITE_LINE3; unsigned IOCTL_SOUND_MIXER_WRITE_LOUD = SOUND_MIXER_WRITE_LOUD; unsigned IOCTL_SOUND_MIXER_WRITE_MIC = SOUND_MIXER_WRITE_MIC; unsigned IOCTL_SOUND_MIXER_WRITE_MUTE = SOUND_MIXER_WRITE_MUTE; unsigned IOCTL_SOUND_MIXER_WRITE_OGAIN = SOUND_MIXER_WRITE_OGAIN; unsigned IOCTL_SOUND_MIXER_WRITE_PCM = SOUND_MIXER_WRITE_PCM; unsigned IOCTL_SOUND_MIXER_WRITE_RECLEV = SOUND_MIXER_WRITE_RECLEV; unsigned IOCTL_SOUND_MIXER_WRITE_RECSRC = SOUND_MIXER_WRITE_RECSRC; unsigned IOCTL_SOUND_MIXER_WRITE_SPEAKER = SOUND_MIXER_WRITE_SPEAKER; unsigned IOCTL_SOUND_MIXER_WRITE_SYNTH = SOUND_MIXER_WRITE_SYNTH; unsigned IOCTL_SOUND_MIXER_WRITE_TREBLE = SOUND_MIXER_WRITE_TREBLE; unsigned IOCTL_SOUND_MIXER_WRITE_VOLUME = SOUND_MIXER_WRITE_VOLUME; unsigned IOCTL_VT_ACTIVATE = VT_ACTIVATE; unsigned IOCTL_VT_GETMODE = VT_GETMODE; unsigned IOCTL_VT_OPENQRY = VT_OPENQRY; unsigned IOCTL_VT_RELDISP = VT_RELDISP; unsigned IOCTL_VT_SETMODE = VT_SETMODE; unsigned IOCTL_VT_WAITACTIVE = VT_WAITACTIVE; #endif // SANITIZER_LINUX #if SANITIZER_LINUX && !SANITIZER_ANDROID - unsigned IOCTL_CYGETDEFTHRESH = CYGETDEFTHRESH; - unsigned IOCTL_CYGETDEFTIMEOUT = CYGETDEFTIMEOUT; - unsigned IOCTL_CYGETMON = CYGETMON; - unsigned IOCTL_CYGETTHRESH = CYGETTHRESH; - unsigned IOCTL_CYGETTIMEOUT = CYGETTIMEOUT; - unsigned IOCTL_CYSETDEFTHRESH = CYSETDEFTHRESH; - unsigned IOCTL_CYSETDEFTIMEOUT = CYSETDEFTIMEOUT; - unsigned IOCTL_CYSETTHRESH = CYSETTHRESH; - unsigned IOCTL_CYSETTIMEOUT = CYSETTIMEOUT; unsigned IOCTL_EQL_EMANCIPATE = EQL_EMANCIPATE; unsigned IOCTL_EQL_ENSLAVE = EQL_ENSLAVE; unsigned IOCTL_EQL_GETMASTRCFG = EQL_GETMASTRCFG; unsigned IOCTL_EQL_GETSLAVECFG = EQL_GETSLAVECFG; unsigned IOCTL_EQL_SETMASTRCFG = EQL_SETMASTRCFG; unsigned IOCTL_EQL_SETSLAVECFG = EQL_SETSLAVECFG; #if EV_VERSION > (0x010000) unsigned IOCTL_EVIOCGKEYCODE_V2 = EVIOCGKEYCODE_V2; unsigned IOCTL_EVIOCGPROP = EVIOCGPROP(0); unsigned IOCTL_EVIOCSKEYCODE_V2 = EVIOCSKEYCODE_V2; #else unsigned IOCTL_EVIOCGKEYCODE_V2 = IOCTL_NOT_PRESENT; unsigned IOCTL_EVIOCGPROP = IOCTL_NOT_PRESENT; unsigned IOCTL_EVIOCSKEYCODE_V2 = IOCTL_NOT_PRESENT; #endif unsigned IOCTL_FS_IOC_GETFLAGS = FS_IOC_GETFLAGS; unsigned IOCTL_FS_IOC_GETVERSION = FS_IOC_GETVERSION; unsigned IOCTL_FS_IOC_SETFLAGS = FS_IOC_SETFLAGS; unsigned IOCTL_FS_IOC_SETVERSION = FS_IOC_SETVERSION; unsigned IOCTL_GIO_CMAP = GIO_CMAP; unsigned IOCTL_GIO_FONT = GIO_FONT; unsigned IOCTL_GIO_UNIMAP = GIO_UNIMAP; unsigned IOCTL_GIO_UNISCRNMAP = GIO_UNISCRNMAP; unsigned IOCTL_KDADDIO = KDADDIO; unsigned IOCTL_KDDELIO = KDDELIO; unsigned IOCTL_KDGETKEYCODE = KDGETKEYCODE; unsigned IOCTL_KDGKBDIACR = KDGKBDIACR; unsigned IOCTL_KDGKBENT = KDGKBENT; unsigned IOCTL_KDGKBLED 
= KDGKBLED; unsigned IOCTL_KDGKBMETA = KDGKBMETA; unsigned IOCTL_KDGKBSENT = KDGKBSENT; unsigned IOCTL_KDMAPDISP = KDMAPDISP; unsigned IOCTL_KDSETKEYCODE = KDSETKEYCODE; unsigned IOCTL_KDSIGACCEPT = KDSIGACCEPT; unsigned IOCTL_KDSKBDIACR = KDSKBDIACR; unsigned IOCTL_KDSKBENT = KDSKBENT; unsigned IOCTL_KDSKBLED = KDSKBLED; unsigned IOCTL_KDSKBMETA = KDSKBMETA; unsigned IOCTL_KDSKBSENT = KDSKBSENT; unsigned IOCTL_KDUNMAPDISP = KDUNMAPDISP; unsigned IOCTL_LPABORT = LPABORT; unsigned IOCTL_LPABORTOPEN = LPABORTOPEN; unsigned IOCTL_LPCAREFUL = LPCAREFUL; unsigned IOCTL_LPCHAR = LPCHAR; unsigned IOCTL_LPGETIRQ = LPGETIRQ; unsigned IOCTL_LPGETSTATUS = LPGETSTATUS; unsigned IOCTL_LPRESET = LPRESET; unsigned IOCTL_LPSETIRQ = LPSETIRQ; unsigned IOCTL_LPTIME = LPTIME; unsigned IOCTL_LPWAIT = LPWAIT; unsigned IOCTL_MTIOCGETCONFIG = MTIOCGETCONFIG; unsigned IOCTL_MTIOCSETCONFIG = MTIOCSETCONFIG; unsigned IOCTL_PIO_CMAP = PIO_CMAP; unsigned IOCTL_PIO_FONT = PIO_FONT; unsigned IOCTL_PIO_UNIMAP = PIO_UNIMAP; unsigned IOCTL_PIO_UNIMAPCLR = PIO_UNIMAPCLR; unsigned IOCTL_PIO_UNISCRNMAP = PIO_UNISCRNMAP; #if SANITIZER_GLIBC unsigned IOCTL_SCSI_IOCTL_GET_IDLUN = SCSI_IOCTL_GET_IDLUN; unsigned IOCTL_SCSI_IOCTL_PROBE_HOST = SCSI_IOCTL_PROBE_HOST; unsigned IOCTL_SCSI_IOCTL_TAGGED_DISABLE = SCSI_IOCTL_TAGGED_DISABLE; unsigned IOCTL_SCSI_IOCTL_TAGGED_ENABLE = SCSI_IOCTL_TAGGED_ENABLE; unsigned IOCTL_SIOCAIPXITFCRT = SIOCAIPXITFCRT; unsigned IOCTL_SIOCAIPXPRISLT = SIOCAIPXPRISLT; unsigned IOCTL_SIOCAX25ADDUID = SIOCAX25ADDUID; unsigned IOCTL_SIOCAX25DELUID = SIOCAX25DELUID; unsigned IOCTL_SIOCAX25GETPARMS = SIOCAX25GETPARMS; unsigned IOCTL_SIOCAX25GETUID = SIOCAX25GETUID; unsigned IOCTL_SIOCAX25NOUID = SIOCAX25NOUID; unsigned IOCTL_SIOCAX25SETPARMS = SIOCAX25SETPARMS; unsigned IOCTL_SIOCDEVPLIP = SIOCDEVPLIP; unsigned IOCTL_SIOCIPXCFGDATA = SIOCIPXCFGDATA; unsigned IOCTL_SIOCNRDECOBS = SIOCNRDECOBS; unsigned IOCTL_SIOCNRGETPARMS = SIOCNRGETPARMS; unsigned IOCTL_SIOCNRRTCTL = SIOCNRRTCTL; unsigned IOCTL_SIOCNRSETPARMS = SIOCNRSETPARMS; #endif unsigned IOCTL_TIOCGSERIAL = TIOCGSERIAL; unsigned IOCTL_TIOCSERGETMULTI = TIOCSERGETMULTI; unsigned IOCTL_TIOCSERSETMULTI = TIOCSERSETMULTI; unsigned IOCTL_TIOCSSERIAL = TIOCSSERIAL; #endif // SANITIZER_LINUX && !SANITIZER_ANDROID #if SANITIZER_LINUX && !SANITIZER_ANDROID unsigned IOCTL_GIO_SCRNMAP = GIO_SCRNMAP; unsigned IOCTL_KDDISABIO = KDDISABIO; unsigned IOCTL_KDENABIO = KDENABIO; unsigned IOCTL_KDGETLED = KDGETLED; unsigned IOCTL_KDGETMODE = KDGETMODE; unsigned IOCTL_KDGKBMODE = KDGKBMODE; unsigned IOCTL_KDGKBTYPE = KDGKBTYPE; unsigned IOCTL_KDMKTONE = KDMKTONE; unsigned IOCTL_KDSETLED = KDSETLED; unsigned IOCTL_KDSETMODE = KDSETMODE; unsigned IOCTL_KDSKBMODE = KDSKBMODE; unsigned IOCTL_KIOCSOUND = KIOCSOUND; unsigned IOCTL_PIO_SCRNMAP = PIO_SCRNMAP; unsigned IOCTL_SNDCTL_DSP_GETISPACE = SNDCTL_DSP_GETISPACE; unsigned IOCTL_SNDCTL_DSP_GETOSPACE = SNDCTL_DSP_GETOSPACE; #endif // (SANITIZER_LINUX || SANITIZER_FREEBSD) && !SANITIZER_ANDROID const int si_SEGV_MAPERR = SEGV_MAPERR; const int si_SEGV_ACCERR = SEGV_ACCERR; } // namespace __sanitizer using namespace __sanitizer; COMPILER_CHECK(sizeof(__sanitizer_pthread_attr_t) >= sizeof(pthread_attr_t)); COMPILER_CHECK(sizeof(socklen_t) == sizeof(unsigned)); CHECK_TYPE_SIZE(pthread_key_t); #if SANITIZER_LINUX // FIXME: We define those on Linux and Mac, but only check on Linux. 
COMPILER_CHECK(IOC_NRBITS == _IOC_NRBITS); COMPILER_CHECK(IOC_TYPEBITS == _IOC_TYPEBITS); COMPILER_CHECK(IOC_SIZEBITS == _IOC_SIZEBITS); COMPILER_CHECK(IOC_DIRBITS == _IOC_DIRBITS); COMPILER_CHECK(IOC_NRMASK == _IOC_NRMASK); COMPILER_CHECK(IOC_TYPEMASK == _IOC_TYPEMASK); COMPILER_CHECK(IOC_SIZEMASK == _IOC_SIZEMASK); COMPILER_CHECK(IOC_DIRMASK == _IOC_DIRMASK); COMPILER_CHECK(IOC_NRSHIFT == _IOC_NRSHIFT); COMPILER_CHECK(IOC_TYPESHIFT == _IOC_TYPESHIFT); COMPILER_CHECK(IOC_SIZESHIFT == _IOC_SIZESHIFT); COMPILER_CHECK(IOC_DIRSHIFT == _IOC_DIRSHIFT); COMPILER_CHECK(IOC_NONE == _IOC_NONE); COMPILER_CHECK(IOC_WRITE == _IOC_WRITE); COMPILER_CHECK(IOC_READ == _IOC_READ); COMPILER_CHECK(EVIOC_ABS_MAX == ABS_MAX); COMPILER_CHECK(EVIOC_EV_MAX == EV_MAX); COMPILER_CHECK(IOC_SIZE(0x12345678) == _IOC_SIZE(0x12345678)); COMPILER_CHECK(IOC_DIR(0x12345678) == _IOC_DIR(0x12345678)); COMPILER_CHECK(IOC_NR(0x12345678) == _IOC_NR(0x12345678)); COMPILER_CHECK(IOC_TYPE(0x12345678) == _IOC_TYPE(0x12345678)); #endif // SANITIZER_LINUX #if SANITIZER_LINUX || SANITIZER_FREEBSD // There are more undocumented fields in dl_phdr_info that we are not interested // in. COMPILER_CHECK(sizeof(__sanitizer_dl_phdr_info) <= sizeof(dl_phdr_info)); CHECK_SIZE_AND_OFFSET(dl_phdr_info, dlpi_addr); CHECK_SIZE_AND_OFFSET(dl_phdr_info, dlpi_name); CHECK_SIZE_AND_OFFSET(dl_phdr_info, dlpi_phdr); CHECK_SIZE_AND_OFFSET(dl_phdr_info, dlpi_phnum); #endif // SANITIZER_LINUX || SANITIZER_FREEBSD #if SANITIZER_GLIBC || SANITIZER_FREEBSD CHECK_TYPE_SIZE(glob_t); CHECK_SIZE_AND_OFFSET(glob_t, gl_pathc); CHECK_SIZE_AND_OFFSET(glob_t, gl_pathv); CHECK_SIZE_AND_OFFSET(glob_t, gl_offs); CHECK_SIZE_AND_OFFSET(glob_t, gl_flags); CHECK_SIZE_AND_OFFSET(glob_t, gl_closedir); CHECK_SIZE_AND_OFFSET(glob_t, gl_readdir); CHECK_SIZE_AND_OFFSET(glob_t, gl_opendir); CHECK_SIZE_AND_OFFSET(glob_t, gl_lstat); CHECK_SIZE_AND_OFFSET(glob_t, gl_stat); #endif // SANITIZER_GLIBC || SANITIZER_FREEBSD CHECK_TYPE_SIZE(addrinfo); CHECK_SIZE_AND_OFFSET(addrinfo, ai_flags); CHECK_SIZE_AND_OFFSET(addrinfo, ai_family); CHECK_SIZE_AND_OFFSET(addrinfo, ai_socktype); CHECK_SIZE_AND_OFFSET(addrinfo, ai_protocol); CHECK_SIZE_AND_OFFSET(addrinfo, ai_protocol); CHECK_SIZE_AND_OFFSET(addrinfo, ai_addrlen); CHECK_SIZE_AND_OFFSET(addrinfo, ai_canonname); CHECK_SIZE_AND_OFFSET(addrinfo, ai_addr); CHECK_TYPE_SIZE(hostent); CHECK_SIZE_AND_OFFSET(hostent, h_name); CHECK_SIZE_AND_OFFSET(hostent, h_aliases); CHECK_SIZE_AND_OFFSET(hostent, h_addrtype); CHECK_SIZE_AND_OFFSET(hostent, h_length); CHECK_SIZE_AND_OFFSET(hostent, h_addr_list); CHECK_TYPE_SIZE(iovec); CHECK_SIZE_AND_OFFSET(iovec, iov_base); CHECK_SIZE_AND_OFFSET(iovec, iov_len); // In POSIX, int msg_iovlen; socklen_t msg_controllen; socklen_t cmsg_len; but // many implementations don't conform to the standard. Since we pick the // non-conforming glibc definition, exclude the checks for musl (incompatible // sizes but compatible offsets). 
CHECK_TYPE_SIZE(msghdr); CHECK_SIZE_AND_OFFSET(msghdr, msg_name); CHECK_SIZE_AND_OFFSET(msghdr, msg_namelen); CHECK_SIZE_AND_OFFSET(msghdr, msg_iov); #if SANITIZER_GLIBC || SANITIZER_ANDROID CHECK_SIZE_AND_OFFSET(msghdr, msg_iovlen); #endif CHECK_SIZE_AND_OFFSET(msghdr, msg_control); #if SANITIZER_GLIBC || SANITIZER_ANDROID CHECK_SIZE_AND_OFFSET(msghdr, msg_controllen); #endif CHECK_SIZE_AND_OFFSET(msghdr, msg_flags); CHECK_TYPE_SIZE(cmsghdr); #if SANITIZER_GLIBC || SANITIZER_ANDROID CHECK_SIZE_AND_OFFSET(cmsghdr, cmsg_len); #endif CHECK_SIZE_AND_OFFSET(cmsghdr, cmsg_level); CHECK_SIZE_AND_OFFSET(cmsghdr, cmsg_type); #if SANITIZER_LINUX && (__ANDROID_API__ >= 21 || __GLIBC_PREREQ (2, 14)) CHECK_TYPE_SIZE(mmsghdr); CHECK_SIZE_AND_OFFSET(mmsghdr, msg_hdr); CHECK_SIZE_AND_OFFSET(mmsghdr, msg_len); #endif COMPILER_CHECK(sizeof(__sanitizer_dirent) <= sizeof(dirent)); CHECK_SIZE_AND_OFFSET(dirent, d_ino); #if SANITIZER_MAC CHECK_SIZE_AND_OFFSET(dirent, d_seekoff); #elif SANITIZER_FREEBSD // There is no 'd_off' field on FreeBSD. #else CHECK_SIZE_AND_OFFSET(dirent, d_off); #endif CHECK_SIZE_AND_OFFSET(dirent, d_reclen); #if SANITIZER_LINUX && !SANITIZER_ANDROID COMPILER_CHECK(sizeof(__sanitizer_dirent64) <= sizeof(dirent64)); CHECK_SIZE_AND_OFFSET(dirent64, d_ino); CHECK_SIZE_AND_OFFSET(dirent64, d_off); CHECK_SIZE_AND_OFFSET(dirent64, d_reclen); #endif CHECK_TYPE_SIZE(ifconf); CHECK_SIZE_AND_OFFSET(ifconf, ifc_len); CHECK_SIZE_AND_OFFSET(ifconf, ifc_ifcu); CHECK_TYPE_SIZE(pollfd); CHECK_SIZE_AND_OFFSET(pollfd, fd); CHECK_SIZE_AND_OFFSET(pollfd, events); CHECK_SIZE_AND_OFFSET(pollfd, revents); CHECK_TYPE_SIZE(nfds_t); CHECK_TYPE_SIZE(sigset_t); COMPILER_CHECK(sizeof(__sanitizer_sigaction) == sizeof(struct sigaction)); // Can't write checks for sa_handler and sa_sigaction due to them being // preprocessor macros. CHECK_STRUCT_SIZE_AND_OFFSET(sigaction, sa_mask); #if !defined(__s390x__) || __GLIBC_PREREQ (2, 20) // On s390x glibc 2.19 and earlier sa_flags was unsigned long, and sa_resv // didn't exist. 
CHECK_STRUCT_SIZE_AND_OFFSET(sigaction, sa_flags); #endif #if SANITIZER_LINUX && (!SANITIZER_ANDROID || !SANITIZER_MIPS32) CHECK_STRUCT_SIZE_AND_OFFSET(sigaction, sa_restorer); #endif #if SANITIZER_LINUX CHECK_TYPE_SIZE(__sysctl_args); CHECK_SIZE_AND_OFFSET(__sysctl_args, name); CHECK_SIZE_AND_OFFSET(__sysctl_args, nlen); CHECK_SIZE_AND_OFFSET(__sysctl_args, oldval); CHECK_SIZE_AND_OFFSET(__sysctl_args, oldlenp); CHECK_SIZE_AND_OFFSET(__sysctl_args, newval); CHECK_SIZE_AND_OFFSET(__sysctl_args, newlen); CHECK_TYPE_SIZE(__kernel_uid_t); CHECK_TYPE_SIZE(__kernel_gid_t); #if SANITIZER_USES_UID16_SYSCALLS CHECK_TYPE_SIZE(__kernel_old_uid_t); CHECK_TYPE_SIZE(__kernel_old_gid_t); #endif CHECK_TYPE_SIZE(__kernel_off_t); CHECK_TYPE_SIZE(__kernel_loff_t); CHECK_TYPE_SIZE(__kernel_fd_set); #endif #if !SANITIZER_ANDROID CHECK_TYPE_SIZE(wordexp_t); CHECK_SIZE_AND_OFFSET(wordexp_t, we_wordc); CHECK_SIZE_AND_OFFSET(wordexp_t, we_wordv); CHECK_SIZE_AND_OFFSET(wordexp_t, we_offs); #endif CHECK_TYPE_SIZE(tm); CHECK_SIZE_AND_OFFSET(tm, tm_sec); CHECK_SIZE_AND_OFFSET(tm, tm_min); CHECK_SIZE_AND_OFFSET(tm, tm_hour); CHECK_SIZE_AND_OFFSET(tm, tm_mday); CHECK_SIZE_AND_OFFSET(tm, tm_mon); CHECK_SIZE_AND_OFFSET(tm, tm_year); CHECK_SIZE_AND_OFFSET(tm, tm_wday); CHECK_SIZE_AND_OFFSET(tm, tm_yday); CHECK_SIZE_AND_OFFSET(tm, tm_isdst); CHECK_SIZE_AND_OFFSET(tm, tm_gmtoff); CHECK_SIZE_AND_OFFSET(tm, tm_zone); #if SANITIZER_LINUX CHECK_TYPE_SIZE(mntent); CHECK_SIZE_AND_OFFSET(mntent, mnt_fsname); CHECK_SIZE_AND_OFFSET(mntent, mnt_dir); CHECK_SIZE_AND_OFFSET(mntent, mnt_type); CHECK_SIZE_AND_OFFSET(mntent, mnt_opts); CHECK_SIZE_AND_OFFSET(mntent, mnt_freq); CHECK_SIZE_AND_OFFSET(mntent, mnt_passno); #endif CHECK_TYPE_SIZE(ether_addr); #if SANITIZER_GLIBC || SANITIZER_FREEBSD CHECK_TYPE_SIZE(ipc_perm); # if SANITIZER_FREEBSD CHECK_SIZE_AND_OFFSET(ipc_perm, key); CHECK_SIZE_AND_OFFSET(ipc_perm, seq); # else CHECK_SIZE_AND_OFFSET(ipc_perm, __key); CHECK_SIZE_AND_OFFSET(ipc_perm, __seq); # endif CHECK_SIZE_AND_OFFSET(ipc_perm, uid); CHECK_SIZE_AND_OFFSET(ipc_perm, gid); CHECK_SIZE_AND_OFFSET(ipc_perm, cuid); CHECK_SIZE_AND_OFFSET(ipc_perm, cgid); #if !SANITIZER_LINUX || __GLIBC_PREREQ (2, 31) /* glibc 2.30 and earlier provided 16-bit mode field instead of 32-bit on many architectures. */ CHECK_SIZE_AND_OFFSET(ipc_perm, mode); #endif CHECK_TYPE_SIZE(shmid_ds); CHECK_SIZE_AND_OFFSET(shmid_ds, shm_perm); CHECK_SIZE_AND_OFFSET(shmid_ds, shm_segsz); CHECK_SIZE_AND_OFFSET(shmid_ds, shm_atime); CHECK_SIZE_AND_OFFSET(shmid_ds, shm_dtime); CHECK_SIZE_AND_OFFSET(shmid_ds, shm_ctime); CHECK_SIZE_AND_OFFSET(shmid_ds, shm_cpid); CHECK_SIZE_AND_OFFSET(shmid_ds, shm_lpid); CHECK_SIZE_AND_OFFSET(shmid_ds, shm_nattch); #endif CHECK_TYPE_SIZE(clock_t); #if SANITIZER_LINUX CHECK_TYPE_SIZE(clockid_t); #endif #if !SANITIZER_ANDROID CHECK_TYPE_SIZE(ifaddrs); CHECK_SIZE_AND_OFFSET(ifaddrs, ifa_next); CHECK_SIZE_AND_OFFSET(ifaddrs, ifa_name); CHECK_SIZE_AND_OFFSET(ifaddrs, ifa_addr); CHECK_SIZE_AND_OFFSET(ifaddrs, ifa_netmask); #if SANITIZER_LINUX || SANITIZER_FREEBSD // Compare against the union, because we can't reach into the union in a // compliant way. 
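Editorial aside, not part of the patch: the CHECK_TYPE_SIZE / CHECK_SIZE_AND_OFFSET assertions in this block all reduce to compile-time comparisons of sizeof and offsetof between a __sanitizer_* mirror struct and the real libc type. A minimal self-contained sketch of the same idea, using struct iovec and a hypothetical mirror named my_iovec:

  #include <cstddef>    // offsetof
  #include <sys/uio.h>  // struct iovec

  struct my_iovec {     // hypothetical mirror; must stay layout-compatible
    void *iov_base;
    size_t iov_len;
  };

  static_assert(sizeof(my_iovec) == sizeof(iovec), "size mismatch");
  static_assert(offsetof(my_iovec, iov_base) == offsetof(iovec, iov_base),
                "iov_base offset mismatch");
  static_assert(offsetof(my_iovec, iov_len) == offsetof(iovec, iov_len),
                "iov_len offset mismatch");

The ifaddrs check that the comment above refers to follows the same pattern, except that it compares the mirror's ifa_dstaddr against the enclosing ifa_ifu union member, since a compliant program cannot name the union's fields directly.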
#ifdef ifa_dstaddr #undef ifa_dstaddr #endif # if SANITIZER_FREEBSD CHECK_SIZE_AND_OFFSET(ifaddrs, ifa_dstaddr); # else COMPILER_CHECK(sizeof(((__sanitizer_ifaddrs *)nullptr)->ifa_dstaddr) == sizeof(((ifaddrs *)nullptr)->ifa_ifu)); COMPILER_CHECK(offsetof(__sanitizer_ifaddrs, ifa_dstaddr) == offsetof(ifaddrs, ifa_ifu)); # endif // SANITIZER_FREEBSD #else CHECK_SIZE_AND_OFFSET(ifaddrs, ifa_dstaddr); #endif // SANITIZER_LINUX CHECK_SIZE_AND_OFFSET(ifaddrs, ifa_data); #endif #if SANITIZER_GLIBC || SANITIZER_ANDROID COMPILER_CHECK(sizeof(__sanitizer_struct_mallinfo) == sizeof(struct mallinfo)); #endif #if !SANITIZER_ANDROID CHECK_TYPE_SIZE(timeb); CHECK_SIZE_AND_OFFSET(timeb, time); CHECK_SIZE_AND_OFFSET(timeb, millitm); CHECK_SIZE_AND_OFFSET(timeb, timezone); CHECK_SIZE_AND_OFFSET(timeb, dstflag); #endif CHECK_TYPE_SIZE(passwd); CHECK_SIZE_AND_OFFSET(passwd, pw_name); CHECK_SIZE_AND_OFFSET(passwd, pw_passwd); CHECK_SIZE_AND_OFFSET(passwd, pw_uid); CHECK_SIZE_AND_OFFSET(passwd, pw_gid); CHECK_SIZE_AND_OFFSET(passwd, pw_dir); CHECK_SIZE_AND_OFFSET(passwd, pw_shell); #if !SANITIZER_ANDROID CHECK_SIZE_AND_OFFSET(passwd, pw_gecos); #endif #if SANITIZER_MAC CHECK_SIZE_AND_OFFSET(passwd, pw_change); CHECK_SIZE_AND_OFFSET(passwd, pw_expire); CHECK_SIZE_AND_OFFSET(passwd, pw_class); #endif CHECK_TYPE_SIZE(group); CHECK_SIZE_AND_OFFSET(group, gr_name); CHECK_SIZE_AND_OFFSET(group, gr_passwd); CHECK_SIZE_AND_OFFSET(group, gr_gid); CHECK_SIZE_AND_OFFSET(group, gr_mem); #if HAVE_RPC_XDR_H CHECK_TYPE_SIZE(XDR); CHECK_SIZE_AND_OFFSET(XDR, x_op); CHECK_SIZE_AND_OFFSET(XDR, x_ops); CHECK_SIZE_AND_OFFSET(XDR, x_public); CHECK_SIZE_AND_OFFSET(XDR, x_private); CHECK_SIZE_AND_OFFSET(XDR, x_base); CHECK_SIZE_AND_OFFSET(XDR, x_handy); COMPILER_CHECK(__sanitizer_XDR_ENCODE == XDR_ENCODE); COMPILER_CHECK(__sanitizer_XDR_DECODE == XDR_DECODE); COMPILER_CHECK(__sanitizer_XDR_FREE == XDR_FREE); #endif #if SANITIZER_GLIBC COMPILER_CHECK(sizeof(__sanitizer_FILE) <= sizeof(FILE)); CHECK_SIZE_AND_OFFSET(FILE, _flags); CHECK_SIZE_AND_OFFSET(FILE, _IO_read_ptr); CHECK_SIZE_AND_OFFSET(FILE, _IO_read_end); CHECK_SIZE_AND_OFFSET(FILE, _IO_read_base); CHECK_SIZE_AND_OFFSET(FILE, _IO_write_ptr); CHECK_SIZE_AND_OFFSET(FILE, _IO_write_end); CHECK_SIZE_AND_OFFSET(FILE, _IO_write_base); CHECK_SIZE_AND_OFFSET(FILE, _IO_buf_base); CHECK_SIZE_AND_OFFSET(FILE, _IO_buf_end); CHECK_SIZE_AND_OFFSET(FILE, _IO_save_base); CHECK_SIZE_AND_OFFSET(FILE, _IO_backup_base); CHECK_SIZE_AND_OFFSET(FILE, _IO_save_end); CHECK_SIZE_AND_OFFSET(FILE, _markers); CHECK_SIZE_AND_OFFSET(FILE, _chain); CHECK_SIZE_AND_OFFSET(FILE, _fileno); COMPILER_CHECK(sizeof(__sanitizer__obstack_chunk) <= sizeof(_obstack_chunk)); CHECK_SIZE_AND_OFFSET(_obstack_chunk, limit); CHECK_SIZE_AND_OFFSET(_obstack_chunk, prev); CHECK_TYPE_SIZE(obstack); CHECK_SIZE_AND_OFFSET(obstack, chunk_size); CHECK_SIZE_AND_OFFSET(obstack, chunk); CHECK_SIZE_AND_OFFSET(obstack, object_base); CHECK_SIZE_AND_OFFSET(obstack, next_free); CHECK_TYPE_SIZE(cookie_io_functions_t); CHECK_SIZE_AND_OFFSET(cookie_io_functions_t, read); CHECK_SIZE_AND_OFFSET(cookie_io_functions_t, write); CHECK_SIZE_AND_OFFSET(cookie_io_functions_t, seek); CHECK_SIZE_AND_OFFSET(cookie_io_functions_t, close); #endif // SANITIZER_GLIBC #if SANITIZER_LINUX || SANITIZER_FREEBSD CHECK_TYPE_SIZE(sem_t); #endif #if SANITIZER_LINUX && defined(__arm__) COMPILER_CHECK(ARM_VFPREGS_SIZE == ARM_VFPREGS_SIZE_ASAN); #endif #endif // SANITIZER_LINUX || SANITIZER_FREEBSD || SANITIZER_MAC diff --git 
a/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h b/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h index 5839dd77ea75..d3e9a6353909 100644 --- a/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h +++ b/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h @@ -1,1455 +1,1445 @@ //===-- sanitizer_platform_limits_posix.h ---------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file is a part of Sanitizer common code. // // Sizes and layouts of platform-specific POSIX data structures. //===----------------------------------------------------------------------===// #ifndef SANITIZER_PLATFORM_LIMITS_POSIX_H #define SANITIZER_PLATFORM_LIMITS_POSIX_H #if SANITIZER_LINUX || SANITIZER_MAC #include "sanitizer_internal_defs.h" #include "sanitizer_platform.h" #if defined(__sparc__) // FIXME: This can't be included from tsan which does not support sparc yet. #include "sanitizer_glibc_version.h" #endif # define GET_LINK_MAP_BY_DLOPEN_HANDLE(handle) ((link_map*)(handle)) namespace __sanitizer { extern unsigned struct_utsname_sz; extern unsigned struct_stat_sz; #if !SANITIZER_IOS extern unsigned struct_stat64_sz; #endif extern unsigned struct_rusage_sz; extern unsigned siginfo_t_sz; extern unsigned struct_itimerval_sz; extern unsigned pthread_t_sz; extern unsigned pthread_mutex_t_sz; extern unsigned pthread_cond_t_sz; extern unsigned pid_t_sz; extern unsigned timeval_sz; extern unsigned uid_t_sz; extern unsigned gid_t_sz; extern unsigned mbstate_t_sz; extern unsigned struct_timezone_sz; extern unsigned struct_tms_sz; extern unsigned struct_itimerspec_sz; extern unsigned struct_sigevent_sz; extern unsigned struct_stack_t_sz; extern unsigned struct_sched_param_sz; extern unsigned struct_statfs64_sz; extern unsigned struct_regex_sz; extern unsigned struct_regmatch_sz; #if !SANITIZER_ANDROID extern unsigned struct_fstab_sz; extern unsigned struct_statfs_sz; extern unsigned struct_sockaddr_sz; extern unsigned ucontext_t_sz; #endif // !SANITIZER_ANDROID #if SANITIZER_LINUX #if defined(__x86_64__) const unsigned struct_kernel_stat_sz = 144; const unsigned struct_kernel_stat64_sz = 0; #elif defined(__i386__) const unsigned struct_kernel_stat_sz = 64; const unsigned struct_kernel_stat64_sz = 96; #elif defined(__arm__) const unsigned struct_kernel_stat_sz = 64; const unsigned struct_kernel_stat64_sz = 104; #elif defined(__aarch64__) const unsigned struct_kernel_stat_sz = 128; const unsigned struct_kernel_stat64_sz = 104; #elif defined(__powerpc__) && !defined(__powerpc64__) const unsigned struct_kernel_stat_sz = 72; const unsigned struct_kernel_stat64_sz = 104; #elif defined(__powerpc64__) const unsigned struct_kernel_stat_sz = 144; const unsigned struct_kernel_stat64_sz = 104; #elif defined(__mips__) const unsigned struct_kernel_stat_sz = SANITIZER_ANDROID ? 
FIRST_32_SECOND_64(104, 128) : FIRST_32_SECOND_64(160, 216); const unsigned struct_kernel_stat64_sz = 104; #elif defined(__s390__) && !defined(__s390x__) const unsigned struct_kernel_stat_sz = 64; const unsigned struct_kernel_stat64_sz = 104; #elif defined(__s390x__) const unsigned struct_kernel_stat_sz = 144; const unsigned struct_kernel_stat64_sz = 0; #elif defined(__sparc__) && defined(__arch64__) const unsigned struct___old_kernel_stat_sz = 0; const unsigned struct_kernel_stat_sz = 104; const unsigned struct_kernel_stat64_sz = 144; #elif defined(__sparc__) && !defined(__arch64__) const unsigned struct___old_kernel_stat_sz = 0; const unsigned struct_kernel_stat_sz = 64; const unsigned struct_kernel_stat64_sz = 104; #elif SANITIZER_RISCV64 const unsigned struct_kernel_stat_sz = 128; const unsigned struct_kernel_stat64_sz = 0; // RISCV64 does not use stat64 #endif struct __sanitizer_perf_event_attr { unsigned type; unsigned size; // More fields that vary with the kernel version. }; extern unsigned struct_epoll_event_sz; extern unsigned struct_sysinfo_sz; extern unsigned __user_cap_header_struct_sz; extern unsigned __user_cap_data_struct_sz; extern unsigned struct_new_utsname_sz; extern unsigned struct_old_utsname_sz; extern unsigned struct_oldold_utsname_sz; const unsigned struct_kexec_segment_sz = 4 * sizeof(unsigned long); #endif // SANITIZER_LINUX #if SANITIZER_LINUX #if defined(__powerpc64__) || defined(__riscv) || defined(__s390__) const unsigned struct___old_kernel_stat_sz = 0; #elif !defined(__sparc__) const unsigned struct___old_kernel_stat_sz = 32; #endif extern unsigned struct_rlimit_sz; extern unsigned struct_utimbuf_sz; extern unsigned struct_timespec_sz; struct __sanitizer_iocb { u64 aio_data; u32 aio_key_or_aio_reserved1; // Simply crazy. u32 aio_reserved1_or_aio_key; // Luckily, we don't need these. 
u16 aio_lio_opcode; s16 aio_reqprio; u32 aio_fildes; u64 aio_buf; u64 aio_nbytes; s64 aio_offset; u64 aio_reserved2; u64 aio_reserved3; }; struct __sanitizer_io_event { u64 data; u64 obj; u64 res; u64 res2; }; const unsigned iocb_cmd_pread = 0; const unsigned iocb_cmd_pwrite = 1; const unsigned iocb_cmd_preadv = 7; const unsigned iocb_cmd_pwritev = 8; struct __sanitizer___sysctl_args { int *name; int nlen; void *oldval; uptr *oldlenp; void *newval; uptr newlen; unsigned long ___unused[4]; }; const unsigned old_sigset_t_sz = sizeof(unsigned long); struct __sanitizer_sem_t { #if SANITIZER_ANDROID && defined(_LP64) int data[4]; #elif SANITIZER_ANDROID && !defined(_LP64) int data; #elif SANITIZER_LINUX uptr data[4]; #endif }; #endif // SANITIZER_LINUX #if SANITIZER_ANDROID struct __sanitizer_struct_mallinfo { uptr v[10]; }; #endif #if SANITIZER_LINUX && !SANITIZER_ANDROID struct __sanitizer_struct_mallinfo { int v[10]; }; extern unsigned struct_ustat_sz; extern unsigned struct_rlimit64_sz; extern unsigned struct_statvfs64_sz; struct __sanitizer_ipc_perm { int __key; int uid; int gid; int cuid; int cgid; #ifdef __powerpc__ unsigned mode; unsigned __seq; u64 __unused1; u64 __unused2; #elif defined(__sparc__) unsigned mode; unsigned short __pad2; unsigned short __seq; unsigned long long __unused1; unsigned long long __unused2; #else unsigned int mode; unsigned short __seq; unsigned short __pad2; #if defined(__x86_64__) && !defined(_LP64) u64 __unused1; u64 __unused2; #else unsigned long __unused1; unsigned long __unused2; #endif #endif }; struct __sanitizer_shmid_ds { __sanitizer_ipc_perm shm_perm; #if defined(__sparc__) #if !defined(__arch64__) u32 __pad1; #endif long shm_atime; #if !defined(__arch64__) u32 __pad2; #endif long shm_dtime; #if !defined(__arch64__) u32 __pad3; #endif long shm_ctime; uptr shm_segsz; int shm_cpid; int shm_lpid; unsigned long shm_nattch; unsigned long __glibc_reserved1; unsigned long __glibc_reserved2; #else #ifndef __powerpc__ uptr shm_segsz; #elif !defined(__powerpc64__) uptr __unused0; #endif #if defined(__x86_64__) && !defined(_LP64) u64 shm_atime; u64 shm_dtime; u64 shm_ctime; #else uptr shm_atime; #if !defined(_LP64) && !defined(__mips__) uptr __unused1; #endif uptr shm_dtime; #if !defined(_LP64) && !defined(__mips__) uptr __unused2; #endif uptr shm_ctime; #if !defined(_LP64) && !defined(__mips__) uptr __unused3; #endif #endif #ifdef __powerpc__ uptr shm_segsz; #endif int shm_cpid; int shm_lpid; #if defined(__x86_64__) && !defined(_LP64) u64 shm_nattch; u64 __unused4; u64 __unused5; #else uptr shm_nattch; uptr __unused4; uptr __unused5; #endif #endif }; #endif #if SANITIZER_LINUX && !SANITIZER_ANDROID extern unsigned struct_msqid_ds_sz; extern unsigned struct_mq_attr_sz; extern unsigned struct_timex_sz; extern unsigned struct_statvfs_sz; extern unsigned struct_crypt_data_sz; #endif // SANITIZER_LINUX && !SANITIZER_ANDROID struct __sanitizer_iovec { void *iov_base; uptr iov_len; }; #if !SANITIZER_ANDROID struct __sanitizer_ifaddrs { struct __sanitizer_ifaddrs *ifa_next; char *ifa_name; unsigned int ifa_flags; void *ifa_addr; // (struct sockaddr *) void *ifa_netmask; // (struct sockaddr *) // This is a union on Linux. 
# ifdef ifa_dstaddr # undef ifa_dstaddr # endif void *ifa_dstaddr; // (struct sockaddr *) void *ifa_data; }; #endif // !SANITIZER_ANDROID #if SANITIZER_MAC typedef unsigned long __sanitizer_pthread_key_t; #else typedef unsigned __sanitizer_pthread_key_t; #endif #if SANITIZER_LINUX && !SANITIZER_ANDROID struct __sanitizer_XDR { int x_op; void *x_ops; uptr x_public; uptr x_private; uptr x_base; unsigned x_handy; }; const int __sanitizer_XDR_ENCODE = 0; const int __sanitizer_XDR_DECODE = 1; const int __sanitizer_XDR_FREE = 2; #endif struct __sanitizer_passwd { char *pw_name; char *pw_passwd; int pw_uid; int pw_gid; #if SANITIZER_MAC long pw_change; char *pw_class; #endif #if !(SANITIZER_ANDROID && (SANITIZER_WORDSIZE == 32)) char *pw_gecos; #endif char *pw_dir; char *pw_shell; #if SANITIZER_MAC long pw_expire; #endif }; struct __sanitizer_group { char *gr_name; char *gr_passwd; int gr_gid; char **gr_mem; }; #if defined(__x86_64__) && !defined(_LP64) typedef long long __sanitizer_time_t; #else typedef long __sanitizer_time_t; #endif typedef long __sanitizer_suseconds_t; struct __sanitizer_timeval { __sanitizer_time_t tv_sec; __sanitizer_suseconds_t tv_usec; }; struct __sanitizer_itimerval { struct __sanitizer_timeval it_interval; struct __sanitizer_timeval it_value; }; struct __sanitizer_timeb { __sanitizer_time_t time; unsigned short millitm; short timezone; short dstflag; }; struct __sanitizer_ether_addr { u8 octet[6]; }; struct __sanitizer_tm { int tm_sec; int tm_min; int tm_hour; int tm_mday; int tm_mon; int tm_year; int tm_wday; int tm_yday; int tm_isdst; long int tm_gmtoff; const char *tm_zone; }; #if SANITIZER_LINUX struct __sanitizer_mntent { char *mnt_fsname; char *mnt_dir; char *mnt_type; char *mnt_opts; int mnt_freq; int mnt_passno; }; struct __sanitizer_file_handle { unsigned int handle_bytes; int handle_type; unsigned char f_handle[1]; // variable sized }; #endif #if SANITIZER_MAC struct __sanitizer_msghdr { void *msg_name; unsigned msg_namelen; struct __sanitizer_iovec *msg_iov; unsigned msg_iovlen; void *msg_control; unsigned msg_controllen; int msg_flags; }; struct __sanitizer_cmsghdr { unsigned cmsg_len; int cmsg_level; int cmsg_type; }; #else // In POSIX, int msg_iovlen; socklen_t msg_controllen; socklen_t cmsg_len; but // many implementations don't conform to the standard. 
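The comment above is why the sanitizer keeps its own mirror of msghdr with pointer-sized counters; such mirrors only stay trustworthy because their layout is compared against the real system headers at compile time, in the spirit of the CHECK_TYPE_SIZE / CHECK_SIZE_AND_OFFSET macros near the end of this header. A minimal, hypothetical sketch of that kind of check (names invented here; assumes a glibc LP64 target where msg_iovlen and msg_controllen are size_t):

#include <stddef.h>      // offsetof
#include <stdint.h>      // uintptr_t
#include <sys/socket.h>  // the real struct msghdr

namespace layout_sketch {
// Illustrative stand-in for __sanitizer_msghdr; field types chosen to match
// glibc's actual layout rather than POSIX's declared types.
struct mirrored_msghdr {
  void *msg_name;
  unsigned msg_namelen;
  void *msg_iov;
  uintptr_t msg_iovlen;      // size_t-sized, unlike POSIX's int
  void *msg_control;
  uintptr_t msg_controllen;  // size_t-sized, unlike POSIX's socklen_t
  int msg_flags;
};
// The build breaks here if the mirrored layout ever drifts from the real one.
static_assert(sizeof(mirrored_msghdr) == sizeof(msghdr), "msghdr size mismatch");
static_assert(offsetof(mirrored_msghdr, msg_flags) == offsetof(msghdr, msg_flags),
              "msg_flags offset mismatch");
}  // namespace layout_sketch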
struct __sanitizer_msghdr { void *msg_name; unsigned msg_namelen; struct __sanitizer_iovec *msg_iov; uptr msg_iovlen; void *msg_control; uptr msg_controllen; int msg_flags; }; struct __sanitizer_cmsghdr { uptr cmsg_len; int cmsg_level; int cmsg_type; }; #endif #if SANITIZER_LINUX struct __sanitizer_mmsghdr { __sanitizer_msghdr msg_hdr; unsigned int msg_len; }; #endif #if SANITIZER_MAC struct __sanitizer_dirent { unsigned long long d_ino; unsigned long long d_seekoff; unsigned short d_reclen; // more fields that we don't care about }; #elif SANITIZER_ANDROID || defined(__x86_64__) struct __sanitizer_dirent { unsigned long long d_ino; unsigned long long d_off; unsigned short d_reclen; // more fields that we don't care about }; #else struct __sanitizer_dirent { uptr d_ino; uptr d_off; unsigned short d_reclen; // more fields that we don't care about }; #endif #if SANITIZER_LINUX && !SANITIZER_ANDROID struct __sanitizer_dirent64 { unsigned long long d_ino; unsigned long long d_off; unsigned short d_reclen; // more fields that we don't care about }; #endif #if defined(__x86_64__) && !defined(_LP64) typedef long long __sanitizer_clock_t; #else typedef long __sanitizer_clock_t; #endif #if SANITIZER_LINUX typedef int __sanitizer_clockid_t; #endif #if SANITIZER_LINUX #if defined(_LP64) || defined(__x86_64__) || defined(__powerpc__) || \ defined(__mips__) typedef unsigned __sanitizer___kernel_uid_t; typedef unsigned __sanitizer___kernel_gid_t; #else typedef unsigned short __sanitizer___kernel_uid_t; typedef unsigned short __sanitizer___kernel_gid_t; #endif #if defined(__x86_64__) && !defined(_LP64) typedef long long __sanitizer___kernel_off_t; #else typedef long __sanitizer___kernel_off_t; #endif #if defined(__powerpc__) || defined(__mips__) || defined(__riscv) typedef unsigned int __sanitizer___kernel_old_uid_t; typedef unsigned int __sanitizer___kernel_old_gid_t; #else typedef unsigned short __sanitizer___kernel_old_uid_t; typedef unsigned short __sanitizer___kernel_old_gid_t; #endif typedef long long __sanitizer___kernel_loff_t; typedef struct { unsigned long fds_bits[1024 / (8 * sizeof(long))]; } __sanitizer___kernel_fd_set; #endif // This thing depends on the platform. We are only interested in the upper // limit. Verified with a compiler assert in .cpp. union __sanitizer_pthread_attr_t { char size[128]; void *align; }; #if SANITIZER_ANDROID # if SANITIZER_MIPS typedef unsigned long __sanitizer_sigset_t[16 / sizeof(unsigned long)]; # else typedef unsigned long __sanitizer_sigset_t; # endif #elif SANITIZER_MAC typedef unsigned __sanitizer_sigset_t; #elif SANITIZER_LINUX struct __sanitizer_sigset_t { // The size is determined by looking at sizeof of real sigset_t on linux. uptr val[128 / sizeof(uptr)]; }; #endif struct __sanitizer_siginfo { // The size is determined by looking at sizeof of real siginfo_t on linux. u64 opaque[128 / sizeof(u64)]; }; using __sanitizer_sighandler_ptr = void (*)(int sig); using __sanitizer_sigactionhandler_ptr = void (*)(int sig, __sanitizer_siginfo *siginfo, void *uctx); // Linux system headers define the 'sa_handler' and 'sa_sigaction' macros. 
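As the comment above notes, __sanitizer_pthread_attr_t only reserves an upper bound for the real pthread_attr_t and relies on a compiler assert in the .cpp file. A hypothetical sketch of that pattern (the names here are invented, not taken from the source):

#include <pthread.h>

// Opaque stand-in sized to an assumed upper bound; the pointer member forces
// pointer alignment, mirroring the union above.
union opaque_pthread_attr {
  char size[128];
  void *align;
};

// If pthread_attr_t ever outgrows the reserved buffer, compilation fails here
// instead of the runtime silently clobbering adjacent memory.
static_assert(sizeof(pthread_attr_t) <= sizeof(opaque_pthread_attr),
              "pthread_attr_t exceeds the reserved upper bound");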
#if SANITIZER_ANDROID && (SANITIZER_WORDSIZE == 64) struct __sanitizer_sigaction { unsigned sa_flags; union { __sanitizer_sigactionhandler_ptr sigaction; __sanitizer_sighandler_ptr handler; }; __sanitizer_sigset_t sa_mask; void (*sa_restorer)(); }; #elif SANITIZER_ANDROID && SANITIZER_MIPS32 // check this before WORDSIZE == 32 struct __sanitizer_sigaction { unsigned sa_flags; union { __sanitizer_sigactionhandler_ptr sigaction; __sanitizer_sighandler_ptr handler; }; __sanitizer_sigset_t sa_mask; }; #elif SANITIZER_ANDROID && (SANITIZER_WORDSIZE == 32) struct __sanitizer_sigaction { union { __sanitizer_sigactionhandler_ptr sigaction; __sanitizer_sighandler_ptr handler; }; __sanitizer_sigset_t sa_mask; uptr sa_flags; void (*sa_restorer)(); }; #else // !SANITIZER_ANDROID struct __sanitizer_sigaction { #if defined(__mips__) && !SANITIZER_FREEBSD unsigned int sa_flags; #endif union { __sanitizer_sigactionhandler_ptr sigaction; __sanitizer_sighandler_ptr handler; }; #if SANITIZER_FREEBSD int sa_flags; __sanitizer_sigset_t sa_mask; #else #if defined(__s390x__) int sa_resv; #else __sanitizer_sigset_t sa_mask; #endif #ifndef __mips__ #if defined(__sparc__) #if __GLIBC_PREREQ (2, 20) // On sparc glibc 2.19 and earlier sa_flags was unsigned long. #if defined(__arch64__) // To maintain ABI compatibility on sparc64 when switching to an int, // __glibc_reserved0 was added. int __glibc_reserved0; #endif int sa_flags; #else unsigned long sa_flags; #endif #else int sa_flags; #endif #endif #endif #if SANITIZER_LINUX void (*sa_restorer)(); #endif #if defined(__mips__) && (SANITIZER_WORDSIZE == 32) int sa_resv[1]; #endif #if defined(__s390x__) __sanitizer_sigset_t sa_mask; #endif }; #endif // !SANITIZER_ANDROID #if defined(__mips__) struct __sanitizer_kernel_sigset_t { uptr sig[2]; }; #else struct __sanitizer_kernel_sigset_t { u8 sig[8]; }; #endif // Linux system headers define the 'sa_handler' and 'sa_sigaction' macros. 
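For context on what the __sanitizer_sigaction mirrors above correspond to, here is an ordinary sigaction(2) call of the shape the interceptors see; the flags echo how the runtime itself installs handlers in sanitizer_posix_libcdep.cpp (SA_SIGINFO, optionally on the alternate stack). Purely illustrative, not taken from the patch:

#include <signal.h>
#include <string.h>

// Three-argument handler matching __sanitizer_sigactionhandler_ptr's shape.
static void OnDeadlySignal(int signo, siginfo_t *info, void *ucontext) {
  (void)signo; (void)info; (void)ucontext;  // a real handler would inspect these
}

int InstallExampleHandler() {
  struct sigaction sa;
  memset(&sa, 0, sizeof(sa));
  sa.sa_sigaction = OnDeadlySignal;         // fills the handler/sigaction union
  sa.sa_flags = SA_SIGINFO | SA_ONSTACK;    // siginfo handler, run on the alt stack
  sigemptyset(&sa.sa_mask);
  return sigaction(SIGSEGV, &sa, nullptr);  // returns 0 on success
}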
#if SANITIZER_MIPS struct __sanitizer_kernel_sigaction_t { unsigned int sa_flags; union { void (*handler)(int signo); void (*sigaction)(int signo, __sanitizer_siginfo *info, void *ctx); }; __sanitizer_kernel_sigset_t sa_mask; void (*sa_restorer)(void); }; #else struct __sanitizer_kernel_sigaction_t { union { void (*handler)(int signo); void (*sigaction)(int signo, __sanitizer_siginfo *info, void *ctx); }; unsigned long sa_flags; void (*sa_restorer)(void); __sanitizer_kernel_sigset_t sa_mask; }; #endif extern const uptr sig_ign; extern const uptr sig_dfl; extern const uptr sig_err; extern const uptr sa_siginfo; #if SANITIZER_LINUX extern int e_tabsz; #endif extern int af_inet; extern int af_inet6; uptr __sanitizer_in_addr_sz(int af); #if SANITIZER_LINUX struct __sanitizer_dl_phdr_info { uptr dlpi_addr; const char *dlpi_name; const void *dlpi_phdr; short dlpi_phnum; }; extern unsigned struct_ElfW_Phdr_sz; #endif struct __sanitizer_protoent { char *p_name; char **p_aliases; int p_proto; }; struct __sanitizer_addrinfo { int ai_flags; int ai_family; int ai_socktype; int ai_protocol; #if SANITIZER_ANDROID || SANITIZER_MAC unsigned ai_addrlen; char *ai_canonname; void *ai_addr; #else // LINUX unsigned ai_addrlen; void *ai_addr; char *ai_canonname; #endif struct __sanitizer_addrinfo *ai_next; }; struct __sanitizer_hostent { char *h_name; char **h_aliases; int h_addrtype; int h_length; char **h_addr_list; }; struct __sanitizer_pollfd { int fd; short events; short revents; }; #if SANITIZER_ANDROID || SANITIZER_MAC typedef unsigned __sanitizer_nfds_t; #else typedef unsigned long __sanitizer_nfds_t; #endif #if !SANITIZER_ANDROID # if SANITIZER_LINUX struct __sanitizer_glob_t { uptr gl_pathc; char **gl_pathv; uptr gl_offs; int gl_flags; void (*gl_closedir)(void *dirp); void *(*gl_readdir)(void *dirp); void *(*gl_opendir)(const char *); int (*gl_lstat)(const char *, void *); int (*gl_stat)(const char *, void *); }; # endif // SANITIZER_LINUX # if SANITIZER_LINUX extern int glob_nomatch; extern int glob_altdirfunc; # endif #endif // !SANITIZER_ANDROID extern unsigned path_max; struct __sanitizer_wordexp_t { uptr we_wordc; char **we_wordv; uptr we_offs; }; #if SANITIZER_LINUX && !SANITIZER_ANDROID struct __sanitizer_FILE { int _flags; char *_IO_read_ptr; char *_IO_read_end; char *_IO_read_base; char *_IO_write_base; char *_IO_write_ptr; char *_IO_write_end; char *_IO_buf_base; char *_IO_buf_end; char *_IO_save_base; char *_IO_backup_base; char *_IO_save_end; void *_markers; __sanitizer_FILE *_chain; int _fileno; }; # define SANITIZER_HAS_STRUCT_FILE 1 #else typedef void __sanitizer_FILE; # define SANITIZER_HAS_STRUCT_FILE 0 #endif #if SANITIZER_LINUX && !SANITIZER_ANDROID && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \ defined(__s390__) || SANITIZER_RISCV64) extern unsigned struct_user_regs_struct_sz; extern unsigned struct_user_fpregs_struct_sz; extern unsigned struct_user_fpxregs_struct_sz; extern unsigned struct_user_vfpregs_struct_sz; extern int ptrace_peektext; extern int ptrace_peekdata; extern int ptrace_peekuser; extern int ptrace_getregs; extern int ptrace_setregs; extern int ptrace_getfpregs; extern int ptrace_setfpregs; extern int ptrace_getfpxregs; extern int ptrace_setfpxregs; extern int ptrace_getvfpregs; extern int ptrace_setvfpregs; extern int ptrace_getsiginfo; extern int ptrace_setsiginfo; extern int ptrace_getregset; extern int ptrace_setregset; extern int ptrace_geteventmsg; #endif #if 
SANITIZER_LINUX && !SANITIZER_ANDROID extern unsigned struct_shminfo_sz; extern unsigned struct_shm_info_sz; extern int shmctl_ipc_stat; extern int shmctl_ipc_info; extern int shmctl_shm_info; extern int shmctl_shm_stat; #endif #if !SANITIZER_MAC && !SANITIZER_FREEBSD extern unsigned struct_utmp_sz; #endif #if !SANITIZER_ANDROID extern unsigned struct_utmpx_sz; #endif extern int map_fixed; // ioctl arguments struct __sanitizer_ifconf { int ifc_len; union { void *ifcu_req; } ifc_ifcu; #if SANITIZER_MAC } __attribute__((packed)); #else }; #endif #if SANITIZER_LINUX && !SANITIZER_ANDROID struct __sanitizer__obstack_chunk { char *limit; struct __sanitizer__obstack_chunk *prev; }; struct __sanitizer_obstack { long chunk_size; struct __sanitizer__obstack_chunk *chunk; char *object_base; char *next_free; uptr more_fields[7]; }; typedef uptr (*__sanitizer_cookie_io_read)(void *cookie, char *buf, uptr size); typedef uptr (*__sanitizer_cookie_io_write)(void *cookie, const char *buf, uptr size); typedef int (*__sanitizer_cookie_io_seek)(void *cookie, u64 *offset, int whence); typedef int (*__sanitizer_cookie_io_close)(void *cookie); struct __sanitizer_cookie_io_functions_t { __sanitizer_cookie_io_read read; __sanitizer_cookie_io_write write; __sanitizer_cookie_io_seek seek; __sanitizer_cookie_io_close close; }; #endif #define IOC_NRBITS 8 #define IOC_TYPEBITS 8 #if defined(__powerpc__) || defined(__powerpc64__) || defined(__mips__) || \ defined(__sparc__) #define IOC_SIZEBITS 13 #define IOC_DIRBITS 3 #define IOC_NONE 1U #define IOC_WRITE 4U #define IOC_READ 2U #else #define IOC_SIZEBITS 14 #define IOC_DIRBITS 2 #define IOC_NONE 0U #define IOC_WRITE 1U #define IOC_READ 2U #endif #define IOC_NRMASK ((1 << IOC_NRBITS) - 1) #define IOC_TYPEMASK ((1 << IOC_TYPEBITS) - 1) #define IOC_SIZEMASK ((1 << IOC_SIZEBITS) - 1) #if defined(IOC_DIRMASK) #undef IOC_DIRMASK #endif #define IOC_DIRMASK ((1 << IOC_DIRBITS) - 1) #define IOC_NRSHIFT 0 #define IOC_TYPESHIFT (IOC_NRSHIFT + IOC_NRBITS) #define IOC_SIZESHIFT (IOC_TYPESHIFT + IOC_TYPEBITS) #define IOC_DIRSHIFT (IOC_SIZESHIFT + IOC_SIZEBITS) #define EVIOC_EV_MAX 0x1f #define EVIOC_ABS_MAX 0x3f #define IOC_DIR(nr) (((nr) >> IOC_DIRSHIFT) & IOC_DIRMASK) #define IOC_TYPE(nr) (((nr) >> IOC_TYPESHIFT) & IOC_TYPEMASK) #define IOC_NR(nr) (((nr) >> IOC_NRSHIFT) & IOC_NRMASK) #if defined(__sparc__) // In sparc the 14 bits SIZE field overlaps with the // least significant bit of DIR, so either IOC_READ or // IOC_WRITE shall be 1 in order to get a non-zero SIZE. #define IOC_SIZE(nr) \ ((((((nr) >> 29) & 0x7) & (4U | 2U)) == 0) ? 
0 : (((nr) >> 16) & 0x3fff)) #else #define IOC_SIZE(nr) (((nr) >> IOC_SIZESHIFT) & IOC_SIZEMASK) #endif extern unsigned struct_ifreq_sz; extern unsigned struct_termios_sz; extern unsigned struct_winsize_sz; #if SANITIZER_LINUX extern unsigned struct_arpreq_sz; extern unsigned struct_cdrom_msf_sz; extern unsigned struct_cdrom_multisession_sz; extern unsigned struct_cdrom_read_audio_sz; extern unsigned struct_cdrom_subchnl_sz; extern unsigned struct_cdrom_ti_sz; extern unsigned struct_cdrom_tocentry_sz; extern unsigned struct_cdrom_tochdr_sz; extern unsigned struct_cdrom_volctrl_sz; extern unsigned struct_ff_effect_sz; extern unsigned struct_floppy_drive_params_sz; extern unsigned struct_floppy_drive_struct_sz; extern unsigned struct_floppy_fdc_state_sz; extern unsigned struct_floppy_max_errors_sz; extern unsigned struct_floppy_raw_cmd_sz; extern unsigned struct_floppy_struct_sz; extern unsigned struct_floppy_write_errors_sz; extern unsigned struct_format_descr_sz; extern unsigned struct_hd_driveid_sz; extern unsigned struct_hd_geometry_sz; extern unsigned struct_input_absinfo_sz; extern unsigned struct_input_id_sz; extern unsigned struct_mtpos_sz; extern unsigned struct_termio_sz; extern unsigned struct_vt_consize_sz; extern unsigned struct_vt_sizes_sz; extern unsigned struct_vt_stat_sz; #endif // SANITIZER_LINUX #if SANITIZER_LINUX extern unsigned struct_copr_buffer_sz; extern unsigned struct_copr_debug_buf_sz; extern unsigned struct_copr_msg_sz; extern unsigned struct_midi_info_sz; extern unsigned struct_mtget_sz; extern unsigned struct_mtop_sz; extern unsigned struct_rtentry_sz; extern unsigned struct_sbi_instrument_sz; extern unsigned struct_seq_event_rec_sz; extern unsigned struct_synth_info_sz; extern unsigned struct_vt_mode_sz; #endif // SANITIZER_LINUX #if SANITIZER_LINUX && !SANITIZER_ANDROID extern unsigned struct_ax25_parms_struct_sz; -extern unsigned struct_cyclades_monitor_sz; extern unsigned struct_input_keymap_entry_sz; extern unsigned struct_ipx_config_data_sz; extern unsigned struct_kbdiacrs_sz; extern unsigned struct_kbentry_sz; extern unsigned struct_kbkeycode_sz; extern unsigned struct_kbsentry_sz; extern unsigned struct_mtconfiginfo_sz; extern unsigned struct_nr_parms_struct_sz; extern unsigned struct_scc_modem_sz; extern unsigned struct_scc_stat_sz; extern unsigned struct_serial_multiport_struct_sz; extern unsigned struct_serial_struct_sz; extern unsigned struct_sockaddr_ax25_sz; extern unsigned struct_unimapdesc_sz; extern unsigned struct_unimapinit_sz; #endif // SANITIZER_LINUX && !SANITIZER_ANDROID extern const unsigned long __sanitizer_bufsiz; #if SANITIZER_LINUX && !SANITIZER_ANDROID extern unsigned struct_audio_buf_info_sz; extern unsigned struct_ppp_stats_sz; #endif // (SANITIZER_LINUX || SANITIZER_FREEBSD) && !SANITIZER_ANDROID #if !SANITIZER_ANDROID && !SANITIZER_MAC extern unsigned struct_sioc_sg_req_sz; extern unsigned struct_sioc_vif_req_sz; #endif // ioctl request identifiers // A special value to mark ioctls that are not present on the target platform, // when it can not be determined without including any system headers. 
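Before the request identifiers themselves, a hedged sketch of what the IOC_* macros above encode, using the generic layout (8 nr bits, 8 type bits, 14 size bits, 2 dir bits); the request value composed here is invented for illustration and is not a real device ioctl:

#include <stdio.h>

// Generic Linux _IOC layout, matching the non-sparc/powerpc/mips branch above.
enum : unsigned {
  kNrBits = 8, kTypeBits = 8, kSizeBits = 14,
  kNrShift = 0,
  kTypeShift = kNrShift + kNrBits,      // 8
  kSizeShift = kTypeShift + kTypeBits,  // 16
  kDirShift = kSizeShift + kSizeBits,   // 30
  kRead = 2u,                           // IOC_READ in this layout
};

// Compose a request the way the kernel's _IOR(type, nr, size) would.
constexpr unsigned MakeRequest(unsigned dir, unsigned type, unsigned nr, unsigned size) {
  return (dir << kDirShift) | (size << kSizeShift) | (type << kTypeShift) | (nr << kNrShift);
}

int main() {
  const unsigned req = MakeRequest(kRead, 'T', 0x42, sizeof(int));  // hypothetical
  printf("dir=%u type=%c nr=%#x size=%u\n",
         (req >> kDirShift) & ((1u << 2) - 1),
         (char)((req >> kTypeShift) & ((1u << kTypeBits) - 1)),
         (req >> kNrShift) & ((1u << kNrBits) - 1),
         (req >> kSizeShift) & ((1u << kSizeBits) - 1));
  return 0;
}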
extern const unsigned IOCTL_NOT_PRESENT; extern unsigned IOCTL_FIOASYNC; extern unsigned IOCTL_FIOCLEX; extern unsigned IOCTL_FIOGETOWN; extern unsigned IOCTL_FIONBIO; extern unsigned IOCTL_FIONCLEX; extern unsigned IOCTL_FIOSETOWN; extern unsigned IOCTL_SIOCADDMULTI; extern unsigned IOCTL_SIOCATMARK; extern unsigned IOCTL_SIOCDELMULTI; extern unsigned IOCTL_SIOCGIFADDR; extern unsigned IOCTL_SIOCGIFBRDADDR; extern unsigned IOCTL_SIOCGIFCONF; extern unsigned IOCTL_SIOCGIFDSTADDR; extern unsigned IOCTL_SIOCGIFFLAGS; extern unsigned IOCTL_SIOCGIFMETRIC; extern unsigned IOCTL_SIOCGIFMTU; extern unsigned IOCTL_SIOCGIFNETMASK; extern unsigned IOCTL_SIOCGPGRP; extern unsigned IOCTL_SIOCSIFADDR; extern unsigned IOCTL_SIOCSIFBRDADDR; extern unsigned IOCTL_SIOCSIFDSTADDR; extern unsigned IOCTL_SIOCSIFFLAGS; extern unsigned IOCTL_SIOCSIFMETRIC; extern unsigned IOCTL_SIOCSIFMTU; extern unsigned IOCTL_SIOCSIFNETMASK; extern unsigned IOCTL_SIOCSPGRP; extern unsigned IOCTL_TIOCCONS; extern unsigned IOCTL_TIOCEXCL; extern unsigned IOCTL_TIOCGETD; extern unsigned IOCTL_TIOCGPGRP; extern unsigned IOCTL_TIOCGWINSZ; extern unsigned IOCTL_TIOCMBIC; extern unsigned IOCTL_TIOCMBIS; extern unsigned IOCTL_TIOCMGET; extern unsigned IOCTL_TIOCMSET; extern unsigned IOCTL_TIOCNOTTY; extern unsigned IOCTL_TIOCNXCL; extern unsigned IOCTL_TIOCOUTQ; extern unsigned IOCTL_TIOCPKT; extern unsigned IOCTL_TIOCSCTTY; extern unsigned IOCTL_TIOCSETD; extern unsigned IOCTL_TIOCSPGRP; extern unsigned IOCTL_TIOCSTI; extern unsigned IOCTL_TIOCSWINSZ; #if SANITIZER_LINUX && !SANITIZER_ANDROID extern unsigned IOCTL_SIOCGETSGCNT; extern unsigned IOCTL_SIOCGETVIFCNT; #endif #if SANITIZER_LINUX extern unsigned IOCTL_EVIOCGABS; extern unsigned IOCTL_EVIOCGBIT; extern unsigned IOCTL_EVIOCGEFFECTS; extern unsigned IOCTL_EVIOCGID; extern unsigned IOCTL_EVIOCGKEY; extern unsigned IOCTL_EVIOCGKEYCODE; extern unsigned IOCTL_EVIOCGLED; extern unsigned IOCTL_EVIOCGNAME; extern unsigned IOCTL_EVIOCGPHYS; extern unsigned IOCTL_EVIOCGRAB; extern unsigned IOCTL_EVIOCGREP; extern unsigned IOCTL_EVIOCGSND; extern unsigned IOCTL_EVIOCGSW; extern unsigned IOCTL_EVIOCGUNIQ; extern unsigned IOCTL_EVIOCGVERSION; extern unsigned IOCTL_EVIOCRMFF; extern unsigned IOCTL_EVIOCSABS; extern unsigned IOCTL_EVIOCSFF; extern unsigned IOCTL_EVIOCSKEYCODE; extern unsigned IOCTL_EVIOCSREP; extern unsigned IOCTL_BLKFLSBUF; extern unsigned IOCTL_BLKGETSIZE; extern unsigned IOCTL_BLKRAGET; extern unsigned IOCTL_BLKRASET; extern unsigned IOCTL_BLKROGET; extern unsigned IOCTL_BLKROSET; extern unsigned IOCTL_BLKRRPART; extern unsigned IOCTL_CDROMAUDIOBUFSIZ; extern unsigned IOCTL_CDROMEJECT; extern unsigned IOCTL_CDROMEJECT_SW; extern unsigned IOCTL_CDROMMULTISESSION; extern unsigned IOCTL_CDROMPAUSE; extern unsigned IOCTL_CDROMPLAYMSF; extern unsigned IOCTL_CDROMPLAYTRKIND; extern unsigned IOCTL_CDROMREADAUDIO; extern unsigned IOCTL_CDROMREADCOOKED; extern unsigned IOCTL_CDROMREADMODE1; extern unsigned IOCTL_CDROMREADMODE2; extern unsigned IOCTL_CDROMREADRAW; extern unsigned IOCTL_CDROMREADTOCENTRY; extern unsigned IOCTL_CDROMREADTOCHDR; extern unsigned IOCTL_CDROMRESET; extern unsigned IOCTL_CDROMRESUME; extern unsigned IOCTL_CDROMSEEK; extern unsigned IOCTL_CDROMSTART; extern unsigned IOCTL_CDROMSTOP; extern unsigned IOCTL_CDROMSUBCHNL; extern unsigned IOCTL_CDROMVOLCTRL; extern unsigned IOCTL_CDROMVOLREAD; extern unsigned IOCTL_CDROM_GET_UPC; extern unsigned IOCTL_FDCLRPRM; extern unsigned IOCTL_FDDEFPRM; extern unsigned IOCTL_FDFLUSH; extern unsigned IOCTL_FDFMTBEG; 
extern unsigned IOCTL_FDFMTEND; extern unsigned IOCTL_FDFMTTRK; extern unsigned IOCTL_FDGETDRVPRM; extern unsigned IOCTL_FDGETDRVSTAT; extern unsigned IOCTL_FDGETDRVTYP; extern unsigned IOCTL_FDGETFDCSTAT; extern unsigned IOCTL_FDGETMAXERRS; extern unsigned IOCTL_FDGETPRM; extern unsigned IOCTL_FDMSGOFF; extern unsigned IOCTL_FDMSGON; extern unsigned IOCTL_FDPOLLDRVSTAT; extern unsigned IOCTL_FDRAWCMD; extern unsigned IOCTL_FDRESET; extern unsigned IOCTL_FDSETDRVPRM; extern unsigned IOCTL_FDSETEMSGTRESH; extern unsigned IOCTL_FDSETMAXERRS; extern unsigned IOCTL_FDSETPRM; extern unsigned IOCTL_FDTWADDLE; extern unsigned IOCTL_FDWERRORCLR; extern unsigned IOCTL_FDWERRORGET; extern unsigned IOCTL_HDIO_DRIVE_CMD; extern unsigned IOCTL_HDIO_GETGEO; extern unsigned IOCTL_HDIO_GET_32BIT; extern unsigned IOCTL_HDIO_GET_DMA; extern unsigned IOCTL_HDIO_GET_IDENTITY; extern unsigned IOCTL_HDIO_GET_KEEPSETTINGS; extern unsigned IOCTL_HDIO_GET_MULTCOUNT; extern unsigned IOCTL_HDIO_GET_NOWERR; extern unsigned IOCTL_HDIO_GET_UNMASKINTR; extern unsigned IOCTL_HDIO_SET_32BIT; extern unsigned IOCTL_HDIO_SET_DMA; extern unsigned IOCTL_HDIO_SET_KEEPSETTINGS; extern unsigned IOCTL_HDIO_SET_MULTCOUNT; extern unsigned IOCTL_HDIO_SET_NOWERR; extern unsigned IOCTL_HDIO_SET_UNMASKINTR; extern unsigned IOCTL_MTIOCPOS; extern unsigned IOCTL_PPPIOCGASYNCMAP; extern unsigned IOCTL_PPPIOCGDEBUG; extern unsigned IOCTL_PPPIOCGFLAGS; extern unsigned IOCTL_PPPIOCGUNIT; extern unsigned IOCTL_PPPIOCGXASYNCMAP; extern unsigned IOCTL_PPPIOCSASYNCMAP; extern unsigned IOCTL_PPPIOCSDEBUG; extern unsigned IOCTL_PPPIOCSFLAGS; extern unsigned IOCTL_PPPIOCSMAXCID; extern unsigned IOCTL_PPPIOCSMRU; extern unsigned IOCTL_PPPIOCSXASYNCMAP; extern unsigned IOCTL_SIOCDARP; extern unsigned IOCTL_SIOCDRARP; extern unsigned IOCTL_SIOCGARP; extern unsigned IOCTL_SIOCGIFENCAP; extern unsigned IOCTL_SIOCGIFHWADDR; extern unsigned IOCTL_SIOCGIFMAP; extern unsigned IOCTL_SIOCGIFMEM; extern unsigned IOCTL_SIOCGIFNAME; extern unsigned IOCTL_SIOCGIFSLAVE; extern unsigned IOCTL_SIOCGRARP; extern unsigned IOCTL_SIOCGSTAMP; extern unsigned IOCTL_SIOCSARP; extern unsigned IOCTL_SIOCSIFENCAP; extern unsigned IOCTL_SIOCSIFHWADDR; extern unsigned IOCTL_SIOCSIFLINK; extern unsigned IOCTL_SIOCSIFMAP; extern unsigned IOCTL_SIOCSIFMEM; extern unsigned IOCTL_SIOCSIFSLAVE; extern unsigned IOCTL_SIOCSRARP; extern unsigned IOCTL_SNDCTL_COPR_HALT; extern unsigned IOCTL_SNDCTL_COPR_LOAD; extern unsigned IOCTL_SNDCTL_COPR_RCODE; extern unsigned IOCTL_SNDCTL_COPR_RCVMSG; extern unsigned IOCTL_SNDCTL_COPR_RDATA; extern unsigned IOCTL_SNDCTL_COPR_RESET; extern unsigned IOCTL_SNDCTL_COPR_RUN; extern unsigned IOCTL_SNDCTL_COPR_SENDMSG; extern unsigned IOCTL_SNDCTL_COPR_WCODE; extern unsigned IOCTL_SNDCTL_COPR_WDATA; extern unsigned IOCTL_TCFLSH; extern unsigned IOCTL_TCGETA; extern unsigned IOCTL_TCGETS; extern unsigned IOCTL_TCSBRK; extern unsigned IOCTL_TCSBRKP; extern unsigned IOCTL_TCSETA; extern unsigned IOCTL_TCSETAF; extern unsigned IOCTL_TCSETAW; extern unsigned IOCTL_TCSETS; extern unsigned IOCTL_TCSETSF; extern unsigned IOCTL_TCSETSW; extern unsigned IOCTL_TCXONC; extern unsigned IOCTL_TIOCGLCKTRMIOS; extern unsigned IOCTL_TIOCGSOFTCAR; extern unsigned IOCTL_TIOCINQ; extern unsigned IOCTL_TIOCLINUX; extern unsigned IOCTL_TIOCSERCONFIG; extern unsigned IOCTL_TIOCSERGETLSR; extern unsigned IOCTL_TIOCSERGWILD; extern unsigned IOCTL_TIOCSERSWILD; extern unsigned IOCTL_TIOCSLCKTRMIOS; extern unsigned IOCTL_TIOCSSOFTCAR; extern unsigned IOCTL_VT_DISALLOCATE; extern 
unsigned IOCTL_VT_GETSTATE; extern unsigned IOCTL_VT_RESIZE; extern unsigned IOCTL_VT_RESIZEX; extern unsigned IOCTL_VT_SENDSIG; extern unsigned IOCTL_MTIOCGET; extern unsigned IOCTL_MTIOCTOP; extern unsigned IOCTL_SIOCADDRT; extern unsigned IOCTL_SIOCDELRT; extern unsigned IOCTL_SNDCTL_DSP_GETBLKSIZE; extern unsigned IOCTL_SNDCTL_DSP_GETFMTS; extern unsigned IOCTL_SNDCTL_DSP_NONBLOCK; extern unsigned IOCTL_SNDCTL_DSP_POST; extern unsigned IOCTL_SNDCTL_DSP_RESET; extern unsigned IOCTL_SNDCTL_DSP_SETFMT; extern unsigned IOCTL_SNDCTL_DSP_SETFRAGMENT; extern unsigned IOCTL_SNDCTL_DSP_SPEED; extern unsigned IOCTL_SNDCTL_DSP_STEREO; extern unsigned IOCTL_SNDCTL_DSP_SUBDIVIDE; extern unsigned IOCTL_SNDCTL_DSP_SYNC; extern unsigned IOCTL_SNDCTL_FM_4OP_ENABLE; extern unsigned IOCTL_SNDCTL_FM_LOAD_INSTR; extern unsigned IOCTL_SNDCTL_MIDI_INFO; extern unsigned IOCTL_SNDCTL_MIDI_PRETIME; extern unsigned IOCTL_SNDCTL_SEQ_CTRLRATE; extern unsigned IOCTL_SNDCTL_SEQ_GETINCOUNT; extern unsigned IOCTL_SNDCTL_SEQ_GETOUTCOUNT; extern unsigned IOCTL_SNDCTL_SEQ_NRMIDIS; extern unsigned IOCTL_SNDCTL_SEQ_NRSYNTHS; extern unsigned IOCTL_SNDCTL_SEQ_OUTOFBAND; extern unsigned IOCTL_SNDCTL_SEQ_PANIC; extern unsigned IOCTL_SNDCTL_SEQ_PERCMODE; extern unsigned IOCTL_SNDCTL_SEQ_RESET; extern unsigned IOCTL_SNDCTL_SEQ_RESETSAMPLES; extern unsigned IOCTL_SNDCTL_SEQ_SYNC; extern unsigned IOCTL_SNDCTL_SEQ_TESTMIDI; extern unsigned IOCTL_SNDCTL_SEQ_THRESHOLD; extern unsigned IOCTL_SNDCTL_SYNTH_INFO; extern unsigned IOCTL_SNDCTL_SYNTH_MEMAVL; extern unsigned IOCTL_SNDCTL_TMR_CONTINUE; extern unsigned IOCTL_SNDCTL_TMR_METRONOME; extern unsigned IOCTL_SNDCTL_TMR_SELECT; extern unsigned IOCTL_SNDCTL_TMR_SOURCE; extern unsigned IOCTL_SNDCTL_TMR_START; extern unsigned IOCTL_SNDCTL_TMR_STOP; extern unsigned IOCTL_SNDCTL_TMR_TEMPO; extern unsigned IOCTL_SNDCTL_TMR_TIMEBASE; extern unsigned IOCTL_SOUND_MIXER_READ_ALTPCM; extern unsigned IOCTL_SOUND_MIXER_READ_BASS; extern unsigned IOCTL_SOUND_MIXER_READ_CAPS; extern unsigned IOCTL_SOUND_MIXER_READ_CD; extern unsigned IOCTL_SOUND_MIXER_READ_DEVMASK; extern unsigned IOCTL_SOUND_MIXER_READ_ENHANCE; extern unsigned IOCTL_SOUND_MIXER_READ_IGAIN; extern unsigned IOCTL_SOUND_MIXER_READ_IMIX; extern unsigned IOCTL_SOUND_MIXER_READ_LINE1; extern unsigned IOCTL_SOUND_MIXER_READ_LINE2; extern unsigned IOCTL_SOUND_MIXER_READ_LINE3; extern unsigned IOCTL_SOUND_MIXER_READ_LINE; extern unsigned IOCTL_SOUND_MIXER_READ_LOUD; extern unsigned IOCTL_SOUND_MIXER_READ_MIC; extern unsigned IOCTL_SOUND_MIXER_READ_MUTE; extern unsigned IOCTL_SOUND_MIXER_READ_OGAIN; extern unsigned IOCTL_SOUND_MIXER_READ_PCM; extern unsigned IOCTL_SOUND_MIXER_READ_RECLEV; extern unsigned IOCTL_SOUND_MIXER_READ_RECMASK; extern unsigned IOCTL_SOUND_MIXER_READ_RECSRC; extern unsigned IOCTL_SOUND_MIXER_READ_SPEAKER; extern unsigned IOCTL_SOUND_MIXER_READ_STEREODEVS; extern unsigned IOCTL_SOUND_MIXER_READ_SYNTH; extern unsigned IOCTL_SOUND_MIXER_READ_TREBLE; extern unsigned IOCTL_SOUND_MIXER_READ_VOLUME; extern unsigned IOCTL_SOUND_MIXER_WRITE_ALTPCM; extern unsigned IOCTL_SOUND_MIXER_WRITE_BASS; extern unsigned IOCTL_SOUND_MIXER_WRITE_CD; extern unsigned IOCTL_SOUND_MIXER_WRITE_ENHANCE; extern unsigned IOCTL_SOUND_MIXER_WRITE_IGAIN; extern unsigned IOCTL_SOUND_MIXER_WRITE_IMIX; extern unsigned IOCTL_SOUND_MIXER_WRITE_LINE1; extern unsigned IOCTL_SOUND_MIXER_WRITE_LINE2; extern unsigned IOCTL_SOUND_MIXER_WRITE_LINE3; extern unsigned IOCTL_SOUND_MIXER_WRITE_LINE; extern unsigned IOCTL_SOUND_MIXER_WRITE_LOUD; extern unsigned 
IOCTL_SOUND_MIXER_WRITE_MIC; extern unsigned IOCTL_SOUND_MIXER_WRITE_MUTE; extern unsigned IOCTL_SOUND_MIXER_WRITE_OGAIN; extern unsigned IOCTL_SOUND_MIXER_WRITE_PCM; extern unsigned IOCTL_SOUND_MIXER_WRITE_RECLEV; extern unsigned IOCTL_SOUND_MIXER_WRITE_RECSRC; extern unsigned IOCTL_SOUND_MIXER_WRITE_SPEAKER; extern unsigned IOCTL_SOUND_MIXER_WRITE_SYNTH; extern unsigned IOCTL_SOUND_MIXER_WRITE_TREBLE; extern unsigned IOCTL_SOUND_MIXER_WRITE_VOLUME; extern unsigned IOCTL_SOUND_PCM_READ_BITS; extern unsigned IOCTL_SOUND_PCM_READ_CHANNELS; extern unsigned IOCTL_SOUND_PCM_READ_FILTER; extern unsigned IOCTL_SOUND_PCM_READ_RATE; extern unsigned IOCTL_SOUND_PCM_WRITE_CHANNELS; extern unsigned IOCTL_SOUND_PCM_WRITE_FILTER; extern unsigned IOCTL_VT_ACTIVATE; extern unsigned IOCTL_VT_GETMODE; extern unsigned IOCTL_VT_OPENQRY; extern unsigned IOCTL_VT_RELDISP; extern unsigned IOCTL_VT_SETMODE; extern unsigned IOCTL_VT_WAITACTIVE; #endif // SANITIZER_LINUX #if SANITIZER_LINUX && !SANITIZER_ANDROID -extern unsigned IOCTL_CYGETDEFTHRESH; -extern unsigned IOCTL_CYGETDEFTIMEOUT; -extern unsigned IOCTL_CYGETMON; -extern unsigned IOCTL_CYGETTHRESH; -extern unsigned IOCTL_CYGETTIMEOUT; -extern unsigned IOCTL_CYSETDEFTHRESH; -extern unsigned IOCTL_CYSETDEFTIMEOUT; -extern unsigned IOCTL_CYSETTHRESH; -extern unsigned IOCTL_CYSETTIMEOUT; extern unsigned IOCTL_EQL_EMANCIPATE; extern unsigned IOCTL_EQL_ENSLAVE; extern unsigned IOCTL_EQL_GETMASTRCFG; extern unsigned IOCTL_EQL_GETSLAVECFG; extern unsigned IOCTL_EQL_SETMASTRCFG; extern unsigned IOCTL_EQL_SETSLAVECFG; extern unsigned IOCTL_EVIOCGKEYCODE_V2; extern unsigned IOCTL_EVIOCGPROP; extern unsigned IOCTL_EVIOCSKEYCODE_V2; extern unsigned IOCTL_FS_IOC_GETFLAGS; extern unsigned IOCTL_FS_IOC_GETVERSION; extern unsigned IOCTL_FS_IOC_SETFLAGS; extern unsigned IOCTL_FS_IOC_SETVERSION; extern unsigned IOCTL_GIO_CMAP; extern unsigned IOCTL_GIO_FONT; extern unsigned IOCTL_GIO_UNIMAP; extern unsigned IOCTL_GIO_UNISCRNMAP; extern unsigned IOCTL_KDADDIO; extern unsigned IOCTL_KDDELIO; extern unsigned IOCTL_KDGETKEYCODE; extern unsigned IOCTL_KDGKBDIACR; extern unsigned IOCTL_KDGKBENT; extern unsigned IOCTL_KDGKBLED; extern unsigned IOCTL_KDGKBMETA; extern unsigned IOCTL_KDGKBSENT; extern unsigned IOCTL_KDMAPDISP; extern unsigned IOCTL_KDSETKEYCODE; extern unsigned IOCTL_KDSIGACCEPT; extern unsigned IOCTL_KDSKBDIACR; extern unsigned IOCTL_KDSKBENT; extern unsigned IOCTL_KDSKBLED; extern unsigned IOCTL_KDSKBMETA; extern unsigned IOCTL_KDSKBSENT; extern unsigned IOCTL_KDUNMAPDISP; extern unsigned IOCTL_LPABORT; extern unsigned IOCTL_LPABORTOPEN; extern unsigned IOCTL_LPCAREFUL; extern unsigned IOCTL_LPCHAR; extern unsigned IOCTL_LPGETIRQ; extern unsigned IOCTL_LPGETSTATUS; extern unsigned IOCTL_LPRESET; extern unsigned IOCTL_LPSETIRQ; extern unsigned IOCTL_LPTIME; extern unsigned IOCTL_LPWAIT; extern unsigned IOCTL_MTIOCGETCONFIG; extern unsigned IOCTL_MTIOCSETCONFIG; extern unsigned IOCTL_PIO_CMAP; extern unsigned IOCTL_PIO_FONT; extern unsigned IOCTL_PIO_UNIMAP; extern unsigned IOCTL_PIO_UNIMAPCLR; extern unsigned IOCTL_PIO_UNISCRNMAP; extern unsigned IOCTL_SCSI_IOCTL_GET_IDLUN; extern unsigned IOCTL_SCSI_IOCTL_PROBE_HOST; extern unsigned IOCTL_SCSI_IOCTL_TAGGED_DISABLE; extern unsigned IOCTL_SCSI_IOCTL_TAGGED_ENABLE; extern unsigned IOCTL_SIOCAIPXITFCRT; extern unsigned IOCTL_SIOCAIPXPRISLT; extern unsigned IOCTL_SIOCAX25ADDUID; extern unsigned IOCTL_SIOCAX25DELUID; extern unsigned IOCTL_SIOCAX25GETPARMS; extern unsigned IOCTL_SIOCAX25GETUID; extern unsigned 
IOCTL_SIOCAX25NOUID; extern unsigned IOCTL_SIOCAX25SETPARMS; extern unsigned IOCTL_SIOCDEVPLIP; extern unsigned IOCTL_SIOCIPXCFGDATA; extern unsigned IOCTL_SIOCNRDECOBS; extern unsigned IOCTL_SIOCNRGETPARMS; extern unsigned IOCTL_SIOCNRRTCTL; extern unsigned IOCTL_SIOCNRSETPARMS; extern unsigned IOCTL_SNDCTL_DSP_GETISPACE; extern unsigned IOCTL_SNDCTL_DSP_GETOSPACE; extern unsigned IOCTL_TIOCGSERIAL; extern unsigned IOCTL_TIOCSERGETMULTI; extern unsigned IOCTL_TIOCSERSETMULTI; extern unsigned IOCTL_TIOCSSERIAL; extern unsigned IOCTL_GIO_SCRNMAP; extern unsigned IOCTL_KDDISABIO; extern unsigned IOCTL_KDENABIO; extern unsigned IOCTL_KDGETLED; extern unsigned IOCTL_KDGETMODE; extern unsigned IOCTL_KDGKBMODE; extern unsigned IOCTL_KDGKBTYPE; extern unsigned IOCTL_KDMKTONE; extern unsigned IOCTL_KDSETLED; extern unsigned IOCTL_KDSETMODE; extern unsigned IOCTL_KDSKBMODE; extern unsigned IOCTL_KIOCSOUND; extern unsigned IOCTL_PIO_SCRNMAP; #endif extern const int si_SEGV_MAPERR; extern const int si_SEGV_ACCERR; } // namespace __sanitizer #define CHECK_TYPE_SIZE(TYPE) \ COMPILER_CHECK(sizeof(__sanitizer_##TYPE) == sizeof(TYPE)) #define CHECK_SIZE_AND_OFFSET(CLASS, MEMBER) \ COMPILER_CHECK(sizeof(((__sanitizer_##CLASS *)NULL)->MEMBER) == \ sizeof(((CLASS *)NULL)->MEMBER)); \ COMPILER_CHECK(offsetof(__sanitizer_##CLASS, MEMBER) == \ offsetof(CLASS, MEMBER)) // For sigaction, which is a function and struct at the same time, // and thus requires explicit "struct" in sizeof() expression. #define CHECK_STRUCT_SIZE_AND_OFFSET(CLASS, MEMBER) \ COMPILER_CHECK(sizeof(((struct __sanitizer_##CLASS *)NULL)->MEMBER) == \ sizeof(((struct CLASS *)NULL)->MEMBER)); \ COMPILER_CHECK(offsetof(struct __sanitizer_##CLASS, MEMBER) == \ offsetof(struct CLASS, MEMBER)) #define SIGACTION_SYMNAME sigaction #endif // SANITIZER_LINUX || SANITIZER_MAC #endif diff --git a/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp b/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp index d29438cf9dbd..12603da1750d 100644 --- a/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp +++ b/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cpp @@ -1,505 +1,510 @@ //===-- sanitizer_posix_libcdep.cpp ---------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file is shared between AddressSanitizer and ThreadSanitizer // run-time libraries and implements libc-dependent POSIX-specific functions // from sanitizer_libc.h. //===----------------------------------------------------------------------===// #include "sanitizer_platform.h" #if SANITIZER_POSIX #include "sanitizer_common.h" #include "sanitizer_flags.h" #include "sanitizer_platform_limits_netbsd.h" #include "sanitizer_platform_limits_posix.h" #include "sanitizer_platform_limits_solaris.h" #include "sanitizer_posix.h" #include "sanitizer_procmaps.h" #include #include #include #include #include #include #include #include #include #include #include #include #if SANITIZER_FREEBSD // The MAP_NORESERVE define has been removed in FreeBSD 11.x, and even before // that, it was never implemented. So just define it to zero. 
#undef MAP_NORESERVE #define MAP_NORESERVE 0 #endif typedef void (*sa_sigaction_t)(int, siginfo_t *, void *); namespace __sanitizer { u32 GetUid() { return getuid(); } uptr GetThreadSelf() { return (uptr)pthread_self(); } void ReleaseMemoryPagesToOS(uptr beg, uptr end) { uptr page_size = GetPageSizeCached(); uptr beg_aligned = RoundUpTo(beg, page_size); uptr end_aligned = RoundDownTo(end, page_size); if (beg_aligned < end_aligned) internal_madvise(beg_aligned, end_aligned - beg_aligned, SANITIZER_MADVISE_DONTNEED); } void SetShadowRegionHugePageMode(uptr addr, uptr size) { #ifdef MADV_NOHUGEPAGE // May not be defined on old systems. if (common_flags()->no_huge_pages_for_shadow) internal_madvise(addr, size, MADV_NOHUGEPAGE); else internal_madvise(addr, size, MADV_HUGEPAGE); #endif // MADV_NOHUGEPAGE } bool DontDumpShadowMemory(uptr addr, uptr length) { #if defined(MADV_DONTDUMP) return internal_madvise(addr, length, MADV_DONTDUMP) == 0; #elif defined(MADV_NOCORE) return internal_madvise(addr, length, MADV_NOCORE) == 0; #else return true; #endif // MADV_DONTDUMP } static rlim_t getlim(int res) { rlimit rlim; CHECK_EQ(0, getrlimit(res, &rlim)); return rlim.rlim_cur; } static void setlim(int res, rlim_t lim) { struct rlimit rlim; if (getrlimit(res, const_cast(&rlim))) { Report("ERROR: %s getrlimit() failed %d\n", SanitizerToolName, errno); Die(); } rlim.rlim_cur = lim; if (setrlimit(res, const_cast(&rlim))) { Report("ERROR: %s setrlimit() failed %d\n", SanitizerToolName, errno); Die(); } } void DisableCoreDumperIfNecessary() { if (common_flags()->disable_coredump) { setlim(RLIMIT_CORE, 0); } } bool StackSizeIsUnlimited() { rlim_t stack_size = getlim(RLIMIT_STACK); return (stack_size == RLIM_INFINITY); } void SetStackSizeLimitInBytes(uptr limit) { setlim(RLIMIT_STACK, (rlim_t)limit); CHECK(!StackSizeIsUnlimited()); } bool AddressSpaceIsUnlimited() { rlim_t as_size = getlim(RLIMIT_AS); return (as_size == RLIM_INFINITY); } void SetAddressSpaceUnlimited() { setlim(RLIMIT_AS, RLIM_INFINITY); CHECK(AddressSpaceIsUnlimited()); } void SleepForSeconds(int seconds) { sleep(seconds); } void SleepForMillis(int millis) { usleep(millis * 1000); } void Abort() { #if !SANITIZER_GO // If we are handling SIGABRT, unhandle it first. // TODO(vitalybuka): Check if handler belongs to sanitizer. if (GetHandleSignalMode(SIGABRT) != kHandleSignalNo) { struct sigaction sigact; internal_memset(&sigact, 0, sizeof(sigact)); sigact.sa_sigaction = (sa_sigaction_t)SIG_DFL; internal_sigaction(SIGABRT, &sigact, nullptr); } #endif abort(); } int Atexit(void (*function)(void)) { #if !SANITIZER_GO return atexit(function); #else return 0; #endif } bool SupportsColoredOutput(fd_t fd) { return isatty(fd) != 0; } #if !SANITIZER_GO // TODO(glider): different tools may require different altstack size. -static const uptr kAltStackSize = SIGSTKSZ * 4; // SIGSTKSZ is not enough. +static uptr GetAltStackSize() { + // Note: since GLIBC_2.31, SIGSTKSZ may be a function call, so this may be + // more costly than you think. However, GetAltStackSize is only called 2-3 times + // per thread, so we don't cache the evaluation. + return SIGSTKSZ * 4; +} void SetAlternateSignalStack() { stack_t altstack, oldstack; CHECK_EQ(0, sigaltstack(nullptr, &oldstack)); // If the alternate stack is already in place, do nothing. // Android always sets an alternate stack, but it's too small for us. if (!SANITIZER_ANDROID && !(oldstack.ss_flags & SS_DISABLE)) return; // TODO(glider): the mapped stack should have the MAP_STACK flag in the // future.
It is not required by man 2 sigaltstack now (they're using // malloc()). - void* base = MmapOrDie(kAltStackSize, __func__); + void *base = MmapOrDie(GetAltStackSize(), __func__); altstack.ss_sp = (char*) base; altstack.ss_flags = 0; - altstack.ss_size = kAltStackSize; + altstack.ss_size = GetAltStackSize(); CHECK_EQ(0, sigaltstack(&altstack, nullptr)); } void UnsetAlternateSignalStack() { stack_t altstack, oldstack; altstack.ss_sp = nullptr; altstack.ss_flags = SS_DISABLE; - altstack.ss_size = kAltStackSize; // Some sane value required on Darwin. + altstack.ss_size = GetAltStackSize(); // Some sane value required on Darwin. CHECK_EQ(0, sigaltstack(&altstack, &oldstack)); UnmapOrDie(oldstack.ss_sp, oldstack.ss_size); } static void MaybeInstallSigaction(int signum, SignalHandlerType handler) { if (GetHandleSignalMode(signum) == kHandleSignalNo) return; struct sigaction sigact; internal_memset(&sigact, 0, sizeof(sigact)); sigact.sa_sigaction = (sa_sigaction_t)handler; // Do not block the signal from being received in that signal's handler. // Clients are responsible for handling this correctly. sigact.sa_flags = SA_SIGINFO | SA_NODEFER; if (common_flags()->use_sigaltstack) sigact.sa_flags |= SA_ONSTACK; CHECK_EQ(0, internal_sigaction(signum, &sigact, nullptr)); VReport(1, "Installed the sigaction for signal %d\n", signum); } void InstallDeadlySignalHandlers(SignalHandlerType handler) { // Set the alternate signal stack for the main thread. // This will cause SetAlternateSignalStack to be called twice, but the stack // will be actually set only once. if (common_flags()->use_sigaltstack) SetAlternateSignalStack(); MaybeInstallSigaction(SIGSEGV, handler); MaybeInstallSigaction(SIGBUS, handler); MaybeInstallSigaction(SIGABRT, handler); MaybeInstallSigaction(SIGFPE, handler); MaybeInstallSigaction(SIGILL, handler); MaybeInstallSigaction(SIGTRAP, handler); } bool SignalContext::IsStackOverflow() const { // Access at a reasonable offset above SP, or slightly below it (to account // for x86_64 or PowerPC redzone, ARM push of multiple registers, etc) is // probably a stack overflow. #ifdef __s390__ // On s390, the fault address in siginfo points to start of the page, not // to the precise word that was accessed. Mask off the low bits of sp to // take it into account. bool IsStackAccess = addr >= (sp & ~0xFFF) && addr < sp + 0xFFFF; #else // Let's accept up to a page size away from top of stack. Things like stack // probing can trigger accesses with such large offsets. bool IsStackAccess = addr + GetPageSizeCached() > sp && addr < sp + 0xFFFF; #endif #if __powerpc__ // Large stack frames can be allocated with e.g. // lis r0,-10000 // stdux r1,r1,r0 # store sp to [sp-10000] and update sp by -10000 // If the store faults then sp will not have been updated, so test above // will not work, because the fault address will be more than just "slightly" // below sp. if (!IsStackAccess && IsAccessibleMemoryRange(pc, 4)) { u32 inst = *(unsigned *)pc; u32 ra = (inst >> 16) & 0x1F; u32 opcd = inst >> 26; u32 xo = (inst >> 1) & 0x3FF; // Check for store-with-update to sp. The instructions we accept are: // stbu rs,d(ra) stbux rs,ra,rb // sthu rs,d(ra) sthux rs,ra,rb // stwu rs,d(ra) stwux rs,ra,rb // stdu rs,ds(ra) stdux rs,ra,rb // where ra is r1 (the stack pointer). 
if (ra == 1 && (opcd == 39 || opcd == 45 || opcd == 37 || opcd == 62 || (opcd == 31 && (xo == 247 || xo == 439 || xo == 183 || xo == 181)))) IsStackAccess = true; } #endif // __powerpc__ // We also check si_code to filter out SEGV caused by something else other // then hitting the guard page or unmapped memory, like, for example, // unaligned memory access. auto si = static_cast(siginfo); return IsStackAccess && (si->si_code == si_SEGV_MAPERR || si->si_code == si_SEGV_ACCERR); } #endif // SANITIZER_GO bool IsAccessibleMemoryRange(uptr beg, uptr size) { uptr page_size = GetPageSizeCached(); // Checking too large memory ranges is slow. CHECK_LT(size, page_size * 10); int sock_pair[2]; if (pipe(sock_pair)) return false; uptr bytes_written = internal_write(sock_pair[1], reinterpret_cast(beg), size); int write_errno; bool result; if (internal_iserror(bytes_written, &write_errno)) { CHECK_EQ(EFAULT, write_errno); result = false; } else { result = (bytes_written == size); } internal_close(sock_pair[0]); internal_close(sock_pair[1]); return result; } void PlatformPrepareForSandboxing(__sanitizer_sandbox_arguments *args) { // Some kinds of sandboxes may forbid filesystem access, so we won't be able // to read the file mappings from /proc/self/maps. Luckily, neither the // process will be able to load additional libraries, so it's fine to use the // cached mappings. MemoryMappingLayout::CacheMemoryMappings(); } static bool MmapFixed(uptr fixed_addr, uptr size, int additional_flags, const char *name) { size = RoundUpTo(size, GetPageSizeCached()); fixed_addr = RoundDownTo(fixed_addr, GetPageSizeCached()); uptr p = MmapNamed((void *)fixed_addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED | additional_flags | MAP_ANON, name); int reserrno; if (internal_iserror(p, &reserrno)) { Report("ERROR: %s failed to " "allocate 0x%zx (%zd) bytes at address %zx (errno: %d)\n", SanitizerToolName, size, size, fixed_addr, reserrno); return false; } IncreaseTotalMmap(size); return true; } bool MmapFixedNoReserve(uptr fixed_addr, uptr size, const char *name) { return MmapFixed(fixed_addr, size, MAP_NORESERVE, name); } bool MmapFixedSuperNoReserve(uptr fixed_addr, uptr size, const char *name) { #if SANITIZER_FREEBSD if (common_flags()->no_huge_pages_for_shadow) return MmapFixedNoReserve(fixed_addr, size, name); // MAP_NORESERVE is implicit with FreeBSD return MmapFixed(fixed_addr, size, MAP_ALIGNED_SUPER, name); #else bool r = MmapFixedNoReserve(fixed_addr, size, name); if (r) SetShadowRegionHugePageMode(fixed_addr, size); return r; #endif } uptr ReservedAddressRange::Init(uptr size, const char *name, uptr fixed_addr) { base_ = fixed_addr ? MmapFixedNoAccess(fixed_addr, size, name) : MmapNoAccess(size); size_ = size; name_ = name; (void)os_handle_; // unsupported return reinterpret_cast(base_); } // Uses fixed_addr for now. // Will use offset instead once we've implemented this function for real. uptr ReservedAddressRange::Map(uptr fixed_addr, uptr size, const char *name) { return reinterpret_cast( MmapFixedOrDieOnFatalError(fixed_addr, size, name)); } uptr ReservedAddressRange::MapOrDie(uptr fixed_addr, uptr size, const char *name) { return reinterpret_cast(MmapFixedOrDie(fixed_addr, size, name)); } void ReservedAddressRange::Unmap(uptr addr, uptr size) { CHECK_LE(size, size_); if (addr == reinterpret_cast(base_)) // If we unmap the whole range, just null out the base. base_ = (size == size_) ? 
nullptr : reinterpret_cast(addr + size); else CHECK_EQ(addr + size, reinterpret_cast(base_) + size_); size_ -= size; UnmapOrDie(reinterpret_cast(addr), size); } void *MmapFixedNoAccess(uptr fixed_addr, uptr size, const char *name) { return (void *)MmapNamed((void *)fixed_addr, size, PROT_NONE, MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE | MAP_ANON, name); } void *MmapNoAccess(uptr size) { unsigned flags = MAP_PRIVATE | MAP_ANON | MAP_NORESERVE; return (void *)internal_mmap(nullptr, size, PROT_NONE, flags, -1, 0); } // This function is defined elsewhere if we intercepted pthread_attr_getstack. extern "C" { SANITIZER_WEAK_ATTRIBUTE int real_pthread_attr_getstack(void *attr, void **addr, size_t *size); } // extern "C" int my_pthread_attr_getstack(void *attr, void **addr, uptr *size) { #if !SANITIZER_GO && !SANITIZER_MAC if (&real_pthread_attr_getstack) return real_pthread_attr_getstack((pthread_attr_t *)attr, addr, (size_t *)size); #endif return pthread_attr_getstack((pthread_attr_t *)attr, addr, (size_t *)size); } #if !SANITIZER_GO void AdjustStackSize(void *attr_) { pthread_attr_t *attr = (pthread_attr_t *)attr_; uptr stackaddr = 0; uptr stacksize = 0; my_pthread_attr_getstack(attr, (void**)&stackaddr, &stacksize); // GLibC will return (0 - stacksize) as the stack address in the case when // stacksize is set, but stackaddr is not. bool stack_set = (stackaddr != 0) && (stackaddr + stacksize != 0); // We place a lot of tool data into TLS, account for that. const uptr minstacksize = GetTlsSize() + 128*1024; if (stacksize < minstacksize) { if (!stack_set) { if (stacksize != 0) { VPrintf(1, "Sanitizer: increasing stacksize %zu->%zu\n", stacksize, minstacksize); pthread_attr_setstacksize(attr, minstacksize); } } else { Printf("Sanitizer: pre-allocated stack size is insufficient: " "%zu < %zu\n", stacksize, minstacksize); Printf("Sanitizer: pthread_create is likely to fail.\n"); } } } #endif // !SANITIZER_GO pid_t StartSubprocess(const char *program, const char *const argv[], const char *const envp[], fd_t stdin_fd, fd_t stdout_fd, fd_t stderr_fd) { auto file_closer = at_scope_exit([&] { if (stdin_fd != kInvalidFd) { internal_close(stdin_fd); } if (stdout_fd != kInvalidFd) { internal_close(stdout_fd); } if (stderr_fd != kInvalidFd) { internal_close(stderr_fd); } }); int pid = internal_fork(); if (pid < 0) { int rverrno; if (internal_iserror(pid, &rverrno)) { Report("WARNING: failed to fork (errno %d)\n", rverrno); } return pid; } if (pid == 0) { // Child subprocess if (stdin_fd != kInvalidFd) { internal_close(STDIN_FILENO); internal_dup2(stdin_fd, STDIN_FILENO); internal_close(stdin_fd); } if (stdout_fd != kInvalidFd) { internal_close(STDOUT_FILENO); internal_dup2(stdout_fd, STDOUT_FILENO); internal_close(stdout_fd); } if (stderr_fd != kInvalidFd) { internal_close(STDERR_FILENO); internal_dup2(stderr_fd, STDERR_FILENO); internal_close(stderr_fd); } for (int fd = sysconf(_SC_OPEN_MAX); fd > 2; fd--) internal_close(fd); internal_execve(program, const_cast(&argv[0]), const_cast(envp)); internal__exit(1); } return pid; } bool IsProcessRunning(pid_t pid) { int process_status; uptr waitpid_status = internal_waitpid(pid, &process_status, WNOHANG); int local_errno; if (internal_iserror(waitpid_status, &local_errno)) { VReport(1, "Waiting on the process failed (errno %d).\n", local_errno); return false; } return waitpid_status == 0; } int WaitForProcess(pid_t pid) { int process_status; uptr waitpid_status = internal_waitpid(pid, &process_status, 0); int local_errno; if (internal_iserror(waitpid_status, 
&local_errno)) { VReport(1, "Waiting on the process failed (errno %d).\n", local_errno); return -1; } return process_status; } bool IsStateDetached(int state) { return state == PTHREAD_CREATE_DETACHED; } } // namespace __sanitizer #endif // SANITIZER_POSIX diff --git a/contrib/llvm-project/libcxx/include/__availability b/contrib/llvm-project/libcxx/include/__availability index db2267c8eb16..cc3b6fabdab1 100644 --- a/contrib/llvm-project/libcxx/include/__availability +++ b/contrib/llvm-project/libcxx/include/__availability @@ -1,206 +1,249 @@ // -*- C++ -*- //===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef _LIBCPP___AVAILABILITY #define _LIBCPP___AVAILABILITY #include <__config> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif // Libc++ is shipped by various vendors. In particular, it is used as a system // library on macOS, iOS and other Apple platforms. In order for users to be // able to compile a binary that is intended to be deployed to an older version // of a platform, Clang provides availability attributes [1]. These attributes // can be placed on declarations and are used to describe the life cycle of a // symbol in the library. // // The main goal is to ensure a compile-time error if a symbol that hasn't been // introduced in a previously released library is used in a program that targets // that previously released library. Normally, this would be a load-time error // when one tries to launch the program against the older library. // // For example, the filesystem library was introduced in the dylib in macOS 10.15. // If a user compiles on a macOS 10.15 host but targets macOS 10.13 with their // program, the compiler would normally not complain (because the required // declarations are in the headers), but the dynamic loader would fail to find // the symbols when actually trying to launch the program on macOS 10.13. To // turn this into a compile-time issue instead, declarations are annotated with // when they were introduced, and the compiler can produce a diagnostic if the // program references something that isn't available on the deployment target. // // This mechanism is general in nature, and any vendor can add their markup to // the library (see below). Whenever a new feature is added that requires support // in the shared library, a macro should be added below to mark this feature // as unavailable. When vendors decide to ship the feature as part of their // shared library, they can update the markup appropriately. // +// Furthermore, many features in the standard library have corresponding +// feature-test macros. When a feature is made unavailable on some deployment +// target, a macro should be defined to signal that it is unavailable. That +// macro can then be picked up when feature-test macros are generated (see +// generate_feature_test_macro_components.py) to make sure that feature-test +// macros don't announce a feature as being implemented if it has been marked +// as unavailable. +// // Note that this mechanism is disabled by default in the "upstream" libc++. // Availability annotations are only meaningful when shipping libc++ inside // a platform (i.e. 
as a system library), and so vendors that want them should // turn those annotations on at CMake configuration time. // // [1]: https://clang.llvm.org/docs/AttributeReference.html#availability // For backwards compatibility, allow users to define _LIBCPP_DISABLE_AVAILABILITY // for a while. #if defined(_LIBCPP_DISABLE_AVAILABILITY) # if !defined(_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) # define _LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS # endif #endif // Availability markup is disabled when building the library, or when the compiler // doesn't support the proper attributes. #if defined(_LIBCPP_BUILDING_LIBRARY) || \ defined(_LIBCXXABI_BUILDING_LIBRARY) || \ !__has_feature(attribute_availability_with_strict) || \ !__has_feature(attribute_availability_in_templates) || \ !__has_extension(pragma_clang_attribute_external_declaration) # if !defined(_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) # define _LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS # endif #endif #if defined(_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) // This controls the availability of std::shared_mutex and std::shared_timed_mutex, // which were added to the dylib later. # define _LIBCPP_AVAILABILITY_SHARED_MUTEX +// # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex +// # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex // These macros control the availability of std::bad_optional_access and // other exception types. These were put in the shared library to prevent // code bloat from every user program defining the vtable for these exception // types. # define _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS # define _LIBCPP_AVAILABILITY_BAD_VARIANT_ACCESS # define _LIBCPP_AVAILABILITY_BAD_ANY_CAST // This controls the availability of std::uncaught_exceptions(). # define _LIBCPP_AVAILABILITY_UNCAUGHT_EXCEPTIONS // This controls the availability of the sized version of ::operator delete, // which was added to the dylib later. # define _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE // This controls the availability of the std::future_error exception. # define _LIBCPP_AVAILABILITY_FUTURE_ERROR // This controls the availability of std::type_info's vtable. // I can't imagine how using std::type_info can work at all if // this isn't supported. # define _LIBCPP_AVAILABILITY_TYPEINFO_VTABLE // This controls the availability of std::locale::category members // (e.g. std::locale::collate), which are defined in the dylib. # define _LIBCPP_AVAILABILITY_LOCALE_CATEGORY // This controls the availability of atomic operations on std::shared_ptr // (e.g. `std::atomic_store(std::shared_ptr)`), which require a shared // lock table located in the dylib. # define _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR // These macros control the availability of all parts of that // depend on something in the dylib. # define _LIBCPP_AVAILABILITY_FILESYSTEM # define _LIBCPP_AVAILABILITY_FILESYSTEM_PUSH # define _LIBCPP_AVAILABILITY_FILESYSTEM_POP +// # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem // This controls the availability of std::to_chars. # define _LIBCPP_AVAILABILITY_TO_CHARS // This controls the availability of the C++20 synchronization library, // which requires shared library support for various operations // (see libcxx/src/atomic.cpp). 
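To make the mechanism described at the top of this header concrete, here is a hedged, standalone illustration of strict availability markup; the function name and version are invented, and the diagnostic only appears when compiling with Clang for an Apple target whose deployment target predates the stated version:

// Declaring a symbol as introduced in macOS 10.15, in the same style as the
// attributes this header applies to real dylib symbols.
__attribute__((availability(macosx,strict,introduced=10.15)))
void hypothetical_dylib_function();

void Caller() {
  // With e.g. -mmacosx-version-min=10.13, Clang rejects this call at compile
  // time instead of letting the program fail at load time.
  hypothetical_dylib_function();
}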
# define _LIBCPP_AVAILABILITY_SYNC +// # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait +// # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier +// # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch +// # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore #elif defined(__APPLE__) # define _LIBCPP_AVAILABILITY_SHARED_MUTEX \ __attribute__((availability(macosx,strict,introduced=10.12))) \ __attribute__((availability(ios,strict,introduced=10.0))) \ __attribute__((availability(tvos,strict,introduced=10.0))) \ __attribute__((availability(watchos,strict,introduced=3.0))) +# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101200) || \ + (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 100000) || \ + (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 100000) || \ + (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 30000) +# define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex +# define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex +# endif + # define _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS \ __attribute__((availability(macosx,strict,introduced=10.13))) \ __attribute__((availability(ios,strict,introduced=11.0))) \ __attribute__((availability(tvos,strict,introduced=11.0))) \ __attribute__((availability(watchos,strict,introduced=4.0))) # define _LIBCPP_AVAILABILITY_BAD_VARIANT_ACCESS \ _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS # define _LIBCPP_AVAILABILITY_BAD_ANY_CAST \ _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS + # define _LIBCPP_AVAILABILITY_UNCAUGHT_EXCEPTIONS \ __attribute__((availability(macosx,strict,introduced=10.12))) \ __attribute__((availability(ios,strict,introduced=10.0))) \ __attribute__((availability(tvos,strict,introduced=10.0))) \ __attribute__((availability(watchos,strict,introduced=3.0))) + # define _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE \ __attribute__((availability(macosx,strict,introduced=10.12))) \ __attribute__((availability(ios,strict,introduced=10.0))) \ __attribute__((availability(tvos,strict,introduced=10.0))) \ __attribute__((availability(watchos,strict,introduced=3.0))) + # define _LIBCPP_AVAILABILITY_FUTURE_ERROR \ __attribute__((availability(ios,strict,introduced=6.0))) + # define _LIBCPP_AVAILABILITY_TYPEINFO_VTABLE \ __attribute__((availability(macosx,strict,introduced=10.9))) \ __attribute__((availability(ios,strict,introduced=7.0))) + # define _LIBCPP_AVAILABILITY_LOCALE_CATEGORY \ __attribute__((availability(macosx,strict,introduced=10.9))) \ __attribute__((availability(ios,strict,introduced=7.0))) + # define _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR \ __attribute__((availability(macosx,strict,introduced=10.9))) \ __attribute__((availability(ios,strict,introduced=7.0))) + # define _LIBCPP_AVAILABILITY_FILESYSTEM \ __attribute__((availability(macosx,strict,introduced=10.15))) \ __attribute__((availability(ios,strict,introduced=13.0))) \ __attribute__((availability(tvos,strict,introduced=13.0))) \ __attribute__((availability(watchos,strict,introduced=6.0))) # define _LIBCPP_AVAILABILITY_FILESYSTEM_PUSH \ _Pragma("clang attribute push(__attribute__((availability(macosx,strict,introduced=10.15))), apply_to=any(function,record))") \ _Pragma("clang attribute push(__attribute__((availability(ios,strict,introduced=13.0))), apply_to=any(function,record))") \ _Pragma("clang attribute 
push(__attribute__((availability(tvos,strict,introduced=13.0))), apply_to=any(function,record))") \ _Pragma("clang attribute push(__attribute__((availability(watchos,strict,introduced=6.0))), apply_to=any(function,record))") # define _LIBCPP_AVAILABILITY_FILESYSTEM_POP \ _Pragma("clang attribute pop") \ _Pragma("clang attribute pop") \ _Pragma("clang attribute pop") \ _Pragma("clang attribute pop") +# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101500) || \ + (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 130000) || \ + (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 130000) || \ + (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 60000) +# define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem +# endif + # define _LIBCPP_AVAILABILITY_TO_CHARS \ _LIBCPP_AVAILABILITY_FILESYSTEM + + // Note: Those are not ABI-stable yet, so we can't ship them. # define _LIBCPP_AVAILABILITY_SYNC \ __attribute__((unavailable)) +# define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait +# define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier +# define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch +# define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore #else // ...New vendors can add availability markup here... # error "It looks like you're trying to enable vendor availability markup, but you haven't defined the corresponding macros yet!" #endif // Define availability attributes that depend on _LIBCPP_NO_EXCEPTIONS. // Those are defined in terms of the availability attributes above, and // should not be vendor-specific. #if defined(_LIBCPP_NO_EXCEPTIONS) # define _LIBCPP_AVAILABILITY_FUTURE # define _LIBCPP_AVAILABILITY_THROW_BAD_ANY_CAST # define _LIBCPP_AVAILABILITY_THROW_BAD_OPTIONAL_ACCESS # define _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS #else # define _LIBCPP_AVAILABILITY_FUTURE _LIBCPP_AVAILABILITY_FUTURE_ERROR # define _LIBCPP_AVAILABILITY_THROW_BAD_ANY_CAST _LIBCPP_AVAILABILITY_BAD_ANY_CAST # define _LIBCPP_AVAILABILITY_THROW_BAD_OPTIONAL_ACCESS _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS # define _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS _LIBCPP_AVAILABILITY_BAD_VARIANT_ACCESS #endif #endif // _LIBCPP___AVAILABILITY diff --git a/contrib/llvm-project/libcxx/include/__bit_reference b/contrib/llvm-project/libcxx/include/__bit_reference index 9cfb4b84e653..d44ad03d3134 100644 --- a/contrib/llvm-project/libcxx/include/__bit_reference +++ b/contrib/llvm-project/libcxx/include/__bit_reference @@ -1,1305 +1,1303 @@ // -*- C++ -*- //===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef _LIBCPP___BIT_REFERENCE #define _LIBCPP___BIT_REFERENCE #include <__config> #include <__bits> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header #endif _LIBCPP_PUSH_MACROS #include <__undef_macros> _LIBCPP_BEGIN_NAMESPACE_STD template class __bit_iterator; template class __bit_const_reference; template struct __has_storage_type { static const bool value = false; }; template ::value> class __bit_reference { typedef typename _Cp::__storage_type __storage_type; typedef typename _Cp::__storage_pointer __storage_pointer; __storage_pointer __seg_; __storage_type __mask_; friend typename _Cp::__self; friend class __bit_const_reference<_Cp>; friend class __bit_iterator<_Cp, false>; public: _LIBCPP_INLINE_VISIBILITY __bit_reference(const __bit_reference&) = default; _LIBCPP_INLINE_VISIBILITY operator bool() const _NOEXCEPT {return static_cast(*__seg_ & __mask_);} _LIBCPP_INLINE_VISIBILITY bool operator ~() const _NOEXCEPT {return !static_cast(*this);} _LIBCPP_INLINE_VISIBILITY __bit_reference& operator=(bool __x) _NOEXCEPT { if (__x) *__seg_ |= __mask_; else *__seg_ &= ~__mask_; return *this; } _LIBCPP_INLINE_VISIBILITY __bit_reference& operator=(const __bit_reference& __x) _NOEXCEPT {return operator=(static_cast(__x));} _LIBCPP_INLINE_VISIBILITY void flip() _NOEXCEPT {*__seg_ ^= __mask_;} _LIBCPP_INLINE_VISIBILITY __bit_iterator<_Cp, false> operator&() const _NOEXCEPT {return __bit_iterator<_Cp, false>(__seg_, static_cast(__libcpp_ctz(__mask_)));} private: _LIBCPP_INLINE_VISIBILITY __bit_reference(__storage_pointer __s, __storage_type __m) _NOEXCEPT : __seg_(__s), __mask_(__m) {} }; template class __bit_reference<_Cp, false> { }; template inline _LIBCPP_INLINE_VISIBILITY void swap(__bit_reference<_Cp> __x, __bit_reference<_Cp> __y) _NOEXCEPT { bool __t = __x; __x = __y; __y = __t; } template inline _LIBCPP_INLINE_VISIBILITY void swap(__bit_reference<_Cp> __x, __bit_reference<_Dp> __y) _NOEXCEPT { bool __t = __x; __x = __y; __y = __t; } template inline _LIBCPP_INLINE_VISIBILITY void swap(__bit_reference<_Cp> __x, bool& __y) _NOEXCEPT { bool __t = __x; __x = __y; __y = __t; } template inline _LIBCPP_INLINE_VISIBILITY void swap(bool& __x, __bit_reference<_Cp> __y) _NOEXCEPT { bool __t = __x; __x = __y; __y = __t; } template class __bit_const_reference { typedef typename _Cp::__storage_type __storage_type; typedef typename _Cp::__const_storage_pointer __storage_pointer; __storage_pointer __seg_; __storage_type __mask_; friend typename _Cp::__self; friend class __bit_iterator<_Cp, true>; public: _LIBCPP_INLINE_VISIBILITY __bit_const_reference(const __bit_const_reference&) = default; _LIBCPP_INLINE_VISIBILITY __bit_const_reference(const __bit_reference<_Cp>& __x) _NOEXCEPT : __seg_(__x.__seg_), __mask_(__x.__mask_) {} _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR operator bool() const _NOEXCEPT {return static_cast(*__seg_ & __mask_);} _LIBCPP_INLINE_VISIBILITY __bit_iterator<_Cp, true> operator&() const _NOEXCEPT {return __bit_iterator<_Cp, true>(__seg_, static_cast(__libcpp_ctz(__mask_)));} private: _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR __bit_const_reference(__storage_pointer __s, __storage_type __m) _NOEXCEPT : __seg_(__s), __mask_(__m) {} __bit_const_reference& operator=(const __bit_const_reference&) = delete; }; // find template __bit_iterator<_Cp, _IsConst> __find_bool_true(__bit_iterator<_Cp, _IsConst> __first, 
typename _Cp::size_type __n) { typedef __bit_iterator<_Cp, _IsConst> _It; typedef typename _It::__storage_type __storage_type; static const int __bits_per_word = _It::__bits_per_word; // do first partial word if (__first.__ctz_ != 0) { __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); __storage_type __dn = _VSTD::min(__clz_f, __n); __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); __storage_type __b = *__first.__seg_ & __m; if (__b) return _It(__first.__seg_, static_cast(_VSTD::__libcpp_ctz(__b))); if (__n == __dn) return __first + __n; __n -= __dn; ++__first.__seg_; } // do middle whole words for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) if (*__first.__seg_) return _It(__first.__seg_, static_cast(_VSTD::__libcpp_ctz(*__first.__seg_))); // do last partial word if (__n > 0) { __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); __storage_type __b = *__first.__seg_ & __m; if (__b) return _It(__first.__seg_, static_cast(_VSTD::__libcpp_ctz(__b))); } return _It(__first.__seg_, static_cast(__n)); } template __bit_iterator<_Cp, _IsConst> __find_bool_false(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n) { typedef __bit_iterator<_Cp, _IsConst> _It; typedef typename _It::__storage_type __storage_type; const int __bits_per_word = _It::__bits_per_word; // do first partial word if (__first.__ctz_ != 0) { __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); __storage_type __dn = _VSTD::min(__clz_f, __n); __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); __storage_type __b = ~*__first.__seg_ & __m; if (__b) return _It(__first.__seg_, static_cast(_VSTD::__libcpp_ctz(__b))); if (__n == __dn) return __first + __n; __n -= __dn; ++__first.__seg_; } // do middle whole words for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) { __storage_type __b = ~*__first.__seg_; if (__b) return _It(__first.__seg_, static_cast(_VSTD::__libcpp_ctz(__b))); } // do last partial word if (__n > 0) { __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); __storage_type __b = ~*__first.__seg_ & __m; if (__b) return _It(__first.__seg_, static_cast(_VSTD::__libcpp_ctz(__b))); } return _It(__first.__seg_, static_cast(__n)); } template inline _LIBCPP_INLINE_VISIBILITY __bit_iterator<_Cp, _IsConst> find(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value_) { if (static_cast(__value_)) return _VSTD::__find_bool_true(__first, static_cast(__last - __first)); return _VSTD::__find_bool_false(__first, static_cast(__last - __first)); } // count template typename __bit_iterator<_Cp, _IsConst>::difference_type __count_bool_true(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n) { typedef __bit_iterator<_Cp, _IsConst> _It; typedef typename _It::__storage_type __storage_type; typedef typename _It::difference_type difference_type; const int __bits_per_word = _It::__bits_per_word; difference_type __r = 0; // do first partial word if (__first.__ctz_ != 0) { __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); __storage_type __dn = _VSTD::min(__clz_f, __n); __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); __r = _VSTD::__libcpp_popcount(*__first.__seg_ & __m); __n -= __dn; ++__first.__seg_; } // do middle whole words for (; __n >= 
__bits_per_word; ++__first.__seg_, __n -= __bits_per_word) __r += _VSTD::__libcpp_popcount(*__first.__seg_); // do last partial word if (__n > 0) { __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); __r += _VSTD::__libcpp_popcount(*__first.__seg_ & __m); } return __r; } template typename __bit_iterator<_Cp, _IsConst>::difference_type __count_bool_false(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n) { typedef __bit_iterator<_Cp, _IsConst> _It; typedef typename _It::__storage_type __storage_type; typedef typename _It::difference_type difference_type; const int __bits_per_word = _It::__bits_per_word; difference_type __r = 0; // do first partial word if (__first.__ctz_ != 0) { __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); __storage_type __dn = _VSTD::min(__clz_f, __n); __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); __r = _VSTD::__libcpp_popcount(~*__first.__seg_ & __m); __n -= __dn; ++__first.__seg_; } // do middle whole words for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) __r += _VSTD::__libcpp_popcount(~*__first.__seg_); // do last partial word if (__n > 0) { __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); __r += _VSTD::__libcpp_popcount(~*__first.__seg_ & __m); } return __r; } template inline _LIBCPP_INLINE_VISIBILITY typename __bit_iterator<_Cp, _IsConst>::difference_type count(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value_) { if (static_cast(__value_)) return _VSTD::__count_bool_true(__first, static_cast(__last - __first)); return _VSTD::__count_bool_false(__first, static_cast(__last - __first)); } // fill_n template void __fill_n_false(__bit_iterator<_Cp, false> __first, typename _Cp::size_type __n) { typedef __bit_iterator<_Cp, false> _It; typedef typename _It::__storage_type __storage_type; const int __bits_per_word = _It::__bits_per_word; // do first partial word if (__first.__ctz_ != 0) { __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); __storage_type __dn = _VSTD::min(__clz_f, __n); __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); *__first.__seg_ &= ~__m; __n -= __dn; ++__first.__seg_; } // do middle whole words __storage_type __nw = __n / __bits_per_word; _VSTD::memset(_VSTD::__to_address(__first.__seg_), 0, __nw * sizeof(__storage_type)); __n -= __nw * __bits_per_word; // do last partial word if (__n > 0) { __first.__seg_ += __nw; __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); *__first.__seg_ &= ~__m; } } template void __fill_n_true(__bit_iterator<_Cp, false> __first, typename _Cp::size_type __n) { typedef __bit_iterator<_Cp, false> _It; typedef typename _It::__storage_type __storage_type; const int __bits_per_word = _It::__bits_per_word; // do first partial word if (__first.__ctz_ != 0) { __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); __storage_type __dn = _VSTD::min(__clz_f, __n); __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); *__first.__seg_ |= __m; __n -= __dn; ++__first.__seg_; } // do middle whole words __storage_type __nw = __n / __bits_per_word; _VSTD::memset(_VSTD::__to_address(__first.__seg_), -1, __nw * sizeof(__storage_type)); __n -= __nw * __bits_per_word; // do last partial word if (__n > 0) { __first.__seg_ += __nw; 
__storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); *__first.__seg_ |= __m; } } template inline _LIBCPP_INLINE_VISIBILITY void fill_n(__bit_iterator<_Cp, false> __first, typename _Cp::size_type __n, bool __value_) { if (__n > 0) { if (__value_) _VSTD::__fill_n_true(__first, __n); else _VSTD::__fill_n_false(__first, __n); } } // fill template inline _LIBCPP_INLINE_VISIBILITY void fill(__bit_iterator<_Cp, false> __first, __bit_iterator<_Cp, false> __last, bool __value_) { _VSTD::fill_n(__first, static_cast(__last - __first), __value_); } // copy template __bit_iterator<_Cp, false> __copy_aligned(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { typedef __bit_iterator<_Cp, _IsConst> _In; typedef typename _In::difference_type difference_type; typedef typename _In::__storage_type __storage_type; const int __bits_per_word = _In::__bits_per_word; difference_type __n = __last - __first; if (__n > 0) { // do first word if (__first.__ctz_ != 0) { unsigned __clz = __bits_per_word - __first.__ctz_; difference_type __dn = _VSTD::min(static_cast(__clz), __n); __n -= __dn; __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn)); __storage_type __b = *__first.__seg_ & __m; *__result.__seg_ &= ~__m; *__result.__seg_ |= __b; __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); ++__first.__seg_; // __first.__ctz_ = 0; } // __first.__ctz_ == 0; // do middle words __storage_type __nw = __n / __bits_per_word; _VSTD::memmove(_VSTD::__to_address(__result.__seg_), _VSTD::__to_address(__first.__seg_), __nw * sizeof(__storage_type)); __n -= __nw * __bits_per_word; __result.__seg_ += __nw; // do last word if (__n > 0) { __first.__seg_ += __nw; __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); __storage_type __b = *__first.__seg_ & __m; *__result.__seg_ &= ~__m; *__result.__seg_ |= __b; __result.__ctz_ = static_cast(__n); } } return __result; } template __bit_iterator<_Cp, false> __copy_unaligned(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { typedef __bit_iterator<_Cp, _IsConst> _In; typedef typename _In::difference_type difference_type; typedef typename _In::__storage_type __storage_type; static const int __bits_per_word = _In::__bits_per_word; difference_type __n = __last - __first; if (__n > 0) { // do first word if (__first.__ctz_ != 0) { unsigned __clz_f = __bits_per_word - __first.__ctz_; difference_type __dn = _VSTD::min(static_cast(__clz_f), __n); __n -= __dn; __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); __storage_type __b = *__first.__seg_ & __m; unsigned __clz_r = __bits_per_word - __result.__ctz_; __storage_type __ddn = _VSTD::min<__storage_type>(__dn, __clz_r); __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn)); *__result.__seg_ &= ~__m; if (__result.__ctz_ > __first.__ctz_) *__result.__seg_ |= __b << (__result.__ctz_ - __first.__ctz_); else *__result.__seg_ |= __b >> (__first.__ctz_ - __result.__ctz_); __result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word; __result.__ctz_ = static_cast((__ddn + __result.__ctz_) % __bits_per_word); __dn -= __ddn; if (__dn > 0) { __m = ~__storage_type(0) >> (__bits_per_word - __dn); *__result.__seg_ &= ~__m; *__result.__seg_ |= __b >> (__first.__ctz_ + __ddn); 
__result.__ctz_ = static_cast(__dn); } ++__first.__seg_; // __first.__ctz_ = 0; } // __first.__ctz_ == 0; // do middle words unsigned __clz_r = __bits_per_word - __result.__ctz_; __storage_type __m = ~__storage_type(0) << __result.__ctz_; for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) { __storage_type __b = *__first.__seg_; *__result.__seg_ &= ~__m; *__result.__seg_ |= __b << __result.__ctz_; ++__result.__seg_; *__result.__seg_ &= __m; *__result.__seg_ |= __b >> __clz_r; } // do last word if (__n > 0) { __m = ~__storage_type(0) >> (__bits_per_word - __n); __storage_type __b = *__first.__seg_ & __m; __storage_type __dn = _VSTD::min(__n, static_cast(__clz_r)); __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn)); *__result.__seg_ &= ~__m; *__result.__seg_ |= __b << __result.__ctz_; __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); __n -= __dn; if (__n > 0) { __m = ~__storage_type(0) >> (__bits_per_word - __n); *__result.__seg_ &= ~__m; *__result.__seg_ |= __b >> __dn; __result.__ctz_ = static_cast(__n); } } } return __result; } template inline _LIBCPP_INLINE_VISIBILITY __bit_iterator<_Cp, false> copy(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { if (__first.__ctz_ == __result.__ctz_) return _VSTD::__copy_aligned(__first, __last, __result); return _VSTD::__copy_unaligned(__first, __last, __result); } // copy_backward template __bit_iterator<_Cp, false> __copy_backward_aligned(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { typedef __bit_iterator<_Cp, _IsConst> _In; typedef typename _In::difference_type difference_type; typedef typename _In::__storage_type __storage_type; const int __bits_per_word = _In::__bits_per_word; difference_type __n = __last - __first; if (__n > 0) { // do first word if (__last.__ctz_ != 0) { difference_type __dn = _VSTD::min(static_cast(__last.__ctz_), __n); __n -= __dn; unsigned __clz = __bits_per_word - __last.__ctz_; __storage_type __m = (~__storage_type(0) << (__last.__ctz_ - __dn)) & (~__storage_type(0) >> __clz); __storage_type __b = *__last.__seg_ & __m; *__result.__seg_ &= ~__m; *__result.__seg_ |= __b; __result.__ctz_ = static_cast(((-__dn & (__bits_per_word - 1)) + __result.__ctz_) % __bits_per_word); // __last.__ctz_ = 0 } // __last.__ctz_ == 0 || __n == 0 // __result.__ctz_ == 0 || __n == 0 // do middle words __storage_type __nw = __n / __bits_per_word; __result.__seg_ -= __nw; __last.__seg_ -= __nw; _VSTD::memmove(_VSTD::__to_address(__result.__seg_), _VSTD::__to_address(__last.__seg_), __nw * sizeof(__storage_type)); __n -= __nw * __bits_per_word; // do last word if (__n > 0) { __storage_type __m = ~__storage_type(0) << (__bits_per_word - __n); __storage_type __b = *--__last.__seg_ & __m; *--__result.__seg_ &= ~__m; *__result.__seg_ |= __b; __result.__ctz_ = static_cast(-__n & (__bits_per_word - 1)); } } return __result; } template __bit_iterator<_Cp, false> __copy_backward_unaligned(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { typedef __bit_iterator<_Cp, _IsConst> _In; typedef typename _In::difference_type difference_type; typedef typename _In::__storage_type __storage_type; const int __bits_per_word = _In::__bits_per_word; difference_type __n = __last - __first; if (__n > 0) { // do first word if 
(__last.__ctz_ != 0) { difference_type __dn = _VSTD::min(static_cast(__last.__ctz_), __n); __n -= __dn; unsigned __clz_l = __bits_per_word - __last.__ctz_; __storage_type __m = (~__storage_type(0) << (__last.__ctz_ - __dn)) & (~__storage_type(0) >> __clz_l); __storage_type __b = *__last.__seg_ & __m; unsigned __clz_r = __bits_per_word - __result.__ctz_; __storage_type __ddn = _VSTD::min(__dn, static_cast(__result.__ctz_)); if (__ddn > 0) { __m = (~__storage_type(0) << (__result.__ctz_ - __ddn)) & (~__storage_type(0) >> __clz_r); *__result.__seg_ &= ~__m; if (__result.__ctz_ > __last.__ctz_) *__result.__seg_ |= __b << (__result.__ctz_ - __last.__ctz_); else *__result.__seg_ |= __b >> (__last.__ctz_ - __result.__ctz_); __result.__ctz_ = static_cast(((-__ddn & (__bits_per_word - 1)) + __result.__ctz_) % __bits_per_word); __dn -= __ddn; } if (__dn > 0) { // __result.__ctz_ == 0 --__result.__seg_; __result.__ctz_ = static_cast(-__dn & (__bits_per_word - 1)); __m = ~__storage_type(0) << __result.__ctz_; *__result.__seg_ &= ~__m; __last.__ctz_ -= __dn + __ddn; *__result.__seg_ |= __b << (__result.__ctz_ - __last.__ctz_); } // __last.__ctz_ = 0 } // __last.__ctz_ == 0 || __n == 0 // __result.__ctz_ != 0 || __n == 0 // do middle words unsigned __clz_r = __bits_per_word - __result.__ctz_; __storage_type __m = ~__storage_type(0) >> __clz_r; for (; __n >= __bits_per_word; __n -= __bits_per_word) { __storage_type __b = *--__last.__seg_; *__result.__seg_ &= ~__m; *__result.__seg_ |= __b >> __clz_r; *--__result.__seg_ &= __m; *__result.__seg_ |= __b << __result.__ctz_; } // do last word if (__n > 0) { __m = ~__storage_type(0) << (__bits_per_word - __n); __storage_type __b = *--__last.__seg_ & __m; __clz_r = __bits_per_word - __result.__ctz_; __storage_type __dn = _VSTD::min(__n, static_cast(__result.__ctz_)); __m = (~__storage_type(0) << (__result.__ctz_ - __dn)) & (~__storage_type(0) >> __clz_r); *__result.__seg_ &= ~__m; *__result.__seg_ |= __b >> (__bits_per_word - __result.__ctz_); __result.__ctz_ = static_cast(((-__dn & (__bits_per_word - 1)) + __result.__ctz_) % __bits_per_word); __n -= __dn; if (__n > 0) { // __result.__ctz_ == 0 --__result.__seg_; __result.__ctz_ = static_cast(-__n & (__bits_per_word - 1)); __m = ~__storage_type(0) << __result.__ctz_; *__result.__seg_ &= ~__m; *__result.__seg_ |= __b << (__result.__ctz_ - (__bits_per_word - __n - __dn)); } } } return __result; } template inline _LIBCPP_INLINE_VISIBILITY __bit_iterator<_Cp, false> copy_backward(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { if (__last.__ctz_ == __result.__ctz_) return _VSTD::__copy_backward_aligned(__first, __last, __result); return _VSTD::__copy_backward_unaligned(__first, __last, __result); } // move template inline _LIBCPP_INLINE_VISIBILITY __bit_iterator<_Cp, false> move(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { return _VSTD::copy(__first, __last, __result); } // move_backward template inline _LIBCPP_INLINE_VISIBILITY __bit_iterator<_Cp, false> move_backward(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { return _VSTD::copy_backward(__first, __last, __result); } // swap_ranges template __bit_iterator<__C2, false> __swap_ranges_aligned(__bit_iterator<__C1, false> __first, __bit_iterator<__C1, false> __last, __bit_iterator<__C2, false> __result) { typedef __bit_iterator<__C1, false> _I1; typedef typename 
_I1::difference_type difference_type; typedef typename _I1::__storage_type __storage_type; const int __bits_per_word = _I1::__bits_per_word; difference_type __n = __last - __first; if (__n > 0) { // do first word if (__first.__ctz_ != 0) { unsigned __clz = __bits_per_word - __first.__ctz_; difference_type __dn = _VSTD::min(static_cast(__clz), __n); __n -= __dn; __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz - __dn)); __storage_type __b1 = *__first.__seg_ & __m; *__first.__seg_ &= ~__m; __storage_type __b2 = *__result.__seg_ & __m; *__result.__seg_ &= ~__m; *__result.__seg_ |= __b1; *__first.__seg_ |= __b2; __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); ++__first.__seg_; // __first.__ctz_ = 0; } // __first.__ctz_ == 0; // do middle words for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_, ++__result.__seg_) swap(*__first.__seg_, *__result.__seg_); // do last word if (__n > 0) { __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); __storage_type __b1 = *__first.__seg_ & __m; *__first.__seg_ &= ~__m; __storage_type __b2 = *__result.__seg_ & __m; *__result.__seg_ &= ~__m; *__result.__seg_ |= __b1; *__first.__seg_ |= __b2; __result.__ctz_ = static_cast(__n); } } return __result; } template __bit_iterator<__C2, false> __swap_ranges_unaligned(__bit_iterator<__C1, false> __first, __bit_iterator<__C1, false> __last, __bit_iterator<__C2, false> __result) { typedef __bit_iterator<__C1, false> _I1; typedef typename _I1::difference_type difference_type; typedef typename _I1::__storage_type __storage_type; const int __bits_per_word = _I1::__bits_per_word; difference_type __n = __last - __first; if (__n > 0) { // do first word if (__first.__ctz_ != 0) { unsigned __clz_f = __bits_per_word - __first.__ctz_; difference_type __dn = _VSTD::min(static_cast(__clz_f), __n); __n -= __dn; __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); __storage_type __b1 = *__first.__seg_ & __m; *__first.__seg_ &= ~__m; unsigned __clz_r = __bits_per_word - __result.__ctz_; __storage_type __ddn = _VSTD::min<__storage_type>(__dn, __clz_r); __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn)); __storage_type __b2 = *__result.__seg_ & __m; *__result.__seg_ &= ~__m; if (__result.__ctz_ > __first.__ctz_) { unsigned __s = __result.__ctz_ - __first.__ctz_; *__result.__seg_ |= __b1 << __s; *__first.__seg_ |= __b2 >> __s; } else { unsigned __s = __first.__ctz_ - __result.__ctz_; *__result.__seg_ |= __b1 >> __s; *__first.__seg_ |= __b2 << __s; } __result.__seg_ += (__ddn + __result.__ctz_) / __bits_per_word; __result.__ctz_ = static_cast((__ddn + __result.__ctz_) % __bits_per_word); __dn -= __ddn; if (__dn > 0) { __m = ~__storage_type(0) >> (__bits_per_word - __dn); __b2 = *__result.__seg_ & __m; *__result.__seg_ &= ~__m; unsigned __s = __first.__ctz_ + __ddn; *__result.__seg_ |= __b1 >> __s; *__first.__seg_ |= __b2 << __s; __result.__ctz_ = static_cast(__dn); } ++__first.__seg_; // __first.__ctz_ = 0; } // __first.__ctz_ == 0; // do middle words __storage_type __m = ~__storage_type(0) << __result.__ctz_; unsigned __clz_r = __bits_per_word - __result.__ctz_; for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first.__seg_) { __storage_type __b1 = *__first.__seg_; __storage_type __b2 = *__result.__seg_ & __m; *__result.__seg_ &= ~__m; *__result.__seg_ |= __b1 << 
__result.__ctz_; *__first.__seg_ = __b2 >> __result.__ctz_; ++__result.__seg_; __b2 = *__result.__seg_ & ~__m; *__result.__seg_ &= __m; *__result.__seg_ |= __b1 >> __clz_r; *__first.__seg_ |= __b2 << __clz_r; } // do last word if (__n > 0) { __m = ~__storage_type(0) >> (__bits_per_word - __n); __storage_type __b1 = *__first.__seg_ & __m; *__first.__seg_ &= ~__m; __storage_type __dn = _VSTD::min<__storage_type>(__n, __clz_r); __m = (~__storage_type(0) << __result.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn)); __storage_type __b2 = *__result.__seg_ & __m; *__result.__seg_ &= ~__m; *__result.__seg_ |= __b1 << __result.__ctz_; *__first.__seg_ |= __b2 >> __result.__ctz_; __result.__seg_ += (__dn + __result.__ctz_) / __bits_per_word; __result.__ctz_ = static_cast((__dn + __result.__ctz_) % __bits_per_word); __n -= __dn; if (__n > 0) { __m = ~__storage_type(0) >> (__bits_per_word - __n); __b2 = *__result.__seg_ & __m; *__result.__seg_ &= ~__m; *__result.__seg_ |= __b1 >> __dn; *__first.__seg_ |= __b2 << __dn; __result.__ctz_ = static_cast(__n); } } } return __result; } template inline _LIBCPP_INLINE_VISIBILITY __bit_iterator<__C2, false> swap_ranges(__bit_iterator<__C1, false> __first1, __bit_iterator<__C1, false> __last1, __bit_iterator<__C2, false> __first2) { if (__first1.__ctz_ == __first2.__ctz_) return _VSTD::__swap_ranges_aligned(__first1, __last1, __first2); return _VSTD::__swap_ranges_unaligned(__first1, __last1, __first2); } // rotate template struct __bit_array { typedef typename _Cp::difference_type difference_type; typedef typename _Cp::__storage_type __storage_type; typedef typename _Cp::__storage_pointer __storage_pointer; typedef typename _Cp::iterator iterator; static const unsigned __bits_per_word = _Cp::__bits_per_word; static const unsigned _Np = 4; difference_type __size_; __storage_type __word_[_Np]; _LIBCPP_INLINE_VISIBILITY static difference_type capacity() {return static_cast(_Np * __bits_per_word);} _LIBCPP_INLINE_VISIBILITY explicit __bit_array(difference_type __s) : __size_(__s) {} _LIBCPP_INLINE_VISIBILITY iterator begin() { return iterator(pointer_traits<__storage_pointer>::pointer_to(__word_[0]), 0); } _LIBCPP_INLINE_VISIBILITY iterator end() { return iterator(pointer_traits<__storage_pointer>::pointer_to(__word_[0]) + __size_ / __bits_per_word, static_cast(__size_ % __bits_per_word)); } }; template __bit_iterator<_Cp, false> rotate(__bit_iterator<_Cp, false> __first, __bit_iterator<_Cp, false> __middle, __bit_iterator<_Cp, false> __last) { typedef __bit_iterator<_Cp, false> _I1; typedef typename _I1::difference_type difference_type; difference_type __d1 = __middle - __first; difference_type __d2 = __last - __middle; _I1 __r = __first + __d2; while (__d1 != 0 && __d2 != 0) { if (__d1 <= __d2) { if (__d1 <= __bit_array<_Cp>::capacity()) { __bit_array<_Cp> __b(__d1); _VSTD::copy(__first, __middle, __b.begin()); _VSTD::copy(__b.begin(), __b.end(), _VSTD::copy(__middle, __last, __first)); break; } else { __bit_iterator<_Cp, false> __mp = _VSTD::swap_ranges(__first, __middle, __middle); __first = __middle; __middle = __mp; __d2 -= __d1; } } else { if (__d2 <= __bit_array<_Cp>::capacity()) { __bit_array<_Cp> __b(__d2); _VSTD::copy(__middle, __last, __b.begin()); _VSTD::copy_backward(__b.begin(), __b.end(), _VSTD::copy_backward(__first, __middle, __last)); break; } else { __bit_iterator<_Cp, false> __mp = __first + __d2; _VSTD::swap_ranges(__first, __mp, __middle); __first = __mp; __d1 -= __d2; } } } return __r; } // equal template bool 
__equal_unaligned(__bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) { typedef __bit_iterator<_Cp, _IC1> _It; typedef typename _It::difference_type difference_type; typedef typename _It::__storage_type __storage_type; static const int __bits_per_word = _It::__bits_per_word; difference_type __n = __last1 - __first1; if (__n > 0) { // do first word if (__first1.__ctz_ != 0) { unsigned __clz_f = __bits_per_word - __first1.__ctz_; difference_type __dn = _VSTD::min(static_cast(__clz_f), __n); __n -= __dn; __storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); __storage_type __b = *__first1.__seg_ & __m; unsigned __clz_r = __bits_per_word - __first2.__ctz_; __storage_type __ddn = _VSTD::min<__storage_type>(__dn, __clz_r); __m = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __ddn)); if (__first2.__ctz_ > __first1.__ctz_) { if ((*__first2.__seg_ & __m) != (__b << (__first2.__ctz_ - __first1.__ctz_))) return false; } else { if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ - __first2.__ctz_))) return false; } __first2.__seg_ += (__ddn + __first2.__ctz_) / __bits_per_word; __first2.__ctz_ = static_cast((__ddn + __first2.__ctz_) % __bits_per_word); __dn -= __ddn; if (__dn > 0) { __m = ~__storage_type(0) >> (__bits_per_word - __dn); if ((*__first2.__seg_ & __m) != (__b >> (__first1.__ctz_ + __ddn))) return false; __first2.__ctz_ = static_cast(__dn); } ++__first1.__seg_; // __first1.__ctz_ = 0; } // __first1.__ctz_ == 0; // do middle words unsigned __clz_r = __bits_per_word - __first2.__ctz_; __storage_type __m = ~__storage_type(0) << __first2.__ctz_; for (; __n >= __bits_per_word; __n -= __bits_per_word, ++__first1.__seg_) { __storage_type __b = *__first1.__seg_; if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_)) return false; ++__first2.__seg_; if ((*__first2.__seg_ & ~__m) != (__b >> __clz_r)) return false; } // do last word if (__n > 0) { __m = ~__storage_type(0) >> (__bits_per_word - __n); __storage_type __b = *__first1.__seg_ & __m; __storage_type __dn = _VSTD::min(__n, static_cast(__clz_r)); __m = (~__storage_type(0) << __first2.__ctz_) & (~__storage_type(0) >> (__clz_r - __dn)); if ((*__first2.__seg_ & __m) != (__b << __first2.__ctz_)) return false; __first2.__seg_ += (__dn + __first2.__ctz_) / __bits_per_word; __first2.__ctz_ = static_cast((__dn + __first2.__ctz_) % __bits_per_word); __n -= __dn; if (__n > 0) { __m = ~__storage_type(0) >> (__bits_per_word - __n); if ((*__first2.__seg_ & __m) != (__b >> __dn)) return false; } } } return true; } template bool __equal_aligned(__bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) { typedef __bit_iterator<_Cp, _IC1> _It; typedef typename _It::difference_type difference_type; typedef typename _It::__storage_type __storage_type; static const int __bits_per_word = _It::__bits_per_word; difference_type __n = __last1 - __first1; if (__n > 0) { // do first word if (__first1.__ctz_ != 0) { unsigned __clz = __bits_per_word - __first1.__ctz_; difference_type __dn = _VSTD::min(static_cast(__clz), __n); __n -= __dn; __storage_type __m = (~__storage_type(0) << __first1.__ctz_) & (~__storage_type(0) >> (__clz - __dn)); if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m)) return false; ++__first2.__seg_; ++__first1.__seg_; // __first1.__ctz_ = 0; // __first2.__ctz_ = 0; } // __first1.__ctz_ == 0; // __first2.__ctz_ == 0; // do middle words for (; __n >= 
__bits_per_word; __n -= __bits_per_word, ++__first1.__seg_, ++__first2.__seg_) if (*__first2.__seg_ != *__first1.__seg_) return false; // do last word if (__n > 0) { __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); if ((*__first2.__seg_ & __m) != (*__first1.__seg_ & __m)) return false; } } return true; } template inline _LIBCPP_INLINE_VISIBILITY bool equal(__bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) { if (__first1.__ctz_ == __first2.__ctz_) return _VSTD::__equal_aligned(__first1, __last1, __first2); return _VSTD::__equal_unaligned(__first1, __last1, __first2); } template class __bit_iterator { public: typedef typename _Cp::difference_type difference_type; typedef bool value_type; typedef __bit_iterator pointer; typedef typename conditional<_IsConst, __bit_const_reference<_Cp>, __bit_reference<_Cp> >::type reference; typedef random_access_iterator_tag iterator_category; private: typedef typename _Cp::__storage_type __storage_type; typedef typename conditional<_IsConst, typename _Cp::__const_storage_pointer, typename _Cp::__storage_pointer>::type __storage_pointer; static const unsigned __bits_per_word = _Cp::__bits_per_word; __storage_pointer __seg_; unsigned __ctz_; public: _LIBCPP_INLINE_VISIBILITY __bit_iterator() _NOEXCEPT #if _LIBCPP_STD_VER > 11 : __seg_(nullptr), __ctz_(0) #endif {} - // avoid re-declaring a copy constructor for the non-const version. - using __type_for_copy_to_const = - _If<_IsConst, __bit_iterator<_Cp, false>, struct __private_nat>; - + // When _IsConst=false, this is the copy constructor. + // It is non-trivial. Making it trivial would break ABI. + // When _IsConst=true, this is a converting constructor; + // the copy and move constructors are implicitly generated + // and trivial. _LIBCPP_INLINE_VISIBILITY - __bit_iterator(const __type_for_copy_to_const& __it) _NOEXCEPT + __bit_iterator(const __bit_iterator<_Cp, false>& __it) _NOEXCEPT : __seg_(__it.__seg_), __ctz_(__it.__ctz_) {} - // The non-const __bit_iterator has historically had a non-trivial - // copy constructor (as a quirk of its construction). We need to maintain - // this for ABI purposes. - using __type_for_abi_non_trivial_copy_ctor = - _If; - - _LIBCPP_INLINE_VISIBILITY - __bit_iterator(__type_for_abi_non_trivial_copy_ctor const& __it) _NOEXCEPT - : __seg_(__it.__seg_), __ctz_(__it.__ctz_) {} - - // Always declare the copy assignment operator since the implicit declaration - // is deprecated. + // When _IsConst=false, we have a user-provided copy constructor, + // so we must also provide a copy assignment operator because + // the implicit generation of a defaulted one is deprecated. + // When _IsConst=true, the assignment operators are + // implicitly generated and trivial. 
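// Editorial aside (illustration only, not part of this patch): how the
// constructor declared below resolves for each instantiation; _C stands for
// an arbitrary bit-container type and is used only for this example.
//
//   __bit_iterator<_C, false> __i = /* ... */;
//   __bit_iterator<_C, false> __j = __i; // _IsConst == false: this is the
//                                        // user-provided, non-trivial copy
//                                        // constructor kept for ABI stability
//   __bit_iterator<_C, true>  __k = __i; // _IsConst == true: this acts as a
//                                        // converting constructor from the
//                                        // non-const iterator; the copy
//                                        // constructor stays implicit and
//                                        // trivial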
_LIBCPP_INLINE_VISIBILITY - __bit_iterator& operator=(__bit_iterator const&) = default; + __bit_iterator& operator=(const _If<_IsConst, struct __private_nat, __bit_iterator>& __it) { + __seg_ = __it.__seg_; + __ctz_ = __it.__ctz_; + return *this; + } _LIBCPP_INLINE_VISIBILITY reference operator*() const _NOEXCEPT {return reference(__seg_, __storage_type(1) << __ctz_);} _LIBCPP_INLINE_VISIBILITY __bit_iterator& operator++() { if (__ctz_ != __bits_per_word-1) ++__ctz_; else { __ctz_ = 0; ++__seg_; } return *this; } _LIBCPP_INLINE_VISIBILITY __bit_iterator operator++(int) { __bit_iterator __tmp = *this; ++(*this); return __tmp; } _LIBCPP_INLINE_VISIBILITY __bit_iterator& operator--() { if (__ctz_ != 0) --__ctz_; else { __ctz_ = __bits_per_word - 1; --__seg_; } return *this; } _LIBCPP_INLINE_VISIBILITY __bit_iterator operator--(int) { __bit_iterator __tmp = *this; --(*this); return __tmp; } _LIBCPP_INLINE_VISIBILITY __bit_iterator& operator+=(difference_type __n) { if (__n >= 0) __seg_ += (__n + __ctz_) / __bits_per_word; else __seg_ += static_cast(__n - __bits_per_word + __ctz_ + 1) / static_cast(__bits_per_word); __n &= (__bits_per_word - 1); __ctz_ = static_cast((__n + __ctz_) % __bits_per_word); return *this; } _LIBCPP_INLINE_VISIBILITY __bit_iterator& operator-=(difference_type __n) { return *this += -__n; } _LIBCPP_INLINE_VISIBILITY __bit_iterator operator+(difference_type __n) const { __bit_iterator __t(*this); __t += __n; return __t; } _LIBCPP_INLINE_VISIBILITY __bit_iterator operator-(difference_type __n) const { __bit_iterator __t(*this); __t -= __n; return __t; } _LIBCPP_INLINE_VISIBILITY friend __bit_iterator operator+(difference_type __n, const __bit_iterator& __it) {return __it + __n;} _LIBCPP_INLINE_VISIBILITY friend difference_type operator-(const __bit_iterator& __x, const __bit_iterator& __y) {return (__x.__seg_ - __y.__seg_) * __bits_per_word + __x.__ctz_ - __y.__ctz_;} _LIBCPP_INLINE_VISIBILITY reference operator[](difference_type __n) const {return *(*this + __n);} _LIBCPP_INLINE_VISIBILITY friend bool operator==(const __bit_iterator& __x, const __bit_iterator& __y) {return __x.__seg_ == __y.__seg_ && __x.__ctz_ == __y.__ctz_;} _LIBCPP_INLINE_VISIBILITY friend bool operator!=(const __bit_iterator& __x, const __bit_iterator& __y) {return !(__x == __y);} _LIBCPP_INLINE_VISIBILITY friend bool operator<(const __bit_iterator& __x, const __bit_iterator& __y) {return __x.__seg_ < __y.__seg_ || (__x.__seg_ == __y.__seg_ && __x.__ctz_ < __y.__ctz_);} _LIBCPP_INLINE_VISIBILITY friend bool operator>(const __bit_iterator& __x, const __bit_iterator& __y) {return __y < __x;} _LIBCPP_INLINE_VISIBILITY friend bool operator<=(const __bit_iterator& __x, const __bit_iterator& __y) {return !(__y < __x);} _LIBCPP_INLINE_VISIBILITY friend bool operator>=(const __bit_iterator& __x, const __bit_iterator& __y) {return !(__x < __y);} private: _LIBCPP_INLINE_VISIBILITY __bit_iterator(__storage_pointer __s, unsigned __ctz) _NOEXCEPT : __seg_(__s), __ctz_(__ctz) {} friend typename _Cp::__self; friend class __bit_reference<_Cp>; friend class __bit_const_reference<_Cp>; friend class __bit_iterator<_Cp, true>; template friend struct __bit_array; template friend void __fill_n_false(__bit_iterator<_Dp, false> __first, typename _Dp::size_type __n); template friend void __fill_n_true(__bit_iterator<_Dp, false> __first, typename _Dp::size_type __n); template friend __bit_iterator<_Dp, false> __copy_aligned(__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); 
template friend __bit_iterator<_Dp, false> __copy_unaligned(__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); template friend __bit_iterator<_Dp, false> copy(__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); template friend __bit_iterator<_Dp, false> __copy_backward_aligned(__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); template friend __bit_iterator<_Dp, false> __copy_backward_unaligned(__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); template friend __bit_iterator<_Dp, false> copy_backward(__bit_iterator<_Dp, _IC> __first, __bit_iterator<_Dp, _IC> __last, __bit_iterator<_Dp, false> __result); template friend __bit_iterator<__C2, false> __swap_ranges_aligned(__bit_iterator<__C1, false>, __bit_iterator<__C1, false>, __bit_iterator<__C2, false>); template friend __bit_iterator<__C2, false> __swap_ranges_unaligned(__bit_iterator<__C1, false>, __bit_iterator<__C1, false>, __bit_iterator<__C2, false>); template friend __bit_iterator<__C2, false> swap_ranges(__bit_iterator<__C1, false>, __bit_iterator<__C1, false>, __bit_iterator<__C2, false>); template friend __bit_iterator<_Dp, false> rotate(__bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>, __bit_iterator<_Dp, false>); template friend bool __equal_aligned(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>); template friend bool __equal_unaligned(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>); template friend bool equal(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>); template friend __bit_iterator<_Dp, _IC> __find_bool_true(__bit_iterator<_Dp, _IC>, typename _Dp::size_type); template friend __bit_iterator<_Dp, _IC> __find_bool_false(__bit_iterator<_Dp, _IC>, typename _Dp::size_type); template friend typename __bit_iterator<_Dp, _IC>::difference_type __count_bool_true(__bit_iterator<_Dp, _IC>, typename _Dp::size_type); template friend typename __bit_iterator<_Dp, _IC>::difference_type __count_bool_false(__bit_iterator<_Dp, _IC>, typename _Dp::size_type); }; _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS #endif // _LIBCPP___BIT_REFERENCE diff --git a/contrib/llvm-project/libcxx/include/memory b/contrib/llvm-project/libcxx/include/memory index efb10c8fd25b..62235cf72b35 100644 --- a/contrib/llvm-project/libcxx/include/memory +++ b/contrib/llvm-project/libcxx/include/memory @@ -1,4294 +1,4314 @@ // -*- C++ -*- //===-------------------------- memory ------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef _LIBCPP_MEMORY #define _LIBCPP_MEMORY /* memory synopsis namespace std { struct allocator_arg_t { }; inline constexpr allocator_arg_t allocator_arg = allocator_arg_t(); template struct uses_allocator; template struct pointer_traits { typedef Ptr pointer; typedef
<details> element_type; typedef <details> difference_type; template <class U> using rebind = <details>; static pointer pointer_to(<details>); }; template <class T> struct pointer_traits<T*> { typedef T* pointer; typedef T element_type; typedef ptrdiff_t difference_type; template <class U> using rebind = U*; static pointer pointer_to(<details>
) noexcept; // constexpr in C++20 }; template constexpr T* to_address(T* p) noexcept; // C++20 template constexpr auto to_address(const Ptr& p) noexcept; // C++20 template struct allocator_traits { typedef Alloc allocator_type; typedef typename allocator_type::value_type value_type; typedef Alloc::pointer | value_type* pointer; typedef Alloc::const_pointer | pointer_traits::rebind const_pointer; typedef Alloc::void_pointer | pointer_traits::rebind void_pointer; typedef Alloc::const_void_pointer | pointer_traits::rebind const_void_pointer; typedef Alloc::difference_type | pointer_traits::difference_type difference_type; typedef Alloc::size_type | make_unsigned::type size_type; typedef Alloc::propagate_on_container_copy_assignment | false_type propagate_on_container_copy_assignment; typedef Alloc::propagate_on_container_move_assignment | false_type propagate_on_container_move_assignment; typedef Alloc::propagate_on_container_swap | false_type propagate_on_container_swap; typedef Alloc::is_always_equal | is_empty is_always_equal; template using rebind_alloc = Alloc::rebind::other | Alloc; template using rebind_traits = allocator_traits>; static pointer allocate(allocator_type& a, size_type n); // constexpr and [[nodiscard]] in C++20 static pointer allocate(allocator_type& a, size_type n, const_void_pointer hint); // constexpr and [[nodiscard]] in C++20 static void deallocate(allocator_type& a, pointer p, size_type n) noexcept; // constexpr in C++20 template static void construct(allocator_type& a, T* p, Args&&... args); // constexpr in C++20 template static void destroy(allocator_type& a, T* p); // constexpr in C++20 static size_type max_size(const allocator_type& a); // noexcept in C++14, constexpr in C++20 static allocator_type select_on_container_copy_construction(const allocator_type& a); // constexpr in C++20 }; template <> class allocator // removed in C++20 { public: typedef void* pointer; // deprecated in C++17 typedef const void* const_pointer; // deprecated in C++17 typedef void value_type; // deprecated in C++17 template struct rebind {typedef allocator<_Up> other;}; // deprecated in C++17 }; template class allocator { public: typedef size_t size_type; typedef ptrdiff_t difference_type; typedef T* pointer; // deprecated in C++17, removed in C++20 typedef const T* const_pointer; // deprecated in C++17, removed in C++20 typedef typename add_lvalue_reference::type reference; // deprecated in C++17, removed in C++20 typedef typename add_lvalue_reference::type const_reference; // deprecated in C++17, removed in C++20 typedef T value_type; template struct rebind {typedef allocator other;}; // deprecated in C++17, removed in C++20 typedef true_type propagate_on_container_move_assignment; typedef true_type is_always_equal; constexpr allocator() noexcept; // constexpr in C++20 constexpr allocator(const allocator&) noexcept; // constexpr in C++20 template constexpr allocator(const allocator&) noexcept; // constexpr in C++20 ~allocator(); // constexpr in C++20 pointer address(reference x) const noexcept; // deprecated in C++17, removed in C++20 const_pointer address(const_reference x) const noexcept; // deprecated in C++17, removed in C++20 T* allocate(size_t n, const void* hint); // deprecated in C++17, removed in C++20 T* allocate(size_t n); // constexpr in C++20 void deallocate(T* p, size_t n) noexcept; // constexpr in C++20 size_type max_size() const noexcept; // deprecated in C++17, removed in C++20 template void construct(U* p, Args&&... 
args); // deprecated in C++17, removed in C++20 template void destroy(U* p); // deprecated in C++17, removed in C++20 }; template bool operator==(const allocator&, const allocator&) noexcept; // constexpr in C++20 template bool operator!=(const allocator&, const allocator&) noexcept; // constexpr in C++20 template class raw_storage_iterator : public iterator // purposefully not C++03 { public: explicit raw_storage_iterator(OutputIterator x); raw_storage_iterator& operator*(); raw_storage_iterator& operator=(const T& element); raw_storage_iterator& operator++(); raw_storage_iterator operator++(int); }; template pair get_temporary_buffer(ptrdiff_t n) noexcept; template void return_temporary_buffer(T* p) noexcept; template T* addressof(T& r) noexcept; template T* addressof(const T&& r) noexcept = delete; template ForwardIterator uninitialized_copy(InputIterator first, InputIterator last, ForwardIterator result); template ForwardIterator uninitialized_copy_n(InputIterator first, Size n, ForwardIterator result); template void uninitialized_fill(ForwardIterator first, ForwardIterator last, const T& x); template ForwardIterator uninitialized_fill_n(ForwardIterator first, Size n, const T& x); template constexpr T* construct_at(T* location, Args&& ...args); // since C++20 template void destroy_at(T* location); // constexpr in C++20 template void destroy(ForwardIterator first, ForwardIterator last); // constexpr in C++20 template ForwardIterator destroy_n(ForwardIterator first, Size n); // constexpr in C++20 template ForwardIterator uninitialized_move(InputIterator first, InputIterator last, ForwardIterator result); template pair uninitialized_move_n(InputIterator first, Size n, ForwardIterator result); template void uninitialized_value_construct(ForwardIterator first, ForwardIterator last); template ForwardIterator uninitialized_value_construct_n(ForwardIterator first, Size n); template void uninitialized_default_construct(ForwardIterator first, ForwardIterator last); template ForwardIterator uninitialized_default_construct_n(ForwardIterator first, Size n); template struct auto_ptr_ref {}; // deprecated in C++11, removed in C++17 template class auto_ptr // deprecated in C++11, removed in C++17 { public: typedef X element_type; explicit auto_ptr(X* p =0) throw(); auto_ptr(auto_ptr&) throw(); template auto_ptr(auto_ptr&) throw(); auto_ptr& operator=(auto_ptr&) throw(); template auto_ptr& operator=(auto_ptr&) throw(); auto_ptr& operator=(auto_ptr_ref r) throw(); ~auto_ptr() throw(); typename add_lvalue_reference::type operator*() const throw(); X* operator->() const throw(); X* get() const throw(); X* release() throw(); void reset(X* p =0) throw(); auto_ptr(auto_ptr_ref) throw(); template operator auto_ptr_ref() throw(); template operator auto_ptr() throw(); }; template struct default_delete { constexpr default_delete() noexcept = default; template default_delete(const default_delete&) noexcept; void operator()(T*) const noexcept; }; template struct default_delete { constexpr default_delete() noexcept = default; void operator()(T*) const noexcept; template void operator()(U*) const = delete; }; template > class unique_ptr { public: typedef see below pointer; typedef T element_type; typedef D deleter_type; // constructors constexpr unique_ptr() noexcept; explicit unique_ptr(pointer p) noexcept; unique_ptr(pointer p, see below d1) noexcept; unique_ptr(pointer p, see below d2) noexcept; unique_ptr(unique_ptr&& u) noexcept; unique_ptr(nullptr_t) noexcept : unique_ptr() { } template unique_ptr(unique_ptr&& 
u) noexcept; template unique_ptr(auto_ptr&& u) noexcept; // removed in C++17 // destructor ~unique_ptr(); // assignment unique_ptr& operator=(unique_ptr&& u) noexcept; template unique_ptr& operator=(unique_ptr&& u) noexcept; unique_ptr& operator=(nullptr_t) noexcept; // observers typename add_lvalue_reference::type operator*() const; pointer operator->() const noexcept; pointer get() const noexcept; deleter_type& get_deleter() noexcept; const deleter_type& get_deleter() const noexcept; explicit operator bool() const noexcept; // modifiers pointer release() noexcept; void reset(pointer p = pointer()) noexcept; void swap(unique_ptr& u) noexcept; }; template class unique_ptr { public: typedef implementation-defined pointer; typedef T element_type; typedef D deleter_type; // constructors constexpr unique_ptr() noexcept; explicit unique_ptr(pointer p) noexcept; unique_ptr(pointer p, see below d) noexcept; unique_ptr(pointer p, see below d) noexcept; unique_ptr(unique_ptr&& u) noexcept; unique_ptr(nullptr_t) noexcept : unique_ptr() { } // destructor ~unique_ptr(); // assignment unique_ptr& operator=(unique_ptr&& u) noexcept; unique_ptr& operator=(nullptr_t) noexcept; // observers T& operator[](size_t i) const; pointer get() const noexcept; deleter_type& get_deleter() noexcept; const deleter_type& get_deleter() const noexcept; explicit operator bool() const noexcept; // modifiers pointer release() noexcept; void reset(pointer p = pointer()) noexcept; void reset(nullptr_t) noexcept; template void reset(U) = delete; void swap(unique_ptr& u) noexcept; }; template void swap(unique_ptr& x, unique_ptr& y) noexcept; template bool operator==(const unique_ptr& x, const unique_ptr& y); template bool operator!=(const unique_ptr& x, const unique_ptr& y); template bool operator<(const unique_ptr& x, const unique_ptr& y); template bool operator<=(const unique_ptr& x, const unique_ptr& y); template bool operator>(const unique_ptr& x, const unique_ptr& y); template bool operator>=(const unique_ptr& x, const unique_ptr& y); template bool operator==(const unique_ptr& x, nullptr_t) noexcept; template bool operator==(nullptr_t, const unique_ptr& y) noexcept; template bool operator!=(const unique_ptr& x, nullptr_t) noexcept; template bool operator!=(nullptr_t, const unique_ptr& y) noexcept; template bool operator<(const unique_ptr& x, nullptr_t); template bool operator<(nullptr_t, const unique_ptr& y); template bool operator<=(const unique_ptr& x, nullptr_t); template bool operator<=(nullptr_t, const unique_ptr& y); template bool operator>(const unique_ptr& x, nullptr_t); template bool operator>(nullptr_t, const unique_ptr& y); template bool operator>=(const unique_ptr& x, nullptr_t); template bool operator>=(nullptr_t, const unique_ptr& y); class bad_weak_ptr : public std::exception { bad_weak_ptr() noexcept; }; template unique_ptr make_unique(Args&&... args); // C++14 template unique_ptr make_unique(size_t n); // C++14 template unspecified make_unique(Args&&...) 
= delete; // C++14, T == U[N] template basic_ostream& operator<< (basic_ostream& os, unique_ptr const& p); template class shared_ptr { public: typedef T element_type; typedef weak_ptr weak_type; // C++17 // constructors: constexpr shared_ptr() noexcept; template explicit shared_ptr(Y* p); template shared_ptr(Y* p, D d); template shared_ptr(Y* p, D d, A a); template shared_ptr(nullptr_t p, D d); template shared_ptr(nullptr_t p, D d, A a); template shared_ptr(const shared_ptr& r, T *p) noexcept; shared_ptr(const shared_ptr& r) noexcept; template shared_ptr(const shared_ptr& r) noexcept; shared_ptr(shared_ptr&& r) noexcept; template shared_ptr(shared_ptr&& r) noexcept; template explicit shared_ptr(const weak_ptr& r); template shared_ptr(auto_ptr&& r); // removed in C++17 template shared_ptr(unique_ptr&& r); shared_ptr(nullptr_t) : shared_ptr() { } // destructor: ~shared_ptr(); // assignment: shared_ptr& operator=(const shared_ptr& r) noexcept; template shared_ptr& operator=(const shared_ptr& r) noexcept; shared_ptr& operator=(shared_ptr&& r) noexcept; template shared_ptr& operator=(shared_ptr&& r); template shared_ptr& operator=(auto_ptr&& r); // removed in C++17 template shared_ptr& operator=(unique_ptr&& r); // modifiers: void swap(shared_ptr& r) noexcept; void reset() noexcept; template void reset(Y* p); template void reset(Y* p, D d); template void reset(Y* p, D d, A a); // observers: T* get() const noexcept; T& operator*() const noexcept; T* operator->() const noexcept; long use_count() const noexcept; bool unique() const noexcept; explicit operator bool() const noexcept; template bool owner_before(shared_ptr const& b) const noexcept; template bool owner_before(weak_ptr const& b) const noexcept; }; template shared_ptr(weak_ptr) -> shared_ptr; template shared_ptr(unique_ptr) -> shared_ptr; // shared_ptr comparisons: template bool operator==(shared_ptr const& a, shared_ptr const& b) noexcept; template bool operator!=(shared_ptr const& a, shared_ptr const& b) noexcept; template bool operator<(shared_ptr const& a, shared_ptr const& b) noexcept; template bool operator>(shared_ptr const& a, shared_ptr const& b) noexcept; template bool operator<=(shared_ptr const& a, shared_ptr const& b) noexcept; template bool operator>=(shared_ptr const& a, shared_ptr const& b) noexcept; template bool operator==(const shared_ptr& x, nullptr_t) noexcept; template bool operator==(nullptr_t, const shared_ptr& y) noexcept; template bool operator!=(const shared_ptr& x, nullptr_t) noexcept; template bool operator!=(nullptr_t, const shared_ptr& y) noexcept; template bool operator<(const shared_ptr& x, nullptr_t) noexcept; template bool operator<(nullptr_t, const shared_ptr& y) noexcept; template bool operator<=(const shared_ptr& x, nullptr_t) noexcept; template bool operator<=(nullptr_t, const shared_ptr& y) noexcept; template bool operator>(const shared_ptr& x, nullptr_t) noexcept; template bool operator>(nullptr_t, const shared_ptr& y) noexcept; template bool operator>=(const shared_ptr& x, nullptr_t) noexcept; template bool operator>=(nullptr_t, const shared_ptr& y) noexcept; // shared_ptr specialized algorithms: template void swap(shared_ptr& a, shared_ptr& b) noexcept; // shared_ptr casts: template shared_ptr static_pointer_cast(shared_ptr const& r) noexcept; template shared_ptr dynamic_pointer_cast(shared_ptr const& r) noexcept; template shared_ptr const_pointer_cast(shared_ptr const& r) noexcept; // shared_ptr I/O: template basic_ostream& operator<< (basic_ostream& os, shared_ptr const& p); // shared_ptr 
get_deleter: template D* get_deleter(shared_ptr const& p) noexcept; template shared_ptr make_shared(Args&&... args); template shared_ptr allocate_shared(const A& a, Args&&... args); template class weak_ptr { public: typedef T element_type; // constructors constexpr weak_ptr() noexcept; template weak_ptr(shared_ptr const& r) noexcept; weak_ptr(weak_ptr const& r) noexcept; template weak_ptr(weak_ptr const& r) noexcept; weak_ptr(weak_ptr&& r) noexcept; // C++14 template weak_ptr(weak_ptr&& r) noexcept; // C++14 // destructor ~weak_ptr(); // assignment weak_ptr& operator=(weak_ptr const& r) noexcept; template weak_ptr& operator=(weak_ptr const& r) noexcept; template weak_ptr& operator=(shared_ptr const& r) noexcept; weak_ptr& operator=(weak_ptr&& r) noexcept; // C++14 template weak_ptr& operator=(weak_ptr&& r) noexcept; // C++14 // modifiers void swap(weak_ptr& r) noexcept; void reset() noexcept; // observers long use_count() const noexcept; bool expired() const noexcept; shared_ptr lock() const noexcept; template bool owner_before(shared_ptr const& b) const noexcept; template bool owner_before(weak_ptr const& b) const noexcept; }; template weak_ptr(shared_ptr) -> weak_ptr; // weak_ptr specialized algorithms: template void swap(weak_ptr& a, weak_ptr& b) noexcept; // class owner_less: template struct owner_less; template struct owner_less > : binary_function, shared_ptr, bool> { typedef bool result_type; bool operator()(shared_ptr const&, shared_ptr const&) const noexcept; bool operator()(shared_ptr const&, weak_ptr const&) const noexcept; bool operator()(weak_ptr const&, shared_ptr const&) const noexcept; }; template struct owner_less > : binary_function, weak_ptr, bool> { typedef bool result_type; bool operator()(weak_ptr const&, weak_ptr const&) const noexcept; bool operator()(shared_ptr const&, weak_ptr const&) const noexcept; bool operator()(weak_ptr const&, shared_ptr const&) const noexcept; }; template <> // Added in C++14 struct owner_less { template bool operator()( shared_ptr<_Tp> const& __x, shared_ptr<_Up> const& __y) const noexcept; template bool operator()( shared_ptr<_Tp> const& __x, weak_ptr<_Up> const& __y) const noexcept; template bool operator()( weak_ptr<_Tp> const& __x, shared_ptr<_Up> const& __y) const noexcept; template bool operator()( weak_ptr<_Tp> const& __x, weak_ptr<_Up> const& __y) const noexcept; typedef void is_transparent; }; template class enable_shared_from_this { protected: constexpr enable_shared_from_this() noexcept; enable_shared_from_this(enable_shared_from_this const&) noexcept; enable_shared_from_this& operator=(enable_shared_from_this const&) noexcept; ~enable_shared_from_this(); public: shared_ptr shared_from_this(); shared_ptr shared_from_this() const; }; template bool atomic_is_lock_free(const shared_ptr* p); template shared_ptr atomic_load(const shared_ptr* p); template shared_ptr atomic_load_explicit(const shared_ptr* p, memory_order mo); template void atomic_store(shared_ptr* p, shared_ptr r); template void atomic_store_explicit(shared_ptr* p, shared_ptr r, memory_order mo); template shared_ptr atomic_exchange(shared_ptr* p, shared_ptr r); template shared_ptr atomic_exchange_explicit(shared_ptr* p, shared_ptr r, memory_order mo); template bool atomic_compare_exchange_weak(shared_ptr* p, shared_ptr* v, shared_ptr w); template bool atomic_compare_exchange_strong( shared_ptr* p, shared_ptr* v, shared_ptr w); template bool atomic_compare_exchange_weak_explicit(shared_ptr* p, shared_ptr* v, shared_ptr w, memory_order success, memory_order failure); 
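A usage sketch for the atomic shared_ptr accessors declared above (illustrative only; Config, g_config, publish and read are hypothetical names, not part of this header). The point is that the pointer itself, not the pointee, becomes safe to replace concurrently; plain concurrent assignment to a shared_ptr would be a data race.

    #include <memory>
    struct Config { int verbosity; };                    // hypothetical payload
    std::shared_ptr<Config> g_config;                    // shared between threads

    void publish(int v) {                                // writer thread
        std::atomic_store(&g_config, std::make_shared<Config>(Config{v}));
    }

    int read() {                                         // reader thread
        std::shared_ptr<Config> snap = std::atomic_load(&g_config);
        return snap ? snap->verbosity : 0;               // snap keeps the object alive
    }
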
template bool atomic_compare_exchange_strong_explicit(shared_ptr* p, shared_ptr* v, shared_ptr w, memory_order success, memory_order failure); // Hash support template struct hash; template struct hash >; template struct hash >; template inline constexpr bool uses_allocator_v = uses_allocator::value; // Pointer safety enum class pointer_safety { relaxed, preferred, strict }; void declare_reachable(void *p); template T *undeclare_reachable(T *p); void declare_no_pointers(char *p, size_t n); void undeclare_no_pointers(char *p, size_t n); pointer_safety get_pointer_safety() noexcept; void* align(size_t alignment, size_t size, void*& ptr, size_t& space); } // std */ #include <__config> #include <__availability> #include #include #include #include #include #include #include #include #include <__functional_base> #include #include #include #include #include <__memory/allocator_traits.h> #include <__memory/base.h> #include <__memory/pointer_traits.h> #include <__memory/utilities.h> #if !defined(_LIBCPP_HAS_NO_ATOMIC_HEADER) # include #endif #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header #endif _LIBCPP_PUSH_MACROS #include <__undef_macros> _LIBCPP_BEGIN_NAMESPACE_STD template inline _LIBCPP_INLINE_VISIBILITY _ValueType __libcpp_relaxed_load(_ValueType const* __value) { #if !defined(_LIBCPP_HAS_NO_THREADS) && \ defined(__ATOMIC_RELAXED) && \ (__has_builtin(__atomic_load_n) || defined(_LIBCPP_COMPILER_GCC)) return __atomic_load_n(__value, __ATOMIC_RELAXED); #else return *__value; #endif } template inline _LIBCPP_INLINE_VISIBILITY _ValueType __libcpp_acquire_load(_ValueType const* __value) { #if !defined(_LIBCPP_HAS_NO_THREADS) && \ defined(__ATOMIC_ACQUIRE) && \ (__has_builtin(__atomic_load_n) || defined(_LIBCPP_COMPILER_GCC)) return __atomic_load_n(__value, __ATOMIC_ACQUIRE); #else return *__value; #endif } template struct __to_address_helper; template <> struct __to_address_helper { template using __return_type = decltype(pointer_traits<_Pointer>::to_address(_VSTD::declval())); template _LIBCPP_CONSTEXPR static __return_type<_Pointer> __do_it(const _Pointer &__p) _NOEXCEPT { return pointer_traits<_Pointer>::to_address(__p); } }; template using __choose_to_address = __to_address_helper<_IsValidExpansion<__to_address_helper<_Dummy>::template __return_type, _Pointer>::value>; template inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR _Tp* __to_address(_Tp* __p) _NOEXCEPT { static_assert(!is_function<_Tp>::value, "_Tp is a function type"); return __p; } template inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR typename __choose_to_address<_Pointer>::template __return_type<_Pointer> __to_address(const _Pointer& __p) _NOEXCEPT { return __choose_to_address<_Pointer>::__do_it(__p); } template <> struct __to_address_helper { template using __return_type = typename pointer_traits<_Pointer>::element_type*; template _LIBCPP_CONSTEXPR static __return_type<_Pointer> __do_it(const _Pointer &__p) _NOEXCEPT { return _VSTD::__to_address(__p.operator->()); } }; #if _LIBCPP_STD_VER > 17 template inline _LIBCPP_INLINE_VISIBILITY constexpr _Tp* to_address(_Tp* __p) _NOEXCEPT { static_assert(!is_function_v<_Tp>, "_Tp is a function type"); return __p; } template inline _LIBCPP_INLINE_VISIBILITY constexpr auto to_address(const _Pointer& __p) _NOEXCEPT { return _VSTD::__to_address(__p); } #endif template class allocator; #if _LIBCPP_STD_VER <= 17 template <> class _LIBCPP_TEMPLATE_VIS allocator { public: _LIBCPP_DEPRECATED_IN_CXX17 typedef void* pointer; _LIBCPP_DEPRECATED_IN_CXX17 
typedef const void* const_pointer; _LIBCPP_DEPRECATED_IN_CXX17 typedef void value_type; template struct _LIBCPP_DEPRECATED_IN_CXX17 rebind {typedef allocator<_Up> other;}; }; template <> class _LIBCPP_TEMPLATE_VIS allocator { public: _LIBCPP_DEPRECATED_IN_CXX17 typedef const void* pointer; _LIBCPP_DEPRECATED_IN_CXX17 typedef const void* const_pointer; _LIBCPP_DEPRECATED_IN_CXX17 typedef const void value_type; template struct _LIBCPP_DEPRECATED_IN_CXX17 rebind {typedef allocator<_Up> other;}; }; #endif +// This class provides a non-trivial default constructor to the class that derives from it +// if the condition is satisfied. +// +// The second template parameter exists to allow giving a unique type to __non_trivial_if, +// which makes it possible to avoid breaking the ABI when making this a base class of an +// existing class. Without that, imagine we have classes D1 and D2, both of which used to +// have no base classes, but which now derive from __non_trivial_if. The layout of a class +// that inherits from both D1 and D2 will change because the two __non_trivial_if base +// classes are not allowed to share the same address. +// +// By making those __non_trivial_if base classes unique, we work around this problem and +// it is safe to start deriving from __non_trivial_if in existing classes. +template +struct __non_trivial_if { }; + +template +struct __non_trivial_if { + _LIBCPP_INLINE_VISIBILITY + _LIBCPP_CONSTEXPR __non_trivial_if() _NOEXCEPT { } +}; + // allocator +// +// Note: For ABI compatibility between C++20 and previous standards, we make +// allocator trivial in C++20. template class _LIBCPP_TEMPLATE_VIS allocator + : private __non_trivial_if::value, allocator<_Tp> > { public: typedef size_t size_type; typedef ptrdiff_t difference_type; typedef _Tp value_type; typedef true_type propagate_on_container_move_assignment; typedef true_type is_always_equal; _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 - allocator() _NOEXCEPT { } + allocator() _NOEXCEPT _LIBCPP_DEFAULT template _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 allocator(const allocator<_Up>&) _NOEXCEPT { } _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _Tp* allocate(size_t __n) { if (__n > allocator_traits::max_size(*this)) __throw_length_error("allocator::allocate(size_t n)" " 'n' exceeds maximum supported size"); if (__libcpp_is_constant_evaluated()) { return static_cast<_Tp*>(::operator new(__n * sizeof(_Tp))); } else { return static_cast<_Tp*>(_VSTD::__libcpp_allocate(__n * sizeof(_Tp), _LIBCPP_ALIGNOF(_Tp))); } } _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 void deallocate(_Tp* __p, size_t __n) _NOEXCEPT { if (__libcpp_is_constant_evaluated()) { ::operator delete(__p); } else { _VSTD::__libcpp_deallocate((void*)__p, __n * sizeof(_Tp), _LIBCPP_ALIGNOF(_Tp)); } } // C++20 Removed members #if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_MEMBERS) _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp* pointer; _LIBCPP_DEPRECATED_IN_CXX17 typedef const _Tp* const_pointer; _LIBCPP_DEPRECATED_IN_CXX17 typedef _Tp& reference; _LIBCPP_DEPRECATED_IN_CXX17 typedef const _Tp& const_reference; template struct _LIBCPP_DEPRECATED_IN_CXX17 rebind { typedef allocator<_Up> other; }; _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_INLINE_VISIBILITY pointer address(reference __x) const _NOEXCEPT { return _VSTD::addressof(__x); } _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_INLINE_VISIBILITY const_pointer address(const_reference __x) const _NOEXCEPT { return 
_VSTD::addressof(__x); } _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY _LIBCPP_DEPRECATED_IN_CXX17 _Tp* allocate(size_t __n, const void*) { return allocate(__n); } _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_INLINE_VISIBILITY size_type max_size() const _NOEXCEPT { return size_type(~0) / sizeof(_Tp); } template _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_INLINE_VISIBILITY void construct(_Up* __p, _Args&&... __args) { ::new ((void*)__p) _Up(_VSTD::forward<_Args>(__args)...); } _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_INLINE_VISIBILITY void destroy(pointer __p) { __p->~_Tp(); } #endif }; template class _LIBCPP_TEMPLATE_VIS allocator + : private __non_trivial_if::value, allocator > { public: typedef size_t size_type; typedef ptrdiff_t difference_type; typedef const _Tp value_type; typedef true_type propagate_on_container_move_assignment; typedef true_type is_always_equal; _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 - allocator() _NOEXCEPT { } + allocator() _NOEXCEPT _LIBCPP_DEFAULT template _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 allocator(const allocator<_Up>&) _NOEXCEPT { } _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 const _Tp* allocate(size_t __n) { if (__n > allocator_traits::max_size(*this)) __throw_length_error("allocator::allocate(size_t n)" " 'n' exceeds maximum supported size"); if (__libcpp_is_constant_evaluated()) { return static_cast(::operator new(__n * sizeof(_Tp))); } else { return static_cast(_VSTD::__libcpp_allocate(__n * sizeof(_Tp), _LIBCPP_ALIGNOF(_Tp))); } } _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 void deallocate(const _Tp* __p, size_t __n) { if (__libcpp_is_constant_evaluated()) { ::operator delete(const_cast<_Tp*>(__p)); } else { _VSTD::__libcpp_deallocate((void*) const_cast<_Tp *>(__p), __n * sizeof(_Tp), _LIBCPP_ALIGNOF(_Tp)); } } // C++20 Removed members #if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_MEMBERS) _LIBCPP_DEPRECATED_IN_CXX17 typedef const _Tp* pointer; _LIBCPP_DEPRECATED_IN_CXX17 typedef const _Tp* const_pointer; _LIBCPP_DEPRECATED_IN_CXX17 typedef const _Tp& reference; _LIBCPP_DEPRECATED_IN_CXX17 typedef const _Tp& const_reference; template struct _LIBCPP_DEPRECATED_IN_CXX17 rebind { typedef allocator<_Up> other; }; _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_INLINE_VISIBILITY const_pointer address(const_reference __x) const _NOEXCEPT { return _VSTD::addressof(__x); } _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY _LIBCPP_DEPRECATED_IN_CXX17 const _Tp* allocate(size_t __n, const void*) { return allocate(__n); } _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_INLINE_VISIBILITY size_type max_size() const _NOEXCEPT { return size_type(~0) / sizeof(_Tp); } template _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_INLINE_VISIBILITY void construct(_Up* __p, _Args&&... 
__args) { ::new ((void*)__p) _Up(_VSTD::forward<_Args>(__args)...); } _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_INLINE_VISIBILITY void destroy(pointer __p) { __p->~_Tp(); } #endif }; template inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 bool operator==(const allocator<_Tp>&, const allocator<_Up>&) _NOEXCEPT {return true;} template inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 bool operator!=(const allocator<_Tp>&, const allocator<_Up>&) _NOEXCEPT {return false;} template _LIBCPP_INLINE_VISIBILITY void __construct_forward_with_exception_guarantees(_Alloc& __a, _Ptr __begin1, _Ptr __end1, _Ptr& __begin2) { static_assert(__is_cpp17_move_insertable<_Alloc>::value, "The specified type does not meet the requirements of Cpp17MoveInsertable"); typedef allocator_traits<_Alloc> _Traits; for (; __begin1 != __end1; ++__begin1, (void)++__begin2) { _Traits::construct(__a, _VSTD::__to_address(__begin2), #ifdef _LIBCPP_NO_EXCEPTIONS _VSTD::move(*__begin1) #else _VSTD::move_if_noexcept(*__begin1) #endif ); } } template ::value || !__has_construct<_Alloc, _Tp*, _Tp>::value) && is_trivially_move_constructible<_Tp>::value >::type> _LIBCPP_INLINE_VISIBILITY void __construct_forward_with_exception_guarantees(_Alloc&, _Tp* __begin1, _Tp* __end1, _Tp*& __begin2) { ptrdiff_t _Np = __end1 - __begin1; if (_Np > 0) { _VSTD::memcpy(__begin2, __begin1, _Np * sizeof(_Tp)); __begin2 += _Np; } } template _LIBCPP_INLINE_VISIBILITY void __construct_range_forward(_Alloc& __a, _Iter __begin1, _Iter __end1, _Ptr& __begin2) { typedef allocator_traits<_Alloc> _Traits; for (; __begin1 != __end1; ++__begin1, (void) ++__begin2) { _Traits::construct(__a, _VSTD::__to_address(__begin2), *__begin1); } } template ::type, class _RawDest = typename remove_const<_Dest>::type, class = typename enable_if< is_trivially_copy_constructible<_Dest>::value && is_same<_RawSource, _RawDest>::value && (__is_default_allocator<_Alloc>::value || !__has_construct<_Alloc, _Dest*, _Source&>::value) >::type> _LIBCPP_INLINE_VISIBILITY void __construct_range_forward(_Alloc&, _Source* __begin1, _Source* __end1, _Dest*& __begin2) { ptrdiff_t _Np = __end1 - __begin1; if (_Np > 0) { _VSTD::memcpy(const_cast<_RawDest*>(__begin2), __begin1, _Np * sizeof(_Dest)); __begin2 += _Np; } } template _LIBCPP_INLINE_VISIBILITY void __construct_backward_with_exception_guarantees(_Alloc& __a, _Ptr __begin1, _Ptr __end1, _Ptr& __end2) { static_assert(__is_cpp17_move_insertable<_Alloc>::value, "The specified type does not meet the requirements of Cpp17MoveInsertable"); typedef allocator_traits<_Alloc> _Traits; while (__end1 != __begin1) { _Traits::construct(__a, _VSTD::__to_address(__end2 - 1), #ifdef _LIBCPP_NO_EXCEPTIONS _VSTD::move(*--__end1) #else _VSTD::move_if_noexcept(*--__end1) #endif ); --__end2; } } template ::value || !__has_construct<_Alloc, _Tp*, _Tp>::value) && is_trivially_move_constructible<_Tp>::value >::type> _LIBCPP_INLINE_VISIBILITY void __construct_backward_with_exception_guarantees(_Alloc&, _Tp* __begin1, _Tp* __end1, _Tp*& __end2) { ptrdiff_t _Np = __end1 - __begin1; __end2 -= _Np; if (_Np > 0) _VSTD::memcpy(__end2, __begin1, _Np * sizeof(_Tp)); } template class _LIBCPP_TEMPLATE_VIS raw_storage_iterator : public iterator&> // purposefully not C++03 { private: _OutputIterator __x_; public: _LIBCPP_INLINE_VISIBILITY explicit raw_storage_iterator(_OutputIterator __x) : __x_(__x) {} _LIBCPP_INLINE_VISIBILITY raw_storage_iterator& operator*() {return *this;} _LIBCPP_INLINE_VISIBILITY raw_storage_iterator& operator=(const _Tp& 
__element) {::new ((void*)_VSTD::addressof(*__x_)) _Tp(__element); return *this;} #if _LIBCPP_STD_VER >= 14 _LIBCPP_INLINE_VISIBILITY raw_storage_iterator& operator=(_Tp&& __element) {::new ((void*)_VSTD::addressof(*__x_)) _Tp(_VSTD::move(__element)); return *this;} #endif _LIBCPP_INLINE_VISIBILITY raw_storage_iterator& operator++() {++__x_; return *this;} _LIBCPP_INLINE_VISIBILITY raw_storage_iterator operator++(int) {raw_storage_iterator __t(*this); ++__x_; return __t;} #if _LIBCPP_STD_VER >= 14 _LIBCPP_INLINE_VISIBILITY _OutputIterator base() const { return __x_; } #endif }; template _LIBCPP_NODISCARD_EXT _LIBCPP_NO_CFI pair<_Tp*, ptrdiff_t> get_temporary_buffer(ptrdiff_t __n) _NOEXCEPT { pair<_Tp*, ptrdiff_t> __r(0, 0); const ptrdiff_t __m = (~ptrdiff_t(0) ^ ptrdiff_t(ptrdiff_t(1) << (sizeof(ptrdiff_t) * __CHAR_BIT__ - 1))) / sizeof(_Tp); if (__n > __m) __n = __m; while (__n > 0) { #if !defined(_LIBCPP_HAS_NO_ALIGNED_ALLOCATION) if (__is_overaligned_for_new(_LIBCPP_ALIGNOF(_Tp))) { align_val_t __al = align_val_t(alignment_of<_Tp>::value); __r.first = static_cast<_Tp*>(::operator new( __n * sizeof(_Tp), __al, nothrow)); } else { __r.first = static_cast<_Tp*>(::operator new( __n * sizeof(_Tp), nothrow)); } #else if (__is_overaligned_for_new(_LIBCPP_ALIGNOF(_Tp))) { // Since aligned operator new is unavailable, return an empty // buffer rather than one with invalid alignment. return __r; } __r.first = static_cast<_Tp*>(::operator new(__n * sizeof(_Tp), nothrow)); #endif if (__r.first) { __r.second = __n; break; } __n /= 2; } return __r; } template inline _LIBCPP_INLINE_VISIBILITY void return_temporary_buffer(_Tp* __p) _NOEXCEPT { _VSTD::__libcpp_deallocate_unsized((void*)__p, _LIBCPP_ALIGNOF(_Tp)); } #if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR) template struct _LIBCPP_DEPRECATED_IN_CXX11 auto_ptr_ref { _Tp* __ptr_; }; template class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 auto_ptr { private: _Tp* __ptr_; public: typedef _Tp element_type; _LIBCPP_INLINE_VISIBILITY explicit auto_ptr(_Tp* __p = 0) _NOEXCEPT : __ptr_(__p) {} _LIBCPP_INLINE_VISIBILITY auto_ptr(auto_ptr& __p) _NOEXCEPT : __ptr_(__p.release()) {} template _LIBCPP_INLINE_VISIBILITY auto_ptr(auto_ptr<_Up>& __p) _NOEXCEPT : __ptr_(__p.release()) {} _LIBCPP_INLINE_VISIBILITY auto_ptr& operator=(auto_ptr& __p) _NOEXCEPT {reset(__p.release()); return *this;} template _LIBCPP_INLINE_VISIBILITY auto_ptr& operator=(auto_ptr<_Up>& __p) _NOEXCEPT {reset(__p.release()); return *this;} _LIBCPP_INLINE_VISIBILITY auto_ptr& operator=(auto_ptr_ref<_Tp> __p) _NOEXCEPT {reset(__p.__ptr_); return *this;} _LIBCPP_INLINE_VISIBILITY ~auto_ptr() _NOEXCEPT {delete __ptr_;} _LIBCPP_INLINE_VISIBILITY _Tp& operator*() const _NOEXCEPT {return *__ptr_;} _LIBCPP_INLINE_VISIBILITY _Tp* operator->() const _NOEXCEPT {return __ptr_;} _LIBCPP_INLINE_VISIBILITY _Tp* get() const _NOEXCEPT {return __ptr_;} _LIBCPP_INLINE_VISIBILITY _Tp* release() _NOEXCEPT { _Tp* __t = __ptr_; __ptr_ = nullptr; return __t; } _LIBCPP_INLINE_VISIBILITY void reset(_Tp* __p = 0) _NOEXCEPT { if (__ptr_ != __p) delete __ptr_; __ptr_ = __p; } _LIBCPP_INLINE_VISIBILITY auto_ptr(auto_ptr_ref<_Tp> __p) _NOEXCEPT : __ptr_(__p.__ptr_) {} template _LIBCPP_INLINE_VISIBILITY operator auto_ptr_ref<_Up>() _NOEXCEPT {auto_ptr_ref<_Up> __t; __t.__ptr_ = release(); return __t;} template _LIBCPP_INLINE_VISIBILITY operator auto_ptr<_Up>() _NOEXCEPT {return auto_ptr<_Up>(release());} }; template <> class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX11 auto_ptr { 
public: typedef void element_type; }; #endif // Tag used to default initialize one or both of the pair's elements. struct __default_init_tag {}; struct __value_init_tag {}; template ::value && !__libcpp_is_final<_Tp>::value> struct __compressed_pair_elem { typedef _Tp _ParamT; typedef _Tp& reference; typedef const _Tp& const_reference; _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR __compressed_pair_elem(__default_init_tag) {} _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR __compressed_pair_elem(__value_init_tag) : __value_() {} template ::type>::value >::type> _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR explicit __compressed_pair_elem(_Up&& __u) : __value_(_VSTD::forward<_Up>(__u)) { } #ifndef _LIBCPP_CXX03_LANG template _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 __compressed_pair_elem(piecewise_construct_t, tuple<_Args...> __args, __tuple_indices<_Indexes...>) : __value_(_VSTD::forward<_Args>(_VSTD::get<_Indexes>(__args))...) {} #endif _LIBCPP_INLINE_VISIBILITY reference __get() _NOEXCEPT { return __value_; } _LIBCPP_INLINE_VISIBILITY const_reference __get() const _NOEXCEPT { return __value_; } private: _Tp __value_; }; template struct __compressed_pair_elem<_Tp, _Idx, true> : private _Tp { typedef _Tp _ParamT; typedef _Tp& reference; typedef const _Tp& const_reference; typedef _Tp __value_type; _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR __compressed_pair_elem() = default; _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR __compressed_pair_elem(__default_init_tag) {} _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR __compressed_pair_elem(__value_init_tag) : __value_type() {} template ::type>::value >::type> _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR explicit __compressed_pair_elem(_Up&& __u) : __value_type(_VSTD::forward<_Up>(__u)) {} #ifndef _LIBCPP_CXX03_LANG template _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 __compressed_pair_elem(piecewise_construct_t, tuple<_Args...> __args, __tuple_indices<_Indexes...>) : __value_type(_VSTD::forward<_Args>(_VSTD::get<_Indexes>(__args))...) {} #endif _LIBCPP_INLINE_VISIBILITY reference __get() _NOEXCEPT { return *this; } _LIBCPP_INLINE_VISIBILITY const_reference __get() const _NOEXCEPT { return *this; } }; template class __compressed_pair : private __compressed_pair_elem<_T1, 0>, private __compressed_pair_elem<_T2, 1> { public: // NOTE: This static assert should never fire because __compressed_pair // is *almost never* used in a scenario where it's possible for T1 == T2. // (The exception is std::function where it is possible that the function // object and the allocator have the same type). 
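// Illustrative aside (a sketch, not part of libc++): the reason the element classes
// above derive privately from empty, non-final types is the empty base optimization,
// so a stateless deleter, allocator or comparator contributes no storage. Assuming a
// hypothetical empty functor, one would expect:
//
//   struct __empty_functor { };                                            // hypothetical, stateless
//   static_assert(sizeof(__compressed_pair<int*, __empty_functor>) == sizeof(int*), "");
//   static_assert(sizeof(pair<int*, __empty_functor>) > sizeof(int*), "");
//
// which is how unique_ptr<_Tp> with its default deleter stays the size of a raw pointer.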
static_assert((!is_same<_T1, _T2>::value), "__compressed_pair cannot be instantiated when T1 and T2 are the same type; " "The current implementation is NOT ABI-compatible with the previous " "implementation for this configuration"); typedef _LIBCPP_NODEBUG_TYPE __compressed_pair_elem<_T1, 0> _Base1; typedef _LIBCPP_NODEBUG_TYPE __compressed_pair_elem<_T2, 1> _Base2; template , _Dummy>::value && __dependent_type, _Dummy>::value >::type > _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR __compressed_pair() : _Base1(__value_init_tag()), _Base2(__value_init_tag()) {} template _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR __compressed_pair(_U1&& __t1, _U2&& __t2) : _Base1(_VSTD::forward<_U1>(__t1)), _Base2(_VSTD::forward<_U2>(__t2)) {} #ifndef _LIBCPP_CXX03_LANG template _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14 __compressed_pair(piecewise_construct_t __pc, tuple<_Args1...> __first_args, tuple<_Args2...> __second_args) : _Base1(__pc, _VSTD::move(__first_args), typename __make_tuple_indices::type()), _Base2(__pc, _VSTD::move(__second_args), typename __make_tuple_indices::type()) {} #endif _LIBCPP_INLINE_VISIBILITY typename _Base1::reference first() _NOEXCEPT { return static_cast<_Base1&>(*this).__get(); } _LIBCPP_INLINE_VISIBILITY typename _Base1::const_reference first() const _NOEXCEPT { return static_cast<_Base1 const&>(*this).__get(); } _LIBCPP_INLINE_VISIBILITY typename _Base2::reference second() _NOEXCEPT { return static_cast<_Base2&>(*this).__get(); } _LIBCPP_INLINE_VISIBILITY typename _Base2::const_reference second() const _NOEXCEPT { return static_cast<_Base2 const&>(*this).__get(); } _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR static _Base1* __get_first_base(__compressed_pair* __pair) _NOEXCEPT { return static_cast<_Base1*>(__pair); } _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR static _Base2* __get_second_base(__compressed_pair* __pair) _NOEXCEPT { return static_cast<_Base2*>(__pair); } _LIBCPP_INLINE_VISIBILITY void swap(__compressed_pair& __x) _NOEXCEPT_(__is_nothrow_swappable<_T1>::value && __is_nothrow_swappable<_T2>::value) { using _VSTD::swap; swap(first(), __x.first()); swap(second(), __x.second()); } }; template inline _LIBCPP_INLINE_VISIBILITY void swap(__compressed_pair<_T1, _T2>& __x, __compressed_pair<_T1, _T2>& __y) _NOEXCEPT_(__is_nothrow_swappable<_T1>::value && __is_nothrow_swappable<_T2>::value) { __x.swap(__y); } // default_delete template struct _LIBCPP_TEMPLATE_VIS default_delete { static_assert(!is_function<_Tp>::value, "default_delete cannot be instantiated for function types"); #ifndef _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY constexpr default_delete() _NOEXCEPT = default; #else _LIBCPP_INLINE_VISIBILITY default_delete() {} #endif template _LIBCPP_INLINE_VISIBILITY default_delete(const default_delete<_Up>&, typename enable_if::value>::type* = 0) _NOEXCEPT {} _LIBCPP_INLINE_VISIBILITY void operator()(_Tp* __ptr) const _NOEXCEPT { static_assert(sizeof(_Tp) > 0, "default_delete can not delete incomplete type"); static_assert(!is_void<_Tp>::value, "default_delete can not delete incomplete type"); delete __ptr; } }; template struct _LIBCPP_TEMPLATE_VIS default_delete<_Tp[]> { private: template struct _EnableIfConvertible : enable_if::value> {}; public: #ifndef _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY constexpr default_delete() _NOEXCEPT = default; #else _LIBCPP_INLINE_VISIBILITY default_delete() {} #endif template _LIBCPP_INLINE_VISIBILITY default_delete(const default_delete<_Up[]>&, typename _EnableIfConvertible<_Up>::type* = 0) _NOEXCEPT {} template 
_LIBCPP_INLINE_VISIBILITY typename _EnableIfConvertible<_Up>::type operator()(_Up* __ptr) const _NOEXCEPT { static_assert(sizeof(_Tp) > 0, "default_delete can not delete incomplete type"); static_assert(!is_void<_Tp>::value, "default_delete can not delete void type"); delete[] __ptr; } }; template struct __unique_ptr_deleter_sfinae { static_assert(!is_reference<_Deleter>::value, "incorrect specialization"); typedef const _Deleter& __lval_ref_type; typedef _Deleter&& __good_rval_ref_type; typedef true_type __enable_rval_overload; }; template struct __unique_ptr_deleter_sfinae<_Deleter const&> { typedef const _Deleter& __lval_ref_type; typedef const _Deleter&& __bad_rval_ref_type; typedef false_type __enable_rval_overload; }; template struct __unique_ptr_deleter_sfinae<_Deleter&> { typedef _Deleter& __lval_ref_type; typedef _Deleter&& __bad_rval_ref_type; typedef false_type __enable_rval_overload; }; #if defined(_LIBCPP_ABI_ENABLE_UNIQUE_PTR_TRIVIAL_ABI) # define _LIBCPP_UNIQUE_PTR_TRIVIAL_ABI __attribute__((trivial_abi)) #else # define _LIBCPP_UNIQUE_PTR_TRIVIAL_ABI #endif template > class _LIBCPP_UNIQUE_PTR_TRIVIAL_ABI _LIBCPP_TEMPLATE_VIS unique_ptr { public: typedef _Tp element_type; typedef _Dp deleter_type; typedef _LIBCPP_NODEBUG_TYPE typename __pointer<_Tp, deleter_type>::type pointer; static_assert(!is_rvalue_reference::value, "the specified deleter type cannot be an rvalue reference"); private: __compressed_pair __ptr_; struct __nat { int __for_bool_; }; typedef _LIBCPP_NODEBUG_TYPE __unique_ptr_deleter_sfinae<_Dp> _DeleterSFINAE; template using _LValRefType _LIBCPP_NODEBUG_TYPE = typename __dependent_type<_DeleterSFINAE, _Dummy>::__lval_ref_type; template using _GoodRValRefType _LIBCPP_NODEBUG_TYPE = typename __dependent_type<_DeleterSFINAE, _Dummy>::__good_rval_ref_type; template using _BadRValRefType _LIBCPP_NODEBUG_TYPE = typename __dependent_type<_DeleterSFINAE, _Dummy>::__bad_rval_ref_type; template , _Dummy>::type> using _EnableIfDeleterDefaultConstructible _LIBCPP_NODEBUG_TYPE = typename enable_if::value && !is_pointer<_Deleter>::value>::type; template using _EnableIfDeleterConstructible _LIBCPP_NODEBUG_TYPE = typename enable_if::value>::type; template using _EnableIfMoveConvertible _LIBCPP_NODEBUG_TYPE = typename enable_if< is_convertible::value && !is_array<_Up>::value >::type; template using _EnableIfDeleterConvertible _LIBCPP_NODEBUG_TYPE = typename enable_if< (is_reference<_Dp>::value && is_same<_Dp, _UDel>::value) || (!is_reference<_Dp>::value && is_convertible<_UDel, _Dp>::value) >::type; template using _EnableIfDeleterAssignable = typename enable_if< is_assignable<_Dp&, _UDel&&>::value >::type; public: template > _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR unique_ptr() _NOEXCEPT : __ptr_(pointer(), __default_init_tag()) {} template > _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR unique_ptr(nullptr_t) _NOEXCEPT : __ptr_(pointer(), __default_init_tag()) {} template > _LIBCPP_INLINE_VISIBILITY explicit unique_ptr(pointer __p) _NOEXCEPT : __ptr_(__p, __default_init_tag()) {} template > > _LIBCPP_INLINE_VISIBILITY unique_ptr(pointer __p, _LValRefType<_Dummy> __d) _NOEXCEPT : __ptr_(__p, __d) {} template > > _LIBCPP_INLINE_VISIBILITY unique_ptr(pointer __p, _GoodRValRefType<_Dummy> __d) _NOEXCEPT : __ptr_(__p, _VSTD::move(__d)) { static_assert(!is_reference::value, "rvalue deleter bound to reference"); } template > > _LIBCPP_INLINE_VISIBILITY unique_ptr(pointer __p, _BadRValRefType<_Dummy> __d) = delete; _LIBCPP_INLINE_VISIBILITY unique_ptr(unique_ptr&& __u) _NOEXCEPT : 
__ptr_(__u.release(), _VSTD::forward(__u.get_deleter())) { } template , _Up>, class = _EnableIfDeleterConvertible<_Ep> > _LIBCPP_INLINE_VISIBILITY unique_ptr(unique_ptr<_Up, _Ep>&& __u) _NOEXCEPT : __ptr_(__u.release(), _VSTD::forward<_Ep>(__u.get_deleter())) {} #if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR) template _LIBCPP_INLINE_VISIBILITY unique_ptr(auto_ptr<_Up>&& __p, typename enable_if::value && is_same<_Dp, default_delete<_Tp> >::value, __nat>::type = __nat()) _NOEXCEPT : __ptr_(__p.release(), __default_init_tag()) {} #endif _LIBCPP_INLINE_VISIBILITY unique_ptr& operator=(unique_ptr&& __u) _NOEXCEPT { reset(__u.release()); __ptr_.second() = _VSTD::forward(__u.get_deleter()); return *this; } template , _Up>, class = _EnableIfDeleterAssignable<_Ep> > _LIBCPP_INLINE_VISIBILITY unique_ptr& operator=(unique_ptr<_Up, _Ep>&& __u) _NOEXCEPT { reset(__u.release()); __ptr_.second() = _VSTD::forward<_Ep>(__u.get_deleter()); return *this; } #if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR) template _LIBCPP_INLINE_VISIBILITY typename enable_if::value && is_same<_Dp, default_delete<_Tp> >::value, unique_ptr&>::type operator=(auto_ptr<_Up> __p) { reset(__p.release()); return *this; } #endif #ifdef _LIBCPP_CXX03_LANG unique_ptr(unique_ptr const&) = delete; unique_ptr& operator=(unique_ptr const&) = delete; #endif _LIBCPP_INLINE_VISIBILITY ~unique_ptr() { reset(); } _LIBCPP_INLINE_VISIBILITY unique_ptr& operator=(nullptr_t) _NOEXCEPT { reset(); return *this; } _LIBCPP_INLINE_VISIBILITY typename add_lvalue_reference<_Tp>::type operator*() const { return *__ptr_.first(); } _LIBCPP_INLINE_VISIBILITY pointer operator->() const _NOEXCEPT { return __ptr_.first(); } _LIBCPP_INLINE_VISIBILITY pointer get() const _NOEXCEPT { return __ptr_.first(); } _LIBCPP_INLINE_VISIBILITY deleter_type& get_deleter() _NOEXCEPT { return __ptr_.second(); } _LIBCPP_INLINE_VISIBILITY const deleter_type& get_deleter() const _NOEXCEPT { return __ptr_.second(); } _LIBCPP_INLINE_VISIBILITY _LIBCPP_EXPLICIT operator bool() const _NOEXCEPT { return __ptr_.first() != nullptr; } _LIBCPP_INLINE_VISIBILITY pointer release() _NOEXCEPT { pointer __t = __ptr_.first(); __ptr_.first() = pointer(); return __t; } _LIBCPP_INLINE_VISIBILITY void reset(pointer __p = pointer()) _NOEXCEPT { pointer __tmp = __ptr_.first(); __ptr_.first() = __p; if (__tmp) __ptr_.second()(__tmp); } _LIBCPP_INLINE_VISIBILITY void swap(unique_ptr& __u) _NOEXCEPT { __ptr_.swap(__u.__ptr_); } }; template class _LIBCPP_UNIQUE_PTR_TRIVIAL_ABI _LIBCPP_TEMPLATE_VIS unique_ptr<_Tp[], _Dp> { public: typedef _Tp element_type; typedef _Dp deleter_type; typedef typename __pointer<_Tp, deleter_type>::type pointer; private: __compressed_pair __ptr_; template struct _CheckArrayPointerConversion : is_same<_From, pointer> {}; template struct _CheckArrayPointerConversion<_FromElem*> : integral_constant::value || (is_same::value && is_convertible<_FromElem(*)[], element_type(*)[]>::value) > {}; typedef __unique_ptr_deleter_sfinae<_Dp> _DeleterSFINAE; template using _LValRefType _LIBCPP_NODEBUG_TYPE = typename __dependent_type<_DeleterSFINAE, _Dummy>::__lval_ref_type; template using _GoodRValRefType _LIBCPP_NODEBUG_TYPE = typename __dependent_type<_DeleterSFINAE, _Dummy>::__good_rval_ref_type; template using _BadRValRefType _LIBCPP_NODEBUG_TYPE = typename __dependent_type<_DeleterSFINAE, _Dummy>::__bad_rval_ref_type; template , _Dummy>::type> using _EnableIfDeleterDefaultConstructible _LIBCPP_NODEBUG_TYPE = typename enable_if::value 
&& !is_pointer<_Deleter>::value>::type; template using _EnableIfDeleterConstructible _LIBCPP_NODEBUG_TYPE = typename enable_if::value>::type; template using _EnableIfPointerConvertible _LIBCPP_NODEBUG_TYPE = typename enable_if< _CheckArrayPointerConversion<_Pp>::value >::type; template using _EnableIfMoveConvertible _LIBCPP_NODEBUG_TYPE = typename enable_if< is_array<_Up>::value && is_same::value && is_same::value && is_convertible<_ElemT(*)[], element_type(*)[]>::value >::type; template using _EnableIfDeleterConvertible _LIBCPP_NODEBUG_TYPE = typename enable_if< (is_reference<_Dp>::value && is_same<_Dp, _UDel>::value) || (!is_reference<_Dp>::value && is_convertible<_UDel, _Dp>::value) >::type; template using _EnableIfDeleterAssignable _LIBCPP_NODEBUG_TYPE = typename enable_if< is_assignable<_Dp&, _UDel&&>::value >::type; public: template > _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR unique_ptr() _NOEXCEPT : __ptr_(pointer(), __default_init_tag()) {} template > _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR unique_ptr(nullptr_t) _NOEXCEPT : __ptr_(pointer(), __default_init_tag()) {} template , class = _EnableIfPointerConvertible<_Pp> > _LIBCPP_INLINE_VISIBILITY explicit unique_ptr(_Pp __p) _NOEXCEPT : __ptr_(__p, __default_init_tag()) {} template >, class = _EnableIfPointerConvertible<_Pp> > _LIBCPP_INLINE_VISIBILITY unique_ptr(_Pp __p, _LValRefType<_Dummy> __d) _NOEXCEPT : __ptr_(__p, __d) {} template > > _LIBCPP_INLINE_VISIBILITY unique_ptr(nullptr_t, _LValRefType<_Dummy> __d) _NOEXCEPT : __ptr_(nullptr, __d) {} template >, class = _EnableIfPointerConvertible<_Pp> > _LIBCPP_INLINE_VISIBILITY unique_ptr(_Pp __p, _GoodRValRefType<_Dummy> __d) _NOEXCEPT : __ptr_(__p, _VSTD::move(__d)) { static_assert(!is_reference::value, "rvalue deleter bound to reference"); } template > > _LIBCPP_INLINE_VISIBILITY unique_ptr(nullptr_t, _GoodRValRefType<_Dummy> __d) _NOEXCEPT : __ptr_(nullptr, _VSTD::move(__d)) { static_assert(!is_reference::value, "rvalue deleter bound to reference"); } template >, class = _EnableIfPointerConvertible<_Pp> > _LIBCPP_INLINE_VISIBILITY unique_ptr(_Pp __p, _BadRValRefType<_Dummy> __d) = delete; _LIBCPP_INLINE_VISIBILITY unique_ptr(unique_ptr&& __u) _NOEXCEPT : __ptr_(__u.release(), _VSTD::forward(__u.get_deleter())) { } _LIBCPP_INLINE_VISIBILITY unique_ptr& operator=(unique_ptr&& __u) _NOEXCEPT { reset(__u.release()); __ptr_.second() = _VSTD::forward(__u.get_deleter()); return *this; } template , _Up>, class = _EnableIfDeleterConvertible<_Ep> > _LIBCPP_INLINE_VISIBILITY unique_ptr(unique_ptr<_Up, _Ep>&& __u) _NOEXCEPT : __ptr_(__u.release(), _VSTD::forward<_Ep>(__u.get_deleter())) { } template , _Up>, class = _EnableIfDeleterAssignable<_Ep> > _LIBCPP_INLINE_VISIBILITY unique_ptr& operator=(unique_ptr<_Up, _Ep>&& __u) _NOEXCEPT { reset(__u.release()); __ptr_.second() = _VSTD::forward<_Ep>(__u.get_deleter()); return *this; } #ifdef _LIBCPP_CXX03_LANG unique_ptr(unique_ptr const&) = delete; unique_ptr& operator=(unique_ptr const&) = delete; #endif public: _LIBCPP_INLINE_VISIBILITY ~unique_ptr() { reset(); } _LIBCPP_INLINE_VISIBILITY unique_ptr& operator=(nullptr_t) _NOEXCEPT { reset(); return *this; } _LIBCPP_INLINE_VISIBILITY typename add_lvalue_reference<_Tp>::type operator[](size_t __i) const { return __ptr_.first()[__i]; } _LIBCPP_INLINE_VISIBILITY pointer get() const _NOEXCEPT { return __ptr_.first(); } _LIBCPP_INLINE_VISIBILITY deleter_type& get_deleter() _NOEXCEPT { return __ptr_.second(); } _LIBCPP_INLINE_VISIBILITY const deleter_type& get_deleter() const _NOEXCEPT { return 
__ptr_.second(); } _LIBCPP_INLINE_VISIBILITY _LIBCPP_EXPLICIT operator bool() const _NOEXCEPT { return __ptr_.first() != nullptr; } _LIBCPP_INLINE_VISIBILITY pointer release() _NOEXCEPT { pointer __t = __ptr_.first(); __ptr_.first() = pointer(); return __t; } template _LIBCPP_INLINE_VISIBILITY typename enable_if< _CheckArrayPointerConversion<_Pp>::value >::type reset(_Pp __p) _NOEXCEPT { pointer __tmp = __ptr_.first(); __ptr_.first() = __p; if (__tmp) __ptr_.second()(__tmp); } _LIBCPP_INLINE_VISIBILITY void reset(nullptr_t = nullptr) _NOEXCEPT { pointer __tmp = __ptr_.first(); __ptr_.first() = nullptr; if (__tmp) __ptr_.second()(__tmp); } _LIBCPP_INLINE_VISIBILITY void swap(unique_ptr& __u) _NOEXCEPT { __ptr_.swap(__u.__ptr_); } }; template inline _LIBCPP_INLINE_VISIBILITY typename enable_if< __is_swappable<_Dp>::value, void >::type swap(unique_ptr<_Tp, _Dp>& __x, unique_ptr<_Tp, _Dp>& __y) _NOEXCEPT {__x.swap(__y);} template inline _LIBCPP_INLINE_VISIBILITY bool operator==(const unique_ptr<_T1, _D1>& __x, const unique_ptr<_T2, _D2>& __y) {return __x.get() == __y.get();} template inline _LIBCPP_INLINE_VISIBILITY bool operator!=(const unique_ptr<_T1, _D1>& __x, const unique_ptr<_T2, _D2>& __y) {return !(__x == __y);} template inline _LIBCPP_INLINE_VISIBILITY bool operator< (const unique_ptr<_T1, _D1>& __x, const unique_ptr<_T2, _D2>& __y) { typedef typename unique_ptr<_T1, _D1>::pointer _P1; typedef typename unique_ptr<_T2, _D2>::pointer _P2; typedef typename common_type<_P1, _P2>::type _Vp; return less<_Vp>()(__x.get(), __y.get()); } template inline _LIBCPP_INLINE_VISIBILITY bool operator> (const unique_ptr<_T1, _D1>& __x, const unique_ptr<_T2, _D2>& __y) {return __y < __x;} template inline _LIBCPP_INLINE_VISIBILITY bool operator<=(const unique_ptr<_T1, _D1>& __x, const unique_ptr<_T2, _D2>& __y) {return !(__y < __x);} template inline _LIBCPP_INLINE_VISIBILITY bool operator>=(const unique_ptr<_T1, _D1>& __x, const unique_ptr<_T2, _D2>& __y) {return !(__x < __y);} template inline _LIBCPP_INLINE_VISIBILITY bool operator==(const unique_ptr<_T1, _D1>& __x, nullptr_t) _NOEXCEPT { return !__x; } template inline _LIBCPP_INLINE_VISIBILITY bool operator==(nullptr_t, const unique_ptr<_T1, _D1>& __x) _NOEXCEPT { return !__x; } template inline _LIBCPP_INLINE_VISIBILITY bool operator!=(const unique_ptr<_T1, _D1>& __x, nullptr_t) _NOEXCEPT { return static_cast(__x); } template inline _LIBCPP_INLINE_VISIBILITY bool operator!=(nullptr_t, const unique_ptr<_T1, _D1>& __x) _NOEXCEPT { return static_cast(__x); } template inline _LIBCPP_INLINE_VISIBILITY bool operator<(const unique_ptr<_T1, _D1>& __x, nullptr_t) { typedef typename unique_ptr<_T1, _D1>::pointer _P1; return less<_P1>()(__x.get(), nullptr); } template inline _LIBCPP_INLINE_VISIBILITY bool operator<(nullptr_t, const unique_ptr<_T1, _D1>& __x) { typedef typename unique_ptr<_T1, _D1>::pointer _P1; return less<_P1>()(nullptr, __x.get()); } template inline _LIBCPP_INLINE_VISIBILITY bool operator>(const unique_ptr<_T1, _D1>& __x, nullptr_t) { return nullptr < __x; } template inline _LIBCPP_INLINE_VISIBILITY bool operator>(nullptr_t, const unique_ptr<_T1, _D1>& __x) { return __x < nullptr; } template inline _LIBCPP_INLINE_VISIBILITY bool operator<=(const unique_ptr<_T1, _D1>& __x, nullptr_t) { return !(nullptr < __x); } template inline _LIBCPP_INLINE_VISIBILITY bool operator<=(nullptr_t, const unique_ptr<_T1, _D1>& __x) { return !(__x < nullptr); } template inline _LIBCPP_INLINE_VISIBILITY bool operator>=(const unique_ptr<_T1, _D1>& __x, nullptr_t) { 
return !(__x < nullptr); } template inline _LIBCPP_INLINE_VISIBILITY bool operator>=(nullptr_t, const unique_ptr<_T1, _D1>& __x) { return !(nullptr < __x); } #if _LIBCPP_STD_VER > 11 template struct __unique_if { typedef unique_ptr<_Tp> __unique_single; }; template struct __unique_if<_Tp[]> { typedef unique_ptr<_Tp[]> __unique_array_unknown_bound; }; template struct __unique_if<_Tp[_Np]> { typedef void __unique_array_known_bound; }; template inline _LIBCPP_INLINE_VISIBILITY typename __unique_if<_Tp>::__unique_single make_unique(_Args&&... __args) { return unique_ptr<_Tp>(new _Tp(_VSTD::forward<_Args>(__args)...)); } template inline _LIBCPP_INLINE_VISIBILITY typename __unique_if<_Tp>::__unique_array_unknown_bound make_unique(size_t __n) { typedef typename remove_extent<_Tp>::type _Up; return unique_ptr<_Tp>(new _Up[__n]()); } template typename __unique_if<_Tp>::__unique_array_known_bound make_unique(_Args&&...) = delete; #endif // _LIBCPP_STD_VER > 11 template #ifdef _LIBCPP_CXX03_LANG struct _LIBCPP_TEMPLATE_VIS hash > #else struct _LIBCPP_TEMPLATE_VIS hash<__enable_hash_helper< unique_ptr<_Tp, _Dp>, typename unique_ptr<_Tp, _Dp>::pointer> > #endif { typedef unique_ptr<_Tp, _Dp> argument_type; typedef size_t result_type; _LIBCPP_INLINE_VISIBILITY result_type operator()(const argument_type& __ptr) const { typedef typename argument_type::pointer pointer; return hash()(__ptr.get()); } }; struct __destruct_n { private: size_t __size_; template _LIBCPP_INLINE_VISIBILITY void __process(_Tp* __p, false_type) _NOEXCEPT {for (size_t __i = 0; __i < __size_; ++__i, ++__p) __p->~_Tp();} template _LIBCPP_INLINE_VISIBILITY void __process(_Tp*, true_type) _NOEXCEPT {} _LIBCPP_INLINE_VISIBILITY void __incr(false_type) _NOEXCEPT {++__size_;} _LIBCPP_INLINE_VISIBILITY void __incr(true_type) _NOEXCEPT {} _LIBCPP_INLINE_VISIBILITY void __set(size_t __s, false_type) _NOEXCEPT {__size_ = __s;} _LIBCPP_INLINE_VISIBILITY void __set(size_t, true_type) _NOEXCEPT {} public: _LIBCPP_INLINE_VISIBILITY explicit __destruct_n(size_t __s) _NOEXCEPT : __size_(__s) {} template _LIBCPP_INLINE_VISIBILITY void __incr() _NOEXCEPT {__incr(integral_constant::value>());} template _LIBCPP_INLINE_VISIBILITY void __set(size_t __s, _Tp*) _NOEXCEPT {__set(__s, integral_constant::value>());} template _LIBCPP_INLINE_VISIBILITY void operator()(_Tp* __p) _NOEXCEPT {__process(__p, integral_constant::value>());} }; template class __allocator_destructor { typedef _LIBCPP_NODEBUG_TYPE allocator_traits<_Alloc> __alloc_traits; public: typedef _LIBCPP_NODEBUG_TYPE typename __alloc_traits::pointer pointer; typedef _LIBCPP_NODEBUG_TYPE typename __alloc_traits::size_type size_type; private: _Alloc& __alloc_; size_type __s_; public: _LIBCPP_INLINE_VISIBILITY __allocator_destructor(_Alloc& __a, size_type __s) _NOEXCEPT : __alloc_(__a), __s_(__s) {} _LIBCPP_INLINE_VISIBILITY void operator()(pointer __p) _NOEXCEPT {__alloc_traits::deallocate(__alloc_, __p, __s_);} }; template _ForwardIterator uninitialized_copy(_InputIterator __f, _InputIterator __l, _ForwardIterator __r) { typedef typename iterator_traits<_ForwardIterator>::value_type value_type; #ifndef _LIBCPP_NO_EXCEPTIONS _ForwardIterator __s = __r; try { #endif for (; __f != __l; ++__f, (void) ++__r) ::new ((void*)_VSTD::addressof(*__r)) value_type(*__f); #ifndef _LIBCPP_NO_EXCEPTIONS } catch (...) 
{ for (; __s != __r; ++__s) __s->~value_type(); throw; } #endif return __r; } template _ForwardIterator uninitialized_copy_n(_InputIterator __f, _Size __n, _ForwardIterator __r) { typedef typename iterator_traits<_ForwardIterator>::value_type value_type; #ifndef _LIBCPP_NO_EXCEPTIONS _ForwardIterator __s = __r; try { #endif for (; __n > 0; ++__f, (void) ++__r, (void) --__n) ::new ((void*)_VSTD::addressof(*__r)) value_type(*__f); #ifndef _LIBCPP_NO_EXCEPTIONS } catch (...) { for (; __s != __r; ++__s) __s->~value_type(); throw; } #endif return __r; } template void uninitialized_fill(_ForwardIterator __f, _ForwardIterator __l, const _Tp& __x) { typedef typename iterator_traits<_ForwardIterator>::value_type value_type; #ifndef _LIBCPP_NO_EXCEPTIONS _ForwardIterator __s = __f; try { #endif for (; __f != __l; ++__f) ::new ((void*)_VSTD::addressof(*__f)) value_type(__x); #ifndef _LIBCPP_NO_EXCEPTIONS } catch (...) { for (; __s != __f; ++__s) __s->~value_type(); throw; } #endif } template _ForwardIterator uninitialized_fill_n(_ForwardIterator __f, _Size __n, const _Tp& __x) { typedef typename iterator_traits<_ForwardIterator>::value_type value_type; #ifndef _LIBCPP_NO_EXCEPTIONS _ForwardIterator __s = __f; try { #endif for (; __n > 0; ++__f, (void) --__n) ::new ((void*)_VSTD::addressof(*__f)) value_type(__x); #ifndef _LIBCPP_NO_EXCEPTIONS } catch (...) { for (; __s != __f; ++__s) __s->~value_type(); throw; } #endif return __f; } #if _LIBCPP_STD_VER > 14 template inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 void destroy(_ForwardIterator __first, _ForwardIterator __last) { for (; __first != __last; ++__first) _VSTD::destroy_at(_VSTD::addressof(*__first)); } template inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _ForwardIterator destroy_n(_ForwardIterator __first, _Size __n) { for (; __n > 0; (void)++__first, --__n) _VSTD::destroy_at(_VSTD::addressof(*__first)); return __first; } template inline _LIBCPP_INLINE_VISIBILITY void uninitialized_default_construct(_ForwardIterator __first, _ForwardIterator __last) { using _Vt = typename iterator_traits<_ForwardIterator>::value_type; auto __idx = __first; #ifndef _LIBCPP_NO_EXCEPTIONS try { #endif for (; __idx != __last; ++__idx) ::new ((void*)_VSTD::addressof(*__idx)) _Vt; #ifndef _LIBCPP_NO_EXCEPTIONS } catch (...) { _VSTD::destroy(__first, __idx); throw; } #endif } template inline _LIBCPP_INLINE_VISIBILITY _ForwardIterator uninitialized_default_construct_n(_ForwardIterator __first, _Size __n) { using _Vt = typename iterator_traits<_ForwardIterator>::value_type; auto __idx = __first; #ifndef _LIBCPP_NO_EXCEPTIONS try { #endif for (; __n > 0; (void)++__idx, --__n) ::new ((void*)_VSTD::addressof(*__idx)) _Vt; return __idx; #ifndef _LIBCPP_NO_EXCEPTIONS } catch (...) { _VSTD::destroy(__first, __idx); throw; } #endif } template inline _LIBCPP_INLINE_VISIBILITY void uninitialized_value_construct(_ForwardIterator __first, _ForwardIterator __last) { using _Vt = typename iterator_traits<_ForwardIterator>::value_type; auto __idx = __first; #ifndef _LIBCPP_NO_EXCEPTIONS try { #endif for (; __idx != __last; ++__idx) ::new ((void*)_VSTD::addressof(*__idx)) _Vt(); #ifndef _LIBCPP_NO_EXCEPTIONS } catch (...) 
{ _VSTD::destroy(__first, __idx); throw; } #endif } template inline _LIBCPP_INLINE_VISIBILITY _ForwardIterator uninitialized_value_construct_n(_ForwardIterator __first, _Size __n) { using _Vt = typename iterator_traits<_ForwardIterator>::value_type; auto __idx = __first; #ifndef _LIBCPP_NO_EXCEPTIONS try { #endif for (; __n > 0; (void)++__idx, --__n) ::new ((void*)_VSTD::addressof(*__idx)) _Vt(); return __idx; #ifndef _LIBCPP_NO_EXCEPTIONS } catch (...) { _VSTD::destroy(__first, __idx); throw; } #endif } template inline _LIBCPP_INLINE_VISIBILITY _ForwardIt uninitialized_move(_InputIt __first, _InputIt __last, _ForwardIt __first_res) { using _Vt = typename iterator_traits<_ForwardIt>::value_type; auto __idx = __first_res; #ifndef _LIBCPP_NO_EXCEPTIONS try { #endif for (; __first != __last; (void)++__idx, ++__first) ::new ((void*)_VSTD::addressof(*__idx)) _Vt(_VSTD::move(*__first)); return __idx; #ifndef _LIBCPP_NO_EXCEPTIONS } catch (...) { _VSTD::destroy(__first_res, __idx); throw; } #endif } template inline _LIBCPP_INLINE_VISIBILITY pair<_InputIt, _ForwardIt> uninitialized_move_n(_InputIt __first, _Size __n, _ForwardIt __first_res) { using _Vt = typename iterator_traits<_ForwardIt>::value_type; auto __idx = __first_res; #ifndef _LIBCPP_NO_EXCEPTIONS try { #endif for (; __n > 0; ++__idx, (void)++__first, --__n) ::new ((void*)_VSTD::addressof(*__idx)) _Vt(_VSTD::move(*__first)); return {__first, __idx}; #ifndef _LIBCPP_NO_EXCEPTIONS } catch (...) { _VSTD::destroy(__first_res, __idx); throw; } #endif } #endif // _LIBCPP_STD_VER > 14 // NOTE: Relaxed and acq/rel atomics (for increment and decrement respectively) // should be sufficient for thread safety. // See https://bugs.llvm.org/show_bug.cgi?id=22803 #if defined(__clang__) && __has_builtin(__atomic_add_fetch) \ && defined(__ATOMIC_RELAXED) \ && defined(__ATOMIC_ACQ_REL) # define _LIBCPP_HAS_BUILTIN_ATOMIC_SUPPORT #elif defined(_LIBCPP_COMPILER_GCC) # define _LIBCPP_HAS_BUILTIN_ATOMIC_SUPPORT #endif template inline _LIBCPP_INLINE_VISIBILITY _Tp __libcpp_atomic_refcount_increment(_Tp& __t) _NOEXCEPT { #if defined(_LIBCPP_HAS_BUILTIN_ATOMIC_SUPPORT) && !defined(_LIBCPP_HAS_NO_THREADS) return __atomic_add_fetch(&__t, 1, __ATOMIC_RELAXED); #else return __t += 1; #endif } template inline _LIBCPP_INLINE_VISIBILITY _Tp __libcpp_atomic_refcount_decrement(_Tp& __t) _NOEXCEPT { #if defined(_LIBCPP_HAS_BUILTIN_ATOMIC_SUPPORT) && !defined(_LIBCPP_HAS_NO_THREADS) return __atomic_add_fetch(&__t, -1, __ATOMIC_ACQ_REL); #else return __t -= 1; #endif } class _LIBCPP_EXCEPTION_ABI bad_weak_ptr : public std::exception { public: bad_weak_ptr() _NOEXCEPT = default; bad_weak_ptr(const bad_weak_ptr&) _NOEXCEPT = default; virtual ~bad_weak_ptr() _NOEXCEPT; virtual const char* what() const _NOEXCEPT; }; _LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY void __throw_bad_weak_ptr() { #ifndef _LIBCPP_NO_EXCEPTIONS throw bad_weak_ptr(); #else _VSTD::abort(); #endif } template class _LIBCPP_TEMPLATE_VIS weak_ptr; class _LIBCPP_TYPE_VIS __shared_count { __shared_count(const __shared_count&); __shared_count& operator=(const __shared_count&); protected: long __shared_owners_; virtual ~__shared_count(); private: virtual void __on_zero_shared() _NOEXCEPT = 0; public: _LIBCPP_INLINE_VISIBILITY explicit __shared_count(long __refs = 0) _NOEXCEPT : __shared_owners_(__refs) {} #if defined(_LIBCPP_BUILDING_LIBRARY) && \ defined(_LIBCPP_DEPRECATED_ABI_LEGACY_LIBRARY_DEFINITIONS_FOR_INLINE_FUNCTIONS) void __add_shared() _NOEXCEPT; bool __release_shared() _NOEXCEPT; #else 
_LIBCPP_INLINE_VISIBILITY void __add_shared() _NOEXCEPT { __libcpp_atomic_refcount_increment(__shared_owners_); } _LIBCPP_INLINE_VISIBILITY bool __release_shared() _NOEXCEPT { if (__libcpp_atomic_refcount_decrement(__shared_owners_) == -1) { __on_zero_shared(); return true; } return false; } #endif _LIBCPP_INLINE_VISIBILITY long use_count() const _NOEXCEPT { return __libcpp_relaxed_load(&__shared_owners_) + 1; } }; class _LIBCPP_TYPE_VIS __shared_weak_count : private __shared_count { long __shared_weak_owners_; public: _LIBCPP_INLINE_VISIBILITY explicit __shared_weak_count(long __refs = 0) _NOEXCEPT : __shared_count(__refs), __shared_weak_owners_(__refs) {} protected: virtual ~__shared_weak_count(); public: #if defined(_LIBCPP_BUILDING_LIBRARY) && \ defined(_LIBCPP_DEPRECATED_ABI_LEGACY_LIBRARY_DEFINITIONS_FOR_INLINE_FUNCTIONS) void __add_shared() _NOEXCEPT; void __add_weak() _NOEXCEPT; void __release_shared() _NOEXCEPT; #else _LIBCPP_INLINE_VISIBILITY void __add_shared() _NOEXCEPT { __shared_count::__add_shared(); } _LIBCPP_INLINE_VISIBILITY void __add_weak() _NOEXCEPT { __libcpp_atomic_refcount_increment(__shared_weak_owners_); } _LIBCPP_INLINE_VISIBILITY void __release_shared() _NOEXCEPT { if (__shared_count::__release_shared()) __release_weak(); } #endif void __release_weak() _NOEXCEPT; _LIBCPP_INLINE_VISIBILITY long use_count() const _NOEXCEPT {return __shared_count::use_count();} __shared_weak_count* lock() _NOEXCEPT; virtual const void* __get_deleter(const type_info&) const _NOEXCEPT; private: virtual void __on_zero_shared_weak() _NOEXCEPT = 0; }; template class __shared_ptr_pointer : public __shared_weak_count { __compressed_pair<__compressed_pair<_Tp, _Dp>, _Alloc> __data_; public: _LIBCPP_INLINE_VISIBILITY __shared_ptr_pointer(_Tp __p, _Dp __d, _Alloc __a) : __data_(__compressed_pair<_Tp, _Dp>(__p, _VSTD::move(__d)), _VSTD::move(__a)) {} #ifndef _LIBCPP_NO_RTTI virtual const void* __get_deleter(const type_info&) const _NOEXCEPT; #endif private: virtual void __on_zero_shared() _NOEXCEPT; virtual void __on_zero_shared_weak() _NOEXCEPT; }; #ifndef _LIBCPP_NO_RTTI template const void* __shared_ptr_pointer<_Tp, _Dp, _Alloc>::__get_deleter(const type_info& __t) const _NOEXCEPT { return __t == typeid(_Dp) ? 
_VSTD::addressof(__data_.first().second()) : nullptr; } #endif // _LIBCPP_NO_RTTI template void __shared_ptr_pointer<_Tp, _Dp, _Alloc>::__on_zero_shared() _NOEXCEPT { __data_.first().second()(__data_.first().first()); __data_.first().second().~_Dp(); } template void __shared_ptr_pointer<_Tp, _Dp, _Alloc>::__on_zero_shared_weak() _NOEXCEPT { typedef typename __allocator_traits_rebind<_Alloc, __shared_ptr_pointer>::type _Al; typedef allocator_traits<_Al> _ATraits; typedef pointer_traits _PTraits; _Al __a(__data_.second()); __data_.second().~_Alloc(); __a.deallocate(_PTraits::pointer_to(*this), 1); } template struct __shared_ptr_emplace : __shared_weak_count { template _LIBCPP_HIDE_FROM_ABI explicit __shared_ptr_emplace(_Alloc __a, _Args&& ...__args) : __storage_(_VSTD::move(__a)) { #if _LIBCPP_STD_VER > 17 using _TpAlloc = typename __allocator_traits_rebind<_Alloc, _Tp>::type; _TpAlloc __tmp(*__get_alloc()); allocator_traits<_TpAlloc>::construct(__tmp, __get_elem(), _VSTD::forward<_Args>(__args)...); #else ::new ((void*)__get_elem()) _Tp(_VSTD::forward<_Args>(__args)...); #endif } _LIBCPP_HIDE_FROM_ABI _Alloc* __get_alloc() _NOEXCEPT { return __storage_.__get_alloc(); } _LIBCPP_HIDE_FROM_ABI _Tp* __get_elem() _NOEXCEPT { return __storage_.__get_elem(); } private: virtual void __on_zero_shared() _NOEXCEPT { #if _LIBCPP_STD_VER > 17 using _TpAlloc = typename __allocator_traits_rebind<_Alloc, _Tp>::type; _TpAlloc __tmp(*__get_alloc()); allocator_traits<_TpAlloc>::destroy(__tmp, __get_elem()); #else __get_elem()->~_Tp(); #endif } virtual void __on_zero_shared_weak() _NOEXCEPT { using _ControlBlockAlloc = typename __allocator_traits_rebind<_Alloc, __shared_ptr_emplace>::type; using _ControlBlockPointer = typename allocator_traits<_ControlBlockAlloc>::pointer; _ControlBlockAlloc __tmp(*__get_alloc()); __storage_.~_Storage(); allocator_traits<_ControlBlockAlloc>::deallocate(__tmp, pointer_traits<_ControlBlockPointer>::pointer_to(*this), 1); } // This class implements the control block for non-array shared pointers created // through `std::allocate_shared` and `std::make_shared`. // // In previous versions of the library, we used a compressed pair to store // both the _Alloc and the _Tp. This implies using EBO, which is incompatible // with Allocator construction for _Tp. To allow implementing P0674 in C++20, // we now use a properly aligned char buffer while making sure that we maintain // the same layout that we had when we used a compressed pair. 
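// Illustrative sketch of the technique used below (hypothetical names, not libc++ API):
// a suitably aligned char buffer can stand in for an object that is constructed with
// placement-new and destroyed manually, while keeping the enclosing type's size and
// alignment unchanged. Something along these lines (requires <new>):
//
//   struct Widget { int v; };
//   struct Holder {
//     alignas(Widget) char __blob[sizeof(Widget)];
//     Widget* __get() { return reinterpret_cast<Widget*>(__blob); }
//     Holder() { ::new (static_cast<void*>(__blob)) Widget{42}; }   // construct in place
//     ~Holder() { __get()->~Widget(); }                             // destroy manually
//   };
//
// The _Storage type below applies the same idea to a __compressed_pair<_Alloc, _Tp> so
// that _Tp can be constructed through allocator_traits rather than through the pair's
// own constructor, while preserving the historical layout.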
using _CompressedPair = __compressed_pair<_Alloc, _Tp>; struct _ALIGNAS_TYPE(_CompressedPair) _Storage { char __blob_[sizeof(_CompressedPair)]; _LIBCPP_HIDE_FROM_ABI explicit _Storage(_Alloc&& __a) { ::new ((void*)__get_alloc()) _Alloc(_VSTD::move(__a)); } _LIBCPP_HIDE_FROM_ABI ~_Storage() { __get_alloc()->~_Alloc(); } _Alloc* __get_alloc() _NOEXCEPT { _CompressedPair *__as_pair = reinterpret_cast<_CompressedPair*>(__blob_); typename _CompressedPair::_Base1* __first = _CompressedPair::__get_first_base(__as_pair); _Alloc *__alloc = reinterpret_cast<_Alloc*>(__first); return __alloc; } _LIBCPP_NO_CFI _Tp* __get_elem() _NOEXCEPT { _CompressedPair *__as_pair = reinterpret_cast<_CompressedPair*>(__blob_); typename _CompressedPair::_Base2* __second = _CompressedPair::__get_second_base(__as_pair); _Tp *__elem = reinterpret_cast<_Tp*>(__second); return __elem; } }; static_assert(_LIBCPP_ALIGNOF(_Storage) == _LIBCPP_ALIGNOF(_CompressedPair), ""); static_assert(sizeof(_Storage) == sizeof(_CompressedPair), ""); _Storage __storage_; }; struct __shared_ptr_dummy_rebind_allocator_type; template <> class _LIBCPP_TEMPLATE_VIS allocator<__shared_ptr_dummy_rebind_allocator_type> { public: template struct rebind { typedef allocator<_Other> other; }; }; template class _LIBCPP_TEMPLATE_VIS enable_shared_from_this; template struct __compatible_with #if _LIBCPP_STD_VER > 14 : is_convertible*, remove_extent_t<_Up>*> {}; #else : is_convertible<_Tp*, _Up*> {}; #endif // _LIBCPP_STD_VER > 14 #if defined(_LIBCPP_ABI_ENABLE_SHARED_PTR_TRIVIAL_ABI) # define _LIBCPP_SHARED_PTR_TRIVIAL_ABI __attribute__((trivial_abi)) #else # define _LIBCPP_SHARED_PTR_TRIVIAL_ABI #endif template class _LIBCPP_SHARED_PTR_TRIVIAL_ABI _LIBCPP_TEMPLATE_VIS shared_ptr { public: #if _LIBCPP_STD_VER > 14 typedef weak_ptr<_Tp> weak_type; typedef remove_extent_t<_Tp> element_type; #else typedef _Tp element_type; #endif private: element_type* __ptr_; __shared_weak_count* __cntrl_; struct __nat {int __for_bool_;}; public: _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR shared_ptr() _NOEXCEPT; _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR shared_ptr(nullptr_t) _NOEXCEPT; template explicit shared_ptr(_Yp* __p, typename enable_if<__compatible_with<_Yp, element_type>::value, __nat>::type = __nat()); template shared_ptr(_Yp* __p, _Dp __d, typename enable_if<__compatible_with<_Yp, element_type>::value, __nat>::type = __nat()); template shared_ptr(_Yp* __p, _Dp __d, _Alloc __a, typename enable_if<__compatible_with<_Yp, element_type>::value, __nat>::type = __nat()); template shared_ptr(nullptr_t __p, _Dp __d); template shared_ptr(nullptr_t __p, _Dp __d, _Alloc __a); template _LIBCPP_INLINE_VISIBILITY shared_ptr(const shared_ptr<_Yp>& __r, element_type* __p) _NOEXCEPT; _LIBCPP_INLINE_VISIBILITY shared_ptr(const shared_ptr& __r) _NOEXCEPT; template _LIBCPP_INLINE_VISIBILITY shared_ptr(const shared_ptr<_Yp>& __r, typename enable_if<__compatible_with<_Yp, element_type>::value, __nat>::type = __nat()) _NOEXCEPT; _LIBCPP_INLINE_VISIBILITY shared_ptr(shared_ptr&& __r) _NOEXCEPT; template _LIBCPP_INLINE_VISIBILITY shared_ptr(shared_ptr<_Yp>&& __r, typename enable_if<__compatible_with<_Yp, element_type>::value, __nat>::type = __nat()) _NOEXCEPT; template explicit shared_ptr(const weak_ptr<_Yp>& __r, typename enable_if::value, __nat>::type= __nat()); #if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR) template shared_ptr(auto_ptr<_Yp>&& __r, typename enable_if::value, __nat>::type = __nat()); #endif template shared_ptr(unique_ptr<_Yp, _Dp>&&, 
typename enable_if < !is_lvalue_reference<_Dp>::value && - !is_array<_Yp>::value && is_convertible::pointer, element_type*>::value, __nat >::type = __nat()); template shared_ptr(unique_ptr<_Yp, _Dp>&&, typename enable_if < is_lvalue_reference<_Dp>::value && - !is_array<_Yp>::value && is_convertible::pointer, element_type*>::value, __nat >::type = __nat()); ~shared_ptr(); _LIBCPP_INLINE_VISIBILITY shared_ptr& operator=(const shared_ptr& __r) _NOEXCEPT; template typename enable_if < __compatible_with<_Yp, element_type>::value, shared_ptr& >::type _LIBCPP_INLINE_VISIBILITY operator=(const shared_ptr<_Yp>& __r) _NOEXCEPT; _LIBCPP_INLINE_VISIBILITY shared_ptr& operator=(shared_ptr&& __r) _NOEXCEPT; template typename enable_if < __compatible_with<_Yp, element_type>::value, shared_ptr& >::type _LIBCPP_INLINE_VISIBILITY operator=(shared_ptr<_Yp>&& __r); #if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR) template _LIBCPP_INLINE_VISIBILITY typename enable_if < !is_array<_Yp>::value && is_convertible<_Yp*, element_type*>::value, shared_ptr >::type& operator=(auto_ptr<_Yp>&& __r); #endif template typename enable_if < - !is_array<_Yp>::value && is_convertible::pointer, element_type*>::value, shared_ptr& >::type _LIBCPP_INLINE_VISIBILITY operator=(unique_ptr<_Yp, _Dp>&& __r); _LIBCPP_INLINE_VISIBILITY void swap(shared_ptr& __r) _NOEXCEPT; _LIBCPP_INLINE_VISIBILITY void reset() _NOEXCEPT; template typename enable_if < __compatible_with<_Yp, element_type>::value, void >::type _LIBCPP_INLINE_VISIBILITY reset(_Yp* __p); template typename enable_if < __compatible_with<_Yp, element_type>::value, void >::type _LIBCPP_INLINE_VISIBILITY reset(_Yp* __p, _Dp __d); template typename enable_if < __compatible_with<_Yp, element_type>::value, void >::type _LIBCPP_INLINE_VISIBILITY reset(_Yp* __p, _Dp __d, _Alloc __a); _LIBCPP_INLINE_VISIBILITY element_type* get() const _NOEXCEPT {return __ptr_;} _LIBCPP_INLINE_VISIBILITY typename add_lvalue_reference::type operator*() const _NOEXCEPT {return *__ptr_;} _LIBCPP_INLINE_VISIBILITY element_type* operator->() const _NOEXCEPT { static_assert(!_VSTD::is_array<_Tp>::value, "std::shared_ptr::operator-> is only valid when T is not an array type."); return __ptr_; } _LIBCPP_INLINE_VISIBILITY long use_count() const _NOEXCEPT {return __cntrl_ ? __cntrl_->use_count() : 0;} _LIBCPP_INLINE_VISIBILITY bool unique() const _NOEXCEPT {return use_count() == 1;} _LIBCPP_INLINE_VISIBILITY _LIBCPP_EXPLICIT operator bool() const _NOEXCEPT {return get() != nullptr;} template _LIBCPP_INLINE_VISIBILITY bool owner_before(shared_ptr<_Up> const& __p) const _NOEXCEPT {return __cntrl_ < __p.__cntrl_;} template _LIBCPP_INLINE_VISIBILITY bool owner_before(weak_ptr<_Up> const& __p) const _NOEXCEPT {return __cntrl_ < __p.__cntrl_;} _LIBCPP_INLINE_VISIBILITY bool __owner_equivalent(const shared_ptr& __p) const {return __cntrl_ == __p.__cntrl_;} #if _LIBCPP_STD_VER > 14 typename add_lvalue_reference::type _LIBCPP_INLINE_VISIBILITY operator[](ptrdiff_t __i) const { static_assert(_VSTD::is_array<_Tp>::value, "std::shared_ptr::operator[] is only valid when T is an array type."); return __ptr_[__i]; } #endif #ifndef _LIBCPP_NO_RTTI template _LIBCPP_INLINE_VISIBILITY _Dp* __get_deleter() const _NOEXCEPT {return static_cast<_Dp*>(__cntrl_ ? 
const_cast(__cntrl_->__get_deleter(typeid(_Dp))) : nullptr);} #endif // _LIBCPP_NO_RTTI template static shared_ptr<_Tp> __create_with_control_block(_Yp* __p, _CntrlBlk* __cntrl) _NOEXCEPT { shared_ptr<_Tp> __r; __r.__ptr_ = __p; __r.__cntrl_ = __cntrl; __r.__enable_weak_this(__r.__ptr_, __r.__ptr_); return __r; } private: template ::value> struct __shared_ptr_default_allocator { typedef allocator<_Yp> type; }; template struct __shared_ptr_default_allocator<_Yp, true> { typedef allocator<__shared_ptr_dummy_rebind_allocator_type> type; }; template _LIBCPP_INLINE_VISIBILITY typename enable_if* >::value, void>::type __enable_weak_this(const enable_shared_from_this<_Yp>* __e, _OrigPtr* __ptr) _NOEXCEPT { typedef typename remove_cv<_Yp>::type _RawYp; if (__e && __e->__weak_this_.expired()) { __e->__weak_this_ = shared_ptr<_RawYp>(*this, const_cast<_RawYp*>(static_cast(__ptr))); } } _LIBCPP_INLINE_VISIBILITY void __enable_weak_this(...) _NOEXCEPT {} template struct __shared_ptr_default_delete : default_delete<_Yp> {}; template struct __shared_ptr_default_delete<_Yp[_Sz], _Un> : default_delete<_Yp[]> {}; template struct __shared_ptr_default_delete<_Yp[], _Un> : default_delete<_Yp[]> {}; template friend class _LIBCPP_TEMPLATE_VIS shared_ptr; template friend class _LIBCPP_TEMPLATE_VIS weak_ptr; }; #ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES template shared_ptr(weak_ptr<_Tp>) -> shared_ptr<_Tp>; template shared_ptr(unique_ptr<_Tp, _Dp>) -> shared_ptr<_Tp>; #endif template inline _LIBCPP_CONSTEXPR shared_ptr<_Tp>::shared_ptr() _NOEXCEPT : __ptr_(nullptr), __cntrl_(nullptr) { } template inline _LIBCPP_CONSTEXPR shared_ptr<_Tp>::shared_ptr(nullptr_t) _NOEXCEPT : __ptr_(nullptr), __cntrl_(nullptr) { } template template shared_ptr<_Tp>::shared_ptr(_Yp* __p, typename enable_if<__compatible_with<_Yp, element_type>::value, __nat>::type) : __ptr_(__p) { unique_ptr<_Yp> __hold(__p); typedef typename __shared_ptr_default_allocator<_Yp>::type _AllocT; typedef __shared_ptr_pointer<_Yp*, __shared_ptr_default_delete<_Tp, _Yp>, _AllocT > _CntrlBlk; __cntrl_ = new _CntrlBlk(__p, __shared_ptr_default_delete<_Tp, _Yp>(), _AllocT()); __hold.release(); __enable_weak_this(__p, __p); } template template shared_ptr<_Tp>::shared_ptr(_Yp* __p, _Dp __d, typename enable_if<__compatible_with<_Yp, element_type>::value, __nat>::type) : __ptr_(__p) { #ifndef _LIBCPP_NO_EXCEPTIONS try { #endif // _LIBCPP_NO_EXCEPTIONS typedef typename __shared_ptr_default_allocator<_Yp>::type _AllocT; typedef __shared_ptr_pointer<_Yp*, _Dp, _AllocT > _CntrlBlk; __cntrl_ = new _CntrlBlk(__p, __d, _AllocT()); __enable_weak_this(__p, __p); #ifndef _LIBCPP_NO_EXCEPTIONS } catch (...) { __d(__p); throw; } #endif // _LIBCPP_NO_EXCEPTIONS } template template shared_ptr<_Tp>::shared_ptr(nullptr_t __p, _Dp __d) : __ptr_(nullptr) { #ifndef _LIBCPP_NO_EXCEPTIONS try { #endif // _LIBCPP_NO_EXCEPTIONS typedef typename __shared_ptr_default_allocator<_Tp>::type _AllocT; typedef __shared_ptr_pointer _CntrlBlk; __cntrl_ = new _CntrlBlk(__p, __d, _AllocT()); #ifndef _LIBCPP_NO_EXCEPTIONS } catch (...) 
{ __d(__p); throw; } #endif // _LIBCPP_NO_EXCEPTIONS } template template shared_ptr<_Tp>::shared_ptr(_Yp* __p, _Dp __d, _Alloc __a, typename enable_if<__compatible_with<_Yp, element_type>::value, __nat>::type) : __ptr_(__p) { #ifndef _LIBCPP_NO_EXCEPTIONS try { #endif // _LIBCPP_NO_EXCEPTIONS typedef __shared_ptr_pointer<_Yp*, _Dp, _Alloc> _CntrlBlk; typedef typename __allocator_traits_rebind<_Alloc, _CntrlBlk>::type _A2; typedef __allocator_destructor<_A2> _D2; _A2 __a2(__a); unique_ptr<_CntrlBlk, _D2> __hold2(__a2.allocate(1), _D2(__a2, 1)); ::new ((void*)_VSTD::addressof(*__hold2.get())) _CntrlBlk(__p, __d, __a); __cntrl_ = _VSTD::addressof(*__hold2.release()); __enable_weak_this(__p, __p); #ifndef _LIBCPP_NO_EXCEPTIONS } catch (...) { __d(__p); throw; } #endif // _LIBCPP_NO_EXCEPTIONS } template template shared_ptr<_Tp>::shared_ptr(nullptr_t __p, _Dp __d, _Alloc __a) : __ptr_(nullptr) { #ifndef _LIBCPP_NO_EXCEPTIONS try { #endif // _LIBCPP_NO_EXCEPTIONS typedef __shared_ptr_pointer _CntrlBlk; typedef typename __allocator_traits_rebind<_Alloc, _CntrlBlk>::type _A2; typedef __allocator_destructor<_A2> _D2; _A2 __a2(__a); unique_ptr<_CntrlBlk, _D2> __hold2(__a2.allocate(1), _D2(__a2, 1)); ::new ((void*)_VSTD::addressof(*__hold2.get())) _CntrlBlk(__p, __d, __a); __cntrl_ = _VSTD::addressof(*__hold2.release()); #ifndef _LIBCPP_NO_EXCEPTIONS } catch (...) { __d(__p); throw; } #endif // _LIBCPP_NO_EXCEPTIONS } template template inline shared_ptr<_Tp>::shared_ptr(const shared_ptr<_Yp>& __r, element_type *__p) _NOEXCEPT : __ptr_(__p), __cntrl_(__r.__cntrl_) { if (__cntrl_) __cntrl_->__add_shared(); } template inline shared_ptr<_Tp>::shared_ptr(const shared_ptr& __r) _NOEXCEPT : __ptr_(__r.__ptr_), __cntrl_(__r.__cntrl_) { if (__cntrl_) __cntrl_->__add_shared(); } template template inline shared_ptr<_Tp>::shared_ptr(const shared_ptr<_Yp>& __r, typename enable_if<__compatible_with<_Yp, element_type>::value, __nat>::type) _NOEXCEPT : __ptr_(__r.__ptr_), __cntrl_(__r.__cntrl_) { if (__cntrl_) __cntrl_->__add_shared(); } template inline shared_ptr<_Tp>::shared_ptr(shared_ptr&& __r) _NOEXCEPT : __ptr_(__r.__ptr_), __cntrl_(__r.__cntrl_) { __r.__ptr_ = nullptr; __r.__cntrl_ = nullptr; } template template inline shared_ptr<_Tp>::shared_ptr(shared_ptr<_Yp>&& __r, typename enable_if<__compatible_with<_Yp, element_type>::value, __nat>::type) _NOEXCEPT : __ptr_(__r.__ptr_), __cntrl_(__r.__cntrl_) { __r.__ptr_ = nullptr; __r.__cntrl_ = nullptr; } #if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR) template template shared_ptr<_Tp>::shared_ptr(auto_ptr<_Yp>&& __r, typename enable_if::value, __nat>::type) : __ptr_(__r.get()) { typedef __shared_ptr_pointer<_Yp*, default_delete<_Yp>, allocator<_Yp> > _CntrlBlk; __cntrl_ = new _CntrlBlk(__r.get(), default_delete<_Yp>(), allocator<_Yp>()); __enable_weak_this(__r.get(), __r.get()); __r.release(); } #endif template template shared_ptr<_Tp>::shared_ptr(unique_ptr<_Yp, _Dp>&& __r, typename enable_if < !is_lvalue_reference<_Dp>::value && - !is_array<_Yp>::value && is_convertible::pointer, element_type*>::value, __nat >::type) : __ptr_(__r.get()) { #if _LIBCPP_STD_VER > 11 if (__ptr_ == nullptr) __cntrl_ = nullptr; else #endif { typedef typename __shared_ptr_default_allocator<_Yp>::type _AllocT; - typedef __shared_ptr_pointer<_Yp*, _Dp, _AllocT > _CntrlBlk; + typedef __shared_ptr_pointer::pointer, _Dp, _AllocT > _CntrlBlk; __cntrl_ = new _CntrlBlk(__r.get(), __r.get_deleter(), _AllocT()); __enable_weak_this(__r.get(), __r.get()); } 
__r.release(); } template template shared_ptr<_Tp>::shared_ptr(unique_ptr<_Yp, _Dp>&& __r, typename enable_if < is_lvalue_reference<_Dp>::value && - !is_array<_Yp>::value && is_convertible::pointer, element_type*>::value, __nat >::type) : __ptr_(__r.get()) { #if _LIBCPP_STD_VER > 11 if (__ptr_ == nullptr) __cntrl_ = nullptr; else #endif { typedef typename __shared_ptr_default_allocator<_Yp>::type _AllocT; - typedef __shared_ptr_pointer<_Yp*, + typedef __shared_ptr_pointer::pointer, reference_wrapper::type>, _AllocT > _CntrlBlk; __cntrl_ = new _CntrlBlk(__r.get(), _VSTD::ref(__r.get_deleter()), _AllocT()); __enable_weak_this(__r.get(), __r.get()); } __r.release(); } template shared_ptr<_Tp>::~shared_ptr() { if (__cntrl_) __cntrl_->__release_shared(); } template inline shared_ptr<_Tp>& shared_ptr<_Tp>::operator=(const shared_ptr& __r) _NOEXCEPT { shared_ptr(__r).swap(*this); return *this; } template template inline typename enable_if < __compatible_with<_Yp, typename shared_ptr<_Tp>::element_type>::value, shared_ptr<_Tp>& >::type shared_ptr<_Tp>::operator=(const shared_ptr<_Yp>& __r) _NOEXCEPT { shared_ptr(__r).swap(*this); return *this; } template inline shared_ptr<_Tp>& shared_ptr<_Tp>::operator=(shared_ptr&& __r) _NOEXCEPT { shared_ptr(_VSTD::move(__r)).swap(*this); return *this; } template template inline typename enable_if < __compatible_with<_Yp, typename shared_ptr<_Tp>::element_type>::value, shared_ptr<_Tp>& >::type shared_ptr<_Tp>::operator=(shared_ptr<_Yp>&& __r) { shared_ptr(_VSTD::move(__r)).swap(*this); return *this; } #if _LIBCPP_STD_VER <= 14 || defined(_LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR) template template inline typename enable_if < !is_array<_Yp>::value && is_convertible<_Yp*, typename shared_ptr<_Tp>::element_type*>::value, shared_ptr<_Tp> >::type& shared_ptr<_Tp>::operator=(auto_ptr<_Yp>&& __r) { shared_ptr(_VSTD::move(__r)).swap(*this); return *this; } #endif template template inline typename enable_if < - !is_array<_Yp>::value && is_convertible::pointer, typename shared_ptr<_Tp>::element_type*>::value, shared_ptr<_Tp>& >::type shared_ptr<_Tp>::operator=(unique_ptr<_Yp, _Dp>&& __r) { shared_ptr(_VSTD::move(__r)).swap(*this); return *this; } template inline void shared_ptr<_Tp>::swap(shared_ptr& __r) _NOEXCEPT { _VSTD::swap(__ptr_, __r.__ptr_); _VSTD::swap(__cntrl_, __r.__cntrl_); } template inline void shared_ptr<_Tp>::reset() _NOEXCEPT { shared_ptr().swap(*this); } template template inline typename enable_if < __compatible_with<_Yp, typename shared_ptr<_Tp>::element_type>::value, void >::type shared_ptr<_Tp>::reset(_Yp* __p) { shared_ptr(__p).swap(*this); } template template inline typename enable_if < __compatible_with<_Yp, typename shared_ptr<_Tp>::element_type>::value, void >::type shared_ptr<_Tp>::reset(_Yp* __p, _Dp __d) { shared_ptr(__p, __d).swap(*this); } template template inline typename enable_if < __compatible_with<_Yp, typename shared_ptr<_Tp>::element_type>::value, void >::type shared_ptr<_Tp>::reset(_Yp* __p, _Dp __d, _Alloc __a) { shared_ptr(__p, __d, __a).swap(*this); } // // std::allocate_shared and std::make_shared // template::value> > _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> allocate_shared(const _Alloc& __a, _Args&& ...__args) { using _ControlBlock = __shared_ptr_emplace<_Tp, _Alloc>; using _ControlBlockAllocator = typename __allocator_traits_rebind<_Alloc, _ControlBlock>::type; __allocation_guard<_ControlBlockAllocator> __guard(__a, 1); ::new ((void*)_VSTD::addressof(*__guard.__get())) _ControlBlock(__a, _VSTD::forward<_Args>(__args)...); auto 
__control_block = __guard.__release_ptr(); return shared_ptr<_Tp>::__create_with_control_block((*__control_block).__get_elem(), _VSTD::addressof(*__control_block)); } template::value> > _LIBCPP_HIDE_FROM_ABI shared_ptr<_Tp> make_shared(_Args&& ...__args) { return _VSTD::allocate_shared<_Tp>(allocator<_Tp>(), _VSTD::forward<_Args>(__args)...); } template inline _LIBCPP_INLINE_VISIBILITY bool operator==(const shared_ptr<_Tp>& __x, const shared_ptr<_Up>& __y) _NOEXCEPT { return __x.get() == __y.get(); } template inline _LIBCPP_INLINE_VISIBILITY bool operator!=(const shared_ptr<_Tp>& __x, const shared_ptr<_Up>& __y) _NOEXCEPT { return !(__x == __y); } template inline _LIBCPP_INLINE_VISIBILITY bool operator<(const shared_ptr<_Tp>& __x, const shared_ptr<_Up>& __y) _NOEXCEPT { #if _LIBCPP_STD_VER <= 11 typedef typename common_type<_Tp*, _Up*>::type _Vp; return less<_Vp>()(__x.get(), __y.get()); #else return less<>()(__x.get(), __y.get()); #endif } template inline _LIBCPP_INLINE_VISIBILITY bool operator>(const shared_ptr<_Tp>& __x, const shared_ptr<_Up>& __y) _NOEXCEPT { return __y < __x; } template inline _LIBCPP_INLINE_VISIBILITY bool operator<=(const shared_ptr<_Tp>& __x, const shared_ptr<_Up>& __y) _NOEXCEPT { return !(__y < __x); } template inline _LIBCPP_INLINE_VISIBILITY bool operator>=(const shared_ptr<_Tp>& __x, const shared_ptr<_Up>& __y) _NOEXCEPT { return !(__x < __y); } template inline _LIBCPP_INLINE_VISIBILITY bool operator==(const shared_ptr<_Tp>& __x, nullptr_t) _NOEXCEPT { return !__x; } template inline _LIBCPP_INLINE_VISIBILITY bool operator==(nullptr_t, const shared_ptr<_Tp>& __x) _NOEXCEPT { return !__x; } template inline _LIBCPP_INLINE_VISIBILITY bool operator!=(const shared_ptr<_Tp>& __x, nullptr_t) _NOEXCEPT { return static_cast(__x); } template inline _LIBCPP_INLINE_VISIBILITY bool operator!=(nullptr_t, const shared_ptr<_Tp>& __x) _NOEXCEPT { return static_cast(__x); } template inline _LIBCPP_INLINE_VISIBILITY bool operator<(const shared_ptr<_Tp>& __x, nullptr_t) _NOEXCEPT { return less<_Tp*>()(__x.get(), nullptr); } template inline _LIBCPP_INLINE_VISIBILITY bool operator<(nullptr_t, const shared_ptr<_Tp>& __x) _NOEXCEPT { return less<_Tp*>()(nullptr, __x.get()); } template inline _LIBCPP_INLINE_VISIBILITY bool operator>(const shared_ptr<_Tp>& __x, nullptr_t) _NOEXCEPT { return nullptr < __x; } template inline _LIBCPP_INLINE_VISIBILITY bool operator>(nullptr_t, const shared_ptr<_Tp>& __x) _NOEXCEPT { return __x < nullptr; } template inline _LIBCPP_INLINE_VISIBILITY bool operator<=(const shared_ptr<_Tp>& __x, nullptr_t) _NOEXCEPT { return !(nullptr < __x); } template inline _LIBCPP_INLINE_VISIBILITY bool operator<=(nullptr_t, const shared_ptr<_Tp>& __x) _NOEXCEPT { return !(__x < nullptr); } template inline _LIBCPP_INLINE_VISIBILITY bool operator>=(const shared_ptr<_Tp>& __x, nullptr_t) _NOEXCEPT { return !(__x < nullptr); } template inline _LIBCPP_INLINE_VISIBILITY bool operator>=(nullptr_t, const shared_ptr<_Tp>& __x) _NOEXCEPT { return !(nullptr < __x); } template inline _LIBCPP_INLINE_VISIBILITY void swap(shared_ptr<_Tp>& __x, shared_ptr<_Tp>& __y) _NOEXCEPT { __x.swap(__y); } template inline _LIBCPP_INLINE_VISIBILITY shared_ptr<_Tp> static_pointer_cast(const shared_ptr<_Up>& __r) _NOEXCEPT { return shared_ptr<_Tp>(__r, static_cast< typename shared_ptr<_Tp>::element_type*>(__r.get())); } template inline _LIBCPP_INLINE_VISIBILITY shared_ptr<_Tp> dynamic_pointer_cast(const shared_ptr<_Up>& __r) _NOEXCEPT { typedef typename shared_ptr<_Tp>::element_type _ET; _ET* __p = 
dynamic_cast<_ET*>(__r.get()); return __p ? shared_ptr<_Tp>(__r, __p) : shared_ptr<_Tp>(); } template shared_ptr<_Tp> const_pointer_cast(const shared_ptr<_Up>& __r) _NOEXCEPT { typedef typename shared_ptr<_Tp>::element_type _RTp; return shared_ptr<_Tp>(__r, const_cast<_RTp*>(__r.get())); } template shared_ptr<_Tp> reinterpret_pointer_cast(const shared_ptr<_Up>& __r) _NOEXCEPT { return shared_ptr<_Tp>(__r, reinterpret_cast< typename shared_ptr<_Tp>::element_type*>(__r.get())); } #ifndef _LIBCPP_NO_RTTI template inline _LIBCPP_INLINE_VISIBILITY _Dp* get_deleter(const shared_ptr<_Tp>& __p) _NOEXCEPT { return __p.template __get_deleter<_Dp>(); } #endif // _LIBCPP_NO_RTTI template class _LIBCPP_SHARED_PTR_TRIVIAL_ABI _LIBCPP_TEMPLATE_VIS weak_ptr { public: typedef _Tp element_type; private: element_type* __ptr_; __shared_weak_count* __cntrl_; public: _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR weak_ptr() _NOEXCEPT; template _LIBCPP_INLINE_VISIBILITY weak_ptr(shared_ptr<_Yp> const& __r, typename enable_if::value, __nat*>::type = 0) _NOEXCEPT; _LIBCPP_INLINE_VISIBILITY weak_ptr(weak_ptr const& __r) _NOEXCEPT; template _LIBCPP_INLINE_VISIBILITY weak_ptr(weak_ptr<_Yp> const& __r, typename enable_if::value, __nat*>::type = 0) _NOEXCEPT; _LIBCPP_INLINE_VISIBILITY weak_ptr(weak_ptr&& __r) _NOEXCEPT; template _LIBCPP_INLINE_VISIBILITY weak_ptr(weak_ptr<_Yp>&& __r, typename enable_if::value, __nat*>::type = 0) _NOEXCEPT; ~weak_ptr(); _LIBCPP_INLINE_VISIBILITY weak_ptr& operator=(weak_ptr const& __r) _NOEXCEPT; template typename enable_if < is_convertible<_Yp*, element_type*>::value, weak_ptr& >::type _LIBCPP_INLINE_VISIBILITY operator=(weak_ptr<_Yp> const& __r) _NOEXCEPT; _LIBCPP_INLINE_VISIBILITY weak_ptr& operator=(weak_ptr&& __r) _NOEXCEPT; template typename enable_if < is_convertible<_Yp*, element_type*>::value, weak_ptr& >::type _LIBCPP_INLINE_VISIBILITY operator=(weak_ptr<_Yp>&& __r) _NOEXCEPT; template typename enable_if < is_convertible<_Yp*, element_type*>::value, weak_ptr& >::type _LIBCPP_INLINE_VISIBILITY operator=(shared_ptr<_Yp> const& __r) _NOEXCEPT; _LIBCPP_INLINE_VISIBILITY void swap(weak_ptr& __r) _NOEXCEPT; _LIBCPP_INLINE_VISIBILITY void reset() _NOEXCEPT; _LIBCPP_INLINE_VISIBILITY long use_count() const _NOEXCEPT {return __cntrl_ ? 
__cntrl_->use_count() : 0;} _LIBCPP_INLINE_VISIBILITY bool expired() const _NOEXCEPT {return __cntrl_ == nullptr || __cntrl_->use_count() == 0;} shared_ptr<_Tp> lock() const _NOEXCEPT; template _LIBCPP_INLINE_VISIBILITY bool owner_before(const shared_ptr<_Up>& __r) const _NOEXCEPT {return __cntrl_ < __r.__cntrl_;} template _LIBCPP_INLINE_VISIBILITY bool owner_before(const weak_ptr<_Up>& __r) const _NOEXCEPT {return __cntrl_ < __r.__cntrl_;} template friend class _LIBCPP_TEMPLATE_VIS weak_ptr; template friend class _LIBCPP_TEMPLATE_VIS shared_ptr; }; #ifndef _LIBCPP_HAS_NO_DEDUCTION_GUIDES template weak_ptr(shared_ptr<_Tp>) -> weak_ptr<_Tp>; #endif template inline _LIBCPP_CONSTEXPR weak_ptr<_Tp>::weak_ptr() _NOEXCEPT : __ptr_(nullptr), __cntrl_(nullptr) { } template inline weak_ptr<_Tp>::weak_ptr(weak_ptr const& __r) _NOEXCEPT : __ptr_(__r.__ptr_), __cntrl_(__r.__cntrl_) { if (__cntrl_) __cntrl_->__add_weak(); } template template inline weak_ptr<_Tp>::weak_ptr(shared_ptr<_Yp> const& __r, typename enable_if::value, __nat*>::type) _NOEXCEPT : __ptr_(__r.__ptr_), __cntrl_(__r.__cntrl_) { if (__cntrl_) __cntrl_->__add_weak(); } template template inline weak_ptr<_Tp>::weak_ptr(weak_ptr<_Yp> const& __r, typename enable_if::value, __nat*>::type) _NOEXCEPT : __ptr_(__r.__ptr_), __cntrl_(__r.__cntrl_) { if (__cntrl_) __cntrl_->__add_weak(); } template inline weak_ptr<_Tp>::weak_ptr(weak_ptr&& __r) _NOEXCEPT : __ptr_(__r.__ptr_), __cntrl_(__r.__cntrl_) { __r.__ptr_ = nullptr; __r.__cntrl_ = nullptr; } template template inline weak_ptr<_Tp>::weak_ptr(weak_ptr<_Yp>&& __r, typename enable_if::value, __nat*>::type) _NOEXCEPT : __ptr_(__r.__ptr_), __cntrl_(__r.__cntrl_) { __r.__ptr_ = nullptr; __r.__cntrl_ = nullptr; } template weak_ptr<_Tp>::~weak_ptr() { if (__cntrl_) __cntrl_->__release_weak(); } template inline weak_ptr<_Tp>& weak_ptr<_Tp>::operator=(weak_ptr const& __r) _NOEXCEPT { weak_ptr(__r).swap(*this); return *this; } template template inline typename enable_if < is_convertible<_Yp*, _Tp*>::value, weak_ptr<_Tp>& >::type weak_ptr<_Tp>::operator=(weak_ptr<_Yp> const& __r) _NOEXCEPT { weak_ptr(__r).swap(*this); return *this; } template inline weak_ptr<_Tp>& weak_ptr<_Tp>::operator=(weak_ptr&& __r) _NOEXCEPT { weak_ptr(_VSTD::move(__r)).swap(*this); return *this; } template template inline typename enable_if < is_convertible<_Yp*, _Tp*>::value, weak_ptr<_Tp>& >::type weak_ptr<_Tp>::operator=(weak_ptr<_Yp>&& __r) _NOEXCEPT { weak_ptr(_VSTD::move(__r)).swap(*this); return *this; } template template inline typename enable_if < is_convertible<_Yp*, _Tp*>::value, weak_ptr<_Tp>& >::type weak_ptr<_Tp>::operator=(shared_ptr<_Yp> const& __r) _NOEXCEPT { weak_ptr(__r).swap(*this); return *this; } template inline void weak_ptr<_Tp>::swap(weak_ptr& __r) _NOEXCEPT { _VSTD::swap(__ptr_, __r.__ptr_); _VSTD::swap(__cntrl_, __r.__cntrl_); } template inline _LIBCPP_INLINE_VISIBILITY void swap(weak_ptr<_Tp>& __x, weak_ptr<_Tp>& __y) _NOEXCEPT { __x.swap(__y); } template inline void weak_ptr<_Tp>::reset() _NOEXCEPT { weak_ptr().swap(*this); } template template shared_ptr<_Tp>::shared_ptr(const weak_ptr<_Yp>& __r, typename enable_if::value, __nat>::type) : __ptr_(__r.__ptr_), __cntrl_(__r.__cntrl_ ? __r.__cntrl_->lock() : __r.__cntrl_) { if (__cntrl_ == nullptr) __throw_bad_weak_ptr(); } template shared_ptr<_Tp> weak_ptr<_Tp>::lock() const _NOEXCEPT { shared_ptr<_Tp> __r; __r.__cntrl_ = __cntrl_ ? 
__cntrl_->lock() : __cntrl_; if (__r.__cntrl_) __r.__ptr_ = __ptr_; return __r; } #if _LIBCPP_STD_VER > 14 template struct owner_less; #else template struct owner_less; #endif template struct _LIBCPP_TEMPLATE_VIS owner_less > : binary_function, shared_ptr<_Tp>, bool> { typedef bool result_type; _LIBCPP_INLINE_VISIBILITY bool operator()(shared_ptr<_Tp> const& __x, shared_ptr<_Tp> const& __y) const _NOEXCEPT {return __x.owner_before(__y);} _LIBCPP_INLINE_VISIBILITY bool operator()(shared_ptr<_Tp> const& __x, weak_ptr<_Tp> const& __y) const _NOEXCEPT {return __x.owner_before(__y);} _LIBCPP_INLINE_VISIBILITY bool operator()( weak_ptr<_Tp> const& __x, shared_ptr<_Tp> const& __y) const _NOEXCEPT {return __x.owner_before(__y);} }; template struct _LIBCPP_TEMPLATE_VIS owner_less > : binary_function, weak_ptr<_Tp>, bool> { typedef bool result_type; _LIBCPP_INLINE_VISIBILITY bool operator()( weak_ptr<_Tp> const& __x, weak_ptr<_Tp> const& __y) const _NOEXCEPT {return __x.owner_before(__y);} _LIBCPP_INLINE_VISIBILITY bool operator()(shared_ptr<_Tp> const& __x, weak_ptr<_Tp> const& __y) const _NOEXCEPT {return __x.owner_before(__y);} _LIBCPP_INLINE_VISIBILITY bool operator()( weak_ptr<_Tp> const& __x, shared_ptr<_Tp> const& __y) const _NOEXCEPT {return __x.owner_before(__y);} }; #if _LIBCPP_STD_VER > 14 template <> struct _LIBCPP_TEMPLATE_VIS owner_less { template _LIBCPP_INLINE_VISIBILITY bool operator()( shared_ptr<_Tp> const& __x, shared_ptr<_Up> const& __y) const _NOEXCEPT {return __x.owner_before(__y);} template _LIBCPP_INLINE_VISIBILITY bool operator()( shared_ptr<_Tp> const& __x, weak_ptr<_Up> const& __y) const _NOEXCEPT {return __x.owner_before(__y);} template _LIBCPP_INLINE_VISIBILITY bool operator()( weak_ptr<_Tp> const& __x, shared_ptr<_Up> const& __y) const _NOEXCEPT {return __x.owner_before(__y);} template _LIBCPP_INLINE_VISIBILITY bool operator()( weak_ptr<_Tp> const& __x, weak_ptr<_Up> const& __y) const _NOEXCEPT {return __x.owner_before(__y);} typedef void is_transparent; }; #endif template class _LIBCPP_TEMPLATE_VIS enable_shared_from_this { mutable weak_ptr<_Tp> __weak_this_; protected: _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR enable_shared_from_this() _NOEXCEPT {} _LIBCPP_INLINE_VISIBILITY enable_shared_from_this(enable_shared_from_this const&) _NOEXCEPT {} _LIBCPP_INLINE_VISIBILITY enable_shared_from_this& operator=(enable_shared_from_this const&) _NOEXCEPT {return *this;} _LIBCPP_INLINE_VISIBILITY ~enable_shared_from_this() {} public: _LIBCPP_INLINE_VISIBILITY shared_ptr<_Tp> shared_from_this() {return shared_ptr<_Tp>(__weak_this_);} _LIBCPP_INLINE_VISIBILITY shared_ptr<_Tp const> shared_from_this() const {return shared_ptr(__weak_this_);} #if _LIBCPP_STD_VER > 14 _LIBCPP_INLINE_VISIBILITY weak_ptr<_Tp> weak_from_this() _NOEXCEPT { return __weak_this_; } _LIBCPP_INLINE_VISIBILITY weak_ptr weak_from_this() const _NOEXCEPT { return __weak_this_; } #endif // _LIBCPP_STD_VER > 14 template friend class shared_ptr; }; template struct _LIBCPP_TEMPLATE_VIS hash > { typedef shared_ptr<_Tp> argument_type; typedef size_t result_type; _LIBCPP_INLINE_VISIBILITY result_type operator()(const argument_type& __ptr) const _NOEXCEPT { return hash::element_type*>()(__ptr.get()); } }; template inline _LIBCPP_INLINE_VISIBILITY basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, shared_ptr<_Yp> const& __p); #if !defined(_LIBCPP_HAS_NO_ATOMIC_HEADER) class _LIBCPP_TYPE_VIS __sp_mut { void* __lx; public: void lock() _NOEXCEPT; void unlock() _NOEXCEPT; private: 
_LIBCPP_CONSTEXPR __sp_mut(void*) _NOEXCEPT; __sp_mut(const __sp_mut&); __sp_mut& operator=(const __sp_mut&); friend _LIBCPP_FUNC_VIS __sp_mut& __get_sp_mut(const void*); }; _LIBCPP_FUNC_VIS _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR __sp_mut& __get_sp_mut(const void*); template inline _LIBCPP_INLINE_VISIBILITY bool atomic_is_lock_free(const shared_ptr<_Tp>*) { return false; } template _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR shared_ptr<_Tp> atomic_load(const shared_ptr<_Tp>* __p) { __sp_mut& __m = __get_sp_mut(__p); __m.lock(); shared_ptr<_Tp> __q = *__p; __m.unlock(); return __q; } template inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR shared_ptr<_Tp> atomic_load_explicit(const shared_ptr<_Tp>* __p, memory_order) { return atomic_load(__p); } template _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR void atomic_store(shared_ptr<_Tp>* __p, shared_ptr<_Tp> __r) { __sp_mut& __m = __get_sp_mut(__p); __m.lock(); __p->swap(__r); __m.unlock(); } template inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR void atomic_store_explicit(shared_ptr<_Tp>* __p, shared_ptr<_Tp> __r, memory_order) { atomic_store(__p, __r); } template _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR shared_ptr<_Tp> atomic_exchange(shared_ptr<_Tp>* __p, shared_ptr<_Tp> __r) { __sp_mut& __m = __get_sp_mut(__p); __m.lock(); __p->swap(__r); __m.unlock(); return __r; } template inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR shared_ptr<_Tp> atomic_exchange_explicit(shared_ptr<_Tp>* __p, shared_ptr<_Tp> __r, memory_order) { return atomic_exchange(__p, __r); } template _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR bool atomic_compare_exchange_strong(shared_ptr<_Tp>* __p, shared_ptr<_Tp>* __v, shared_ptr<_Tp> __w) { shared_ptr<_Tp> __temp; __sp_mut& __m = __get_sp_mut(__p); __m.lock(); if (__p->__owner_equivalent(*__v)) { _VSTD::swap(__temp, *__p); *__p = __w; __m.unlock(); return true; } _VSTD::swap(__temp, *__v); *__v = *__p; __m.unlock(); return false; } template inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR bool atomic_compare_exchange_weak(shared_ptr<_Tp>* __p, shared_ptr<_Tp>* __v, shared_ptr<_Tp> __w) { return atomic_compare_exchange_strong(__p, __v, __w); } template inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR bool atomic_compare_exchange_strong_explicit(shared_ptr<_Tp>* __p, shared_ptr<_Tp>* __v, shared_ptr<_Tp> __w, memory_order, memory_order) { return atomic_compare_exchange_strong(__p, __v, __w); } template inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR bool atomic_compare_exchange_weak_explicit(shared_ptr<_Tp>* __p, shared_ptr<_Tp>* __v, shared_ptr<_Tp> __w, memory_order, memory_order) { return atomic_compare_exchange_weak(__p, __v, __w); } #endif // !defined(_LIBCPP_HAS_NO_ATOMIC_HEADER) //enum class #if defined(_LIBCPP_ABI_POINTER_SAFETY_ENUM_TYPE) # ifndef _LIBCPP_CXX03_LANG enum class pointer_safety : unsigned char { relaxed, preferred, strict }; # endif #else struct _LIBCPP_TYPE_VIS pointer_safety { enum __lx { relaxed, preferred, strict }; __lx __v_; _LIBCPP_INLINE_VISIBILITY pointer_safety() : __v_() {} _LIBCPP_INLINE_VISIBILITY pointer_safety(__lx __v) : __v_(__v) {} _LIBCPP_INLINE_VISIBILITY operator int() const {return __v_;} }; #endif #if !defined(_LIBCPP_ABI_POINTER_SAFETY_ENUM_TYPE) && \ defined(_LIBCPP_BUILDING_LIBRARY) _LIBCPP_FUNC_VIS pointer_safety get_pointer_safety() _NOEXCEPT; #else // This function is only offered in C++03 under ABI v1. 
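A brief usage sketch for the mutex-pool based shared_ptr atomic access functions declared above (atomic_load, atomic_store and friends, which serialize through __get_sp_mut). Config, g_config, publisher and reader are invented names for illustration only; they are not part of libc++.

#include <memory>
#include <thread>

struct Config { int verbosity = 0; };

std::shared_ptr<Config> g_config;   // touched only through the atomic_* functions

void publisher() {
    auto fresh = std::make_shared<Config>();
    fresh->verbosity = 2;
    std::atomic_store(&g_config, fresh);            // locks the pooled __sp_mut internally
}

void reader() {
    std::shared_ptr<Config> snap = std::atomic_load(&g_config);
    if (snap) {
        // *snap can be used here without racing against publisher().
    }
}

int main() {
    std::thread t1(publisher), t2(reader);
    t1.join();
    t2.join();
}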
# if !defined(_LIBCPP_ABI_POINTER_SAFETY_ENUM_TYPE) || !defined(_LIBCPP_CXX03_LANG) inline _LIBCPP_INLINE_VISIBILITY pointer_safety get_pointer_safety() _NOEXCEPT { return pointer_safety::relaxed; } # endif #endif _LIBCPP_FUNC_VIS void declare_reachable(void* __p); _LIBCPP_FUNC_VIS void declare_no_pointers(char* __p, size_t __n); _LIBCPP_FUNC_VIS void undeclare_no_pointers(char* __p, size_t __n); _LIBCPP_FUNC_VIS void* __undeclare_reachable(void* __p); template inline _LIBCPP_INLINE_VISIBILITY _Tp* undeclare_reachable(_Tp* __p) { return static_cast<_Tp*>(__undeclare_reachable(__p)); } _LIBCPP_FUNC_VIS void* align(size_t __align, size_t __sz, void*& __ptr, size_t& __space); // --- Helper for container swap -- template _LIBCPP_INLINE_VISIBILITY void __swap_allocator(_Alloc & __a1, _Alloc & __a2, true_type) #if _LIBCPP_STD_VER >= 14 _NOEXCEPT #else _NOEXCEPT_(__is_nothrow_swappable<_Alloc>::value) #endif { using _VSTD::swap; swap(__a1, __a2); } template inline _LIBCPP_INLINE_VISIBILITY void __swap_allocator(_Alloc &, _Alloc &, false_type) _NOEXCEPT {} template inline _LIBCPP_INLINE_VISIBILITY void __swap_allocator(_Alloc & __a1, _Alloc & __a2) #if _LIBCPP_STD_VER >= 14 _NOEXCEPT #else _NOEXCEPT_(__is_nothrow_swappable<_Alloc>::value) #endif { _VSTD::__swap_allocator(__a1, __a2, integral_constant::propagate_on_container_swap::value>()); } template > struct __noexcept_move_assign_container : public integral_constant 14 || _Traits::is_always_equal::value #else && is_nothrow_move_assignable<_Alloc>::value #endif > {}; template struct __temp_value { typedef allocator_traits<_Alloc> _Traits; typename aligned_storage::type __v; _Alloc &__a; _Tp *__addr() { return reinterpret_cast<_Tp *>(addressof(__v)); } _Tp & get() { return *__addr(); } template _LIBCPP_NO_CFI __temp_value(_Alloc &__alloc, _Args&& ... __args) : __a(__alloc) { _Traits::construct(__a, reinterpret_cast<_Tp*>(addressof(__v)), _VSTD::forward<_Args>(__args)...); } ~__temp_value() { _Traits::destroy(__a, __addr()); } }; template struct __is_allocator : false_type {}; template struct __is_allocator<_Alloc, typename __void_t::type, typename __void_t().allocate(size_t(0)))>::type > : true_type {}; // __builtin_new_allocator -- A non-templated helper for allocating and // deallocating memory using __builtin_operator_new and // __builtin_operator_delete. It should be used in preference to // `std::allocator` to avoid additional instantiations. 
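The comment above describes the intent of the helper defined next. The sketch below shows the same pattern with hypothetical names (byte_deleter, byte_holder, allocate_bytes): a non-template helper hands out raw bytes wrapped in a unique_ptr whose deleter remembers size and alignment, giving exception-safe cleanup without instantiating std::allocator<T> for every element type.

#include <cstddef>
#include <memory>
#include <new>

struct byte_deleter {
    std::size_t size;
    std::align_val_t align;
    void operator()(void* p) const noexcept {
        ::operator delete(p, size, align);           // sized, aligned deallocation
    }
};

using byte_holder = std::unique_ptr<void, byte_deleter>;

inline byte_holder allocate_bytes(std::size_t n, std::size_t align) {
    void* p = ::operator new(n, std::align_val_t(align));
    return byte_holder(p, byte_deleter{n, std::align_val_t(align)});
}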
struct __builtin_new_allocator { struct __builtin_new_deleter { typedef void* pointer_type; _LIBCPP_CONSTEXPR explicit __builtin_new_deleter(size_t __size, size_t __align) : __size_(__size), __align_(__align) {} void operator()(void* p) const _NOEXCEPT { _VSTD::__libcpp_deallocate(p, __size_, __align_); } private: size_t __size_; size_t __align_; }; typedef unique_ptr __holder_t; static __holder_t __allocate_bytes(size_t __s, size_t __align) { return __holder_t(_VSTD::__libcpp_allocate(__s, __align), __builtin_new_deleter(__s, __align)); } static void __deallocate_bytes(void* __p, size_t __s, size_t __align) _NOEXCEPT { _VSTD::__libcpp_deallocate(__p, __s, __align); } template _LIBCPP_NODEBUG _LIBCPP_ALWAYS_INLINE static __holder_t __allocate_type(size_t __n) { return __allocate_bytes(__n * sizeof(_Tp), _LIBCPP_ALIGNOF(_Tp)); } template _LIBCPP_NODEBUG _LIBCPP_ALWAYS_INLINE static void __deallocate_type(void* __p, size_t __n) _NOEXCEPT { __deallocate_bytes(__p, __n * sizeof(_Tp), _LIBCPP_ALIGNOF(_Tp)); } }; _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS #if defined(_LIBCPP_HAS_PARALLEL_ALGORITHMS) && _LIBCPP_STD_VER >= 17 # include <__pstl_memory> #endif #endif // _LIBCPP_MEMORY diff --git a/contrib/llvm-project/libcxx/include/version b/contrib/llvm-project/libcxx/include/version index 813bc1ab9e6a..c021db8bddd7 100644 --- a/contrib/llvm-project/libcxx/include/version +++ b/contrib/llvm-project/libcxx/include/version @@ -1,364 +1,366 @@ // -*- C++ -*- //===--------------------------- version ----------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef _LIBCPP_VERSIONH #define _LIBCPP_VERSIONH /* version synopsis Macro name Value Headers __cpp_lib_addressof_constexpr 201603L __cpp_lib_allocator_traits_is_always_equal 201411L __cpp_lib_any 201606L __cpp_lib_apply 201603L __cpp_lib_array_constexpr 201811L 201603L // C++17 __cpp_lib_as_const 201510L __cpp_lib_assume_aligned 201811L __cpp_lib_atomic_flag_test 201907L __cpp_lib_atomic_float 201711L __cpp_lib_atomic_is_always_lock_free 201603L __cpp_lib_atomic_lock_free_type_aliases 201907L __cpp_lib_atomic_ref 201806L __cpp_lib_atomic_shared_ptr 201711L __cpp_lib_atomic_value_initialization 201911L __cpp_lib_atomic_wait 201907L __cpp_lib_barrier 201907L __cpp_lib_bind_front 201907L __cpp_lib_bit_cast 201806L __cpp_lib_bitops 201907L __cpp_lib_bool_constant 201505L __cpp_lib_bounded_array_traits 201902L __cpp_lib_boyer_moore_searcher 201603L __cpp_lib_byte 201603L __cpp_lib_char8_t 201811L __cpp_lib_chrono 201611L __cpp_lib_chrono_udls 201304L __cpp_lib_clamp 201603L __cpp_lib_complex_udls 201309L __cpp_lib_concepts 202002L __cpp_lib_constexpr_algorithms 201806L __cpp_lib_constexpr_complex 201711L __cpp_lib_constexpr_dynamic_alloc 201907L __cpp_lib_constexpr_functional 201907L __cpp_lib_constexpr_iterator 201811L __cpp_lib_constexpr_memory 201811L __cpp_lib_constexpr_numeric 201911L __cpp_lib_constexpr_string 201907L __cpp_lib_constexpr_string_view 201811L __cpp_lib_constexpr_tuple 201811L __cpp_lib_constexpr_utility 201811L __cpp_lib_constexpr_vector 201907L __cpp_lib_coroutine 201902L __cpp_lib_destroying_delete 201806L __cpp_lib_enable_shared_from_this 201603L __cpp_lib_endian 201907L __cpp_lib_erase_if 202002L __cpp_lib_exchange_function 201304L __cpp_lib_execution 201902L 
201603L // C++17 __cpp_lib_filesystem 201703L __cpp_lib_gcd_lcm 201606L __cpp_lib_generic_associative_lookup 201304L __cpp_lib_generic_unordered_lookup 201811L __cpp_lib_hardware_interference_size 201703L __cpp_lib_has_unique_object_representations 201606L __cpp_lib_hypot 201603L __cpp_lib_incomplete_container_elements 201505L __cpp_lib_int_pow2 202002L __cpp_lib_integer_comparison_functions 202002L __cpp_lib_integer_sequence 201304L __cpp_lib_integral_constant_callable 201304L __cpp_lib_interpolate 201902L __cpp_lib_invoke 201411L __cpp_lib_is_aggregate 201703L __cpp_lib_is_constant_evaluated 201811L __cpp_lib_is_final 201402L __cpp_lib_is_invocable 201703L __cpp_lib_is_layout_compatible 201907L __cpp_lib_is_nothrow_convertible 201806L __cpp_lib_is_null_pointer 201309L __cpp_lib_is_pointer_interconvertible 201907L __cpp_lib_is_scoped_enum 202011L __cpp_lib_is_swappable 201603L __cpp_lib_jthread 201911L __cpp_lib_latch 201907L __cpp_lib_launder 201606L __cpp_lib_list_remove_return_type 201806L __cpp_lib_logical_traits 201510L __cpp_lib_make_from_tuple 201606L __cpp_lib_make_reverse_iterator 201402L __cpp_lib_make_unique 201304L __cpp_lib_map_try_emplace 201411L __cpp_lib_math_constants 201907L __cpp_lib_math_special_functions 201603L __cpp_lib_memory_resource 201603L __cpp_lib_node_extract 201606L __cpp_lib_nonmember_container_access 201411L __cpp_lib_not_fn 201603L __cpp_lib_null_iterators 201304L __cpp_lib_optional 201606L __cpp_lib_parallel_algorithm 201603L __cpp_lib_polymorphic_allocator 201902L __cpp_lib_quoted_string_io 201304L __cpp_lib_ranges 201811L __cpp_lib_raw_memory_algorithms 201606L __cpp_lib_remove_cvref 201711L __cpp_lib_result_of_sfinae 201210L __cpp_lib_robust_nonmodifying_seq_ops 201304L __cpp_lib_sample 201603L __cpp_lib_scoped_lock 201703L __cpp_lib_semaphore 201907L __cpp_lib_shared_mutex 201505L __cpp_lib_shared_ptr_arrays 201611L __cpp_lib_shared_ptr_weak_type 201606L __cpp_lib_shared_timed_mutex 201402L __cpp_lib_shift 201806L __cpp_lib_smart_ptr_for_overwrite 202002L __cpp_lib_source_location 201907L __cpp_lib_span 202002L __cpp_lib_ssize 201902L __cpp_lib_stacktrace 202011L __cpp_lib_starts_ends_with 201711L __cpp_lib_stdatomic_h 202011L __cpp_lib_string_contains 202011L __cpp_lib_string_udls 201304L __cpp_lib_string_view 201803L 201606L // C++17 __cpp_lib_syncbuf 201803L __cpp_lib_three_way_comparison 201907L __cpp_lib_to_address 201711L __cpp_lib_to_array 201907L __cpp_lib_to_chars 201611L __cpp_lib_transformation_trait_aliases 201304L __cpp_lib_transparent_operators 201510L 201210L // C++14 __cpp_lib_tuple_element_t 201402L __cpp_lib_tuples_by_type 201304L __cpp_lib_type_trait_variable_templates 201510L __cpp_lib_uncaught_exceptions 201411L __cpp_lib_unordered_map_try_emplace 201411L __cpp_lib_unwrap_ref 201811L __cpp_lib_variant 201606L __cpp_lib_void_t 201411L */ #include <__config> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header #endif #if _LIBCPP_STD_VER > 11 # define __cpp_lib_chrono_udls 201304L # define __cpp_lib_complex_udls 201309L # define __cpp_lib_exchange_function 201304L # define __cpp_lib_generic_associative_lookup 201304L # define __cpp_lib_integer_sequence 201304L # define __cpp_lib_integral_constant_callable 201304L # define __cpp_lib_is_final 201402L # define __cpp_lib_is_null_pointer 201309L # define __cpp_lib_make_reverse_iterator 201402L # define __cpp_lib_make_unique 201304L # define __cpp_lib_null_iterators 201304L # define __cpp_lib_quoted_string_io 201304L # define __cpp_lib_result_of_sfinae 201210L # 
define __cpp_lib_robust_nonmodifying_seq_ops 201304L -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_timed_mutex) # define __cpp_lib_shared_timed_mutex 201402L # endif # define __cpp_lib_string_udls 201304L # define __cpp_lib_transformation_trait_aliases 201304L # define __cpp_lib_transparent_operators 201210L # define __cpp_lib_tuple_element_t 201402L # define __cpp_lib_tuples_by_type 201304L #endif #if _LIBCPP_STD_VER > 14 # if !defined(_LIBCPP_HAS_NO_BUILTIN_ADDRESSOF) # define __cpp_lib_addressof_constexpr 201603L # endif # define __cpp_lib_allocator_traits_is_always_equal 201411L # define __cpp_lib_any 201606L # define __cpp_lib_apply 201603L # define __cpp_lib_array_constexpr 201603L # define __cpp_lib_as_const 201510L # if !defined(_LIBCPP_HAS_NO_THREADS) # define __cpp_lib_atomic_is_always_lock_free 201603L # endif # define __cpp_lib_bool_constant 201505L // # define __cpp_lib_boyer_moore_searcher 201603L # define __cpp_lib_byte 201603L # define __cpp_lib_chrono 201611L # define __cpp_lib_clamp 201603L # define __cpp_lib_enable_shared_from_this 201603L // # define __cpp_lib_execution 201603L -# define __cpp_lib_filesystem 201703L +# if !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_filesystem) +# define __cpp_lib_filesystem 201703L +# endif # define __cpp_lib_gcd_lcm 201606L // # define __cpp_lib_hardware_interference_size 201703L # if defined(_LIBCPP_HAS_UNIQUE_OBJECT_REPRESENTATIONS) # define __cpp_lib_has_unique_object_representations 201606L # endif # define __cpp_lib_hypot 201603L # define __cpp_lib_incomplete_container_elements 201505L # define __cpp_lib_invoke 201411L # if !defined(_LIBCPP_HAS_NO_IS_AGGREGATE) # define __cpp_lib_is_aggregate 201703L # endif # define __cpp_lib_is_invocable 201703L # define __cpp_lib_is_swappable 201603L # define __cpp_lib_launder 201606L # define __cpp_lib_logical_traits 201510L # define __cpp_lib_make_from_tuple 201606L # define __cpp_lib_map_try_emplace 201411L // # define __cpp_lib_math_special_functions 201603L // # define __cpp_lib_memory_resource 201603L # define __cpp_lib_node_extract 201606L # define __cpp_lib_nonmember_container_access 201411L # define __cpp_lib_not_fn 201603L # define __cpp_lib_optional 201606L // # define __cpp_lib_parallel_algorithm 201603L # define __cpp_lib_raw_memory_algorithms 201606L # define __cpp_lib_sample 201603L # define __cpp_lib_scoped_lock 201703L -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_shared_mutex) # define __cpp_lib_shared_mutex 201505L # endif # define __cpp_lib_shared_ptr_arrays 201611L # define __cpp_lib_shared_ptr_weak_type 201606L # define __cpp_lib_string_view 201606L // # define __cpp_lib_to_chars 201611L # undef __cpp_lib_transparent_operators # define __cpp_lib_transparent_operators 201510L # define __cpp_lib_type_trait_variable_templates 201510L # define __cpp_lib_uncaught_exceptions 201411L # define __cpp_lib_unordered_map_try_emplace 201411L # define __cpp_lib_variant 201606L # define __cpp_lib_void_t 201411L #endif #if _LIBCPP_STD_VER > 17 # undef __cpp_lib_array_constexpr # define __cpp_lib_array_constexpr 201811L // # define __cpp_lib_assume_aligned 201811L # if !defined(_LIBCPP_HAS_NO_THREADS) # define __cpp_lib_atomic_flag_test 201907L # endif # if !defined(_LIBCPP_HAS_NO_THREADS) // # define __cpp_lib_atomic_float 201711L # endif # if !defined(_LIBCPP_HAS_NO_THREADS) # define 
__cpp_lib_atomic_lock_free_type_aliases 201907L # endif # if !defined(_LIBCPP_HAS_NO_THREADS) // # define __cpp_lib_atomic_ref 201806L # endif # if !defined(_LIBCPP_HAS_NO_THREADS) // # define __cpp_lib_atomic_shared_ptr 201711L # endif # if !defined(_LIBCPP_HAS_NO_THREADS) // # define __cpp_lib_atomic_value_initialization 201911L # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_atomic_wait) # define __cpp_lib_atomic_wait 201907L # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_barrier) # define __cpp_lib_barrier 201907L # endif // # define __cpp_lib_bind_front 201907L // # define __cpp_lib_bit_cast 201806L // # define __cpp_lib_bitops 201907L # define __cpp_lib_bounded_array_traits 201902L # if !defined(_LIBCPP_NO_HAS_CHAR8_T) # define __cpp_lib_char8_t 201811L # endif // # define __cpp_lib_concepts 202002L // # define __cpp_lib_constexpr_algorithms 201806L // # define __cpp_lib_constexpr_complex 201711L # define __cpp_lib_constexpr_dynamic_alloc 201907L # define __cpp_lib_constexpr_functional 201907L // # define __cpp_lib_constexpr_iterator 201811L // # define __cpp_lib_constexpr_memory 201811L # define __cpp_lib_constexpr_numeric 201911L // # define __cpp_lib_constexpr_string 201907L // # define __cpp_lib_constexpr_string_view 201811L // # define __cpp_lib_constexpr_tuple 201811L # define __cpp_lib_constexpr_utility 201811L // # define __cpp_lib_constexpr_vector 201907L // # define __cpp_lib_coroutine 201902L # if _LIBCPP_STD_VER > 17 && defined(__cpp_impl_destroying_delete) && __cpp_impl_destroying_delete >= 201806L # define __cpp_lib_destroying_delete 201806L # endif # define __cpp_lib_endian 201907L # define __cpp_lib_erase_if 202002L # undef __cpp_lib_execution // # define __cpp_lib_execution 201902L # define __cpp_lib_generic_unordered_lookup 201811L # define __cpp_lib_int_pow2 202002L // # define __cpp_lib_integer_comparison_functions 202002L # define __cpp_lib_interpolate 201902L # if !defined(_LIBCPP_HAS_NO_BUILTIN_IS_CONSTANT_EVALUATED) # define __cpp_lib_is_constant_evaluated 201811L # endif // # define __cpp_lib_is_layout_compatible 201907L # define __cpp_lib_is_nothrow_convertible 201806L // # define __cpp_lib_is_pointer_interconvertible 201907L # if !defined(_LIBCPP_HAS_NO_THREADS) // # define __cpp_lib_jthread 201911L # endif -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch) # define __cpp_lib_latch 201907L # endif # define __cpp_lib_list_remove_return_type 201806L # if defined(__cpp_concepts) && __cpp_concepts >= 201811L # define __cpp_lib_math_constants 201907L # endif // # define __cpp_lib_polymorphic_allocator 201902L // # define __cpp_lib_ranges 201811L # define __cpp_lib_remove_cvref 201711L -# if !defined(_LIBCPP_HAS_NO_THREADS) +# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore) # define __cpp_lib_semaphore 201907L # endif # define __cpp_lib_shift 201806L // # define __cpp_lib_smart_ptr_for_overwrite 202002L // # define __cpp_lib_source_location 201907L # define __cpp_lib_span 202002L # define __cpp_lib_ssize 201902L # define __cpp_lib_starts_ends_with 201711L # undef __cpp_lib_string_view # define __cpp_lib_string_view 201803L // # define __cpp_lib_syncbuf 201803L // # define __cpp_lib_three_way_comparison 201907L # define __cpp_lib_to_address 
201711L # define __cpp_lib_to_array 201907L # define __cpp_lib_unwrap_ref 201811L #endif #if _LIBCPP_STD_VER > 20 # define __cpp_lib_is_scoped_enum 202011L // # define __cpp_lib_stacktrace 202011L // # define __cpp_lib_stdatomic_h 202011L # define __cpp_lib_string_contains 202011L #endif #endif // _LIBCPP_VERSIONH diff --git a/contrib/llvm-project/llvm/include/llvm-c/Core.h b/contrib/llvm-project/llvm/include/llvm-c/Core.h index a78df16ca404..2901ab715810 100644 --- a/contrib/llvm-project/llvm/include/llvm-c/Core.h +++ b/contrib/llvm-project/llvm/include/llvm-c/Core.h @@ -1,4158 +1,4170 @@ /*===-- llvm-c/Core.h - Core Library C Interface ------------------*- C -*-===*\ |* *| |* Part of the LLVM Project, under the Apache License v2.0 with LLVM *| |* Exceptions. *| |* See https://llvm.org/LICENSE.txt for license information. *| |* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *| |* *| |*===----------------------------------------------------------------------===*| |* *| |* This header declares the C interface to libLLVMCore.a, which implements *| |* the LLVM intermediate representation. *| |* *| \*===----------------------------------------------------------------------===*/ #ifndef LLVM_C_CORE_H #define LLVM_C_CORE_H #include "llvm-c/ErrorHandling.h" #include "llvm-c/ExternC.h" #include "llvm-c/Types.h" LLVM_C_EXTERN_C_BEGIN /** * @defgroup LLVMC LLVM-C: C interface to LLVM * * This module exposes parts of the LLVM library as a C API. * * @{ */ /** * @defgroup LLVMCTransforms Transforms */ /** * @defgroup LLVMCCore Core * * This modules provide an interface to libLLVMCore, which implements * the LLVM intermediate representation as well as other related types * and utilities. * * Many exotic languages can interoperate with C code but have a harder time * with C++ due to name mangling. So in addition to C, this interface enables * tools written in such languages. * * @{ */ /** * @defgroup LLVMCCoreTypes Types and Enumerations * * @{ */ /// External users depend on the following values being stable. It is not safe /// to reorder them. 
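Because these numeric values are stable, C API clients can compare or switch on them directly. A minimal sketch follows, assuming the accessor functions declared further down in this header (LLVMGetFirstInstruction, LLVMGetNextInstruction, LLVMGetInstructionOpcode); the call-counting scenario is purely illustrative.

#include <llvm-c/Core.h>

// Count the call instructions in one basic block by walking its instruction
// list and comparing each opcode against LLVMCall from the enum below.
static unsigned countCalls(LLVMBasicBlockRef bb) {
    unsigned calls = 0;
    for (LLVMValueRef inst = LLVMGetFirstInstruction(bb); inst;
         inst = LLVMGetNextInstruction(inst)) {
        if (LLVMGetInstructionOpcode(inst) == LLVMCall)   // stable value 45
            ++calls;
    }
    return calls;
}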
typedef enum { /* Terminator Instructions */ LLVMRet = 1, LLVMBr = 2, LLVMSwitch = 3, LLVMIndirectBr = 4, LLVMInvoke = 5, /* removed 6 due to API changes */ LLVMUnreachable = 7, LLVMCallBr = 67, /* Standard Unary Operators */ LLVMFNeg = 66, /* Standard Binary Operators */ LLVMAdd = 8, LLVMFAdd = 9, LLVMSub = 10, LLVMFSub = 11, LLVMMul = 12, LLVMFMul = 13, LLVMUDiv = 14, LLVMSDiv = 15, LLVMFDiv = 16, LLVMURem = 17, LLVMSRem = 18, LLVMFRem = 19, /* Logical Operators */ LLVMShl = 20, LLVMLShr = 21, LLVMAShr = 22, LLVMAnd = 23, LLVMOr = 24, LLVMXor = 25, /* Memory Operators */ LLVMAlloca = 26, LLVMLoad = 27, LLVMStore = 28, LLVMGetElementPtr = 29, /* Cast Operators */ LLVMTrunc = 30, LLVMZExt = 31, LLVMSExt = 32, LLVMFPToUI = 33, LLVMFPToSI = 34, LLVMUIToFP = 35, LLVMSIToFP = 36, LLVMFPTrunc = 37, LLVMFPExt = 38, LLVMPtrToInt = 39, LLVMIntToPtr = 40, LLVMBitCast = 41, LLVMAddrSpaceCast = 60, /* Other Operators */ LLVMICmp = 42, LLVMFCmp = 43, LLVMPHI = 44, LLVMCall = 45, LLVMSelect = 46, LLVMUserOp1 = 47, LLVMUserOp2 = 48, LLVMVAArg = 49, LLVMExtractElement = 50, LLVMInsertElement = 51, LLVMShuffleVector = 52, LLVMExtractValue = 53, LLVMInsertValue = 54, LLVMFreeze = 68, /* Atomic operators */ LLVMFence = 55, LLVMAtomicCmpXchg = 56, LLVMAtomicRMW = 57, /* Exception Handling Operators */ LLVMResume = 58, LLVMLandingPad = 59, LLVMCleanupRet = 61, LLVMCatchRet = 62, LLVMCatchPad = 63, LLVMCleanupPad = 64, LLVMCatchSwitch = 65 } LLVMOpcode; typedef enum { LLVMVoidTypeKind, /**< type with no size */ LLVMHalfTypeKind, /**< 16 bit floating point type */ LLVMFloatTypeKind, /**< 32 bit floating point type */ LLVMDoubleTypeKind, /**< 64 bit floating point type */ LLVMX86_FP80TypeKind, /**< 80 bit floating point type (X87) */ LLVMFP128TypeKind, /**< 128 bit floating point type (112-bit mantissa)*/ LLVMPPC_FP128TypeKind, /**< 128 bit floating point type (two 64-bits) */ LLVMLabelTypeKind, /**< Labels */ LLVMIntegerTypeKind, /**< Arbitrary bit width integers */ LLVMFunctionTypeKind, /**< Functions */ LLVMStructTypeKind, /**< Structures */ LLVMArrayTypeKind, /**< Arrays */ LLVMPointerTypeKind, /**< Pointers */ LLVMVectorTypeKind, /**< Fixed width SIMD vector type */ LLVMMetadataTypeKind, /**< Metadata */ LLVMX86_MMXTypeKind, /**< X86 MMX */ LLVMTokenTypeKind, /**< Tokens */ LLVMScalableVectorTypeKind, /**< Scalable SIMD vector type */ LLVMBFloatTypeKind, /**< 16 bit brain floating point type */ LLVMX86_AMXTypeKind /**< X86 AMX */ } LLVMTypeKind; typedef enum { LLVMExternalLinkage, /**< Externally visible function */ LLVMAvailableExternallyLinkage, LLVMLinkOnceAnyLinkage, /**< Keep one copy of function when linking (inline)*/ LLVMLinkOnceODRLinkage, /**< Same, but only replaced by something equivalent. */ LLVMLinkOnceODRAutoHideLinkage, /**< Obsolete */ LLVMWeakAnyLinkage, /**< Keep one copy of function when linking (weak) */ LLVMWeakODRLinkage, /**< Same, but only replaced by something equivalent. */ LLVMAppendingLinkage, /**< Special purpose, only applies to global arrays */ LLVMInternalLinkage, /**< Rename collisions when linking (static functions) */ LLVMPrivateLinkage, /**< Like Internal, but omit from symbol table */ LLVMDLLImportLinkage, /**< Obsolete */ LLVMDLLExportLinkage, /**< Obsolete */ LLVMExternalWeakLinkage,/**< ExternalWeak linkage description */ LLVMGhostLinkage, /**< Obsolete */ LLVMCommonLinkage, /**< Tentative definitions */ LLVMLinkerPrivateLinkage, /**< Like Private, but linker removes. */ LLVMLinkerPrivateWeakLinkage /**< Like LinkerPrivate, but is weak. 
*/ } LLVMLinkage; typedef enum { LLVMDefaultVisibility, /**< The GV is visible */ LLVMHiddenVisibility, /**< The GV is hidden */ LLVMProtectedVisibility /**< The GV is protected */ } LLVMVisibility; typedef enum { LLVMNoUnnamedAddr, /**< Address of the GV is significant. */ LLVMLocalUnnamedAddr, /**< Address of the GV is locally insignificant. */ LLVMGlobalUnnamedAddr /**< Address of the GV is globally insignificant. */ } LLVMUnnamedAddr; typedef enum { LLVMDefaultStorageClass = 0, LLVMDLLImportStorageClass = 1, /**< Function to be imported from DLL. */ LLVMDLLExportStorageClass = 2 /**< Function to be accessible from DLL. */ } LLVMDLLStorageClass; typedef enum { LLVMCCallConv = 0, LLVMFastCallConv = 8, LLVMColdCallConv = 9, LLVMGHCCallConv = 10, LLVMHiPECallConv = 11, LLVMWebKitJSCallConv = 12, LLVMAnyRegCallConv = 13, LLVMPreserveMostCallConv = 14, LLVMPreserveAllCallConv = 15, LLVMSwiftCallConv = 16, LLVMCXXFASTTLSCallConv = 17, LLVMX86StdcallCallConv = 64, LLVMX86FastcallCallConv = 65, LLVMARMAPCSCallConv = 66, LLVMARMAAPCSCallConv = 67, LLVMARMAAPCSVFPCallConv = 68, LLVMMSP430INTRCallConv = 69, LLVMX86ThisCallCallConv = 70, LLVMPTXKernelCallConv = 71, LLVMPTXDeviceCallConv = 72, LLVMSPIRFUNCCallConv = 75, LLVMSPIRKERNELCallConv = 76, LLVMIntelOCLBICallConv = 77, LLVMX8664SysVCallConv = 78, LLVMWin64CallConv = 79, LLVMX86VectorCallCallConv = 80, LLVMHHVMCallConv = 81, LLVMHHVMCCallConv = 82, LLVMX86INTRCallConv = 83, LLVMAVRINTRCallConv = 84, LLVMAVRSIGNALCallConv = 85, LLVMAVRBUILTINCallConv = 86, LLVMAMDGPUVSCallConv = 87, LLVMAMDGPUGSCallConv = 88, LLVMAMDGPUPSCallConv = 89, LLVMAMDGPUCSCallConv = 90, LLVMAMDGPUKERNELCallConv = 91, LLVMX86RegCallCallConv = 92, LLVMAMDGPUHSCallConv = 93, LLVMMSP430BUILTINCallConv = 94, LLVMAMDGPULSCallConv = 95, LLVMAMDGPUESCallConv = 96 } LLVMCallConv; typedef enum { LLVMArgumentValueKind, LLVMBasicBlockValueKind, LLVMMemoryUseValueKind, LLVMMemoryDefValueKind, LLVMMemoryPhiValueKind, LLVMFunctionValueKind, LLVMGlobalAliasValueKind, LLVMGlobalIFuncValueKind, LLVMGlobalVariableValueKind, LLVMBlockAddressValueKind, LLVMConstantExprValueKind, LLVMConstantArrayValueKind, LLVMConstantStructValueKind, LLVMConstantVectorValueKind, LLVMUndefValueValueKind, LLVMConstantAggregateZeroValueKind, LLVMConstantDataArrayValueKind, LLVMConstantDataVectorValueKind, LLVMConstantIntValueKind, LLVMConstantFPValueKind, LLVMConstantPointerNullValueKind, LLVMConstantTokenNoneValueKind, LLVMMetadataAsValueValueKind, LLVMInlineAsmValueKind, LLVMInstructionValueKind, LLVMPoisonValueValueKind } LLVMValueKind; typedef enum { LLVMIntEQ = 32, /**< equal */ LLVMIntNE, /**< not equal */ LLVMIntUGT, /**< unsigned greater than */ LLVMIntUGE, /**< unsigned greater or equal */ LLVMIntULT, /**< unsigned less than */ LLVMIntULE, /**< unsigned less or equal */ LLVMIntSGT, /**< signed greater than */ LLVMIntSGE, /**< signed greater or equal */ LLVMIntSLT, /**< signed less than */ LLVMIntSLE /**< signed less or equal */ } LLVMIntPredicate; typedef enum { LLVMRealPredicateFalse, /**< Always false (always folded) */ LLVMRealOEQ, /**< True if ordered and equal */ LLVMRealOGT, /**< True if ordered and greater than */ LLVMRealOGE, /**< True if ordered and greater than or equal */ LLVMRealOLT, /**< True if ordered and less than */ LLVMRealOLE, /**< True if ordered and less than or equal */ LLVMRealONE, /**< True if ordered and operands are unequal */ LLVMRealORD, /**< True if ordered (no nans) */ LLVMRealUNO, /**< True if unordered: isnan(X) | isnan(Y) */ LLVMRealUEQ, /**< True if unordered or 
equal */ LLVMRealUGT, /**< True if unordered or greater than */ LLVMRealUGE, /**< True if unordered, greater than, or equal */ LLVMRealULT, /**< True if unordered or less than */ LLVMRealULE, /**< True if unordered, less than, or equal */ LLVMRealUNE, /**< True if unordered or not equal */ LLVMRealPredicateTrue /**< Always true (always folded) */ } LLVMRealPredicate; typedef enum { LLVMLandingPadCatch, /**< A catch clause */ LLVMLandingPadFilter /**< A filter clause */ } LLVMLandingPadClauseTy; typedef enum { LLVMNotThreadLocal = 0, LLVMGeneralDynamicTLSModel, LLVMLocalDynamicTLSModel, LLVMInitialExecTLSModel, LLVMLocalExecTLSModel } LLVMThreadLocalMode; typedef enum { LLVMAtomicOrderingNotAtomic = 0, /**< A load or store which is not atomic */ LLVMAtomicOrderingUnordered = 1, /**< Lowest level of atomicity, guarantees somewhat sane results, lock free. */ LLVMAtomicOrderingMonotonic = 2, /**< guarantees that if you take all the operations affecting a specific address, a consistent ordering exists */ LLVMAtomicOrderingAcquire = 4, /**< Acquire provides a barrier of the sort necessary to acquire a lock to access other memory with normal loads and stores. */ LLVMAtomicOrderingRelease = 5, /**< Release is similar to Acquire, but with a barrier of the sort necessary to release a lock. */ LLVMAtomicOrderingAcquireRelease = 6, /**< provides both an Acquire and a Release barrier (for fences and operations which both read and write memory). */ LLVMAtomicOrderingSequentiallyConsistent = 7 /**< provides Acquire semantics for loads and Release semantics for stores. Additionally, it guarantees that a total ordering exists between all SequentiallyConsistent operations. */ } LLVMAtomicOrdering; typedef enum { LLVMAtomicRMWBinOpXchg, /**< Set the new value and return the old one */ LLVMAtomicRMWBinOpAdd, /**< Add a value and return the old one */ LLVMAtomicRMWBinOpSub, /**< Subtract a value and return the old one */ LLVMAtomicRMWBinOpAnd, /**< And a value and return the old one */ LLVMAtomicRMWBinOpNand, /**< Not-And a value and return the old one */ LLVMAtomicRMWBinOpOr, /**< OR a value and return the old one */ LLVMAtomicRMWBinOpXor, /**< Xor a value and return the old one */ LLVMAtomicRMWBinOpMax, /**< Sets the value if it's greater than the original using a signed comparison and return the old one */ LLVMAtomicRMWBinOpMin, /**< Sets the value if it's smaller than the original using a signed comparison and return the old one */ LLVMAtomicRMWBinOpUMax, /**< Sets the value if it's greater than the original using an unsigned comparison and return the old one */ LLVMAtomicRMWBinOpUMin, /**< Sets the value if it's smaller than the original using an unsigned comparison and return the old one */ LLVMAtomicRMWBinOpFAdd, /**< Add a floating point value and return the old one */ LLVMAtomicRMWBinOpFSub /**< Subtract a floating point value and return the old one */ } LLVMAtomicRMWBinOp; typedef enum { LLVMDSError, LLVMDSWarning, LLVMDSRemark, LLVMDSNote } LLVMDiagnosticSeverity; typedef enum { LLVMInlineAsmDialectATT, LLVMInlineAsmDialectIntel } LLVMInlineAsmDialect; typedef enum { /** * Emits an error if two values disagree, otherwise the resulting value is * that of the operands. * * @see Module::ModFlagBehavior::Error */ LLVMModuleFlagBehaviorError, /** * Emits a warning if two values disagree. The result value will be the * operand for the flag from the first module being linked.
* * @see Module::ModFlagBehavior::Warning */ LLVMModuleFlagBehaviorWarning, /** * Adds a requirement that another module flag be present and have a * specified value after linking is performed. The value must be a metadata * pair, where the first element of the pair is the ID of the module flag * to be restricted, and the second element of the pair is the value the * module flag should be restricted to. This behavior can be used to * restrict the allowable results (via triggering of an error) of linking * IDs with the **Override** behavior. * * @see Module::ModFlagBehavior::Require */ LLVMModuleFlagBehaviorRequire, /** * Uses the specified value, regardless of the behavior or value of the * other module. If both modules specify **Override**, but the values * differ, an error will be emitted. * * @see Module::ModFlagBehavior::Override */ LLVMModuleFlagBehaviorOverride, /** * Appends the two values, which are required to be metadata nodes. * * @see Module::ModFlagBehavior::Append */ LLVMModuleFlagBehaviorAppend, /** * Appends the two values, which are required to be metadata * nodes. However, duplicate entries in the second list are dropped * during the append operation. * * @see Module::ModFlagBehavior::AppendUnique */ LLVMModuleFlagBehaviorAppendUnique, } LLVMModuleFlagBehavior; /** * Attribute index are either LLVMAttributeReturnIndex, * LLVMAttributeFunctionIndex or a parameter number from 1 to N. */ enum { LLVMAttributeReturnIndex = 0U, // ISO C restricts enumerator values to range of 'int' // (4294967295 is too large) // LLVMAttributeFunctionIndex = ~0U, LLVMAttributeFunctionIndex = -1, }; typedef unsigned LLVMAttributeIndex; /** * @} */ void LLVMInitializeCore(LLVMPassRegistryRef R); /** Deallocate and destroy all ManagedStatic variables. @see llvm::llvm_shutdown @see ManagedStatic */ void LLVMShutdown(void); /*===-- Error handling ----------------------------------------------------===*/ char *LLVMCreateMessage(const char *Message); void LLVMDisposeMessage(char *Message); /** * @defgroup LLVMCCoreContext Contexts * * Contexts are execution states for the core LLVM IR system. * * Most types are tied to a context instance. Multiple contexts can * exist simultaneously. A single context is not thread safe. However, * different contexts can execute on different threads simultaneously. * * @{ */ typedef void (*LLVMDiagnosticHandler)(LLVMDiagnosticInfoRef, void *); typedef void (*LLVMYieldCallback)(LLVMContextRef, void *); /** * Create a new context. * * Every call to this function should be paired with a call to * LLVMContextDispose() or the context will leak memory. */ LLVMContextRef LLVMContextCreate(void); /** * Obtain the global context instance. */ LLVMContextRef LLVMGetGlobalContext(void); /** * Set the diagnostic handler for this context. */ void LLVMContextSetDiagnosticHandler(LLVMContextRef C, LLVMDiagnosticHandler Handler, void *DiagnosticContext); /** * Get the diagnostic handler of this context. */ LLVMDiagnosticHandler LLVMContextGetDiagnosticHandler(LLVMContextRef C); /** * Get the diagnostic context of this context. */ void *LLVMContextGetDiagnosticContext(LLVMContextRef C); /** * Set the yield callback function for this context. * * @see LLVMContext::setYieldCallback() */ void LLVMContextSetYieldCallback(LLVMContextRef C, LLVMYieldCallback Callback, void *OpaqueHandle); /** * Retrieve whether the given context is set to discard all value names. 
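 *
 * A minimal usage sketch for the context functions in this group; the
 * specific settings chosen here are illustrative, not required by the API:
 *
 * \code
 *   LLVMContextRef Ctx = LLVMContextCreate();
 *   LLVMContextSetDiscardValueNames(Ctx, 1);      // keep only GlobalValue names
 *   LLVMBool Discarding = LLVMContextShouldDiscardValueNames(Ctx);
 *   // ... build modules in Ctx ...
 *   LLVMContextDispose(Ctx);                      // pairs with LLVMContextCreate()
 * \endcode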
* * @see LLVMContext::shouldDiscardValueNames() */ LLVMBool LLVMContextShouldDiscardValueNames(LLVMContextRef C); /** * Set whether the given context discards all value names. * * If true, only the names of GlobalValue objects will be available in the IR. * This can be used to save memory and runtime, especially in release mode. * * @see LLVMContext::setDiscardValueNames() */ void LLVMContextSetDiscardValueNames(LLVMContextRef C, LLVMBool Discard); /** * Destroy a context instance. * * This should be called for every call to LLVMContextCreate() or memory * will be leaked. */ void LLVMContextDispose(LLVMContextRef C); /** * Return a string representation of the DiagnosticInfo. Use * LLVMDisposeMessage to free the string. * * @see DiagnosticInfo::print() */ char *LLVMGetDiagInfoDescription(LLVMDiagnosticInfoRef DI); /** * Return an enum LLVMDiagnosticSeverity. * * @see DiagnosticInfo::getSeverity() */ LLVMDiagnosticSeverity LLVMGetDiagInfoSeverity(LLVMDiagnosticInfoRef DI); unsigned LLVMGetMDKindIDInContext(LLVMContextRef C, const char *Name, unsigned SLen); unsigned LLVMGetMDKindID(const char *Name, unsigned SLen); /** * Return an unique id given the name of a enum attribute, * or 0 if no attribute by that name exists. * * See http://llvm.org/docs/LangRef.html#parameter-attributes * and http://llvm.org/docs/LangRef.html#function-attributes * for the list of available attributes. * * NB: Attribute names and/or id are subject to change without * going through the C API deprecation cycle. */ unsigned LLVMGetEnumAttributeKindForName(const char *Name, size_t SLen); unsigned LLVMGetLastEnumAttributeKind(void); /** * Create an enum attribute. */ LLVMAttributeRef LLVMCreateEnumAttribute(LLVMContextRef C, unsigned KindID, uint64_t Val); /** * Get the unique id corresponding to the enum attribute * passed as argument. */ unsigned LLVMGetEnumAttributeKind(LLVMAttributeRef A); /** * Get the enum attribute's value. 0 is returned if none exists. */ uint64_t LLVMGetEnumAttributeValue(LLVMAttributeRef A); +/** + * Create a type attribute + */ +LLVMAttributeRef LLVMCreateTypeAttribute(LLVMContextRef C, unsigned KindID, + LLVMTypeRef type_ref); + +/** + * Get the type attribute's value. + */ +LLVMTypeRef LLVMGetTypeAttributeValue(LLVMAttributeRef A); + /** * Create a string attribute. */ LLVMAttributeRef LLVMCreateStringAttribute(LLVMContextRef C, const char *K, unsigned KLength, const char *V, unsigned VLength); /** * Get the string attribute's kind. */ const char *LLVMGetStringAttributeKind(LLVMAttributeRef A, unsigned *Length); /** * Get the string attribute's value. */ const char *LLVMGetStringAttributeValue(LLVMAttributeRef A, unsigned *Length); /** * Check for the different types of attributes. */ LLVMBool LLVMIsEnumAttribute(LLVMAttributeRef A); LLVMBool LLVMIsStringAttribute(LLVMAttributeRef A); +LLVMBool LLVMIsTypeAttribute(LLVMAttributeRef A); /** * Obtain a Type from a context by its registered name. */ LLVMTypeRef LLVMGetTypeByName2(LLVMContextRef C, const char *Name); /** * @} */ /** * @defgroup LLVMCCoreModule Modules * * Modules represent the top-level structure in an LLVM program. An LLVM * module is effectively a translation unit or a collection of * translation units merged together. * * @{ */ /** * Create a new, empty module in the global context. * * This is equivalent to calling LLVMModuleCreateWithNameInContext with * LLVMGetGlobalContext() as the context parameter. * * Every invocation should be paired with LLVMDisposeModule() or memory * will be leaked. 
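 *
 * A minimal usage sketch; the module name "demo" and the context Ctx are
 * illustrative assumptions:
 *
 * \code
 *   LLVMContextRef Ctx = LLVMContextCreate();
 *   LLVMModuleRef M = LLVMModuleCreateWithNameInContext("demo", Ctx);
 *   // ... populate M ...
 *   LLVMDisposeModule(M);       // every created module must be disposed
 *   LLVMContextDispose(Ctx);
 * \endcode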
*/ LLVMModuleRef LLVMModuleCreateWithName(const char *ModuleID); /** * Create a new, empty module in a specific context. * * Every invocation should be paired with LLVMDisposeModule() or memory * will be leaked. */ LLVMModuleRef LLVMModuleCreateWithNameInContext(const char *ModuleID, LLVMContextRef C); /** * Return an exact copy of the specified module. */ LLVMModuleRef LLVMCloneModule(LLVMModuleRef M); /** * Destroy a module instance. * * This must be called for every created module or memory will be * leaked. */ void LLVMDisposeModule(LLVMModuleRef M); /** * Obtain the identifier of a module. * * @param M Module to obtain identifier of * @param Len Out parameter which holds the length of the returned string. * @return The identifier of M. * @see Module::getModuleIdentifier() */ const char *LLVMGetModuleIdentifier(LLVMModuleRef M, size_t *Len); /** * Set the identifier of a module to a string Ident with length Len. * * @param M The module to set identifier * @param Ident The string to set M's identifier to * @param Len Length of Ident * @see Module::setModuleIdentifier() */ void LLVMSetModuleIdentifier(LLVMModuleRef M, const char *Ident, size_t Len); /** * Obtain the module's original source file name. * * @param M Module to obtain the name of * @param Len Out parameter which holds the length of the returned string * @return The original source file name of M * @see Module::getSourceFileName() */ const char *LLVMGetSourceFileName(LLVMModuleRef M, size_t *Len); /** * Set the original source file name of a module to a string Name with length * Len. * * @param M The module to set the source file name of * @param Name The string to set M's source file name to * @param Len Length of Name * @see Module::setSourceFileName() */ void LLVMSetSourceFileName(LLVMModuleRef M, const char *Name, size_t Len); /** * Obtain the data layout for a module. * * @see Module::getDataLayoutStr() * * LLVMGetDataLayout is DEPRECATED, as the name is not only incorrect, * but match the name of another method on the module. Prefer the use * of LLVMGetDataLayoutStr, which is not ambiguous. */ const char *LLVMGetDataLayoutStr(LLVMModuleRef M); const char *LLVMGetDataLayout(LLVMModuleRef M); /** * Set the data layout for a module. * * @see Module::setDataLayout() */ void LLVMSetDataLayout(LLVMModuleRef M, const char *DataLayoutStr); /** * Obtain the target triple for a module. * * @see Module::getTargetTriple() */ const char *LLVMGetTarget(LLVMModuleRef M); /** * Set the target triple for a module. * * @see Module::setTargetTriple() */ void LLVMSetTarget(LLVMModuleRef M, const char *Triple); /** * Returns the module flags as an array of flag-key-value triples. The caller * is responsible for freeing this array by calling * \c LLVMDisposeModuleFlagsMetadata. * * @see Module::getModuleFlagsMetadata() */ LLVMModuleFlagEntry *LLVMCopyModuleFlagsMetadata(LLVMModuleRef M, size_t *Len); /** * Destroys module flags metadata entries. */ void LLVMDisposeModuleFlagsMetadata(LLVMModuleFlagEntry *Entries); /** * Returns the flag behavior for a module flag entry at a specific index. * * @see Module::ModuleFlagEntry::Behavior */ LLVMModuleFlagBehavior LLVMModuleFlagEntriesGetFlagBehavior(LLVMModuleFlagEntry *Entries, unsigned Index); /** * Returns the key for a module flag entry at a specific index. * * @see Module::ModuleFlagEntry::Key */ const char *LLVMModuleFlagEntriesGetKey(LLVMModuleFlagEntry *Entries, unsigned Index, size_t *Len); /** * Returns the metadata for a module flag entry at a specific index. 
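 *
 * A sketch of walking the copied flag entries of an assumed, previously
 * created module M:
 *
 * \code
 *   size_t Count = 0;
 *   LLVMModuleFlagEntry *Flags = LLVMCopyModuleFlagsMetadata(M, &Count);
 *   for (unsigned i = 0; i < Count; ++i) {
 *     size_t KeyLen = 0;
 *     const char *Key = LLVMModuleFlagEntriesGetKey(Flags, i, &KeyLen);
 *     LLVMMetadataRef Val = LLVMModuleFlagEntriesGetMetadata(Flags, i);
 *     // ... inspect Key / Val ...
 *   }
 *   LLVMDisposeModuleFlagsMetadata(Flags);
 * \endcode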
* * @see Module::ModuleFlagEntry::Val */ LLVMMetadataRef LLVMModuleFlagEntriesGetMetadata(LLVMModuleFlagEntry *Entries, unsigned Index); /** * Returns the module-level flag metadata for the given key, or NULL if no * such flag exists. * * @see Module::getModuleFlag() */ LLVMMetadataRef LLVMGetModuleFlag(LLVMModuleRef M, const char *Key, size_t KeyLen); /** * Add a module-level flag to the module-level flags metadata if it doesn't * already exist. * * @see Module::addModuleFlag() */ void LLVMAddModuleFlag(LLVMModuleRef M, LLVMModuleFlagBehavior Behavior, const char *Key, size_t KeyLen, LLVMMetadataRef Val); /** * Dump a representation of a module to stderr. * * @see Module::dump() */ void LLVMDumpModule(LLVMModuleRef M); /** * Print a representation of a module to a file. The ErrorMessage needs to be * disposed with LLVMDisposeMessage. Returns 0 on success, 1 otherwise. * * @see Module::print() */ LLVMBool LLVMPrintModuleToFile(LLVMModuleRef M, const char *Filename, char **ErrorMessage); /** * Return a string representation of the module. Use * LLVMDisposeMessage to free the string. * * @see Module::print() */ char *LLVMPrintModuleToString(LLVMModuleRef M); /** * Get inline assembly for a module. * * @see Module::getModuleInlineAsm() */ const char *LLVMGetModuleInlineAsm(LLVMModuleRef M, size_t *Len); /** * Set inline assembly for a module. * * @see Module::setModuleInlineAsm() */ void LLVMSetModuleInlineAsm2(LLVMModuleRef M, const char *Asm, size_t Len); /** * Append inline assembly to a module. * * @see Module::appendModuleInlineAsm() */ void LLVMAppendModuleInlineAsm(LLVMModuleRef M, const char *Asm, size_t Len); /** * Create the specified uniqued inline asm string. * * @see InlineAsm::get() */ LLVMValueRef LLVMGetInlineAsm(LLVMTypeRef Ty, char *AsmString, size_t AsmStringSize, char *Constraints, size_t ConstraintsSize, LLVMBool HasSideEffects, LLVMBool IsAlignStack, LLVMInlineAsmDialect Dialect); /** * Obtain the context to which this module is associated. * * @see Module::getContext() */ LLVMContextRef LLVMGetModuleContext(LLVMModuleRef M); /** Deprecated: Use LLVMGetTypeByName2 instead. */ LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name); /** * Obtain an iterator to the first NamedMDNode in a Module. * * @see llvm::Module::named_metadata_begin() */ LLVMNamedMDNodeRef LLVMGetFirstNamedMetadata(LLVMModuleRef M); /** * Obtain an iterator to the last NamedMDNode in a Module. * * @see llvm::Module::named_metadata_end() */ LLVMNamedMDNodeRef LLVMGetLastNamedMetadata(LLVMModuleRef M); /** * Advance a NamedMDNode iterator to the next NamedMDNode. * * Returns NULL if the iterator was already at the end and there are no more * named metadata nodes. */ LLVMNamedMDNodeRef LLVMGetNextNamedMetadata(LLVMNamedMDNodeRef NamedMDNode); /** * Decrement a NamedMDNode iterator to the previous NamedMDNode. * * Returns NULL if the iterator was already at the beginning and there are * no previous named metadata nodes. */ LLVMNamedMDNodeRef LLVMGetPreviousNamedMetadata(LLVMNamedMDNodeRef NamedMDNode); /** * Retrieve a NamedMDNode with the given name, returning NULL if no such * node exists. * * @see llvm::Module::getNamedMetadata() */ LLVMNamedMDNodeRef LLVMGetNamedMetadata(LLVMModuleRef M, const char *Name, size_t NameLen); /** * Retrieve a NamedMDNode with the given name, creating a new node if no such * node exists.
* * @see llvm::Module::getOrInsertNamedMetadata() */ LLVMNamedMDNodeRef LLVMGetOrInsertNamedMetadata(LLVMModuleRef M, const char *Name, size_t NameLen); /** * Retrieve the name of a NamedMDNode. * * @see llvm::NamedMDNode::getName() */ const char *LLVMGetNamedMetadataName(LLVMNamedMDNodeRef NamedMD, size_t *NameLen); /** * Obtain the number of operands for named metadata in a module. * * @see llvm::Module::getNamedMetadata() */ unsigned LLVMGetNamedMetadataNumOperands(LLVMModuleRef M, const char *Name); /** * Obtain the named metadata operands for a module. * * The passed LLVMValueRef pointer should refer to an array of * LLVMValueRef at least LLVMGetNamedMetadataNumOperands long. This * array will be populated with the LLVMValueRef instances. Each * instance corresponds to a llvm::MDNode. * * @see llvm::Module::getNamedMetadata() * @see llvm::MDNode::getOperand() */ void LLVMGetNamedMetadataOperands(LLVMModuleRef M, const char *Name, LLVMValueRef *Dest); /** * Add an operand to named metadata. * * @see llvm::Module::getNamedMetadata() * @see llvm::MDNode::addOperand() */ void LLVMAddNamedMetadataOperand(LLVMModuleRef M, const char *Name, LLVMValueRef Val); /** * Return the directory of the debug location for this value, which must be * an llvm::Instruction, llvm::GlobalVariable, or llvm::Function. * * @see llvm::Instruction::getDebugLoc() * @see llvm::GlobalVariable::getDebugInfo() * @see llvm::Function::getSubprogram() */ const char *LLVMGetDebugLocDirectory(LLVMValueRef Val, unsigned *Length); /** * Return the filename of the debug location for this value, which must be * an llvm::Instruction, llvm::GlobalVariable, or llvm::Function. * * @see llvm::Instruction::getDebugLoc() * @see llvm::GlobalVariable::getDebugInfo() * @see llvm::Function::getSubprogram() */ const char *LLVMGetDebugLocFilename(LLVMValueRef Val, unsigned *Length); /** * Return the line number of the debug location for this value, which must be * an llvm::Instruction, llvm::GlobalVariable, or llvm::Function. * * @see llvm::Instruction::getDebugLoc() * @see llvm::GlobalVariable::getDebugInfo() * @see llvm::Function::getSubprogram() */ unsigned LLVMGetDebugLocLine(LLVMValueRef Val); /** * Return the column number of the debug location for this value, which must be * an llvm::Instruction. * * @see llvm::Instruction::getDebugLoc() */ unsigned LLVMGetDebugLocColumn(LLVMValueRef Val); /** * Add a function to a module under a specified name. * * @see llvm::Function::Create() */ LLVMValueRef LLVMAddFunction(LLVMModuleRef M, const char *Name, LLVMTypeRef FunctionTy); /** * Obtain a Function value from a Module by its name. * * The returned value corresponds to a llvm::Function value. * * @see llvm::Module::getFunction() */ LLVMValueRef LLVMGetNamedFunction(LLVMModuleRef M, const char *Name); /** * Obtain an iterator to the first Function in a Module. * * @see llvm::Module::begin() */ LLVMValueRef LLVMGetFirstFunction(LLVMModuleRef M); /** * Obtain an iterator to the last Function in a Module. * * @see llvm::Module::end() */ LLVMValueRef LLVMGetLastFunction(LLVMModuleRef M); /** * Advance a Function iterator to the next Function. * * Returns NULL if the iterator was already at the end and there are no more * functions. */ LLVMValueRef LLVMGetNextFunction(LLVMValueRef Fn); /** * Decrement a Function iterator to the previous Function. * * Returns NULL if the iterator was already at the beginning and there are * no previous functions. 
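 *
 * A sketch of the usual forward iteration over all functions of an assumed
 * module M, using the iterator-style accessors declared above:
 *
 * \code
 *   for (LLVMValueRef Fn = LLVMGetFirstFunction(M); Fn;
 *        Fn = LLVMGetNextFunction(Fn)) {
 *     size_t NameLen = 0;
 *     const char *Name = LLVMGetValueName2(Fn, &NameLen);
 *     // ... use Name / NameLen ...
 *   }
 * \endcode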
*/ LLVMValueRef LLVMGetPreviousFunction(LLVMValueRef Fn); /** Deprecated: Use LLVMSetModuleInlineAsm2 instead. */ void LLVMSetModuleInlineAsm(LLVMModuleRef M, const char *Asm); /** * @} */ /** * @defgroup LLVMCCoreType Types * * Types represent the type of a value. * * Types are associated with a context instance. The context internally * deduplicates types so there is only 1 instance of a specific type * alive at a time. In other words, a unique type is shared among all * consumers within a context. * * A Type in the C API corresponds to llvm::Type. * * Types have the following hierarchy: * * types: * integer type * real type * function type * sequence types: * array type * pointer type * vector type * void type * label type * opaque type * * @{ */ /** * Obtain the enumerated type of a Type instance. * * @see llvm::Type::getTypeID() */ LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty); /** * Whether the type has a known size. * * Things that don't have a size are abstract types, labels, and void. * * @see llvm::Type::isSized() */ LLVMBool LLVMTypeIsSized(LLVMTypeRef Ty); /** * Obtain the context to which this type instance is associated. * * @see llvm::Type::getContext() */ LLVMContextRef LLVMGetTypeContext(LLVMTypeRef Ty); /** * Dump a representation of a type to stderr. * * @see llvm::Type::dump() */ void LLVMDumpType(LLVMTypeRef Val); /** * Return a string representation of the type. Use * LLVMDisposeMessage to free the string. * * @see llvm::Type::print() */ char *LLVMPrintTypeToString(LLVMTypeRef Val); /** * @defgroup LLVMCCoreTypeInt Integer Types * * Functions in this section operate on integer types. * * @{ */ /** * Obtain an integer type from a context with specified bit width. */ LLVMTypeRef LLVMInt1TypeInContext(LLVMContextRef C); LLVMTypeRef LLVMInt8TypeInContext(LLVMContextRef C); LLVMTypeRef LLVMInt16TypeInContext(LLVMContextRef C); LLVMTypeRef LLVMInt32TypeInContext(LLVMContextRef C); LLVMTypeRef LLVMInt64TypeInContext(LLVMContextRef C); LLVMTypeRef LLVMInt128TypeInContext(LLVMContextRef C); LLVMTypeRef LLVMIntTypeInContext(LLVMContextRef C, unsigned NumBits); /** * Obtain an integer type from the global context with a specified bit * width. */ LLVMTypeRef LLVMInt1Type(void); LLVMTypeRef LLVMInt8Type(void); LLVMTypeRef LLVMInt16Type(void); LLVMTypeRef LLVMInt32Type(void); LLVMTypeRef LLVMInt64Type(void); LLVMTypeRef LLVMInt128Type(void); LLVMTypeRef LLVMIntType(unsigned NumBits); unsigned LLVMGetIntTypeWidth(LLVMTypeRef IntegerTy); /** * @} */ /** * @defgroup LLVMCCoreTypeFloat Floating Point Types * * @{ */ /** * Obtain a 16-bit floating point type from a context. */ LLVMTypeRef LLVMHalfTypeInContext(LLVMContextRef C); /** * Obtain a 16-bit brain floating point type from a context. */ LLVMTypeRef LLVMBFloatTypeInContext(LLVMContextRef C); /** * Obtain a 32-bit floating point type from a context. */ LLVMTypeRef LLVMFloatTypeInContext(LLVMContextRef C); /** * Obtain a 64-bit floating point type from a context. */ LLVMTypeRef LLVMDoubleTypeInContext(LLVMContextRef C); /** * Obtain an 80-bit floating point type (X87) from a context. */ LLVMTypeRef LLVMX86FP80TypeInContext(LLVMContextRef C); /** * Obtain a 128-bit floating point type (112-bit mantissa) from a * context. */ LLVMTypeRef LLVMFP128TypeInContext(LLVMContextRef C); /** * Obtain a 128-bit floating point type (two 64-bits) from a context. */ LLVMTypeRef LLVMPPCFP128TypeInContext(LLVMContextRef C); /** * Obtain a floating point type from the global context. * * These map to the functions in this group of the same name.
*/ LLVMTypeRef LLVMHalfType(void); LLVMTypeRef LLVMBFloatType(void); LLVMTypeRef LLVMFloatType(void); LLVMTypeRef LLVMDoubleType(void); LLVMTypeRef LLVMX86FP80Type(void); LLVMTypeRef LLVMFP128Type(void); LLVMTypeRef LLVMPPCFP128Type(void); /** * @} */ /** * @defgroup LLVMCCoreTypeFunction Function Types * * @{ */ /** * Obtain a function type consisting of a specified signature. * * The function is defined as a tuple of a return Type, a list of * parameter types, and whether the function is variadic. */ LLVMTypeRef LLVMFunctionType(LLVMTypeRef ReturnType, LLVMTypeRef *ParamTypes, unsigned ParamCount, LLVMBool IsVarArg); /** * Returns whether a function type is variadic. */ LLVMBool LLVMIsFunctionVarArg(LLVMTypeRef FunctionTy); /** * Obtain the Type this function Type returns. */ LLVMTypeRef LLVMGetReturnType(LLVMTypeRef FunctionTy); /** * Obtain the number of parameters this function accepts. */ unsigned LLVMCountParamTypes(LLVMTypeRef FunctionTy); /** * Obtain the types of a function's parameters. * * The Dest parameter should point to a pre-allocated array of * LLVMTypeRef at least LLVMCountParamTypes() large. On return, the * first LLVMCountParamTypes() entries in the array will be populated * with LLVMTypeRef instances. * * @param FunctionTy The function type to operate on. * @param Dest Memory address of an array to be filled with result. */ void LLVMGetParamTypes(LLVMTypeRef FunctionTy, LLVMTypeRef *Dest); /** * @} */ /** * @defgroup LLVMCCoreTypeStruct Structure Types * * These functions relate to LLVMTypeRef instances. * * @see llvm::StructType * * @{ */ /** * Create a new structure type in a context. * * A structure is specified by a list of inner elements/types and * whether these can be packed together. * * @see llvm::StructType::create() */ LLVMTypeRef LLVMStructTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes, unsigned ElementCount, LLVMBool Packed); /** * Create a new structure type in the global context. * * @see llvm::StructType::create() */ LLVMTypeRef LLVMStructType(LLVMTypeRef *ElementTypes, unsigned ElementCount, LLVMBool Packed); /** * Create an empty structure in a context having a specified name. * * @see llvm::StructType::create() */ LLVMTypeRef LLVMStructCreateNamed(LLVMContextRef C, const char *Name); /** * Obtain the name of a structure. * * @see llvm::StructType::getName() */ const char *LLVMGetStructName(LLVMTypeRef Ty); /** * Set the contents of a structure type. * * @see llvm::StructType::setBody() */ void LLVMStructSetBody(LLVMTypeRef StructTy, LLVMTypeRef *ElementTypes, unsigned ElementCount, LLVMBool Packed); /** * Get the number of elements defined inside the structure. * * @see llvm::StructType::getNumElements() */ unsigned LLVMCountStructElementTypes(LLVMTypeRef StructTy); /** * Get the elements within a structure. * * The function is passed the address of a pre-allocated array of * LLVMTypeRef at least LLVMCountStructElementTypes() long. After * invocation, this array will be populated with the structure's * elements. The objects in the destination array will have a lifetime * of the structure type itself, which is the lifetime of the context it * is contained in. */ void LLVMGetStructElementTypes(LLVMTypeRef StructTy, LLVMTypeRef *Dest); /** * Get the type of the element at a given index in the structure. * * @see llvm::StructType::getTypeAtIndex() */ LLVMTypeRef LLVMStructGetTypeAtIndex(LLVMTypeRef StructTy, unsigned i); /** * Determine whether a structure is packed. 
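 *
 * A sketch of creating and populating a named struct type; the context Ctx
 * and the name "demo.node" are illustrative assumptions:
 *
 * \code
 *   LLVMTypeRef Fields[2];
 *   Fields[0] = LLVMInt32TypeInContext(Ctx);
 *   Fields[1] = LLVMPointerType(LLVMInt8TypeInContext(Ctx), 0);
 *   LLVMTypeRef Node = LLVMStructCreateNamed(Ctx, "demo.node");
 *   LLVMStructSetBody(Node, Fields, 2, 0);        // 0 = not packed
 *   LLVMBool Packed = LLVMIsPackedStruct(Node);   // false here
 * \endcode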
* * @see llvm::StructType::isPacked() */ LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy); /** * Determine whether a structure is opaque. * * @see llvm::StructType::isOpaque() */ LLVMBool LLVMIsOpaqueStruct(LLVMTypeRef StructTy); /** * Determine whether a structure is literal. * * @see llvm::StructType::isLiteral() */ LLVMBool LLVMIsLiteralStruct(LLVMTypeRef StructTy); /** * @} */ /** * @defgroup LLVMCCoreTypeSequential Sequential Types * * Sequential types represents "arrays" of types. This is a super class * for array, vector, and pointer types. * * @{ */ /** * Obtain the type of elements within a sequential type. * * This works on array, vector, and pointer types. * * @see llvm::SequentialType::getElementType() */ LLVMTypeRef LLVMGetElementType(LLVMTypeRef Ty); /** * Returns type's subtypes * * @see llvm::Type::subtypes() */ void LLVMGetSubtypes(LLVMTypeRef Tp, LLVMTypeRef *Arr); /** * Return the number of types in the derived type. * * @see llvm::Type::getNumContainedTypes() */ unsigned LLVMGetNumContainedTypes(LLVMTypeRef Tp); /** * Create a fixed size array type that refers to a specific type. * * The created type will exist in the context that its element type * exists in. * * @see llvm::ArrayType::get() */ LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount); /** * Obtain the length of an array type. * * This only works on types that represent arrays. * * @see llvm::ArrayType::getNumElements() */ unsigned LLVMGetArrayLength(LLVMTypeRef ArrayTy); /** * Create a pointer type that points to a defined type. * * The created type will exist in the context that its pointee type * exists in. * * @see llvm::PointerType::get() */ LLVMTypeRef LLVMPointerType(LLVMTypeRef ElementType, unsigned AddressSpace); /** * Obtain the address space of a pointer type. * * This only works on types that represent pointers. * * @see llvm::PointerType::getAddressSpace() */ unsigned LLVMGetPointerAddressSpace(LLVMTypeRef PointerTy); /** * Create a vector type that contains a defined type and has a specific * number of elements. * * The created type will exist in the context thats its element type * exists in. * * @see llvm::VectorType::get() */ LLVMTypeRef LLVMVectorType(LLVMTypeRef ElementType, unsigned ElementCount); /** * Create a vector type that contains a defined type and has a scalable * number of elements. * * The created type will exist in the context thats its element type * exists in. * * @see llvm::ScalableVectorType::get() */ LLVMTypeRef LLVMScalableVectorType(LLVMTypeRef ElementType, unsigned ElementCount); /** * Obtain the (possibly scalable) number of elements in a vector type. * * This only works on types that represent vectors (fixed or scalable). * * @see llvm::VectorType::getNumElements() */ unsigned LLVMGetVectorSize(LLVMTypeRef VectorTy); /** * @} */ /** * @defgroup LLVMCCoreTypeOther Other Types * * @{ */ /** * Create a void type in a context. */ LLVMTypeRef LLVMVoidTypeInContext(LLVMContextRef C); /** * Create a label type in a context. */ LLVMTypeRef LLVMLabelTypeInContext(LLVMContextRef C); /** * Create a X86 MMX type in a context. */ LLVMTypeRef LLVMX86MMXTypeInContext(LLVMContextRef C); /** * Create a X86 AMX type in a context. */ LLVMTypeRef LLVMX86AMXTypeInContext(LLVMContextRef C); /** * Create a token type in a context. */ LLVMTypeRef LLVMTokenTypeInContext(LLVMContextRef C); /** * Create a metadata type in a context. 
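 *
 * A sketch combining the sequential-type constructors documented above;
 * Ctx is an assumed context:
 *
 * \code
 *   LLVMTypeRef I32 = LLVMInt32TypeInContext(Ctx);
 *   LLVMTypeRef Arr = LLVMArrayType(I32, 16);     // [16 x i32]
 *   LLVMTypeRef Ptr = LLVMPointerType(I32, 0);    // i32* in address space 0
 *   LLVMTypeRef Vec = LLVMVectorType(I32, 4);     // <4 x i32>
 *   unsigned Lanes = LLVMGetVectorSize(Vec);      // 4
 * \endcode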
*/ LLVMTypeRef LLVMMetadataTypeInContext(LLVMContextRef C); /** * These are similar to the above functions except they operate on the * global context. */ LLVMTypeRef LLVMVoidType(void); LLVMTypeRef LLVMLabelType(void); LLVMTypeRef LLVMX86MMXType(void); LLVMTypeRef LLVMX86AMXType(void); /** * @} */ /** * @} */ /** * @defgroup LLVMCCoreValues Values * * The bulk of LLVM's object model consists of values, which comprise a very * rich type hierarchy. * * LLVMValueRef essentially represents llvm::Value. There is a rich * hierarchy of classes within this type. Depending on the instance * obtained, not all APIs are available. * * Callers can determine the type of an LLVMValueRef by calling the * LLVMIsA* family of functions (e.g. LLVMIsAArgument()). These * functions are defined by a macro, so it isn't obvious which are * available by looking at the Doxygen source code. Instead, look at the * source definition of LLVM_FOR_EACH_VALUE_SUBCLASS and note the list * of value names given. These value names also correspond to classes in * the llvm::Value hierarchy. * * @{ */ #define LLVM_FOR_EACH_VALUE_SUBCLASS(macro) \ macro(Argument) \ macro(BasicBlock) \ macro(InlineAsm) \ macro(User) \ macro(Constant) \ macro(BlockAddress) \ macro(ConstantAggregateZero) \ macro(ConstantArray) \ macro(ConstantDataSequential) \ macro(ConstantDataArray) \ macro(ConstantDataVector) \ macro(ConstantExpr) \ macro(ConstantFP) \ macro(ConstantInt) \ macro(ConstantPointerNull) \ macro(ConstantStruct) \ macro(ConstantTokenNone) \ macro(ConstantVector) \ macro(GlobalValue) \ macro(GlobalAlias) \ macro(GlobalIFunc) \ macro(GlobalObject) \ macro(Function) \ macro(GlobalVariable) \ macro(UndefValue) \ macro(PoisonValue) \ macro(Instruction) \ macro(UnaryOperator) \ macro(BinaryOperator) \ macro(CallInst) \ macro(IntrinsicInst) \ macro(DbgInfoIntrinsic) \ macro(DbgVariableIntrinsic) \ macro(DbgDeclareInst) \ macro(DbgLabelInst) \ macro(MemIntrinsic) \ macro(MemCpyInst) \ macro(MemMoveInst) \ macro(MemSetInst) \ macro(CmpInst) \ macro(FCmpInst) \ macro(ICmpInst) \ macro(ExtractElementInst) \ macro(GetElementPtrInst) \ macro(InsertElementInst) \ macro(InsertValueInst) \ macro(LandingPadInst) \ macro(PHINode) \ macro(SelectInst) \ macro(ShuffleVectorInst) \ macro(StoreInst) \ macro(BranchInst) \ macro(IndirectBrInst) \ macro(InvokeInst) \ macro(ReturnInst) \ macro(SwitchInst) \ macro(UnreachableInst) \ macro(ResumeInst) \ macro(CleanupReturnInst) \ macro(CatchReturnInst) \ macro(CatchSwitchInst) \ macro(CallBrInst) \ macro(FuncletPadInst) \ macro(CatchPadInst) \ macro(CleanupPadInst) \ macro(UnaryInstruction) \ macro(AllocaInst) \ macro(CastInst) \ macro(AddrSpaceCastInst) \ macro(BitCastInst) \ macro(FPExtInst) \ macro(FPToSIInst) \ macro(FPToUIInst) \ macro(FPTruncInst) \ macro(IntToPtrInst) \ macro(PtrToIntInst) \ macro(SExtInst) \ macro(SIToFPInst) \ macro(TruncInst) \ macro(UIToFPInst) \ macro(ZExtInst) \ macro(ExtractValueInst) \ macro(LoadInst) \ macro(VAArgInst) \ macro(FreezeInst) \ macro(AtomicCmpXchgInst) \ macro(AtomicRMWInst) \ macro(FenceInst) /** * @defgroup LLVMCCoreValueGeneral General APIs * * Functions in this section work on all LLVMValueRef instances, * regardless of their sub-type. They correspond to functions available * on llvm::Value. * * @{ */ /** * Obtain the type of a value. * * @see llvm::Value::getType() */ LLVMTypeRef LLVMTypeOf(LLVMValueRef Val); /** * Obtain the enumerated type of a Value instance. 
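 *
 * A sketch of querying the type and kind of a simple constant; Ctx is an
 * assumed context:
 *
 * \code
 *   LLVMValueRef C = LLVMConstInt(LLVMInt32TypeInContext(Ctx), 42, 0);
 *   LLVMTypeRef Ty = LLVMTypeOf(C);               // i32
 *   LLVMValueKind K = LLVMGetValueKind(C);        // LLVMConstantIntValueKind
 *   char *Str = LLVMPrintValueToString(C);
 *   LLVMDisposeMessage(Str);                      // returned strings must be freed
 * \endcode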
* * @see llvm::Value::getValueID() */ LLVMValueKind LLVMGetValueKind(LLVMValueRef Val); /** * Obtain the string name of a value. * * @see llvm::Value::getName() */ const char *LLVMGetValueName2(LLVMValueRef Val, size_t *Length); /** * Set the string name of a value. * * @see llvm::Value::setName() */ void LLVMSetValueName2(LLVMValueRef Val, const char *Name, size_t NameLen); /** * Dump a representation of a value to stderr. * * @see llvm::Value::dump() */ void LLVMDumpValue(LLVMValueRef Val); /** * Return a string representation of the value. Use * LLVMDisposeMessage to free the string. * * @see llvm::Value::print() */ char *LLVMPrintValueToString(LLVMValueRef Val); /** * Replace all uses of a value with another one. * * @see llvm::Value::replaceAllUsesWith() */ void LLVMReplaceAllUsesWith(LLVMValueRef OldVal, LLVMValueRef NewVal); /** * Determine whether the specified value instance is constant. */ LLVMBool LLVMIsConstant(LLVMValueRef Val); /** * Determine whether a value instance is undefined. */ LLVMBool LLVMIsUndef(LLVMValueRef Val); /** * Determine whether a value instance is poisonous. */ LLVMBool LLVMIsPoison(LLVMValueRef Val); /** * Convert value instances between types. * * Internally, an LLVMValueRef is "pinned" to a specific type. This * series of functions allows you to cast an instance to a specific * type. * * If the cast is not valid for the specified type, NULL is returned. * * @see llvm::dyn_cast_or_null<> */ #define LLVM_DECLARE_VALUE_CAST(name) \ LLVMValueRef LLVMIsA##name(LLVMValueRef Val); LLVM_FOR_EACH_VALUE_SUBCLASS(LLVM_DECLARE_VALUE_CAST) LLVMValueRef LLVMIsAMDNode(LLVMValueRef Val); LLVMValueRef LLVMIsAMDString(LLVMValueRef Val); /** Deprecated: Use LLVMGetValueName2 instead. */ const char *LLVMGetValueName(LLVMValueRef Val); /** Deprecated: Use LLVMSetValueName2 instead. */ void LLVMSetValueName(LLVMValueRef Val, const char *Name); /** * @} */ /** * @defgroup LLVMCCoreValueUses Usage * * This module defines functions that allow you to inspect the uses of a * LLVMValueRef. * * It is possible to obtain an LLVMUseRef for any LLVMValueRef instance. * Each LLVMUseRef (which corresponds to a llvm::Use instance) holds a * llvm::User and llvm::Value. * * @{ */ /** * Obtain the first use of a value. * * Uses are obtained in an iterator fashion. First, call this function * to obtain a reference to the first use. Then, call LLVMGetNextUse() * on that instance and all subsequently obtained instances until * LLVMGetNextUse() returns NULL. * * @see llvm::Value::use_begin() */ LLVMUseRef LLVMGetFirstUse(LLVMValueRef Val); /** * Obtain the next use of a value. * * This effectively advances the iterator. It returns NULL if you are on * the final use and no more are available. */ LLVMUseRef LLVMGetNextUse(LLVMUseRef U); /** * Obtain the user value for a user. * * The returned value corresponds to a llvm::User type. * * @see llvm::Use::getUser() */ LLVMValueRef LLVMGetUser(LLVMUseRef U); /** * Obtain the value this use corresponds to. * * @see llvm::Use::get(). */ LLVMValueRef LLVMGetUsedValue(LLVMUseRef U); /** * @} */ /** * @defgroup LLVMCCoreValueUser User value * * Functions in this group pertain to LLVMValueRef instances that descend * from llvm::User. This includes constants, instructions, and * operators. * * @{ */ /** * Obtain an operand at a specific index in a llvm::User value. * * @see llvm::User::getOperand() */ LLVMValueRef LLVMGetOperand(LLVMValueRef Val, unsigned Index); /** * Obtain the use of an operand at a specific index in a llvm::User value.
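 *
 * A sketch of visiting every use of an assumed value V with the use-list
 * accessors declared above:
 *
 * \code
 *   for (LLVMUseRef U = LLVMGetFirstUse(V); U; U = LLVMGetNextUse(U)) {
 *     LLVMValueRef UserVal = LLVMGetUser(U);      // who uses V
 *     LLVMValueRef UsedVal = LLVMGetUsedValue(U); // V itself
 *     // ... inspect UserVal ...
 *   }
 * \endcode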
* * @see llvm::User::getOperandUse() */ LLVMUseRef LLVMGetOperandUse(LLVMValueRef Val, unsigned Index); /** * Set an operand at a specific index in a llvm::User value. * * @see llvm::User::setOperand() */ void LLVMSetOperand(LLVMValueRef User, unsigned Index, LLVMValueRef Val); /** * Obtain the number of operands in a llvm::User value. * * @see llvm::User::getNumOperands() */ int LLVMGetNumOperands(LLVMValueRef Val); /** * @} */ /** * @defgroup LLVMCCoreValueConstant Constants * * This section contains APIs for interacting with LLVMValueRef that * correspond to llvm::Constant instances. * * These functions will work for any LLVMValueRef in the llvm::Constant * class hierarchy. * * @{ */ /** * Obtain a constant value referring to the null instance of a type. * * @see llvm::Constant::getNullValue() */ LLVMValueRef LLVMConstNull(LLVMTypeRef Ty); /* all zeroes */ /** * Obtain a constant value referring to the instance of a type * consisting of all ones. * * This is only valid for integer types. * * @see llvm::Constant::getAllOnesValue() */ LLVMValueRef LLVMConstAllOnes(LLVMTypeRef Ty); /** * Obtain a constant value referring to an undefined value of a type. * * @see llvm::UndefValue::get() */ LLVMValueRef LLVMGetUndef(LLVMTypeRef Ty); /** * Obtain a constant value referring to a poison value of a type. * * @see llvm::PoisonValue::get() */ LLVMValueRef LLVMGetPoison(LLVMTypeRef Ty); /** * Determine whether a value instance is null. * * @see llvm::Constant::isNullValue() */ LLVMBool LLVMIsNull(LLVMValueRef Val); /** * Obtain a constant that is a constant pointer pointing to NULL for a * specified type. */ LLVMValueRef LLVMConstPointerNull(LLVMTypeRef Ty); /** * @defgroup LLVMCCoreValueConstantScalar Scalar constants * * Functions in this group model LLVMValueRef instances that correspond * to constants referring to scalar types. * * For integer types, the LLVMTypeRef parameter should correspond to a * llvm::IntegerType instance and the returned LLVMValueRef will * correspond to a llvm::ConstantInt. * * For floating point types, the LLVMTypeRef returned corresponds to a * llvm::ConstantFP. * * @{ */ /** * Obtain a constant value for an integer type. * * The returned value corresponds to a llvm::ConstantInt. * * @see llvm::ConstantInt::get() * * @param IntTy Integer type to obtain value of. * @param N The value the returned instance should refer to. * @param SignExtend Whether to sign extend the produced value. */ LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N, LLVMBool SignExtend); /** * Obtain a constant value for an integer of arbitrary precision. * * @see llvm::ConstantInt::get() */ LLVMValueRef LLVMConstIntOfArbitraryPrecision(LLVMTypeRef IntTy, unsigned NumWords, const uint64_t Words[]); /** * Obtain a constant value for an integer parsed from a string. * * A similar API, LLVMConstIntOfStringAndSize is also available. If the * string's length is available, it is preferred to call that function * instead. * * @see llvm::ConstantInt::get() */ LLVMValueRef LLVMConstIntOfString(LLVMTypeRef IntTy, const char *Text, uint8_t Radix); /** * Obtain a constant value for an integer parsed from a string with * specified length. * * @see llvm::ConstantInt::get() */ LLVMValueRef LLVMConstIntOfStringAndSize(LLVMTypeRef IntTy, const char *Text, unsigned SLen, uint8_t Radix); /** * Obtain a constant value referring to a double floating point value. */ LLVMValueRef LLVMConstReal(LLVMTypeRef RealTy, double N); /** * Obtain a constant for a floating point value parsed from a string. 
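 *
 * A sketch of creating scalar constants with the functions in this group;
 * Ctx is an assumed context:
 *
 * \code
 *   LLVMValueRef I = LLVMConstInt(LLVMInt64TypeInContext(Ctx), 7, 0);    // i64 7
 *   LLVMValueRef D = LLVMConstReal(LLVMDoubleTypeInContext(Ctx), 2.5);   // double 2.5
 *   LLVMValueRef P = LLVMConstRealOfString(LLVMDoubleTypeInContext(Ctx), "3.25");
 *   unsigned long long Z = LLVMConstIntGetZExtValue(I);                  // 7
 * \endcode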
* * A similar API, LLVMConstRealOfStringAndSize is also available. It * should be used if the input string's length is known. */ LLVMValueRef LLVMConstRealOfString(LLVMTypeRef RealTy, const char *Text); /** * Obtain a constant for a floating point value parsed from a string. */ LLVMValueRef LLVMConstRealOfStringAndSize(LLVMTypeRef RealTy, const char *Text, unsigned SLen); /** * Obtain the zero extended value for an integer constant value. * * @see llvm::ConstantInt::getZExtValue() */ unsigned long long LLVMConstIntGetZExtValue(LLVMValueRef ConstantVal); /** * Obtain the sign extended value for an integer constant value. * * @see llvm::ConstantInt::getSExtValue() */ long long LLVMConstIntGetSExtValue(LLVMValueRef ConstantVal); /** * Obtain the double value for an floating point constant value. * losesInfo indicates if some precision was lost in the conversion. * * @see llvm::ConstantFP::getDoubleValue */ double LLVMConstRealGetDouble(LLVMValueRef ConstantVal, LLVMBool *losesInfo); /** * @} */ /** * @defgroup LLVMCCoreValueConstantComposite Composite Constants * * Functions in this group operate on composite constants. * * @{ */ /** * Create a ConstantDataSequential and initialize it with a string. * * @see llvm::ConstantDataArray::getString() */ LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str, unsigned Length, LLVMBool DontNullTerminate); /** * Create a ConstantDataSequential with string content in the global context. * * This is the same as LLVMConstStringInContext except it operates on the * global context. * * @see LLVMConstStringInContext() * @see llvm::ConstantDataArray::getString() */ LLVMValueRef LLVMConstString(const char *Str, unsigned Length, LLVMBool DontNullTerminate); /** * Returns true if the specified constant is an array of i8. * * @see ConstantDataSequential::getAsString() */ LLVMBool LLVMIsConstantString(LLVMValueRef c); /** * Get the given constant data sequential as a string. * * @see ConstantDataSequential::getAsString() */ const char *LLVMGetAsString(LLVMValueRef c, size_t *Length); /** * Create an anonymous ConstantStruct with the specified values. * * @see llvm::ConstantStruct::getAnon() */ LLVMValueRef LLVMConstStructInContext(LLVMContextRef C, LLVMValueRef *ConstantVals, unsigned Count, LLVMBool Packed); /** * Create a ConstantStruct in the global Context. * * This is the same as LLVMConstStructInContext except it operates on the * global Context. * * @see LLVMConstStructInContext() */ LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count, LLVMBool Packed); /** * Create a ConstantArray from values. * * @see llvm::ConstantArray::get() */ LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy, LLVMValueRef *ConstantVals, unsigned Length); /** * Create a non-anonymous ConstantStruct from values. * * @see llvm::ConstantStruct::get() */ LLVMValueRef LLVMConstNamedStruct(LLVMTypeRef StructTy, LLVMValueRef *ConstantVals, unsigned Count); /** * Get an element at specified index as a constant. * * @see ConstantDataSequential::getElementAsConstant() */ LLVMValueRef LLVMGetElementAsConstant(LLVMValueRef C, unsigned idx); /** * Create a ConstantVector from values. * * @see llvm::ConstantVector::get() */ LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size); /** * @} */ /** * @defgroup LLVMCCoreValueConstantExpressions Constant Expressions * * Functions in this group correspond to APIs on llvm::ConstantExpr. * * @see llvm::ConstantExpr. 
* * @{ */ LLVMOpcode LLVMGetConstOpcode(LLVMValueRef ConstantVal); LLVMValueRef LLVMAlignOf(LLVMTypeRef Ty); LLVMValueRef LLVMSizeOf(LLVMTypeRef Ty); LLVMValueRef LLVMConstNeg(LLVMValueRef ConstantVal); LLVMValueRef LLVMConstNSWNeg(LLVMValueRef ConstantVal); LLVMValueRef LLVMConstNUWNeg(LLVMValueRef ConstantVal); LLVMValueRef LLVMConstFNeg(LLVMValueRef ConstantVal); LLVMValueRef LLVMConstNot(LLVMValueRef ConstantVal); LLVMValueRef LLVMConstAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstNSWAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstNUWAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstFAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstNSWSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstNUWSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstFSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstNSWMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstNUWMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstFMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstUDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstExactUDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstSDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstExactSDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstFDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstURem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstSRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstFRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstAnd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstOr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstXor(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstICmp(LLVMIntPredicate Predicate, LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstFCmp(LLVMRealPredicate Predicate, LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstShl(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstLShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstAShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); LLVMValueRef LLVMConstGEP(LLVMValueRef ConstantVal, LLVMValueRef *ConstantIndices, unsigned NumIndices); LLVMValueRef LLVMConstGEP2(LLVMTypeRef Ty, LLVMValueRef ConstantVal, LLVMValueRef *ConstantIndices, unsigned NumIndices); LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal, LLVMValueRef *ConstantIndices, unsigned NumIndices); LLVMValueRef LLVMConstInBoundsGEP2(LLVMTypeRef Ty, LLVMValueRef ConstantVal, LLVMValueRef *ConstantIndices, unsigned NumIndices); LLVMValueRef LLVMConstTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType); LLVMValueRef LLVMConstSExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType); LLVMValueRef LLVMConstZExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType); LLVMValueRef LLVMConstFPTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType); LLVMValueRef LLVMConstFPExt(LLVMValueRef 
ConstantVal, LLVMTypeRef ToType); LLVMValueRef LLVMConstUIToFP(LLVMValueRef ConstantVal, LLVMTypeRef ToType); LLVMValueRef LLVMConstSIToFP(LLVMValueRef ConstantVal, LLVMTypeRef ToType); LLVMValueRef LLVMConstFPToUI(LLVMValueRef ConstantVal, LLVMTypeRef ToType); LLVMValueRef LLVMConstFPToSI(LLVMValueRef ConstantVal, LLVMTypeRef ToType); LLVMValueRef LLVMConstPtrToInt(LLVMValueRef ConstantVal, LLVMTypeRef ToType); LLVMValueRef LLVMConstIntToPtr(LLVMValueRef ConstantVal, LLVMTypeRef ToType); LLVMValueRef LLVMConstBitCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType); LLVMValueRef LLVMConstAddrSpaceCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType); LLVMValueRef LLVMConstZExtOrBitCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType); LLVMValueRef LLVMConstSExtOrBitCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType); LLVMValueRef LLVMConstTruncOrBitCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType); LLVMValueRef LLVMConstPointerCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType); LLVMValueRef LLVMConstIntCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType, LLVMBool isSigned); LLVMValueRef LLVMConstFPCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType); LLVMValueRef LLVMConstSelect(LLVMValueRef ConstantCondition, LLVMValueRef ConstantIfTrue, LLVMValueRef ConstantIfFalse); LLVMValueRef LLVMConstExtractElement(LLVMValueRef VectorConstant, LLVMValueRef IndexConstant); LLVMValueRef LLVMConstInsertElement(LLVMValueRef VectorConstant, LLVMValueRef ElementValueConstant, LLVMValueRef IndexConstant); LLVMValueRef LLVMConstShuffleVector(LLVMValueRef VectorAConstant, LLVMValueRef VectorBConstant, LLVMValueRef MaskConstant); LLVMValueRef LLVMConstExtractValue(LLVMValueRef AggConstant, unsigned *IdxList, unsigned NumIdx); LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant, LLVMValueRef ElementValueConstant, unsigned *IdxList, unsigned NumIdx); LLVMValueRef LLVMBlockAddress(LLVMValueRef F, LLVMBasicBlockRef BB); /** Deprecated: Use LLVMGetInlineAsm instead. */ LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString, const char *Constraints, LLVMBool HasSideEffects, LLVMBool IsAlignStack); /** * @} */ /** * @defgroup LLVMCCoreValueConstantGlobals Global Values * * This group contains functions that operate on global values. Functions in * this group relate to functions in the llvm::GlobalValue class tree. * * @see llvm::GlobalValue * * @{ */ LLVMModuleRef LLVMGetGlobalParent(LLVMValueRef Global); LLVMBool LLVMIsDeclaration(LLVMValueRef Global); LLVMLinkage LLVMGetLinkage(LLVMValueRef Global); void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage); const char *LLVMGetSection(LLVMValueRef Global); void LLVMSetSection(LLVMValueRef Global, const char *Section); LLVMVisibility LLVMGetVisibility(LLVMValueRef Global); void LLVMSetVisibility(LLVMValueRef Global, LLVMVisibility Viz); LLVMDLLStorageClass LLVMGetDLLStorageClass(LLVMValueRef Global); void LLVMSetDLLStorageClass(LLVMValueRef Global, LLVMDLLStorageClass Class); LLVMUnnamedAddr LLVMGetUnnamedAddress(LLVMValueRef Global); void LLVMSetUnnamedAddress(LLVMValueRef Global, LLVMUnnamedAddr UnnamedAddr); /** * Returns the "value type" of a global value. This differs from the formal * type of a global value which is always a pointer type. * * @see llvm::GlobalValue::getValueType() */ LLVMTypeRef LLVMGlobalGetValueType(LLVMValueRef Global); /** Deprecated: Use LLVMGetUnnamedAddress instead. */ LLVMBool LLVMHasUnnamedAddr(LLVMValueRef Global); /** Deprecated: Use LLVMSetUnnamedAddress instead. 
*/ void LLVMSetUnnamedAddr(LLVMValueRef Global, LLVMBool HasUnnamedAddr); /** * @defgroup LLVMCCoreValueWithAlignment Values with alignment * * Functions in this group only apply to values with alignment, i.e. * global variables, load and store instructions. */ /** * Obtain the preferred alignment of the value. * @see llvm::AllocaInst::getAlignment() * @see llvm::LoadInst::getAlignment() * @see llvm::StoreInst::getAlignment() * @see llvm::GlobalValue::getAlignment() */ unsigned LLVMGetAlignment(LLVMValueRef V); /** * Set the preferred alignment of the value. * @see llvm::AllocaInst::setAlignment() * @see llvm::LoadInst::setAlignment() * @see llvm::StoreInst::setAlignment() * @see llvm::GlobalValue::setAlignment() */ void LLVMSetAlignment(LLVMValueRef V, unsigned Bytes); /** * Sets a metadata attachment, erasing the existing metadata attachment if * it already exists for the given kind. * * @see llvm::GlobalObject::setMetadata() */ void LLVMGlobalSetMetadata(LLVMValueRef Global, unsigned Kind, LLVMMetadataRef MD); /** * Erases a metadata attachment of the given kind if it exists. * * @see llvm::GlobalObject::eraseMetadata() */ void LLVMGlobalEraseMetadata(LLVMValueRef Global, unsigned Kind); /** * Removes all metadata attachments from this value. * * @see llvm::GlobalObject::clearMetadata() */ void LLVMGlobalClearMetadata(LLVMValueRef Global); /** * Retrieves an array of metadata entries representing the metadata attached to * this value. The caller is responsible for freeing this array by calling * \c LLVMDisposeValueMetadataEntries. * * @see llvm::GlobalObject::getAllMetadata() */ LLVMValueMetadataEntry *LLVMGlobalCopyAllMetadata(LLVMValueRef Value, size_t *NumEntries); /** * Destroys value metadata entries. */ void LLVMDisposeValueMetadataEntries(LLVMValueMetadataEntry *Entries); /** * Returns the kind of a value metadata entry at a specific index. */ unsigned LLVMValueMetadataEntriesGetKind(LLVMValueMetadataEntry *Entries, unsigned Index); /** * Returns the underlying metadata node of a value metadata entry at a * specific index. */ LLVMMetadataRef LLVMValueMetadataEntriesGetMetadata(LLVMValueMetadataEntry *Entries, unsigned Index); /** * @} */ /** * @defgroup LLVMCoreValueConstantGlobalVariable Global Variables * * This group contains functions that operate on global variable values. 
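 *
 * A sketch of defining a constant i32 global; the name "answer", the module
 * M, and the context Ctx are illustrative assumptions:
 *
 * \code
 *   LLVMTypeRef I32 = LLVMInt32TypeInContext(Ctx);
 *   LLVMValueRef G = LLVMAddGlobal(M, I32, "answer");
 *   LLVMSetInitializer(G, LLVMConstInt(I32, 42, 0));
 *   LLVMSetGlobalConstant(G, 1);
 *   LLVMSetLinkage(G, LLVMInternalLinkage);
 * \endcode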
* * @see llvm::GlobalVariable * * @{ */ LLVMValueRef LLVMAddGlobal(LLVMModuleRef M, LLVMTypeRef Ty, const char *Name); LLVMValueRef LLVMAddGlobalInAddressSpace(LLVMModuleRef M, LLVMTypeRef Ty, const char *Name, unsigned AddressSpace); LLVMValueRef LLVMGetNamedGlobal(LLVMModuleRef M, const char *Name); LLVMValueRef LLVMGetFirstGlobal(LLVMModuleRef M); LLVMValueRef LLVMGetLastGlobal(LLVMModuleRef M); LLVMValueRef LLVMGetNextGlobal(LLVMValueRef GlobalVar); LLVMValueRef LLVMGetPreviousGlobal(LLVMValueRef GlobalVar); void LLVMDeleteGlobal(LLVMValueRef GlobalVar); LLVMValueRef LLVMGetInitializer(LLVMValueRef GlobalVar); void LLVMSetInitializer(LLVMValueRef GlobalVar, LLVMValueRef ConstantVal); LLVMBool LLVMIsThreadLocal(LLVMValueRef GlobalVar); void LLVMSetThreadLocal(LLVMValueRef GlobalVar, LLVMBool IsThreadLocal); LLVMBool LLVMIsGlobalConstant(LLVMValueRef GlobalVar); void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, LLVMBool IsConstant); LLVMThreadLocalMode LLVMGetThreadLocalMode(LLVMValueRef GlobalVar); void LLVMSetThreadLocalMode(LLVMValueRef GlobalVar, LLVMThreadLocalMode Mode); LLVMBool LLVMIsExternallyInitialized(LLVMValueRef GlobalVar); void LLVMSetExternallyInitialized(LLVMValueRef GlobalVar, LLVMBool IsExtInit); /** * @} */ /** * @defgroup LLVMCoreValueConstantGlobalAlias Global Aliases * * This group contains function that operate on global alias values. * * @see llvm::GlobalAlias * * @{ */ LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee, const char *Name); /** * Obtain a GlobalAlias value from a Module by its name. * * The returned value corresponds to a llvm::GlobalAlias value. * * @see llvm::Module::getNamedAlias() */ LLVMValueRef LLVMGetNamedGlobalAlias(LLVMModuleRef M, const char *Name, size_t NameLen); /** * Obtain an iterator to the first GlobalAlias in a Module. * * @see llvm::Module::alias_begin() */ LLVMValueRef LLVMGetFirstGlobalAlias(LLVMModuleRef M); /** * Obtain an iterator to the last GlobalAlias in a Module. * * @see llvm::Module::alias_end() */ LLVMValueRef LLVMGetLastGlobalAlias(LLVMModuleRef M); /** * Advance a GlobalAlias iterator to the next GlobalAlias. * * Returns NULL if the iterator was already at the end and there are no more * global aliases. */ LLVMValueRef LLVMGetNextGlobalAlias(LLVMValueRef GA); /** * Decrement a GlobalAlias iterator to the previous GlobalAlias. * * Returns NULL if the iterator was already at the beginning and there are * no previous global aliases. */ LLVMValueRef LLVMGetPreviousGlobalAlias(LLVMValueRef GA); /** * Retrieve the target value of an alias. */ LLVMValueRef LLVMAliasGetAliasee(LLVMValueRef Alias); /** * Set the target value of an alias. */ void LLVMAliasSetAliasee(LLVMValueRef Alias, LLVMValueRef Aliasee); /** * @} */ /** * @defgroup LLVMCCoreValueFunction Function values * * Functions in this group operate on LLVMValueRef instances that * correspond to llvm::Function instances. * * @see llvm::Function * * @{ */ /** * Remove a function from its containing module and deletes it. * * @see llvm::Function::eraseFromParent() */ void LLVMDeleteFunction(LLVMValueRef Fn); /** * Check whether the given function has a personality function. * * @see llvm::Function::hasPersonalityFn() */ LLVMBool LLVMHasPersonalityFn(LLVMValueRef Fn); /** * Obtain the personality function attached to the function. * * @see llvm::Function::getPersonalityFn() */ LLVMValueRef LLVMGetPersonalityFn(LLVMValueRef Fn); /** * Set the personality function attached to the function. 
* * @see llvm::Function::setPersonalityFn() */ void LLVMSetPersonalityFn(LLVMValueRef Fn, LLVMValueRef PersonalityFn); /** * Obtain the intrinsic ID number which matches the given function name. * * @see llvm::Function::lookupIntrinsicID() */ unsigned LLVMLookupIntrinsicID(const char *Name, size_t NameLen); /** * Obtain the ID number from a function instance. * * @see llvm::Function::getIntrinsicID() */ unsigned LLVMGetIntrinsicID(LLVMValueRef Fn); /** * Create or insert the declaration of an intrinsic. For overloaded intrinsics, * parameter types must be provided to uniquely identify an overload. * * @see llvm::Intrinsic::getDeclaration() */ LLVMValueRef LLVMGetIntrinsicDeclaration(LLVMModuleRef Mod, unsigned ID, LLVMTypeRef *ParamTypes, size_t ParamCount); /** * Retrieves the type of an intrinsic. For overloaded intrinsics, parameter * types must be provided to uniquely identify an overload. * * @see llvm::Intrinsic::getType() */ LLVMTypeRef LLVMIntrinsicGetType(LLVMContextRef Ctx, unsigned ID, LLVMTypeRef *ParamTypes, size_t ParamCount); /** * Retrieves the name of an intrinsic. * * @see llvm::Intrinsic::getName() */ const char *LLVMIntrinsicGetName(unsigned ID, size_t *NameLength); /** * Copies the name of an overloaded intrinsic identified by a given list of * parameter types. * * Unlike LLVMIntrinsicGetName, the caller is responsible for freeing the * returned string. * * @see llvm::Intrinsic::getName() */ const char *LLVMIntrinsicCopyOverloadedName(unsigned ID, LLVMTypeRef *ParamTypes, size_t ParamCount, size_t *NameLength); /** * Obtain if the intrinsic identified by the given ID is overloaded. * * @see llvm::Intrinsic::isOverloaded() */ LLVMBool LLVMIntrinsicIsOverloaded(unsigned ID); /** * Obtain the calling function of a function. * * The returned value corresponds to the LLVMCallConv enumeration. * * @see llvm::Function::getCallingConv() */ unsigned LLVMGetFunctionCallConv(LLVMValueRef Fn); /** * Set the calling convention of a function. * * @see llvm::Function::setCallingConv() * * @param Fn Function to operate on * @param CC LLVMCallConv to set calling convention to */ void LLVMSetFunctionCallConv(LLVMValueRef Fn, unsigned CC); /** * Obtain the name of the garbage collector to use during code * generation. * * @see llvm::Function::getGC() */ const char *LLVMGetGC(LLVMValueRef Fn); /** * Define the garbage collector to use during code generation. * * @see llvm::Function::setGC() */ void LLVMSetGC(LLVMValueRef Fn, const char *Name); /** * Add an attribute to a function. 
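/*
 * Illustrative sketch of the intrinsic-lookup functions above. "llvm.donothing"
 * is a non-overloaded intrinsic; a lookup result of 0 is assumed to mean
 * "no such intrinsic". For overloaded intrinsics the ParamTypes array must
 * describe the desired overload instead of being empty.
 */
#include <llvm-c/Core.h>
#include <stdio.h>
#include <string.h>

static LLVMValueRef declare_donothing(LLVMModuleRef M) {
  const char *Name = "llvm.donothing";
  unsigned ID = LLVMLookupIntrinsicID(Name, strlen(Name));
  if (ID == 0 || LLVMIntrinsicIsOverloaded(ID))
    return NULL;
  size_t Len = 0;
  printf("declaring %s\n", LLVMIntrinsicGetName(ID, &Len));
  /* Non-overloaded, so no parameter types are needed to pick an overload. */
  return LLVMGetIntrinsicDeclaration(M, ID, NULL, 0);
}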
* * @see llvm::Function::addAttribute() */ void LLVMAddAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx, LLVMAttributeRef A); unsigned LLVMGetAttributeCountAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx); void LLVMGetAttributesAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx, LLVMAttributeRef *Attrs); LLVMAttributeRef LLVMGetEnumAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx, unsigned KindID); LLVMAttributeRef LLVMGetStringAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx, const char *K, unsigned KLen); void LLVMRemoveEnumAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx, unsigned KindID); void LLVMRemoveStringAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx, const char *K, unsigned KLen); /** * Add a target-dependent attribute to a function * @see llvm::AttrBuilder::addAttribute() */ void LLVMAddTargetDependentFunctionAttr(LLVMValueRef Fn, const char *A, const char *V); /** * @defgroup LLVMCCoreValueFunctionParameters Function Parameters * * Functions in this group relate to arguments/parameters on functions. * * Functions in this group expect LLVMValueRef instances that correspond * to llvm::Function instances. * * @{ */ /** * Obtain the number of parameters in a function. * * @see llvm::Function::arg_size() */ unsigned LLVMCountParams(LLVMValueRef Fn); /** * Obtain the parameters in a function. * * The takes a pointer to a pre-allocated array of LLVMValueRef that is * at least LLVMCountParams() long. This array will be filled with * LLVMValueRef instances which correspond to the parameters the * function receives. Each LLVMValueRef corresponds to a llvm::Argument * instance. * * @see llvm::Function::arg_begin() */ void LLVMGetParams(LLVMValueRef Fn, LLVMValueRef *Params); /** * Obtain the parameter at the specified index. * * Parameters are indexed from 0. * * @see llvm::Function::arg_begin() */ LLVMValueRef LLVMGetParam(LLVMValueRef Fn, unsigned Index); /** * Obtain the function to which this argument belongs. * * Unlike other functions in this group, this one takes an LLVMValueRef * that corresponds to a llvm::Attribute. * * The returned LLVMValueRef is the llvm::Function to which this * argument belongs. */ LLVMValueRef LLVMGetParamParent(LLVMValueRef Inst); /** * Obtain the first parameter to a function. * * @see llvm::Function::arg_begin() */ LLVMValueRef LLVMGetFirstParam(LLVMValueRef Fn); /** * Obtain the last parameter to a function. * * @see llvm::Function::arg_end() */ LLVMValueRef LLVMGetLastParam(LLVMValueRef Fn); /** * Obtain the next parameter to a function. * * This takes an LLVMValueRef obtained from LLVMGetFirstParam() (which is * actually a wrapped iterator) and obtains the next parameter from the * underlying iterator. */ LLVMValueRef LLVMGetNextParam(LLVMValueRef Arg); /** * Obtain the previous parameter to a function. * * This is the opposite of LLVMGetNextParam(). */ LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg); /** * Set the alignment for a function parameter. * * @see llvm::Argument::addAttr() * @see llvm::AttrBuilder::addAlignmentAttr() */ void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned Align); /** * @} */ /** * @defgroup LLVMCCoreValueGlobalIFunc IFuncs * * Functions in this group relate to indirect functions. * * Functions in this group expect LLVMValueRef instances that correspond * to llvm::GlobalIFunc instances. * * @{ */ /** * Add a global indirect function to a module under a specified name. 
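/*
 * Illustrative sketch of the parameter accessors above: walk a function's
 * arguments with the iterator-style API and attach a target-dependent
 * attribute. The attribute key/value pair "no-builtins"/"" is only an
 * example string; the helper name is arbitrary.
 */
#include <llvm-c/Core.h>
#include <stdio.h>

static void describe_params(LLVMValueRef Fn) {
  printf("%u parameter(s)\n", LLVMCountParams(Fn));
  for (LLVMValueRef Arg = LLVMGetFirstParam(Fn); Arg;
       Arg = LLVMGetNextParam(Arg)) {
    /* Every argument knows the function it belongs to. */
    (void)LLVMGetParamParent(Arg);
  }
  LLVMAddTargetDependentFunctionAttr(Fn, "no-builtins", "");
}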
* * @see llvm::GlobalIFunc::create() */ LLVMValueRef LLVMAddGlobalIFunc(LLVMModuleRef M, const char *Name, size_t NameLen, LLVMTypeRef Ty, unsigned AddrSpace, LLVMValueRef Resolver); /** * Obtain a GlobalIFunc value from a Module by its name. * * The returned value corresponds to a llvm::GlobalIFunc value. * * @see llvm::Module::getNamedIFunc() */ LLVMValueRef LLVMGetNamedGlobalIFunc(LLVMModuleRef M, const char *Name, size_t NameLen); /** * Obtain an iterator to the first GlobalIFunc in a Module. * * @see llvm::Module::ifunc_begin() */ LLVMValueRef LLVMGetFirstGlobalIFunc(LLVMModuleRef M); /** * Obtain an iterator to the last GlobalIFunc in a Module. * * @see llvm::Module::ifunc_end() */ LLVMValueRef LLVMGetLastGlobalIFunc(LLVMModuleRef M); /** * Advance a GlobalIFunc iterator to the next GlobalIFunc. * * Returns NULL if the iterator was already at the end and there are no more * global aliases. */ LLVMValueRef LLVMGetNextGlobalIFunc(LLVMValueRef IFunc); /** * Decrement a GlobalIFunc iterator to the previous GlobalIFunc. * * Returns NULL if the iterator was already at the beginning and there are * no previous global aliases. */ LLVMValueRef LLVMGetPreviousGlobalIFunc(LLVMValueRef IFunc); /** * Retrieves the resolver function associated with this indirect function, or * NULL if it doesn't not exist. * * @see llvm::GlobalIFunc::getResolver() */ LLVMValueRef LLVMGetGlobalIFuncResolver(LLVMValueRef IFunc); /** * Sets the resolver function associated with this indirect function. * * @see llvm::GlobalIFunc::setResolver() */ void LLVMSetGlobalIFuncResolver(LLVMValueRef IFunc, LLVMValueRef Resolver); /** * Remove a global indirect function from its parent module and delete it. * * @see llvm::GlobalIFunc::eraseFromParent() */ void LLVMEraseGlobalIFunc(LLVMValueRef IFunc); /** * Remove a global indirect function from its parent module. * * This unlinks the global indirect function from its containing module but * keeps it alive. * * @see llvm::GlobalIFunc::removeFromParent() */ void LLVMRemoveGlobalIFunc(LLVMValueRef IFunc); /** * @} */ /** * @} */ /** * @} */ /** * @} */ /** * @defgroup LLVMCCoreValueMetadata Metadata * * @{ */ /** * Create an MDString value from a given string value. * * The MDString value does not take ownership of the given string, it remains * the responsibility of the caller to free it. * * @see llvm::MDString::get() */ LLVMMetadataRef LLVMMDStringInContext2(LLVMContextRef C, const char *Str, size_t SLen); /** * Create an MDNode value with the given array of operands. * * @see llvm::MDNode::get() */ LLVMMetadataRef LLVMMDNodeInContext2(LLVMContextRef C, LLVMMetadataRef *MDs, size_t Count); /** * Obtain a Metadata as a Value. */ LLVMValueRef LLVMMetadataAsValue(LLVMContextRef C, LLVMMetadataRef MD); /** * Obtain a Value as a Metadata. */ LLVMMetadataRef LLVMValueAsMetadata(LLVMValueRef Val); /** * Obtain the underlying string from a MDString value. * * @param V Instance to obtain string from. * @param Length Memory address which will hold length of returned string. * @return String data in MDString. */ const char *LLVMGetMDString(LLVMValueRef V, unsigned *Length); /** * Obtain the number of operands from an MDNode value. * * @param V MDNode to get number of operands from. * @return Number of operands of the MDNode. */ unsigned LLVMGetMDNodeNumOperands(LLVMValueRef V); /** * Obtain the given MDNode's operands. * * The passed LLVMValueRef pointer should point to enough memory to hold all of * the operands of the given MDNode (see LLVMGetMDNodeNumOperands) as * LLVMValueRefs. 
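/*
 * Illustrative sketch of the Metadata functions above: build an MDNode that
 * holds a single MDString and attach it to a global under a custom kind.
 * The kind name "my.note" and string "generated" are arbitrary;
 * LLVMGetMDKindIDInContext is declared earlier in this header.
 */
#include <llvm-c/Core.h>
#include <string.h>

static void tag_global(LLVMContextRef Ctx, LLVMValueRef Global) {
  LLVMMetadataRef Str =
      LLVMMDStringInContext2(Ctx, "generated", strlen("generated"));
  LLVMMetadataRef Node = LLVMMDNodeInContext2(Ctx, &Str, 1);
  unsigned Kind =
      LLVMGetMDKindIDInContext(Ctx, "my.note", (unsigned)strlen("my.note"));
  LLVMGlobalSetMetadata(Global, Kind, Node);
}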
This memory will be populated with the LLVMValueRefs of the * MDNode's operands. * * @param V MDNode to get the operands from. * @param Dest Destination array for operands. */ void LLVMGetMDNodeOperands(LLVMValueRef V, LLVMValueRef *Dest); /** Deprecated: Use LLVMMDStringInContext2 instead. */ LLVMValueRef LLVMMDStringInContext(LLVMContextRef C, const char *Str, unsigned SLen); /** Deprecated: Use LLVMMDStringInContext2 instead. */ LLVMValueRef LLVMMDString(const char *Str, unsigned SLen); /** Deprecated: Use LLVMMDNodeInContext2 instead. */ LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals, unsigned Count); /** Deprecated: Use LLVMMDNodeInContext2 instead. */ LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count); /** * @} */ /** * @defgroup LLVMCCoreValueBasicBlock Basic Block * * A basic block represents a single entry single exit section of code. * Basic blocks contain a list of instructions which form the body of * the block. * * Basic blocks belong to functions. They have the type of label. * * Basic blocks are themselves values. However, the C API models them as * LLVMBasicBlockRef. * * @see llvm::BasicBlock * * @{ */ /** * Convert a basic block instance to a value type. */ LLVMValueRef LLVMBasicBlockAsValue(LLVMBasicBlockRef BB); /** * Determine whether an LLVMValueRef is itself a basic block. */ LLVMBool LLVMValueIsBasicBlock(LLVMValueRef Val); /** * Convert an LLVMValueRef to an LLVMBasicBlockRef instance. */ LLVMBasicBlockRef LLVMValueAsBasicBlock(LLVMValueRef Val); /** * Obtain the string name of a basic block. */ const char *LLVMGetBasicBlockName(LLVMBasicBlockRef BB); /** * Obtain the function to which a basic block belongs. * * @see llvm::BasicBlock::getParent() */ LLVMValueRef LLVMGetBasicBlockParent(LLVMBasicBlockRef BB); /** * Obtain the terminator instruction for a basic block. * * If the basic block does not have a terminator (it is not well-formed * if it doesn't), then NULL is returned. * * The returned LLVMValueRef corresponds to an llvm::Instruction. * * @see llvm::BasicBlock::getTerminator() */ LLVMValueRef LLVMGetBasicBlockTerminator(LLVMBasicBlockRef BB); /** * Obtain the number of basic blocks in a function. * * @param Fn Function value to operate on. */ unsigned LLVMCountBasicBlocks(LLVMValueRef Fn); /** * Obtain all of the basic blocks in a function. * * This operates on a function value. The BasicBlocks parameter is a * pointer to a pre-allocated array of LLVMBasicBlockRef of at least * LLVMCountBasicBlocks() in length. This array is populated with * LLVMBasicBlockRef instances. */ void LLVMGetBasicBlocks(LLVMValueRef Fn, LLVMBasicBlockRef *BasicBlocks); /** * Obtain the first basic block in a function. * * The returned basic block can be used as an iterator. You will likely * eventually call into LLVMGetNextBasicBlock() with it. * * @see llvm::Function::begin() */ LLVMBasicBlockRef LLVMGetFirstBasicBlock(LLVMValueRef Fn); /** * Obtain the last basic block in a function. * * @see llvm::Function::end() */ LLVMBasicBlockRef LLVMGetLastBasicBlock(LLVMValueRef Fn); /** * Advance a basic block iterator. */ LLVMBasicBlockRef LLVMGetNextBasicBlock(LLVMBasicBlockRef BB); /** * Go backwards in a basic block iterator. */ LLVMBasicBlockRef LLVMGetPreviousBasicBlock(LLVMBasicBlockRef BB); /** * Obtain the basic block that corresponds to the entry point of a * function. 
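/*
 * Illustrative sketch of the basic-block traversal functions above: print
 * each block's name and whether it already has a terminator. The output
 * format is, of course, only for demonstration.
 */
#include <llvm-c/Core.h>
#include <stdio.h>

static void list_blocks(LLVMValueRef Fn) {
  printf("%u block(s)\n", LLVMCountBasicBlocks(Fn));
  for (LLVMBasicBlockRef BB = LLVMGetFirstBasicBlock(Fn); BB;
       BB = LLVMGetNextBasicBlock(BB))
    printf("  %s%s\n", LLVMGetBasicBlockName(BB),
           LLVMGetBasicBlockTerminator(BB) ? "" : " (missing terminator)");
}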
* * @see llvm::Function::getEntryBlock() */ LLVMBasicBlockRef LLVMGetEntryBasicBlock(LLVMValueRef Fn); /** * Insert the given basic block after the insertion point of the given builder. * * The insertion point must be valid. * * @see llvm::Function::BasicBlockListType::insertAfter() */ void LLVMInsertExistingBasicBlockAfterInsertBlock(LLVMBuilderRef Builder, LLVMBasicBlockRef BB); /** * Append the given basic block to the basic block list of the given function. * * @see llvm::Function::BasicBlockListType::push_back() */ void LLVMAppendExistingBasicBlock(LLVMValueRef Fn, LLVMBasicBlockRef BB); /** * Create a new basic block without inserting it into a function. * * @see llvm::BasicBlock::Create() */ LLVMBasicBlockRef LLVMCreateBasicBlockInContext(LLVMContextRef C, const char *Name); /** * Append a basic block to the end of a function. * * @see llvm::BasicBlock::Create() */ LLVMBasicBlockRef LLVMAppendBasicBlockInContext(LLVMContextRef C, LLVMValueRef Fn, const char *Name); /** * Append a basic block to the end of a function using the global * context. * * @see llvm::BasicBlock::Create() */ LLVMBasicBlockRef LLVMAppendBasicBlock(LLVMValueRef Fn, const char *Name); /** * Insert a basic block in a function before another basic block. * * The function to add to is determined by the function of the * passed basic block. * * @see llvm::BasicBlock::Create() */ LLVMBasicBlockRef LLVMInsertBasicBlockInContext(LLVMContextRef C, LLVMBasicBlockRef BB, const char *Name); /** * Insert a basic block in a function using the global context. * * @see llvm::BasicBlock::Create() */ LLVMBasicBlockRef LLVMInsertBasicBlock(LLVMBasicBlockRef InsertBeforeBB, const char *Name); /** * Remove a basic block from a function and delete it. * * This deletes the basic block from its containing function and deletes * the basic block itself. * * @see llvm::BasicBlock::eraseFromParent() */ void LLVMDeleteBasicBlock(LLVMBasicBlockRef BB); /** * Remove a basic block from a function. * * This deletes the basic block from its containing function but keep * the basic block alive. * * @see llvm::BasicBlock::removeFromParent() */ void LLVMRemoveBasicBlockFromParent(LLVMBasicBlockRef BB); /** * Move a basic block to before another one. * * @see llvm::BasicBlock::moveBefore() */ void LLVMMoveBasicBlockBefore(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos); /** * Move a basic block to after another one. * * @see llvm::BasicBlock::moveAfter() */ void LLVMMoveBasicBlockAfter(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos); /** * Obtain the first instruction in a basic block. * * The returned LLVMValueRef corresponds to a llvm::Instruction * instance. */ LLVMValueRef LLVMGetFirstInstruction(LLVMBasicBlockRef BB); /** * Obtain the last instruction in a basic block. * * The returned LLVMValueRef corresponds to an LLVM:Instruction. */ LLVMValueRef LLVMGetLastInstruction(LLVMBasicBlockRef BB); /** * @} */ /** * @defgroup LLVMCCoreValueInstruction Instructions * * Functions in this group relate to the inspection and manipulation of * individual instructions. * * In the C++ API, an instruction is modeled by llvm::Instruction. This * class has a large number of descendents. llvm::Instruction is a * llvm::Value and in the C API, instructions are modeled by * LLVMValueRef. * * This group also contains sub-groups which operate on specific * llvm::Instruction types, e.g. llvm::CallInst. * * @{ */ /** * Determine whether an instruction has any metadata attached. 
*/ int LLVMHasMetadata(LLVMValueRef Val); /** * Return metadata associated with an instruction value. */ LLVMValueRef LLVMGetMetadata(LLVMValueRef Val, unsigned KindID); /** * Set metadata associated with an instruction value. */ void LLVMSetMetadata(LLVMValueRef Val, unsigned KindID, LLVMValueRef Node); /** * Returns the metadata associated with an instruction value, but filters out * all the debug locations. * * @see llvm::Instruction::getAllMetadataOtherThanDebugLoc() */ LLVMValueMetadataEntry * LLVMInstructionGetAllMetadataOtherThanDebugLoc(LLVMValueRef Instr, size_t *NumEntries); /** * Obtain the basic block to which an instruction belongs. * * @see llvm::Instruction::getParent() */ LLVMBasicBlockRef LLVMGetInstructionParent(LLVMValueRef Inst); /** * Obtain the instruction that occurs after the one specified. * * The next instruction will be from the same basic block. * * If this is the last instruction in a basic block, NULL will be * returned. */ LLVMValueRef LLVMGetNextInstruction(LLVMValueRef Inst); /** * Obtain the instruction that occurred before this one. * * If the instruction is the first instruction in a basic block, NULL * will be returned. */ LLVMValueRef LLVMGetPreviousInstruction(LLVMValueRef Inst); /** * Remove and delete an instruction. * * The instruction specified is removed from its containing building * block but is kept alive. * * @see llvm::Instruction::removeFromParent() */ void LLVMInstructionRemoveFromParent(LLVMValueRef Inst); /** * Remove and delete an instruction. * * The instruction specified is removed from its containing building * block and then deleted. * * @see llvm::Instruction::eraseFromParent() */ void LLVMInstructionEraseFromParent(LLVMValueRef Inst); /** * Obtain the code opcode for an individual instruction. * * @see llvm::Instruction::getOpCode() */ LLVMOpcode LLVMGetInstructionOpcode(LLVMValueRef Inst); /** * Obtain the predicate of an instruction. * * This is only valid for instructions that correspond to llvm::ICmpInst * or llvm::ConstantExpr whose opcode is llvm::Instruction::ICmp. * * @see llvm::ICmpInst::getPredicate() */ LLVMIntPredicate LLVMGetICmpPredicate(LLVMValueRef Inst); /** * Obtain the float predicate of an instruction. * * This is only valid for instructions that correspond to llvm::FCmpInst * or llvm::ConstantExpr whose opcode is llvm::Instruction::FCmp. * * @see llvm::FCmpInst::getPredicate() */ LLVMRealPredicate LLVMGetFCmpPredicate(LLVMValueRef Inst); /** * Create a copy of 'this' instruction that is identical in all ways * except the following: * * The instruction has no parent * * The instruction has no name * * @see llvm::Instruction::clone() */ LLVMValueRef LLVMInstructionClone(LLVMValueRef Inst); /** * Determine whether an instruction is a terminator. This routine is named to * be compatible with historical functions that did this by querying the * underlying C++ type. * * @see llvm::Instruction::isTerminator() */ LLVMValueRef LLVMIsATerminatorInst(LLVMValueRef Inst); /** * @defgroup LLVMCCoreValueInstructionCall Call Sites and Invocations * * Functions in this group apply to instructions that refer to call * sites and invocations. These correspond to C++ types in the * llvm::CallInst class tree. * * @{ */ /** * Obtain the argument count for a call instruction. * * This expects an LLVMValueRef that corresponds to a llvm::CallInst, * llvm::InvokeInst, or llvm:FuncletPadInst. 
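/*
 * Illustrative sketch of the instruction iteration functions above: count
 * the instructions in every block of a function and report the opcode of
 * each block's terminator. Purely illustrative helper names.
 */
#include <llvm-c/Core.h>
#include <stdio.h>

static unsigned count_instructions(LLVMValueRef Fn) {
  unsigned N = 0;
  for (LLVMBasicBlockRef BB = LLVMGetFirstBasicBlock(Fn); BB;
       BB = LLVMGetNextBasicBlock(BB))
    for (LLVMValueRef I = LLVMGetFirstInstruction(BB); I;
         I = LLVMGetNextInstruction(I)) {
      ++N;
      if (LLVMIsATerminatorInst(I))
        printf("block ends with opcode %d\n",
               (int)LLVMGetInstructionOpcode(I));
    }
  return N;
}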
* * @see llvm::CallInst::getNumArgOperands() * @see llvm::InvokeInst::getNumArgOperands() * @see llvm::FuncletPadInst::getNumArgOperands() */ unsigned LLVMGetNumArgOperands(LLVMValueRef Instr); /** * Set the calling convention for a call instruction. * * This expects an LLVMValueRef that corresponds to a llvm::CallInst or * llvm::InvokeInst. * * @see llvm::CallInst::setCallingConv() * @see llvm::InvokeInst::setCallingConv() */ void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC); /** * Obtain the calling convention for a call instruction. * * This is the opposite of LLVMSetInstructionCallConv(). Reads its * usage. * * @see LLVMSetInstructionCallConv() */ unsigned LLVMGetInstructionCallConv(LLVMValueRef Instr); void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index, unsigned Align); void LLVMAddCallSiteAttribute(LLVMValueRef C, LLVMAttributeIndex Idx, LLVMAttributeRef A); unsigned LLVMGetCallSiteAttributeCount(LLVMValueRef C, LLVMAttributeIndex Idx); void LLVMGetCallSiteAttributes(LLVMValueRef C, LLVMAttributeIndex Idx, LLVMAttributeRef *Attrs); LLVMAttributeRef LLVMGetCallSiteEnumAttribute(LLVMValueRef C, LLVMAttributeIndex Idx, unsigned KindID); LLVMAttributeRef LLVMGetCallSiteStringAttribute(LLVMValueRef C, LLVMAttributeIndex Idx, const char *K, unsigned KLen); void LLVMRemoveCallSiteEnumAttribute(LLVMValueRef C, LLVMAttributeIndex Idx, unsigned KindID); void LLVMRemoveCallSiteStringAttribute(LLVMValueRef C, LLVMAttributeIndex Idx, const char *K, unsigned KLen); /** * Obtain the function type called by this instruction. * * @see llvm::CallBase::getFunctionType() */ LLVMTypeRef LLVMGetCalledFunctionType(LLVMValueRef C); /** * Obtain the pointer to the function invoked by this instruction. * * This expects an LLVMValueRef that corresponds to a llvm::CallInst or * llvm::InvokeInst. * * @see llvm::CallInst::getCalledOperand() * @see llvm::InvokeInst::getCalledOperand() */ LLVMValueRef LLVMGetCalledValue(LLVMValueRef Instr); /** * Obtain whether a call instruction is a tail call. * * This only works on llvm::CallInst instructions. * * @see llvm::CallInst::isTailCall() */ LLVMBool LLVMIsTailCall(LLVMValueRef CallInst); /** * Set whether a call instruction is a tail call. * * This only works on llvm::CallInst instructions. * * @see llvm::CallInst::setTailCall() */ void LLVMSetTailCall(LLVMValueRef CallInst, LLVMBool IsTailCall); /** * Return the normal destination basic block. * * This only works on llvm::InvokeInst instructions. * * @see llvm::InvokeInst::getNormalDest() */ LLVMBasicBlockRef LLVMGetNormalDest(LLVMValueRef InvokeInst); /** * Return the unwind destination basic block. * * Works on llvm::InvokeInst, llvm::CleanupReturnInst, and * llvm::CatchSwitchInst instructions. * * @see llvm::InvokeInst::getUnwindDest() * @see llvm::CleanupReturnInst::getUnwindDest() * @see llvm::CatchSwitchInst::getUnwindDest() */ LLVMBasicBlockRef LLVMGetUnwindDest(LLVMValueRef InvokeInst); /** * Set the normal destination basic block. * * This only works on llvm::InvokeInst instructions. * * @see llvm::InvokeInst::setNormalDest() */ void LLVMSetNormalDest(LLVMValueRef InvokeInst, LLVMBasicBlockRef B); /** * Set the unwind destination basic block. * * Works on llvm::InvokeInst, llvm::CleanupReturnInst, and * llvm::CatchSwitchInst instructions. 
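/*
 * Illustrative sketch of the call-site accessors above: given an instruction
 * that is a llvm::CallInst (opcode LLVMCall), report its argument count and
 * tail-call flag and force the C calling convention. LLVMCall and
 * LLVMCCallConv are enum values declared earlier in this header.
 */
#include <llvm-c/Core.h>
#include <stdio.h>

static void describe_call(LLVMValueRef Inst) {
  if (LLVMGetInstructionOpcode(Inst) != LLVMCall)
    return; /* the accessors below expect a call/invoke site */
  printf("calls %p with %u argument(s), tail=%d\n",
         (void *)LLVMGetCalledValue(Inst), LLVMGetNumArgOperands(Inst),
         (int)LLVMIsTailCall(Inst));
  LLVMSetInstructionCallConv(Inst, LLVMCCallConv);
}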
* * @see llvm::InvokeInst::setUnwindDest() * @see llvm::CleanupReturnInst::setUnwindDest() * @see llvm::CatchSwitchInst::setUnwindDest() */ void LLVMSetUnwindDest(LLVMValueRef InvokeInst, LLVMBasicBlockRef B); /** * @} */ /** * @defgroup LLVMCCoreValueInstructionTerminator Terminators * * Functions in this group only apply to instructions for which * LLVMIsATerminatorInst returns true. * * @{ */ /** * Return the number of successors that this terminator has. * * @see llvm::Instruction::getNumSuccessors */ unsigned LLVMGetNumSuccessors(LLVMValueRef Term); /** * Return the specified successor. * * @see llvm::Instruction::getSuccessor */ LLVMBasicBlockRef LLVMGetSuccessor(LLVMValueRef Term, unsigned i); /** * Update the specified successor to point at the provided block. * * @see llvm::Instruction::setSuccessor */ void LLVMSetSuccessor(LLVMValueRef Term, unsigned i, LLVMBasicBlockRef block); /** * Return if a branch is conditional. * * This only works on llvm::BranchInst instructions. * * @see llvm::BranchInst::isConditional */ LLVMBool LLVMIsConditional(LLVMValueRef Branch); /** * Return the condition of a branch instruction. * * This only works on llvm::BranchInst instructions. * * @see llvm::BranchInst::getCondition */ LLVMValueRef LLVMGetCondition(LLVMValueRef Branch); /** * Set the condition of a branch instruction. * * This only works on llvm::BranchInst instructions. * * @see llvm::BranchInst::setCondition */ void LLVMSetCondition(LLVMValueRef Branch, LLVMValueRef Cond); /** * Obtain the default destination basic block of a switch instruction. * * This only works on llvm::SwitchInst instructions. * * @see llvm::SwitchInst::getDefaultDest() */ LLVMBasicBlockRef LLVMGetSwitchDefaultDest(LLVMValueRef SwitchInstr); /** * @} */ /** * @defgroup LLVMCCoreValueInstructionAlloca Allocas * * Functions in this group only apply to instructions that map to * llvm::AllocaInst instances. * * @{ */ /** * Obtain the type that is being allocated by the alloca instruction. */ LLVMTypeRef LLVMGetAllocatedType(LLVMValueRef Alloca); /** * @} */ /** * @defgroup LLVMCCoreValueInstructionGetElementPointer GEPs * * Functions in this group only apply to instructions that map to * llvm::GetElementPtrInst instances. * * @{ */ /** * Check whether the given GEP instruction is inbounds. */ LLVMBool LLVMIsInBounds(LLVMValueRef GEP); /** * Set the given GEP instruction to be inbounds or not. */ void LLVMSetIsInBounds(LLVMValueRef GEP, LLVMBool InBounds); /** * @} */ /** * @defgroup LLVMCCoreValueInstructionPHINode PHI Nodes * * Functions in this group only apply to instructions that map to * llvm::PHINode instances. * * @{ */ /** * Add an incoming value to the end of a PHI list. */ void LLVMAddIncoming(LLVMValueRef PhiNode, LLVMValueRef *IncomingValues, LLVMBasicBlockRef *IncomingBlocks, unsigned Count); /** * Obtain the number of incoming basic blocks to a PHI node. */ unsigned LLVMCountIncoming(LLVMValueRef PhiNode); /** * Obtain an incoming value to a PHI node as an LLVMValueRef. */ LLVMValueRef LLVMGetIncomingValue(LLVMValueRef PhiNode, unsigned Index); /** * Obtain an incoming value to a PHI node as an LLVMBasicBlockRef. */ LLVMBasicBlockRef LLVMGetIncomingBlock(LLVMValueRef PhiNode, unsigned Index); /** * @} */ /** * @defgroup LLVMCCoreValueInstructionExtractValue ExtractValue * @defgroup LLVMCCoreValueInstructionInsertValue InsertValue * * Functions in this group only apply to instructions that map to * llvm::ExtractValue and llvm::InsertValue instances. * * @{ */ /** * Obtain the number of indices. 
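/*
 * Illustrative sketch of the terminator accessors above: inspect a block's
 * terminator, list its successors, and, for a conditional branch, look at
 * the condition value. LLVMBr is the branch opcode declared earlier in this
 * header.
 */
#include <llvm-c/Core.h>
#include <stdio.h>

static void describe_terminator(LLVMBasicBlockRef BB) {
  LLVMValueRef Term = LLVMGetBasicBlockTerminator(BB);
  if (!Term)
    return; /* block is not yet well-formed */
  unsigned N = LLVMGetNumSuccessors(Term);
  for (unsigned I = 0; I < N; ++I)
    printf("successor %u: %s\n", I,
           LLVMGetBasicBlockName(LLVMGetSuccessor(Term, I)));
  if (LLVMGetInstructionOpcode(Term) == LLVMBr && LLVMIsConditional(Term))
    printf("branches on %p\n", (void *)LLVMGetCondition(Term));
}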
* NB: This also works on GEP. */ unsigned LLVMGetNumIndices(LLVMValueRef Inst); /** * Obtain the indices as an array. */ const unsigned *LLVMGetIndices(LLVMValueRef Inst); /** * @} */ /** * @} */ /** * @} */ /** * @defgroup LLVMCCoreInstructionBuilder Instruction Builders * * An instruction builder represents a point within a basic block and is * the exclusive means of building instructions using the C interface. * * @{ */ LLVMBuilderRef LLVMCreateBuilderInContext(LLVMContextRef C); LLVMBuilderRef LLVMCreateBuilder(void); void LLVMPositionBuilder(LLVMBuilderRef Builder, LLVMBasicBlockRef Block, LLVMValueRef Instr); void LLVMPositionBuilderBefore(LLVMBuilderRef Builder, LLVMValueRef Instr); void LLVMPositionBuilderAtEnd(LLVMBuilderRef Builder, LLVMBasicBlockRef Block); LLVMBasicBlockRef LLVMGetInsertBlock(LLVMBuilderRef Builder); void LLVMClearInsertionPosition(LLVMBuilderRef Builder); void LLVMInsertIntoBuilder(LLVMBuilderRef Builder, LLVMValueRef Instr); void LLVMInsertIntoBuilderWithName(LLVMBuilderRef Builder, LLVMValueRef Instr, const char *Name); void LLVMDisposeBuilder(LLVMBuilderRef Builder); /* Metadata */ /** * Get location information used by debugging information. * * @see llvm::IRBuilder::getCurrentDebugLocation() */ LLVMMetadataRef LLVMGetCurrentDebugLocation2(LLVMBuilderRef Builder); /** * Set location information used by debugging information. * * To clear the location metadata of the given instruction, pass NULL to \p Loc. * * @see llvm::IRBuilder::SetCurrentDebugLocation() */ void LLVMSetCurrentDebugLocation2(LLVMBuilderRef Builder, LLVMMetadataRef Loc); /** * Attempts to set the debug location for the given instruction using the * current debug location for the given builder. If the builder has no current * debug location, this function is a no-op. * * @see llvm::IRBuilder::SetInstDebugLocation() */ void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst); /** * Get the dafult floating-point math metadata for a given builder. * * @see llvm::IRBuilder::getDefaultFPMathTag() */ LLVMMetadataRef LLVMBuilderGetDefaultFPMathTag(LLVMBuilderRef Builder); /** * Set the default floating-point math metadata for the given builder. * * To clear the metadata, pass NULL to \p FPMathTag. * * @see llvm::IRBuilder::setDefaultFPMathTag() */ void LLVMBuilderSetDefaultFPMathTag(LLVMBuilderRef Builder, LLVMMetadataRef FPMathTag); /** * Deprecated: Passing the NULL location will crash. * Use LLVMGetCurrentDebugLocation2 instead. */ void LLVMSetCurrentDebugLocation(LLVMBuilderRef Builder, LLVMValueRef L); /** * Deprecated: Returning the NULL location will crash. * Use LLVMGetCurrentDebugLocation2 instead. */ LLVMValueRef LLVMGetCurrentDebugLocation(LLVMBuilderRef Builder); /* Terminators */ LLVMValueRef LLVMBuildRetVoid(LLVMBuilderRef); LLVMValueRef LLVMBuildRet(LLVMBuilderRef, LLVMValueRef V); LLVMValueRef LLVMBuildAggregateRet(LLVMBuilderRef, LLVMValueRef *RetVals, unsigned N); LLVMValueRef LLVMBuildBr(LLVMBuilderRef, LLVMBasicBlockRef Dest); LLVMValueRef LLVMBuildCondBr(LLVMBuilderRef, LLVMValueRef If, LLVMBasicBlockRef Then, LLVMBasicBlockRef Else); LLVMValueRef LLVMBuildSwitch(LLVMBuilderRef, LLVMValueRef V, LLVMBasicBlockRef Else, unsigned NumCases); LLVMValueRef LLVMBuildIndirectBr(LLVMBuilderRef B, LLVMValueRef Addr, unsigned NumDests); // LLVMBuildInvoke is deprecated in favor of LLVMBuildInvoke2, in preparation // for opaque pointer types. 
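/*
 * Illustrative sketch of the builder-positioning and terminator-building
 * functions above: append entry/loop/exit blocks to a function assumed to
 * return void, then emit a branch and a conditional branch. The constant
 * condition and all block names are placeholders; LLVMInt1TypeInContext and
 * LLVMConstInt are declared earlier in this header.
 */
#include <llvm-c/Core.h>

static void emit_skeleton(LLVMContextRef Ctx, LLVMValueRef Fn) {
  LLVMBasicBlockRef Entry = LLVMAppendBasicBlockInContext(Ctx, Fn, "entry");
  LLVMBasicBlockRef Loop = LLVMAppendBasicBlockInContext(Ctx, Fn, "loop");
  LLVMBasicBlockRef Exit = LLVMAppendBasicBlockInContext(Ctx, Fn, "exit");

  LLVMBuilderRef B = LLVMCreateBuilderInContext(Ctx);
  LLVMPositionBuilderAtEnd(B, Entry);
  LLVMBuildBr(B, Loop);

  LLVMPositionBuilderAtEnd(B, Loop);
  LLVMValueRef Stop = LLVMConstInt(LLVMInt1TypeInContext(Ctx), 1, 0);
  LLVMBuildCondBr(B, Stop, Exit, Loop); /* loop back until Stop is true */

  LLVMPositionBuilderAtEnd(B, Exit);
  LLVMBuildRetVoid(B);
  LLVMDisposeBuilder(B);
}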
LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef, LLVMValueRef Fn, LLVMValueRef *Args, unsigned NumArgs, LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch, const char *Name); LLVMValueRef LLVMBuildInvoke2(LLVMBuilderRef, LLVMTypeRef Ty, LLVMValueRef Fn, LLVMValueRef *Args, unsigned NumArgs, LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch, const char *Name); LLVMValueRef LLVMBuildUnreachable(LLVMBuilderRef); /* Exception Handling */ LLVMValueRef LLVMBuildResume(LLVMBuilderRef B, LLVMValueRef Exn); LLVMValueRef LLVMBuildLandingPad(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef PersFn, unsigned NumClauses, const char *Name); LLVMValueRef LLVMBuildCleanupRet(LLVMBuilderRef B, LLVMValueRef CatchPad, LLVMBasicBlockRef BB); LLVMValueRef LLVMBuildCatchRet(LLVMBuilderRef B, LLVMValueRef CatchPad, LLVMBasicBlockRef BB); LLVMValueRef LLVMBuildCatchPad(LLVMBuilderRef B, LLVMValueRef ParentPad, LLVMValueRef *Args, unsigned NumArgs, const char *Name); LLVMValueRef LLVMBuildCleanupPad(LLVMBuilderRef B, LLVMValueRef ParentPad, LLVMValueRef *Args, unsigned NumArgs, const char *Name); LLVMValueRef LLVMBuildCatchSwitch(LLVMBuilderRef B, LLVMValueRef ParentPad, LLVMBasicBlockRef UnwindBB, unsigned NumHandlers, const char *Name); /* Add a case to the switch instruction */ void LLVMAddCase(LLVMValueRef Switch, LLVMValueRef OnVal, LLVMBasicBlockRef Dest); /* Add a destination to the indirectbr instruction */ void LLVMAddDestination(LLVMValueRef IndirectBr, LLVMBasicBlockRef Dest); /* Get the number of clauses on the landingpad instruction */ unsigned LLVMGetNumClauses(LLVMValueRef LandingPad); /* Get the value of the clause at index Idx on the landingpad instruction */ LLVMValueRef LLVMGetClause(LLVMValueRef LandingPad, unsigned Idx); /* Add a catch or filter clause to the landingpad instruction */ void LLVMAddClause(LLVMValueRef LandingPad, LLVMValueRef ClauseVal); /* Get the 'cleanup' flag in the landingpad instruction */ LLVMBool LLVMIsCleanup(LLVMValueRef LandingPad); /* Set the 'cleanup' flag in the landingpad instruction */ void LLVMSetCleanup(LLVMValueRef LandingPad, LLVMBool Val); /* Add a destination to the catchswitch instruction */ void LLVMAddHandler(LLVMValueRef CatchSwitch, LLVMBasicBlockRef Dest); /* Get the number of handlers on the catchswitch instruction */ unsigned LLVMGetNumHandlers(LLVMValueRef CatchSwitch); /** * Obtain the basic blocks acting as handlers for a catchswitch instruction. * * The Handlers parameter should point to a pre-allocated array of * LLVMBasicBlockRefs at least LLVMGetNumHandlers() large. On return, the * first LLVMGetNumHandlers() entries in the array will be populated * with LLVMBasicBlockRef instances. * * @param CatchSwitch The catchswitch instruction to operate on. * @param Handlers Memory address of an array to be filled with basic blocks. */ void LLVMGetHandlers(LLVMValueRef CatchSwitch, LLVMBasicBlockRef *Handlers); /* Funclets */ /* Get the number of funcletpad arguments. */ LLVMValueRef LLVMGetArgOperand(LLVMValueRef Funclet, unsigned i); /* Set a funcletpad argument at the given index. */ void LLVMSetArgOperand(LLVMValueRef Funclet, unsigned i, LLVMValueRef value); /** * Get the parent catchswitch instruction of a catchpad instruction. * * This only works on llvm::CatchPadInst instructions. * * @see llvm::CatchPadInst::getCatchSwitch() */ LLVMValueRef LLVMGetParentCatchSwitch(LLVMValueRef CatchPad); /** * Set the parent catchswitch instruction of a catchpad instruction. * * This only works on llvm::CatchPadInst instructions. 
* * @see llvm::CatchPadInst::setCatchSwitch() */ void LLVMSetParentCatchSwitch(LLVMValueRef CatchPad, LLVMValueRef CatchSwitch); /* Arithmetic */ LLVMValueRef LLVMBuildAdd(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildNSWAdd(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildNUWAdd(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildFAdd(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildSub(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildNSWSub(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildNUWSub(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildFSub(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildMul(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildNSWMul(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildNUWMul(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildFMul(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildUDiv(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildExactUDiv(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildSDiv(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildExactSDiv(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildFDiv(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildURem(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildSRem(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildFRem(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildShl(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildLShr(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildAShr(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildAnd(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildOr(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildXor(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildBinOp(LLVMBuilderRef B, LLVMOpcode Op, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildNeg(LLVMBuilderRef, LLVMValueRef V, const char *Name); LLVMValueRef LLVMBuildNSWNeg(LLVMBuilderRef B, LLVMValueRef V, const char *Name); LLVMValueRef LLVMBuildNUWNeg(LLVMBuilderRef B, LLVMValueRef V, const char *Name); LLVMValueRef LLVMBuildFNeg(LLVMBuilderRef, LLVMValueRef V, const char *Name); LLVMValueRef LLVMBuildNot(LLVMBuilderRef, LLVMValueRef V, const char *Name); /* Memory */ LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef, LLVMTypeRef Ty, const char *Name); LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef, LLVMTypeRef Ty, LLVMValueRef Val, const char *Name); /** * Creates and inserts a memset to the specified pointer and the * specified value. 
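/*
 * Illustrative sketch of the arithmetic builders above: define an i32
 * function computing (a + b) * c with no-signed-wrap arithmetic. The name
 * "madd" is arbitrary; LLVMAddFunction and LLVMFunctionType are declared
 * earlier in this header.
 */
#include <llvm-c/Core.h>

static LLVMValueRef build_madd(LLVMContextRef Ctx, LLVMModuleRef M) {
  LLVMTypeRef I32 = LLVMInt32TypeInContext(Ctx);
  LLVMTypeRef Params[] = {I32, I32, I32};
  LLVMValueRef Fn =
      LLVMAddFunction(M, "madd", LLVMFunctionType(I32, Params, 3, 0));

  LLVMBuilderRef B = LLVMCreateBuilderInContext(Ctx);
  LLVMPositionBuilderAtEnd(B, LLVMAppendBasicBlockInContext(Ctx, Fn, "entry"));
  LLVMValueRef Sum =
      LLVMBuildNSWAdd(B, LLVMGetParam(Fn, 0), LLVMGetParam(Fn, 1), "sum");
  LLVMValueRef Prod = LLVMBuildNSWMul(B, Sum, LLVMGetParam(Fn, 2), "prod");
  LLVMBuildRet(B, Prod);
  LLVMDisposeBuilder(B);
  return Fn;
}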
* * @see llvm::IRRBuilder::CreateMemSet() */ LLVMValueRef LLVMBuildMemSet(LLVMBuilderRef B, LLVMValueRef Ptr, LLVMValueRef Val, LLVMValueRef Len, unsigned Align); /** * Creates and inserts a memcpy between the specified pointers. * * @see llvm::IRRBuilder::CreateMemCpy() */ LLVMValueRef LLVMBuildMemCpy(LLVMBuilderRef B, LLVMValueRef Dst, unsigned DstAlign, LLVMValueRef Src, unsigned SrcAlign, LLVMValueRef Size); /** * Creates and inserts a memmove between the specified pointers. * * @see llvm::IRRBuilder::CreateMemMove() */ LLVMValueRef LLVMBuildMemMove(LLVMBuilderRef B, LLVMValueRef Dst, unsigned DstAlign, LLVMValueRef Src, unsigned SrcAlign, LLVMValueRef Size); LLVMValueRef LLVMBuildAlloca(LLVMBuilderRef, LLVMTypeRef Ty, const char *Name); LLVMValueRef LLVMBuildArrayAlloca(LLVMBuilderRef, LLVMTypeRef Ty, LLVMValueRef Val, const char *Name); LLVMValueRef LLVMBuildFree(LLVMBuilderRef, LLVMValueRef PointerVal); // LLVMBuildLoad is deprecated in favor of LLVMBuildLoad2, in preparation for // opaque pointer types. LLVMValueRef LLVMBuildLoad(LLVMBuilderRef, LLVMValueRef PointerVal, const char *Name); LLVMValueRef LLVMBuildLoad2(LLVMBuilderRef, LLVMTypeRef Ty, LLVMValueRef PointerVal, const char *Name); LLVMValueRef LLVMBuildStore(LLVMBuilderRef, LLVMValueRef Val, LLVMValueRef Ptr); // LLVMBuildGEP, LLVMBuildInBoundsGEP, and LLVMBuildStructGEP are deprecated in // favor of LLVMBuild*GEP2, in preparation for opaque pointer types. LLVMValueRef LLVMBuildGEP(LLVMBuilderRef B, LLVMValueRef Pointer, LLVMValueRef *Indices, unsigned NumIndices, const char *Name); LLVMValueRef LLVMBuildInBoundsGEP(LLVMBuilderRef B, LLVMValueRef Pointer, LLVMValueRef *Indices, unsigned NumIndices, const char *Name); LLVMValueRef LLVMBuildStructGEP(LLVMBuilderRef B, LLVMValueRef Pointer, unsigned Idx, const char *Name); LLVMValueRef LLVMBuildGEP2(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Pointer, LLVMValueRef *Indices, unsigned NumIndices, const char *Name); LLVMValueRef LLVMBuildInBoundsGEP2(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Pointer, LLVMValueRef *Indices, unsigned NumIndices, const char *Name); LLVMValueRef LLVMBuildStructGEP2(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Pointer, unsigned Idx, const char *Name); LLVMValueRef LLVMBuildGlobalString(LLVMBuilderRef B, const char *Str, const char *Name); LLVMValueRef LLVMBuildGlobalStringPtr(LLVMBuilderRef B, const char *Str, const char *Name); LLVMBool LLVMGetVolatile(LLVMValueRef MemoryAccessInst); void LLVMSetVolatile(LLVMValueRef MemoryAccessInst, LLVMBool IsVolatile); LLVMBool LLVMGetWeak(LLVMValueRef CmpXchgInst); void LLVMSetWeak(LLVMValueRef CmpXchgInst, LLVMBool IsWeak); LLVMAtomicOrdering LLVMGetOrdering(LLVMValueRef MemoryAccessInst); void LLVMSetOrdering(LLVMValueRef MemoryAccessInst, LLVMAtomicOrdering Ordering); LLVMAtomicRMWBinOp LLVMGetAtomicRMWBinOp(LLVMValueRef AtomicRMWInst); void LLVMSetAtomicRMWBinOp(LLVMValueRef AtomicRMWInst, LLVMAtomicRMWBinOp BinOp); /* Casts */ LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name); LLVMValueRef LLVMBuildZExt(LLVMBuilderRef, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name); LLVMValueRef LLVMBuildSExt(LLVMBuilderRef, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name); LLVMValueRef LLVMBuildFPToUI(LLVMBuilderRef, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name); LLVMValueRef LLVMBuildFPToSI(LLVMBuilderRef, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name); LLVMValueRef LLVMBuildUIToFP(LLVMBuilderRef, LLVMValueRef Val, LLVMTypeRef 
DestTy, const char *Name); LLVMValueRef LLVMBuildSIToFP(LLVMBuilderRef, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name); LLVMValueRef LLVMBuildFPTrunc(LLVMBuilderRef, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name); LLVMValueRef LLVMBuildFPExt(LLVMBuilderRef, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name); LLVMValueRef LLVMBuildPtrToInt(LLVMBuilderRef, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name); LLVMValueRef LLVMBuildIntToPtr(LLVMBuilderRef, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name); LLVMValueRef LLVMBuildBitCast(LLVMBuilderRef, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name); LLVMValueRef LLVMBuildAddrSpaceCast(LLVMBuilderRef, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name); LLVMValueRef LLVMBuildZExtOrBitCast(LLVMBuilderRef, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name); LLVMValueRef LLVMBuildSExtOrBitCast(LLVMBuilderRef, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name); LLVMValueRef LLVMBuildTruncOrBitCast(LLVMBuilderRef, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name); LLVMValueRef LLVMBuildCast(LLVMBuilderRef B, LLVMOpcode Op, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name); LLVMValueRef LLVMBuildPointerCast(LLVMBuilderRef, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name); LLVMValueRef LLVMBuildIntCast2(LLVMBuilderRef, LLVMValueRef Val, LLVMTypeRef DestTy, LLVMBool IsSigned, const char *Name); LLVMValueRef LLVMBuildFPCast(LLVMBuilderRef, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name); /** Deprecated: This cast is always signed. Use LLVMBuildIntCast2 instead. */ LLVMValueRef LLVMBuildIntCast(LLVMBuilderRef, LLVMValueRef Val, /*Signed cast!*/ LLVMTypeRef DestTy, const char *Name); /* Comparisons */ LLVMValueRef LLVMBuildICmp(LLVMBuilderRef, LLVMIntPredicate Op, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildFCmp(LLVMBuilderRef, LLVMRealPredicate Op, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); /* Miscellaneous instructions */ LLVMValueRef LLVMBuildPhi(LLVMBuilderRef, LLVMTypeRef Ty, const char *Name); // LLVMBuildCall is deprecated in favor of LLVMBuildCall2, in preparation for // opaque pointer types. 
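/*
 * Illustrative sketch of the memory and cast builders above, using the
 * explicitly typed *2 variants that remain valid with opaque pointers:
 * spill a value to an alloca, reload it, widen it, and compare it against
 * zero. Assumes the builder is already positioned inside a function and
 * that V32 is an i32 value; LLVMInt64TypeInContext is declared earlier in
 * this header.
 */
#include <llvm-c/Core.h>

static LLVMValueRef spill_and_test(LLVMContextRef Ctx, LLVMBuilderRef B,
                                   LLVMValueRef V32) {
  LLVMTypeRef I32 = LLVMInt32TypeInContext(Ctx);
  LLVMTypeRef I64 = LLVMInt64TypeInContext(Ctx);

  LLVMValueRef Slot = LLVMBuildAlloca(B, I32, "slot");
  LLVMBuildStore(B, V32, Slot);
  LLVMValueRef Reloaded = LLVMBuildLoad2(B, I32, Slot, "reload");
  LLVMValueRef Wide = LLVMBuildZExt(B, Reloaded, I64, "wide");
  return LLVMBuildICmp(B, LLVMIntNE, Wide, LLVMConstInt(I64, 0, 0), "isnz");
}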
LLVMValueRef LLVMBuildCall(LLVMBuilderRef, LLVMValueRef Fn, LLVMValueRef *Args, unsigned NumArgs, const char *Name); LLVMValueRef LLVMBuildCall2(LLVMBuilderRef, LLVMTypeRef, LLVMValueRef Fn, LLVMValueRef *Args, unsigned NumArgs, const char *Name); LLVMValueRef LLVMBuildSelect(LLVMBuilderRef, LLVMValueRef If, LLVMValueRef Then, LLVMValueRef Else, const char *Name); LLVMValueRef LLVMBuildVAArg(LLVMBuilderRef, LLVMValueRef List, LLVMTypeRef Ty, const char *Name); LLVMValueRef LLVMBuildExtractElement(LLVMBuilderRef, LLVMValueRef VecVal, LLVMValueRef Index, const char *Name); LLVMValueRef LLVMBuildInsertElement(LLVMBuilderRef, LLVMValueRef VecVal, LLVMValueRef EltVal, LLVMValueRef Index, const char *Name); LLVMValueRef LLVMBuildShuffleVector(LLVMBuilderRef, LLVMValueRef V1, LLVMValueRef V2, LLVMValueRef Mask, const char *Name); LLVMValueRef LLVMBuildExtractValue(LLVMBuilderRef, LLVMValueRef AggVal, unsigned Index, const char *Name); LLVMValueRef LLVMBuildInsertValue(LLVMBuilderRef, LLVMValueRef AggVal, LLVMValueRef EltVal, unsigned Index, const char *Name); LLVMValueRef LLVMBuildFreeze(LLVMBuilderRef, LLVMValueRef Val, const char *Name); LLVMValueRef LLVMBuildIsNull(LLVMBuilderRef, LLVMValueRef Val, const char *Name); LLVMValueRef LLVMBuildIsNotNull(LLVMBuilderRef, LLVMValueRef Val, const char *Name); LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name); LLVMValueRef LLVMBuildFence(LLVMBuilderRef B, LLVMAtomicOrdering ordering, LLVMBool singleThread, const char *Name); LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B, LLVMAtomicRMWBinOp op, LLVMValueRef PTR, LLVMValueRef Val, LLVMAtomicOrdering ordering, LLVMBool singleThread); LLVMValueRef LLVMBuildAtomicCmpXchg(LLVMBuilderRef B, LLVMValueRef Ptr, LLVMValueRef Cmp, LLVMValueRef New, LLVMAtomicOrdering SuccessOrdering, LLVMAtomicOrdering FailureOrdering, LLVMBool SingleThread); /** * Get the number of elements in the mask of a ShuffleVector instruction. */ unsigned LLVMGetNumMaskElements(LLVMValueRef ShuffleVectorInst); /** * \returns a constant that specifies that the result of a \c ShuffleVectorInst * is undefined. */ int LLVMGetUndefMaskElem(void); /** * Get the mask value at position Elt in the mask of a ShuffleVector * instruction. * * \Returns the result of \c LLVMGetUndefMaskElem() if the mask value is undef * at that position. */ int LLVMGetMaskValue(LLVMValueRef ShuffleVectorInst, unsigned Elt); LLVMBool LLVMIsAtomicSingleThread(LLVMValueRef AtomicInst); void LLVMSetAtomicSingleThread(LLVMValueRef AtomicInst, LLVMBool SingleThread); LLVMAtomicOrdering LLVMGetCmpXchgSuccessOrdering(LLVMValueRef CmpXchgInst); void LLVMSetCmpXchgSuccessOrdering(LLVMValueRef CmpXchgInst, LLVMAtomicOrdering Ordering); LLVMAtomicOrdering LLVMGetCmpXchgFailureOrdering(LLVMValueRef CmpXchgInst); void LLVMSetCmpXchgFailureOrdering(LLVMValueRef CmpXchgInst, LLVMAtomicOrdering Ordering); /** * @} */ /** * @defgroup LLVMCCoreModuleProvider Module Providers * * @{ */ /** * Changes the type of M so it can be passed to FunctionPassManagers and the * JIT. They take ModuleProviders for historical reasons. */ LLVMModuleProviderRef LLVMCreateModuleProviderForExistingModule(LLVMModuleRef M); /** * Destroys the module M. 
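/*
 * Illustrative sketch of the atomic builders above: a sequentially
 * consistent fetch-and-add followed by a compare-exchange whose failure
 * ordering is queried back. Assumes Ptr points at an i32 in memory and the
 * builder is positioned appropriately; the ordering and binop enumerators
 * used here are declared earlier in this header.
 */
#include <llvm-c/Core.h>

static void atomic_demo(LLVMContextRef Ctx, LLVMBuilderRef B,
                        LLVMValueRef Ptr) {
  LLVMTypeRef I32 = LLVMInt32TypeInContext(Ctx);
  LLVMValueRef One = LLVMConstInt(I32, 1, 0);

  LLVMBuildAtomicRMW(B, LLVMAtomicRMWBinOpAdd, Ptr, One,
                     LLVMAtomicOrderingSequentiallyConsistent,
                     /*singleThread=*/0);

  LLVMValueRef CX = LLVMBuildAtomicCmpXchg(
      B, Ptr, LLVMConstInt(I32, 0, 0), One,
      LLVMAtomicOrderingSequentiallyConsistent, LLVMAtomicOrderingMonotonic,
      /*SingleThread=*/0);
  /* Orderings can be read back (and adjusted) after construction. */
  LLVMSetCmpXchgFailureOrdering(CX, LLVMGetCmpXchgFailureOrdering(CX));
}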
*/ void LLVMDisposeModuleProvider(LLVMModuleProviderRef M); /** * @} */ /** * @defgroup LLVMCCoreMemoryBuffers Memory Buffers * * @{ */ LLVMBool LLVMCreateMemoryBufferWithContentsOfFile(const char *Path, LLVMMemoryBufferRef *OutMemBuf, char **OutMessage); LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf, char **OutMessage); LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRange(const char *InputData, size_t InputDataLength, const char *BufferName, LLVMBool RequiresNullTerminator); LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRangeCopy(const char *InputData, size_t InputDataLength, const char *BufferName); const char *LLVMGetBufferStart(LLVMMemoryBufferRef MemBuf); size_t LLVMGetBufferSize(LLVMMemoryBufferRef MemBuf); void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf); /** * @} */ /** * @defgroup LLVMCCorePassRegistry Pass Registry * * @{ */ /** Return the global pass registry, for use with initialization functions. @see llvm::PassRegistry::getPassRegistry */ LLVMPassRegistryRef LLVMGetGlobalPassRegistry(void); /** * @} */ /** * @defgroup LLVMCCorePassManagers Pass Managers * * @{ */ /** Constructs a new whole-module pass pipeline. This type of pipeline is suitable for link-time optimization and whole-module transformations. @see llvm::PassManager::PassManager */ LLVMPassManagerRef LLVMCreatePassManager(void); /** Constructs a new function-by-function pass pipeline over the module provider. It does not take ownership of the module provider. This type of pipeline is suitable for code generation and JIT compilation tasks. @see llvm::FunctionPassManager::FunctionPassManager */ LLVMPassManagerRef LLVMCreateFunctionPassManagerForModule(LLVMModuleRef M); /** Deprecated: Use LLVMCreateFunctionPassManagerForModule instead. */ LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef MP); /** Initializes, executes on the provided module, and finalizes all of the passes scheduled in the pass manager. Returns 1 if any of the passes modified the module, 0 otherwise. @see llvm::PassManager::run(Module&) */ LLVMBool LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M); /** Initializes all of the function passes scheduled in the function pass manager. Returns 1 if any of the passes modified the module, 0 otherwise. @see llvm::FunctionPassManager::doInitialization */ LLVMBool LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM); /** Executes all of the function passes scheduled in the function pass manager on the provided function. Returns 1 if any of the passes modified the function, false otherwise. @see llvm::FunctionPassManager::run(Function&) */ LLVMBool LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F); /** Finalizes all of the function passes scheduled in the function pass manager. Returns 1 if any of the passes modified the module, 0 otherwise. @see llvm::FunctionPassManager::doFinalization */ LLVMBool LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM); /** Frees the memory of a pass pipeline. For function pipelines, does not free the module provider. @see llvm::PassManagerBase::~PassManagerBase. */ void LLVMDisposePassManager(LLVMPassManagerRef PM); /** * @} */ /** * @defgroup LLVMCCoreThreading Threading * * Handle the structures needed to make LLVM safe for multithreading. * * @{ */ /** Deprecated: Multi-threading can only be enabled/disabled with the compile time define LLVM_ENABLE_THREADS. This function always returns LLVMIsMultithreaded(). 
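/*
 * Illustrative sketch of the legacy pass-manager C API above: run the
 * scheduled function passes over every function in a module. Which passes
 * were added beforehand is outside the scope of this sketch;
 * LLVMGetFirstFunction and LLVMGetNextFunction are declared earlier in this
 * header.
 */
#include <llvm-c/Core.h>

static LLVMBool run_function_passes(LLVMModuleRef M) {
  LLVMPassManagerRef FPM = LLVMCreateFunctionPassManagerForModule(M);
  LLVMBool Changed = LLVMInitializeFunctionPassManager(FPM);
  for (LLVMValueRef F = LLVMGetFirstFunction(M); F;
       F = LLVMGetNextFunction(F))
    Changed |= LLVMRunFunctionPassManager(FPM, F);
  Changed |= LLVMFinalizeFunctionPassManager(FPM);
  LLVMDisposePassManager(FPM);
  return Changed;
}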
*/ LLVMBool LLVMStartMultithreaded(void); /** Deprecated: Multi-threading can only be enabled/disabled with the compile time define LLVM_ENABLE_THREADS. */ void LLVMStopMultithreaded(void); /** Check whether LLVM is executing in thread-safe mode or not. @see llvm::llvm_is_multithreaded */ LLVMBool LLVMIsMultithreaded(void); /** * @} */ /** * @} */ /** * @} */ LLVM_C_EXTERN_C_END #endif /* LLVM_C_CORE_H */ diff --git a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h index 39ae53c4e7fe..cf4c827b9267 100644 --- a/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h +++ b/contrib/llvm-project/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h @@ -1,183 +1,193 @@ //===- DWARFAbbreviationDeclaration.h ---------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLVM_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H #define LLVM_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator_range.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/DataExtractor.h" #include #include #include namespace llvm { class DWARFFormValue; class DWARFUnit; class raw_ostream; class DWARFAbbreviationDeclaration { public: struct AttributeSpec { AttributeSpec(dwarf::Attribute A, dwarf::Form F, int64_t Value) : Attr(A), Form(F), Value(Value) { assert(isImplicitConst()); } AttributeSpec(dwarf::Attribute A, dwarf::Form F, Optional ByteSize) : Attr(A), Form(F) { assert(!isImplicitConst()); this->ByteSize.HasByteSize = ByteSize.hasValue(); if (this->ByteSize.HasByteSize) this->ByteSize.ByteSize = *ByteSize; } dwarf::Attribute Attr; dwarf::Form Form; private: /// The following field is used for ByteSize for non-implicit_const /// attributes and as value for implicit_const ones, indicated by /// Form == DW_FORM_implicit_const. /// The following cases are distinguished: /// * Form != DW_FORM_implicit_const and HasByteSize is true: /// ByteSize contains the fixed size in bytes for the Form in this /// object. /// * Form != DW_FORM_implicit_const and HasByteSize is false: /// byte size of Form either varies according to the DWARFUnit /// that it is contained in or the value size varies and must be /// decoded from the debug information in order to determine its size. /// * Form == DW_FORM_implicit_const: /// Value contains value for the implicit_const attribute. struct ByteSizeStorage { bool HasByteSize; uint8_t ByteSize; }; union { ByteSizeStorage ByteSize; int64_t Value; }; public: bool isImplicitConst() const { return Form == dwarf::DW_FORM_implicit_const; } int64_t getImplicitConstValue() const { assert(isImplicitConst()); return Value; } /// Get the fixed byte size of this Form if possible. This function might /// use the DWARFUnit to calculate the size of the Form, like for /// DW_AT_address and DW_AT_ref_addr, so this isn't just an accessor for /// the ByteSize member. 
Optional getByteSize(const DWARFUnit &U) const; }; using AttributeSpecVector = SmallVector; DWARFAbbreviationDeclaration(); uint32_t getCode() const { return Code; } uint8_t getCodeByteSize() const { return CodeByteSize; } dwarf::Tag getTag() const { return Tag; } bool hasChildren() const { return HasChildren; } using attr_iterator_range = iterator_range; attr_iterator_range attributes() const { return attr_iterator_range(AttributeSpecs.begin(), AttributeSpecs.end()); } dwarf::Form getFormByIndex(uint32_t idx) const { assert(idx < AttributeSpecs.size()); return AttributeSpecs[idx].Form; } size_t getNumAttributes() const { return AttributeSpecs.size(); } dwarf::Attribute getAttrByIndex(uint32_t idx) const { assert(idx < AttributeSpecs.size()); return AttributeSpecs[idx].Attr; } + bool getAttrIsImplicitConstByIndex(uint32_t idx) const { + assert(idx < AttributeSpecs.size()); + return AttributeSpecs[idx].isImplicitConst(); + } + + int64_t getAttrImplicitConstValueByIndex(uint32_t idx) const { + assert(idx < AttributeSpecs.size()); + return AttributeSpecs[idx].getImplicitConstValue(); + } + /// Get the index of the specified attribute. /// /// Searches the this abbreviation declaration for the index of the specified /// attribute. /// /// \param attr DWARF attribute to search for. /// \returns Optional index of the attribute if found, None otherwise. Optional findAttributeIndex(dwarf::Attribute attr) const; /// Extract a DWARF form value from a DIE specified by DIE offset. /// /// Extract an attribute value for a DWARFUnit given the DIE offset and the /// attribute. /// /// \param DIEOffset the DIE offset that points to the ULEB128 abbreviation /// code in the .debug_info data. /// \param Attr DWARF attribute to search for. /// \param U the DWARFUnit the contains the DIE. /// \returns Optional DWARF form value if the attribute was extracted. Optional getAttributeValue(const uint64_t DIEOffset, const dwarf::Attribute Attr, const DWARFUnit &U) const; bool extract(DataExtractor Data, uint64_t* OffsetPtr); void dump(raw_ostream &OS) const; // Return an optional byte size of all attribute data in this abbreviation // if a constant byte size can be calculated given a DWARFUnit. This allows // DWARF parsing to be faster as many DWARF DIEs have a fixed byte size. Optional getFixedAttributesByteSize(const DWARFUnit &U) const; private: void clear(); /// A helper structure that can quickly determine the size in bytes of an /// abbreviation declaration. struct FixedSizeInfo { /// The fixed byte size for fixed size forms. uint16_t NumBytes = 0; /// Number of DW_FORM_address forms in this abbrevation declaration. uint8_t NumAddrs = 0; /// Number of DW_FORM_ref_addr forms in this abbrevation declaration. uint8_t NumRefAddrs = 0; /// Number of 4 byte in DWARF32 and 8 byte in DWARF64 forms. uint8_t NumDwarfOffsets = 0; FixedSizeInfo() = default; /// Calculate the fixed size in bytes given a DWARFUnit. /// /// \param U the DWARFUnit to use when determing the byte size. /// \returns the size in bytes for all attribute data in this abbreviation. /// The returned size does not include bytes for the ULEB128 abbreviation /// code size_t getByteSize(const DWARFUnit &U) const; }; uint32_t Code; dwarf::Tag Tag; uint8_t CodeByteSize; bool HasChildren; AttributeSpecVector AttributeSpecs; /// If this abbreviation has a fixed byte size then FixedAttributeSize member /// variable below will have a value. 
Optional FixedAttributeSize; }; } // end namespace llvm #endif // LLVM_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H diff --git a/contrib/llvm-project/llvm/include/llvm/Support/Host.h b/contrib/llvm-project/llvm/include/llvm/Support/Host.h index d4ef389450cc..b3c15f0683b9 100644 --- a/contrib/llvm-project/llvm/include/llvm/Support/Host.h +++ b/contrib/llvm-project/llvm/include/llvm/Support/Host.h @@ -1,72 +1,86 @@ //===- llvm/Support/Host.h - Host machine characteristics --------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Methods for querying the nature of the host machine. // //===----------------------------------------------------------------------===// #ifndef LLVM_SUPPORT_HOST_H #define LLVM_SUPPORT_HOST_H #include namespace llvm { class MallocAllocator; class StringRef; template class StringMap; namespace sys { /// getDefaultTargetTriple() - Return the default target triple the compiler /// has been configured to produce code for. /// /// The target triple is a string in the format of: /// CPU_TYPE-VENDOR-OPERATING_SYSTEM /// or /// CPU_TYPE-VENDOR-KERNEL-OPERATING_SYSTEM std::string getDefaultTargetTriple(); /// getProcessTriple() - Return an appropriate target triple for generating /// code to be loaded into the current process, e.g. when using the JIT. std::string getProcessTriple(); /// getHostCPUName - Get the LLVM name for the host CPU. The particular format /// of the name is target dependent, and suitable for passing as -mcpu to the /// target which matches the host. /// /// \return - The host CPU name, or empty if the CPU could not be determined. StringRef getHostCPUName(); /// getHostCPUFeatures - Get the LLVM names for the host CPU features. /// The particular format of the names are target dependent, and suitable for /// passing as -mattr to the target which matches the host. /// /// \param Features - A string mapping feature names to either /// true (if enabled) or false (if disabled). This routine makes no guarantees /// about exactly which features may appear in this map, except that they are /// all valid LLVM feature names. /// /// \return - True on success. bool getHostCPUFeatures(StringMap &Features); /// Get the number of physical cores (as opposed to logical cores returned /// from thread::hardware_concurrency(), which includes hyperthreads). /// Returns -1 if unknown for the current host system. int getHostNumPhysicalCores(); namespace detail { /// Helper functions to extract HostCPUName from /proc/cpuinfo on linux. StringRef getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent); StringRef getHostCPUNameForARM(StringRef ProcCpuinfoContent); StringRef getHostCPUNameForS390x(StringRef ProcCpuinfoContent); StringRef getHostCPUNameForBPF(); + + /// Helper functions to extract CPU details from CPUID on x86. + namespace x86 { + enum class VendorSignatures { + UNKNOWN, + GENUINE_INTEL, + AUTHENTIC_AMD, + }; + + /// Returns the host CPU's vendor. + /// MaxLeaf: if a non-nullptr pointer is specified, the EAX value will be + /// assigned to its pointee. 
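/*
 * Illustrative C++ sketch of the host-query helpers above, including the
 * x86 vendor-signature helper this diff adds under llvm::sys::detail. It is
 * only meant to show the calling convention of the new API; the detail
 * namespace is internal to LLVM, and the x86 helper is meaningful only on
 * x86 hosts. The function name print_host_info is arbitrary.
 */
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Host.h"
#include <cstdio>

static void print_host_info() {
  std::printf("cpu: %s\n", llvm::sys::getHostCPUName().str().c_str());

  unsigned MaxLeaf = 0;
  using llvm::sys::detail::x86::VendorSignatures;
  VendorSignatures V = llvm::sys::detail::x86::getVendorSignature(&MaxLeaf);
  if (V == VendorSignatures::GENUINE_INTEL)
    std::printf("GenuineIntel, max CPUID leaf %u\n", MaxLeaf);
  else if (V == VendorSignatures::AUTHENTIC_AMD)
    std::printf("AuthenticAMD, max CPUID leaf %u\n", MaxLeaf);

  llvm::StringMap<bool> Features;
  if (llvm::sys::getHostCPUFeatures(Features))
    std::printf("%u feature flags reported\n", (unsigned)Features.size());
}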
+ VendorSignatures getVendorSignature(unsigned *MaxLeaf = nullptr); + } // namespace x86 } } } #endif diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index 2f80b4373b46..246db0fd2dd9 100644 --- a/contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -1,530 +1,525 @@ //===- llvm/Transforms/Vectorize/LoopVectorizationLegality.h ----*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // /// \file /// This file defines the LoopVectorizationLegality class. Original code /// in Loop Vectorizer has been moved out to its own file for modularity /// and reusability. /// /// Currently, it works for innermost loop vectorization. Extending this to /// outer loop vectorization is a TODO item. /// /// Also provides: /// 1) LoopVectorizeHints class which keeps a number of loop annotations /// locally for easy look up. It has the ability to write them back as /// loop metadata, upon request. /// 2) LoopVectorizationRequirements class for lazy bail out for the purpose /// of reporting useful failure to vectorize message. // //===----------------------------------------------------------------------===// #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H #include "llvm/ADT/MapVector.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Support/TypeSize.h" #include "llvm/Transforms/Utils/LoopUtils.h" namespace llvm { /// Utility class for getting and setting loop vectorizer hints in the form /// of loop metadata. /// This class keeps a number of loop annotations locally (as member variables) /// and can, upon request, write them back as metadata on the loop. It will /// initially scan the loop for existing metadata, and will update the local /// values based on information in the loop. /// We cannot write all values to metadata, as the mere presence of some info, /// for example 'force', means a decision has been made. So, we need to be /// careful NOT to add them if the user hasn't specifically asked so. class LoopVectorizeHints { enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED, HK_PREDICATE, HK_SCALABLE }; /// Hint - associates name and validation with the hint value. struct Hint { const char *Name; unsigned Value; // This may have to change for non-numeric values. HintKind Kind; Hint(const char *Name, unsigned Value, HintKind Kind) : Name(Name), Value(Value), Kind(Kind) {} bool validate(unsigned Val); }; /// Vectorization width. Hint Width; /// Vectorization interleave factor. Hint Interleave; /// Vectorization forced Hint Force; /// Already Vectorized Hint IsVectorized; /// Vector Predicate Hint Predicate; /// Says whether we should use fixed width or scalable vectorization. Hint Scalable; /// Return the loop metadata prefix. static StringRef Prefix() { return "llvm.loop."; } /// True if there is any unsafe math in the loop. bool PotentiallyUnsafe = false; public: enum ForceKind { FK_Undefined = -1, ///< Not selected. 
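// A minimal illustrative sketch of calling the sys::detail::x86 helper
// declared above in Support/Host.h. sys::detail is an internal namespace, so
// ordinary clients would more likely go through sys::getHostCPUName(); this
// sketch only exercises the new entry point, and the printed messages are
// assumptions made for the example.
#include "llvm/Support/Host.h"
#include "llvm/Support/raw_ostream.h"

static void reportHostVendor() {
  using namespace llvm::sys::detail::x86;
  unsigned MaxLeaf = 0;
  switch (getVendorSignature(&MaxLeaf)) {
  case VendorSignatures::GENUINE_INTEL:
    llvm::outs() << "Intel CPU, max CPUID leaf 0x";
    llvm::outs().write_hex(MaxLeaf) << '\n';
    break;
  case VendorSignatures::AUTHENTIC_AMD:
    llvm::outs() << "AMD CPU, max CPUID leaf 0x";
    llvm::outs().write_hex(MaxLeaf) << '\n';
    break;
  case VendorSignatures::UNKNOWN:
    llvm::outs() << "unrecognized or non-x86 host\n";
    break;
  }
}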
FK_Disabled = 0, ///< Forcing disabled. FK_Enabled = 1, ///< Forcing enabled. }; LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE); /// Mark the loop L as already vectorized by setting the width to 1. void setAlreadyVectorized(); bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const; /// Dumps all the hint information. void emitRemarkWithHints() const; ElementCount getWidth() const { return ElementCount::get(Width.Value, isScalable()); } unsigned getInterleave() const { return Interleave.Value; } unsigned getIsVectorized() const { return IsVectorized.Value; } unsigned getPredicate() const { return Predicate.Value; } enum ForceKind getForce() const { if ((ForceKind)Force.Value == FK_Undefined && hasDisableAllTransformsHint(TheLoop)) return FK_Disabled; return (ForceKind)Force.Value; } bool isScalable() const { return Scalable.Value; } /// If hints are provided that force vectorization, use the AlwaysPrint /// pass name to force the frontend to print the diagnostic. const char *vectorizeAnalysisPassName() const; bool allowReordering() const { // When enabling loop hints are provided we allow the vectorizer to change // the order of operations that is given by the scalar loop. This is not // enabled by default because can be unsafe or inefficient. For example, // reordering floating-point operations will change the way round-off // error accumulates in the loop. ElementCount EC = getWidth(); return getForce() == LoopVectorizeHints::FK_Enabled || EC.getKnownMinValue() > 1; } bool isPotentiallyUnsafe() const { // Avoid FP vectorization if the target is unsure about proper support. // This may be related to the SIMD unit in the target not handling // IEEE 754 FP ops properly, or bad single-to-double promotions. // Otherwise, a sequence of vectorized loops, even without reduction, // could lead to different end results on the destination vectors. return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe; } void setPotentiallyUnsafe() { PotentiallyUnsafe = true; } private: /// Find hints specified in the loop metadata and update local values. void getHintsFromMetadata(); /// Checks string hint with one operand and set value if valid. void setHint(StringRef Name, Metadata *Arg); /// The loop these hints belong to. const Loop *TheLoop; /// Interface to emit optimization remarks. OptimizationRemarkEmitter &ORE; }; /// This holds vectorization requirements that must be verified late in /// the process. The requirements are set by legalize and costmodel. Once /// vectorization has been determined to be possible and profitable the /// requirements can be verified by looking for metadata or compiler options. /// For example, some loops require FP commutativity which is only allowed if /// vectorization is explicitly specified or if the fast-math compiler option /// has been provided. /// Late evaluation of these requirements allows helpful diagnostics to be /// composed that tells the user what need to be done to vectorize the loop. For /// example, by specifying #pragma clang loop vectorize or -ffast-math. Late /// evaluation should be used only when diagnostics can generated that can be /// followed by a non-expert user. class LoopVectorizationRequirements { public: LoopVectorizationRequirements(OptimizationRemarkEmitter &ORE) : ORE(ORE) {} void addUnsafeAlgebraInst(Instruction *I) { // First unsafe algebra instruction. 
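// A minimal illustrative sketch of how a transform might consult the
// LoopVectorizeHints interface declared above. The surrounding helper, its
// parameters, and the report format are assumptions for the example only.
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"

static void describeHints(const llvm::Loop *L,
                          llvm::OptimizationRemarkEmitter &ORE) {
  llvm::LoopVectorizeHints Hints(L, /*InterleaveOnlyWhenForced=*/false, ORE);
  if (Hints.getForce() == llvm::LoopVectorizeHints::FK_Disabled) {
    llvm::outs() << "vectorization explicitly disabled\n";
    return;
  }
  llvm::ElementCount VF = Hints.getWidth();
  llvm::outs() << "requested VF: " << VF.getKnownMinValue()
               << (VF.isScalable() ? " (scalable)" : "")
               << ", interleave: " << Hints.getInterleave()
               << ", may reorder FP ops: "
               << (Hints.allowReordering() ? "yes" : "no") << '\n';
}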
if (!UnsafeAlgebraInst) UnsafeAlgebraInst = I; } void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; } bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints); private: unsigned NumRuntimePointerChecks = 0; Instruction *UnsafeAlgebraInst = nullptr; /// Interface to emit optimization remarks. OptimizationRemarkEmitter &ORE; }; /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and /// to what vectorization factor. /// This class does not look at the profitability of vectorization, only the /// legality. This class has two main kinds of checks: /// * Memory checks - The code in canVectorizeMemory checks if vectorization /// will change the order of memory accesses in a way that will change the /// correctness of the program. /// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory /// checks for a number of different conditions, such as the availability of a /// single induction variable, that all types are supported and vectorize-able, /// etc. This code reflects the capabilities of InnerLoopVectorizer. /// This class is also used by InnerLoopVectorizer for identifying /// induction variable and the different reduction variables. class LoopVectorizationLegality { public: LoopVectorizationLegality( Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT, TargetTransformInfo *TTI, TargetLibraryInfo *TLI, AAResults *AA, Function *F, std::function *GetLAA, LoopInfo *LI, OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB, AssumptionCache *AC, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI) : TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT), GetLAA(GetLAA), ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC), BFI(BFI), PSI(PSI) {} /// ReductionList contains the reduction descriptors for all /// of the reductions that were found in the loop. using ReductionList = MapVector; /// InductionList saves induction variables and maps them to the /// induction descriptor. using InductionList = MapVector; /// RecurrenceSet contains the phi nodes that are recurrences other than /// inductions and reductions. using RecurrenceSet = SmallPtrSet; /// Returns true if it is legal to vectorize this loop. /// This does not mean that it is profitable to vectorize this /// loop, only that it is legal to do so. /// Temporarily taking UseVPlanNativePath parameter. If true, take /// the new code path being implemented for outer loop vectorization /// (should be functional for inner loop vectorization) based on VPlan. /// If false, good old LV code. bool canVectorize(bool UseVPlanNativePath); /// Return true if we can vectorize this loop while folding its tail by /// masking, and mark all respective loads/stores for masking. /// This object's state is only modified iff this function returns true. bool prepareToFoldTailByMasking(); /// Returns the primary induction variable. PHINode *getPrimaryInduction() { return PrimaryInduction; } /// Returns the reduction variables found in the loop. ReductionList &getReductionVars() { return Reductions; } /// Returns the induction variables found in the loop. InductionList &getInductionVars() { return Inductions; } /// Return the first-order recurrences found in the loop. RecurrenceSet &getFirstOrderRecurrences() { return FirstOrderRecurrences; } /// Return the set of instructions to sink to handle first-order recurrences. DenseMap &getSinkAfter() { return SinkAfter; } /// Returns the widest induction type. 
Type *getWidestInductionType() { return WidestIndTy; } /// Returns True if V is a Phi node of an induction variable in this loop. bool isInductionPhi(const Value *V); /// Returns True if V is a cast that is part of an induction def-use chain, /// and had been proven to be redundant under a runtime guard (in other /// words, the cast has the same SCEV expression as the induction phi). bool isCastedInductionVariable(const Value *V); /// Returns True if V can be considered as an induction variable in this /// loop. V can be the induction phi, or some redundant cast in the def-use /// chain of the inducion phi. bool isInductionVariable(const Value *V); /// Returns True if PN is a reduction variable in this loop. bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); } /// Returns True if Phi is a first-order recurrence in this loop. bool isFirstOrderRecurrence(const PHINode *Phi); /// Return true if the block BB needs to be predicated in order for the loop /// to be vectorized. bool blockNeedsPredication(BasicBlock *BB); /// Check if this pointer is consecutive when vectorizing. This happens /// when the last index of the GEP is the induction variable, or that the /// pointer itself is an induction variable. /// This check allows us to vectorize A[idx] into a wide load/store. /// Returns: /// 0 - Stride is unknown or non-consecutive. /// 1 - Address is consecutive. /// -1 - Address is consecutive, and decreasing. /// NOTE: This method must only be used before modifying the original scalar /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965). int isConsecutivePtr(Value *Ptr); /// Returns true if the value V is uniform within the loop. bool isUniform(Value *V); /// A uniform memory op is a load or store which accesses the same memory /// location on all lanes. bool isUniformMemOp(Instruction &I) { Value *Ptr = getLoadStorePointerOperand(&I); if (!Ptr) return false; // Note: There's nothing inherent which prevents predicated loads and // stores from being uniform. The current lowering simply doesn't handle // it; in particular, the cost model distinguishes scatter/gather from // scalar w/predication, and we currently rely on the scalar path. return isUniform(Ptr) && !blockNeedsPredication(I.getParent()); } /// Returns the information that we collected about runtime memory check. const RuntimePointerChecking *getRuntimePointerChecking() const { return LAI->getRuntimePointerChecking(); } const LoopAccessInfo *getLAI() const { return LAI; } bool isSafeForAnyVectorWidth() const { return LAI->getDepChecker().isSafeForAnyVectorWidth(); } unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); } uint64_t getMaxSafeVectorWidthInBits() const { return LAI->getDepChecker().getMaxSafeVectorWidthInBits(); } bool hasStride(Value *V) { return LAI->hasStride(V); } /// Returns true if vector representation of the instruction \p I /// requires mask. bool isMaskRequired(const Instruction *I) { return MaskedOp.contains(I); } unsigned getNumStores() const { return LAI->getNumStores(); } unsigned getNumLoads() const { return LAI->getNumLoads(); } // Returns true if the NoNaN attribute is set on the function. bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; } /// Returns all assume calls in predicated blocks. They need to be dropped /// when flattening the CFG. 
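// A small illustrative helper (an assumption, not part of this class) that
// spells out the isConsecutivePtr() return contract documented above:
// 0 means unknown or non-consecutive, 1 consecutive, -1 consecutive and
// decreasing.
#include "llvm/ADT/StringRef.h"

static llvm::StringRef describeConsecutiveness(int ConsecutiveStride) {
  switch (ConsecutiveStride) {
  case 1:
    return "consecutive, increasing";
  case -1:
    return "consecutive, decreasing";
  default:
    return "unknown or non-consecutive";
  }
}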
const SmallPtrSetImpl &getConditionalAssumes() const { return ConditionalAssumes; } private: /// Return true if the pre-header, exiting and latch blocks of \p Lp and all /// its nested loops are considered legal for vectorization. These legal /// checks are common for inner and outer loop vectorization. /// Temporarily taking UseVPlanNativePath parameter. If true, take /// the new code path being implemented for outer loop vectorization /// (should be functional for inner loop vectorization) based on VPlan. /// If false, good old LV code. bool canVectorizeLoopNestCFG(Loop *Lp, bool UseVPlanNativePath); /// Set up outer loop inductions by checking Phis in outer loop header for /// supported inductions (int inductions). Return false if any of these Phis /// is not a supported induction or if we fail to find an induction. bool setupOuterLoopInductions(); /// Return true if the pre-header, exiting and latch blocks of \p Lp /// (non-recursive) are considered legal for vectorization. /// Temporarily taking UseVPlanNativePath parameter. If true, take /// the new code path being implemented for outer loop vectorization /// (should be functional for inner loop vectorization) based on VPlan. /// If false, good old LV code. bool canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath); /// Check if a single basic block loop is vectorizable. /// At this point we know that this is a loop with a constant trip count /// and we only need to check individual instructions. bool canVectorizeInstrs(); /// When we vectorize loops we may change the order in which /// we read and write from memory. This method checks if it is /// legal to vectorize the code, considering only memory constrains. /// Returns true if the loop is vectorizable bool canVectorizeMemory(); /// Return true if we can vectorize this loop using the IF-conversion /// transformation. bool canVectorizeWithIfConvert(); /// Return true if we can vectorize this outer loop. The method performs /// specific checks for outer loop vectorization. bool canVectorizeOuterLoop(); /// Return true if all of the instructions in the block can be speculatively - /// executed, and record the loads/stores that require masking. If's that - /// guard loads can be ignored under "assume safety" unless \p PreserveGuards - /// is true. This can happen when we introduces guards for which the original - /// "unguarded-loads are safe" assumption does not hold. For example, the - /// vectorizer's fold-tail transformation changes the loop to execute beyond - /// its original trip-count, under a proper guard, which should be preserved. + /// executed, and record the loads/stores that require masking. /// \p SafePtrs is a list of addresses that are known to be legal and we know /// that we can read from them without segfault. /// \p MaskedOp is a list of instructions that have to be transformed into /// calls to the appropriate masked intrinsic when the loop is vectorized. /// \p ConditionalAssumes is a list of assume instructions in predicated /// blocks that must be dropped if the CFG gets flattened. - bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl &SafePtrs, - SmallPtrSetImpl &MaskedOp, - SmallPtrSetImpl &ConditionalAssumes, - bool PreserveGuards = false) const; + bool blockCanBePredicated( + BasicBlock *BB, SmallPtrSetImpl &SafePtrs, + SmallPtrSetImpl &MaskedOp, + SmallPtrSetImpl &ConditionalAssumes) const; /// Updates the vectorization state by adding \p Phi to the inductions list. 
/// This can set \p Phi as the main induction of the loop if \p Phi is a /// better choice for the main induction than the existing one. void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID, SmallPtrSetImpl &AllowedExit); /// If an access has a symbolic strides, this maps the pointer value to /// the stride symbol. const ValueToValueMap *getSymbolicStrides() { // FIXME: Currently, the set of symbolic strides is sometimes queried before // it's collected. This happens from canVectorizeWithIfConvert, when the // pointer is checked to reference consecutive elements suitable for a // masked access. return LAI ? &LAI->getSymbolicStrides() : nullptr; } /// The loop that we evaluate. Loop *TheLoop; /// Loop Info analysis. LoopInfo *LI; /// A wrapper around ScalarEvolution used to add runtime SCEV checks. /// Applies dynamic knowledge to simplify SCEV expressions in the context /// of existing SCEV assumptions. The analysis will also add a minimal set /// of new predicates if this is required to enable vectorization and /// unrolling. PredicatedScalarEvolution &PSE; /// Target Transform Info. TargetTransformInfo *TTI; /// Target Library Info. TargetLibraryInfo *TLI; /// Dominator Tree. DominatorTree *DT; // LoopAccess analysis. std::function *GetLAA; // And the loop-accesses info corresponding to this loop. This pointer is // null until canVectorizeMemory sets it up. const LoopAccessInfo *LAI = nullptr; /// Interface to emit optimization remarks. OptimizationRemarkEmitter *ORE; // --- vectorization state --- // /// Holds the primary induction variable. This is the counter of the /// loop. PHINode *PrimaryInduction = nullptr; /// Holds the reduction variables. ReductionList Reductions; /// Holds all of the induction variables that we found in the loop. /// Notice that inductions don't need to start at zero and that induction /// variables can be pointers. InductionList Inductions; /// Holds all the casts that participate in the update chain of the induction /// variables, and that have been proven to be redundant (possibly under a /// runtime guard). These casts can be ignored when creating the vectorized /// loop body. SmallPtrSet InductionCastsToIgnore; /// Holds the phi nodes that are first-order recurrences. RecurrenceSet FirstOrderRecurrences; /// Holds instructions that need to sink past other instructions to handle /// first-order recurrences. DenseMap SinkAfter; /// Holds the widest induction type encountered. Type *WidestIndTy = nullptr; /// Allowed outside users. This holds the variables that can be accessed from /// outside the loop. SmallPtrSet AllowedExit; /// Can we assume the absence of NaNs. bool HasFunNoNaNAttr = false; /// Vectorization requirements that will go through late-evaluation. LoopVectorizationRequirements *Requirements; /// Used to emit an analysis of any legality issues. LoopVectorizeHints *Hints; /// The demanded bits analysis is used to compute the minimum type size in /// which a reduction can be computed. DemandedBits *DB; /// The assumption cache analysis is used to compute the minimum type size in /// which a reduction can be computed. AssumptionCache *AC; /// While vectorizing these instructions we have to generate a /// call to the appropriate masked intrinsic SmallPtrSet MaskedOp; /// Assume instructions in predicated blocks must be dropped if the CFG gets /// flattened. SmallPtrSet ConditionalAssumes; /// BFI and PSI are used to check for profile guided size optimizations. 
BlockFrequencyInfo *BFI; ProfileSummaryInfo *PSI; }; } // namespace llvm #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index 427f6f4942c3..5a55f3a04148 100644 --- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -1,778 +1,777 @@ //===- DWARFDie.cpp -------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/DWARF/DWARFDie.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" #include "llvm/DebugInfo/DWARF/DWARFExpression.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormatAdapters.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include #include #include using namespace llvm; using namespace dwarf; using namespace object; static void dumpApplePropertyAttribute(raw_ostream &OS, uint64_t Val) { OS << " ("; do { uint64_t Shift = countTrailingZeros(Val); assert(Shift < 64 && "undefined behavior"); uint64_t Bit = 1ULL << Shift; auto PropName = ApplePropertyString(Bit); if (!PropName.empty()) OS << PropName; else OS << format("DW_APPLE_PROPERTY_0x%" PRIx64, Bit); if (!(Val ^= Bit)) break; OS << ", "; } while (true); OS << ")"; } static void dumpRanges(const DWARFObject &Obj, raw_ostream &OS, const DWARFAddressRangesVector &Ranges, unsigned AddressSize, unsigned Indent, const DIDumpOptions &DumpOpts) { if (!DumpOpts.ShowAddresses) return; for (const DWARFAddressRange &R : Ranges) { OS << '\n'; OS.indent(Indent); R.dump(OS, AddressSize, DumpOpts, &Obj); } } -static void dumpLocation(raw_ostream &OS, DWARFFormValue &FormValue, +static void dumpLocation(raw_ostream &OS, const DWARFFormValue &FormValue, DWARFUnit *U, unsigned Indent, DIDumpOptions DumpOpts) { DWARFContext &Ctx = U->getContext(); const MCRegisterInfo *MRI = Ctx.getRegisterInfo(); if (FormValue.isFormClass(DWARFFormValue::FC_Block) || FormValue.isFormClass(DWARFFormValue::FC_Exprloc)) { ArrayRef Expr = *FormValue.getAsBlock(); DataExtractor Data(StringRef((const char *)Expr.data(), Expr.size()), Ctx.isLittleEndian(), 0); DWARFExpression(Data, U->getAddressByteSize(), U->getFormParams().Format) .print(OS, DumpOpts, MRI, U); return; } if (FormValue.isFormClass(DWARFFormValue::FC_SectionOffset)) { uint64_t Offset = *FormValue.getAsSectionOffset(); if (FormValue.getForm() == DW_FORM_loclistx) { FormValue.dump(OS, DumpOpts); if (auto LoclistOffset = U->getLoclistOffset(Offset)) Offset = *LoclistOffset; else return; } U->getLocationTable().dumpLocationList(&Offset, OS, U->getBaseAddress(), MRI, Ctx.getDWARFObj(), U, DumpOpts, 
Indent); return; } FormValue.dump(OS, DumpOpts); } /// Dump the name encoded in the type tag. static void dumpTypeTagName(raw_ostream &OS, dwarf::Tag T) { StringRef TagStr = TagString(T); if (!TagStr.startswith("DW_TAG_") || !TagStr.endswith("_type")) return; OS << TagStr.substr(7, TagStr.size() - 12) << " "; } static void dumpArrayType(raw_ostream &OS, const DWARFDie &D) { for (const DWARFDie &C : D.children()) if (C.getTag() == DW_TAG_subrange_type) { Optional LB; Optional Count; Optional UB; Optional DefaultLB; if (Optional L = C.find(DW_AT_lower_bound)) LB = L->getAsUnsignedConstant(); if (Optional CountV = C.find(DW_AT_count)) Count = CountV->getAsUnsignedConstant(); if (Optional UpperV = C.find(DW_AT_upper_bound)) UB = UpperV->getAsUnsignedConstant(); if (Optional LV = D.getDwarfUnit()->getUnitDIE().find(DW_AT_language)) if (Optional LC = LV->getAsUnsignedConstant()) if ((DefaultLB = LanguageLowerBound(static_cast(*LC)))) if (LB && *LB == *DefaultLB) LB = None; if (!LB && !Count && !UB) OS << "[]"; else if (!LB && (Count || UB) && DefaultLB) OS << '[' << (Count ? *Count : *UB - *DefaultLB + 1) << ']'; else { OS << "[["; if (LB) OS << *LB; else OS << '?'; OS << ", "; if (Count) if (LB) OS << *LB + *Count; else OS << "? + " << *Count; else if (UB) OS << *UB + 1; else OS << '?'; OS << ")]"; } } } /// Recursively dump the DIE type name when applicable. static void dumpTypeName(raw_ostream &OS, const DWARFDie &D) { if (!D.isValid()) return; if (const char *Name = D.getName(DINameKind::LinkageName)) { OS << Name; return; } // FIXME: We should have pretty printers per language. Currently we print // everything as if it was C++ and fall back to the TAG type name. const dwarf::Tag T = D.getTag(); switch (T) { case DW_TAG_array_type: case DW_TAG_pointer_type: case DW_TAG_ptr_to_member_type: case DW_TAG_reference_type: case DW_TAG_rvalue_reference_type: case DW_TAG_subroutine_type: break; default: dumpTypeTagName(OS, T); } // Follow the DW_AT_type if possible. 
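// A worked example of the substring arithmetic in dumpTypeTagName() above:
// the offset 7 skips "DW_TAG_", and size() - 12 additionally drops the
// trailing "_type" (7 + 5 characters), leaving only the middle of the tag
// name. The example tag is an arbitrary choice.
#include <cassert>
#include "llvm/ADT/StringRef.h"

static void dumpTypeTagNameExample() {
  llvm::StringRef TagStr = "DW_TAG_structure_type"; // size() == 21
  assert(TagStr.startswith("DW_TAG_") && TagStr.endswith("_type"));
  assert(TagStr.substr(7, TagStr.size() - 12) == "structure");
}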
DWARFDie TypeDie = D.getAttributeValueAsReferencedDie(DW_AT_type); dumpTypeName(OS, TypeDie); switch (T) { case DW_TAG_subroutine_type: { if (!TypeDie) OS << "void"; OS << '('; bool First = true; for (const DWARFDie &C : D.children()) { if (C.getTag() == DW_TAG_formal_parameter) { if (!First) OS << ", "; First = false; dumpTypeName(OS, C.getAttributeValueAsReferencedDie(DW_AT_type)); } } OS << ')'; break; } case DW_TAG_array_type: { dumpArrayType(OS, D); break; } case DW_TAG_pointer_type: OS << '*'; break; case DW_TAG_ptr_to_member_type: if (DWARFDie Cont = D.getAttributeValueAsReferencedDie(DW_AT_containing_type)) { dumpTypeName(OS << ' ', Cont); OS << "::"; } OS << '*'; break; case DW_TAG_reference_type: OS << '&'; break; case DW_TAG_rvalue_reference_type: OS << "&&"; break; default: break; } } static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die, - uint64_t *OffsetPtr, dwarf::Attribute Attr, - dwarf::Form Form, unsigned Indent, + const DWARFAttribute &AttrValue, unsigned Indent, DIDumpOptions DumpOpts) { if (!Die.isValid()) return; const char BaseIndent[] = " "; OS << BaseIndent; OS.indent(Indent + 2); + dwarf::Attribute Attr = AttrValue.Attr; WithColor(OS, HighlightColor::Attribute) << formatv("{0}", Attr); + dwarf::Form Form = AttrValue.Value.getForm(); if (DumpOpts.Verbose || DumpOpts.ShowForm) OS << formatv(" [{0}]", Form); DWARFUnit *U = Die.getDwarfUnit(); - DWARFFormValue FormValue = DWARFFormValue::createFromUnit(Form, U, OffsetPtr); + const DWARFFormValue &FormValue = AttrValue.Value; OS << "\t("; StringRef Name; std::string File; auto Color = HighlightColor::Enumerator; if (Attr == DW_AT_decl_file || Attr == DW_AT_call_file) { Color = HighlightColor::String; if (const auto *LT = U->getContext().getLineTableForUnit(U)) if (LT->getFileNameByIndex( FormValue.getAsUnsignedConstant().getValue(), U->getCompilationDir(), DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File)) { File = '"' + File + '"'; Name = File; } } else if (Optional Val = FormValue.getAsUnsignedConstant()) Name = AttributeValueString(Attr, *Val); if (!Name.empty()) WithColor(OS, Color) << Name; else if (Attr == DW_AT_decl_line || Attr == DW_AT_call_line) OS << *FormValue.getAsUnsignedConstant(); else if (Attr == DW_AT_low_pc && (FormValue.getAsAddress() == dwarf::computeTombstoneAddress(U->getAddressByteSize()))) { if (DumpOpts.Verbose) { FormValue.dump(OS, DumpOpts); OS << " ("; } OS << "dead code"; if (DumpOpts.Verbose) OS << ')'; } else if (Attr == DW_AT_high_pc && !DumpOpts.ShowForm && !DumpOpts.Verbose && FormValue.getAsUnsignedConstant()) { if (DumpOpts.ShowAddresses) { // Print the actual address rather than the offset. uint64_t LowPC, HighPC, Index; if (Die.getLowAndHighPC(LowPC, HighPC, Index)) DWARFFormValue::dumpAddress(OS, U->getAddressByteSize(), HighPC); else FormValue.dump(OS, DumpOpts); } } else if (Form == dwarf::Form::DW_FORM_exprloc || DWARFAttribute::mayHaveLocationDescription(Attr)) dumpLocation(OS, FormValue, U, sizeof(BaseIndent) + Indent + 4, DumpOpts); else FormValue.dump(OS, DumpOpts); std::string Space = DumpOpts.ShowAddresses ? " " : ""; // We have dumped the attribute raw value. For some attributes // having both the raw value and the pretty-printed value is // interesting. These attributes are handled below. 
if (Attr == DW_AT_specification || Attr == DW_AT_abstract_origin) { if (const char *Name = Die.getAttributeValueAsReferencedDie(FormValue).getName( DINameKind::LinkageName)) OS << Space << "\"" << Name << '\"'; } else if (Attr == DW_AT_type) { OS << Space << "\""; dumpTypeName(OS, Die.getAttributeValueAsReferencedDie(FormValue)); OS << '"'; } else if (Attr == DW_AT_APPLE_property_attribute) { if (Optional OptVal = FormValue.getAsUnsignedConstant()) dumpApplePropertyAttribute(OS, *OptVal); } else if (Attr == DW_AT_ranges) { const DWARFObject &Obj = Die.getDwarfUnit()->getContext().getDWARFObj(); // For DW_FORM_rnglistx we need to dump the offset separately, since // we have only dumped the index so far. if (FormValue.getForm() == DW_FORM_rnglistx) if (auto RangeListOffset = U->getRnglistOffset(*FormValue.getAsSectionOffset())) { DWARFFormValue FV = DWARFFormValue::createFromUValue( dwarf::DW_FORM_sec_offset, *RangeListOffset); FV.dump(OS, DumpOpts); } if (auto RangesOrError = Die.getAddressRanges()) dumpRanges(Obj, OS, RangesOrError.get(), U->getAddressByteSize(), sizeof(BaseIndent) + Indent + 4, DumpOpts); else DumpOpts.RecoverableErrorHandler(createStringError( errc::invalid_argument, "decoding address ranges: %s", toString(RangesOrError.takeError()).c_str())); } OS << ")\n"; } bool DWARFDie::isSubprogramDIE() const { return getTag() == DW_TAG_subprogram; } bool DWARFDie::isSubroutineDIE() const { auto Tag = getTag(); return Tag == DW_TAG_subprogram || Tag == DW_TAG_inlined_subroutine; } Optional DWARFDie::find(dwarf::Attribute Attr) const { if (!isValid()) return None; auto AbbrevDecl = getAbbreviationDeclarationPtr(); if (AbbrevDecl) return AbbrevDecl->getAttributeValue(getOffset(), Attr, *U); return None; } Optional DWARFDie::find(ArrayRef Attrs) const { if (!isValid()) return None; auto AbbrevDecl = getAbbreviationDeclarationPtr(); if (AbbrevDecl) { for (auto Attr : Attrs) { if (auto Value = AbbrevDecl->getAttributeValue(getOffset(), Attr, *U)) return Value; } } return None; } Optional DWARFDie::findRecursively(ArrayRef Attrs) const { SmallVector Worklist; Worklist.push_back(*this); // Keep track if DIEs already seen to prevent infinite recursion. // Empirically we rarely see a depth of more than 3 when dealing with valid // DWARF. This corresponds to following the DW_AT_abstract_origin and // DW_AT_specification just once. 
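// A minimal illustrative sketch of the lookup that findRecursively() enables:
// a DIE that only carries DW_AT_abstract_origin or DW_AT_specification still
// yields a name, because the referenced DIEs are searched as well. The helper
// name and the fallback string are assumptions for the example.
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"

static const char *resolveDieName(const llvm::DWARFDie &Die) {
  // Prefer the mangled name, fall back to the plain name, else a placeholder.
  return llvm::dwarf::toString(
      Die.findRecursively(
          {llvm::dwarf::DW_AT_linkage_name, llvm::dwarf::DW_AT_name}),
      "<name not found>");
}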
SmallSet Seen; Seen.insert(*this); while (!Worklist.empty()) { DWARFDie Die = Worklist.pop_back_val(); if (!Die.isValid()) continue; if (auto Value = Die.find(Attrs)) return Value; if (auto D = Die.getAttributeValueAsReferencedDie(DW_AT_abstract_origin)) if (Seen.insert(D).second) Worklist.push_back(D); if (auto D = Die.getAttributeValueAsReferencedDie(DW_AT_specification)) if (Seen.insert(D).second) Worklist.push_back(D); } return None; } DWARFDie DWARFDie::getAttributeValueAsReferencedDie(dwarf::Attribute Attr) const { if (Optional F = find(Attr)) return getAttributeValueAsReferencedDie(*F); return DWARFDie(); } DWARFDie DWARFDie::getAttributeValueAsReferencedDie(const DWARFFormValue &V) const { if (auto SpecRef = V.getAsRelativeReference()) { if (SpecRef->Unit) return SpecRef->Unit->getDIEForOffset(SpecRef->Unit->getOffset() + SpecRef->Offset); if (auto SpecUnit = U->getUnitVector().getUnitForOffset(SpecRef->Offset)) return SpecUnit->getDIEForOffset(SpecRef->Offset); } return DWARFDie(); } Optional DWARFDie::getRangesBaseAttribute() const { return toSectionOffset(find({DW_AT_rnglists_base, DW_AT_GNU_ranges_base})); } Optional DWARFDie::getLocBaseAttribute() const { return toSectionOffset(find(DW_AT_loclists_base)); } Optional DWARFDie::getHighPC(uint64_t LowPC) const { uint64_t Tombstone = dwarf::computeTombstoneAddress(U->getAddressByteSize()); if (LowPC == Tombstone) return None; if (auto FormValue = find(DW_AT_high_pc)) { if (auto Address = FormValue->getAsAddress()) { // High PC is an address. return Address; } if (auto Offset = FormValue->getAsUnsignedConstant()) { // High PC is an offset from LowPC. return LowPC + *Offset; } } return None; } bool DWARFDie::getLowAndHighPC(uint64_t &LowPC, uint64_t &HighPC, uint64_t &SectionIndex) const { auto F = find(DW_AT_low_pc); auto LowPcAddr = toSectionedAddress(F); if (!LowPcAddr) return false; if (auto HighPcAddr = getHighPC(LowPcAddr->Address)) { LowPC = LowPcAddr->Address; HighPC = *HighPcAddr; SectionIndex = LowPcAddr->SectionIndex; return true; } return false; } Expected DWARFDie::getAddressRanges() const { if (isNULL()) return DWARFAddressRangesVector(); // Single range specified by low/high PC. 
uint64_t LowPC, HighPC, Index; if (getLowAndHighPC(LowPC, HighPC, Index)) return DWARFAddressRangesVector{{LowPC, HighPC, Index}}; Optional Value = find(DW_AT_ranges); if (Value) { if (Value->getForm() == DW_FORM_rnglistx) return U->findRnglistFromIndex(*Value->getAsSectionOffset()); return U->findRnglistFromOffset(*Value->getAsSectionOffset()); } return DWARFAddressRangesVector(); } void DWARFDie::collectChildrenAddressRanges( DWARFAddressRangesVector &Ranges) const { if (isNULL()) return; if (isSubprogramDIE()) { if (auto DIERangesOrError = getAddressRanges()) llvm::append_range(Ranges, DIERangesOrError.get()); else llvm::consumeError(DIERangesOrError.takeError()); } for (auto Child : children()) Child.collectChildrenAddressRanges(Ranges); } bool DWARFDie::addressRangeContainsAddress(const uint64_t Address) const { auto RangesOrError = getAddressRanges(); if (!RangesOrError) { llvm::consumeError(RangesOrError.takeError()); return false; } for (const auto &R : RangesOrError.get()) if (R.LowPC <= Address && Address < R.HighPC) return true; return false; } Expected DWARFDie::getLocations(dwarf::Attribute Attr) const { Optional Location = find(Attr); if (!Location) return createStringError(inconvertibleErrorCode(), "No %s", dwarf::AttributeString(Attr).data()); if (Optional Off = Location->getAsSectionOffset()) { uint64_t Offset = *Off; if (Location->getForm() == DW_FORM_loclistx) { if (auto LoclistOffset = U->getLoclistOffset(Offset)) Offset = *LoclistOffset; else return createStringError(inconvertibleErrorCode(), "Loclist table not found"); } return U->findLoclistFromOffset(Offset); } if (Optional> Expr = Location->getAsBlock()) { return DWARFLocationExpressionsVector{ DWARFLocationExpression{None, to_vector<4>(*Expr)}}; } return createStringError( inconvertibleErrorCode(), "Unsupported %s encoding: %s", dwarf::AttributeString(Attr).data(), dwarf::FormEncodingString(Location->getForm()).data()); } const char *DWARFDie::getSubroutineName(DINameKind Kind) const { if (!isSubroutineDIE()) return nullptr; return getName(Kind); } const char *DWARFDie::getName(DINameKind Kind) const { if (!isValid() || Kind == DINameKind::None) return nullptr; // Try to get mangled name only if it was asked for. if (Kind == DINameKind::LinkageName) { if (auto Name = getLinkageName()) return Name; } return getShortName(); } const char *DWARFDie::getShortName() const { if (!isValid()) return nullptr; return dwarf::toString(findRecursively(dwarf::DW_AT_name), nullptr); } const char *DWARFDie::getLinkageName() const { if (!isValid()) return nullptr; return dwarf::toString(findRecursively({dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_AT_linkage_name}), nullptr); } uint64_t DWARFDie::getDeclLine() const { return toUnsigned(findRecursively(DW_AT_decl_line), 0); } std::string DWARFDie::getDeclFile(DILineInfoSpecifier::FileLineInfoKind Kind) const { std::string FileName; if (auto DeclFile = toUnsigned(findRecursively(DW_AT_decl_file))) { if (const auto *LT = U->getContext().getLineTableForUnit(U)) { LT->getFileNameByIndex(*DeclFile, U->getCompilationDir(), Kind, FileName); } } return FileName; } void DWARFDie::getCallerFrame(uint32_t &CallFile, uint32_t &CallLine, uint32_t &CallColumn, uint32_t &CallDiscriminator) const { CallFile = toUnsigned(find(DW_AT_call_file), 0); CallLine = toUnsigned(find(DW_AT_call_line), 0); CallColumn = toUnsigned(find(DW_AT_call_column), 0); CallDiscriminator = toUnsigned(find(DW_AT_GNU_discriminator), 0); } /// Helper to dump a DIE with all of its parents, but no siblings. 
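// A minimal illustrative sketch combining several of the accessors above to
// summarize a subprogram DIE. The helper name and output format are
// assumptions for the example only.
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

static void summarizeSubprogram(const llvm::DWARFDie &Die) {
  if (!Die.isSubprogramDIE())
    return;
  const char *Name = Die.getSubroutineName(llvm::DINameKind::LinkageName);
  llvm::outs() << (Name ? Name : "<unnamed>") << " declared at line "
               << Die.getDeclLine() << '\n';
  auto RangesOrErr = Die.getAddressRanges();
  if (!RangesOrErr) {
    llvm::consumeError(RangesOrErr.takeError());
    return;
  }
  for (const llvm::DWARFAddressRange &R : *RangesOrErr)
    llvm::outs() << "  [" << llvm::format_hex(R.LowPC, 10) << ", "
                 << llvm::format_hex(R.HighPC, 10) << ")\n";
}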
static unsigned dumpParentChain(DWARFDie Die, raw_ostream &OS, unsigned Indent, DIDumpOptions DumpOpts, unsigned Depth = 0) { if (!Die) return Indent; if (DumpOpts.ParentRecurseDepth > 0 && Depth >= DumpOpts.ParentRecurseDepth) return Indent; Indent = dumpParentChain(Die.getParent(), OS, Indent, DumpOpts, Depth + 1); Die.dump(OS, Indent, DumpOpts); return Indent + 2; } void DWARFDie::dump(raw_ostream &OS, unsigned Indent, DIDumpOptions DumpOpts) const { if (!isValid()) return; DWARFDataExtractor debug_info_data = U->getDebugInfoExtractor(); const uint64_t Offset = getOffset(); uint64_t offset = Offset; if (DumpOpts.ShowParents) { DIDumpOptions ParentDumpOpts = DumpOpts; ParentDumpOpts.ShowParents = false; ParentDumpOpts.ShowChildren = false; Indent = dumpParentChain(getParent(), OS, Indent, ParentDumpOpts); } if (debug_info_data.isValidOffset(offset)) { uint32_t abbrCode = debug_info_data.getULEB128(&offset); if (DumpOpts.ShowAddresses) WithColor(OS, HighlightColor::Address).get() << format("\n0x%8.8" PRIx64 ": ", Offset); if (abbrCode) { auto AbbrevDecl = getAbbreviationDeclarationPtr(); if (AbbrevDecl) { WithColor(OS, HighlightColor::Tag).get().indent(Indent) << formatv("{0}", getTag()); if (DumpOpts.Verbose) OS << format(" [%u] %c", abbrCode, AbbrevDecl->hasChildren() ? '*' : ' '); OS << '\n'; // Dump all data in the DIE for the attributes. - for (const auto &AttrSpec : AbbrevDecl->attributes()) { - if (AttrSpec.Form == DW_FORM_implicit_const) { - // We are dumping .debug_info section , - // implicit_const attribute values are not really stored here, - // but in .debug_abbrev section. So we just skip such attrs. - continue; - } - dumpAttribute(OS, *this, &offset, AttrSpec.Attr, AttrSpec.Form, - Indent, DumpOpts); - } + for (const DWARFAttribute &AttrValue : attributes()) + dumpAttribute(OS, *this, AttrValue, Indent, DumpOpts); DWARFDie child = getFirstChild(); if (DumpOpts.ShowChildren && DumpOpts.ChildRecurseDepth > 0 && child) { DumpOpts.ChildRecurseDepth--; DIDumpOptions ChildDumpOpts = DumpOpts; ChildDumpOpts.ShowParents = false; while (child) { child.dump(OS, Indent + 2, ChildDumpOpts); child = child.getSibling(); } } } else { OS << "Abbreviation code not found in 'debug_abbrev' class for code: " << abbrCode << '\n'; } } else { OS.indent(Indent) << "NULL\n"; } } } LLVM_DUMP_METHOD void DWARFDie::dump() const { dump(llvm::errs(), 0); } DWARFDie DWARFDie::getParent() const { if (isValid()) return U->getParent(Die); return DWARFDie(); } DWARFDie DWARFDie::getSibling() const { if (isValid()) return U->getSibling(Die); return DWARFDie(); } DWARFDie DWARFDie::getPreviousSibling() const { if (isValid()) return U->getPreviousSibling(Die); return DWARFDie(); } DWARFDie DWARFDie::getFirstChild() const { if (isValid()) return U->getFirstChild(Die); return DWARFDie(); } DWARFDie DWARFDie::getLastChild() const { if (isValid()) return U->getLastChild(Die); return DWARFDie(); } iterator_range DWARFDie::attributes() const { return make_range(attribute_iterator(*this, false), attribute_iterator(*this, true)); } DWARFDie::attribute_iterator::attribute_iterator(DWARFDie D, bool End) : Die(D), Index(0) { auto AbbrDecl = Die.getAbbreviationDeclarationPtr(); assert(AbbrDecl && "Must have abbreviation declaration"); if (End) { // This is the end iterator so we set the index to the attribute count. Index = AbbrDecl->getNumAttributes(); } else { // This is the begin iterator so we extract the value for this->Index. 
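// A minimal illustrative sketch of the client-side pattern the new dump loop
// above relies on: DWARFDie::attributes() yields fully extracted
// DWARFAttribute values (DW_FORM_implicit_const included), so callers need no
// manual offset bookkeeping. The helper name and output are assumptions.
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/Support/raw_ostream.h"

static void listAttributeForms(const llvm::DWARFDie &Die) {
  for (const llvm::DWARFAttribute &A : Die.attributes())
    llvm::outs() << llvm::dwarf::AttributeString(A.Attr) << " uses "
                 << llvm::dwarf::FormEncodingString(A.Value.getForm()) << '\n';
}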
AttrValue.Offset = D.getOffset() + AbbrDecl->getCodeByteSize(); updateForIndex(*AbbrDecl, 0); } } void DWARFDie::attribute_iterator::updateForIndex( const DWARFAbbreviationDeclaration &AbbrDecl, uint32_t I) { Index = I; // AbbrDecl must be valid before calling this function. auto NumAttrs = AbbrDecl.getNumAttributes(); if (Index < NumAttrs) { AttrValue.Attr = AbbrDecl.getAttrByIndex(Index); // Add the previous byte size of any previous attribute value. AttrValue.Offset += AttrValue.ByteSize; uint64_t ParseOffset = AttrValue.Offset; - auto U = Die.getDwarfUnit(); - assert(U && "Die must have valid DWARF unit"); - AttrValue.Value = DWARFFormValue::createFromUnit( - AbbrDecl.getFormByIndex(Index), U, &ParseOffset); + if (AbbrDecl.getAttrIsImplicitConstByIndex(Index)) + AttrValue.Value = DWARFFormValue::createFromSValue( + AbbrDecl.getFormByIndex(Index), + AbbrDecl.getAttrImplicitConstValueByIndex(Index)); + else { + auto U = Die.getDwarfUnit(); + assert(U && "Die must have valid DWARF unit"); + AttrValue.Value = DWARFFormValue::createFromUnit( + AbbrDecl.getFormByIndex(Index), U, &ParseOffset); + } AttrValue.ByteSize = ParseOffset - AttrValue.Offset; } else { assert(Index == NumAttrs && "Indexes should be [0, NumAttrs) only"); AttrValue = {}; } } DWARFDie::attribute_iterator &DWARFDie::attribute_iterator::operator++() { if (auto AbbrDecl = Die.getAbbreviationDeclarationPtr()) updateForIndex(*AbbrDecl, Index + 1); return *this; } bool DWARFAttribute::mayHaveLocationDescription(dwarf::Attribute Attr) { switch (Attr) { // From the DWARF v5 specification. case DW_AT_location: case DW_AT_byte_size: case DW_AT_bit_size: case DW_AT_string_length: case DW_AT_lower_bound: case DW_AT_return_addr: case DW_AT_bit_stride: case DW_AT_upper_bound: case DW_AT_count: case DW_AT_data_member_location: case DW_AT_frame_base: case DW_AT_segment: case DW_AT_static_link: case DW_AT_use_location: case DW_AT_vtable_elem_location: case DW_AT_allocated: case DW_AT_associated: case DW_AT_byte_stride: case DW_AT_rank: case DW_AT_call_value: case DW_AT_call_origin: case DW_AT_call_target: case DW_AT_call_target_clobbered: case DW_AT_call_data_location: case DW_AT_call_data_value: // Extensions. case DW_AT_GNU_call_site_value: case DW_AT_GNU_call_site_target: return true; default: return false; } } diff --git a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp index 7a84605211fb..2559765876d9 100644 --- a/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp +++ b/contrib/llvm-project/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp @@ -1,727 +1,732 @@ //===- DWARFFormValue.cpp -------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" #include #include #include using namespace llvm; using namespace dwarf; static const DWARFFormValue::FormClass DWARF5FormClasses[] = { DWARFFormValue::FC_Unknown, // 0x0 DWARFFormValue::FC_Address, // 0x01 DW_FORM_addr DWARFFormValue::FC_Unknown, // 0x02 unused DWARFFormValue::FC_Block, // 0x03 DW_FORM_block2 DWARFFormValue::FC_Block, // 0x04 DW_FORM_block4 DWARFFormValue::FC_Constant, // 0x05 DW_FORM_data2 // --- These can be FC_SectionOffset in DWARF3 and below: DWARFFormValue::FC_Constant, // 0x06 DW_FORM_data4 DWARFFormValue::FC_Constant, // 0x07 DW_FORM_data8 // --- DWARFFormValue::FC_String, // 0x08 DW_FORM_string DWARFFormValue::FC_Block, // 0x09 DW_FORM_block DWARFFormValue::FC_Block, // 0x0a DW_FORM_block1 DWARFFormValue::FC_Constant, // 0x0b DW_FORM_data1 DWARFFormValue::FC_Flag, // 0x0c DW_FORM_flag DWARFFormValue::FC_Constant, // 0x0d DW_FORM_sdata DWARFFormValue::FC_String, // 0x0e DW_FORM_strp DWARFFormValue::FC_Constant, // 0x0f DW_FORM_udata DWARFFormValue::FC_Reference, // 0x10 DW_FORM_ref_addr DWARFFormValue::FC_Reference, // 0x11 DW_FORM_ref1 DWARFFormValue::FC_Reference, // 0x12 DW_FORM_ref2 DWARFFormValue::FC_Reference, // 0x13 DW_FORM_ref4 DWARFFormValue::FC_Reference, // 0x14 DW_FORM_ref8 DWARFFormValue::FC_Reference, // 0x15 DW_FORM_ref_udata DWARFFormValue::FC_Indirect, // 0x16 DW_FORM_indirect DWARFFormValue::FC_SectionOffset, // 0x17 DW_FORM_sec_offset DWARFFormValue::FC_Exprloc, // 0x18 DW_FORM_exprloc DWARFFormValue::FC_Flag, // 0x19 DW_FORM_flag_present DWARFFormValue::FC_String, // 0x1a DW_FORM_strx DWARFFormValue::FC_Address, // 0x1b DW_FORM_addrx DWARFFormValue::FC_Reference, // 0x1c DW_FORM_ref_sup4 DWARFFormValue::FC_String, // 0x1d DW_FORM_strp_sup DWARFFormValue::FC_Constant, // 0x1e DW_FORM_data16 DWARFFormValue::FC_String, // 0x1f DW_FORM_line_strp DWARFFormValue::FC_Reference, // 0x20 DW_FORM_ref_sig8 DWARFFormValue::FC_Constant, // 0x21 DW_FORM_implicit_const DWARFFormValue::FC_SectionOffset, // 0x22 DW_FORM_loclistx DWARFFormValue::FC_SectionOffset, // 0x23 DW_FORM_rnglistx DWARFFormValue::FC_Reference, // 0x24 DW_FORM_ref_sup8 DWARFFormValue::FC_String, // 0x25 DW_FORM_strx1 DWARFFormValue::FC_String, // 0x26 DW_FORM_strx2 DWARFFormValue::FC_String, // 0x27 DW_FORM_strx3 DWARFFormValue::FC_String, // 0x28 DW_FORM_strx4 DWARFFormValue::FC_Address, // 0x29 DW_FORM_addrx1 DWARFFormValue::FC_Address, // 0x2a DW_FORM_addrx2 DWARFFormValue::FC_Address, // 0x2b DW_FORM_addrx3 DWARFFormValue::FC_Address, // 0x2c DW_FORM_addrx4 }; DWARFFormValue DWARFFormValue::createFromSValue(dwarf::Form F, int64_t V) { return DWARFFormValue(F, ValueType(V)); } DWARFFormValue DWARFFormValue::createFromUValue(dwarf::Form F, uint64_t V) { return DWARFFormValue(F, ValueType(V)); } DWARFFormValue DWARFFormValue::createFromPValue(dwarf::Form F, const char *V) { return DWARFFormValue(F, ValueType(V)); } DWARFFormValue 
DWARFFormValue::createFromBlockValue(dwarf::Form F, ArrayRef D) { ValueType V; V.uval = D.size(); V.data = D.data(); return DWARFFormValue(F, V); } DWARFFormValue DWARFFormValue::createFromUnit(dwarf::Form F, const DWARFUnit *U, uint64_t *OffsetPtr) { DWARFFormValue FormValue(F); FormValue.extractValue(U->getDebugInfoExtractor(), OffsetPtr, U->getFormParams(), U); return FormValue; } bool DWARFFormValue::skipValue(dwarf::Form Form, DataExtractor DebugInfoData, uint64_t *OffsetPtr, const dwarf::FormParams Params) { bool Indirect = false; do { switch (Form) { // Blocks of inlined data that have a length field and the data bytes // inlined in the .debug_info. case DW_FORM_exprloc: case DW_FORM_block: { uint64_t size = DebugInfoData.getULEB128(OffsetPtr); *OffsetPtr += size; return true; } case DW_FORM_block1: { uint8_t size = DebugInfoData.getU8(OffsetPtr); *OffsetPtr += size; return true; } case DW_FORM_block2: { uint16_t size = DebugInfoData.getU16(OffsetPtr); *OffsetPtr += size; return true; } case DW_FORM_block4: { uint32_t size = DebugInfoData.getU32(OffsetPtr); *OffsetPtr += size; return true; } // Inlined NULL terminated C-strings. case DW_FORM_string: DebugInfoData.getCStr(OffsetPtr); return true; case DW_FORM_addr: case DW_FORM_ref_addr: case DW_FORM_flag_present: case DW_FORM_data1: case DW_FORM_data2: case DW_FORM_data4: case DW_FORM_data8: case DW_FORM_data16: case DW_FORM_flag: case DW_FORM_ref1: case DW_FORM_ref2: case DW_FORM_ref4: case DW_FORM_ref8: case DW_FORM_ref_sig8: case DW_FORM_ref_sup4: case DW_FORM_ref_sup8: case DW_FORM_strx1: case DW_FORM_strx2: case DW_FORM_strx4: case DW_FORM_addrx1: case DW_FORM_addrx2: case DW_FORM_addrx4: case DW_FORM_sec_offset: case DW_FORM_strp: case DW_FORM_strp_sup: case DW_FORM_line_strp: case DW_FORM_GNU_ref_alt: case DW_FORM_GNU_strp_alt: + case DW_FORM_implicit_const: if (Optional FixedSize = dwarf::getFixedFormByteSize(Form, Params)) { *OffsetPtr += *FixedSize; return true; } return false; // signed or unsigned LEB 128 values. case DW_FORM_sdata: DebugInfoData.getSLEB128(OffsetPtr); return true; case DW_FORM_udata: case DW_FORM_ref_udata: case DW_FORM_strx: case DW_FORM_addrx: case DW_FORM_loclistx: case DW_FORM_rnglistx: case DW_FORM_GNU_addr_index: case DW_FORM_GNU_str_index: DebugInfoData.getULEB128(OffsetPtr); return true; case DW_FORM_indirect: Indirect = true; Form = static_cast(DebugInfoData.getULEB128(OffsetPtr)); break; default: return false; } } while (Indirect); return true; } bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const { // First, check DWARF5 form classes. if (Form < makeArrayRef(DWARF5FormClasses).size() && DWARF5FormClasses[Form] == FC) return true; // Check more forms from extensions and proposals. switch (Form) { case DW_FORM_GNU_ref_alt: return (FC == FC_Reference); case DW_FORM_GNU_addr_index: return (FC == FC_Address); case DW_FORM_GNU_str_index: case DW_FORM_GNU_strp_alt: return (FC == FC_String); default: break; } if (FC == FC_SectionOffset) { if (Form == DW_FORM_strp || Form == DW_FORM_line_strp) return true; // In DWARF3 DW_FORM_data4 and DW_FORM_data8 served also as a section // offset. If we don't have a DWARFUnit, default to the old behavior. 
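// A minimal illustrative sketch of the round trip that the
// DW_FORM_implicit_const support in this patch enables: the value originates
// in .debug_abbrev, is wrapped via the createFromSValue() factory defined
// above, and can be read back as a signed constant. The chosen constant is an
// arbitrary example value.
#include <cassert>
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"

static void implicitConstRoundTripExample() {
  llvm::DWARFFormValue FV = llvm::DWARFFormValue::createFromSValue(
      llvm::dwarf::DW_FORM_implicit_const, -42);
  assert(FV.getForm() == llvm::dwarf::DW_FORM_implicit_const);
  assert(FV.getAsSignedConstant() && *FV.getAsSignedConstant() == -42);
}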
if (Form == DW_FORM_data4 || Form == DW_FORM_data8) return !U || U->getVersion() <= 3; } return false; } bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data, uint64_t *OffsetPtr, dwarf::FormParams FP, const DWARFContext *Ctx, const DWARFUnit *CU) { if (!Ctx && CU) Ctx = &CU->getContext(); C = Ctx; U = CU; Format = FP.Format; bool Indirect = false; bool IsBlock = false; Value.data = nullptr; // Read the value for the form into value and follow and DW_FORM_indirect // instances we run into Error Err = Error::success(); do { Indirect = false; switch (Form) { case DW_FORM_addr: case DW_FORM_ref_addr: { uint16_t Size = (Form == DW_FORM_addr) ? FP.AddrSize : FP.getRefAddrByteSize(); Value.uval = Data.getRelocatedValue(Size, OffsetPtr, &Value.SectionIndex, &Err); break; } case DW_FORM_exprloc: case DW_FORM_block: Value.uval = Data.getULEB128(OffsetPtr, &Err); IsBlock = true; break; case DW_FORM_block1: Value.uval = Data.getU8(OffsetPtr, &Err); IsBlock = true; break; case DW_FORM_block2: Value.uval = Data.getU16(OffsetPtr, &Err); IsBlock = true; break; case DW_FORM_block4: Value.uval = Data.getU32(OffsetPtr, &Err); IsBlock = true; break; case DW_FORM_data1: case DW_FORM_ref1: case DW_FORM_flag: case DW_FORM_strx1: case DW_FORM_addrx1: Value.uval = Data.getU8(OffsetPtr, &Err); break; case DW_FORM_data2: case DW_FORM_ref2: case DW_FORM_strx2: case DW_FORM_addrx2: Value.uval = Data.getU16(OffsetPtr, &Err); break; case DW_FORM_strx3: Value.uval = Data.getU24(OffsetPtr, &Err); break; case DW_FORM_data4: case DW_FORM_ref4: case DW_FORM_ref_sup4: case DW_FORM_strx4: case DW_FORM_addrx4: Value.uval = Data.getRelocatedValue(4, OffsetPtr, nullptr, &Err); break; case DW_FORM_data8: case DW_FORM_ref8: case DW_FORM_ref_sup8: Value.uval = Data.getRelocatedValue(8, OffsetPtr, nullptr, &Err); break; case DW_FORM_data16: // Treat this like a 16-byte block. Value.uval = 16; IsBlock = true; break; case DW_FORM_sdata: Value.sval = Data.getSLEB128(OffsetPtr, &Err); break; case DW_FORM_udata: case DW_FORM_ref_udata: case DW_FORM_rnglistx: case DW_FORM_loclistx: case DW_FORM_GNU_addr_index: case DW_FORM_GNU_str_index: case DW_FORM_addrx: case DW_FORM_strx: Value.uval = Data.getULEB128(OffsetPtr, &Err); break; case DW_FORM_string: Value.cstr = Data.getCStr(OffsetPtr, &Err); break; case DW_FORM_indirect: Form = static_cast(Data.getULEB128(OffsetPtr, &Err)); Indirect = true; break; case DW_FORM_strp: case DW_FORM_sec_offset: case DW_FORM_GNU_ref_alt: case DW_FORM_GNU_strp_alt: case DW_FORM_line_strp: case DW_FORM_strp_sup: { Value.uval = Data.getRelocatedValue(FP.getDwarfOffsetByteSize(), OffsetPtr, nullptr, &Err); break; } case DW_FORM_flag_present: Value.uval = 1; break; case DW_FORM_ref_sig8: Value.uval = Data.getU64(OffsetPtr, &Err); break; + case DW_FORM_implicit_const: + // Value has been already set by DWARFFormValue::createFromSValue. + break; default: // DWARFFormValue::skipValue() will have caught this and caused all // DWARF DIEs to fail to be parsed, so this code is not be reachable. 
llvm_unreachable("unsupported form"); } } while (Indirect && !Err); if (IsBlock) Value.data = Data.getBytes(OffsetPtr, Value.uval, &Err).bytes_begin(); return !errorToBool(std::move(Err)); } void DWARFFormValue::dumpAddress(raw_ostream &OS, uint8_t AddressSize, uint64_t Address) { uint8_t HexDigits = AddressSize * 2; OS << format("0x%*.*" PRIx64, HexDigits, HexDigits, Address); } void DWARFFormValue::dumpSectionedAddress(raw_ostream &OS, DIDumpOptions DumpOpts, object::SectionedAddress SA) const { dumpAddress(OS, U->getAddressByteSize(), SA.Address); dumpAddressSection(U->getContext().getDWARFObj(), OS, DumpOpts, SA.SectionIndex); } void DWARFFormValue::dumpAddressSection(const DWARFObject &Obj, raw_ostream &OS, DIDumpOptions DumpOpts, uint64_t SectionIndex) { if (!DumpOpts.Verbose || SectionIndex == -1ULL) return; ArrayRef SectionNames = Obj.getSectionNames(); const auto &SecRef = SectionNames[SectionIndex]; OS << " \"" << SecRef.Name << '\"'; // Print section index if name is not unique. if (!SecRef.IsNameUnique) OS << format(" [%" PRIu64 "]", SectionIndex); } void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const { uint64_t UValue = Value.uval; bool CURelativeOffset = false; raw_ostream &AddrOS = DumpOpts.ShowAddresses ? WithColor(OS, HighlightColor::Address).get() : nulls(); int OffsetDumpWidth = 2 * dwarf::getDwarfOffsetByteSize(Format); switch (Form) { case DW_FORM_addr: dumpSectionedAddress(AddrOS, DumpOpts, {Value.uval, Value.SectionIndex}); break; case DW_FORM_addrx: case DW_FORM_addrx1: case DW_FORM_addrx2: case DW_FORM_addrx3: case DW_FORM_addrx4: case DW_FORM_GNU_addr_index: { if (U == nullptr) { OS << ""; break; } Optional A = U->getAddrOffsetSectionItem(UValue); if (!A || DumpOpts.Verbose) AddrOS << format("indexed (%8.8x) address = ", (uint32_t)UValue); if (A) dumpSectionedAddress(AddrOS, DumpOpts, *A); else OS << ""; break; } case DW_FORM_flag_present: OS << "true"; break; case DW_FORM_flag: case DW_FORM_data1: OS << format("0x%02x", (uint8_t)UValue); break; case DW_FORM_data2: OS << format("0x%04x", (uint16_t)UValue); break; case DW_FORM_data4: OS << format("0x%08x", (uint32_t)UValue); break; case DW_FORM_ref_sig8: AddrOS << format("0x%016" PRIx64, UValue); break; case DW_FORM_data8: OS << format("0x%016" PRIx64, UValue); break; case DW_FORM_data16: OS << format_bytes(ArrayRef(Value.data, 16), None, 16, 16); break; case DW_FORM_string: OS << '"'; OS.write_escaped(Value.cstr); OS << '"'; break; case DW_FORM_exprloc: case DW_FORM_block: case DW_FORM_block1: case DW_FORM_block2: case DW_FORM_block4: if (UValue > 0) { switch (Form) { case DW_FORM_exprloc: case DW_FORM_block: AddrOS << format("<0x%" PRIx64 "> ", UValue); break; case DW_FORM_block1: AddrOS << format("<0x%2.2x> ", (uint8_t)UValue); break; case DW_FORM_block2: AddrOS << format("<0x%4.4x> ", (uint16_t)UValue); break; case DW_FORM_block4: AddrOS << format("<0x%8.8x> ", (uint32_t)UValue); break; default: break; } const uint8_t *DataPtr = Value.data; if (DataPtr) { // UValue contains size of block const uint8_t *EndDataPtr = DataPtr + UValue; while (DataPtr < EndDataPtr) { AddrOS << format("%2.2x ", *DataPtr); ++DataPtr; } } else OS << "NULL"; } break; case DW_FORM_sdata: + case DW_FORM_implicit_const: OS << Value.sval; break; case DW_FORM_udata: OS << Value.uval; break; case DW_FORM_strp: if (DumpOpts.Verbose) OS << format(" .debug_str[0x%0*" PRIx64 "] = ", OffsetDumpWidth, UValue); dumpString(OS); break; case DW_FORM_line_strp: if (DumpOpts.Verbose) OS << format(" .debug_line_str[0x%0*" PRIx64 "] = 
", OffsetDumpWidth, UValue); dumpString(OS); break; case DW_FORM_strx: case DW_FORM_strx1: case DW_FORM_strx2: case DW_FORM_strx3: case DW_FORM_strx4: case DW_FORM_GNU_str_index: if (DumpOpts.Verbose) OS << format("indexed (%8.8x) string = ", (uint32_t)UValue); dumpString(OS); break; case DW_FORM_GNU_strp_alt: if (DumpOpts.Verbose) OS << format("alt indirect string, offset: 0x%" PRIx64 "", UValue); dumpString(OS); break; case DW_FORM_ref_addr: AddrOS << format("0x%016" PRIx64, UValue); break; case DW_FORM_ref1: CURelativeOffset = true; if (DumpOpts.Verbose) AddrOS << format("cu + 0x%2.2x", (uint8_t)UValue); break; case DW_FORM_ref2: CURelativeOffset = true; if (DumpOpts.Verbose) AddrOS << format("cu + 0x%4.4x", (uint16_t)UValue); break; case DW_FORM_ref4: CURelativeOffset = true; if (DumpOpts.Verbose) AddrOS << format("cu + 0x%4.4x", (uint32_t)UValue); break; case DW_FORM_ref8: CURelativeOffset = true; if (DumpOpts.Verbose) AddrOS << format("cu + 0x%8.8" PRIx64, UValue); break; case DW_FORM_ref_udata: CURelativeOffset = true; if (DumpOpts.Verbose) AddrOS << format("cu + 0x%" PRIx64, UValue); break; case DW_FORM_GNU_ref_alt: AddrOS << format("", UValue); break; // All DW_FORM_indirect attributes should be resolved prior to calling // this function case DW_FORM_indirect: OS << "DW_FORM_indirect"; break; case DW_FORM_rnglistx: OS << format("indexed (0x%x) rangelist = ", (uint32_t)UValue); break; case DW_FORM_loclistx: OS << format("indexed (0x%x) loclist = ", (uint32_t)UValue); break; case DW_FORM_sec_offset: AddrOS << format("0x%0*" PRIx64, OffsetDumpWidth, UValue); break; default: OS << format("DW_FORM(0x%4.4x)", Form); break; } if (CURelativeOffset) { if (DumpOpts.Verbose) OS << " => {"; if (DumpOpts.ShowAddresses) WithColor(OS, HighlightColor::Address).get() << format("0x%8.8" PRIx64, UValue + (U ? U->getOffset() : 0)); if (DumpOpts.Verbose) OS << "}"; } } void DWARFFormValue::dumpString(raw_ostream &OS) const { Optional DbgStr = getAsCString(); if (DbgStr.hasValue()) { auto COS = WithColor(OS, HighlightColor::String); COS.get() << '"'; COS.get().write_escaped(DbgStr.getValue()); COS.get() << '"'; } } Optional DWARFFormValue::getAsCString() const { if (!isFormClass(FC_String)) return None; if (Form == DW_FORM_string) return Value.cstr; // FIXME: Add support for DW_FORM_GNU_strp_alt if (Form == DW_FORM_GNU_strp_alt || C == nullptr) return None; uint64_t Offset = Value.uval; if (Form == DW_FORM_line_strp) { // .debug_line_str is tracked in the Context. if (const char *Str = C->getLineStringExtractor().getCStr(&Offset)) return Str; return None; } if (Form == DW_FORM_GNU_str_index || Form == DW_FORM_strx || Form == DW_FORM_strx1 || Form == DW_FORM_strx2 || Form == DW_FORM_strx3 || Form == DW_FORM_strx4) { if (!U) return None; Optional StrOffset = U->getStringOffsetSectionItem(Offset); if (!StrOffset) return None; Offset = *StrOffset; } // Prefer the Unit's string extractor, because for .dwo it will point to // .debug_str.dwo, while the Context's extractor always uses .debug_str. 
  if (U) {
    if (const char *Str = U->getStringExtractor().getCStr(&Offset))
      return Str;
    return None;
  }
  if (const char *Str = C->getStringExtractor().getCStr(&Offset))
    return Str;
  return None;
}

Optional<uint64_t> DWARFFormValue::getAsAddress() const {
  if (auto SA = getAsSectionedAddress())
    return SA->Address;
  return None;
}

Optional<object::SectionedAddress>
DWARFFormValue::getAsSectionedAddress() const {
  if (!isFormClass(FC_Address))
    return None;
  if (Form == DW_FORM_GNU_addr_index || Form == DW_FORM_addrx) {
    uint32_t Index = Value.uval;
    if (!U)
      return None;
    Optional<object::SectionedAddress> SA = U->getAddrOffsetSectionItem(Index);
    if (!SA)
      return None;
    return SA;
  }
  return {{Value.uval, Value.SectionIndex}};
}

Optional<uint64_t> DWARFFormValue::getAsReference() const {
  if (auto R = getAsRelativeReference())
    return R->Unit ? R->Unit->getOffset() + R->Offset : R->Offset;
  return None;
}

Optional<DWARFFormValue::UnitOffset>
DWARFFormValue::getAsRelativeReference() const {
  if (!isFormClass(FC_Reference))
    return None;
  switch (Form) {
  case DW_FORM_ref1:
  case DW_FORM_ref2:
  case DW_FORM_ref4:
  case DW_FORM_ref8:
  case DW_FORM_ref_udata:
    if (!U)
      return None;
    return UnitOffset{const_cast<DWARFUnit *>(U), Value.uval};
  case DW_FORM_ref_addr:
  case DW_FORM_ref_sig8:
  case DW_FORM_GNU_ref_alt:
    return UnitOffset{nullptr, Value.uval};
  default:
    return None;
  }
}

Optional<uint64_t> DWARFFormValue::getAsSectionOffset() const {
  if (!isFormClass(FC_SectionOffset))
    return None;
  return Value.uval;
}

Optional<uint64_t> DWARFFormValue::getAsUnsignedConstant() const {
  if ((!isFormClass(FC_Constant) && !isFormClass(FC_Flag)) ||
      Form == DW_FORM_sdata)
    return None;
  return Value.uval;
}

Optional<int64_t> DWARFFormValue::getAsSignedConstant() const {
  if ((!isFormClass(FC_Constant) && !isFormClass(FC_Flag)) ||
      (Form == DW_FORM_udata &&
       uint64_t(std::numeric_limits<int64_t>::max()) < Value.uval))
    return None;
  switch (Form) {
  case DW_FORM_data4:
    return int32_t(Value.uval);
  case DW_FORM_data2:
    return int16_t(Value.uval);
  case DW_FORM_data1:
    return int8_t(Value.uval);
  case DW_FORM_sdata:
  case DW_FORM_data8:
  default:
    return Value.sval;
  }
}

Optional<ArrayRef<uint8_t>> DWARFFormValue::getAsBlock() const {
  if (!isFormClass(FC_Block) && !isFormClass(FC_Exprloc) &&
      Form != DW_FORM_data16)
    return None;
  return makeArrayRef(Value.data, Value.uval);
}

Optional<uint64_t> DWARFFormValue::getAsCStringOffset() const {
  if (!isFormClass(FC_String) && Form == DW_FORM_string)
    return None;
  return Value.uval;
}

Optional<uint64_t> DWARFFormValue::getAsReferenceUVal() const {
  if (!isFormClass(FC_Reference))
    return None;
  return Value.uval;
}
diff --git a/contrib/llvm-project/llvm/lib/IR/Core.cpp b/contrib/llvm-project/llvm/lib/IR/Core.cpp
index 90ba69069bae..039b34ace6ab 100644
--- a/contrib/llvm-project/llvm/lib/IR/Core.cpp
+++ b/contrib/llvm-project/llvm/lib/IR/Core.cpp
@@ -1,4169 +1,4185 @@
//===-- Core.cpp ----------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the common infrastructure (including the C bindings)
// for libLLVMCore.a, which implements the LLVM intermediate representation.
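//
// Editor's note (illustrative, not part of the upstream sources): this
// revision also adds C-API coverage for type attributes, via
// LLVMCreateTypeAttribute, LLVMGetTypeAttributeValue and LLVMIsTypeAttribute
// below. A minimal usage sketch, assuming an existing LLVMContextRef `Ctx`
// and an LLVMTypeRef `RetTy` describing the sret pointee type:
//
//   unsigned Kind = LLVMGetEnumAttributeKindForName("sret", 4);
//   LLVMAttributeRef A = LLVMCreateTypeAttribute(Ctx, Kind, RetTy);
//   assert(LLVMIsTypeAttribute(A));
//   assert(LLVMGetTypeAttributeValue(A) == RetTy);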
// //===----------------------------------------------------------------------===// #include "llvm-c/Core.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Threading.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include using namespace llvm; #define DEBUG_TYPE "ir" void llvm::initializeCore(PassRegistry &Registry) { initializeDominatorTreeWrapperPassPass(Registry); initializePrintModulePassWrapperPass(Registry); initializePrintFunctionPassWrapperPass(Registry); initializeSafepointIRVerifierPass(Registry); initializeVerifierLegacyPassPass(Registry); } void LLVMInitializeCore(LLVMPassRegistryRef R) { initializeCore(*unwrap(R)); } void LLVMShutdown() { llvm_shutdown(); } /*===-- Error handling ----------------------------------------------------===*/ char *LLVMCreateMessage(const char *Message) { return strdup(Message); } void LLVMDisposeMessage(char *Message) { free(Message); } /*===-- Operations on contexts --------------------------------------------===*/ static ManagedStatic GlobalContext; LLVMContextRef LLVMContextCreate() { return wrap(new LLVMContext()); } LLVMContextRef LLVMGetGlobalContext() { return wrap(&*GlobalContext); } void LLVMContextSetDiagnosticHandler(LLVMContextRef C, LLVMDiagnosticHandler Handler, void *DiagnosticContext) { unwrap(C)->setDiagnosticHandlerCallBack( LLVM_EXTENSION reinterpret_cast( Handler), DiagnosticContext); } LLVMDiagnosticHandler LLVMContextGetDiagnosticHandler(LLVMContextRef C) { return LLVM_EXTENSION reinterpret_cast( unwrap(C)->getDiagnosticHandlerCallBack()); } void *LLVMContextGetDiagnosticContext(LLVMContextRef C) { return unwrap(C)->getDiagnosticContext(); } void LLVMContextSetYieldCallback(LLVMContextRef C, LLVMYieldCallback Callback, void *OpaqueHandle) { auto YieldCallback = LLVM_EXTENSION reinterpret_cast(Callback); unwrap(C)->setYieldCallback(YieldCallback, OpaqueHandle); } LLVMBool LLVMContextShouldDiscardValueNames(LLVMContextRef C) { return unwrap(C)->shouldDiscardValueNames(); } void LLVMContextSetDiscardValueNames(LLVMContextRef C, LLVMBool Discard) { unwrap(C)->setDiscardValueNames(Discard); } void LLVMContextDispose(LLVMContextRef C) { delete unwrap(C); } unsigned LLVMGetMDKindIDInContext(LLVMContextRef C, const char *Name, unsigned SLen) { return unwrap(C)->getMDKindID(StringRef(Name, SLen)); } unsigned LLVMGetMDKindID(const char *Name, unsigned SLen) { return LLVMGetMDKindIDInContext(LLVMGetGlobalContext(), Name, SLen); } unsigned LLVMGetEnumAttributeKindForName(const char *Name, size_t SLen) { return Attribute::getAttrKindFromName(StringRef(Name, SLen)); } unsigned LLVMGetLastEnumAttributeKind(void) { return Attribute::AttrKind::EndAttrKinds; } LLVMAttributeRef LLVMCreateEnumAttribute(LLVMContextRef C, unsigned KindID, uint64_t Val) { auto &Ctx = *unwrap(C); auto AttrKind = (Attribute::AttrKind)KindID; if (AttrKind == Attribute::AttrKind::ByVal) 
{ // After r362128, byval attributes need to have a type attribute. Provide a // NULL one until a proper API is added for this. return wrap(Attribute::getWithByValType(Ctx, NULL)); } if (AttrKind == Attribute::AttrKind::StructRet) { // Same as byval. return wrap(Attribute::getWithStructRetType(Ctx, NULL)); } return wrap(Attribute::get(Ctx, AttrKind, Val)); } unsigned LLVMGetEnumAttributeKind(LLVMAttributeRef A) { return unwrap(A).getKindAsEnum(); } uint64_t LLVMGetEnumAttributeValue(LLVMAttributeRef A) { auto Attr = unwrap(A); if (Attr.isEnumAttribute()) return 0; return Attr.getValueAsInt(); } +LLVMAttributeRef LLVMCreateTypeAttribute(LLVMContextRef C, unsigned KindID, + LLVMTypeRef type_ref) { + auto &Ctx = *unwrap(C); + auto AttrKind = (Attribute::AttrKind)KindID; + return wrap(Attribute::get(Ctx, AttrKind, unwrap(type_ref))); +} + +LLVMTypeRef LLVMGetTypeAttributeValue(LLVMAttributeRef A) { + auto Attr = unwrap(A); + return wrap(Attr.getValueAsType()); +} + LLVMAttributeRef LLVMCreateStringAttribute(LLVMContextRef C, const char *K, unsigned KLength, const char *V, unsigned VLength) { return wrap(Attribute::get(*unwrap(C), StringRef(K, KLength), StringRef(V, VLength))); } const char *LLVMGetStringAttributeKind(LLVMAttributeRef A, unsigned *Length) { auto S = unwrap(A).getKindAsString(); *Length = S.size(); return S.data(); } const char *LLVMGetStringAttributeValue(LLVMAttributeRef A, unsigned *Length) { auto S = unwrap(A).getValueAsString(); *Length = S.size(); return S.data(); } LLVMBool LLVMIsEnumAttribute(LLVMAttributeRef A) { auto Attr = unwrap(A); return Attr.isEnumAttribute() || Attr.isIntAttribute(); } LLVMBool LLVMIsStringAttribute(LLVMAttributeRef A) { return unwrap(A).isStringAttribute(); } +LLVMBool LLVMIsTypeAttribute(LLVMAttributeRef A) { + return unwrap(A).isTypeAttribute(); +} + char *LLVMGetDiagInfoDescription(LLVMDiagnosticInfoRef DI) { std::string MsgStorage; raw_string_ostream Stream(MsgStorage); DiagnosticPrinterRawOStream DP(Stream); unwrap(DI)->print(DP); Stream.flush(); return LLVMCreateMessage(MsgStorage.c_str()); } LLVMDiagnosticSeverity LLVMGetDiagInfoSeverity(LLVMDiagnosticInfoRef DI) { LLVMDiagnosticSeverity severity; switch(unwrap(DI)->getSeverity()) { default: severity = LLVMDSError; break; case DS_Warning: severity = LLVMDSWarning; break; case DS_Remark: severity = LLVMDSRemark; break; case DS_Note: severity = LLVMDSNote; break; } return severity; } /*===-- Operations on modules ---------------------------------------------===*/ LLVMModuleRef LLVMModuleCreateWithName(const char *ModuleID) { return wrap(new Module(ModuleID, *GlobalContext)); } LLVMModuleRef LLVMModuleCreateWithNameInContext(const char *ModuleID, LLVMContextRef C) { return wrap(new Module(ModuleID, *unwrap(C))); } void LLVMDisposeModule(LLVMModuleRef M) { delete unwrap(M); } const char *LLVMGetModuleIdentifier(LLVMModuleRef M, size_t *Len) { auto &Str = unwrap(M)->getModuleIdentifier(); *Len = Str.length(); return Str.c_str(); } void LLVMSetModuleIdentifier(LLVMModuleRef M, const char *Ident, size_t Len) { unwrap(M)->setModuleIdentifier(StringRef(Ident, Len)); } const char *LLVMGetSourceFileName(LLVMModuleRef M, size_t *Len) { auto &Str = unwrap(M)->getSourceFileName(); *Len = Str.length(); return Str.c_str(); } void LLVMSetSourceFileName(LLVMModuleRef M, const char *Name, size_t Len) { unwrap(M)->setSourceFileName(StringRef(Name, Len)); } /*--.. 
Data layout .........................................................--*/ const char *LLVMGetDataLayoutStr(LLVMModuleRef M) { return unwrap(M)->getDataLayoutStr().c_str(); } const char *LLVMGetDataLayout(LLVMModuleRef M) { return LLVMGetDataLayoutStr(M); } void LLVMSetDataLayout(LLVMModuleRef M, const char *DataLayoutStr) { unwrap(M)->setDataLayout(DataLayoutStr); } /*--.. Target triple .......................................................--*/ const char * LLVMGetTarget(LLVMModuleRef M) { return unwrap(M)->getTargetTriple().c_str(); } void LLVMSetTarget(LLVMModuleRef M, const char *Triple) { unwrap(M)->setTargetTriple(Triple); } /*--.. Module flags ........................................................--*/ struct LLVMOpaqueModuleFlagEntry { LLVMModuleFlagBehavior Behavior; const char *Key; size_t KeyLen; LLVMMetadataRef Metadata; }; static Module::ModFlagBehavior map_to_llvmModFlagBehavior(LLVMModuleFlagBehavior Behavior) { switch (Behavior) { case LLVMModuleFlagBehaviorError: return Module::ModFlagBehavior::Error; case LLVMModuleFlagBehaviorWarning: return Module::ModFlagBehavior::Warning; case LLVMModuleFlagBehaviorRequire: return Module::ModFlagBehavior::Require; case LLVMModuleFlagBehaviorOverride: return Module::ModFlagBehavior::Override; case LLVMModuleFlagBehaviorAppend: return Module::ModFlagBehavior::Append; case LLVMModuleFlagBehaviorAppendUnique: return Module::ModFlagBehavior::AppendUnique; } llvm_unreachable("Unknown LLVMModuleFlagBehavior"); } static LLVMModuleFlagBehavior map_from_llvmModFlagBehavior(Module::ModFlagBehavior Behavior) { switch (Behavior) { case Module::ModFlagBehavior::Error: return LLVMModuleFlagBehaviorError; case Module::ModFlagBehavior::Warning: return LLVMModuleFlagBehaviorWarning; case Module::ModFlagBehavior::Require: return LLVMModuleFlagBehaviorRequire; case Module::ModFlagBehavior::Override: return LLVMModuleFlagBehaviorOverride; case Module::ModFlagBehavior::Append: return LLVMModuleFlagBehaviorAppend; case Module::ModFlagBehavior::AppendUnique: return LLVMModuleFlagBehaviorAppendUnique; default: llvm_unreachable("Unhandled Flag Behavior"); } } LLVMModuleFlagEntry *LLVMCopyModuleFlagsMetadata(LLVMModuleRef M, size_t *Len) { SmallVector MFEs; unwrap(M)->getModuleFlagsMetadata(MFEs); LLVMOpaqueModuleFlagEntry *Result = static_cast( safe_malloc(MFEs.size() * sizeof(LLVMOpaqueModuleFlagEntry))); for (unsigned i = 0; i < MFEs.size(); ++i) { const auto &ModuleFlag = MFEs[i]; Result[i].Behavior = map_from_llvmModFlagBehavior(ModuleFlag.Behavior); Result[i].Key = ModuleFlag.Key->getString().data(); Result[i].KeyLen = ModuleFlag.Key->getString().size(); Result[i].Metadata = wrap(ModuleFlag.Val); } *Len = MFEs.size(); return Result; } void LLVMDisposeModuleFlagsMetadata(LLVMModuleFlagEntry *Entries) { free(Entries); } LLVMModuleFlagBehavior LLVMModuleFlagEntriesGetFlagBehavior(LLVMModuleFlagEntry *Entries, unsigned Index) { LLVMOpaqueModuleFlagEntry MFE = static_cast(Entries[Index]); return MFE.Behavior; } const char *LLVMModuleFlagEntriesGetKey(LLVMModuleFlagEntry *Entries, unsigned Index, size_t *Len) { LLVMOpaqueModuleFlagEntry MFE = static_cast(Entries[Index]); *Len = MFE.KeyLen; return MFE.Key; } LLVMMetadataRef LLVMModuleFlagEntriesGetMetadata(LLVMModuleFlagEntry *Entries, unsigned Index) { LLVMOpaqueModuleFlagEntry MFE = static_cast(Entries[Index]); return MFE.Metadata; } LLVMMetadataRef LLVMGetModuleFlag(LLVMModuleRef M, const char *Key, size_t KeyLen) { return wrap(unwrap(M)->getModuleFlag({Key, KeyLen})); } void LLVMAddModuleFlag(LLVMModuleRef 
M, LLVMModuleFlagBehavior Behavior, const char *Key, size_t KeyLen, LLVMMetadataRef Val) { unwrap(M)->addModuleFlag(map_to_llvmModFlagBehavior(Behavior), {Key, KeyLen}, unwrap(Val)); } /*--.. Printing modules ....................................................--*/ void LLVMDumpModule(LLVMModuleRef M) { unwrap(M)->print(errs(), nullptr, /*ShouldPreserveUseListOrder=*/false, /*IsForDebug=*/true); } LLVMBool LLVMPrintModuleToFile(LLVMModuleRef M, const char *Filename, char **ErrorMessage) { std::error_code EC; raw_fd_ostream dest(Filename, EC, sys::fs::OF_Text); if (EC) { *ErrorMessage = strdup(EC.message().c_str()); return true; } unwrap(M)->print(dest, nullptr); dest.close(); if (dest.has_error()) { std::string E = "Error printing to file: " + dest.error().message(); *ErrorMessage = strdup(E.c_str()); return true; } return false; } char *LLVMPrintModuleToString(LLVMModuleRef M) { std::string buf; raw_string_ostream os(buf); unwrap(M)->print(os, nullptr); os.flush(); return strdup(buf.c_str()); } /*--.. Operations on inline assembler ......................................--*/ void LLVMSetModuleInlineAsm2(LLVMModuleRef M, const char *Asm, size_t Len) { unwrap(M)->setModuleInlineAsm(StringRef(Asm, Len)); } void LLVMSetModuleInlineAsm(LLVMModuleRef M, const char *Asm) { unwrap(M)->setModuleInlineAsm(StringRef(Asm)); } void LLVMAppendModuleInlineAsm(LLVMModuleRef M, const char *Asm, size_t Len) { unwrap(M)->appendModuleInlineAsm(StringRef(Asm, Len)); } const char *LLVMGetModuleInlineAsm(LLVMModuleRef M, size_t *Len) { auto &Str = unwrap(M)->getModuleInlineAsm(); *Len = Str.length(); return Str.c_str(); } LLVMValueRef LLVMGetInlineAsm(LLVMTypeRef Ty, char *AsmString, size_t AsmStringSize, char *Constraints, size_t ConstraintsSize, LLVMBool HasSideEffects, LLVMBool IsAlignStack, LLVMInlineAsmDialect Dialect) { InlineAsm::AsmDialect AD; switch (Dialect) { case LLVMInlineAsmDialectATT: AD = InlineAsm::AD_ATT; break; case LLVMInlineAsmDialectIntel: AD = InlineAsm::AD_Intel; break; } return wrap(InlineAsm::get(unwrap(Ty), StringRef(AsmString, AsmStringSize), StringRef(Constraints, ConstraintsSize), HasSideEffects, IsAlignStack, AD)); } /*--.. Operations on module contexts ......................................--*/ LLVMContextRef LLVMGetModuleContext(LLVMModuleRef M) { return wrap(&unwrap(M)->getContext()); } /*===-- Operations on types -----------------------------------------------===*/ /*--.. 
Operations on all types (mostly) ....................................--*/ LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) { switch (unwrap(Ty)->getTypeID()) { case Type::VoidTyID: return LLVMVoidTypeKind; case Type::HalfTyID: return LLVMHalfTypeKind; case Type::BFloatTyID: return LLVMBFloatTypeKind; case Type::FloatTyID: return LLVMFloatTypeKind; case Type::DoubleTyID: return LLVMDoubleTypeKind; case Type::X86_FP80TyID: return LLVMX86_FP80TypeKind; case Type::FP128TyID: return LLVMFP128TypeKind; case Type::PPC_FP128TyID: return LLVMPPC_FP128TypeKind; case Type::LabelTyID: return LLVMLabelTypeKind; case Type::MetadataTyID: return LLVMMetadataTypeKind; case Type::IntegerTyID: return LLVMIntegerTypeKind; case Type::FunctionTyID: return LLVMFunctionTypeKind; case Type::StructTyID: return LLVMStructTypeKind; case Type::ArrayTyID: return LLVMArrayTypeKind; case Type::PointerTyID: return LLVMPointerTypeKind; case Type::FixedVectorTyID: return LLVMVectorTypeKind; case Type::X86_MMXTyID: return LLVMX86_MMXTypeKind; case Type::X86_AMXTyID: return LLVMX86_AMXTypeKind; case Type::TokenTyID: return LLVMTokenTypeKind; case Type::ScalableVectorTyID: return LLVMScalableVectorTypeKind; } llvm_unreachable("Unhandled TypeID."); } LLVMBool LLVMTypeIsSized(LLVMTypeRef Ty) { return unwrap(Ty)->isSized(); } LLVMContextRef LLVMGetTypeContext(LLVMTypeRef Ty) { return wrap(&unwrap(Ty)->getContext()); } void LLVMDumpType(LLVMTypeRef Ty) { return unwrap(Ty)->print(errs(), /*IsForDebug=*/true); } char *LLVMPrintTypeToString(LLVMTypeRef Ty) { std::string buf; raw_string_ostream os(buf); if (unwrap(Ty)) unwrap(Ty)->print(os); else os << "Printing Type"; os.flush(); return strdup(buf.c_str()); } /*--.. Operations on integer types .........................................--*/ LLVMTypeRef LLVMInt1TypeInContext(LLVMContextRef C) { return (LLVMTypeRef) Type::getInt1Ty(*unwrap(C)); } LLVMTypeRef LLVMInt8TypeInContext(LLVMContextRef C) { return (LLVMTypeRef) Type::getInt8Ty(*unwrap(C)); } LLVMTypeRef LLVMInt16TypeInContext(LLVMContextRef C) { return (LLVMTypeRef) Type::getInt16Ty(*unwrap(C)); } LLVMTypeRef LLVMInt32TypeInContext(LLVMContextRef C) { return (LLVMTypeRef) Type::getInt32Ty(*unwrap(C)); } LLVMTypeRef LLVMInt64TypeInContext(LLVMContextRef C) { return (LLVMTypeRef) Type::getInt64Ty(*unwrap(C)); } LLVMTypeRef LLVMInt128TypeInContext(LLVMContextRef C) { return (LLVMTypeRef) Type::getInt128Ty(*unwrap(C)); } LLVMTypeRef LLVMIntTypeInContext(LLVMContextRef C, unsigned NumBits) { return wrap(IntegerType::get(*unwrap(C), NumBits)); } LLVMTypeRef LLVMInt1Type(void) { return LLVMInt1TypeInContext(LLVMGetGlobalContext()); } LLVMTypeRef LLVMInt8Type(void) { return LLVMInt8TypeInContext(LLVMGetGlobalContext()); } LLVMTypeRef LLVMInt16Type(void) { return LLVMInt16TypeInContext(LLVMGetGlobalContext()); } LLVMTypeRef LLVMInt32Type(void) { return LLVMInt32TypeInContext(LLVMGetGlobalContext()); } LLVMTypeRef LLVMInt64Type(void) { return LLVMInt64TypeInContext(LLVMGetGlobalContext()); } LLVMTypeRef LLVMInt128Type(void) { return LLVMInt128TypeInContext(LLVMGetGlobalContext()); } LLVMTypeRef LLVMIntType(unsigned NumBits) { return LLVMIntTypeInContext(LLVMGetGlobalContext(), NumBits); } unsigned LLVMGetIntTypeWidth(LLVMTypeRef IntegerTy) { return unwrap(IntegerTy)->getBitWidth(); } /*--.. 
Operations on real types ............................................--*/ LLVMTypeRef LLVMHalfTypeInContext(LLVMContextRef C) { return (LLVMTypeRef) Type::getHalfTy(*unwrap(C)); } LLVMTypeRef LLVMBFloatTypeInContext(LLVMContextRef C) { return (LLVMTypeRef) Type::getBFloatTy(*unwrap(C)); } LLVMTypeRef LLVMFloatTypeInContext(LLVMContextRef C) { return (LLVMTypeRef) Type::getFloatTy(*unwrap(C)); } LLVMTypeRef LLVMDoubleTypeInContext(LLVMContextRef C) { return (LLVMTypeRef) Type::getDoubleTy(*unwrap(C)); } LLVMTypeRef LLVMX86FP80TypeInContext(LLVMContextRef C) { return (LLVMTypeRef) Type::getX86_FP80Ty(*unwrap(C)); } LLVMTypeRef LLVMFP128TypeInContext(LLVMContextRef C) { return (LLVMTypeRef) Type::getFP128Ty(*unwrap(C)); } LLVMTypeRef LLVMPPCFP128TypeInContext(LLVMContextRef C) { return (LLVMTypeRef) Type::getPPC_FP128Ty(*unwrap(C)); } LLVMTypeRef LLVMX86MMXTypeInContext(LLVMContextRef C) { return (LLVMTypeRef) Type::getX86_MMXTy(*unwrap(C)); } LLVMTypeRef LLVMX86AMXTypeInContext(LLVMContextRef C) { return (LLVMTypeRef) Type::getX86_AMXTy(*unwrap(C)); } LLVMTypeRef LLVMHalfType(void) { return LLVMHalfTypeInContext(LLVMGetGlobalContext()); } LLVMTypeRef LLVMBFloatType(void) { return LLVMBFloatTypeInContext(LLVMGetGlobalContext()); } LLVMTypeRef LLVMFloatType(void) { return LLVMFloatTypeInContext(LLVMGetGlobalContext()); } LLVMTypeRef LLVMDoubleType(void) { return LLVMDoubleTypeInContext(LLVMGetGlobalContext()); } LLVMTypeRef LLVMX86FP80Type(void) { return LLVMX86FP80TypeInContext(LLVMGetGlobalContext()); } LLVMTypeRef LLVMFP128Type(void) { return LLVMFP128TypeInContext(LLVMGetGlobalContext()); } LLVMTypeRef LLVMPPCFP128Type(void) { return LLVMPPCFP128TypeInContext(LLVMGetGlobalContext()); } LLVMTypeRef LLVMX86MMXType(void) { return LLVMX86MMXTypeInContext(LLVMGetGlobalContext()); } LLVMTypeRef LLVMX86AMXType(void) { return LLVMX86AMXTypeInContext(LLVMGetGlobalContext()); } /*--.. Operations on function types ........................................--*/ LLVMTypeRef LLVMFunctionType(LLVMTypeRef ReturnType, LLVMTypeRef *ParamTypes, unsigned ParamCount, LLVMBool IsVarArg) { ArrayRef Tys(unwrap(ParamTypes), ParamCount); return wrap(FunctionType::get(unwrap(ReturnType), Tys, IsVarArg != 0)); } LLVMBool LLVMIsFunctionVarArg(LLVMTypeRef FunctionTy) { return unwrap(FunctionTy)->isVarArg(); } LLVMTypeRef LLVMGetReturnType(LLVMTypeRef FunctionTy) { return wrap(unwrap(FunctionTy)->getReturnType()); } unsigned LLVMCountParamTypes(LLVMTypeRef FunctionTy) { return unwrap(FunctionTy)->getNumParams(); } void LLVMGetParamTypes(LLVMTypeRef FunctionTy, LLVMTypeRef *Dest) { FunctionType *Ty = unwrap(FunctionTy); for (FunctionType::param_iterator I = Ty->param_begin(), E = Ty->param_end(); I != E; ++I) *Dest++ = wrap(*I); } /*--.. 
Operations on struct types ..........................................--*/ LLVMTypeRef LLVMStructTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes, unsigned ElementCount, LLVMBool Packed) { ArrayRef Tys(unwrap(ElementTypes), ElementCount); return wrap(StructType::get(*unwrap(C), Tys, Packed != 0)); } LLVMTypeRef LLVMStructType(LLVMTypeRef *ElementTypes, unsigned ElementCount, LLVMBool Packed) { return LLVMStructTypeInContext(LLVMGetGlobalContext(), ElementTypes, ElementCount, Packed); } LLVMTypeRef LLVMStructCreateNamed(LLVMContextRef C, const char *Name) { return wrap(StructType::create(*unwrap(C), Name)); } const char *LLVMGetStructName(LLVMTypeRef Ty) { StructType *Type = unwrap(Ty); if (!Type->hasName()) return nullptr; return Type->getName().data(); } void LLVMStructSetBody(LLVMTypeRef StructTy, LLVMTypeRef *ElementTypes, unsigned ElementCount, LLVMBool Packed) { ArrayRef Tys(unwrap(ElementTypes), ElementCount); unwrap(StructTy)->setBody(Tys, Packed != 0); } unsigned LLVMCountStructElementTypes(LLVMTypeRef StructTy) { return unwrap(StructTy)->getNumElements(); } void LLVMGetStructElementTypes(LLVMTypeRef StructTy, LLVMTypeRef *Dest) { StructType *Ty = unwrap(StructTy); for (StructType::element_iterator I = Ty->element_begin(), E = Ty->element_end(); I != E; ++I) *Dest++ = wrap(*I); } LLVMTypeRef LLVMStructGetTypeAtIndex(LLVMTypeRef StructTy, unsigned i) { StructType *Ty = unwrap(StructTy); return wrap(Ty->getTypeAtIndex(i)); } LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy) { return unwrap(StructTy)->isPacked(); } LLVMBool LLVMIsOpaqueStruct(LLVMTypeRef StructTy) { return unwrap(StructTy)->isOpaque(); } LLVMBool LLVMIsLiteralStruct(LLVMTypeRef StructTy) { return unwrap(StructTy)->isLiteral(); } LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name) { return wrap(StructType::getTypeByName(unwrap(M)->getContext(), Name)); } LLVMTypeRef LLVMGetTypeByName2(LLVMContextRef C, const char *Name) { return wrap(StructType::getTypeByName(*unwrap(C), Name)); } /*--.. Operations on array, pointer, and vector types (sequence types) .....--*/ void LLVMGetSubtypes(LLVMTypeRef Tp, LLVMTypeRef *Arr) { int i = 0; for (auto *T : unwrap(Tp)->subtypes()) { Arr[i] = wrap(T); i++; } } LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount) { return wrap(ArrayType::get(unwrap(ElementType), ElementCount)); } LLVMTypeRef LLVMPointerType(LLVMTypeRef ElementType, unsigned AddressSpace) { return wrap(PointerType::get(unwrap(ElementType), AddressSpace)); } LLVMTypeRef LLVMVectorType(LLVMTypeRef ElementType, unsigned ElementCount) { return wrap(FixedVectorType::get(unwrap(ElementType), ElementCount)); } LLVMTypeRef LLVMScalableVectorType(LLVMTypeRef ElementType, unsigned ElementCount) { return wrap(ScalableVectorType::get(unwrap(ElementType), ElementCount)); } LLVMTypeRef LLVMGetElementType(LLVMTypeRef WrappedTy) { auto *Ty = unwrap(WrappedTy); if (auto *PTy = dyn_cast(Ty)) return wrap(PTy->getElementType()); if (auto *ATy = dyn_cast(Ty)) return wrap(ATy->getElementType()); return wrap(cast(Ty)->getElementType()); } unsigned LLVMGetNumContainedTypes(LLVMTypeRef Tp) { return unwrap(Tp)->getNumContainedTypes(); } unsigned LLVMGetArrayLength(LLVMTypeRef ArrayTy) { return unwrap(ArrayTy)->getNumElements(); } unsigned LLVMGetPointerAddressSpace(LLVMTypeRef PointerTy) { return unwrap(PointerTy)->getAddressSpace(); } unsigned LLVMGetVectorSize(LLVMTypeRef VectorTy) { return unwrap(VectorTy)->getElementCount().getKnownMinValue(); } /*--.. 
Operations on other types ...........................................--*/ LLVMTypeRef LLVMVoidTypeInContext(LLVMContextRef C) { return wrap(Type::getVoidTy(*unwrap(C))); } LLVMTypeRef LLVMLabelTypeInContext(LLVMContextRef C) { return wrap(Type::getLabelTy(*unwrap(C))); } LLVMTypeRef LLVMTokenTypeInContext(LLVMContextRef C) { return wrap(Type::getTokenTy(*unwrap(C))); } LLVMTypeRef LLVMMetadataTypeInContext(LLVMContextRef C) { return wrap(Type::getMetadataTy(*unwrap(C))); } LLVMTypeRef LLVMVoidType(void) { return LLVMVoidTypeInContext(LLVMGetGlobalContext()); } LLVMTypeRef LLVMLabelType(void) { return LLVMLabelTypeInContext(LLVMGetGlobalContext()); } /*===-- Operations on values ----------------------------------------------===*/ /*--.. Operations on all values ............................................--*/ LLVMTypeRef LLVMTypeOf(LLVMValueRef Val) { return wrap(unwrap(Val)->getType()); } LLVMValueKind LLVMGetValueKind(LLVMValueRef Val) { switch(unwrap(Val)->getValueID()) { #define LLVM_C_API 1 #define HANDLE_VALUE(Name) \ case Value::Name##Val: \ return LLVM##Name##ValueKind; #include "llvm/IR/Value.def" default: return LLVMInstructionValueKind; } } const char *LLVMGetValueName2(LLVMValueRef Val, size_t *Length) { auto *V = unwrap(Val); *Length = V->getName().size(); return V->getName().data(); } void LLVMSetValueName2(LLVMValueRef Val, const char *Name, size_t NameLen) { unwrap(Val)->setName(StringRef(Name, NameLen)); } const char *LLVMGetValueName(LLVMValueRef Val) { return unwrap(Val)->getName().data(); } void LLVMSetValueName(LLVMValueRef Val, const char *Name) { unwrap(Val)->setName(Name); } void LLVMDumpValue(LLVMValueRef Val) { unwrap(Val)->print(errs(), /*IsForDebug=*/true); } char* LLVMPrintValueToString(LLVMValueRef Val) { std::string buf; raw_string_ostream os(buf); if (unwrap(Val)) unwrap(Val)->print(os); else os << "Printing Value"; os.flush(); return strdup(buf.c_str()); } void LLVMReplaceAllUsesWith(LLVMValueRef OldVal, LLVMValueRef NewVal) { unwrap(OldVal)->replaceAllUsesWith(unwrap(NewVal)); } int LLVMHasMetadata(LLVMValueRef Inst) { return unwrap(Inst)->hasMetadata(); } LLVMValueRef LLVMGetMetadata(LLVMValueRef Inst, unsigned KindID) { auto *I = unwrap(Inst); assert(I && "Expected instruction"); if (auto *MD = I->getMetadata(KindID)) return wrap(MetadataAsValue::get(I->getContext(), MD)); return nullptr; } // MetadataAsValue uses a canonical format which strips the actual MDNode for // MDNode with just a single constant value, storing just a ConstantAsMetadata // This undoes this canonicalization, reconstructing the MDNode. static MDNode *extractMDNode(MetadataAsValue *MAV) { Metadata *MD = MAV->getMetadata(); assert((isa(MD) || isa(MD)) && "Expected a metadata node or a canonicalized constant"); if (MDNode *N = dyn_cast(MD)) return N; return MDNode::get(MAV->getContext(), MD); } void LLVMSetMetadata(LLVMValueRef Inst, unsigned KindID, LLVMValueRef Val) { MDNode *N = Val ? 
extractMDNode(unwrap(Val)) : nullptr; unwrap(Inst)->setMetadata(KindID, N); } struct LLVMOpaqueValueMetadataEntry { unsigned Kind; LLVMMetadataRef Metadata; }; using MetadataEntries = SmallVectorImpl>; static LLVMValueMetadataEntry * llvm_getMetadata(size_t *NumEntries, llvm::function_ref AccessMD) { SmallVector, 8> MVEs; AccessMD(MVEs); LLVMOpaqueValueMetadataEntry *Result = static_cast( safe_malloc(MVEs.size() * sizeof(LLVMOpaqueValueMetadataEntry))); for (unsigned i = 0; i < MVEs.size(); ++i) { const auto &ModuleFlag = MVEs[i]; Result[i].Kind = ModuleFlag.first; Result[i].Metadata = wrap(ModuleFlag.second); } *NumEntries = MVEs.size(); return Result; } LLVMValueMetadataEntry * LLVMInstructionGetAllMetadataOtherThanDebugLoc(LLVMValueRef Value, size_t *NumEntries) { return llvm_getMetadata(NumEntries, [&Value](MetadataEntries &Entries) { Entries.clear(); unwrap(Value)->getAllMetadata(Entries); }); } /*--.. Conversion functions ................................................--*/ #define LLVM_DEFINE_VALUE_CAST(name) \ LLVMValueRef LLVMIsA##name(LLVMValueRef Val) { \ return wrap(static_cast(dyn_cast_or_null(unwrap(Val)))); \ } LLVM_FOR_EACH_VALUE_SUBCLASS(LLVM_DEFINE_VALUE_CAST) LLVMValueRef LLVMIsAMDNode(LLVMValueRef Val) { if (auto *MD = dyn_cast_or_null(unwrap(Val))) if (isa(MD->getMetadata()) || isa(MD->getMetadata())) return Val; return nullptr; } LLVMValueRef LLVMIsAMDString(LLVMValueRef Val) { if (auto *MD = dyn_cast_or_null(unwrap(Val))) if (isa(MD->getMetadata())) return Val; return nullptr; } /*--.. Operations on Uses ..................................................--*/ LLVMUseRef LLVMGetFirstUse(LLVMValueRef Val) { Value *V = unwrap(Val); Value::use_iterator I = V->use_begin(); if (I == V->use_end()) return nullptr; return wrap(&*I); } LLVMUseRef LLVMGetNextUse(LLVMUseRef U) { Use *Next = unwrap(U)->getNext(); if (Next) return wrap(Next); return nullptr; } LLVMValueRef LLVMGetUser(LLVMUseRef U) { return wrap(unwrap(U)->getUser()); } LLVMValueRef LLVMGetUsedValue(LLVMUseRef U) { return wrap(unwrap(U)->get()); } /*--.. Operations on Users .................................................--*/ static LLVMValueRef getMDNodeOperandImpl(LLVMContext &Context, const MDNode *N, unsigned Index) { Metadata *Op = N->getOperand(Index); if (!Op) return nullptr; if (auto *C = dyn_cast(Op)) return wrap(C->getValue()); return wrap(MetadataAsValue::get(Context, Op)); } LLVMValueRef LLVMGetOperand(LLVMValueRef Val, unsigned Index) { Value *V = unwrap(Val); if (auto *MD = dyn_cast(V)) { if (auto *L = dyn_cast(MD->getMetadata())) { assert(Index == 0 && "Function-local metadata can only have one operand"); return wrap(L->getValue()); } return getMDNodeOperandImpl(V->getContext(), cast(MD->getMetadata()), Index); } return wrap(cast(V)->getOperand(Index)); } LLVMUseRef LLVMGetOperandUse(LLVMValueRef Val, unsigned Index) { Value *V = unwrap(Val); return wrap(&cast(V)->getOperandUse(Index)); } void LLVMSetOperand(LLVMValueRef Val, unsigned Index, LLVMValueRef Op) { unwrap(Val)->setOperand(Index, unwrap(Op)); } int LLVMGetNumOperands(LLVMValueRef Val) { Value *V = unwrap(Val); if (isa(V)) return LLVMGetMDNodeNumOperands(Val); return cast(V)->getNumOperands(); } /*--.. 
Operations on constants of any type .................................--*/ LLVMValueRef LLVMConstNull(LLVMTypeRef Ty) { return wrap(Constant::getNullValue(unwrap(Ty))); } LLVMValueRef LLVMConstAllOnes(LLVMTypeRef Ty) { return wrap(Constant::getAllOnesValue(unwrap(Ty))); } LLVMValueRef LLVMGetUndef(LLVMTypeRef Ty) { return wrap(UndefValue::get(unwrap(Ty))); } LLVMValueRef LLVMGetPoison(LLVMTypeRef Ty) { return wrap(PoisonValue::get(unwrap(Ty))); } LLVMBool LLVMIsConstant(LLVMValueRef Ty) { return isa(unwrap(Ty)); } LLVMBool LLVMIsNull(LLVMValueRef Val) { if (Constant *C = dyn_cast(unwrap(Val))) return C->isNullValue(); return false; } LLVMBool LLVMIsUndef(LLVMValueRef Val) { return isa(unwrap(Val)); } LLVMBool LLVMIsPoison(LLVMValueRef Val) { return isa(unwrap(Val)); } LLVMValueRef LLVMConstPointerNull(LLVMTypeRef Ty) { return wrap(ConstantPointerNull::get(unwrap(Ty))); } /*--.. Operations on metadata nodes ........................................--*/ LLVMMetadataRef LLVMMDStringInContext2(LLVMContextRef C, const char *Str, size_t SLen) { return wrap(MDString::get(*unwrap(C), StringRef(Str, SLen))); } LLVMMetadataRef LLVMMDNodeInContext2(LLVMContextRef C, LLVMMetadataRef *MDs, size_t Count) { return wrap(MDNode::get(*unwrap(C), ArrayRef(unwrap(MDs), Count))); } LLVMValueRef LLVMMDStringInContext(LLVMContextRef C, const char *Str, unsigned SLen) { LLVMContext &Context = *unwrap(C); return wrap(MetadataAsValue::get( Context, MDString::get(Context, StringRef(Str, SLen)))); } LLVMValueRef LLVMMDString(const char *Str, unsigned SLen) { return LLVMMDStringInContext(LLVMGetGlobalContext(), Str, SLen); } LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals, unsigned Count) { LLVMContext &Context = *unwrap(C); SmallVector MDs; for (auto *OV : makeArrayRef(Vals, Count)) { Value *V = unwrap(OV); Metadata *MD; if (!V) MD = nullptr; else if (auto *C = dyn_cast(V)) MD = ConstantAsMetadata::get(C); else if (auto *MDV = dyn_cast(V)) { MD = MDV->getMetadata(); assert(!isa(MD) && "Unexpected function-local metadata " "outside of direct argument to call"); } else { // This is function-local metadata. Pretend to make an MDNode. 
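      // (Editor's note: "function-local" here means the value is not a
      // Constant and not already MetadataAsValue, e.g. an instruction or an
      // argument, so it is wrapped as LocalAsMetadata rather than folded into
      // an MDNode; the C API only supports this for the single-operand case
      // asserted below.)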
assert(Count == 1 && "Expected only one operand to function-local metadata"); return wrap(MetadataAsValue::get(Context, LocalAsMetadata::get(V))); } MDs.push_back(MD); } return wrap(MetadataAsValue::get(Context, MDNode::get(Context, MDs))); } LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count) { return LLVMMDNodeInContext(LLVMGetGlobalContext(), Vals, Count); } LLVMValueRef LLVMMetadataAsValue(LLVMContextRef C, LLVMMetadataRef MD) { return wrap(MetadataAsValue::get(*unwrap(C), unwrap(MD))); } LLVMMetadataRef LLVMValueAsMetadata(LLVMValueRef Val) { auto *V = unwrap(Val); if (auto *C = dyn_cast(V)) return wrap(ConstantAsMetadata::get(C)); if (auto *MAV = dyn_cast(V)) return wrap(MAV->getMetadata()); return wrap(ValueAsMetadata::get(V)); } const char *LLVMGetMDString(LLVMValueRef V, unsigned *Length) { if (const auto *MD = dyn_cast(unwrap(V))) if (const MDString *S = dyn_cast(MD->getMetadata())) { *Length = S->getString().size(); return S->getString().data(); } *Length = 0; return nullptr; } unsigned LLVMGetMDNodeNumOperands(LLVMValueRef V) { auto *MD = cast(unwrap(V)); if (isa(MD->getMetadata())) return 1; return cast(MD->getMetadata())->getNumOperands(); } LLVMNamedMDNodeRef LLVMGetFirstNamedMetadata(LLVMModuleRef M) { Module *Mod = unwrap(M); Module::named_metadata_iterator I = Mod->named_metadata_begin(); if (I == Mod->named_metadata_end()) return nullptr; return wrap(&*I); } LLVMNamedMDNodeRef LLVMGetLastNamedMetadata(LLVMModuleRef M) { Module *Mod = unwrap(M); Module::named_metadata_iterator I = Mod->named_metadata_end(); if (I == Mod->named_metadata_begin()) return nullptr; return wrap(&*--I); } LLVMNamedMDNodeRef LLVMGetNextNamedMetadata(LLVMNamedMDNodeRef NMD) { NamedMDNode *NamedNode = unwrap(NMD); Module::named_metadata_iterator I(NamedNode); if (++I == NamedNode->getParent()->named_metadata_end()) return nullptr; return wrap(&*I); } LLVMNamedMDNodeRef LLVMGetPreviousNamedMetadata(LLVMNamedMDNodeRef NMD) { NamedMDNode *NamedNode = unwrap(NMD); Module::named_metadata_iterator I(NamedNode); if (I == NamedNode->getParent()->named_metadata_begin()) return nullptr; return wrap(&*--I); } LLVMNamedMDNodeRef LLVMGetNamedMetadata(LLVMModuleRef M, const char *Name, size_t NameLen) { return wrap(unwrap(M)->getNamedMetadata(StringRef(Name, NameLen))); } LLVMNamedMDNodeRef LLVMGetOrInsertNamedMetadata(LLVMModuleRef M, const char *Name, size_t NameLen) { return wrap(unwrap(M)->getOrInsertNamedMetadata({Name, NameLen})); } const char *LLVMGetNamedMetadataName(LLVMNamedMDNodeRef NMD, size_t *NameLen) { NamedMDNode *NamedNode = unwrap(NMD); *NameLen = NamedNode->getName().size(); return NamedNode->getName().data(); } void LLVMGetMDNodeOperands(LLVMValueRef V, LLVMValueRef *Dest) { auto *MD = cast(unwrap(V)); if (auto *MDV = dyn_cast(MD->getMetadata())) { *Dest = wrap(MDV->getValue()); return; } const auto *N = cast(MD->getMetadata()); const unsigned numOperands = N->getNumOperands(); LLVMContext &Context = unwrap(V)->getContext(); for (unsigned i = 0; i < numOperands; i++) Dest[i] = getMDNodeOperandImpl(Context, N, i); } unsigned LLVMGetNamedMetadataNumOperands(LLVMModuleRef M, const char *Name) { if (NamedMDNode *N = unwrap(M)->getNamedMetadata(Name)) { return N->getNumOperands(); } return 0; } void LLVMGetNamedMetadataOperands(LLVMModuleRef M, const char *Name, LLVMValueRef *Dest) { NamedMDNode *N = unwrap(M)->getNamedMetadata(Name); if (!N) return; LLVMContext &Context = unwrap(M)->getContext(); for (unsigned i=0;igetNumOperands();i++) Dest[i] = wrap(MetadataAsValue::get(Context, 
N->getOperand(i))); } void LLVMAddNamedMetadataOperand(LLVMModuleRef M, const char *Name, LLVMValueRef Val) { NamedMDNode *N = unwrap(M)->getOrInsertNamedMetadata(Name); if (!N) return; if (!Val) return; N->addOperand(extractMDNode(unwrap(Val))); } const char *LLVMGetDebugLocDirectory(LLVMValueRef Val, unsigned *Length) { if (!Length) return nullptr; StringRef S; if (const auto *I = dyn_cast(unwrap(Val))) { if (const auto &DL = I->getDebugLoc()) { S = DL->getDirectory(); } } else if (const auto *GV = dyn_cast(unwrap(Val))) { SmallVector GVEs; GV->getDebugInfo(GVEs); if (GVEs.size()) if (const DIGlobalVariable *DGV = GVEs[0]->getVariable()) S = DGV->getDirectory(); } else if (const auto *F = dyn_cast(unwrap(Val))) { if (const DISubprogram *DSP = F->getSubprogram()) S = DSP->getDirectory(); } else { assert(0 && "Expected Instruction, GlobalVariable or Function"); return nullptr; } *Length = S.size(); return S.data(); } const char *LLVMGetDebugLocFilename(LLVMValueRef Val, unsigned *Length) { if (!Length) return nullptr; StringRef S; if (const auto *I = dyn_cast(unwrap(Val))) { if (const auto &DL = I->getDebugLoc()) { S = DL->getFilename(); } } else if (const auto *GV = dyn_cast(unwrap(Val))) { SmallVector GVEs; GV->getDebugInfo(GVEs); if (GVEs.size()) if (const DIGlobalVariable *DGV = GVEs[0]->getVariable()) S = DGV->getFilename(); } else if (const auto *F = dyn_cast(unwrap(Val))) { if (const DISubprogram *DSP = F->getSubprogram()) S = DSP->getFilename(); } else { assert(0 && "Expected Instruction, GlobalVariable or Function"); return nullptr; } *Length = S.size(); return S.data(); } unsigned LLVMGetDebugLocLine(LLVMValueRef Val) { unsigned L = 0; if (const auto *I = dyn_cast(unwrap(Val))) { if (const auto &DL = I->getDebugLoc()) { L = DL->getLine(); } } else if (const auto *GV = dyn_cast(unwrap(Val))) { SmallVector GVEs; GV->getDebugInfo(GVEs); if (GVEs.size()) if (const DIGlobalVariable *DGV = GVEs[0]->getVariable()) L = DGV->getLine(); } else if (const auto *F = dyn_cast(unwrap(Val))) { if (const DISubprogram *DSP = F->getSubprogram()) L = DSP->getLine(); } else { assert(0 && "Expected Instruction, GlobalVariable or Function"); return -1; } return L; } unsigned LLVMGetDebugLocColumn(LLVMValueRef Val) { unsigned C = 0; if (const auto *I = dyn_cast(unwrap(Val))) if (const auto &DL = I->getDebugLoc()) C = DL->getColumn(); return C; } /*--.. 
Operations on scalar constants ......................................--*/ LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N, LLVMBool SignExtend) { return wrap(ConstantInt::get(unwrap(IntTy), N, SignExtend != 0)); } LLVMValueRef LLVMConstIntOfArbitraryPrecision(LLVMTypeRef IntTy, unsigned NumWords, const uint64_t Words[]) { IntegerType *Ty = unwrap(IntTy); return wrap(ConstantInt::get(Ty->getContext(), APInt(Ty->getBitWidth(), makeArrayRef(Words, NumWords)))); } LLVMValueRef LLVMConstIntOfString(LLVMTypeRef IntTy, const char Str[], uint8_t Radix) { return wrap(ConstantInt::get(unwrap(IntTy), StringRef(Str), Radix)); } LLVMValueRef LLVMConstIntOfStringAndSize(LLVMTypeRef IntTy, const char Str[], unsigned SLen, uint8_t Radix) { return wrap(ConstantInt::get(unwrap(IntTy), StringRef(Str, SLen), Radix)); } LLVMValueRef LLVMConstReal(LLVMTypeRef RealTy, double N) { return wrap(ConstantFP::get(unwrap(RealTy), N)); } LLVMValueRef LLVMConstRealOfString(LLVMTypeRef RealTy, const char *Text) { return wrap(ConstantFP::get(unwrap(RealTy), StringRef(Text))); } LLVMValueRef LLVMConstRealOfStringAndSize(LLVMTypeRef RealTy, const char Str[], unsigned SLen) { return wrap(ConstantFP::get(unwrap(RealTy), StringRef(Str, SLen))); } unsigned long long LLVMConstIntGetZExtValue(LLVMValueRef ConstantVal) { return unwrap(ConstantVal)->getZExtValue(); } long long LLVMConstIntGetSExtValue(LLVMValueRef ConstantVal) { return unwrap(ConstantVal)->getSExtValue(); } double LLVMConstRealGetDouble(LLVMValueRef ConstantVal, LLVMBool *LosesInfo) { ConstantFP *cFP = unwrap(ConstantVal) ; Type *Ty = cFP->getType(); if (Ty->isFloatTy()) { *LosesInfo = false; return cFP->getValueAPF().convertToFloat(); } if (Ty->isDoubleTy()) { *LosesInfo = false; return cFP->getValueAPF().convertToDouble(); } bool APFLosesInfo; APFloat APF = cFP->getValueAPF(); APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &APFLosesInfo); *LosesInfo = APFLosesInfo; return APF.convertToDouble(); } /*--.. Operations on composite constants ...................................--*/ LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str, unsigned Length, LLVMBool DontNullTerminate) { /* Inverted the sense of AddNull because ', 0)' is a better mnemonic for null termination than ', 1)'. 
*/ return wrap(ConstantDataArray::getString(*unwrap(C), StringRef(Str, Length), DontNullTerminate == 0)); } LLVMValueRef LLVMConstString(const char *Str, unsigned Length, LLVMBool DontNullTerminate) { return LLVMConstStringInContext(LLVMGetGlobalContext(), Str, Length, DontNullTerminate); } LLVMValueRef LLVMGetElementAsConstant(LLVMValueRef C, unsigned idx) { return wrap(unwrap(C)->getElementAsConstant(idx)); } LLVMBool LLVMIsConstantString(LLVMValueRef C) { return unwrap(C)->isString(); } const char *LLVMGetAsString(LLVMValueRef C, size_t *Length) { StringRef Str = unwrap(C)->getAsString(); *Length = Str.size(); return Str.data(); } LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy, LLVMValueRef *ConstantVals, unsigned Length) { ArrayRef V(unwrap(ConstantVals, Length), Length); return wrap(ConstantArray::get(ArrayType::get(unwrap(ElementTy), Length), V)); } LLVMValueRef LLVMConstStructInContext(LLVMContextRef C, LLVMValueRef *ConstantVals, unsigned Count, LLVMBool Packed) { Constant **Elements = unwrap(ConstantVals, Count); return wrap(ConstantStruct::getAnon(*unwrap(C), makeArrayRef(Elements, Count), Packed != 0)); } LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count, LLVMBool Packed) { return LLVMConstStructInContext(LLVMGetGlobalContext(), ConstantVals, Count, Packed); } LLVMValueRef LLVMConstNamedStruct(LLVMTypeRef StructTy, LLVMValueRef *ConstantVals, unsigned Count) { Constant **Elements = unwrap(ConstantVals, Count); StructType *Ty = cast(unwrap(StructTy)); return wrap(ConstantStruct::get(Ty, makeArrayRef(Elements, Count))); } LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size) { return wrap(ConstantVector::get(makeArrayRef( unwrap(ScalarConstantVals, Size), Size))); } /*-- Opcode mapping */ static LLVMOpcode map_to_llvmopcode(int opcode) { switch (opcode) { default: llvm_unreachable("Unhandled Opcode."); #define HANDLE_INST(num, opc, clas) case num: return LLVM##opc; #include "llvm/IR/Instruction.def" #undef HANDLE_INST } } static int map_from_llvmopcode(LLVMOpcode code) { switch (code) { #define HANDLE_INST(num, opc, clas) case LLVM##opc: return num; #include "llvm/IR/Instruction.def" #undef HANDLE_INST } llvm_unreachable("Unhandled Opcode."); } /*--.. 
Constant expressions ................................................--*/ LLVMOpcode LLVMGetConstOpcode(LLVMValueRef ConstantVal) { return map_to_llvmopcode(unwrap(ConstantVal)->getOpcode()); } LLVMValueRef LLVMAlignOf(LLVMTypeRef Ty) { return wrap(ConstantExpr::getAlignOf(unwrap(Ty))); } LLVMValueRef LLVMSizeOf(LLVMTypeRef Ty) { return wrap(ConstantExpr::getSizeOf(unwrap(Ty))); } LLVMValueRef LLVMConstNeg(LLVMValueRef ConstantVal) { return wrap(ConstantExpr::getNeg(unwrap(ConstantVal))); } LLVMValueRef LLVMConstNSWNeg(LLVMValueRef ConstantVal) { return wrap(ConstantExpr::getNSWNeg(unwrap(ConstantVal))); } LLVMValueRef LLVMConstNUWNeg(LLVMValueRef ConstantVal) { return wrap(ConstantExpr::getNUWNeg(unwrap(ConstantVal))); } LLVMValueRef LLVMConstFNeg(LLVMValueRef ConstantVal) { return wrap(ConstantExpr::getFNeg(unwrap(ConstantVal))); } LLVMValueRef LLVMConstNot(LLVMValueRef ConstantVal) { return wrap(ConstantExpr::getNot(unwrap(ConstantVal))); } LLVMValueRef LLVMConstAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getAdd(unwrap(LHSConstant), unwrap(RHSConstant))); } LLVMValueRef LLVMConstNSWAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getNSWAdd(unwrap(LHSConstant), unwrap(RHSConstant))); } LLVMValueRef LLVMConstNUWAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getNUWAdd(unwrap(LHSConstant), unwrap(RHSConstant))); } LLVMValueRef LLVMConstFAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getFAdd(unwrap(LHSConstant), unwrap(RHSConstant))); } LLVMValueRef LLVMConstSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getSub(unwrap(LHSConstant), unwrap(RHSConstant))); } LLVMValueRef LLVMConstNSWSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getNSWSub(unwrap(LHSConstant), unwrap(RHSConstant))); } LLVMValueRef LLVMConstNUWSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getNUWSub(unwrap(LHSConstant), unwrap(RHSConstant))); } LLVMValueRef LLVMConstFSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getFSub(unwrap(LHSConstant), unwrap(RHSConstant))); } LLVMValueRef LLVMConstMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getMul(unwrap(LHSConstant), unwrap(RHSConstant))); } LLVMValueRef LLVMConstNSWMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getNSWMul(unwrap(LHSConstant), unwrap(RHSConstant))); } LLVMValueRef LLVMConstNUWMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getNUWMul(unwrap(LHSConstant), unwrap(RHSConstant))); } LLVMValueRef LLVMConstFMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getFMul(unwrap(LHSConstant), unwrap(RHSConstant))); } LLVMValueRef LLVMConstUDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getUDiv(unwrap(LHSConstant), unwrap(RHSConstant))); } LLVMValueRef LLVMConstExactUDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getExactUDiv(unwrap(LHSConstant), unwrap(RHSConstant))); } LLVMValueRef LLVMConstSDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getSDiv(unwrap(LHSConstant), unwrap(RHSConstant))); } LLVMValueRef LLVMConstExactSDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getExactSDiv(unwrap(LHSConstant), 
unwrap(RHSConstant))); } LLVMValueRef LLVMConstFDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getFDiv(unwrap(LHSConstant), unwrap(RHSConstant))); } LLVMValueRef LLVMConstURem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getURem(unwrap(LHSConstant), unwrap(RHSConstant))); } LLVMValueRef LLVMConstSRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getSRem(unwrap(LHSConstant), unwrap(RHSConstant))); } LLVMValueRef LLVMConstFRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getFRem(unwrap(LHSConstant), unwrap(RHSConstant))); } LLVMValueRef LLVMConstAnd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getAnd(unwrap(LHSConstant), unwrap(RHSConstant))); } LLVMValueRef LLVMConstOr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getOr(unwrap(LHSConstant), unwrap(RHSConstant))); } LLVMValueRef LLVMConstXor(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getXor(unwrap(LHSConstant), unwrap(RHSConstant))); } LLVMValueRef LLVMConstICmp(LLVMIntPredicate Predicate, LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getICmp(Predicate, unwrap(LHSConstant), unwrap(RHSConstant))); } LLVMValueRef LLVMConstFCmp(LLVMRealPredicate Predicate, LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getFCmp(Predicate, unwrap(LHSConstant), unwrap(RHSConstant))); } LLVMValueRef LLVMConstShl(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getShl(unwrap(LHSConstant), unwrap(RHSConstant))); } LLVMValueRef LLVMConstLShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getLShr(unwrap(LHSConstant), unwrap(RHSConstant))); } LLVMValueRef LLVMConstAShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) { return wrap(ConstantExpr::getAShr(unwrap(LHSConstant), unwrap(RHSConstant))); } LLVMValueRef LLVMConstGEP(LLVMValueRef ConstantVal, LLVMValueRef *ConstantIndices, unsigned NumIndices) { ArrayRef IdxList(unwrap(ConstantIndices, NumIndices), NumIndices); Constant *Val = unwrap(ConstantVal); Type *Ty = cast(Val->getType()->getScalarType())->getElementType(); return wrap(ConstantExpr::getGetElementPtr(Ty, Val, IdxList)); } LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal, LLVMValueRef *ConstantIndices, unsigned NumIndices) { ArrayRef IdxList(unwrap(ConstantIndices, NumIndices), NumIndices); Constant *Val = unwrap(ConstantVal); Type *Ty = cast(Val->getType()->getScalarType())->getElementType(); return wrap(ConstantExpr::getInBoundsGetElementPtr(Ty, Val, IdxList)); } LLVMValueRef LLVMConstTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) { return wrap(ConstantExpr::getTrunc(unwrap(ConstantVal), unwrap(ToType))); } LLVMValueRef LLVMConstSExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) { return wrap(ConstantExpr::getSExt(unwrap(ConstantVal), unwrap(ToType))); } LLVMValueRef LLVMConstZExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) { return wrap(ConstantExpr::getZExt(unwrap(ConstantVal), unwrap(ToType))); } LLVMValueRef LLVMConstFPTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType) { return wrap(ConstantExpr::getFPTrunc(unwrap(ConstantVal), unwrap(ToType))); } LLVMValueRef LLVMConstFPExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) { return wrap(ConstantExpr::getFPExtend(unwrap(ConstantVal), unwrap(ToType))); } LLVMValueRef LLVMConstUIToFP(LLVMValueRef ConstantVal, 
LLVMTypeRef ToType) { return wrap(ConstantExpr::getUIToFP(unwrap(ConstantVal), unwrap(ToType))); } LLVMValueRef LLVMConstSIToFP(LLVMValueRef ConstantVal, LLVMTypeRef ToType) { return wrap(ConstantExpr::getSIToFP(unwrap(ConstantVal), unwrap(ToType))); } LLVMValueRef LLVMConstFPToUI(LLVMValueRef ConstantVal, LLVMTypeRef ToType) { return wrap(ConstantExpr::getFPToUI(unwrap(ConstantVal), unwrap(ToType))); } LLVMValueRef LLVMConstFPToSI(LLVMValueRef ConstantVal, LLVMTypeRef ToType) { return wrap(ConstantExpr::getFPToSI(unwrap(ConstantVal), unwrap(ToType))); } LLVMValueRef LLVMConstPtrToInt(LLVMValueRef ConstantVal, LLVMTypeRef ToType) { return wrap(ConstantExpr::getPtrToInt(unwrap(ConstantVal), unwrap(ToType))); } LLVMValueRef LLVMConstIntToPtr(LLVMValueRef ConstantVal, LLVMTypeRef ToType) { return wrap(ConstantExpr::getIntToPtr(unwrap(ConstantVal), unwrap(ToType))); } LLVMValueRef LLVMConstBitCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) { return wrap(ConstantExpr::getBitCast(unwrap(ConstantVal), unwrap(ToType))); } LLVMValueRef LLVMConstAddrSpaceCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) { return wrap(ConstantExpr::getAddrSpaceCast(unwrap(ConstantVal), unwrap(ToType))); } LLVMValueRef LLVMConstZExtOrBitCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) { return wrap(ConstantExpr::getZExtOrBitCast(unwrap(ConstantVal), unwrap(ToType))); } LLVMValueRef LLVMConstSExtOrBitCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) { return wrap(ConstantExpr::getSExtOrBitCast(unwrap(ConstantVal), unwrap(ToType))); } LLVMValueRef LLVMConstTruncOrBitCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) { return wrap(ConstantExpr::getTruncOrBitCast(unwrap(ConstantVal), unwrap(ToType))); } LLVMValueRef LLVMConstPointerCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) { return wrap(ConstantExpr::getPointerCast(unwrap(ConstantVal), unwrap(ToType))); } LLVMValueRef LLVMConstIntCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType, LLVMBool isSigned) { return wrap(ConstantExpr::getIntegerCast(unwrap(ConstantVal), unwrap(ToType), isSigned)); } LLVMValueRef LLVMConstFPCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) { return wrap(ConstantExpr::getFPCast(unwrap(ConstantVal), unwrap(ToType))); } LLVMValueRef LLVMConstSelect(LLVMValueRef ConstantCondition, LLVMValueRef ConstantIfTrue, LLVMValueRef ConstantIfFalse) { return wrap(ConstantExpr::getSelect(unwrap(ConstantCondition), unwrap(ConstantIfTrue), unwrap(ConstantIfFalse))); } LLVMValueRef LLVMConstExtractElement(LLVMValueRef VectorConstant, LLVMValueRef IndexConstant) { return wrap(ConstantExpr::getExtractElement(unwrap(VectorConstant), unwrap(IndexConstant))); } LLVMValueRef LLVMConstInsertElement(LLVMValueRef VectorConstant, LLVMValueRef ElementValueConstant, LLVMValueRef IndexConstant) { return wrap(ConstantExpr::getInsertElement(unwrap(VectorConstant), unwrap(ElementValueConstant), unwrap(IndexConstant))); } LLVMValueRef LLVMConstShuffleVector(LLVMValueRef VectorAConstant, LLVMValueRef VectorBConstant, LLVMValueRef MaskConstant) { SmallVector IntMask; ShuffleVectorInst::getShuffleMask(unwrap(MaskConstant), IntMask); return wrap(ConstantExpr::getShuffleVector(unwrap(VectorAConstant), unwrap(VectorBConstant), IntMask)); } LLVMValueRef LLVMConstExtractValue(LLVMValueRef AggConstant, unsigned *IdxList, unsigned NumIdx) { return wrap(ConstantExpr::getExtractValue(unwrap(AggConstant), makeArrayRef(IdxList, NumIdx))); } LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant, LLVMValueRef ElementValueConstant, unsigned *IdxList, unsigned NumIdx) { return 
wrap(ConstantExpr::getInsertValue(unwrap(AggConstant), unwrap(ElementValueConstant), makeArrayRef(IdxList, NumIdx))); } LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString, const char *Constraints, LLVMBool HasSideEffects, LLVMBool IsAlignStack) { return wrap(InlineAsm::get(dyn_cast(unwrap(Ty)), AsmString, Constraints, HasSideEffects, IsAlignStack)); } LLVMValueRef LLVMBlockAddress(LLVMValueRef F, LLVMBasicBlockRef BB) { return wrap(BlockAddress::get(unwrap(F), unwrap(BB))); } /*--.. Operations on global variables, functions, and aliases (globals) ....--*/ LLVMModuleRef LLVMGetGlobalParent(LLVMValueRef Global) { return wrap(unwrap(Global)->getParent()); } LLVMBool LLVMIsDeclaration(LLVMValueRef Global) { return unwrap(Global)->isDeclaration(); } LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) { switch (unwrap(Global)->getLinkage()) { case GlobalValue::ExternalLinkage: return LLVMExternalLinkage; case GlobalValue::AvailableExternallyLinkage: return LLVMAvailableExternallyLinkage; case GlobalValue::LinkOnceAnyLinkage: return LLVMLinkOnceAnyLinkage; case GlobalValue::LinkOnceODRLinkage: return LLVMLinkOnceODRLinkage; case GlobalValue::WeakAnyLinkage: return LLVMWeakAnyLinkage; case GlobalValue::WeakODRLinkage: return LLVMWeakODRLinkage; case GlobalValue::AppendingLinkage: return LLVMAppendingLinkage; case GlobalValue::InternalLinkage: return LLVMInternalLinkage; case GlobalValue::PrivateLinkage: return LLVMPrivateLinkage; case GlobalValue::ExternalWeakLinkage: return LLVMExternalWeakLinkage; case GlobalValue::CommonLinkage: return LLVMCommonLinkage; } llvm_unreachable("Invalid GlobalValue linkage!"); } void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) { GlobalValue *GV = unwrap(Global); switch (Linkage) { case LLVMExternalLinkage: GV->setLinkage(GlobalValue::ExternalLinkage); break; case LLVMAvailableExternallyLinkage: GV->setLinkage(GlobalValue::AvailableExternallyLinkage); break; case LLVMLinkOnceAnyLinkage: GV->setLinkage(GlobalValue::LinkOnceAnyLinkage); break; case LLVMLinkOnceODRLinkage: GV->setLinkage(GlobalValue::LinkOnceODRLinkage); break; case LLVMLinkOnceODRAutoHideLinkage: LLVM_DEBUG( errs() << "LLVMSetLinkage(): LLVMLinkOnceODRAutoHideLinkage is no " "longer supported."); break; case LLVMWeakAnyLinkage: GV->setLinkage(GlobalValue::WeakAnyLinkage); break; case LLVMWeakODRLinkage: GV->setLinkage(GlobalValue::WeakODRLinkage); break; case LLVMAppendingLinkage: GV->setLinkage(GlobalValue::AppendingLinkage); break; case LLVMInternalLinkage: GV->setLinkage(GlobalValue::InternalLinkage); break; case LLVMPrivateLinkage: GV->setLinkage(GlobalValue::PrivateLinkage); break; case LLVMLinkerPrivateLinkage: GV->setLinkage(GlobalValue::PrivateLinkage); break; case LLVMLinkerPrivateWeakLinkage: GV->setLinkage(GlobalValue::PrivateLinkage); break; case LLVMDLLImportLinkage: LLVM_DEBUG( errs() << "LLVMSetLinkage(): LLVMDLLImportLinkage is no longer supported."); break; case LLVMDLLExportLinkage: LLVM_DEBUG( errs() << "LLVMSetLinkage(): LLVMDLLExportLinkage is no longer supported."); break; case LLVMExternalWeakLinkage: GV->setLinkage(GlobalValue::ExternalWeakLinkage); break; case LLVMGhostLinkage: LLVM_DEBUG( errs() << "LLVMSetLinkage(): LLVMGhostLinkage is no longer supported."); break; case LLVMCommonLinkage: GV->setLinkage(GlobalValue::CommonLinkage); break; } } const char *LLVMGetSection(LLVMValueRef Global) { // Using .data() is safe because of how GlobalObject::setSection is // implemented. 
return unwrap<GlobalValue>(Global)->getSection().data();
}

void LLVMSetSection(LLVMValueRef Global, const char *Section) {
  unwrap<GlobalValue>(Global)->setSection(Section);
}

LLVMVisibility LLVMGetVisibility(LLVMValueRef Global) {
  return static_cast<LLVMVisibility>(
      unwrap<GlobalValue>(Global)->getVisibility());
}

void LLVMSetVisibility(LLVMValueRef Global, LLVMVisibility Viz) {
  unwrap<GlobalValue>(Global)
      ->setVisibility(static_cast<GlobalValue::VisibilityTypes>(Viz));
}

LLVMDLLStorageClass LLVMGetDLLStorageClass(LLVMValueRef Global) {
  return static_cast<LLVMDLLStorageClass>(
      unwrap<GlobalValue>(Global)->getDLLStorageClass());
}

void LLVMSetDLLStorageClass(LLVMValueRef Global, LLVMDLLStorageClass Class) {
  unwrap<GlobalValue>(Global)->setDLLStorageClass(
      static_cast<GlobalValue::DLLStorageClassTypes>(Class));
}

LLVMUnnamedAddr LLVMGetUnnamedAddress(LLVMValueRef Global) {
  switch (unwrap<GlobalValue>(Global)->getUnnamedAddr()) {
  case GlobalVariable::UnnamedAddr::None:
    return LLVMNoUnnamedAddr;
  case GlobalVariable::UnnamedAddr::Local:
    return LLVMLocalUnnamedAddr;
  case GlobalVariable::UnnamedAddr::Global:
    return LLVMGlobalUnnamedAddr;
  }
  llvm_unreachable("Unknown UnnamedAddr kind!");
}

void LLVMSetUnnamedAddress(LLVMValueRef Global, LLVMUnnamedAddr UnnamedAddr) {
  GlobalValue *GV = unwrap<GlobalValue>(Global);

  switch (UnnamedAddr) {
  case LLVMNoUnnamedAddr:
    return GV->setUnnamedAddr(GlobalVariable::UnnamedAddr::None);
  case LLVMLocalUnnamedAddr:
    return GV->setUnnamedAddr(GlobalVariable::UnnamedAddr::Local);
  case LLVMGlobalUnnamedAddr:
    return GV->setUnnamedAddr(GlobalVariable::UnnamedAddr::Global);
  }
}

LLVMBool LLVMHasUnnamedAddr(LLVMValueRef Global) {
  return unwrap<GlobalValue>(Global)->hasGlobalUnnamedAddr();
}

void LLVMSetUnnamedAddr(LLVMValueRef Global, LLVMBool HasUnnamedAddr) {
  unwrap<GlobalValue>(Global)->setUnnamedAddr(
      HasUnnamedAddr ? GlobalValue::UnnamedAddr::Global
                     : GlobalValue::UnnamedAddr::None);
}

LLVMTypeRef LLVMGlobalGetValueType(LLVMValueRef Global) {
  return wrap(unwrap<GlobalValue>(Global)->getValueType());
}

/*--..
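// Illustrative sketch (comment only, not part of the upstream file): typical
// client-side use of the global-value accessors defined above. `M` and `Ctx`
// are placeholders for an existing LLVMModuleRef and its LLVMContextRef.
//
//   LLVMTypeRef I32 = LLVMInt32TypeInContext(Ctx);
//   LLVMValueRef G = LLVMAddGlobal(M, I32, "counter");
//   LLVMSetInitializer(G, LLVMConstInt(I32, 0, 0));
//   LLVMSetSection(G, ".counters");              // forwards to setSection()
//   LLVMSetVisibility(G, LLVMHiddenVisibility);
//   LLVMSetUnnamedAddress(G, LLVMLocalUnnamedAddr);
//
// Each call is a thin wrapper: the opaque reference is unwrapped to the C++
// GlobalValue and the corresponding member function is invoked.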
Operations on global variables, load and store instructions .........--*/ unsigned LLVMGetAlignment(LLVMValueRef V) { Value *P = unwrap(V); if (GlobalObject *GV = dyn_cast(P)) return GV->getAlignment(); if (AllocaInst *AI = dyn_cast(P)) return AI->getAlignment(); if (LoadInst *LI = dyn_cast(P)) return LI->getAlignment(); if (StoreInst *SI = dyn_cast(P)) return SI->getAlignment(); llvm_unreachable( "only GlobalObject, AllocaInst, LoadInst and StoreInst have alignment"); } void LLVMSetAlignment(LLVMValueRef V, unsigned Bytes) { Value *P = unwrap(V); if (GlobalObject *GV = dyn_cast(P)) GV->setAlignment(MaybeAlign(Bytes)); else if (AllocaInst *AI = dyn_cast(P)) AI->setAlignment(Align(Bytes)); else if (LoadInst *LI = dyn_cast(P)) LI->setAlignment(Align(Bytes)); else if (StoreInst *SI = dyn_cast(P)) SI->setAlignment(Align(Bytes)); else llvm_unreachable( "only GlobalValue, AllocaInst, LoadInst and StoreInst have alignment"); } LLVMValueMetadataEntry *LLVMGlobalCopyAllMetadata(LLVMValueRef Value, size_t *NumEntries) { return llvm_getMetadata(NumEntries, [&Value](MetadataEntries &Entries) { Entries.clear(); if (Instruction *Instr = dyn_cast(unwrap(Value))) { Instr->getAllMetadata(Entries); } else { unwrap(Value)->getAllMetadata(Entries); } }); } unsigned LLVMValueMetadataEntriesGetKind(LLVMValueMetadataEntry *Entries, unsigned Index) { LLVMOpaqueValueMetadataEntry MVE = static_cast(Entries[Index]); return MVE.Kind; } LLVMMetadataRef LLVMValueMetadataEntriesGetMetadata(LLVMValueMetadataEntry *Entries, unsigned Index) { LLVMOpaqueValueMetadataEntry MVE = static_cast(Entries[Index]); return MVE.Metadata; } void LLVMDisposeValueMetadataEntries(LLVMValueMetadataEntry *Entries) { free(Entries); } void LLVMGlobalSetMetadata(LLVMValueRef Global, unsigned Kind, LLVMMetadataRef MD) { unwrap(Global)->setMetadata(Kind, unwrap(MD)); } void LLVMGlobalEraseMetadata(LLVMValueRef Global, unsigned Kind) { unwrap(Global)->eraseMetadata(Kind); } void LLVMGlobalClearMetadata(LLVMValueRef Global) { unwrap(Global)->clearMetadata(); } /*--.. 
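// Illustrative sketch (comment only): the metadata-entry helpers above hand
// ownership of the returned array to the caller, so each
// LLVMGlobalCopyAllMetadata() call must be paired with
// LLVMDisposeValueMetadataEntries(). `G` is a placeholder LLVMValueRef.
//
//   size_t Count = 0;
//   LLVMValueMetadataEntry *MDs = LLVMGlobalCopyAllMetadata(G, &Count);
//   for (unsigned i = 0; i < Count; ++i) {
//     unsigned Kind = LLVMValueMetadataEntriesGetKind(MDs, i);
//     LLVMMetadataRef Node = LLVMValueMetadataEntriesGetMetadata(MDs, i);
//     // ... inspect Kind / Node ...
//   }
//   LLVMDisposeValueMetadataEntries(MDs);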
Operations on global variables ......................................--*/ LLVMValueRef LLVMAddGlobal(LLVMModuleRef M, LLVMTypeRef Ty, const char *Name) { return wrap(new GlobalVariable(*unwrap(M), unwrap(Ty), false, GlobalValue::ExternalLinkage, nullptr, Name)); } LLVMValueRef LLVMAddGlobalInAddressSpace(LLVMModuleRef M, LLVMTypeRef Ty, const char *Name, unsigned AddressSpace) { return wrap(new GlobalVariable(*unwrap(M), unwrap(Ty), false, GlobalValue::ExternalLinkage, nullptr, Name, nullptr, GlobalVariable::NotThreadLocal, AddressSpace)); } LLVMValueRef LLVMGetNamedGlobal(LLVMModuleRef M, const char *Name) { return wrap(unwrap(M)->getNamedGlobal(Name)); } LLVMValueRef LLVMGetFirstGlobal(LLVMModuleRef M) { Module *Mod = unwrap(M); Module::global_iterator I = Mod->global_begin(); if (I == Mod->global_end()) return nullptr; return wrap(&*I); } LLVMValueRef LLVMGetLastGlobal(LLVMModuleRef M) { Module *Mod = unwrap(M); Module::global_iterator I = Mod->global_end(); if (I == Mod->global_begin()) return nullptr; return wrap(&*--I); } LLVMValueRef LLVMGetNextGlobal(LLVMValueRef GlobalVar) { GlobalVariable *GV = unwrap(GlobalVar); Module::global_iterator I(GV); if (++I == GV->getParent()->global_end()) return nullptr; return wrap(&*I); } LLVMValueRef LLVMGetPreviousGlobal(LLVMValueRef GlobalVar) { GlobalVariable *GV = unwrap(GlobalVar); Module::global_iterator I(GV); if (I == GV->getParent()->global_begin()) return nullptr; return wrap(&*--I); } void LLVMDeleteGlobal(LLVMValueRef GlobalVar) { unwrap(GlobalVar)->eraseFromParent(); } LLVMValueRef LLVMGetInitializer(LLVMValueRef GlobalVar) { GlobalVariable* GV = unwrap(GlobalVar); if ( !GV->hasInitializer() ) return nullptr; return wrap(GV->getInitializer()); } void LLVMSetInitializer(LLVMValueRef GlobalVar, LLVMValueRef ConstantVal) { unwrap(GlobalVar) ->setInitializer(unwrap(ConstantVal)); } LLVMBool LLVMIsThreadLocal(LLVMValueRef GlobalVar) { return unwrap(GlobalVar)->isThreadLocal(); } void LLVMSetThreadLocal(LLVMValueRef GlobalVar, LLVMBool IsThreadLocal) { unwrap(GlobalVar)->setThreadLocal(IsThreadLocal != 0); } LLVMBool LLVMIsGlobalConstant(LLVMValueRef GlobalVar) { return unwrap(GlobalVar)->isConstant(); } void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, LLVMBool IsConstant) { unwrap(GlobalVar)->setConstant(IsConstant != 0); } LLVMThreadLocalMode LLVMGetThreadLocalMode(LLVMValueRef GlobalVar) { switch (unwrap(GlobalVar)->getThreadLocalMode()) { case GlobalVariable::NotThreadLocal: return LLVMNotThreadLocal; case GlobalVariable::GeneralDynamicTLSModel: return LLVMGeneralDynamicTLSModel; case GlobalVariable::LocalDynamicTLSModel: return LLVMLocalDynamicTLSModel; case GlobalVariable::InitialExecTLSModel: return LLVMInitialExecTLSModel; case GlobalVariable::LocalExecTLSModel: return LLVMLocalExecTLSModel; } llvm_unreachable("Invalid GlobalVariable thread local mode"); } void LLVMSetThreadLocalMode(LLVMValueRef GlobalVar, LLVMThreadLocalMode Mode) { GlobalVariable *GV = unwrap(GlobalVar); switch (Mode) { case LLVMNotThreadLocal: GV->setThreadLocalMode(GlobalVariable::NotThreadLocal); break; case LLVMGeneralDynamicTLSModel: GV->setThreadLocalMode(GlobalVariable::GeneralDynamicTLSModel); break; case LLVMLocalDynamicTLSModel: GV->setThreadLocalMode(GlobalVariable::LocalDynamicTLSModel); break; case LLVMInitialExecTLSModel: GV->setThreadLocalMode(GlobalVariable::InitialExecTLSModel); break; case LLVMLocalExecTLSModel: GV->setThreadLocalMode(GlobalVariable::LocalExecTLSModel); break; } } LLVMBool LLVMIsExternallyInitialized(LLVMValueRef GlobalVar) { 
return unwrap(GlobalVar)->isExternallyInitialized(); } void LLVMSetExternallyInitialized(LLVMValueRef GlobalVar, LLVMBool IsExtInit) { unwrap(GlobalVar)->setExternallyInitialized(IsExtInit); } /*--.. Operations on aliases ......................................--*/ LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee, const char *Name) { auto *PTy = cast(unwrap(Ty)); return wrap(GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(), GlobalValue::ExternalLinkage, Name, unwrap(Aliasee), unwrap(M))); } LLVMValueRef LLVMGetNamedGlobalAlias(LLVMModuleRef M, const char *Name, size_t NameLen) { return wrap(unwrap(M)->getNamedAlias(Name)); } LLVMValueRef LLVMGetFirstGlobalAlias(LLVMModuleRef M) { Module *Mod = unwrap(M); Module::alias_iterator I = Mod->alias_begin(); if (I == Mod->alias_end()) return nullptr; return wrap(&*I); } LLVMValueRef LLVMGetLastGlobalAlias(LLVMModuleRef M) { Module *Mod = unwrap(M); Module::alias_iterator I = Mod->alias_end(); if (I == Mod->alias_begin()) return nullptr; return wrap(&*--I); } LLVMValueRef LLVMGetNextGlobalAlias(LLVMValueRef GA) { GlobalAlias *Alias = unwrap(GA); Module::alias_iterator I(Alias); if (++I == Alias->getParent()->alias_end()) return nullptr; return wrap(&*I); } LLVMValueRef LLVMGetPreviousGlobalAlias(LLVMValueRef GA) { GlobalAlias *Alias = unwrap(GA); Module::alias_iterator I(Alias); if (I == Alias->getParent()->alias_begin()) return nullptr; return wrap(&*--I); } LLVMValueRef LLVMAliasGetAliasee(LLVMValueRef Alias) { return wrap(unwrap(Alias)->getAliasee()); } void LLVMAliasSetAliasee(LLVMValueRef Alias, LLVMValueRef Aliasee) { unwrap(Alias)->setAliasee(unwrap(Aliasee)); } /*--.. Operations on functions .............................................--*/ LLVMValueRef LLVMAddFunction(LLVMModuleRef M, const char *Name, LLVMTypeRef FunctionTy) { return wrap(Function::Create(unwrap(FunctionTy), GlobalValue::ExternalLinkage, Name, unwrap(M))); } LLVMValueRef LLVMGetNamedFunction(LLVMModuleRef M, const char *Name) { return wrap(unwrap(M)->getFunction(Name)); } LLVMValueRef LLVMGetFirstFunction(LLVMModuleRef M) { Module *Mod = unwrap(M); Module::iterator I = Mod->begin(); if (I == Mod->end()) return nullptr; return wrap(&*I); } LLVMValueRef LLVMGetLastFunction(LLVMModuleRef M) { Module *Mod = unwrap(M); Module::iterator I = Mod->end(); if (I == Mod->begin()) return nullptr; return wrap(&*--I); } LLVMValueRef LLVMGetNextFunction(LLVMValueRef Fn) { Function *Func = unwrap(Fn); Module::iterator I(Func); if (++I == Func->getParent()->end()) return nullptr; return wrap(&*I); } LLVMValueRef LLVMGetPreviousFunction(LLVMValueRef Fn) { Function *Func = unwrap(Fn); Module::iterator I(Func); if (I == Func->getParent()->begin()) return nullptr; return wrap(&*--I); } void LLVMDeleteFunction(LLVMValueRef Fn) { unwrap(Fn)->eraseFromParent(); } LLVMBool LLVMHasPersonalityFn(LLVMValueRef Fn) { return unwrap(Fn)->hasPersonalityFn(); } LLVMValueRef LLVMGetPersonalityFn(LLVMValueRef Fn) { return wrap(unwrap(Fn)->getPersonalityFn()); } void LLVMSetPersonalityFn(LLVMValueRef Fn, LLVMValueRef PersonalityFn) { unwrap(Fn)->setPersonalityFn(unwrap(PersonalityFn)); } unsigned LLVMGetIntrinsicID(LLVMValueRef Fn) { if (Function *F = dyn_cast(unwrap(Fn))) return F->getIntrinsicID(); return 0; } static Intrinsic::ID llvm_map_to_intrinsic_id(unsigned ID) { assert(ID < llvm::Intrinsic::num_intrinsics && "Intrinsic ID out of range"); return llvm::Intrinsic::ID(ID); } LLVMValueRef LLVMGetIntrinsicDeclaration(LLVMModuleRef Mod, unsigned ID, 
LLVMTypeRef *ParamTypes, size_t ParamCount) { ArrayRef Tys(unwrap(ParamTypes), ParamCount); auto IID = llvm_map_to_intrinsic_id(ID); return wrap(llvm::Intrinsic::getDeclaration(unwrap(Mod), IID, Tys)); } const char *LLVMIntrinsicGetName(unsigned ID, size_t *NameLength) { auto IID = llvm_map_to_intrinsic_id(ID); auto Str = llvm::Intrinsic::getName(IID); *NameLength = Str.size(); return Str.data(); } LLVMTypeRef LLVMIntrinsicGetType(LLVMContextRef Ctx, unsigned ID, LLVMTypeRef *ParamTypes, size_t ParamCount) { auto IID = llvm_map_to_intrinsic_id(ID); ArrayRef Tys(unwrap(ParamTypes), ParamCount); return wrap(llvm::Intrinsic::getType(*unwrap(Ctx), IID, Tys)); } const char *LLVMIntrinsicCopyOverloadedName(unsigned ID, LLVMTypeRef *ParamTypes, size_t ParamCount, size_t *NameLength) { auto IID = llvm_map_to_intrinsic_id(ID); ArrayRef Tys(unwrap(ParamTypes), ParamCount); auto Str = llvm::Intrinsic::getName(IID, Tys); *NameLength = Str.length(); return strdup(Str.c_str()); } unsigned LLVMLookupIntrinsicID(const char *Name, size_t NameLen) { return Function::lookupIntrinsicID({Name, NameLen}); } LLVMBool LLVMIntrinsicIsOverloaded(unsigned ID) { auto IID = llvm_map_to_intrinsic_id(ID); return llvm::Intrinsic::isOverloaded(IID); } unsigned LLVMGetFunctionCallConv(LLVMValueRef Fn) { return unwrap(Fn)->getCallingConv(); } void LLVMSetFunctionCallConv(LLVMValueRef Fn, unsigned CC) { return unwrap(Fn)->setCallingConv( static_cast(CC)); } const char *LLVMGetGC(LLVMValueRef Fn) { Function *F = unwrap(Fn); return F->hasGC()? F->getGC().c_str() : nullptr; } void LLVMSetGC(LLVMValueRef Fn, const char *GC) { Function *F = unwrap(Fn); if (GC) F->setGC(GC); else F->clearGC(); } void LLVMAddAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx, LLVMAttributeRef A) { unwrap(F)->addAttribute(Idx, unwrap(A)); } unsigned LLVMGetAttributeCountAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx) { auto AS = unwrap(F)->getAttributes().getAttributes(Idx); return AS.getNumAttributes(); } void LLVMGetAttributesAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx, LLVMAttributeRef *Attrs) { auto AS = unwrap(F)->getAttributes().getAttributes(Idx); for (auto A : AS) *Attrs++ = wrap(A); } LLVMAttributeRef LLVMGetEnumAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx, unsigned KindID) { return wrap(unwrap(F)->getAttribute(Idx, (Attribute::AttrKind)KindID)); } LLVMAttributeRef LLVMGetStringAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx, const char *K, unsigned KLen) { return wrap(unwrap(F)->getAttribute(Idx, StringRef(K, KLen))); } void LLVMRemoveEnumAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx, unsigned KindID) { unwrap(F)->removeAttribute(Idx, (Attribute::AttrKind)KindID); } void LLVMRemoveStringAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx, const char *K, unsigned KLen) { unwrap(F)->removeAttribute(Idx, StringRef(K, KLen)); } void LLVMAddTargetDependentFunctionAttr(LLVMValueRef Fn, const char *A, const char *V) { Function *Func = unwrap(Fn); Attribute Attr = Attribute::get(Func->getContext(), A, V); Func->addAttribute(AttributeList::FunctionIndex, Attr); } /*--.. 
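// Illustrative sketch (comment only): resolving an overloaded intrinsic with
// the entry points defined above. `M` and `Ctx` are placeholder module and
// context references; llvm.ctpop is used only as an example of an intrinsic
// that is overloaded on its operand type.
//
//   unsigned ID = LLVMLookupIntrinsicID("llvm.ctpop", 10);
//   LLVMTypeRef Params[] = { LLVMInt32TypeInContext(Ctx) };
//   LLVMValueRef Decl = LLVMGetIntrinsicDeclaration(M, ID, Params, 1);
//   // LLVMIntrinsicIsOverloaded(ID) reports whether Params were required.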
Operations on parameters ............................................--*/ unsigned LLVMCountParams(LLVMValueRef FnRef) { // This function is strictly redundant to // LLVMCountParamTypes(LLVMGetElementType(LLVMTypeOf(FnRef))) return unwrap(FnRef)->arg_size(); } void LLVMGetParams(LLVMValueRef FnRef, LLVMValueRef *ParamRefs) { Function *Fn = unwrap(FnRef); for (Function::arg_iterator I = Fn->arg_begin(), E = Fn->arg_end(); I != E; I++) *ParamRefs++ = wrap(&*I); } LLVMValueRef LLVMGetParam(LLVMValueRef FnRef, unsigned index) { Function *Fn = unwrap(FnRef); return wrap(&Fn->arg_begin()[index]); } LLVMValueRef LLVMGetParamParent(LLVMValueRef V) { return wrap(unwrap(V)->getParent()); } LLVMValueRef LLVMGetFirstParam(LLVMValueRef Fn) { Function *Func = unwrap(Fn); Function::arg_iterator I = Func->arg_begin(); if (I == Func->arg_end()) return nullptr; return wrap(&*I); } LLVMValueRef LLVMGetLastParam(LLVMValueRef Fn) { Function *Func = unwrap(Fn); Function::arg_iterator I = Func->arg_end(); if (I == Func->arg_begin()) return nullptr; return wrap(&*--I); } LLVMValueRef LLVMGetNextParam(LLVMValueRef Arg) { Argument *A = unwrap(Arg); Function *Fn = A->getParent(); if (A->getArgNo() + 1 >= Fn->arg_size()) return nullptr; return wrap(&Fn->arg_begin()[A->getArgNo() + 1]); } LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg) { Argument *A = unwrap(Arg); if (A->getArgNo() == 0) return nullptr; return wrap(&A->getParent()->arg_begin()[A->getArgNo() - 1]); } void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align) { Argument *A = unwrap(Arg); A->addAttr(Attribute::getWithAlignment(A->getContext(), Align(align))); } /*--.. Operations on ifuncs ................................................--*/ LLVMValueRef LLVMAddGlobalIFunc(LLVMModuleRef M, const char *Name, size_t NameLen, LLVMTypeRef Ty, unsigned AddrSpace, LLVMValueRef Resolver) { return wrap(GlobalIFunc::create(unwrap(Ty), AddrSpace, GlobalValue::ExternalLinkage, StringRef(Name, NameLen), unwrap(Resolver), unwrap(M))); } LLVMValueRef LLVMGetNamedGlobalIFunc(LLVMModuleRef M, const char *Name, size_t NameLen) { return wrap(unwrap(M)->getNamedIFunc(StringRef(Name, NameLen))); } LLVMValueRef LLVMGetFirstGlobalIFunc(LLVMModuleRef M) { Module *Mod = unwrap(M); Module::ifunc_iterator I = Mod->ifunc_begin(); if (I == Mod->ifunc_end()) return nullptr; return wrap(&*I); } LLVMValueRef LLVMGetLastGlobalIFunc(LLVMModuleRef M) { Module *Mod = unwrap(M); Module::ifunc_iterator I = Mod->ifunc_end(); if (I == Mod->ifunc_begin()) return nullptr; return wrap(&*--I); } LLVMValueRef LLVMGetNextGlobalIFunc(LLVMValueRef IFunc) { GlobalIFunc *GIF = unwrap(IFunc); Module::ifunc_iterator I(GIF); if (++I == GIF->getParent()->ifunc_end()) return nullptr; return wrap(&*I); } LLVMValueRef LLVMGetPreviousGlobalIFunc(LLVMValueRef IFunc) { GlobalIFunc *GIF = unwrap(IFunc); Module::ifunc_iterator I(GIF); if (I == GIF->getParent()->ifunc_begin()) return nullptr; return wrap(&*--I); } LLVMValueRef LLVMGetGlobalIFuncResolver(LLVMValueRef IFunc) { return wrap(unwrap(IFunc)->getResolver()); } void LLVMSetGlobalIFuncResolver(LLVMValueRef IFunc, LLVMValueRef Resolver) { unwrap(IFunc)->setResolver(unwrap(Resolver)); } void LLVMEraseGlobalIFunc(LLVMValueRef IFunc) { unwrap(IFunc)->eraseFromParent(); } void LLVMRemoveGlobalIFunc(LLVMValueRef IFunc) { unwrap(IFunc)->removeFromParent(); } /*--.. 
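// Illustrative sketch (comment only): walking a function's arguments with the
// parameter accessors defined above. `Fn` is a placeholder LLVMValueRef for a
// function.
//
//   for (LLVMValueRef P = LLVMGetFirstParam(Fn); P != NULL;
//        P = LLVMGetNextParam(P)) {
//     // e.g. force pointer arguments to a known alignment
//     if (LLVMGetTypeKind(LLVMTypeOf(P)) == LLVMPointerTypeKind)
//       LLVMSetParamAlignment(P, 8);
//   }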
Operations on basic blocks ..........................................--*/ LLVMValueRef LLVMBasicBlockAsValue(LLVMBasicBlockRef BB) { return wrap(static_cast(unwrap(BB))); } LLVMBool LLVMValueIsBasicBlock(LLVMValueRef Val) { return isa(unwrap(Val)); } LLVMBasicBlockRef LLVMValueAsBasicBlock(LLVMValueRef Val) { return wrap(unwrap(Val)); } const char *LLVMGetBasicBlockName(LLVMBasicBlockRef BB) { return unwrap(BB)->getName().data(); } LLVMValueRef LLVMGetBasicBlockParent(LLVMBasicBlockRef BB) { return wrap(unwrap(BB)->getParent()); } LLVMValueRef LLVMGetBasicBlockTerminator(LLVMBasicBlockRef BB) { return wrap(unwrap(BB)->getTerminator()); } unsigned LLVMCountBasicBlocks(LLVMValueRef FnRef) { return unwrap(FnRef)->size(); } void LLVMGetBasicBlocks(LLVMValueRef FnRef, LLVMBasicBlockRef *BasicBlocksRefs){ Function *Fn = unwrap(FnRef); for (BasicBlock &BB : *Fn) *BasicBlocksRefs++ = wrap(&BB); } LLVMBasicBlockRef LLVMGetEntryBasicBlock(LLVMValueRef Fn) { return wrap(&unwrap(Fn)->getEntryBlock()); } LLVMBasicBlockRef LLVMGetFirstBasicBlock(LLVMValueRef Fn) { Function *Func = unwrap(Fn); Function::iterator I = Func->begin(); if (I == Func->end()) return nullptr; return wrap(&*I); } LLVMBasicBlockRef LLVMGetLastBasicBlock(LLVMValueRef Fn) { Function *Func = unwrap(Fn); Function::iterator I = Func->end(); if (I == Func->begin()) return nullptr; return wrap(&*--I); } LLVMBasicBlockRef LLVMGetNextBasicBlock(LLVMBasicBlockRef BB) { BasicBlock *Block = unwrap(BB); Function::iterator I(Block); if (++I == Block->getParent()->end()) return nullptr; return wrap(&*I); } LLVMBasicBlockRef LLVMGetPreviousBasicBlock(LLVMBasicBlockRef BB) { BasicBlock *Block = unwrap(BB); Function::iterator I(Block); if (I == Block->getParent()->begin()) return nullptr; return wrap(&*--I); } LLVMBasicBlockRef LLVMCreateBasicBlockInContext(LLVMContextRef C, const char *Name) { return wrap(llvm::BasicBlock::Create(*unwrap(C), Name)); } void LLVMInsertExistingBasicBlockAfterInsertBlock(LLVMBuilderRef Builder, LLVMBasicBlockRef BB) { BasicBlock *ToInsert = unwrap(BB); BasicBlock *CurBB = unwrap(Builder)->GetInsertBlock(); assert(CurBB && "current insertion point is invalid!"); CurBB->getParent()->getBasicBlockList().insertAfter(CurBB->getIterator(), ToInsert); } void LLVMAppendExistingBasicBlock(LLVMValueRef Fn, LLVMBasicBlockRef BB) { unwrap(Fn)->getBasicBlockList().push_back(unwrap(BB)); } LLVMBasicBlockRef LLVMAppendBasicBlockInContext(LLVMContextRef C, LLVMValueRef FnRef, const char *Name) { return wrap(BasicBlock::Create(*unwrap(C), Name, unwrap(FnRef))); } LLVMBasicBlockRef LLVMAppendBasicBlock(LLVMValueRef FnRef, const char *Name) { return LLVMAppendBasicBlockInContext(LLVMGetGlobalContext(), FnRef, Name); } LLVMBasicBlockRef LLVMInsertBasicBlockInContext(LLVMContextRef C, LLVMBasicBlockRef BBRef, const char *Name) { BasicBlock *BB = unwrap(BBRef); return wrap(BasicBlock::Create(*unwrap(C), Name, BB->getParent(), BB)); } LLVMBasicBlockRef LLVMInsertBasicBlock(LLVMBasicBlockRef BBRef, const char *Name) { return LLVMInsertBasicBlockInContext(LLVMGetGlobalContext(), BBRef, Name); } void LLVMDeleteBasicBlock(LLVMBasicBlockRef BBRef) { unwrap(BBRef)->eraseFromParent(); } void LLVMRemoveBasicBlockFromParent(LLVMBasicBlockRef BBRef) { unwrap(BBRef)->removeFromParent(); } void LLVMMoveBasicBlockBefore(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos) { unwrap(BB)->moveBefore(unwrap(MovePos)); } void LLVMMoveBasicBlockAfter(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos) { unwrap(BB)->moveAfter(unwrap(MovePos)); } /*--.. 
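// Illustrative sketch (comment only): how a front end typically grows a
// function body with the block helpers above. `Ctx`, `Fn` and `Builder` are
// placeholders for an existing context, function and builder.
//
//   LLVMBasicBlockRef Entry = LLVMAppendBasicBlockInContext(Ctx, Fn, "entry");
//   LLVMBasicBlockRef Exit  = LLVMAppendBasicBlockInContext(Ctx, Fn, "exit");
//   LLVMPositionBuilderAtEnd(Builder, Entry);
//   LLVMBuildBr(Builder, Exit);                 // terminate the entry block
//   LLVMPositionBuilderAtEnd(Builder, Exit);
//   LLVMBuildRetVoid(Builder);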
Operations on instructions ..........................................--*/

LLVMBasicBlockRef LLVMGetInstructionParent(LLVMValueRef Inst) {
  return wrap(unwrap<Instruction>(Inst)->getParent());
}

LLVMValueRef LLVMGetFirstInstruction(LLVMBasicBlockRef BB) {
  BasicBlock *Block = unwrap(BB);
  BasicBlock::iterator I = Block->begin();
  if (I == Block->end())
    return nullptr;
  return wrap(&*I);
}

LLVMValueRef LLVMGetLastInstruction(LLVMBasicBlockRef BB) {
  BasicBlock *Block = unwrap(BB);
  BasicBlock::iterator I = Block->end();
  if (I == Block->begin())
    return nullptr;
  return wrap(&*--I);
}

LLVMValueRef LLVMGetNextInstruction(LLVMValueRef Inst) {
  Instruction *Instr = unwrap<Instruction>(Inst);
  BasicBlock::iterator I(Instr);
  if (++I == Instr->getParent()->end())
    return nullptr;
  return wrap(&*I);
}

LLVMValueRef LLVMGetPreviousInstruction(LLVMValueRef Inst) {
  Instruction *Instr = unwrap<Instruction>(Inst);
  BasicBlock::iterator I(Instr);
  if (I == Instr->getParent()->begin())
    return nullptr;
  return wrap(&*--I);
}

void LLVMInstructionRemoveFromParent(LLVMValueRef Inst) {
  unwrap<Instruction>(Inst)->removeFromParent();
}

void LLVMInstructionEraseFromParent(LLVMValueRef Inst) {
  unwrap<Instruction>(Inst)->eraseFromParent();
}

LLVMIntPredicate LLVMGetICmpPredicate(LLVMValueRef Inst) {
  if (ICmpInst *I = dyn_cast<ICmpInst>(unwrap(Inst)))
    return (LLVMIntPredicate)I->getPredicate();
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(unwrap(Inst)))
    if (CE->getOpcode() == Instruction::ICmp)
      return (LLVMIntPredicate)CE->getPredicate();
  return (LLVMIntPredicate)0;
}

LLVMRealPredicate LLVMGetFCmpPredicate(LLVMValueRef Inst) {
  if (FCmpInst *I = dyn_cast<FCmpInst>(unwrap(Inst)))
    return (LLVMRealPredicate)I->getPredicate();
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(unwrap(Inst)))
    if (CE->getOpcode() == Instruction::FCmp)
      return (LLVMRealPredicate)CE->getPredicate();
  return (LLVMRealPredicate)0;
}

LLVMOpcode LLVMGetInstructionOpcode(LLVMValueRef Inst) {
  if (Instruction *C = dyn_cast<Instruction>(unwrap(Inst)))
    return map_to_llvmopcode(C->getOpcode());
  return (LLVMOpcode)0;
}

LLVMValueRef LLVMInstructionClone(LLVMValueRef Inst) {
  if (Instruction *C = dyn_cast<Instruction>(unwrap(Inst)))
    return wrap(C->clone());
  return nullptr;
}

LLVMValueRef LLVMIsATerminatorInst(LLVMValueRef Inst) {
  Instruction *I = dyn_cast<Instruction>(unwrap(Inst));
  return (I && I->isTerminator()) ? wrap(I) : nullptr;
}

unsigned LLVMGetNumArgOperands(LLVMValueRef Instr) {
  if (FuncletPadInst *FPI = dyn_cast<FuncletPadInst>(unwrap(Instr))) {
    return FPI->getNumArgOperands();
  }
  return unwrap<CallBase>(Instr)->getNumArgOperands();
}

/*--..
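// Illustrative sketch (comment only): the iterators above are the C-API
// equivalent of walking a BasicBlock's instruction list. `BB` is a
// placeholder LLVMBasicBlockRef.
//
//   for (LLVMValueRef I = LLVMGetFirstInstruction(BB); I != NULL;
//        I = LLVMGetNextInstruction(I)) {
//     if (LLVMGetInstructionOpcode(I) == LLVMICmp)
//       (void)LLVMGetICmpPredicate(I);          // inspect the predicate
//   }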
Call and invoke instructions ........................................--*/ unsigned LLVMGetInstructionCallConv(LLVMValueRef Instr) { return unwrap(Instr)->getCallingConv(); } void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC) { return unwrap(Instr)->setCallingConv( static_cast(CC)); } void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index, unsigned align) { auto *Call = unwrap(Instr); Attribute AlignAttr = Attribute::getWithAlignment(Call->getContext(), Align(align)); Call->addAttribute(index, AlignAttr); } void LLVMAddCallSiteAttribute(LLVMValueRef C, LLVMAttributeIndex Idx, LLVMAttributeRef A) { unwrap(C)->addAttribute(Idx, unwrap(A)); } unsigned LLVMGetCallSiteAttributeCount(LLVMValueRef C, LLVMAttributeIndex Idx) { auto *Call = unwrap(C); auto AS = Call->getAttributes().getAttributes(Idx); return AS.getNumAttributes(); } void LLVMGetCallSiteAttributes(LLVMValueRef C, LLVMAttributeIndex Idx, LLVMAttributeRef *Attrs) { auto *Call = unwrap(C); auto AS = Call->getAttributes().getAttributes(Idx); for (auto A : AS) *Attrs++ = wrap(A); } LLVMAttributeRef LLVMGetCallSiteEnumAttribute(LLVMValueRef C, LLVMAttributeIndex Idx, unsigned KindID) { return wrap( unwrap(C)->getAttribute(Idx, (Attribute::AttrKind)KindID)); } LLVMAttributeRef LLVMGetCallSiteStringAttribute(LLVMValueRef C, LLVMAttributeIndex Idx, const char *K, unsigned KLen) { return wrap(unwrap(C)->getAttribute(Idx, StringRef(K, KLen))); } void LLVMRemoveCallSiteEnumAttribute(LLVMValueRef C, LLVMAttributeIndex Idx, unsigned KindID) { unwrap(C)->removeAttribute(Idx, (Attribute::AttrKind)KindID); } void LLVMRemoveCallSiteStringAttribute(LLVMValueRef C, LLVMAttributeIndex Idx, const char *K, unsigned KLen) { unwrap(C)->removeAttribute(Idx, StringRef(K, KLen)); } LLVMValueRef LLVMGetCalledValue(LLVMValueRef Instr) { return wrap(unwrap(Instr)->getCalledOperand()); } LLVMTypeRef LLVMGetCalledFunctionType(LLVMValueRef Instr) { return wrap(unwrap(Instr)->getFunctionType()); } /*--.. Operations on call instructions (only) ..............................--*/ LLVMBool LLVMIsTailCall(LLVMValueRef Call) { return unwrap(Call)->isTailCall(); } void LLVMSetTailCall(LLVMValueRef Call, LLVMBool isTailCall) { unwrap(Call)->setTailCall(isTailCall); } /*--.. Operations on invoke instructions (only) ............................--*/ LLVMBasicBlockRef LLVMGetNormalDest(LLVMValueRef Invoke) { return wrap(unwrap(Invoke)->getNormalDest()); } LLVMBasicBlockRef LLVMGetUnwindDest(LLVMValueRef Invoke) { if (CleanupReturnInst *CRI = dyn_cast(unwrap(Invoke))) { return wrap(CRI->getUnwindDest()); } else if (CatchSwitchInst *CSI = dyn_cast(unwrap(Invoke))) { return wrap(CSI->getUnwindDest()); } return wrap(unwrap(Invoke)->getUnwindDest()); } void LLVMSetNormalDest(LLVMValueRef Invoke, LLVMBasicBlockRef B) { unwrap(Invoke)->setNormalDest(unwrap(B)); } void LLVMSetUnwindDest(LLVMValueRef Invoke, LLVMBasicBlockRef B) { if (CleanupReturnInst *CRI = dyn_cast(unwrap(Invoke))) { return CRI->setUnwindDest(unwrap(B)); } else if (CatchSwitchInst *CSI = dyn_cast(unwrap(Invoke))) { return CSI->setUnwindDest(unwrap(B)); } unwrap(Invoke)->setUnwindDest(unwrap(B)); } /*--.. 
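// Illustrative sketch (comment only): adjusting a call site through the
// wrappers above. `Call` is a placeholder LLVMValueRef known to refer to a
// CallInst (LLVMSetTailCall unwraps to CallInst, so it is only valid there).
//
//   LLVMSetInstructionCallConv(Call, LLVMFastCallConv);
//   LLVMSetTailCall(Call, 1);
//   LLVMTypeRef CalleeTy = LLVMGetCalledFunctionType(Call);
//   LLVMValueRef Callee  = LLVMGetCalledValue(Call);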
Operations on terminators ...........................................--*/ unsigned LLVMGetNumSuccessors(LLVMValueRef Term) { return unwrap(Term)->getNumSuccessors(); } LLVMBasicBlockRef LLVMGetSuccessor(LLVMValueRef Term, unsigned i) { return wrap(unwrap(Term)->getSuccessor(i)); } void LLVMSetSuccessor(LLVMValueRef Term, unsigned i, LLVMBasicBlockRef block) { return unwrap(Term)->setSuccessor(i, unwrap(block)); } /*--.. Operations on branch instructions (only) ............................--*/ LLVMBool LLVMIsConditional(LLVMValueRef Branch) { return unwrap(Branch)->isConditional(); } LLVMValueRef LLVMGetCondition(LLVMValueRef Branch) { return wrap(unwrap(Branch)->getCondition()); } void LLVMSetCondition(LLVMValueRef Branch, LLVMValueRef Cond) { return unwrap(Branch)->setCondition(unwrap(Cond)); } /*--.. Operations on switch instructions (only) ............................--*/ LLVMBasicBlockRef LLVMGetSwitchDefaultDest(LLVMValueRef Switch) { return wrap(unwrap(Switch)->getDefaultDest()); } /*--.. Operations on alloca instructions (only) ............................--*/ LLVMTypeRef LLVMGetAllocatedType(LLVMValueRef Alloca) { return wrap(unwrap(Alloca)->getAllocatedType()); } /*--.. Operations on gep instructions (only) ...............................--*/ LLVMBool LLVMIsInBounds(LLVMValueRef GEP) { return unwrap(GEP)->isInBounds(); } void LLVMSetIsInBounds(LLVMValueRef GEP, LLVMBool InBounds) { return unwrap(GEP)->setIsInBounds(InBounds); } /*--.. Operations on phi nodes .............................................--*/ void LLVMAddIncoming(LLVMValueRef PhiNode, LLVMValueRef *IncomingValues, LLVMBasicBlockRef *IncomingBlocks, unsigned Count) { PHINode *PhiVal = unwrap(PhiNode); for (unsigned I = 0; I != Count; ++I) PhiVal->addIncoming(unwrap(IncomingValues[I]), unwrap(IncomingBlocks[I])); } unsigned LLVMCountIncoming(LLVMValueRef PhiNode) { return unwrap(PhiNode)->getNumIncomingValues(); } LLVMValueRef LLVMGetIncomingValue(LLVMValueRef PhiNode, unsigned Index) { return wrap(unwrap(PhiNode)->getIncomingValue(Index)); } LLVMBasicBlockRef LLVMGetIncomingBlock(LLVMValueRef PhiNode, unsigned Index) { return wrap(unwrap(PhiNode)->getIncomingBlock(Index)); } /*--.. Operations on extractvalue and insertvalue nodes ....................--*/ unsigned LLVMGetNumIndices(LLVMValueRef Inst) { auto *I = unwrap(Inst); if (auto *GEP = dyn_cast(I)) return GEP->getNumIndices(); if (auto *EV = dyn_cast(I)) return EV->getNumIndices(); if (auto *IV = dyn_cast(I)) return IV->getNumIndices(); if (auto *CE = dyn_cast(I)) return CE->getIndices().size(); llvm_unreachable( "LLVMGetNumIndices applies only to extractvalue and insertvalue!"); } const unsigned *LLVMGetIndices(LLVMValueRef Inst) { auto *I = unwrap(Inst); if (auto *EV = dyn_cast(I)) return EV->getIndices().data(); if (auto *IV = dyn_cast(I)) return IV->getIndices().data(); if (auto *CE = dyn_cast(I)) return CE->getIndices().data(); llvm_unreachable( "LLVMGetIndices applies only to extractvalue and insertvalue!"); } /*===-- Instruction builders ----------------------------------------------===*/ LLVMBuilderRef LLVMCreateBuilderInContext(LLVMContextRef C) { return wrap(new IRBuilder<>(*unwrap(C))); } LLVMBuilderRef LLVMCreateBuilder(void) { return LLVMCreateBuilderInContext(LLVMGetGlobalContext()); } void LLVMPositionBuilder(LLVMBuilderRef Builder, LLVMBasicBlockRef Block, LLVMValueRef Instr) { BasicBlock *BB = unwrap(Block); auto I = Instr ? 
unwrap(Instr)->getIterator() : BB->end(); unwrap(Builder)->SetInsertPoint(BB, I); } void LLVMPositionBuilderBefore(LLVMBuilderRef Builder, LLVMValueRef Instr) { Instruction *I = unwrap(Instr); unwrap(Builder)->SetInsertPoint(I->getParent(), I->getIterator()); } void LLVMPositionBuilderAtEnd(LLVMBuilderRef Builder, LLVMBasicBlockRef Block) { BasicBlock *BB = unwrap(Block); unwrap(Builder)->SetInsertPoint(BB); } LLVMBasicBlockRef LLVMGetInsertBlock(LLVMBuilderRef Builder) { return wrap(unwrap(Builder)->GetInsertBlock()); } void LLVMClearInsertionPosition(LLVMBuilderRef Builder) { unwrap(Builder)->ClearInsertionPoint(); } void LLVMInsertIntoBuilder(LLVMBuilderRef Builder, LLVMValueRef Instr) { unwrap(Builder)->Insert(unwrap(Instr)); } void LLVMInsertIntoBuilderWithName(LLVMBuilderRef Builder, LLVMValueRef Instr, const char *Name) { unwrap(Builder)->Insert(unwrap(Instr), Name); } void LLVMDisposeBuilder(LLVMBuilderRef Builder) { delete unwrap(Builder); } /*--.. Metadata builders ...................................................--*/ LLVMMetadataRef LLVMGetCurrentDebugLocation2(LLVMBuilderRef Builder) { return wrap(unwrap(Builder)->getCurrentDebugLocation().getAsMDNode()); } void LLVMSetCurrentDebugLocation2(LLVMBuilderRef Builder, LLVMMetadataRef Loc) { if (Loc) unwrap(Builder)->SetCurrentDebugLocation(DebugLoc(unwrap(Loc))); else unwrap(Builder)->SetCurrentDebugLocation(DebugLoc()); } void LLVMSetCurrentDebugLocation(LLVMBuilderRef Builder, LLVMValueRef L) { MDNode *Loc = L ? cast(unwrap(L)->getMetadata()) : nullptr; unwrap(Builder)->SetCurrentDebugLocation(DebugLoc(Loc)); } LLVMValueRef LLVMGetCurrentDebugLocation(LLVMBuilderRef Builder) { LLVMContext &Context = unwrap(Builder)->getContext(); return wrap(MetadataAsValue::get( Context, unwrap(Builder)->getCurrentDebugLocation().getAsMDNode())); } void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst) { unwrap(Builder)->SetInstDebugLocation(unwrap(Inst)); } void LLVMBuilderSetDefaultFPMathTag(LLVMBuilderRef Builder, LLVMMetadataRef FPMathTag) { unwrap(Builder)->setDefaultFPMathTag(FPMathTag ? unwrap(FPMathTag) : nullptr); } LLVMMetadataRef LLVMBuilderGetDefaultFPMathTag(LLVMBuilderRef Builder) { return wrap(unwrap(Builder)->getDefaultFPMathTag()); } /*--.. 
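// Illustrative sketch (comment only): builder lifetime as seen from the C
// API. LLVMCreateBuilderInContext allocates an IRBuilder on the heap, so the
// reference must eventually be released with LLVMDisposeBuilder. `Ctx` and
// `BB` are placeholders.
//
//   LLVMBuilderRef B = LLVMCreateBuilderInContext(Ctx);
//   LLVMPositionBuilderAtEnd(B, BB);
//   // ... emit instructions ...
//   LLVMDisposeBuilder(B);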
Instruction builders ................................................--*/ LLVMValueRef LLVMBuildRetVoid(LLVMBuilderRef B) { return wrap(unwrap(B)->CreateRetVoid()); } LLVMValueRef LLVMBuildRet(LLVMBuilderRef B, LLVMValueRef V) { return wrap(unwrap(B)->CreateRet(unwrap(V))); } LLVMValueRef LLVMBuildAggregateRet(LLVMBuilderRef B, LLVMValueRef *RetVals, unsigned N) { return wrap(unwrap(B)->CreateAggregateRet(unwrap(RetVals), N)); } LLVMValueRef LLVMBuildBr(LLVMBuilderRef B, LLVMBasicBlockRef Dest) { return wrap(unwrap(B)->CreateBr(unwrap(Dest))); } LLVMValueRef LLVMBuildCondBr(LLVMBuilderRef B, LLVMValueRef If, LLVMBasicBlockRef Then, LLVMBasicBlockRef Else) { return wrap(unwrap(B)->CreateCondBr(unwrap(If), unwrap(Then), unwrap(Else))); } LLVMValueRef LLVMBuildSwitch(LLVMBuilderRef B, LLVMValueRef V, LLVMBasicBlockRef Else, unsigned NumCases) { return wrap(unwrap(B)->CreateSwitch(unwrap(V), unwrap(Else), NumCases)); } LLVMValueRef LLVMBuildIndirectBr(LLVMBuilderRef B, LLVMValueRef Addr, unsigned NumDests) { return wrap(unwrap(B)->CreateIndirectBr(unwrap(Addr), NumDests)); } LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn, LLVMValueRef *Args, unsigned NumArgs, LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch, const char *Name) { Value *V = unwrap(Fn); FunctionType *FnT = cast(cast(V->getType())->getElementType()); return wrap( unwrap(B)->CreateInvoke(FnT, unwrap(Fn), unwrap(Then), unwrap(Catch), makeArrayRef(unwrap(Args), NumArgs), Name)); } LLVMValueRef LLVMBuildInvoke2(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Fn, LLVMValueRef *Args, unsigned NumArgs, LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch, const char *Name) { return wrap(unwrap(B)->CreateInvoke( unwrap(Ty), unwrap(Fn), unwrap(Then), unwrap(Catch), makeArrayRef(unwrap(Args), NumArgs), Name)); } LLVMValueRef LLVMBuildLandingPad(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef PersFn, unsigned NumClauses, const char *Name) { // The personality used to live on the landingpad instruction, but now it // lives on the parent function. For compatibility, take the provided // personality and put it on the parent function. 
if (PersFn) unwrap(B)->GetInsertBlock()->getParent()->setPersonalityFn( cast(unwrap(PersFn))); return wrap(unwrap(B)->CreateLandingPad(unwrap(Ty), NumClauses, Name)); } LLVMValueRef LLVMBuildCatchPad(LLVMBuilderRef B, LLVMValueRef ParentPad, LLVMValueRef *Args, unsigned NumArgs, const char *Name) { return wrap(unwrap(B)->CreateCatchPad(unwrap(ParentPad), makeArrayRef(unwrap(Args), NumArgs), Name)); } LLVMValueRef LLVMBuildCleanupPad(LLVMBuilderRef B, LLVMValueRef ParentPad, LLVMValueRef *Args, unsigned NumArgs, const char *Name) { if (ParentPad == nullptr) { Type *Ty = Type::getTokenTy(unwrap(B)->getContext()); ParentPad = wrap(Constant::getNullValue(Ty)); } return wrap(unwrap(B)->CreateCleanupPad(unwrap(ParentPad), makeArrayRef(unwrap(Args), NumArgs), Name)); } LLVMValueRef LLVMBuildResume(LLVMBuilderRef B, LLVMValueRef Exn) { return wrap(unwrap(B)->CreateResume(unwrap(Exn))); } LLVMValueRef LLVMBuildCatchSwitch(LLVMBuilderRef B, LLVMValueRef ParentPad, LLVMBasicBlockRef UnwindBB, unsigned NumHandlers, const char *Name) { if (ParentPad == nullptr) { Type *Ty = Type::getTokenTy(unwrap(B)->getContext()); ParentPad = wrap(Constant::getNullValue(Ty)); } return wrap(unwrap(B)->CreateCatchSwitch(unwrap(ParentPad), unwrap(UnwindBB), NumHandlers, Name)); } LLVMValueRef LLVMBuildCatchRet(LLVMBuilderRef B, LLVMValueRef CatchPad, LLVMBasicBlockRef BB) { return wrap(unwrap(B)->CreateCatchRet(unwrap(CatchPad), unwrap(BB))); } LLVMValueRef LLVMBuildCleanupRet(LLVMBuilderRef B, LLVMValueRef CatchPad, LLVMBasicBlockRef BB) { return wrap(unwrap(B)->CreateCleanupRet(unwrap(CatchPad), unwrap(BB))); } LLVMValueRef LLVMBuildUnreachable(LLVMBuilderRef B) { return wrap(unwrap(B)->CreateUnreachable()); } void LLVMAddCase(LLVMValueRef Switch, LLVMValueRef OnVal, LLVMBasicBlockRef Dest) { unwrap(Switch)->addCase(unwrap(OnVal), unwrap(Dest)); } void LLVMAddDestination(LLVMValueRef IndirectBr, LLVMBasicBlockRef Dest) { unwrap(IndirectBr)->addDestination(unwrap(Dest)); } unsigned LLVMGetNumClauses(LLVMValueRef LandingPad) { return unwrap(LandingPad)->getNumClauses(); } LLVMValueRef LLVMGetClause(LLVMValueRef LandingPad, unsigned Idx) { return wrap(unwrap(LandingPad)->getClause(Idx)); } void LLVMAddClause(LLVMValueRef LandingPad, LLVMValueRef ClauseVal) { unwrap(LandingPad)-> addClause(cast(unwrap(ClauseVal))); } LLVMBool LLVMIsCleanup(LLVMValueRef LandingPad) { return unwrap(LandingPad)->isCleanup(); } void LLVMSetCleanup(LLVMValueRef LandingPad, LLVMBool Val) { unwrap(LandingPad)->setCleanup(Val); } void LLVMAddHandler(LLVMValueRef CatchSwitch, LLVMBasicBlockRef Dest) { unwrap(CatchSwitch)->addHandler(unwrap(Dest)); } unsigned LLVMGetNumHandlers(LLVMValueRef CatchSwitch) { return unwrap(CatchSwitch)->getNumHandlers(); } void LLVMGetHandlers(LLVMValueRef CatchSwitch, LLVMBasicBlockRef *Handlers) { CatchSwitchInst *CSI = unwrap(CatchSwitch); for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(), E = CSI->handler_end(); I != E; ++I) *Handlers++ = wrap(*I); } LLVMValueRef LLVMGetParentCatchSwitch(LLVMValueRef CatchPad) { return wrap(unwrap(CatchPad)->getCatchSwitch()); } void LLVMSetParentCatchSwitch(LLVMValueRef CatchPad, LLVMValueRef CatchSwitch) { unwrap(CatchPad) ->setCatchSwitch(unwrap(CatchSwitch)); } /*--.. 
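// Illustrative sketch (comment only): LLVMBuildSwitch only reserves room for
// NumCases; the cases themselves are attached afterwards with LLVMAddCase.
// `B`, `Cond`, `Default`, `OnZero` and `I32` are placeholders.
//
//   LLVMValueRef Sw = LLVMBuildSwitch(B, Cond, Default, 1);
//   LLVMAddCase(Sw, LLVMConstInt(I32, 0, 0), OnZero);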
Funclets ...........................................................--*/ LLVMValueRef LLVMGetArgOperand(LLVMValueRef Funclet, unsigned i) { return wrap(unwrap(Funclet)->getArgOperand(i)); } void LLVMSetArgOperand(LLVMValueRef Funclet, unsigned i, LLVMValueRef value) { unwrap(Funclet)->setArgOperand(i, unwrap(value)); } /*--.. Arithmetic ..........................................................--*/ LLVMValueRef LLVMBuildAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateAdd(unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildNSWAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateNSWAdd(unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildNUWAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateNUWAdd(unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildFAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateFAdd(unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateSub(unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildNSWSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateNSWSub(unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildNUWSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateNUWSub(unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildFSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateFSub(unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateMul(unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildNSWMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateNSWMul(unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildNUWMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateNUWMul(unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildFMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateFMul(unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildUDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateUDiv(unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildExactUDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateExactUDiv(unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildSDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateSDiv(unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildExactSDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateExactSDiv(unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildFDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateFDiv(unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildURem(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateURem(unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef 
LLVMBuildSRem(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateSRem(unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildFRem(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateFRem(unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildShl(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateShl(unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildLShr(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateLShr(unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildAShr(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateAShr(unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildAnd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateAnd(unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildOr(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateOr(unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildXor(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateXor(unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildBinOp(LLVMBuilderRef B, LLVMOpcode Op, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateBinOp(Instruction::BinaryOps(map_from_llvmopcode(Op)), unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildNeg(LLVMBuilderRef B, LLVMValueRef V, const char *Name) { return wrap(unwrap(B)->CreateNeg(unwrap(V), Name)); } LLVMValueRef LLVMBuildNSWNeg(LLVMBuilderRef B, LLVMValueRef V, const char *Name) { return wrap(unwrap(B)->CreateNSWNeg(unwrap(V), Name)); } LLVMValueRef LLVMBuildNUWNeg(LLVMBuilderRef B, LLVMValueRef V, const char *Name) { return wrap(unwrap(B)->CreateNUWNeg(unwrap(V), Name)); } LLVMValueRef LLVMBuildFNeg(LLVMBuilderRef B, LLVMValueRef V, const char *Name) { return wrap(unwrap(B)->CreateFNeg(unwrap(V), Name)); } LLVMValueRef LLVMBuildNot(LLVMBuilderRef B, LLVMValueRef V, const char *Name) { return wrap(unwrap(B)->CreateNot(unwrap(V), Name)); } /*--.. 
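// Illustrative sketch (comment only): the arithmetic wrappers above differ
// only in which IRBuilder Create* overload they forward to; the nsw/nuw
// variants set the corresponding overflow flags. `B`, `X` and `Y` are
// placeholders.
//
//   LLVMValueRef Sum  = LLVMBuildNSWAdd(B, X, Y, "sum");   // add nsw
//   LLVMValueRef Diff = LLVMBuildSub(B, X, Y, "diff");     // plain sub
//   LLVMValueRef Neg  = LLVMBuildNeg(B, Diff, "neg");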
Memory ..............................................................--*/ LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef B, LLVMTypeRef Ty, const char *Name) { Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext()); Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty)); AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy); Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(), ITy, unwrap(Ty), AllocSize, nullptr, nullptr, ""); return wrap(unwrap(B)->Insert(Malloc, Twine(Name))); } LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Val, const char *Name) { Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext()); Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty)); AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy); Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(), ITy, unwrap(Ty), AllocSize, unwrap(Val), nullptr, ""); return wrap(unwrap(B)->Insert(Malloc, Twine(Name))); } LLVMValueRef LLVMBuildMemSet(LLVMBuilderRef B, LLVMValueRef Ptr, LLVMValueRef Val, LLVMValueRef Len, unsigned Align) { return wrap(unwrap(B)->CreateMemSet(unwrap(Ptr), unwrap(Val), unwrap(Len), MaybeAlign(Align))); } LLVMValueRef LLVMBuildMemCpy(LLVMBuilderRef B, LLVMValueRef Dst, unsigned DstAlign, LLVMValueRef Src, unsigned SrcAlign, LLVMValueRef Size) { return wrap(unwrap(B)->CreateMemCpy(unwrap(Dst), MaybeAlign(DstAlign), unwrap(Src), MaybeAlign(SrcAlign), unwrap(Size))); } LLVMValueRef LLVMBuildMemMove(LLVMBuilderRef B, LLVMValueRef Dst, unsigned DstAlign, LLVMValueRef Src, unsigned SrcAlign, LLVMValueRef Size) { return wrap(unwrap(B)->CreateMemMove(unwrap(Dst), MaybeAlign(DstAlign), unwrap(Src), MaybeAlign(SrcAlign), unwrap(Size))); } LLVMValueRef LLVMBuildAlloca(LLVMBuilderRef B, LLVMTypeRef Ty, const char *Name) { return wrap(unwrap(B)->CreateAlloca(unwrap(Ty), nullptr, Name)); } LLVMValueRef LLVMBuildArrayAlloca(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Val, const char *Name) { return wrap(unwrap(B)->CreateAlloca(unwrap(Ty), unwrap(Val), Name)); } LLVMValueRef LLVMBuildFree(LLVMBuilderRef B, LLVMValueRef PointerVal) { return wrap(unwrap(B)->Insert( CallInst::CreateFree(unwrap(PointerVal), unwrap(B)->GetInsertBlock()))); } LLVMValueRef LLVMBuildLoad(LLVMBuilderRef B, LLVMValueRef PointerVal, const char *Name) { Value *V = unwrap(PointerVal); PointerType *Ty = cast(V->getType()); return wrap(unwrap(B)->CreateLoad(Ty->getElementType(), V, Name)); } LLVMValueRef LLVMBuildLoad2(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef PointerVal, const char *Name) { return wrap(unwrap(B)->CreateLoad(unwrap(Ty), unwrap(PointerVal), Name)); } LLVMValueRef LLVMBuildStore(LLVMBuilderRef B, LLVMValueRef Val, LLVMValueRef PointerVal) { return wrap(unwrap(B)->CreateStore(unwrap(Val), unwrap(PointerVal))); } static AtomicOrdering mapFromLLVMOrdering(LLVMAtomicOrdering Ordering) { switch (Ordering) { case LLVMAtomicOrderingNotAtomic: return AtomicOrdering::NotAtomic; case LLVMAtomicOrderingUnordered: return AtomicOrdering::Unordered; case LLVMAtomicOrderingMonotonic: return AtomicOrdering::Monotonic; case LLVMAtomicOrderingAcquire: return AtomicOrdering::Acquire; case LLVMAtomicOrderingRelease: return AtomicOrdering::Release; case LLVMAtomicOrderingAcquireRelease: return AtomicOrdering::AcquireRelease; case LLVMAtomicOrderingSequentiallyConsistent: return AtomicOrdering::SequentiallyConsistent; } llvm_unreachable("Invalid LLVMAtomicOrdering value!"); } static LLVMAtomicOrdering 
mapToLLVMOrdering(AtomicOrdering Ordering) { switch (Ordering) { case AtomicOrdering::NotAtomic: return LLVMAtomicOrderingNotAtomic; case AtomicOrdering::Unordered: return LLVMAtomicOrderingUnordered; case AtomicOrdering::Monotonic: return LLVMAtomicOrderingMonotonic; case AtomicOrdering::Acquire: return LLVMAtomicOrderingAcquire; case AtomicOrdering::Release: return LLVMAtomicOrderingRelease; case AtomicOrdering::AcquireRelease: return LLVMAtomicOrderingAcquireRelease; case AtomicOrdering::SequentiallyConsistent: return LLVMAtomicOrderingSequentiallyConsistent; } llvm_unreachable("Invalid AtomicOrdering value!"); } static AtomicRMWInst::BinOp mapFromLLVMRMWBinOp(LLVMAtomicRMWBinOp BinOp) { switch (BinOp) { case LLVMAtomicRMWBinOpXchg: return AtomicRMWInst::Xchg; case LLVMAtomicRMWBinOpAdd: return AtomicRMWInst::Add; case LLVMAtomicRMWBinOpSub: return AtomicRMWInst::Sub; case LLVMAtomicRMWBinOpAnd: return AtomicRMWInst::And; case LLVMAtomicRMWBinOpNand: return AtomicRMWInst::Nand; case LLVMAtomicRMWBinOpOr: return AtomicRMWInst::Or; case LLVMAtomicRMWBinOpXor: return AtomicRMWInst::Xor; case LLVMAtomicRMWBinOpMax: return AtomicRMWInst::Max; case LLVMAtomicRMWBinOpMin: return AtomicRMWInst::Min; case LLVMAtomicRMWBinOpUMax: return AtomicRMWInst::UMax; case LLVMAtomicRMWBinOpUMin: return AtomicRMWInst::UMin; case LLVMAtomicRMWBinOpFAdd: return AtomicRMWInst::FAdd; case LLVMAtomicRMWBinOpFSub: return AtomicRMWInst::FSub; } llvm_unreachable("Invalid LLVMAtomicRMWBinOp value!"); } static LLVMAtomicRMWBinOp mapToLLVMRMWBinOp(AtomicRMWInst::BinOp BinOp) { switch (BinOp) { case AtomicRMWInst::Xchg: return LLVMAtomicRMWBinOpXchg; case AtomicRMWInst::Add: return LLVMAtomicRMWBinOpAdd; case AtomicRMWInst::Sub: return LLVMAtomicRMWBinOpSub; case AtomicRMWInst::And: return LLVMAtomicRMWBinOpAnd; case AtomicRMWInst::Nand: return LLVMAtomicRMWBinOpNand; case AtomicRMWInst::Or: return LLVMAtomicRMWBinOpOr; case AtomicRMWInst::Xor: return LLVMAtomicRMWBinOpXor; case AtomicRMWInst::Max: return LLVMAtomicRMWBinOpMax; case AtomicRMWInst::Min: return LLVMAtomicRMWBinOpMin; case AtomicRMWInst::UMax: return LLVMAtomicRMWBinOpUMax; case AtomicRMWInst::UMin: return LLVMAtomicRMWBinOpUMin; case AtomicRMWInst::FAdd: return LLVMAtomicRMWBinOpFAdd; case AtomicRMWInst::FSub: return LLVMAtomicRMWBinOpFSub; default: break; } llvm_unreachable("Invalid AtomicRMWBinOp value!"); } // TODO: Should this and other atomic instructions support building with // "syncscope"? LLVMValueRef LLVMBuildFence(LLVMBuilderRef B, LLVMAtomicOrdering Ordering, LLVMBool isSingleThread, const char *Name) { return wrap( unwrap(B)->CreateFence(mapFromLLVMOrdering(Ordering), isSingleThread ? 
SyncScope::SingleThread : SyncScope::System, Name)); } LLVMValueRef LLVMBuildGEP(LLVMBuilderRef B, LLVMValueRef Pointer, LLVMValueRef *Indices, unsigned NumIndices, const char *Name) { ArrayRef IdxList(unwrap(Indices), NumIndices); Value *Val = unwrap(Pointer); Type *Ty = cast(Val->getType()->getScalarType())->getElementType(); return wrap(unwrap(B)->CreateGEP(Ty, Val, IdxList, Name)); } LLVMValueRef LLVMBuildGEP2(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Pointer, LLVMValueRef *Indices, unsigned NumIndices, const char *Name) { ArrayRef IdxList(unwrap(Indices), NumIndices); return wrap(unwrap(B)->CreateGEP(unwrap(Ty), unwrap(Pointer), IdxList, Name)); } LLVMValueRef LLVMBuildInBoundsGEP(LLVMBuilderRef B, LLVMValueRef Pointer, LLVMValueRef *Indices, unsigned NumIndices, const char *Name) { ArrayRef IdxList(unwrap(Indices), NumIndices); Value *Val = unwrap(Pointer); Type *Ty = cast(Val->getType()->getScalarType())->getElementType(); return wrap(unwrap(B)->CreateInBoundsGEP(Ty, Val, IdxList, Name)); } LLVMValueRef LLVMBuildInBoundsGEP2(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Pointer, LLVMValueRef *Indices, unsigned NumIndices, const char *Name) { ArrayRef IdxList(unwrap(Indices), NumIndices); return wrap( unwrap(B)->CreateInBoundsGEP(unwrap(Ty), unwrap(Pointer), IdxList, Name)); } LLVMValueRef LLVMBuildStructGEP(LLVMBuilderRef B, LLVMValueRef Pointer, unsigned Idx, const char *Name) { Value *Val = unwrap(Pointer); Type *Ty = cast(Val->getType()->getScalarType())->getElementType(); return wrap(unwrap(B)->CreateStructGEP(Ty, Val, Idx, Name)); } LLVMValueRef LLVMBuildStructGEP2(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Pointer, unsigned Idx, const char *Name) { return wrap( unwrap(B)->CreateStructGEP(unwrap(Ty), unwrap(Pointer), Idx, Name)); } LLVMValueRef LLVMBuildGlobalString(LLVMBuilderRef B, const char *Str, const char *Name) { return wrap(unwrap(B)->CreateGlobalString(Str, Name)); } LLVMValueRef LLVMBuildGlobalStringPtr(LLVMBuilderRef B, const char *Str, const char *Name) { return wrap(unwrap(B)->CreateGlobalStringPtr(Str, Name)); } LLVMBool LLVMGetVolatile(LLVMValueRef MemAccessInst) { Value *P = unwrap(MemAccessInst); if (LoadInst *LI = dyn_cast(P)) return LI->isVolatile(); if (StoreInst *SI = dyn_cast(P)) return SI->isVolatile(); if (AtomicRMWInst *AI = dyn_cast(P)) return AI->isVolatile(); return cast(P)->isVolatile(); } void LLVMSetVolatile(LLVMValueRef MemAccessInst, LLVMBool isVolatile) { Value *P = unwrap(MemAccessInst); if (LoadInst *LI = dyn_cast(P)) return LI->setVolatile(isVolatile); if (StoreInst *SI = dyn_cast(P)) return SI->setVolatile(isVolatile); if (AtomicRMWInst *AI = dyn_cast(P)) return AI->setVolatile(isVolatile); return cast(P)->setVolatile(isVolatile); } LLVMBool LLVMGetWeak(LLVMValueRef CmpXchgInst) { return unwrap(CmpXchgInst)->isWeak(); } void LLVMSetWeak(LLVMValueRef CmpXchgInst, LLVMBool isWeak) { return unwrap(CmpXchgInst)->setWeak(isWeak); } LLVMAtomicOrdering LLVMGetOrdering(LLVMValueRef MemAccessInst) { Value *P = unwrap(MemAccessInst); AtomicOrdering O; if (LoadInst *LI = dyn_cast(P)) O = LI->getOrdering(); else if (StoreInst *SI = dyn_cast(P)) O = SI->getOrdering(); else O = cast(P)->getOrdering(); return mapToLLVMOrdering(O); } void LLVMSetOrdering(LLVMValueRef MemAccessInst, LLVMAtomicOrdering Ordering) { Value *P = unwrap(MemAccessInst); AtomicOrdering O = mapFromLLVMOrdering(Ordering); if (LoadInst *LI = dyn_cast(P)) return LI->setOrdering(O); return cast(P)->setOrdering(O); } LLVMAtomicRMWBinOp LLVMGetAtomicRMWBinOp(LLVMValueRef Inst) { 
return mapToLLVMRMWBinOp(unwrap(Inst)->getOperation()); } void LLVMSetAtomicRMWBinOp(LLVMValueRef Inst, LLVMAtomicRMWBinOp BinOp) { unwrap(Inst)->setOperation(mapFromLLVMRMWBinOp(BinOp)); } /*--.. Casts ...............................................................--*/ LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef B, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name) { return wrap(unwrap(B)->CreateTrunc(unwrap(Val), unwrap(DestTy), Name)); } LLVMValueRef LLVMBuildZExt(LLVMBuilderRef B, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name) { return wrap(unwrap(B)->CreateZExt(unwrap(Val), unwrap(DestTy), Name)); } LLVMValueRef LLVMBuildSExt(LLVMBuilderRef B, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name) { return wrap(unwrap(B)->CreateSExt(unwrap(Val), unwrap(DestTy), Name)); } LLVMValueRef LLVMBuildFPToUI(LLVMBuilderRef B, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name) { return wrap(unwrap(B)->CreateFPToUI(unwrap(Val), unwrap(DestTy), Name)); } LLVMValueRef LLVMBuildFPToSI(LLVMBuilderRef B, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name) { return wrap(unwrap(B)->CreateFPToSI(unwrap(Val), unwrap(DestTy), Name)); } LLVMValueRef LLVMBuildUIToFP(LLVMBuilderRef B, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name) { return wrap(unwrap(B)->CreateUIToFP(unwrap(Val), unwrap(DestTy), Name)); } LLVMValueRef LLVMBuildSIToFP(LLVMBuilderRef B, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name) { return wrap(unwrap(B)->CreateSIToFP(unwrap(Val), unwrap(DestTy), Name)); } LLVMValueRef LLVMBuildFPTrunc(LLVMBuilderRef B, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name) { return wrap(unwrap(B)->CreateFPTrunc(unwrap(Val), unwrap(DestTy), Name)); } LLVMValueRef LLVMBuildFPExt(LLVMBuilderRef B, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name) { return wrap(unwrap(B)->CreateFPExt(unwrap(Val), unwrap(DestTy), Name)); } LLVMValueRef LLVMBuildPtrToInt(LLVMBuilderRef B, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name) { return wrap(unwrap(B)->CreatePtrToInt(unwrap(Val), unwrap(DestTy), Name)); } LLVMValueRef LLVMBuildIntToPtr(LLVMBuilderRef B, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name) { return wrap(unwrap(B)->CreateIntToPtr(unwrap(Val), unwrap(DestTy), Name)); } LLVMValueRef LLVMBuildBitCast(LLVMBuilderRef B, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name) { return wrap(unwrap(B)->CreateBitCast(unwrap(Val), unwrap(DestTy), Name)); } LLVMValueRef LLVMBuildAddrSpaceCast(LLVMBuilderRef B, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name) { return wrap(unwrap(B)->CreateAddrSpaceCast(unwrap(Val), unwrap(DestTy), Name)); } LLVMValueRef LLVMBuildZExtOrBitCast(LLVMBuilderRef B, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name) { return wrap(unwrap(B)->CreateZExtOrBitCast(unwrap(Val), unwrap(DestTy), Name)); } LLVMValueRef LLVMBuildSExtOrBitCast(LLVMBuilderRef B, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name) { return wrap(unwrap(B)->CreateSExtOrBitCast(unwrap(Val), unwrap(DestTy), Name)); } LLVMValueRef LLVMBuildTruncOrBitCast(LLVMBuilderRef B, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name) { return wrap(unwrap(B)->CreateTruncOrBitCast(unwrap(Val), unwrap(DestTy), Name)); } LLVMValueRef LLVMBuildCast(LLVMBuilderRef B, LLVMOpcode Op, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name) { return wrap(unwrap(B)->CreateCast(Instruction::CastOps(map_from_llvmopcode(Op)), unwrap(Val), unwrap(DestTy), Name)); } LLVMValueRef LLVMBuildPointerCast(LLVMBuilderRef B, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name) 
{ return wrap(unwrap(B)->CreatePointerCast(unwrap(Val), unwrap(DestTy), Name)); } LLVMValueRef LLVMBuildIntCast2(LLVMBuilderRef B, LLVMValueRef Val, LLVMTypeRef DestTy, LLVMBool IsSigned, const char *Name) { return wrap( unwrap(B)->CreateIntCast(unwrap(Val), unwrap(DestTy), IsSigned, Name)); } LLVMValueRef LLVMBuildIntCast(LLVMBuilderRef B, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name) { return wrap(unwrap(B)->CreateIntCast(unwrap(Val), unwrap(DestTy), /*isSigned*/true, Name)); } LLVMValueRef LLVMBuildFPCast(LLVMBuilderRef B, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name) { return wrap(unwrap(B)->CreateFPCast(unwrap(Val), unwrap(DestTy), Name)); } /*--.. Comparisons .........................................................--*/ LLVMValueRef LLVMBuildICmp(LLVMBuilderRef B, LLVMIntPredicate Op, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateICmp(static_cast(Op), unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildFCmp(LLVMBuilderRef B, LLVMRealPredicate Op, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreateFCmp(static_cast(Op), unwrap(LHS), unwrap(RHS), Name)); } /*--.. Miscellaneous instructions ..........................................--*/ LLVMValueRef LLVMBuildPhi(LLVMBuilderRef B, LLVMTypeRef Ty, const char *Name) { return wrap(unwrap(B)->CreatePHI(unwrap(Ty), 0, Name)); } LLVMValueRef LLVMBuildCall(LLVMBuilderRef B, LLVMValueRef Fn, LLVMValueRef *Args, unsigned NumArgs, const char *Name) { Value *V = unwrap(Fn); FunctionType *FnT = cast(cast(V->getType())->getElementType()); return wrap(unwrap(B)->CreateCall(FnT, unwrap(Fn), makeArrayRef(unwrap(Args), NumArgs), Name)); } LLVMValueRef LLVMBuildCall2(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Fn, LLVMValueRef *Args, unsigned NumArgs, const char *Name) { FunctionType *FTy = unwrap(Ty); return wrap(unwrap(B)->CreateCall(FTy, unwrap(Fn), makeArrayRef(unwrap(Args), NumArgs), Name)); } LLVMValueRef LLVMBuildSelect(LLVMBuilderRef B, LLVMValueRef If, LLVMValueRef Then, LLVMValueRef Else, const char *Name) { return wrap(unwrap(B)->CreateSelect(unwrap(If), unwrap(Then), unwrap(Else), Name)); } LLVMValueRef LLVMBuildVAArg(LLVMBuilderRef B, LLVMValueRef List, LLVMTypeRef Ty, const char *Name) { return wrap(unwrap(B)->CreateVAArg(unwrap(List), unwrap(Ty), Name)); } LLVMValueRef LLVMBuildExtractElement(LLVMBuilderRef B, LLVMValueRef VecVal, LLVMValueRef Index, const char *Name) { return wrap(unwrap(B)->CreateExtractElement(unwrap(VecVal), unwrap(Index), Name)); } LLVMValueRef LLVMBuildInsertElement(LLVMBuilderRef B, LLVMValueRef VecVal, LLVMValueRef EltVal, LLVMValueRef Index, const char *Name) { return wrap(unwrap(B)->CreateInsertElement(unwrap(VecVal), unwrap(EltVal), unwrap(Index), Name)); } LLVMValueRef LLVMBuildShuffleVector(LLVMBuilderRef B, LLVMValueRef V1, LLVMValueRef V2, LLVMValueRef Mask, const char *Name) { return wrap(unwrap(B)->CreateShuffleVector(unwrap(V1), unwrap(V2), unwrap(Mask), Name)); } LLVMValueRef LLVMBuildExtractValue(LLVMBuilderRef B, LLVMValueRef AggVal, unsigned Index, const char *Name) { return wrap(unwrap(B)->CreateExtractValue(unwrap(AggVal), Index, Name)); } LLVMValueRef LLVMBuildInsertValue(LLVMBuilderRef B, LLVMValueRef AggVal, LLVMValueRef EltVal, unsigned Index, const char *Name) { return wrap(unwrap(B)->CreateInsertValue(unwrap(AggVal), unwrap(EltVal), Index, Name)); } LLVMValueRef LLVMBuildFreeze(LLVMBuilderRef B, LLVMValueRef Val, const char *Name) { return wrap(unwrap(B)->CreateFreeze(unwrap(Val), Name)); } 
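/* Illustrative sketch (not part of this change): a minimal client of the
   C-API builder wrappers above, assuming only the public <llvm-c/Core.h>
   header; the helper name emitZExtExample is hypothetical. It also shows the
   difference visible in the wrappers above: LLVMBuildCall2 takes the callee's
   function type explicitly, while LLVMBuildCall has to recover it from the
   callee pointer's element type. */

#include <llvm-c/Core.h>

static void emitZExtExample(void) {
  LLVMContextRef Ctx = LLVMContextCreate();
  LLVMModuleRef M = LLVMModuleCreateWithNameInContext("demo", Ctx);
  LLVMBuilderRef B = LLVMCreateBuilderInContext(Ctx);

  // define i64 @f(i32 %x) { entry: %wide = zext i32 %x to i64; ret i64 %wide }
  LLVMTypeRef I32 = LLVMInt32TypeInContext(Ctx);
  LLVMTypeRef I64 = LLVMInt64TypeInContext(Ctx);
  LLVMTypeRef FTy = LLVMFunctionType(I64, &I32, 1, /*IsVarArg=*/0);
  LLVMValueRef F = LLVMAddFunction(M, "f", FTy);
  LLVMPositionBuilderAtEnd(B, LLVMAppendBasicBlockInContext(Ctx, F, "entry"));
  LLVMValueRef Wide = LLVMBuildZExt(B, LLVMGetParam(F, 0), I64, "wide");
  LLVMBuildRet(B, Wide);

  // A caller built elsewhere would pass FTy explicitly, e.g.:
  //   LLVMBuildCall2(CallerBuilder, FTy, F, &Arg, 1, "call");

  LLVMDisposeBuilder(B);
  LLVMDisposeModule(M);
  LLVMContextDispose(Ctx);
}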
LLVMValueRef LLVMBuildIsNull(LLVMBuilderRef B, LLVMValueRef Val, const char *Name) { return wrap(unwrap(B)->CreateIsNull(unwrap(Val), Name)); } LLVMValueRef LLVMBuildIsNotNull(LLVMBuilderRef B, LLVMValueRef Val, const char *Name) { return wrap(unwrap(B)->CreateIsNotNull(unwrap(Val), Name)); } LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *Name) { return wrap(unwrap(B)->CreatePtrDiff(unwrap(LHS), unwrap(RHS), Name)); } LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B,LLVMAtomicRMWBinOp op, LLVMValueRef PTR, LLVMValueRef Val, LLVMAtomicOrdering ordering, LLVMBool singleThread) { AtomicRMWInst::BinOp intop = mapFromLLVMRMWBinOp(op); return wrap(unwrap(B)->CreateAtomicRMW(intop, unwrap(PTR), unwrap(Val), mapFromLLVMOrdering(ordering), singleThread ? SyncScope::SingleThread : SyncScope::System)); } LLVMValueRef LLVMBuildAtomicCmpXchg(LLVMBuilderRef B, LLVMValueRef Ptr, LLVMValueRef Cmp, LLVMValueRef New, LLVMAtomicOrdering SuccessOrdering, LLVMAtomicOrdering FailureOrdering, LLVMBool singleThread) { return wrap(unwrap(B)->CreateAtomicCmpXchg(unwrap(Ptr), unwrap(Cmp), unwrap(New), mapFromLLVMOrdering(SuccessOrdering), mapFromLLVMOrdering(FailureOrdering), singleThread ? SyncScope::SingleThread : SyncScope::System)); } unsigned LLVMGetNumMaskElements(LLVMValueRef SVInst) { Value *P = unwrap(SVInst); ShuffleVectorInst *I = cast(P); return I->getShuffleMask().size(); } int LLVMGetMaskValue(LLVMValueRef SVInst, unsigned Elt) { Value *P = unwrap(SVInst); ShuffleVectorInst *I = cast(P); return I->getMaskValue(Elt); } int LLVMGetUndefMaskElem(void) { return UndefMaskElem; } LLVMBool LLVMIsAtomicSingleThread(LLVMValueRef AtomicInst) { Value *P = unwrap(AtomicInst); if (AtomicRMWInst *I = dyn_cast(P)) return I->getSyncScopeID() == SyncScope::SingleThread; return cast(P)->getSyncScopeID() == SyncScope::SingleThread; } void LLVMSetAtomicSingleThread(LLVMValueRef AtomicInst, LLVMBool NewValue) { Value *P = unwrap(AtomicInst); SyncScope::ID SSID = NewValue ? 
SyncScope::SingleThread : SyncScope::System; if (AtomicRMWInst *I = dyn_cast(P)) return I->setSyncScopeID(SSID); return cast(P)->setSyncScopeID(SSID); } LLVMAtomicOrdering LLVMGetCmpXchgSuccessOrdering(LLVMValueRef CmpXchgInst) { Value *P = unwrap(CmpXchgInst); return mapToLLVMOrdering(cast(P)->getSuccessOrdering()); } void LLVMSetCmpXchgSuccessOrdering(LLVMValueRef CmpXchgInst, LLVMAtomicOrdering Ordering) { Value *P = unwrap(CmpXchgInst); AtomicOrdering O = mapFromLLVMOrdering(Ordering); return cast(P)->setSuccessOrdering(O); } LLVMAtomicOrdering LLVMGetCmpXchgFailureOrdering(LLVMValueRef CmpXchgInst) { Value *P = unwrap(CmpXchgInst); return mapToLLVMOrdering(cast(P)->getFailureOrdering()); } void LLVMSetCmpXchgFailureOrdering(LLVMValueRef CmpXchgInst, LLVMAtomicOrdering Ordering) { Value *P = unwrap(CmpXchgInst); AtomicOrdering O = mapFromLLVMOrdering(Ordering); return cast(P)->setFailureOrdering(O); } /*===-- Module providers --------------------------------------------------===*/ LLVMModuleProviderRef LLVMCreateModuleProviderForExistingModule(LLVMModuleRef M) { return reinterpret_cast(M); } void LLVMDisposeModuleProvider(LLVMModuleProviderRef MP) { delete unwrap(MP); } /*===-- Memory buffers ----------------------------------------------------===*/ LLVMBool LLVMCreateMemoryBufferWithContentsOfFile( const char *Path, LLVMMemoryBufferRef *OutMemBuf, char **OutMessage) { ErrorOr> MBOrErr = MemoryBuffer::getFile(Path); if (std::error_code EC = MBOrErr.getError()) { *OutMessage = strdup(EC.message().c_str()); return 1; } *OutMemBuf = wrap(MBOrErr.get().release()); return 0; } LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf, char **OutMessage) { ErrorOr> MBOrErr = MemoryBuffer::getSTDIN(); if (std::error_code EC = MBOrErr.getError()) { *OutMessage = strdup(EC.message().c_str()); return 1; } *OutMemBuf = wrap(MBOrErr.get().release()); return 0; } LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRange( const char *InputData, size_t InputDataLength, const char *BufferName, LLVMBool RequiresNullTerminator) { return wrap(MemoryBuffer::getMemBuffer(StringRef(InputData, InputDataLength), StringRef(BufferName), RequiresNullTerminator).release()); } LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRangeCopy( const char *InputData, size_t InputDataLength, const char *BufferName) { return wrap( MemoryBuffer::getMemBufferCopy(StringRef(InputData, InputDataLength), StringRef(BufferName)).release()); } const char *LLVMGetBufferStart(LLVMMemoryBufferRef MemBuf) { return unwrap(MemBuf)->getBufferStart(); } size_t LLVMGetBufferSize(LLVMMemoryBufferRef MemBuf) { return unwrap(MemBuf)->getBufferSize(); } void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf) { delete unwrap(MemBuf); } /*===-- Pass Registry -----------------------------------------------------===*/ LLVMPassRegistryRef LLVMGetGlobalPassRegistry(void) { return wrap(PassRegistry::getPassRegistry()); } /*===-- Pass Manager ------------------------------------------------------===*/ LLVMPassManagerRef LLVMCreatePassManager() { return wrap(new legacy::PassManager()); } LLVMPassManagerRef LLVMCreateFunctionPassManagerForModule(LLVMModuleRef M) { return wrap(new legacy::FunctionPassManager(unwrap(M))); } LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef P) { return LLVMCreateFunctionPassManagerForModule( reinterpret_cast(P)); } LLVMBool LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M) { return unwrap(PM)->run(*unwrap(M)); } LLVMBool LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM) 
{ return unwrap(FPM)->doInitialization(); } LLVMBool LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F) { return unwrap(FPM)->run(*unwrap(F)); } LLVMBool LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM) { return unwrap(FPM)->doFinalization(); } void LLVMDisposePassManager(LLVMPassManagerRef PM) { delete unwrap(PM); } /*===-- Threading ------------------------------------------------------===*/ LLVMBool LLVMStartMultithreaded() { return LLVMIsMultithreaded(); } void LLVMStopMultithreaded() { } LLVMBool LLVMIsMultithreaded() { return llvm_is_multithreaded(); } diff --git a/contrib/llvm-project/llvm/lib/Support/Host.cpp b/contrib/llvm-project/llvm/lib/Support/Host.cpp index a1bd3cc12f1d..09146c47ff2c 100644 --- a/contrib/llvm-project/llvm/lib/Support/Host.cpp +++ b/contrib/llvm-project/llvm/lib/Support/Host.cpp @@ -1,1604 +1,1646 @@ //===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements the operating system Host concept. // //===----------------------------------------------------------------------===// #include "llvm/Support/Host.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/Config/llvm-config.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/X86TargetParser.h" #include "llvm/Support/raw_ostream.h" #include #include // Include the platform-specific parts of this class. #ifdef LLVM_ON_UNIX #include "Unix/Host.inc" #include #endif #ifdef _WIN32 #include "Windows/Host.inc" #endif #ifdef _MSC_VER #include #endif #if defined(__APPLE__) && (!defined(__x86_64__)) #include #include #include #include #endif #define DEBUG_TYPE "host-detection" //===----------------------------------------------------------------------===// // // Implementations of the CPU detection routines // //===----------------------------------------------------------------------===// using namespace llvm; static std::unique_ptr LLVM_ATTRIBUTE_UNUSED getProcCpuinfoContent() { llvm::ErrorOr> Text = llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); if (std::error_code EC = Text.getError()) { llvm::errs() << "Can't read " << "/proc/cpuinfo: " << EC.message() << "\n"; return nullptr; } return std::move(*Text); } StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) { // Access to the Processor Version Register (PVR) on PowerPC is privileged, // and so we must use an operating-system interface to determine the current // processor type. On Linux, this is exposed through the /proc/cpuinfo file. const char *generic = "generic"; // The cpu line is second (after the 'processor: 0' line), so if this // buffer is too small then something has changed (or is wrong). StringRef::const_iterator CPUInfoStart = ProcCpuinfoContent.begin(); StringRef::const_iterator CPUInfoEnd = ProcCpuinfoContent.end(); StringRef::const_iterator CIP = CPUInfoStart; StringRef::const_iterator CPUStart = 0; size_t CPULen = 0; // We need to find the first line which starts with cpu, spaces, and a colon. 
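// Illustrative sketch (not part of this change): the same "cpu<ws>:<ws><type>"
// scan expressed with StringRef helpers, shown only to clarify what the
// hand-rolled pointer walk below does; the lambda name is hypothetical.
auto ScanCpuLineSketch = [](StringRef Content) -> StringRef {
  SmallVector<StringRef, 32> CpuinfoLines;
  Content.split(CpuinfoLines, '\n');
  for (StringRef Line : CpuinfoLines) {
    if (!Line.consume_front("cpu"))
      continue;                     // line does not start with "cpu"
    Line = Line.ltrim(" \t");
    if (!Line.consume_front(":"))
      continue;                     // a ':' must follow the optional spaces
    Line = Line.ltrim(" \t");
    // Keep the token up to the first separator, e.g. "POWER9" out of a line
    // such as "cpu\t\t: POWER9 (architected), altivec supported".
    return Line.take_until(
        [](char C) { return C == ' ' || C == '\t' || C == ',' || C == '\n'; });
  }
  return StringRef();
};
(void)ScanCpuLineSketch;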
// After the colon, there may be some additional spaces and then the cpu type. while (CIP < CPUInfoEnd && CPUStart == 0) { if (CIP < CPUInfoEnd && *CIP == '\n') ++CIP; if (CIP < CPUInfoEnd && *CIP == 'c') { ++CIP; if (CIP < CPUInfoEnd && *CIP == 'p') { ++CIP; if (CIP < CPUInfoEnd && *CIP == 'u') { ++CIP; while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) ++CIP; if (CIP < CPUInfoEnd && *CIP == ':') { ++CIP; while (CIP < CPUInfoEnd && (*CIP == ' ' || *CIP == '\t')) ++CIP; if (CIP < CPUInfoEnd) { CPUStart = CIP; while (CIP < CPUInfoEnd && (*CIP != ' ' && *CIP != '\t' && *CIP != ',' && *CIP != '\n')) ++CIP; CPULen = CIP - CPUStart; } } } } } if (CPUStart == 0) while (CIP < CPUInfoEnd && *CIP != '\n') ++CIP; } if (CPUStart == 0) return generic; return StringSwitch(StringRef(CPUStart, CPULen)) .Case("604e", "604e") .Case("604", "604") .Case("7400", "7400") .Case("7410", "7400") .Case("7447", "7400") .Case("7455", "7450") .Case("G4", "g4") .Case("POWER4", "970") .Case("PPC970FX", "970") .Case("PPC970MP", "970") .Case("G5", "g5") .Case("POWER5", "g5") .Case("A2", "a2") .Case("POWER6", "pwr6") .Case("POWER7", "pwr7") .Case("POWER8", "pwr8") .Case("POWER8E", "pwr8") .Case("POWER8NVL", "pwr8") .Case("POWER9", "pwr9") .Case("POWER10", "pwr10") // FIXME: If we get a simulator or machine with the capabilities of // mcpu=future, we should revisit this and add the name reported by the // simulator/machine. .Default(generic); } StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { // The cpuid register on arm is not accessible from user space. On Linux, // it is exposed through the /proc/cpuinfo file. // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line // in all cases. SmallVector Lines; ProcCpuinfoContent.split(Lines, "\n"); // Look for the CPU implementer line. StringRef Implementer; StringRef Hardware; StringRef Part; for (unsigned I = 0, E = Lines.size(); I != E; ++I) { if (Lines[I].startswith("CPU implementer")) Implementer = Lines[I].substr(15).ltrim("\t :"); if (Lines[I].startswith("Hardware")) Hardware = Lines[I].substr(8).ltrim("\t :"); if (Lines[I].startswith("CPU part")) Part = Lines[I].substr(8).ltrim("\t :"); } if (Implementer == "0x41") { // ARM Ltd. // MSM8992/8994 may give cpu part for the core that the kernel is running on, // which is undeterministic and wrong. Always return cortex-a53 for these SoC. if (Hardware.endswith("MSM8994") || Hardware.endswith("MSM8996")) return "cortex-a53"; // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The // values correspond to the "Part number" in the CP15/c0 register. The // contents are specified in the various processor manuals. // This corresponds to the Main ID Register in Technical Reference Manuals. 
// and is used in programs like sys-utils return StringSwitch(Part) .Case("0x926", "arm926ej-s") .Case("0xb02", "mpcore") .Case("0xb36", "arm1136j-s") .Case("0xb56", "arm1156t2-s") .Case("0xb76", "arm1176jz-s") .Case("0xc08", "cortex-a8") .Case("0xc09", "cortex-a9") .Case("0xc0f", "cortex-a15") .Case("0xc20", "cortex-m0") .Case("0xc23", "cortex-m3") .Case("0xc24", "cortex-m4") .Case("0xd22", "cortex-m55") .Case("0xd02", "cortex-a34") .Case("0xd04", "cortex-a35") .Case("0xd03", "cortex-a53") .Case("0xd07", "cortex-a57") .Case("0xd08", "cortex-a72") .Case("0xd09", "cortex-a73") .Case("0xd0a", "cortex-a75") .Case("0xd0b", "cortex-a76") .Case("0xd0d", "cortex-a77") .Case("0xd41", "cortex-a78") .Case("0xd44", "cortex-x1") .Case("0xd0c", "neoverse-n1") .Case("0xd49", "neoverse-n2") .Default("generic"); } if (Implementer == "0x42" || Implementer == "0x43") { // Broadcom | Cavium. return StringSwitch(Part) .Case("0x516", "thunderx2t99") .Case("0x0516", "thunderx2t99") .Case("0xaf", "thunderx2t99") .Case("0x0af", "thunderx2t99") .Case("0xa1", "thunderxt88") .Case("0x0a1", "thunderxt88") .Default("generic"); } if (Implementer == "0x46") { // Fujitsu Ltd. return StringSwitch(Part) .Case("0x001", "a64fx") .Default("generic"); } if (Implementer == "0x4e") { // NVIDIA Corporation return StringSwitch(Part) .Case("0x004", "carmel") .Default("generic"); } if (Implementer == "0x48") // HiSilicon Technologies, Inc. // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The // values correspond to the "Part number" in the CP15/c0 register. The // contents are specified in the various processor manuals. return StringSwitch(Part) .Case("0xd01", "tsv110") .Default("generic"); if (Implementer == "0x51") // Qualcomm Technologies, Inc. // The CPU part is a 3 digit hexadecimal number with a 0x prefix. The // values correspond to the "Part number" in the CP15/c0 register. The // contents are specified in the various processor manuals. return StringSwitch(Part) .Case("0x06f", "krait") // APQ8064 .Case("0x201", "kryo") .Case("0x205", "kryo") .Case("0x211", "kryo") .Case("0x800", "cortex-a73") // Kryo 2xx Gold .Case("0x801", "cortex-a73") // Kryo 2xx Silver .Case("0x802", "cortex-a75") // Kryo 3xx Gold .Case("0x803", "cortex-a75") // Kryo 3xx Silver .Case("0x804", "cortex-a76") // Kryo 4xx Gold .Case("0x805", "cortex-a76") // Kryo 4xx/5xx Silver .Case("0xc00", "falkor") .Case("0xc01", "saphira") .Default("generic"); if (Implementer == "0x53") { // Samsung Electronics Co., Ltd. // The Exynos chips have a convoluted ID scheme that doesn't seem to follow // any predictive pattern across variants and parts. unsigned Variant = 0, Part = 0; // Look for the CPU variant line, whose value is a 1 digit hexadecimal // number, corresponding to the Variant bits in the CP15/C0 register. for (auto I : Lines) if (I.consume_front("CPU variant")) I.ltrim("\t :").getAsInteger(0, Variant); // Look for the CPU part line, whose value is a 3 digit hexadecimal // number, corresponding to the PartNum bits in the CP15/C0 register. for (auto I : Lines) if (I.consume_front("CPU part")) I.ltrim("\t :").getAsInteger(0, Part); unsigned Exynos = (Variant << 12) | Part; switch (Exynos) { default: // Default by falling through to Exynos M3. LLVM_FALLTHROUGH; case 0x1002: return "exynos-m3"; case 0x1003: return "exynos-m4"; } } return "generic"; } StringRef sys::detail::getHostCPUNameForS390x(StringRef ProcCpuinfoContent) { // STIDP is a privileged operation, so use /proc/cpuinfo instead. 
// The "processor 0:" line comes after a fair amount of other information, // including a cache breakdown, but this should be plenty. SmallVector Lines; ProcCpuinfoContent.split(Lines, "\n"); // Look for the CPU features. SmallVector CPUFeatures; for (unsigned I = 0, E = Lines.size(); I != E; ++I) if (Lines[I].startswith("features")) { size_t Pos = Lines[I].find(':'); if (Pos != StringRef::npos) { Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' '); break; } } // We need to check for the presence of vector support independently of // the machine type, since we may only use the vector register set when // supported by the kernel (and hypervisor). bool HaveVectorSupport = false; for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { if (CPUFeatures[I] == "vx") HaveVectorSupport = true; } // Now check the processor machine type. for (unsigned I = 0, E = Lines.size(); I != E; ++I) { if (Lines[I].startswith("processor ")) { size_t Pos = Lines[I].find("machine = "); if (Pos != StringRef::npos) { Pos += sizeof("machine = ") - 1; unsigned int Id; if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) { if (Id >= 8561 && HaveVectorSupport) return "z15"; if (Id >= 3906 && HaveVectorSupport) return "z14"; if (Id >= 2964 && HaveVectorSupport) return "z13"; if (Id >= 2827) return "zEC12"; if (Id >= 2817) return "z196"; } } break; } } return "generic"; } StringRef sys::detail::getHostCPUNameForBPF() { #if !defined(__linux__) || !defined(__x86_64__) return "generic"; #else uint8_t v3_insns[40] __attribute__ ((aligned (8))) = /* BPF_MOV64_IMM(BPF_REG_0, 0) */ { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, /* BPF_MOV64_IMM(BPF_REG_2, 1) */ 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, /* BPF_JMP32_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */ 0xae, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, /* BPF_MOV64_IMM(BPF_REG_0, 1) */ 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, /* BPF_EXIT_INSN() */ 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; uint8_t v2_insns[40] __attribute__ ((aligned (8))) = /* BPF_MOV64_IMM(BPF_REG_0, 0) */ { 0xb7, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, /* BPF_MOV64_IMM(BPF_REG_2, 1) */ 0xb7, 0x2, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, /* BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_2, 1) */ 0xad, 0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, /* BPF_MOV64_IMM(BPF_REG_0, 1) */ 0xb7, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0, 0x0, /* BPF_EXIT_INSN() */ 0x95, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 }; struct bpf_prog_load_attr { uint32_t prog_type; uint32_t insn_cnt; uint64_t insns; uint64_t license; uint32_t log_level; uint32_t log_size; uint64_t log_buf; uint32_t kern_version; uint32_t prog_flags; } attr = {}; attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */ attr.insn_cnt = 5; attr.insns = (uint64_t)v3_insns; attr.license = (uint64_t)"DUMMY"; int fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr)); if (fd >= 0) { close(fd); return "v3"; } /* Clear the whole attr in case its content changed by syscall. */ memset(&attr, 0, sizeof(attr)); attr.prog_type = 1; /* BPF_PROG_TYPE_SOCKET_FILTER */ attr.insn_cnt = 5; attr.insns = (uint64_t)v2_insns; attr.license = (uint64_t)"DUMMY"; fd = syscall(321 /* __NR_bpf */, 5 /* BPF_PROG_LOAD */, &attr, sizeof(attr)); if (fd >= 0) { close(fd); return "v2"; } return "v1"; #endif } #if defined(__i386__) || defined(_M_IX86) || \ defined(__x86_64__) || defined(_M_X64) -enum VendorSignatures { - SIG_INTEL = 0x756e6547 /* Genu */, - SIG_AMD = 0x68747541 /* Auth */ -}; - // The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max). 
// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID // support. Consequently, for i386, the presence of CPUID is checked first // via the corresponding eflags bit. // Removal of cpuid.h header motivated by PR30384 // Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp // or test-suite, but are used in external projects e.g. libstdcxx static bool isCpuIdSupported() { #if defined(__GNUC__) || defined(__clang__) #if defined(__i386__) int __cpuid_supported; __asm__(" pushfl\n" " popl %%eax\n" " movl %%eax,%%ecx\n" " xorl $0x00200000,%%eax\n" " pushl %%eax\n" " popfl\n" " pushfl\n" " popl %%eax\n" " movl $0,%0\n" " cmpl %%eax,%%ecx\n" " je 1f\n" " movl $1,%0\n" "1:" : "=r"(__cpuid_supported) : : "eax", "ecx"); if (!__cpuid_supported) return false; #endif return true; #endif return true; } /// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in /// the specified arguments. If we can't run cpuid on the host, return true. static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { #if defined(__GNUC__) || defined(__clang__) #if defined(__x86_64__) // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. // FIXME: should we save this for Clang? __asm__("movq\t%%rbx, %%rsi\n\t" "cpuid\n\t" "xchgq\t%%rbx, %%rsi\n\t" : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) : "a"(value)); return false; #elif defined(__i386__) __asm__("movl\t%%ebx, %%esi\n\t" "cpuid\n\t" "xchgl\t%%ebx, %%esi\n\t" : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) : "a"(value)); return false; #else return true; #endif #elif defined(_MSC_VER) // The MSVC intrinsic is portable across x86 and x64. int registers[4]; __cpuid(registers, value); *rEAX = registers[0]; *rEBX = registers[1]; *rECX = registers[2]; *rEDX = registers[3]; return false; #else return true; #endif } +namespace llvm { +namespace sys { +namespace detail { +namespace x86 { + +VendorSignatures getVendorSignature(unsigned *MaxLeaf) { + unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; + if (MaxLeaf == nullptr) + MaxLeaf = &EAX; + else + *MaxLeaf = 0; + + if (!isCpuIdSupported()) + return VendorSignatures::UNKNOWN; + + if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1) + return VendorSignatures::UNKNOWN; + + // "Genu ineI ntel" + if (EBX == 0x756e6547 && EDX == 0x49656e69 && ECX == 0x6c65746e) + return VendorSignatures::GENUINE_INTEL; + + // "Auth enti cAMD" + if (EBX == 0x68747541 && EDX == 0x69746e65 && ECX == 0x444d4163) + return VendorSignatures::AUTHENTIC_AMD; + + return VendorSignatures::UNKNOWN; +} + +} // namespace x86 +} // namespace detail +} // namespace sys +} // namespace llvm + +using namespace llvm::sys::detail::x86; + /// getX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return /// the 4 values in the specified arguments. If we can't run cpuid on the host, /// return true. static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX, unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { #if defined(__GNUC__) || defined(__clang__) #if defined(__x86_64__) // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually. // FIXME: should we save this for Clang? 
__asm__("movq\t%%rbx, %%rsi\n\t" "cpuid\n\t" "xchgq\t%%rbx, %%rsi\n\t" : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) : "a"(value), "c"(subleaf)); return false; #elif defined(__i386__) __asm__("movl\t%%ebx, %%esi\n\t" "cpuid\n\t" "xchgl\t%%ebx, %%esi\n\t" : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX) : "a"(value), "c"(subleaf)); return false; #else return true; #endif #elif defined(_MSC_VER) int registers[4]; __cpuidex(registers, value, subleaf); *rEAX = registers[0]; *rEBX = registers[1]; *rECX = registers[2]; *rEDX = registers[3]; return false; #else return true; #endif } // Read control register 0 (XCR0). Used to detect features such as AVX. static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) { #if defined(__GNUC__) || defined(__clang__) // Check xgetbv; this uses a .byte sequence instead of the instruction // directly because older assemblers do not include support for xgetbv and // there is no easy way to conditionally compile based on the assembler used. __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(*rEAX), "=d"(*rEDX) : "c"(0)); return false; #elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) unsigned long long Result = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); *rEAX = Result; *rEDX = Result >> 32; return false; #else return true; #endif } static void detectX86FamilyModel(unsigned EAX, unsigned *Family, unsigned *Model) { *Family = (EAX >> 8) & 0xf; // Bits 8 - 11 *Model = (EAX >> 4) & 0xf; // Bits 4 - 7 if (*Family == 6 || *Family == 0xf) { if (*Family == 0xf) // Examine extended family ID if family ID is F. *Family += (EAX >> 20) & 0xff; // Bits 20 - 27 // Examine extended model ID if family ID is 6 or F. *Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 } } static StringRef getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, const unsigned *Features, unsigned *Type, unsigned *Subtype) { auto testFeature = [&](unsigned F) { return (Features[F / 32] & (1U << (F % 32))) != 0; }; StringRef CPU; switch (Family) { case 3: CPU = "i386"; break; case 4: CPU = "i486"; break; case 5: if (testFeature(X86::FEATURE_MMX)) { CPU = "pentium-mmx"; break; } CPU = "pentium"; break; case 6: switch (Model) { case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile // processor, Intel Core 2 Quad processor, Intel Core 2 Quad // mobile processor, Intel Core 2 Extreme processor, Intel // Pentium Dual-Core processor, Intel Xeon processor, model // 0Fh. All processors are manufactured using the 65 nm process. case 0x16: // Intel Celeron processor model 16h. All processors are // manufactured using the 65 nm process CPU = "core2"; *Type = X86::INTEL_CORE2; break; case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model // 17h. All processors are manufactured using the 45 nm process. // // 45nm: Penryn , Wolfdale, Yorkfield (XE) case 0x1d: // Intel Xeon processor MP. All processors are manufactured using // the 45 nm process. CPU = "penryn"; *Type = X86::INTEL_CORE2; break; case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All // processors are manufactured using the 45 nm process. case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. // As found in a Summer 2010 model iMac. case 0x1f: case 0x2e: // Nehalem EX CPU = "nehalem"; *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_NEHALEM; break; case 0x25: // Intel Core i7, laptop version. case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All // processors are manufactured using the 32 nm process. 
case 0x2f: // Westmere EX CPU = "westmere"; *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_WESTMERE; break; case 0x2a: // Intel Core i7 processor. All processors are manufactured // using the 32 nm process. case 0x2d: CPU = "sandybridge"; *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_SANDYBRIDGE; break; case 0x3a: case 0x3e: // Ivy Bridge EP CPU = "ivybridge"; *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_IVYBRIDGE; break; // Haswell: case 0x3c: case 0x3f: case 0x45: case 0x46: CPU = "haswell"; *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_HASWELL; break; // Broadwell: case 0x3d: case 0x47: case 0x4f: case 0x56: CPU = "broadwell"; *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_BROADWELL; break; // Skylake: case 0x4e: // Skylake mobile case 0x5e: // Skylake desktop case 0x8e: // Kaby Lake mobile case 0x9e: // Kaby Lake desktop case 0xa5: // Comet Lake-H/S case 0xa6: // Comet Lake-U CPU = "skylake"; *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_SKYLAKE; break; // Skylake Xeon: case 0x55: *Type = X86::INTEL_COREI7; if (testFeature(X86::FEATURE_AVX512BF16)) { CPU = "cooperlake"; *Subtype = X86::INTEL_COREI7_COOPERLAKE; } else if (testFeature(X86::FEATURE_AVX512VNNI)) { CPU = "cascadelake"; *Subtype = X86::INTEL_COREI7_CASCADELAKE; } else { CPU = "skylake-avx512"; *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; } break; // Cannonlake: case 0x66: CPU = "cannonlake"; *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_CANNONLAKE; break; // Icelake: case 0x7d: case 0x7e: CPU = "icelake-client"; *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT; break; // Icelake Xeon: case 0x6a: case 0x6c: CPU = "icelake-server"; *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER; break; // Sapphire Rapids: case 0x8f: CPU = "sapphirerapids"; *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_SAPPHIRERAPIDS; break; case 0x1c: // Most 45 nm Intel Atom processors case 0x26: // 45 nm Atom Lincroft case 0x27: // 32 nm Atom Medfield case 0x35: // 32 nm Atom Midview case 0x36: // 32 nm Atom Midview CPU = "bonnell"; *Type = X86::INTEL_BONNELL; break; // Atom Silvermont codes from the Intel software optimization guide. case 0x37: case 0x4a: case 0x4d: case 0x5a: case 0x5d: case 0x4c: // really airmont CPU = "silvermont"; *Type = X86::INTEL_SILVERMONT; break; // Goldmont: case 0x5c: // Apollo Lake case 0x5f: // Denverton CPU = "goldmont"; *Type = X86::INTEL_GOLDMONT; break; case 0x7a: CPU = "goldmont-plus"; *Type = X86::INTEL_GOLDMONT_PLUS; break; case 0x86: CPU = "tremont"; *Type = X86::INTEL_TREMONT; break; // Xeon Phi (Knights Landing + Knights Mill): case 0x57: CPU = "knl"; *Type = X86::INTEL_KNL; break; case 0x85: CPU = "knm"; *Type = X86::INTEL_KNM; break; default: // Unknown family 6 CPU, try to guess. // Don't both with Type/Subtype here, they aren't used by the caller. // They're used above to keep the code in sync with compiler-rt. 
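// Illustrative worked example (not part of this change) of the decoding done
// by detectX86FamilyModel() above: for a leaf-1 EAX signature of, say,
// 0x000906ea, the base family is 6, so the extended model nibble is folded in:
//   Family = (0x000906ea >> 8) & 0xf                                    = 6
//   Model  = ((0x000906ea >> 4) & 0xf) + (((0x000906ea >> 16) & 0xf) << 4)
//          = 0x0e + 0x90                                                = 0x9e
// which lands on the 0x9e (Kaby Lake desktop) case above, i.e. "skylake".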
// TODO detect tigerlake host from model if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) { CPU = "tigerlake"; } else if (testFeature(X86::FEATURE_AVX512VBMI2)) { CPU = "icelake-client"; } else if (testFeature(X86::FEATURE_AVX512VBMI)) { CPU = "cannonlake"; } else if (testFeature(X86::FEATURE_AVX512BF16)) { CPU = "cooperlake"; } else if (testFeature(X86::FEATURE_AVX512VNNI)) { CPU = "cascadelake"; } else if (testFeature(X86::FEATURE_AVX512VL)) { CPU = "skylake-avx512"; } else if (testFeature(X86::FEATURE_AVX512ER)) { CPU = "knl"; } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) { if (testFeature(X86::FEATURE_SHA)) CPU = "goldmont"; else CPU = "skylake"; } else if (testFeature(X86::FEATURE_ADX)) { CPU = "broadwell"; } else if (testFeature(X86::FEATURE_AVX2)) { CPU = "haswell"; } else if (testFeature(X86::FEATURE_AVX)) { CPU = "sandybridge"; } else if (testFeature(X86::FEATURE_SSE4_2)) { if (testFeature(X86::FEATURE_MOVBE)) CPU = "silvermont"; else CPU = "nehalem"; } else if (testFeature(X86::FEATURE_SSE4_1)) { CPU = "penryn"; } else if (testFeature(X86::FEATURE_SSSE3)) { if (testFeature(X86::FEATURE_MOVBE)) CPU = "bonnell"; else CPU = "core2"; } else if (testFeature(X86::FEATURE_64BIT)) { CPU = "core2"; } else if (testFeature(X86::FEATURE_SSE3)) { CPU = "yonah"; } else if (testFeature(X86::FEATURE_SSE2)) { CPU = "pentium-m"; } else if (testFeature(X86::FEATURE_SSE)) { CPU = "pentium3"; } else if (testFeature(X86::FEATURE_MMX)) { CPU = "pentium2"; } else { CPU = "pentiumpro"; } break; } break; case 15: { if (testFeature(X86::FEATURE_64BIT)) { CPU = "nocona"; break; } if (testFeature(X86::FEATURE_SSE3)) { CPU = "prescott"; break; } CPU = "pentium4"; break; } default: break; // Unknown. } return CPU; } static StringRef getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, const unsigned *Features, unsigned *Type, unsigned *Subtype) { auto testFeature = [&](unsigned F) { return (Features[F / 32] & (1U << (F % 32))) != 0; }; StringRef CPU; switch (Family) { case 4: CPU = "i486"; break; case 5: CPU = "pentium"; switch (Model) { case 6: case 7: CPU = "k6"; break; case 8: CPU = "k6-2"; break; case 9: case 13: CPU = "k6-3"; break; case 10: CPU = "geode"; break; } break; case 6: if (testFeature(X86::FEATURE_SSE)) { CPU = "athlon-xp"; break; } CPU = "athlon"; break; case 15: if (testFeature(X86::FEATURE_SSE3)) { CPU = "k8-sse3"; break; } CPU = "k8"; break; case 16: CPU = "amdfam10"; *Type = X86::AMDFAM10H; // "amdfam10" switch (Model) { case 2: *Subtype = X86::AMDFAM10H_BARCELONA; break; case 4: *Subtype = X86::AMDFAM10H_SHANGHAI; break; case 8: *Subtype = X86::AMDFAM10H_ISTANBUL; break; } break; case 20: CPU = "btver1"; *Type = X86::AMD_BTVER1; break; case 21: CPU = "bdver1"; *Type = X86::AMDFAM15H; if (Model >= 0x60 && Model <= 0x7f) { CPU = "bdver4"; *Subtype = X86::AMDFAM15H_BDVER4; break; // 60h-7Fh: Excavator } if (Model >= 0x30 && Model <= 0x3f) { CPU = "bdver3"; *Subtype = X86::AMDFAM15H_BDVER3; break; // 30h-3Fh: Steamroller } if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) { CPU = "bdver2"; *Subtype = X86::AMDFAM15H_BDVER2; break; // 02h, 10h-1Fh: Piledriver } if (Model <= 0x0f) { *Subtype = X86::AMDFAM15H_BDVER1; break; // 00h-0Fh: Bulldozer } break; case 22: CPU = "btver2"; *Type = X86::AMD_BTVER2; break; case 23: CPU = "znver1"; *Type = X86::AMDFAM17H; if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) { CPU = "znver2"; *Subtype = X86::AMDFAM17H_ZNVER2; break; // 30h-3fh, 71h: Zen2 } if (Model <= 0x0f) { *Subtype = X86::AMDFAM17H_ZNVER1; break; // 00h-0Fh: Zen1 } 
break; case 25: CPU = "znver3"; *Type = X86::AMDFAM19H; if (Model <= 0x0f) { *Subtype = X86::AMDFAM19H_ZNVER3; break; // 00h-0Fh: Zen3 } break; default: break; // Unknown AMD CPU. } return CPU; } static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, unsigned *Features) { unsigned EAX, EBX; auto setFeature = [&](unsigned F) { Features[F / 32] |= 1U << (F % 32); }; if ((EDX >> 15) & 1) setFeature(X86::FEATURE_CMOV); if ((EDX >> 23) & 1) setFeature(X86::FEATURE_MMX); if ((EDX >> 25) & 1) setFeature(X86::FEATURE_SSE); if ((EDX >> 26) & 1) setFeature(X86::FEATURE_SSE2); if ((ECX >> 0) & 1) setFeature(X86::FEATURE_SSE3); if ((ECX >> 1) & 1) setFeature(X86::FEATURE_PCLMUL); if ((ECX >> 9) & 1) setFeature(X86::FEATURE_SSSE3); if ((ECX >> 12) & 1) setFeature(X86::FEATURE_FMA); if ((ECX >> 19) & 1) setFeature(X86::FEATURE_SSE4_1); if ((ECX >> 20) & 1) setFeature(X86::FEATURE_SSE4_2); if ((ECX >> 23) & 1) setFeature(X86::FEATURE_POPCNT); if ((ECX >> 25) & 1) setFeature(X86::FEATURE_AES); if ((ECX >> 22) & 1) setFeature(X86::FEATURE_MOVBE); // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV // indicates that the AVX registers will be saved and restored on context // switch, then we have full AVX support. const unsigned AVXBits = (1 << 27) | (1 << 28); bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6); #if defined(__APPLE__) // Darwin lazily saves the AVX512 context on first use: trust that the OS will // save the AVX512 context if we use AVX512 instructions, even the bit is not // set right now. bool HasAVX512Save = true; #else // AVX512 requires additional context to be saved by the OS. bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); #endif if (HasAVX) setFeature(X86::FEATURE_AVX); bool HasLeaf7 = MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); if (HasLeaf7 && ((EBX >> 3) & 1)) setFeature(X86::FEATURE_BMI); if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) setFeature(X86::FEATURE_AVX2); if (HasLeaf7 && ((EBX >> 8) & 1)) setFeature(X86::FEATURE_BMI2); if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) setFeature(X86::FEATURE_AVX512F); if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) setFeature(X86::FEATURE_AVX512DQ); if (HasLeaf7 && ((EBX >> 19) & 1)) setFeature(X86::FEATURE_ADX); if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) setFeature(X86::FEATURE_AVX512IFMA); if (HasLeaf7 && ((EBX >> 23) & 1)) setFeature(X86::FEATURE_CLFLUSHOPT); if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) setFeature(X86::FEATURE_AVX512PF); if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) setFeature(X86::FEATURE_AVX512ER); if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) setFeature(X86::FEATURE_AVX512CD); if (HasLeaf7 && ((EBX >> 29) & 1)) setFeature(X86::FEATURE_SHA); if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) setFeature(X86::FEATURE_AVX512BW); if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) setFeature(X86::FEATURE_AVX512VL); if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save) setFeature(X86::FEATURE_AVX512VBMI); if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save) setFeature(X86::FEATURE_AVX512VBMI2); if (HasLeaf7 && ((ECX >> 8) & 1)) setFeature(X86::FEATURE_GFNI); if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX) setFeature(X86::FEATURE_VPCLMULQDQ); if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save) setFeature(X86::FEATURE_AVX512VNNI); if (HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save) setFeature(X86::FEATURE_AVX512BITALG); if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) 
setFeature(X86::FEATURE_AVX512VPOPCNTDQ); if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) setFeature(X86::FEATURE_AVX5124VNNIW); if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) setFeature(X86::FEATURE_AVX5124FMAPS); if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save) setFeature(X86::FEATURE_AVX512VP2INTERSECT); bool HasLeaf7Subleaf1 = MaxLeaf >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save) setFeature(X86::FEATURE_AVX512BF16); unsigned MaxExtLevel; getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); if (HasExtLeaf1 && ((ECX >> 6) & 1)) setFeature(X86::FEATURE_SSE4_A); if (HasExtLeaf1 && ((ECX >> 11) & 1)) setFeature(X86::FEATURE_XOP); if (HasExtLeaf1 && ((ECX >> 16) & 1)) setFeature(X86::FEATURE_FMA4); if (HasExtLeaf1 && ((EDX >> 29) & 1)) setFeature(X86::FEATURE_64BIT); } StringRef sys::getHostCPUName() { - unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; - unsigned MaxLeaf, Vendor; - - if (!isCpuIdSupported()) + unsigned MaxLeaf = 0; + const VendorSignatures Vendor = getVendorSignature(&MaxLeaf); + if (Vendor == VendorSignatures::UNKNOWN) return "generic"; - if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) - return "generic"; + unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); unsigned Family = 0, Model = 0; unsigned Features[(X86::CPU_FEATURE_MAX + 31) / 32] = {0}; detectX86FamilyModel(EAX, &Family, &Model); getAvailableFeatures(ECX, EDX, MaxLeaf, Features); // These aren't consumed in this file, but we try to keep some source code the // same or similar to compiler-rt. unsigned Type = 0; unsigned Subtype = 0; StringRef CPU; - if (Vendor == SIG_INTEL) { + if (Vendor == VendorSignatures::GENUINE_INTEL) { CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type, &Subtype); - } else if (Vendor == SIG_AMD) { + } else if (Vendor == VendorSignatures::AUTHENTIC_AMD) { CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, &Subtype); } if (!CPU.empty()) return CPU; return "generic"; } #elif defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__)) StringRef sys::getHostCPUName() { host_basic_info_data_t hostInfo; mach_msg_type_number_t infoCount; infoCount = HOST_BASIC_INFO_COUNT; mach_port_t hostPort = mach_host_self(); host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo, &infoCount); mach_port_deallocate(mach_task_self(), hostPort); if (hostInfo.cpu_type != CPU_TYPE_POWERPC) return "generic"; switch (hostInfo.cpu_subtype) { case CPU_SUBTYPE_POWERPC_601: return "601"; case CPU_SUBTYPE_POWERPC_602: return "602"; case CPU_SUBTYPE_POWERPC_603: return "603"; case CPU_SUBTYPE_POWERPC_603e: return "603e"; case CPU_SUBTYPE_POWERPC_603ev: return "603ev"; case CPU_SUBTYPE_POWERPC_604: return "604"; case CPU_SUBTYPE_POWERPC_604e: return "604e"; case CPU_SUBTYPE_POWERPC_620: return "620"; case CPU_SUBTYPE_POWERPC_750: return "750"; case CPU_SUBTYPE_POWERPC_7400: return "7400"; case CPU_SUBTYPE_POWERPC_7450: return "7450"; case CPU_SUBTYPE_POWERPC_970: return "970"; default:; } return "generic"; } #elif defined(__linux__) && (defined(__ppc__) || defined(__powerpc__)) StringRef sys::getHostCPUName() { std::unique_ptr P = getProcCpuinfoContent(); StringRef Content = P ? 
P->getBuffer() : ""; return detail::getHostCPUNameForPowerPC(Content); } #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) StringRef sys::getHostCPUName() { std::unique_ptr P = getProcCpuinfoContent(); StringRef Content = P ? P->getBuffer() : ""; return detail::getHostCPUNameForARM(Content); } #elif defined(__linux__) && defined(__s390x__) StringRef sys::getHostCPUName() { std::unique_ptr P = getProcCpuinfoContent(); StringRef Content = P ? P->getBuffer() : ""; return detail::getHostCPUNameForS390x(Content); } #elif defined(__APPLE__) && defined(__aarch64__) StringRef sys::getHostCPUName() { return "cyclone"; } #elif defined(__APPLE__) && defined(__arm__) StringRef sys::getHostCPUName() { host_basic_info_data_t hostInfo; mach_msg_type_number_t infoCount; infoCount = HOST_BASIC_INFO_COUNT; mach_port_t hostPort = mach_host_self(); host_info(hostPort, HOST_BASIC_INFO, (host_info_t)&hostInfo, &infoCount); mach_port_deallocate(mach_task_self(), hostPort); if (hostInfo.cpu_type != CPU_TYPE_ARM) { assert(false && "CPUType not equal to ARM should not be possible on ARM"); return "generic"; } switch (hostInfo.cpu_subtype) { case CPU_SUBTYPE_ARM_V7S: return "swift"; default:; } return "generic"; } #else StringRef sys::getHostCPUName() { return "generic"; } +namespace llvm { +namespace sys { +namespace detail { +namespace x86 { + +VendorSignatures getVendorSignature(unsigned *MaxLeaf) { + return VendorSignatures::UNKNOWN; +} + +} // namespace x86 +} // namespace detail +} // namespace sys +} // namespace llvm #endif #if defined(__linux__) && (defined(__i386__) || defined(__x86_64__)) // On Linux, the number of physical cores can be computed from /proc/cpuinfo, // using the number of unique physical/core id pairs. The following // implementation reads the /proc/cpuinfo format on an x86_64 system. int computeHostNumPhysicalCores() { // Enabled represents the number of physical id/core id pairs with at least // one processor id enabled by the CPU affinity mask. cpu_set_t Affinity, Enabled; if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0) return -1; CPU_ZERO(&Enabled); // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be // mmapped because it appears to have 0 size. llvm::ErrorOr> Text = llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo"); if (std::error_code EC = Text.getError()) { llvm::errs() << "Can't read " << "/proc/cpuinfo: " << EC.message() << "\n"; return -1; } SmallVector strs; (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1, /*KeepEmpty=*/false); int CurProcessor = -1; int CurPhysicalId = -1; int CurSiblings = -1; int CurCoreId = -1; for (StringRef Line : strs) { std::pair Data = Line.split(':'); auto Name = Data.first.trim(); auto Val = Data.second.trim(); // These fields are available if the kernel is configured with CONFIG_SMP. if (Name == "processor") Val.getAsInteger(10, CurProcessor); else if (Name == "physical id") Val.getAsInteger(10, CurPhysicalId); else if (Name == "siblings") Val.getAsInteger(10, CurSiblings); else if (Name == "core id") { Val.getAsInteger(10, CurCoreId); // The processor id corresponds to an index into cpu_set_t. 
if (CPU_ISSET(CurProcessor, &Affinity)) CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled); } } return CPU_COUNT(&Enabled); } #elif defined(__linux__) && defined(__powerpc__) int computeHostNumPhysicalCores() { cpu_set_t Affinity; if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0) return CPU_COUNT(&Affinity); // The call to sched_getaffinity() may have failed because the Affinity // mask is too small for the number of CPU's on the system (i.e. the // system has more than 1024 CPUs). Allocate a mask large enough for // twice as many CPUs. cpu_set_t *DynAffinity; DynAffinity = CPU_ALLOC(2048); if (sched_getaffinity(0, CPU_ALLOC_SIZE(2048), DynAffinity) == 0) { int NumCPUs = CPU_COUNT(DynAffinity); CPU_FREE(DynAffinity); return NumCPUs; } return -1; } #elif defined(__linux__) && defined(__s390x__) int computeHostNumPhysicalCores() { return sysconf(_SC_NPROCESSORS_ONLN); } #elif defined(__APPLE__) && defined(__x86_64__) #include #include // Gets the number of *physical cores* on the machine. int computeHostNumPhysicalCores() { uint32_t count; size_t len = sizeof(count); sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0); if (count < 1) { int nm[2]; nm[0] = CTL_HW; nm[1] = HW_AVAILCPU; sysctl(nm, 2, &count, &len, NULL, 0); if (count < 1) return -1; } return count; } #elif defined(__MVS__) int computeHostNumPhysicalCores() { enum { // Byte offset of the pointer to the Communications Vector Table (CVT) in // the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and // will be zero-extended to uintptr_t. FLCCVT = 16, // Byte offset of the pointer to the Common System Data Area (CSD) in the // CVT. The table entry is a 31-bit pointer and will be zero-extended to // uintptr_t. CVTCSD = 660, // Byte offset to the number of live CPs in the LPAR, stored as a signed // 32-bit value in the table. CSD_NUMBER_ONLINE_STANDARD_CPS = 264, }; char *PSA = 0; char *CVT = reinterpret_cast( static_cast(reinterpret_cast(PSA[FLCCVT]))); char *CSD = reinterpret_cast( static_cast(reinterpret_cast(CVT[CVTCSD]))); return reinterpret_cast(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]); } #elif defined(_WIN32) && LLVM_ENABLE_THREADS != 0 // Defined in llvm/lib/Support/Windows/Threading.inc int computeHostNumPhysicalCores(); #else // On other systems, return -1 to indicate unknown. 
static int computeHostNumPhysicalCores() { return -1; } #endif int sys::getHostNumPhysicalCores() { static int NumCores = computeHostNumPhysicalCores(); return NumCores; } #if defined(__i386__) || defined(_M_IX86) || \ defined(__x86_64__) || defined(_M_X64) bool sys::getHostCPUFeatures(StringMap &Features) { unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; unsigned MaxLevel; if (getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX) || MaxLevel < 1) return false; getX86CpuIDAndInfo(1, &EAX, &EBX, &ECX, &EDX); Features["cx8"] = (EDX >> 8) & 1; Features["cmov"] = (EDX >> 15) & 1; Features["mmx"] = (EDX >> 23) & 1; Features["fxsr"] = (EDX >> 24) & 1; Features["sse"] = (EDX >> 25) & 1; Features["sse2"] = (EDX >> 26) & 1; Features["sse3"] = (ECX >> 0) & 1; Features["pclmul"] = (ECX >> 1) & 1; Features["ssse3"] = (ECX >> 9) & 1; Features["cx16"] = (ECX >> 13) & 1; Features["sse4.1"] = (ECX >> 19) & 1; Features["sse4.2"] = (ECX >> 20) & 1; Features["movbe"] = (ECX >> 22) & 1; Features["popcnt"] = (ECX >> 23) & 1; Features["aes"] = (ECX >> 25) & 1; Features["rdrnd"] = (ECX >> 30) & 1; // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV // indicates that the AVX registers will be saved and restored on context // switch, then we have full AVX support. bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX); bool HasAVXSave = HasXSave && ((ECX >> 28) & 1) && ((EAX & 0x6) == 0x6); #if defined(__APPLE__) // Darwin lazily saves the AVX512 context on first use: trust that the OS will // save the AVX512 context if we use AVX512 instructions, even the bit is not // set right now. bool HasAVX512Save = true; #else // AVX512 requires additional context to be saved by the OS. bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0); #endif // AMX requires additional context to be saved by the OS. const unsigned AMXBits = (1 << 17) | (1 << 18); bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits); Features["avx"] = HasAVXSave; Features["fma"] = ((ECX >> 12) & 1) && HasAVXSave; // Only enable XSAVE if OS has enabled support for saving YMM state. 
Features["xsave"] = ((ECX >> 26) & 1) && HasAVXSave; Features["f16c"] = ((ECX >> 29) & 1) && HasAVXSave; unsigned MaxExtLevel; getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); bool HasExtLeaf1 = MaxExtLevel >= 0x80000001 && !getX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); Features["sahf"] = HasExtLeaf1 && ((ECX >> 0) & 1); Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1); Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1); Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1); Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave; Features["lwp"] = HasExtLeaf1 && ((ECX >> 15) & 1); Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave; Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1); Features["mwaitx"] = HasExtLeaf1 && ((ECX >> 29) & 1); Features["64bit"] = HasExtLeaf1 && ((EDX >> 29) & 1); // Miscellaneous memory related features, detected by // using the 0x80000008 leaf of the CPUID instruction bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 && !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX); Features["clzero"] = HasExtLeaf8 && ((EBX >> 0) & 1); Features["wbnoinvd"] = HasExtLeaf8 && ((EBX >> 9) & 1); bool HasLeaf7 = MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1); Features["sgx"] = HasLeaf7 && ((EBX >> 2) & 1); Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1); // AVX2 is only supported if we have the OS save support from AVX. Features["avx2"] = HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave; Features["bmi2"] = HasLeaf7 && ((EBX >> 8) & 1); Features["invpcid"] = HasLeaf7 && ((EBX >> 10) & 1); Features["rtm"] = HasLeaf7 && ((EBX >> 11) & 1); // AVX512 is only supported if the OS supports the context save for it. Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save; Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save; Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1); Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1); Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save; Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1); Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1); Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save; Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save; Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save; Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1); Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save; Features["prefetchwt1"] = HasLeaf7 && ((ECX >> 0) & 1); Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save; Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1); Features["waitpkg"] = HasLeaf7 && ((ECX >> 5) & 1); Features["avx512vbmi2"] = HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save; Features["shstk"] = HasLeaf7 && ((ECX >> 7) & 1); Features["gfni"] = HasLeaf7 && ((ECX >> 8) & 1); Features["vaes"] = HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave; Features["vpclmulqdq"] = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave; Features["avx512vnni"] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save; Features["avx512bitalg"] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save; Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save; Features["rdpid"] = HasLeaf7 && ((ECX >> 22) & 1); Features["kl"] = HasLeaf7 && ((ECX >> 23) & 1); // key locker Features["cldemote"] = HasLeaf7 && ((ECX >> 25) & 1); Features["movdiri"] = HasLeaf7 && 
((ECX >> 27) & 1); Features["movdir64b"] = HasLeaf7 && ((ECX >> 28) & 1); Features["enqcmd"] = HasLeaf7 && ((ECX >> 29) & 1); Features["uintr"] = HasLeaf7 && ((EDX >> 5) & 1); Features["avx512vp2intersect"] = HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save; Features["serialize"] = HasLeaf7 && ((EDX >> 14) & 1); Features["tsxldtrk"] = HasLeaf7 && ((EDX >> 16) & 1); // There are two CPUID leafs which information associated with the pconfig // instruction: // EAX=0x7, ECX=0x0 indicates the availability of the instruction (via the 18th // bit of EDX), while the EAX=0x1b leaf returns information on the // availability of specific pconfig leafs. // The target feature here only refers to the the first of these two. // Users might need to check for the availability of specific pconfig // leaves using cpuid, since that information is ignored while // detecting features using the "-march=native" flag. // For more info, see X86 ISA docs. Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1); Features["amx-bf16"] = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave; Features["amx-tile"] = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave; Features["amx-int8"] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave; bool HasLeaf7Subleaf1 = MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX); Features["avxvnni"] = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave; Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save; Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1); bool HasLeafD = MaxLevel >= 0xd && !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); // Only enable XSAVE if OS has enabled support for saving YMM state. Features["xsaveopt"] = HasLeafD && ((EAX >> 0) & 1) && HasAVXSave; Features["xsavec"] = HasLeafD && ((EAX >> 1) & 1) && HasAVXSave; Features["xsaves"] = HasLeafD && ((EAX >> 3) & 1) && HasAVXSave; bool HasLeaf14 = MaxLevel >= 0x14 && !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX); Features["ptwrite"] = HasLeaf14 && ((EBX >> 4) & 1); bool HasLeaf19 = MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX); Features["widekl"] = HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1); return true; } #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) bool sys::getHostCPUFeatures(StringMap &Features) { std::unique_ptr P = getProcCpuinfoContent(); if (!P) return false; SmallVector Lines; P->getBuffer().split(Lines, "\n"); SmallVector CPUFeatures; // Look for the CPU features. 
for (unsigned I = 0, E = Lines.size(); I != E; ++I) if (Lines[I].startswith("Features")) { Lines[I].split(CPUFeatures, ' '); break; } #if defined(__aarch64__) // Keep track of which crypto features we have seen enum { CAP_AES = 0x1, CAP_PMULL = 0x2, CAP_SHA1 = 0x4, CAP_SHA2 = 0x8 }; uint32_t crypto = 0; #endif for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { StringRef LLVMFeatureStr = StringSwitch(CPUFeatures[I]) #if defined(__aarch64__) .Case("asimd", "neon") .Case("fp", "fp-armv8") .Case("crc32", "crc") #else .Case("half", "fp16") .Case("neon", "neon") .Case("vfpv3", "vfp3") .Case("vfpv3d16", "d16") .Case("vfpv4", "vfp4") .Case("idiva", "hwdiv-arm") .Case("idivt", "hwdiv") #endif .Default(""); #if defined(__aarch64__) // We need to check crypto separately since we need all of the crypto // extensions to enable the subtarget feature if (CPUFeatures[I] == "aes") crypto |= CAP_AES; else if (CPUFeatures[I] == "pmull") crypto |= CAP_PMULL; else if (CPUFeatures[I] == "sha1") crypto |= CAP_SHA1; else if (CPUFeatures[I] == "sha2") crypto |= CAP_SHA2; #endif if (LLVMFeatureStr != "") Features[LLVMFeatureStr] = true; } #if defined(__aarch64__) // If we have all crypto bits we can add the feature if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2)) Features["crypto"] = true; #endif return true; } #elif defined(_WIN32) && (defined(__aarch64__) || defined(_M_ARM64)) bool sys::getHostCPUFeatures(StringMap &Features) { if (IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)) Features["neon"] = true; if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE)) Features["crc"] = true; if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) Features["crypto"] = true; return true; } #else bool sys::getHostCPUFeatures(StringMap &Features) { return false; } #endif std::string sys::getProcessTriple() { std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE); Triple PT(Triple::normalize(TargetTripleString)); if (sizeof(void *) == 8 && PT.isArch32Bit()) PT = PT.get64BitArchVariant(); if (sizeof(void *) == 4 && PT.isArch64Bit()) PT = PT.get32BitArchVariant(); return PT.str(); } diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBlockPlacement.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBlockPlacement.cpp index 581b4b9857af..9ba16003a97a 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMBlockPlacement.cpp +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMBlockPlacement.cpp @@ -1,231 +1,228 @@ //===-- ARMBlockPlacement.cpp - ARM block placement pass ------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This pass re-arranges machine basic blocks to suit target requirements. // Currently it only moves blocks to fix backwards WLS branches. 
// //===----------------------------------------------------------------------===// #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMBasicBlockInfo.h" #include "ARMSubtarget.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" using namespace llvm; #define DEBUG_TYPE "arm-block-placement" #define DEBUG_PREFIX "ARM Block Placement: " namespace llvm { class ARMBlockPlacement : public MachineFunctionPass { private: const ARMBaseInstrInfo *TII; std::unique_ptr BBUtils = nullptr; MachineLoopInfo *MLI = nullptr; public: static char ID; ARMBlockPlacement() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; void moveBasicBlock(MachineBasicBlock *BB, MachineBasicBlock *After); bool blockIsBefore(MachineBasicBlock *BB, MachineBasicBlock *Other); void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } }; } // namespace llvm FunctionPass *llvm::createARMBlockPlacementPass() { return new ARMBlockPlacement(); } char ARMBlockPlacement::ID = 0; INITIALIZE_PASS(ARMBlockPlacement, DEBUG_TYPE, "ARM block placement", false, false) bool ARMBlockPlacement::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; const ARMSubtarget &ST = static_cast(MF.getSubtarget()); if (!ST.hasLOB()) return false; LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Running on " << MF.getName() << "\n"); MLI = &getAnalysis(); TII = static_cast(ST.getInstrInfo()); BBUtils = std::unique_ptr(new ARMBasicBlockUtils(MF)); MF.RenumberBlocks(); BBUtils->computeAllBlockSizes(); BBUtils->adjustBBOffsetsAfter(&MF.front()); bool Changed = false; // Find loops with a backwards branching WLS. // This requires looping over the loops in the function, checking each // preheader for a WLS and if its target is before the preheader. If moving // the target block wouldn't produce another backwards WLS or a new forwards // LE branch then move the target block after the preheader. for (auto *ML : *MLI) { MachineBasicBlock *Preheader = ML->getLoopPredecessor(); if (!Preheader) continue; for (auto &Terminator : Preheader->terminators()) { if (Terminator.getOpcode() != ARM::t2WhileLoopStart) continue; MachineBasicBlock *LoopExit = Terminator.getOperand(1).getMBB(); // We don't want to move the function's entry block. if (!LoopExit->getPrevNode()) continue; if (blockIsBefore(Preheader, LoopExit)) continue; LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Found a backwards WLS from " << Preheader->getFullName() << " to " << LoopExit->getFullName() << "\n"); // Make sure that moving the target block doesn't cause any of its WLSs // that were previously not backwards to become backwards bool CanMove = true; for (auto &LoopExitTerminator : LoopExit->terminators()) { if (LoopExitTerminator.getOpcode() != ARM::t2WhileLoopStart) continue; // An example loop structure where the LoopExit can't be moved, since // bb1's WLS will become backwards once it's moved after bb3 bb1: - // LoopExit // WLS bb2 - LoopExit2 // bb2: // ... // bb3: - Preheader // WLS bb1 // bb4: - Header MachineBasicBlock *LoopExit2 = LoopExitTerminator.getOperand(1).getMBB(); // If the WLS from LoopExit to LoopExit2 is already backwards then // moving LoopExit won't affect it, so it can be moved. If LoopExit2 is // after the Preheader then moving will keep it as a forward branch, so // it can be moved. 
If LoopExit2 is between the Preheader and LoopExit // then moving LoopExit will make it a backwards branch, so it can't be // moved since we'd fix one and introduce one backwards branch. // TODO: Analyse the blocks to make a decision if it would be worth // moving LoopExit even if LoopExit2 is between the Preheader and // LoopExit. if (!blockIsBefore(LoopExit2, LoopExit) && (LoopExit2 == Preheader || blockIsBefore(LoopExit2, Preheader))) { LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Can't move the target block as it would " "introduce a new backwards WLS branch\n"); CanMove = false; break; } } if (CanMove) { // Make sure no LEs become forwards. // An example loop structure where the LoopExit can't be moved, since // bb2's LE will become forwards once bb1 is moved after bb3. // bb1: - LoopExit // bb2: // LE bb1 - Terminator // bb3: - Preheader // WLS bb1 // bb4: - Header for (auto It = LoopExit->getIterator(); It != Preheader->getIterator(); It++) { MachineBasicBlock *MBB = &*It; for (auto &Terminator : MBB->terminators()) { - if (Terminator.getOpcode() != ARM::t2LoopEnd && - Terminator.getOpcode() != ARM::t2LoopEndDec) + if (Terminator.getOpcode() != ARM::t2LoopEndDec) continue; MachineBasicBlock *LETarget = Terminator.getOperand(2).getMBB(); // The LE will become forwards branching if it branches to LoopExit // which isn't allowed by the architecture, so we should avoid // introducing these. // TODO: Analyse the blocks to make a decision if it would be worth // moving LoopExit even if we'd introduce a forwards LE if (LETarget == LoopExit) { LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Can't move the target block as it would " "introduce a new forwards LE branch\n"); CanMove = false; break; } } } if (!CanMove) break; } if (CanMove) { moveBasicBlock(LoopExit, Preheader); Changed = true; break; } } } return Changed; } bool ARMBlockPlacement::blockIsBefore(MachineBasicBlock *BB, MachineBasicBlock *Other) { return BBUtils->getOffsetOf(Other) > BBUtils->getOffsetOf(BB); } void ARMBlockPlacement::moveBasicBlock(MachineBasicBlock *BB, MachineBasicBlock *After) { LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Moving " << BB->getName() << " after " << After->getName() << "\n"); MachineBasicBlock *BBPrevious = BB->getPrevNode(); assert(BBPrevious && "Cannot move the function entry basic block"); MachineBasicBlock *AfterNext = After->getNextNode(); MachineBasicBlock *BBNext = BB->getNextNode(); BB->moveAfter(After); auto FixFallthrough = [&](MachineBasicBlock *From, MachineBasicBlock *To) { LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Checking for fallthrough from " << From->getName() << " to " << To->getName() << "\n"); assert(From->isSuccessor(To) && "'To' is expected to be a successor of 'From'"); MachineInstr &Terminator = *(--From->terminators().end()); if (!Terminator.isUnconditionalBranch()) { // The BB doesn't have an unconditional branch so it relied on // fall-through. Fix by adding an unconditional branch to the moved BB. - unsigned BrOpc = - BBUtils->isBBInRange(&Terminator, To, 254) ? ARM::tB : ARM::t2B; MachineInstrBuilder MIB = - BuildMI(From, Terminator.getDebugLoc(), TII->get(BrOpc)); + BuildMI(From, Terminator.getDebugLoc(), TII->get(ARM::t2B)); MIB.addMBB(To); MIB.addImm(ARMCC::CondCodes::AL); MIB.addReg(ARM::NoRegister); LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Adding unconditional branch from " << From->getName() << " to " << To->getName() << ": " << *MIB.getInstr()); } }; // Fix fall-through to the moved BB from the one that used to be before it. 
if (BBPrevious->isSuccessor(BB)) FixFallthrough(BBPrevious, BB); // Fix fall through from the destination BB to the one that used to follow. if (AfterNext && After->isSuccessor(AfterNext)) FixFallthrough(After, AfterNext); // Fix fall through from the moved BB to the one that used to follow. if (BBNext && BB->isSuccessor(BBNext)) FixFallthrough(BB, BBNext); BBUtils->adjustBBOffsetsAfter(After); } diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp index 61a924078f29..8dc532058492 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -1,1725 +1,1731 @@ //===-- ARMLowOverheadLoops.cpp - CodeGen Low-overhead Loops ---*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file /// Finalize v8.1-m low-overhead loops by converting the associated pseudo /// instructions into machine operations. /// The expectation is that the loop contains three pseudo instructions: /// - t2*LoopStart - placed in the preheader or pre-preheader. The do-loop /// form should be in the preheader, whereas the while form should be in the /// preheader's only predecessor. /// - t2LoopDec - placed within the loop body. /// - t2LoopEnd - the loop latch terminator. /// /// In addition to this, we also look for the presence of the VCTP instruction, /// which determines whether we can generate the tail-predicated low-overhead /// loop form. /// /// Assumptions and Dependencies: /// Low-overhead loops are constructed and executed using a setup instruction: /// DLS, WLS, DLSTP or WLSTP and an instruction that loops back: LE or LETP. /// WLS(TP) and LE(TP) are branching instructions with a (large) limited range /// but fixed polarity: WLS can only branch forwards and LE can only branch /// backwards. These restrictions mean that this pass is dependent upon block /// layout and block sizes, which is why it's the last pass to run. The same is /// true for ConstantIslands, but this pass does not increase the size of the /// basic blocks, nor does it change the CFG. Instructions are mainly removed /// during the transform and pseudo instructions are replaced by real ones. In /// some cases, when we have to revert to a 'normal' loop, we have to introduce /// multiple instructions for a single pseudo (see RevertWhile and /// RevertLoopEnd). To handle this situation, t2WhileLoopStart and t2LoopEnd /// are defined to be as large as this maximum sequence of replacement /// instructions. /// /// A note on VPR.P0 (the lane mask): /// VPT, VCMP, VPNOT and VCTP won't overwrite VPR.P0 when they update it in a /// "VPT Active" context (which includes low-overhead loops and vpt blocks). /// They will simply "and" the result of their calculation with the current /// value of VPR.P0. You can think of it like this: /// \verbatim /// if VPT active: ; Between a DLSTP/LETP, or for predicated instrs /// VPR.P0 &= Value /// else /// VPR.P0 = Value /// \endverbatim /// When we're inside the low-overhead loop (between DLSTP and LETP), we always /// fall in the "VPT active" case, so we can consider that all VPR writes by /// one of those instructions are actually an "and".
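The "A note on VPR.P0" section above says that, in a "VPT Active" context, VCTP/VCMP/VPT/VPNOT combine their result with the existing lane mask instead of overwriting it. A behavioural sketch of just that rule, with a 16-bit value standing in for the per-byte predicate in VPR.P0 (the type name LaneMaskModel is this example's own and not part of this patch):

// Behavioural sketch of the VPR.P0 note above: inside a tail-predicated
// loop every predicate write behaves as an AND with the current mask.
#include <cstdint>

struct LaneMaskModel {
  uint16_t P0 = 0xFFFF;   // One bit per predicated byte lane.
  bool VPTActive = false; // True between a DLSTP/LETP, or for predicated instrs.

  // Models a VCTP/VCMP/VPT/VPNOT result being written to VPR.P0.
  void writePredicate(uint16_t Value) {
    if (VPTActive)
      P0 &= Value; // "VPT Active": combine with the existing mask.
    else
      P0 = Value;  // Otherwise the new value simply replaces it.
  }
};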
//===----------------------------------------------------------------------===// #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMBasicBlockInfo.h" #include "ARMSubtarget.h" #include "MVETailPredUtils.h" #include "Thumb2InstrInfo.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineLoopUtils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/ReachingDefAnalysis.h" #include "llvm/MC/MCInstrDesc.h" using namespace llvm; #define DEBUG_TYPE "arm-low-overhead-loops" #define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass" static cl::opt DisableTailPredication("arm-loloops-disable-tailpred", cl::Hidden, cl::desc("Disable tail-predication in the ARM LowOverheadLoop pass"), cl::init(false)); static bool isVectorPredicated(MachineInstr *MI) { int PIdx = llvm::findFirstVPTPredOperandIdx(*MI); return PIdx != -1 && MI->getOperand(PIdx + 1).getReg() == ARM::VPR; } static bool isVectorPredicate(MachineInstr *MI) { return MI->findRegisterDefOperandIdx(ARM::VPR) != -1; } static bool hasVPRUse(MachineInstr &MI) { return MI.findRegisterUseOperandIdx(ARM::VPR) != -1; } static bool isDomainMVE(MachineInstr *MI) { uint64_t Domain = MI->getDesc().TSFlags & ARMII::DomainMask; return Domain == ARMII::DomainMVE; } static bool shouldInspect(MachineInstr &MI) { return isDomainMVE(&MI) || isVectorPredicate(&MI) || hasVPRUse(MI); } static bool isDo(MachineInstr *MI) { return MI->getOpcode() != ARM::t2WhileLoopStart; } namespace { using InstSet = SmallPtrSetImpl; class PostOrderLoopTraversal { MachineLoop &ML; MachineLoopInfo &MLI; SmallPtrSet Visited; SmallVector Order; public: PostOrderLoopTraversal(MachineLoop &ML, MachineLoopInfo &MLI) : ML(ML), MLI(MLI) { } const SmallVectorImpl &getOrder() const { return Order; } // Visit all the blocks within the loop, as well as exit blocks and any // blocks properly dominating the header. void ProcessLoop() { std::function Search = [this, &Search] (MachineBasicBlock *MBB) -> void { if (Visited.count(MBB)) return; Visited.insert(MBB); for (auto *Succ : MBB->successors()) { if (!ML.contains(Succ)) continue; Search(Succ); } Order.push_back(MBB); }; // Insert exit blocks. SmallVector ExitBlocks; ML.getExitBlocks(ExitBlocks); append_range(Order, ExitBlocks); // Then add the loop body. Search(ML.getHeader()); // Then try the preheader and its predecessors. std::function GetPredecessor = [this, &GetPredecessor] (MachineBasicBlock *MBB) -> void { Order.push_back(MBB); if (MBB->pred_size() == 1) GetPredecessor(*MBB->pred_begin()); }; if (auto *Preheader = ML.getLoopPreheader()) GetPredecessor(Preheader); else if (auto *Preheader = MLI.findLoopPreheader(&ML, true)) GetPredecessor(Preheader); } }; struct PredicatedMI { MachineInstr *MI = nullptr; SetVector Predicates; public: PredicatedMI(MachineInstr *I, SetVector &Preds) : MI(I) { assert(I && "Instruction must not be null!"); Predicates.insert(Preds.begin(), Preds.end()); } }; // Represent the current state of the VPR and hold all instances which // represent a VPT block, which is a list of instructions that begins with a // VPT/VPST and has a maximum of four proceeding instructions. All // instructions within the block are predicated upon the vpr and we allow // instructions to define the vpr within in the block too. 
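The comment above describes a VPT block as a VPT/VPST followed by at most four predicated instructions, which is the invariant VPTState::insert asserts further down ("Too many instructions in VPT block!"). A small container sketch of that shape, with strings standing in for MachineInstr pointers (the class name VPTBlockSketch is this example's own, not from this patch):

// Illustrative sketch of the VPT block shape described above: a leading
// VPT/VPST plus at most four predicated instructions.
#include <cassert>
#include <string>
#include <utility>
#include <vector>

class VPTBlockSketch {
  std::vector<std::string> Insts; // Front element is the VPT or VPST.
public:
  explicit VPTBlockSketch(std::string VPTOrVPST) {
    Insts.push_back(std::move(VPTOrVPST));
  }
  void insert(std::string PredicatedInst) {
    Insts.push_back(std::move(PredicatedInst));
    // VPT/VPST + up to 4 predicated instructions, as in VPTState::insert.
    assert(Insts.size() <= 5 && "Too many instructions in VPT block!");
  }
  size_t size() const { return Insts.size(); }
};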
class VPTState { friend struct LowOverheadLoop; SmallVector Insts; static SmallVector Blocks; static SetVector CurrentPredicates; static std::map> PredicatedInsts; static void CreateVPTBlock(MachineInstr *MI) { assert((CurrentPredicates.size() || MI->getParent()->isLiveIn(ARM::VPR)) && "Can't begin VPT without predicate"); Blocks.emplace_back(MI); // The execution of MI is predicated upon the current set of instructions // that are AND'ed together to form the VPR predicate value. In the case // that MI is a VPT, CurrentPredicates will also just be MI. PredicatedInsts.emplace( MI, std::make_unique(MI, CurrentPredicates)); } static void reset() { Blocks.clear(); PredicatedInsts.clear(); CurrentPredicates.clear(); } static void addInst(MachineInstr *MI) { Blocks.back().insert(MI); PredicatedInsts.emplace( MI, std::make_unique(MI, CurrentPredicates)); } static void addPredicate(MachineInstr *MI) { LLVM_DEBUG(dbgs() << "ARM Loops: Adding VPT Predicate: " << *MI); CurrentPredicates.insert(MI); } static void resetPredicate(MachineInstr *MI) { LLVM_DEBUG(dbgs() << "ARM Loops: Resetting VPT Predicate: " << *MI); CurrentPredicates.clear(); CurrentPredicates.insert(MI); } public: // Have we found an instruction within the block which defines the vpr? If // so, not all the instructions in the block will have the same predicate. static bool hasUniformPredicate(VPTState &Block) { return getDivergent(Block) == nullptr; } // If it exists, return the first internal instruction which modifies the // VPR. static MachineInstr *getDivergent(VPTState &Block) { SmallVectorImpl &Insts = Block.getInsts(); for (unsigned i = 1; i < Insts.size(); ++i) { MachineInstr *Next = Insts[i]; if (isVectorPredicate(Next)) return Next; // Found an instruction altering the vpr. } return nullptr; } // Return whether the given instruction is predicated upon a VCTP. static bool isPredicatedOnVCTP(MachineInstr *MI, bool Exclusive = false) { SetVector &Predicates = PredicatedInsts[MI]->Predicates; if (Exclusive && Predicates.size() != 1) return false; for (auto *PredMI : Predicates) if (isVCTP(PredMI)) return true; return false; } // Is the VPST, controlling the block entry, predicated upon a VCTP. static bool isEntryPredicatedOnVCTP(VPTState &Block, bool Exclusive = false) { SmallVectorImpl &Insts = Block.getInsts(); return isPredicatedOnVCTP(Insts.front(), Exclusive); } // If this block begins with a VPT, we can check whether it's using // at least one predicated input(s), as well as possible loop invariant // which would result in it being implicitly predicated. static bool hasImplicitlyValidVPT(VPTState &Block, ReachingDefAnalysis &RDA) { SmallVectorImpl &Insts = Block.getInsts(); MachineInstr *VPT = Insts.front(); assert(isVPTOpcode(VPT->getOpcode()) && "Expected VPT block to begin with VPT/VPST"); if (VPT->getOpcode() == ARM::MVE_VPST) return false; auto IsOperandPredicated = [&](MachineInstr *MI, unsigned Idx) { MachineInstr *Op = RDA.getMIOperand(MI, MI->getOperand(Idx)); return Op && PredicatedInsts.count(Op) && isPredicatedOnVCTP(Op); }; auto IsOperandInvariant = [&](MachineInstr *MI, unsigned Idx) { MachineOperand &MO = MI->getOperand(Idx); if (!MO.isReg() || !MO.getReg()) return true; SmallPtrSet Defs; RDA.getGlobalReachingDefs(MI, MO.getReg(), Defs); if (Defs.empty()) return true; for (auto *Def : Defs) if (Def->getParent() == VPT->getParent()) return false; return true; }; // Check that at least one of the operands is directly predicated on a // vctp and allow an invariant value too. 
return (IsOperandPredicated(VPT, 1) || IsOperandPredicated(VPT, 2)) && (IsOperandPredicated(VPT, 1) || IsOperandInvariant(VPT, 1)) && (IsOperandPredicated(VPT, 2) || IsOperandInvariant(VPT, 2)); } static bool isValid(ReachingDefAnalysis &RDA) { // All predication within the loop should be based on vctp. If the block // isn't predicated on entry, check whether the vctp is within the block // and that all other instructions are then predicated on it. for (auto &Block : Blocks) { if (isEntryPredicatedOnVCTP(Block, false) || hasImplicitlyValidVPT(Block, RDA)) continue; SmallVectorImpl &Insts = Block.getInsts(); // We don't know how to convert a block with just a VPT;VCTP into // anything valid once we remove the VCTP. For now just bail out. assert(isVPTOpcode(Insts.front()->getOpcode()) && "Expected VPT block to start with a VPST or VPT!"); if (Insts.size() == 2 && Insts.front()->getOpcode() != ARM::MVE_VPST && isVCTP(Insts.back())) return false; for (auto *MI : Insts) { // Check that any internal VCTPs are 'Then' predicated. if (isVCTP(MI) && getVPTInstrPredicate(*MI) != ARMVCC::Then) return false; // Skip other instructions that build up the predicate. if (MI->getOpcode() == ARM::MVE_VPST || isVectorPredicate(MI)) continue; // Check that any other instructions are predicated upon a vctp. // TODO: We could infer when VPTs are implicitly predicated on the // vctp (when the operands are predicated). if (!isPredicatedOnVCTP(MI)) { LLVM_DEBUG(dbgs() << "ARM Loops: Can't convert: " << *MI); return false; } } } return true; } VPTState(MachineInstr *MI) { Insts.push_back(MI); } void insert(MachineInstr *MI) { Insts.push_back(MI); // VPT/VPST + 4 predicated instructions. assert(Insts.size() <= 5 && "Too many instructions in VPT block!"); } bool containsVCTP() const { for (auto *MI : Insts) if (isVCTP(MI)) return true; return false; } unsigned size() const { return Insts.size(); } SmallVectorImpl &getInsts() { return Insts; } }; struct LowOverheadLoop { MachineLoop &ML; MachineBasicBlock *Preheader = nullptr; MachineLoopInfo &MLI; ReachingDefAnalysis &RDA; const TargetRegisterInfo &TRI; const ARMBaseInstrInfo &TII; MachineFunction *MF = nullptr; MachineBasicBlock::iterator StartInsertPt; MachineBasicBlock *StartInsertBB = nullptr; MachineInstr *Start = nullptr; MachineInstr *Dec = nullptr; MachineInstr *End = nullptr; MachineOperand TPNumElements; SmallVector VCTPs; SmallPtrSet ToRemove; SmallPtrSet BlockMasksToRecompute; bool Revert = false; bool CannotTailPredicate = false; LowOverheadLoop(MachineLoop &ML, MachineLoopInfo &MLI, ReachingDefAnalysis &RDA, const TargetRegisterInfo &TRI, const ARMBaseInstrInfo &TII) : ML(ML), MLI(MLI), RDA(RDA), TRI(TRI), TII(TII), TPNumElements(MachineOperand::CreateImm(0)) { MF = ML.getHeader()->getParent(); if (auto *MBB = ML.getLoopPreheader()) Preheader = MBB; else if (auto *MBB = MLI.findLoopPreheader(&ML, true)) Preheader = MBB; VPTState::reset(); } // If this is an MVE instruction, check that we know how to use tail // predication with it. Record VPT blocks and return whether the // instruction is valid for tail predication. bool ValidateMVEInst(MachineInstr *MI); void AnalyseMVEInst(MachineInstr *MI) { CannotTailPredicate = !ValidateMVEInst(MI); } bool IsTailPredicationLegal() const { // For now, let's keep things really simple and only support a single // block for tail predication. 
return !Revert && FoundAllComponents() && !VCTPs.empty() && !CannotTailPredicate && ML.getNumBlocks() == 1; } // Given that MI is a VCTP, check that is equivalent to any other VCTPs // found. bool AddVCTP(MachineInstr *MI); // Check that the predication in the loop will be equivalent once we // perform the conversion. Also ensure that we can provide the number // of elements to the loop start instruction. bool ValidateTailPredicate(); // Check that any values available outside of the loop will be the same // after tail predication conversion. bool ValidateLiveOuts(); // Is it safe to define LR with DLS/WLS? // LR can be defined if it is the operand to start, because it's the same // value, or if it's going to be equivalent to the operand to Start. MachineInstr *isSafeToDefineLR(); // Check the branch targets are within range and we satisfy our // restrictions. void Validate(ARMBasicBlockUtils *BBUtils); bool FoundAllComponents() const { return Start && Dec && End; } SmallVectorImpl &getVPTBlocks() { return VPTState::Blocks; } // Return the operand for the loop start instruction. This will be the loop // iteration count, or the number of elements if we're tail predicating. MachineOperand &getLoopStartOperand() { if (IsTailPredicationLegal()) return TPNumElements; return isDo(Start) ? Start->getOperand(1) : Start->getOperand(0); } unsigned getStartOpcode() const { bool IsDo = isDo(Start); if (!IsTailPredicationLegal()) return IsDo ? ARM::t2DLS : ARM::t2WLS; return VCTPOpcodeToLSTP(VCTPs.back()->getOpcode(), IsDo); } void dump() const { if (Start) dbgs() << "ARM Loops: Found Loop Start: " << *Start; if (Dec) dbgs() << "ARM Loops: Found Loop Dec: " << *Dec; if (End) dbgs() << "ARM Loops: Found Loop End: " << *End; if (!VCTPs.empty()) { dbgs() << "ARM Loops: Found VCTP(s):\n"; for (auto *MI : VCTPs) dbgs() << " - " << *MI; } if (!FoundAllComponents()) dbgs() << "ARM Loops: Not a low-overhead loop.\n"; else if (!(Start && Dec && End)) dbgs() << "ARM Loops: Failed to find all loop components.\n"; } }; class ARMLowOverheadLoops : public MachineFunctionPass { MachineFunction *MF = nullptr; MachineLoopInfo *MLI = nullptr; ReachingDefAnalysis *RDA = nullptr; const ARMBaseInstrInfo *TII = nullptr; MachineRegisterInfo *MRI = nullptr; const TargetRegisterInfo *TRI = nullptr; std::unique_ptr BBUtils = nullptr; public: static char ID; ARMLowOverheadLoops() : MachineFunctionPass(ID) { } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired(); AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } bool runOnMachineFunction(MachineFunction &MF) override; MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( MachineFunctionProperties::Property::NoVRegs).set( MachineFunctionProperties::Property::TracksLiveness); } StringRef getPassName() const override { return ARM_LOW_OVERHEAD_LOOPS_NAME; } private: bool ProcessLoop(MachineLoop *ML); bool RevertNonLoops(); void RevertWhile(MachineInstr *MI) const; void RevertDo(MachineInstr *MI) const; bool RevertLoopDec(MachineInstr *MI) const; void RevertLoopEnd(MachineInstr *MI, bool SkipCmp = false) const; void RevertLoopEndDec(MachineInstr *MI) const; void ConvertVPTBlocks(LowOverheadLoop &LoLoop); MachineInstr *ExpandLoopStart(LowOverheadLoop &LoLoop); void Expand(LowOverheadLoop &LoLoop); void IterationCountDCE(LowOverheadLoop &LoLoop); }; } char ARMLowOverheadLoops::ID = 0; SmallVector VPTState::Blocks; SetVector VPTState::CurrentPredicates; std::map> 
VPTState::PredicatedInsts; INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME, false, false) static bool TryRemove(MachineInstr *MI, ReachingDefAnalysis &RDA, InstSet &ToRemove, InstSet &Ignore) { // Check that we can remove all of Killed without having to modify any IT // blocks. auto WontCorruptITs = [](InstSet &Killed, ReachingDefAnalysis &RDA) { // Collect the dead code and the MBBs in which they reside. SmallPtrSet BasicBlocks; for (auto *Dead : Killed) BasicBlocks.insert(Dead->getParent()); // Collect IT blocks in all affected basic blocks. std::map> ITBlocks; for (auto *MBB : BasicBlocks) { for (auto &IT : *MBB) { if (IT.getOpcode() != ARM::t2IT) continue; RDA.getReachingLocalUses(&IT, MCRegister::from(ARM::ITSTATE), ITBlocks[&IT]); } } // If we're removing all of the instructions within an IT block, then // also remove the IT instruction. SmallPtrSet ModifiedITs; SmallPtrSet RemoveITs; for (auto *Dead : Killed) { if (MachineOperand *MO = Dead->findRegisterUseOperand(ARM::ITSTATE)) { MachineInstr *IT = RDA.getMIOperand(Dead, *MO); RemoveITs.insert(IT); auto &CurrentBlock = ITBlocks[IT]; CurrentBlock.erase(Dead); if (CurrentBlock.empty()) ModifiedITs.erase(IT); else ModifiedITs.insert(IT); } } if (!ModifiedITs.empty()) return false; Killed.insert(RemoveITs.begin(), RemoveITs.end()); return true; }; SmallPtrSet Uses; if (!RDA.isSafeToRemove(MI, Uses, Ignore)) return false; if (WontCorruptITs(Uses, RDA)) { ToRemove.insert(Uses.begin(), Uses.end()); LLVM_DEBUG(dbgs() << "ARM Loops: Able to remove: " << *MI << " - can also remove:\n"; for (auto *Use : Uses) dbgs() << " - " << *Use); SmallPtrSet Killed; RDA.collectKilledOperands(MI, Killed); if (WontCorruptITs(Killed, RDA)) { ToRemove.insert(Killed.begin(), Killed.end()); LLVM_DEBUG(for (auto *Dead : Killed) dbgs() << " - " << *Dead); } return true; } return false; } bool LowOverheadLoop::ValidateTailPredicate() { if (!IsTailPredicationLegal()) { LLVM_DEBUG(if (VCTPs.empty()) dbgs() << "ARM Loops: Didn't find a VCTP instruction.\n"; dbgs() << "ARM Loops: Tail-predication is not valid.\n"); return false; } assert(!VCTPs.empty() && "VCTP instruction expected but is not set"); assert(ML.getBlocks().size() == 1 && "Shouldn't be processing a loop with more than one block"); if (DisableTailPredication) { LLVM_DEBUG(dbgs() << "ARM Loops: tail-predication is disabled\n"); return false; } if (!VPTState::isValid(RDA)) { LLVM_DEBUG(dbgs() << "ARM Loops: Invalid VPT state.\n"); return false; } if (!ValidateLiveOuts()) { LLVM_DEBUG(dbgs() << "ARM Loops: Invalid live outs.\n"); return false; } // Check that creating a [W|D]LSTP, which will define LR with an element // count instead of iteration count, won't affect any other instructions // than the LoopStart and LoopDec. // TODO: We should try to insert the [W|D]LSTP after any of the other uses. Register StartReg = isDo(Start) ? Start->getOperand(1).getReg() : Start->getOperand(0).getReg(); if (StartInsertPt == Start && StartReg == ARM::LR) { if (auto *IterCount = RDA.getMIOperand(Start, isDo(Start) ? 1 : 0)) { SmallPtrSet Uses; RDA.getGlobalUses(IterCount, MCRegister::from(ARM::LR), Uses); for (auto *Use : Uses) { if (Use != Start && Use != Dec) { LLVM_DEBUG(dbgs() << " ARM Loops: Found LR use: " << *Use); return false; } } } } // For tail predication, we need to provide the number of elements, instead // of the iteration count, to the loop start instruction. 
The number of // elements is provided to the vctp instruction, so we need to check that // we can use this register at InsertPt. MachineInstr *VCTP = VCTPs.back(); if (Start->getOpcode() == ARM::t2DoLoopStartTP) { TPNumElements = Start->getOperand(2); StartInsertPt = Start; StartInsertBB = Start->getParent(); } else { TPNumElements = VCTP->getOperand(1); MCRegister NumElements = TPNumElements.getReg().asMCReg(); // If the register is defined within loop, then we can't perform TP. // TODO: Check whether this is just a mov of a register that would be // available. if (RDA.hasLocalDefBefore(VCTP, NumElements)) { LLVM_DEBUG(dbgs() << "ARM Loops: VCTP operand is defined in the loop.\n"); return false; } // The element count register maybe defined after InsertPt, in which case we // need to try to move either InsertPt or the def so that the [w|d]lstp can // use the value. if (StartInsertPt != StartInsertBB->end() && !RDA.isReachingDefLiveOut(&*StartInsertPt, NumElements)) { if (auto *ElemDef = RDA.getLocalLiveOutMIDef(StartInsertBB, NumElements)) { if (RDA.isSafeToMoveForwards(ElemDef, &*StartInsertPt)) { ElemDef->removeFromParent(); StartInsertBB->insert(StartInsertPt, ElemDef); LLVM_DEBUG(dbgs() << "ARM Loops: Moved element count def: " << *ElemDef); } else if (RDA.isSafeToMoveBackwards(&*StartInsertPt, ElemDef)) { StartInsertPt->removeFromParent(); StartInsertBB->insertAfter(MachineBasicBlock::iterator(ElemDef), &*StartInsertPt); LLVM_DEBUG(dbgs() << "ARM Loops: Moved start past: " << *ElemDef); } else { // If we fail to move an instruction and the element count is provided // by a mov, use the mov operand if it will have the same value at the // insertion point MachineOperand Operand = ElemDef->getOperand(1); if (isMovRegOpcode(ElemDef->getOpcode()) && RDA.getUniqueReachingMIDef(ElemDef, Operand.getReg().asMCReg()) == RDA.getUniqueReachingMIDef(&*StartInsertPt, Operand.getReg().asMCReg())) { TPNumElements = Operand; NumElements = TPNumElements.getReg(); } else { LLVM_DEBUG(dbgs() << "ARM Loops: Unable to move element count to loop " << "start instruction.\n"); return false; } } } } // Especially in the case of while loops, InsertBB may not be the // preheader, so we need to check that the register isn't redefined // before entering the loop. auto CannotProvideElements = [this](MachineBasicBlock *MBB, MCRegister NumElements) { if (MBB->empty()) return false; // NumElements is redefined in this block. if (RDA.hasLocalDefBefore(&MBB->back(), NumElements)) return true; // Don't continue searching up through multiple predecessors. if (MBB->pred_size() > 1) return true; return false; }; // Search backwards for a def, until we get to InsertBB. MachineBasicBlock *MBB = Preheader; while (MBB && MBB != StartInsertBB) { if (CannotProvideElements(MBB, NumElements)) { LLVM_DEBUG(dbgs() << "ARM Loops: Unable to provide element count.\n"); return false; } MBB = *MBB->pred_begin(); } } // Could inserting the [W|D]LSTP cause some unintended affects? In a perfect // world the [w|d]lstp instruction would be last instruction in the preheader // and so it would only affect instructions within the loop body. But due to // scheduling, and/or the logic in this pass (above), the insertion point can // be moved earlier. 
So if the Loop Start isn't the last instruction in the // preheader, and if the initial element count is smaller than the vector // width, the Loop Start instruction will immediately generate one or more // false lane masks which can, incorrectly, affect the following MVE // instructions in the preheader. if (std::any_of(StartInsertPt, StartInsertBB->end(), shouldInspect)) { LLVM_DEBUG(dbgs() << "ARM Loops: Instruction blocks [W|D]LSTP\n"); return false; } // Check that the value change of the element count is what we expect and // that the predication will be equivalent. For this we need: // NumElements = NumElements - VectorWidth. The sub will be a sub immediate // and we can also allow register copies within the chain too. auto IsValidSub = [](MachineInstr *MI, int ExpectedVecWidth) { return -getAddSubImmediate(*MI) == ExpectedVecWidth; }; MachineBasicBlock *MBB = VCTP->getParent(); // Remove modifications to the element count since they have no purpose in a // tail predicated loop. Explicitly refer to the vctp operand no matter which // register NumElements has been assigned to, since that is what the // modifications will be using. if (auto *Def = RDA.getUniqueReachingMIDef( &MBB->back(), VCTP->getOperand(1).getReg().asMCReg())) { SmallPtrSet ElementChain; SmallPtrSet Ignore; unsigned ExpectedVectorWidth = getTailPredVectorWidth(VCTP->getOpcode()); Ignore.insert(VCTPs.begin(), VCTPs.end()); if (TryRemove(Def, RDA, ElementChain, Ignore)) { bool FoundSub = false; for (auto *MI : ElementChain) { if (isMovRegOpcode(MI->getOpcode())) continue; if (isSubImmOpcode(MI->getOpcode())) { if (FoundSub || !IsValidSub(MI, ExpectedVectorWidth)) { LLVM_DEBUG(dbgs() << "ARM Loops: Unexpected instruction in element" " count: " << *MI); return false; } FoundSub = true; } else { LLVM_DEBUG(dbgs() << "ARM Loops: Unexpected instruction in element" " count: " << *MI); return false; } } ToRemove.insert(ElementChain.begin(), ElementChain.end()); } } return true; } static bool isRegInClass(const MachineOperand &MO, const TargetRegisterClass *Class) { return MO.isReg() && MO.getReg() && Class->contains(MO.getReg()); } // MVE 'narrowing' operations operate on half a lane, reading from half and writing // to half, which are referred to as the top and bottom half. The other // half retains its previous value. static bool retainsPreviousHalfElement(const MachineInstr &MI) { const MCInstrDesc &MCID = MI.getDesc(); uint64_t Flags = MCID.TSFlags; return (Flags & ARMII::RetainsPreviousHalfElement) != 0; } // Some MVE instructions read from the top/bottom halves of their operand(s) // and generate a vector result with result elements that are double the // width of the input. static bool producesDoubleWidthResult(const MachineInstr &MI) { const MCInstrDesc &MCID = MI.getDesc(); uint64_t Flags = MCID.TSFlags; return (Flags & ARMII::DoubleWidthResult) != 0; } static bool isHorizontalReduction(const MachineInstr &MI) { const MCInstrDesc &MCID = MI.getDesc(); uint64_t Flags = MCID.TSFlags; return (Flags & ARMII::HorizontalReduction) != 0; } // Can this instruction generate a non-zero result when given only zeroed // operands? This allows us to know that, given operands with false bytes // zeroed by masked loads, the result will also contain zeros in those // bytes. static bool canGenerateNonZeros(const MachineInstr &MI) { // Check for instructions which can write into a larger element size, // possibly writing into a previous zero'd lane.
if (producesDoubleWidthResult(MI)) return true; switch (MI.getOpcode()) { default: break; // FIXME: VNEG FP and -0? I think we'll need to handle this once we allow // fp16 -> fp32 vector conversions. // Instructions that perform a NOT will generate 1s from 0s. case ARM::MVE_VMVN: case ARM::MVE_VORN: // Count leading zeros will do just that! case ARM::MVE_VCLZs8: case ARM::MVE_VCLZs16: case ARM::MVE_VCLZs32: return true; } return false; } // Look at its register uses to see if it can only receive zeros // into its false lanes which would then produce zeros. Also check that // the output register is also defined by a FalseLanesZero instruction // so that if tail-predication happens, the lanes that aren't updated will // still be zeros. static bool producesFalseLanesZero(MachineInstr &MI, const TargetRegisterClass *QPRs, const ReachingDefAnalysis &RDA, InstSet &FalseLanesZero) { if (canGenerateNonZeros(MI)) return false; bool isPredicated = isVectorPredicated(&MI); // Predicated loads will write zeros to the falsely predicated bytes of the // destination register. if (MI.mayLoad()) return isPredicated; auto IsZeroInit = [](MachineInstr *Def) { return !isVectorPredicated(Def) && Def->getOpcode() == ARM::MVE_VMOVimmi32 && Def->getOperand(1).getImm() == 0; }; bool AllowScalars = isHorizontalReduction(MI); for (auto &MO : MI.operands()) { if (!MO.isReg() || !MO.getReg()) continue; if (!isRegInClass(MO, QPRs) && AllowScalars) continue; // Check that this instruction will produce zeros in its false lanes: // - If it only consumes false lanes zero or constant 0 (vmov #0) // - If it's predicated, it only matters that its def register already has // false lane zeros, so we can ignore the uses. SmallPtrSet Defs; RDA.getGlobalReachingDefs(&MI, MO.getReg(), Defs); for (auto *Def : Defs) { if (Def == &MI || FalseLanesZero.count(Def) || IsZeroInit(Def)) continue; if (MO.isUse() && isPredicated) continue; return false; } } LLVM_DEBUG(dbgs() << "ARM Loops: Always False Zeros: " << MI); return true; } bool LowOverheadLoop::ValidateLiveOuts() { // We want to find out if the tail-predicated version of this loop will // produce the same values as the loop in its original form. For this to // be true, the newly inserted implicit predication must not change the // (observable) results. // We're doing this because many instructions in the loop will not be // predicated and so the conversion from VPT predication to tail-predication // can result in different values being produced; due to the tail-predication // preventing many instructions from updating their falsely predicated // lanes. This analysis assumes that all the instructions perform lane-wise // operations and don't perform any exchanges. // A masked load, whether through VPT or tail predication, will write zeros // to any of the falsely predicated bytes. So, from the loads, we know that // the false lanes are zeroed and here we're trying to track that those false // lanes remain zero, or where they change, the differences are masked away // by their user(s). // All MVE stores have to be predicated, so we know that any predicated load // operands, or stored results are equivalent already. Other explicitly // predicated instructions will perform the same operation in the original // loop and the tail-predicated form too. Because of this, we can insert // loads, stores and other predicated instructions into our Predicated // set and build from there.
const TargetRegisterClass *QPRs = TRI.getRegClass(ARM::MQPRRegClassID); SetVector FalseLanesUnknown; SmallPtrSet FalseLanesZero; SmallPtrSet Predicated; MachineBasicBlock *Header = ML.getHeader(); for (auto &MI : *Header) { if (!shouldInspect(MI)) continue; if (isVCTP(&MI) || isVPTOpcode(MI.getOpcode())) continue; bool isPredicated = isVectorPredicated(&MI); bool retainsOrReduces = retainsPreviousHalfElement(MI) || isHorizontalReduction(MI); if (isPredicated) Predicated.insert(&MI); if (producesFalseLanesZero(MI, QPRs, RDA, FalseLanesZero)) FalseLanesZero.insert(&MI); else if (MI.getNumDefs() == 0) continue; else if (!isPredicated && retainsOrReduces) return false; else if (!isPredicated) FalseLanesUnknown.insert(&MI); } auto HasPredicatedUsers = [this](MachineInstr *MI, const MachineOperand &MO, SmallPtrSetImpl &Predicated) { SmallPtrSet Uses; RDA.getGlobalUses(MI, MO.getReg().asMCReg(), Uses); for (auto *Use : Uses) { if (Use != MI && !Predicated.count(Use)) return false; } return true; }; // Visit the unknowns in reverse so that we can start at the values being // stored and then we can work towards the leaves, hopefully adding more // instructions to Predicated. Successfully terminating the loop means that // all the unknown values have to found to be masked by predicated user(s). // For any unpredicated values, we store them in NonPredicated so that we // can later check whether these form a reduction. SmallPtrSet NonPredicated; for (auto *MI : reverse(FalseLanesUnknown)) { for (auto &MO : MI->operands()) { if (!isRegInClass(MO, QPRs) || !MO.isDef()) continue; if (!HasPredicatedUsers(MI, MO, Predicated)) { LLVM_DEBUG(dbgs() << "ARM Loops: Found an unknown def of : " << TRI.getRegAsmName(MO.getReg()) << " at " << *MI); NonPredicated.insert(MI); break; } } // Any unknown false lanes have been masked away by the user(s). if (!NonPredicated.contains(MI)) Predicated.insert(MI); } SmallPtrSet LiveOutMIs; SmallVector ExitBlocks; ML.getExitBlocks(ExitBlocks); assert(ML.getNumBlocks() == 1 && "Expected single block loop!"); assert(ExitBlocks.size() == 1 && "Expected a single exit block"); MachineBasicBlock *ExitBB = ExitBlocks.front(); for (const MachineBasicBlock::RegisterMaskPair &RegMask : ExitBB->liveins()) { // TODO: Instead of blocking predication, we could move the vctp to the exit // block and calculate it's operand there in or the preheader. if (RegMask.PhysReg == ARM::VPR) return false; // Check Q-regs that are live in the exit blocks. We don't collect scalars // because they won't be affected by lane predication. if (QPRs->contains(RegMask.PhysReg)) if (auto *MI = RDA.getLocalLiveOutMIDef(Header, RegMask.PhysReg)) LiveOutMIs.insert(MI); } // We've already validated that any VPT predication within the loop will be // equivalent when we perform the predication transformation; so we know that // any VPT predicated instruction is predicated upon VCTP. Any live-out // instruction needs to be predicated, so check this here. The instructions // in NonPredicated have been found to be a reduction that we can ensure its // legality. for (auto *MI : LiveOutMIs) { if (NonPredicated.count(MI) && FalseLanesUnknown.contains(MI)) { LLVM_DEBUG(dbgs() << "ARM Loops: Unable to handle live out: " << *MI); return false; } } return true; } void LowOverheadLoop::Validate(ARMBasicBlockUtils *BBUtils) { if (Revert) return; // Check branch target ranges: WLS[TP] can only branch forwards and LE[TP] // can only jump back. 
auto ValidateRanges = [](MachineInstr *Start, MachineInstr *End, ARMBasicBlockUtils *BBUtils, MachineLoop &ML) { MachineBasicBlock *TgtBB = End->getOpcode() == ARM::t2LoopEnd ? End->getOperand(1).getMBB() : End->getOperand(2).getMBB(); // TODO Maybe there's cases where the target doesn't have to be the header, // but for now be safe and revert. if (TgtBB != ML.getHeader()) { LLVM_DEBUG(dbgs() << "ARM Loops: LoopEnd is not targeting header.\n"); return false; } // The WLS and LE instructions have 12-bits for the label offset. WLS // requires a positive offset, while LE uses negative. if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML.getHeader()) || !BBUtils->isBBInRange(End, ML.getHeader(), 4094)) { LLVM_DEBUG(dbgs() << "ARM Loops: LE offset is out-of-range\n"); return false; } if (Start->getOpcode() == ARM::t2WhileLoopStart && (BBUtils->getOffsetOf(Start) > BBUtils->getOffsetOf(Start->getOperand(1).getMBB()) || !BBUtils->isBBInRange(Start, Start->getOperand(1).getMBB(), 4094))) { LLVM_DEBUG(dbgs() << "ARM Loops: WLS offset is out-of-range!\n"); return false; } return true; }; // Find a suitable position to insert the loop start instruction. It needs to // be able to safely define LR. auto FindStartInsertionPoint = [](MachineInstr *Start, MachineInstr *Dec, MachineBasicBlock::iterator &InsertPt, MachineBasicBlock *&InsertBB, ReachingDefAnalysis &RDA, InstSet &ToRemove) { // For a t2DoLoopStart it is always valid to use the start insertion point. // For WLS we can define LR if LR already contains the same value. if (isDo(Start) || Start->getOperand(0).getReg() == ARM::LR) { InsertPt = MachineBasicBlock::iterator(Start); InsertBB = Start->getParent(); return true; } // We've found no suitable LR def and Start doesn't use LR directly. Can we // just define LR anyway? if (!RDA.isSafeToDefRegAt(Start, MCRegister::from(ARM::LR))) return false; InsertPt = MachineBasicBlock::iterator(Start); InsertBB = Start->getParent(); return true; }; if (!FindStartInsertionPoint(Start, Dec, StartInsertPt, StartInsertBB, RDA, ToRemove)) { LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n"); Revert = true; return; } LLVM_DEBUG(if (StartInsertPt == StartInsertBB->end()) dbgs() << "ARM Loops: Will insert LoopStart at end of block\n"; else dbgs() << "ARM Loops: Will insert LoopStart at " << *StartInsertPt ); Revert = !ValidateRanges(Start, End, BBUtils, ML); CannotTailPredicate = !ValidateTailPredicate(); } bool LowOverheadLoop::AddVCTP(MachineInstr *MI) { LLVM_DEBUG(dbgs() << "ARM Loops: Adding VCTP: " << *MI); if (VCTPs.empty()) { VCTPs.push_back(MI); return true; } // If we find another VCTP, check whether it uses the same value as the main VCTP. // If it does, store it in the VCTPs set, else refuse it. 
MachineInstr *Prev = VCTPs.back(); if (!Prev->getOperand(1).isIdenticalTo(MI->getOperand(1)) || !RDA.hasSameReachingDef(Prev, MI, MI->getOperand(1).getReg().asMCReg())) { LLVM_DEBUG(dbgs() << "ARM Loops: Found VCTP with a different reaching " "definition from the main VCTP"); return false; } VCTPs.push_back(MI); return true; } bool LowOverheadLoop::ValidateMVEInst(MachineInstr* MI) { if (CannotTailPredicate) return false; if (!shouldInspect(*MI)) return true; if (MI->getOpcode() == ARM::MVE_VPSEL || MI->getOpcode() == ARM::MVE_VPNOT) { // TODO: Allow VPSEL and VPNOT, we currently cannot because: // 1) It will use the VPR as a predicate operand, but doesn't have to be // instead a VPT block, which means we can assert while building up // the VPT block because we don't find another VPT or VPST to being a new // one. // 2) VPSEL still requires a VPR operand even after tail predicating, // which means we can't remove it unless there is another // instruction, such as vcmp, that can provide the VPR def. return false; } // Record all VCTPs and check that they're equivalent to one another. if (isVCTP(MI) && !AddVCTP(MI)) return false; // Inspect uses first so that any instructions that alter the VPR don't // alter the predicate upon themselves. const MCInstrDesc &MCID = MI->getDesc(); bool IsUse = false; unsigned LastOpIdx = MI->getNumOperands() - 1; for (auto &Op : enumerate(reverse(MCID.operands()))) { const MachineOperand &MO = MI->getOperand(LastOpIdx - Op.index()); if (!MO.isReg() || !MO.isUse() || MO.getReg() != ARM::VPR) continue; if (ARM::isVpred(Op.value().OperandType)) { VPTState::addInst(MI); IsUse = true; } else if (MI->getOpcode() != ARM::MVE_VPST) { LLVM_DEBUG(dbgs() << "ARM Loops: Found instruction using vpr: " << *MI); return false; } } // If we find an instruction that has been marked as not valid for tail // predication, only allow the instruction if it's contained within a valid // VPT block. bool RequiresExplicitPredication = (MCID.TSFlags & ARMII::ValidForTailPredication) == 0; if (isDomainMVE(MI) && RequiresExplicitPredication) { LLVM_DEBUG(if (!IsUse) dbgs() << "ARM Loops: Can't tail predicate: " << *MI); return IsUse; } // If the instruction is already explicitly predicated, then the conversion // will be fine, but ensure that all store operations are predicated. if (MI->mayStore()) return IsUse; // If this instruction defines the VPR, update the predicate for the // proceeding instructions. if (isVectorPredicate(MI)) { // Clear the existing predicate when we're not in VPT Active state, // otherwise we add to it. if (!isVectorPredicated(MI)) VPTState::resetPredicate(MI); else VPTState::addPredicate(MI); } // Finally once the predicate has been modified, we can start a new VPT // block if necessary. 
if (isVPTOpcode(MI->getOpcode())) VPTState::CreateVPTBlock(MI); return true; } bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) { const ARMSubtarget &ST = static_cast(mf.getSubtarget()); if (!ST.hasLOB()) return false; MF = &mf; LLVM_DEBUG(dbgs() << "ARM Loops on " << MF->getName() << " ------------- \n"); MLI = &getAnalysis(); RDA = &getAnalysis(); MF->getProperties().set(MachineFunctionProperties::Property::TracksLiveness); MRI = &MF->getRegInfo(); TII = static_cast(ST.getInstrInfo()); TRI = ST.getRegisterInfo(); BBUtils = std::unique_ptr(new ARMBasicBlockUtils(*MF)); BBUtils->computeAllBlockSizes(); BBUtils->adjustBBOffsetsAfter(&MF->front()); bool Changed = false; for (auto ML : *MLI) { if (ML->isOutermost()) Changed |= ProcessLoop(ML); } Changed |= RevertNonLoops(); return Changed; } bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { bool Changed = false; // Process inner loops first. for (auto I = ML->begin(), E = ML->end(); I != E; ++I) Changed |= ProcessLoop(*I); LLVM_DEBUG(dbgs() << "ARM Loops: Processing loop containing:\n"; if (auto *Preheader = ML->getLoopPreheader()) dbgs() << " - " << Preheader->getName() << "\n"; else if (auto *Preheader = MLI->findLoopPreheader(ML)) dbgs() << " - " << Preheader->getName() << "\n"; else if (auto *Preheader = MLI->findLoopPreheader(ML, true)) dbgs() << " - " << Preheader->getName() << "\n"; for (auto *MBB : ML->getBlocks()) dbgs() << " - " << MBB->getName() << "\n"; ); // Search the given block for a loop start instruction. If one isn't found, // and there's only one predecessor block, search that one too. std::function SearchForStart = [&SearchForStart](MachineBasicBlock *MBB) -> MachineInstr* { for (auto &MI : *MBB) { if (isLoopStart(MI)) return &MI; } if (MBB->pred_size() == 1) return SearchForStart(*MBB->pred_begin()); return nullptr; }; LowOverheadLoop LoLoop(*ML, *MLI, *RDA, *TRI, *TII); // Search the preheader for the start intrinsic. // FIXME: I don't see why we shouldn't be supporting multiple predecessors // with potentially multiple set.loop.iterations, so we need to enable this. if (LoLoop.Preheader) LoLoop.Start = SearchForStart(LoLoop.Preheader); else return false; // Find the low-overhead loop components and decide whether or not to fall // back to a normal loop. Also look for a vctp instructions and decide // whether we can convert that predicate using tail predication. for (auto *MBB : reverse(ML->getBlocks())) { for (auto &MI : *MBB) { if (MI.isDebugValue()) continue; else if (MI.getOpcode() == ARM::t2LoopDec) LoLoop.Dec = &MI; else if (MI.getOpcode() == ARM::t2LoopEnd) LoLoop.End = &MI; else if (MI.getOpcode() == ARM::t2LoopEndDec) LoLoop.End = LoLoop.Dec = &MI; else if (isLoopStart(MI)) LoLoop.Start = &MI; else if (MI.getDesc().isCall()) { // TODO: Though the call will require LE to execute again, does this // mean we should revert? Always executing LE hopefully should be // faster than performing a sub,cmp,br or even subs,br. LoLoop.Revert = true; LLVM_DEBUG(dbgs() << "ARM Loops: Found call.\n"); } else { // Record VPR defs and build up their corresponding vpt blocks. // Check we know how to tail predicate any mve instructions. LoLoop.AnalyseMVEInst(&MI); } } } LLVM_DEBUG(LoLoop.dump()); if (!LoLoop.FoundAllComponents()) { LLVM_DEBUG(dbgs() << "ARM Loops: Didn't find loop start, update, end\n"); return false; } // Check that the only instruction using LoopDec is LoopEnd. This can only // happen when the Dec and End are separate, not a single t2LoopEndDec. 
// TODO: Check for copy chains that really have no effect. if (LoLoop.Dec != LoLoop.End) { SmallPtrSet Uses; RDA->getReachingLocalUses(LoLoop.Dec, MCRegister::from(ARM::LR), Uses); if (Uses.size() > 1 || !Uses.count(LoLoop.End)) { LLVM_DEBUG(dbgs() << "ARM Loops: Unable to remove LoopDec.\n"); LoLoop.Revert = true; } } LoLoop.Validate(BBUtils.get()); Expand(LoLoop); return true; } // WhileLoopStart holds the exit block, so produce a cmp lr, 0 and then a // beq that branches to the exit branch. // TODO: We could also try to generate a cbz if the value in LR is also in // another low register. void ARMLowOverheadLoops::RevertWhile(MachineInstr *MI) const { LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp: " << *MI); MachineBasicBlock *DestBB = MI->getOperand(1).getMBB(); unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ? ARM::tBcc : ARM::t2Bcc; RevertWhileLoopStart(MI, TII, BrOpc); } void ARMLowOverheadLoops::RevertDo(MachineInstr *MI) const { LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to mov: " << *MI); RevertDoLoopStart(MI, TII); } bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const { LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI); MachineBasicBlock *MBB = MI->getParent(); SmallPtrSet Ignore; for (auto I = MachineBasicBlock::iterator(MI), E = MBB->end(); I != E; ++I) { if (I->getOpcode() == ARM::t2LoopEnd) { Ignore.insert(&*I); break; } } // If nothing defines CPSR between LoopDec and LoopEnd, use a t2SUBS. bool SetFlags = RDA->isSafeToDefRegAt(MI, MCRegister::from(ARM::CPSR), Ignore); llvm::RevertLoopDec(MI, TII, SetFlags); return SetFlags; } // Generate a subs, or sub and cmp, and a branch instead of an LE. void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *MI, bool SkipCmp) const { LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to cmp, br: " << *MI); MachineBasicBlock *DestBB = MI->getOperand(1).getMBB(); unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ? ARM::tBcc : ARM::t2Bcc; llvm::RevertLoopEnd(MI, TII, BrOpc, SkipCmp); } // Generate a subs, or sub and cmp, and a branch instead of an LE. void ARMLowOverheadLoops::RevertLoopEndDec(MachineInstr *MI) const { LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to subs, br: " << *MI); assert(MI->getOpcode() == ARM::t2LoopEndDec && "Expected a t2LoopEndDec!"); MachineBasicBlock *MBB = MI->getParent(); MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri)); MIB.addDef(ARM::LR); MIB.add(MI->getOperand(1)); MIB.addImm(1); MIB.addImm(ARMCC::AL); MIB.addReg(ARM::NoRegister); MIB.addReg(ARM::CPSR); MIB->getOperand(5).setIsDef(true); MachineBasicBlock *DestBB = MI->getOperand(2).getMBB(); unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ? ARM::tBcc : ARM::t2Bcc; // Create bne MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(BrOpc)); MIB.add(MI->getOperand(2)); // branch target MIB.addImm(ARMCC::NE); // condition code MIB.addReg(ARM::CPSR); MI->eraseFromParent(); } // Perform dead code elimation on the loop iteration count setup expression. // If we are tail-predicating, the number of elements to be processed is the // operand of the VCTP instruction in the vector body, see getCount(), which is // register $r3 in this example: // // $lr = big-itercount-expression // .. // $lr = t2DoLoopStart renamable $lr // vector.body: // .. 
// $vpr = MVE_VCTP32 renamable $r3 // renamable $lr = t2LoopDec killed renamable $lr, 1 // t2LoopEnd renamable $lr, %vector.body // tB %end // // What we would like achieve here is to replace the do-loop start pseudo // instruction t2DoLoopStart with: // // $lr = MVE_DLSTP_32 killed renamable $r3 // // Thus, $r3 which defines the number of elements, is written to $lr, // and then we want to delete the whole chain that used to define $lr, // see the comment below how this chain could look like. // void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) { if (!LoLoop.IsTailPredicationLegal()) return; LLVM_DEBUG(dbgs() << "ARM Loops: Trying DCE on loop iteration count.\n"); MachineInstr *Def = RDA->getMIOperand(LoLoop.Start, isDo(LoLoop.Start) ? 1 : 0); if (!Def) { LLVM_DEBUG(dbgs() << "ARM Loops: Couldn't find iteration count.\n"); return; } // Collect and remove the users of iteration count. SmallPtrSet Killed = { LoLoop.Start, LoLoop.Dec, LoLoop.End }; if (!TryRemove(Def, *RDA, LoLoop.ToRemove, Killed)) LLVM_DEBUG(dbgs() << "ARM Loops: Unsafe to remove loop iteration count.\n"); } MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) { LLVM_DEBUG(dbgs() << "ARM Loops: Expanding LoopStart.\n"); // When using tail-predication, try to delete the dead code that was used to // calculate the number of loop iterations. IterationCountDCE(LoLoop); MachineBasicBlock::iterator InsertPt = LoLoop.StartInsertPt; MachineInstr *Start = LoLoop.Start; MachineBasicBlock *MBB = LoLoop.StartInsertBB; unsigned Opc = LoLoop.getStartOpcode(); MachineOperand &Count = LoLoop.getLoopStartOperand(); MachineInstrBuilder MIB = BuildMI(*MBB, InsertPt, Start->getDebugLoc(), TII->get(Opc)); MIB.addDef(ARM::LR); MIB.add(Count); if (!isDo(Start)) MIB.add(Start->getOperand(1)); LoLoop.ToRemove.insert(Start); LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB); return &*MIB; } void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) { auto RemovePredicate = [](MachineInstr *MI) { + if (MI->isDebugInstr()) + return; LLVM_DEBUG(dbgs() << "ARM Loops: Removing predicate from: " << *MI); - if (int PIdx = llvm::findFirstVPTPredOperandIdx(*MI)) { - assert(MI->getOperand(PIdx).getImm() == ARMVCC::Then && - "Expected Then predicate!"); - MI->getOperand(PIdx).setImm(ARMVCC::None); - MI->getOperand(PIdx+1).setReg(0); - } else - llvm_unreachable("trying to unpredicate a non-predicated instruction"); + int PIdx = llvm::findFirstVPTPredOperandIdx(*MI); + assert(PIdx >= 1 && "Trying to unpredicate a non-predicated instruction"); + assert(MI->getOperand(PIdx).getImm() == ARMVCC::Then && + "Expected Then predicate!"); + MI->getOperand(PIdx).setImm(ARMVCC::None); + MI->getOperand(PIdx + 1).setReg(0); }; for (auto &Block : LoLoop.getVPTBlocks()) { SmallVectorImpl &Insts = Block.getInsts(); auto ReplaceVCMPWithVPT = [&](MachineInstr *&TheVCMP, MachineInstr *At) { assert(TheVCMP && "Replacing a removed or non-existent VCMP"); // Replace the VCMP with a VPT MachineInstrBuilder MIB = BuildMI(*At->getParent(), At, At->getDebugLoc(), TII->get(VCMPOpcodeToVPT(TheVCMP->getOpcode()))); MIB.addImm(ARMVCC::Then); // Register one MIB.add(TheVCMP->getOperand(1)); // Register two MIB.add(TheVCMP->getOperand(2)); // The comparison code, e.g. 
ge, eq, lt MIB.add(TheVCMP->getOperand(3)); LLVM_DEBUG(dbgs() << "ARM Loops: Combining with VCMP to VPT: " << *MIB); LoLoop.BlockMasksToRecompute.insert(MIB.getInstr()); LoLoop.ToRemove.insert(TheVCMP); TheVCMP = nullptr; }; if (VPTState::isEntryPredicatedOnVCTP(Block, /*exclusive*/ true)) { MachineInstr *VPST = Insts.front(); if (VPTState::hasUniformPredicate(Block)) { // A vpt block starting with VPST, is only predicated upon vctp and has no // internal vpr defs: // - Remove vpst. // - Unpredicate the remaining instructions. LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST); for (unsigned i = 1; i < Insts.size(); ++i) RemovePredicate(Insts[i]); } else { // The VPT block has a non-uniform predicate but it uses a vpst and its // entry is guarded only by a vctp, which means we: // - Need to remove the original vpst. // - Then need to unpredicate any following instructions, until // we come across the divergent vpr def. // - Insert a new vpst to predicate the instruction(s) that following // the divergent vpr def. MachineInstr *Divergent = VPTState::getDivergent(Block); + MachineBasicBlock *MBB = Divergent->getParent(); auto DivergentNext = ++MachineBasicBlock::iterator(Divergent); + while (DivergentNext != MBB->end() && DivergentNext->isDebugInstr()) + ++DivergentNext; + bool DivergentNextIsPredicated = + DivergentNext != MBB->end() && getVPTInstrPredicate(*DivergentNext) != ARMVCC::None; for (auto I = ++MachineBasicBlock::iterator(VPST), E = DivergentNext; I != E; ++I) RemovePredicate(&*I); // Check if the instruction defining vpr is a vcmp so it can be combined // with the VPST This should be the divergent instruction MachineInstr *VCMP = VCMPOpcodeToVPT(Divergent->getOpcode()) != 0 ? Divergent : nullptr; if (DivergentNextIsPredicated) { // Insert a VPST at the divergent only if the next instruction // would actually use it. A VCMP following a VPST can be // merged into a VPT so do that instead if the VCMP exists. if (!VCMP) { // Create a VPST (with a null mask for now, we'll recompute it // later) MachineInstrBuilder MIB = BuildMI(*Divergent->getParent(), Divergent, Divergent->getDebugLoc(), TII->get(ARM::MVE_VPST)); MIB.addImm(0); LLVM_DEBUG(dbgs() << "ARM Loops: Created VPST: " << *MIB); LoLoop.BlockMasksToRecompute.insert(MIB.getInstr()); } else { // No RDA checks are necessary here since the VPST would have been // directly after the VCMP ReplaceVCMPWithVPT(VCMP, VCMP); } } } LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST); LoLoop.ToRemove.insert(VPST); } else if (Block.containsVCTP()) { // The vctp will be removed, so either the entire block will be dead or // the block mask of the vp(s)t will need to be recomputed. MachineInstr *VPST = Insts.front(); if (Block.size() == 2) { assert(VPST->getOpcode() == ARM::MVE_VPST && "Found a VPST in an otherwise empty vpt block"); LoLoop.ToRemove.insert(VPST); } else LoLoop.BlockMasksToRecompute.insert(VPST); } else if (Insts.front()->getOpcode() == ARM::MVE_VPST) { // If this block starts with a VPST then attempt to merge it with the // preceeding un-merged VCMP into a VPT. 
This VCMP comes from a VPT // block that no longer exists MachineInstr *VPST = Insts.front(); auto Next = ++MachineBasicBlock::iterator(VPST); assert(getVPTInstrPredicate(*Next) != ARMVCC::None && "The instruction after a VPST must be predicated"); (void)Next; MachineInstr *VprDef = RDA->getUniqueReachingMIDef(VPST, ARM::VPR); if (VprDef && VCMPOpcodeToVPT(VprDef->getOpcode()) && !LoLoop.ToRemove.contains(VprDef)) { MachineInstr *VCMP = VprDef; // The VCMP and VPST can only be merged if the VCMP's operands will have // the same values at the VPST. // If any of the instructions between the VCMP and VPST are predicated // then a different code path is expected to have merged the VCMP and // VPST already. if (!std::any_of(++MachineBasicBlock::iterator(VCMP), MachineBasicBlock::iterator(VPST), hasVPRUse) && RDA->hasSameReachingDef(VCMP, VPST, VCMP->getOperand(1).getReg()) && RDA->hasSameReachingDef(VCMP, VPST, VCMP->getOperand(2).getReg())) { ReplaceVCMPWithVPT(VCMP, VPST); LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *VPST); LoLoop.ToRemove.insert(VPST); } } } } LoLoop.ToRemove.insert(LoLoop.VCTPs.begin(), LoLoop.VCTPs.end()); } void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) { // Combine the LoopDec and LoopEnd instructions into LE(TP). auto ExpandLoopEnd = [this](LowOverheadLoop &LoLoop) { MachineInstr *End = LoLoop.End; MachineBasicBlock *MBB = End->getParent(); unsigned Opc = LoLoop.IsTailPredicationLegal() ? ARM::MVE_LETP : ARM::t2LEUpdate; MachineInstrBuilder MIB = BuildMI(*MBB, End, End->getDebugLoc(), TII->get(Opc)); MIB.addDef(ARM::LR); unsigned Off = LoLoop.Dec == LoLoop.End ? 1 : 0; MIB.add(End->getOperand(Off + 0)); MIB.add(End->getOperand(Off + 1)); LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB); LoLoop.ToRemove.insert(LoLoop.Dec); LoLoop.ToRemove.insert(End); return &*MIB; }; // TODO: We should be able to automatically remove these branches before we // get here - probably by teaching analyzeBranch about the pseudo // instructions. // If there is an unconditional branch, after I, that just branches to the // next block, remove it. auto RemoveDeadBranch = [](MachineInstr *I) { MachineBasicBlock *BB = I->getParent(); MachineInstr *Terminator = &BB->instr_back(); if (Terminator->isUnconditionalBranch() && I != Terminator) { MachineBasicBlock *Succ = Terminator->getOperand(0).getMBB(); if (BB->isLayoutSuccessor(Succ)) { LLVM_DEBUG(dbgs() << "ARM Loops: Removing branch: " << *Terminator); Terminator->eraseFromParent(); } } }; if (LoLoop.Revert) { if (LoLoop.Start->getOpcode() == ARM::t2WhileLoopStart) RevertWhile(LoLoop.Start); else RevertDo(LoLoop.Start); if (LoLoop.Dec == LoLoop.End) RevertLoopEndDec(LoLoop.End); else RevertLoopEnd(LoLoop.End, RevertLoopDec(LoLoop.Dec)); } else { LoLoop.Start = ExpandLoopStart(LoLoop); RemoveDeadBranch(LoLoop.Start); LoLoop.End = ExpandLoopEnd(LoLoop); RemoveDeadBranch(LoLoop.End); if (LoLoop.IsTailPredicationLegal()) ConvertVPTBlocks(LoLoop); for (auto *I : LoLoop.ToRemove) { LLVM_DEBUG(dbgs() << "ARM Loops: Erasing " << *I); I->eraseFromParent(); } for (auto *I : LoLoop.BlockMasksToRecompute) { LLVM_DEBUG(dbgs() << "ARM Loops: Recomputing VPT/VPST Block Mask: " << *I); recomputeVPTBlockMask(*I); LLVM_DEBUG(dbgs() << " ... done: " << *I); } } PostOrderLoopTraversal DFS(LoLoop.ML, *MLI); DFS.ProcessLoop(); const SmallVectorImpl &PostOrder = DFS.getOrder(); for (auto *MBB : PostOrder) { recomputeLiveIns(*MBB); // FIXME: For some reason, the live-in print order is non-deterministic for // our tests and I can't out why... 
So just sort them. MBB->sortUniqueLiveIns(); } for (auto *MBB : reverse(PostOrder)) recomputeLivenessFlags(*MBB); // We've moved, removed and inserted new instructions, so update RDA. RDA->reset(); } bool ARMLowOverheadLoops::RevertNonLoops() { LLVM_DEBUG(dbgs() << "ARM Loops: Reverting any remaining pseudos...\n"); bool Changed = false; for (auto &MBB : *MF) { SmallVector Starts; SmallVector Decs; SmallVector Ends; SmallVector EndDecs; for (auto &I : MBB) { if (isLoopStart(I)) Starts.push_back(&I); else if (I.getOpcode() == ARM::t2LoopDec) Decs.push_back(&I); else if (I.getOpcode() == ARM::t2LoopEnd) Ends.push_back(&I); else if (I.getOpcode() == ARM::t2LoopEndDec) EndDecs.push_back(&I); } if (Starts.empty() && Decs.empty() && Ends.empty() && EndDecs.empty()) continue; Changed = true; for (auto *Start : Starts) { if (Start->getOpcode() == ARM::t2WhileLoopStart) RevertWhile(Start); else RevertDo(Start); } for (auto *Dec : Decs) RevertLoopDec(Dec); for (auto *End : Ends) RevertLoopEnd(End); for (auto *End : EndDecs) RevertLoopEndDec(End); } return Changed; } FunctionPass *llvm::createARMLowOverheadLoopsPass() { return new ARMLowOverheadLoops(); } diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp index 81f113b8302f..56823735e2d9 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp @@ -1,1203 +1,1206 @@ //===- MVEGatherScatterLowering.cpp - Gather/Scatter lowering -------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // /// This pass custom lowers llvm.gather and llvm.scatter instructions to /// arm.mve.gather and arm.mve.scatter intrinsics, optimising the code to /// produce a better final result as we go. 
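///
/// As a rough illustrative sketch of what that means (the intrinsic name
/// mangling is abbreviated here), a legal masked gather such as
///   %v = call <4 x i32> @llvm.masked.gather...(<4 x i32*> %ptrs, i32 4,
///                                              <4 x i1> %mask, <4 x i32> zeroinitializer)
/// is rewritten into one of the MVE forms built below, e.g.
///   @llvm.arm.mve.vldr.gather.base(.predicated)   - vector of absolute pointers
///   @llvm.arm.mve.vldr.gather.offset(.predicated) - base pointer + vector of offsets
/// and scatters are handled symmetrically via the @llvm.arm.mve.vstr.scatter.* forms.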
// //===----------------------------------------------------------------------===// #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMSubtarget.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/InitializePasses.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsARM.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Transforms/Utils/Local.h" #include #include using namespace llvm; #define DEBUG_TYPE "arm-mve-gather-scatter-lowering" cl::opt EnableMaskedGatherScatters( "enable-arm-maskedgatscat", cl::Hidden, cl::init(true), cl::desc("Enable the generation of masked gathers and scatters")); namespace { class MVEGatherScatterLowering : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid explicit MVEGatherScatterLowering() : FunctionPass(ID) { initializeMVEGatherScatterLoweringPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override; StringRef getPassName() const override { return "MVE gather/scatter lowering"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired(); AU.addRequired(); FunctionPass::getAnalysisUsage(AU); } private: LoopInfo *LI = nullptr; // Check this is a valid gather with correct alignment bool isLegalTypeAndAlignment(unsigned NumElements, unsigned ElemSize, Align Alignment); // Check whether Ptr is hidden behind a bitcast and look through it void lookThroughBitcast(Value *&Ptr); // Check for a getelementptr and deduce base and offsets from it, on success // returning the base directly and the offsets indirectly using the Offsets // argument Value *checkGEP(Value *&Offsets, FixedVectorType *Ty, GetElementPtrInst *GEP, IRBuilder<> &Builder); // Compute the scale of this gather/scatter instruction int computeScale(unsigned GEPElemSize, unsigned MemoryElemSize); // If the value is a constant, or derived from constants via additions // and multilications, return its numeric value Optional getIfConst(const Value *V); // If Inst is an add instruction, check whether one summand is a // constant. If so, scale this constant and return it together with // the other summand. 
std::pair getVarAndConst(Value *Inst, int TypeScale); Value *lowerGather(IntrinsicInst *I); // Create a gather from a base + vector of offsets Value *tryCreateMaskedGatherOffset(IntrinsicInst *I, Value *Ptr, Instruction *&Root, IRBuilder<> &Builder); // Create a gather from a vector of pointers Value *tryCreateMaskedGatherBase(IntrinsicInst *I, Value *Ptr, IRBuilder<> &Builder, int64_t Increment = 0); // Create an incrementing gather from a vector of pointers Value *tryCreateMaskedGatherBaseWB(IntrinsicInst *I, Value *Ptr, IRBuilder<> &Builder, int64_t Increment = 0); Value *lowerScatter(IntrinsicInst *I); // Create a scatter to a base + vector of offsets Value *tryCreateMaskedScatterOffset(IntrinsicInst *I, Value *Offsets, IRBuilder<> &Builder); // Create a scatter to a vector of pointers Value *tryCreateMaskedScatterBase(IntrinsicInst *I, Value *Ptr, IRBuilder<> &Builder, int64_t Increment = 0); // Create an incrementing scatter from a vector of pointers Value *tryCreateMaskedScatterBaseWB(IntrinsicInst *I, Value *Ptr, IRBuilder<> &Builder, int64_t Increment = 0); // QI gathers and scatters can increment their offsets on their own if // the increment is a constant value (digit) Value *tryCreateIncrementingGatScat(IntrinsicInst *I, Value *BasePtr, Value *Ptr, GetElementPtrInst *GEP, IRBuilder<> &Builder); // QI gathers/scatters can increment their offsets on their own if the // increment is a constant value (digit) - this creates a writeback QI // gather/scatter Value *tryCreateIncrementingWBGatScat(IntrinsicInst *I, Value *BasePtr, Value *Ptr, unsigned TypeScale, IRBuilder<> &Builder); // Optimise the base and offsets of the given address bool optimiseAddress(Value *Address, BasicBlock *BB, LoopInfo *LI); // Try to fold consecutive geps together into one Value *foldGEP(GetElementPtrInst *GEP, Value *&Offsets, IRBuilder<> &Builder); // Check whether these offsets could be moved out of the loop they're in bool optimiseOffsets(Value *Offsets, BasicBlock *BB, LoopInfo *LI); // Pushes the given add out of the loop void pushOutAdd(PHINode *&Phi, Value *OffsSecondOperand, unsigned StartIndex); // Pushes the given mul out of the loop void pushOutMul(PHINode *&Phi, Value *IncrementPerRound, Value *OffsSecondOperand, unsigned LoopIncrement, IRBuilder<> &Builder); }; } // end anonymous namespace char MVEGatherScatterLowering::ID = 0; INITIALIZE_PASS(MVEGatherScatterLowering, DEBUG_TYPE, "MVE gather/scattering lowering pass", false, false) Pass *llvm::createMVEGatherScatterLoweringPass() { return new MVEGatherScatterLowering(); } bool MVEGatherScatterLowering::isLegalTypeAndAlignment(unsigned NumElements, unsigned ElemSize, Align Alignment) { if (((NumElements == 4 && (ElemSize == 32 || ElemSize == 16 || ElemSize == 8)) || (NumElements == 8 && (ElemSize == 16 || ElemSize == 8)) || (NumElements == 16 && ElemSize == 8)) && Alignment >= ElemSize / 8) return true; LLVM_DEBUG(dbgs() << "masked gathers/scatters: instruction does not have " << "valid alignment or vector type \n"); return false; } static bool checkOffsetSize(Value *Offsets, unsigned TargetElemCount) { // Offsets that are not of type are sign extended by the // getelementptr instruction, and MVE gathers/scatters treat the offset as // unsigned. Thus, if the element size is smaller than 32, we can only allow // positive offsets - i.e., the offsets are not allowed to be variables we // can't look into. 
// Additionally, offsets have to either originate from a zext of a // vector with element types smaller or equal the type of the gather we're // looking at, or consist of constants that we can check are small enough // to fit into the gather type. // Thus we check that 0 < value < 2^TargetElemSize. unsigned TargetElemSize = 128 / TargetElemCount; unsigned OffsetElemSize = cast(Offsets->getType()) ->getElementType() ->getScalarSizeInBits(); if (OffsetElemSize != TargetElemSize || OffsetElemSize != 32) { Constant *ConstOff = dyn_cast(Offsets); if (!ConstOff) return false; int64_t TargetElemMaxSize = (1ULL << TargetElemSize); auto CheckValueSize = [TargetElemMaxSize](Value *OffsetElem) { ConstantInt *OConst = dyn_cast(OffsetElem); if (!OConst) return false; int SExtValue = OConst->getSExtValue(); if (SExtValue >= TargetElemMaxSize || SExtValue < 0) return false; return true; }; if (isa(ConstOff->getType())) { for (unsigned i = 0; i < TargetElemCount; i++) { if (!CheckValueSize(ConstOff->getAggregateElement(i))) return false; } } else { if (!CheckValueSize(ConstOff)) return false; } } return true; } Value *MVEGatherScatterLowering::checkGEP(Value *&Offsets, FixedVectorType *Ty, GetElementPtrInst *GEP, IRBuilder<> &Builder) { if (!GEP) { LLVM_DEBUG( dbgs() << "masked gathers/scatters: no getelementpointer found\n"); return nullptr; } LLVM_DEBUG(dbgs() << "masked gathers/scatters: getelementpointer found." << " Looking at intrinsic for base + vector of offsets\n"); Value *GEPPtr = GEP->getPointerOperand(); Offsets = GEP->getOperand(1); if (GEPPtr->getType()->isVectorTy() || !isa(Offsets->getType())) return nullptr; if (GEP->getNumOperands() != 2) { LLVM_DEBUG(dbgs() << "masked gathers/scatters: getelementptr with too many" << " operands. Expanding.\n"); return nullptr; } Offsets = GEP->getOperand(1); unsigned OffsetsElemCount = cast(Offsets->getType())->getNumElements(); // Paranoid check whether the number of parallel lanes is the same assert(Ty->getNumElements() == OffsetsElemCount); ZExtInst *ZextOffs = dyn_cast(Offsets); if (ZextOffs) Offsets = ZextOffs->getOperand(0); FixedVectorType *OffsetType = cast(Offsets->getType()); // If the offsets are already being zext-ed to , that relieves us of // having to make sure that they won't overflow. 
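// As a worked illustration of the checkOffsetSize rule above (the numbers are
// chosen purely as an example): a gather with 8 lanes gives
// TargetElemSize = 128 / 8 = 16, so a constant offset vector is only accepted
// if every element lies in the range [0, 2^16), because the hardware treats
// the offsets as unsigned.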
if (!ZextOffs || cast(ZextOffs->getDestTy()) ->getElementType() ->getScalarSizeInBits() != 32) if (!checkOffsetSize(Offsets, OffsetsElemCount)) return nullptr; // The offset sizes have been checked; if any truncating or zext-ing is // required to fix them, do that now if (Ty != Offsets->getType()) { if ((Ty->getElementType()->getScalarSizeInBits() < OffsetType->getElementType()->getScalarSizeInBits())) { Offsets = Builder.CreateTrunc(Offsets, Ty); } else { Offsets = Builder.CreateZExt(Offsets, VectorType::getInteger(Ty)); } } // If none of the checks failed, return the gep's base pointer LLVM_DEBUG(dbgs() << "masked gathers/scatters: found correct offsets\n"); return GEPPtr; } void MVEGatherScatterLowering::lookThroughBitcast(Value *&Ptr) { // Look through bitcast instruction if #elements is the same if (auto *BitCast = dyn_cast(Ptr)) { auto *BCTy = cast(BitCast->getType()); auto *BCSrcTy = cast(BitCast->getOperand(0)->getType()); if (BCTy->getNumElements() == BCSrcTy->getNumElements()) { LLVM_DEBUG( dbgs() << "masked gathers/scatters: looking through bitcast\n"); Ptr = BitCast->getOperand(0); } } } int MVEGatherScatterLowering::computeScale(unsigned GEPElemSize, unsigned MemoryElemSize) { // This can be a 32bit load/store scaled by 4, a 16bit load/store scaled by 2, // or a 8bit, 16bit or 32bit load/store scaled by 1 if (GEPElemSize == 32 && MemoryElemSize == 32) return 2; else if (GEPElemSize == 16 && MemoryElemSize == 16) return 1; else if (GEPElemSize == 8) return 0; LLVM_DEBUG(dbgs() << "masked gathers/scatters: incorrect scale. Can't " << "create intrinsic\n"); return -1; } Optional MVEGatherScatterLowering::getIfConst(const Value *V) { const Constant *C = dyn_cast(V); if (C != nullptr) return Optional{C->getUniqueInteger().getSExtValue()}; if (!isa(V)) return Optional{}; const Instruction *I = cast(V); if (I->getOpcode() == Instruction::Add || I->getOpcode() == Instruction::Mul) { Optional Op0 = getIfConst(I->getOperand(0)); Optional Op1 = getIfConst(I->getOperand(1)); if (!Op0 || !Op1) return Optional{}; if (I->getOpcode() == Instruction::Add) return Optional{Op0.getValue() + Op1.getValue()}; if (I->getOpcode() == Instruction::Mul) return Optional{Op0.getValue() * Op1.getValue()}; } return Optional{}; } std::pair MVEGatherScatterLowering::getVarAndConst(Value *Inst, int TypeScale) { std::pair ReturnFalse = std::pair(nullptr, 0); // At this point, the instruction we're looking at must be an add or we // bail out Instruction *Add = dyn_cast(Inst); if (Add == nullptr || Add->getOpcode() != Instruction::Add) return ReturnFalse; Value *Summand; Optional Const; // Find out which operand the value that is increased is if ((Const = getIfConst(Add->getOperand(0)))) Summand = Add->getOperand(1); else if ((Const = getIfConst(Add->getOperand(1)))) Summand = Add->getOperand(0); else return ReturnFalse; // Check that the constant is small enough for an incrementing gather int64_t Immediate = Const.getValue() << TypeScale; if (Immediate > 512 || Immediate < -512 || Immediate % 4 != 0) return ReturnFalse; return std::pair(Summand, Immediate); } Value *MVEGatherScatterLowering::lowerGather(IntrinsicInst *I) { using namespace PatternMatch; LLVM_DEBUG(dbgs() << "masked gathers: checking transform preconditions\n"); // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0) // Attempt to turn the masked gather in I into a MVE intrinsic // Potentially optimising the addressing modes as we do so. 
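// As an illustration of the legality check below (the sizes are examples
// only): a <4 x i32> gather with alignment 4 or a <16 x i8> gather with
// alignment 1 satisfies isLegalTypeAndAlignment, whereas 64-bit elements
// never do, so such calls are simply left untouched here and expanded
// generically later.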
auto *Ty = cast(I->getType()); Value *Ptr = I->getArgOperand(0); Align Alignment = cast(I->getArgOperand(1))->getAlignValue(); Value *Mask = I->getArgOperand(2); Value *PassThru = I->getArgOperand(3); if (!isLegalTypeAndAlignment(Ty->getNumElements(), Ty->getScalarSizeInBits(), Alignment)) return nullptr; lookThroughBitcast(Ptr); assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type"); IRBuilder<> Builder(I->getContext()); Builder.SetInsertPoint(I); Builder.SetCurrentDebugLocation(I->getDebugLoc()); Instruction *Root = I; Value *Load = tryCreateMaskedGatherOffset(I, Ptr, Root, Builder); if (!Load) Load = tryCreateMaskedGatherBase(I, Ptr, Builder); if (!Load) return nullptr; if (!isa(PassThru) && !match(PassThru, m_Zero())) { LLVM_DEBUG(dbgs() << "masked gathers: found non-trivial passthru - " << "creating select\n"); Load = Builder.CreateSelect(Mask, Load, PassThru); } Root->replaceAllUsesWith(Load); Root->eraseFromParent(); if (Root != I) // If this was an extending gather, we need to get rid of the sext/zext // sext/zext as well as of the gather itself I->eraseFromParent(); LLVM_DEBUG(dbgs() << "masked gathers: successfully built masked gather\n"); return Load; } Value *MVEGatherScatterLowering::tryCreateMaskedGatherBase(IntrinsicInst *I, Value *Ptr, IRBuilder<> &Builder, int64_t Increment) { using namespace PatternMatch; auto *Ty = cast(I->getType()); LLVM_DEBUG(dbgs() << "masked gathers: loading from vector of pointers\n"); if (Ty->getNumElements() != 4 || Ty->getScalarSizeInBits() != 32) // Can't build an intrinsic for this return nullptr; Value *Mask = I->getArgOperand(2); if (match(Mask, m_One())) return Builder.CreateIntrinsic(Intrinsic::arm_mve_vldr_gather_base, {Ty, Ptr->getType()}, {Ptr, Builder.getInt32(Increment)}); else return Builder.CreateIntrinsic( Intrinsic::arm_mve_vldr_gather_base_predicated, {Ty, Ptr->getType(), Mask->getType()}, {Ptr, Builder.getInt32(Increment), Mask}); } Value *MVEGatherScatterLowering::tryCreateMaskedGatherBaseWB( IntrinsicInst *I, Value *Ptr, IRBuilder<> &Builder, int64_t Increment) { using namespace PatternMatch; auto *Ty = cast(I->getType()); LLVM_DEBUG( dbgs() << "masked gathers: loading from vector of pointers with writeback\n"); if (Ty->getNumElements() != 4 || Ty->getScalarSizeInBits() != 32) // Can't build an intrinsic for this return nullptr; Value *Mask = I->getArgOperand(2); if (match(Mask, m_One())) return Builder.CreateIntrinsic(Intrinsic::arm_mve_vldr_gather_base_wb, {Ty, Ptr->getType()}, {Ptr, Builder.getInt32(Increment)}); else return Builder.CreateIntrinsic( Intrinsic::arm_mve_vldr_gather_base_wb_predicated, {Ty, Ptr->getType(), Mask->getType()}, {Ptr, Builder.getInt32(Increment), Mask}); } Value *MVEGatherScatterLowering::tryCreateMaskedGatherOffset( IntrinsicInst *I, Value *Ptr, Instruction *&Root, IRBuilder<> &Builder) { using namespace PatternMatch; Type *OriginalTy = I->getType(); Type *ResultTy = OriginalTy; unsigned Unsigned = 1; // The size of the gather was already checked in isLegalTypeAndAlignment; // if it was not a full vector width an appropriate extend should follow. auto *Extend = Root; if (OriginalTy->getPrimitiveSizeInBits() < 128) { // Only transform gathers with exactly one use if (!I->hasOneUse()) return nullptr; // The correct root to replace is not the CallInst itself, but the // instruction which extends it Extend = cast(*I->users().begin()); if (isa(Extend)) { Unsigned = 0; } else if (!isa(Extend)) { LLVM_DEBUG(dbgs() << "masked gathers: extend needed but not provided. 
" << "Expanding\n"); return nullptr; } LLVM_DEBUG(dbgs() << "masked gathers: found an extending gather\n"); ResultTy = Extend->getType(); // The final size of the gather must be a full vector width if (ResultTy->getPrimitiveSizeInBits() != 128) { LLVM_DEBUG(dbgs() << "masked gathers: extending from the wrong type. " << "Expanding\n"); return nullptr; } } GetElementPtrInst *GEP = dyn_cast(Ptr); Value *Offsets; Value *BasePtr = checkGEP(Offsets, cast(ResultTy), GEP, Builder); if (!BasePtr) return nullptr; // Check whether the offset is a constant increment that could be merged into // a QI gather Value *Load = tryCreateIncrementingGatScat(I, BasePtr, Offsets, GEP, Builder); if (Load) return Load; int Scale = computeScale( BasePtr->getType()->getPointerElementType()->getPrimitiveSizeInBits(), OriginalTy->getScalarSizeInBits()); if (Scale == -1) return nullptr; Root = Extend; Value *Mask = I->getArgOperand(2); if (!match(Mask, m_One())) return Builder.CreateIntrinsic( Intrinsic::arm_mve_vldr_gather_offset_predicated, {ResultTy, BasePtr->getType(), Offsets->getType(), Mask->getType()}, {BasePtr, Offsets, Builder.getInt32(OriginalTy->getScalarSizeInBits()), Builder.getInt32(Scale), Builder.getInt32(Unsigned), Mask}); else return Builder.CreateIntrinsic( Intrinsic::arm_mve_vldr_gather_offset, {ResultTy, BasePtr->getType(), Offsets->getType()}, {BasePtr, Offsets, Builder.getInt32(OriginalTy->getScalarSizeInBits()), Builder.getInt32(Scale), Builder.getInt32(Unsigned)}); } Value *MVEGatherScatterLowering::lowerScatter(IntrinsicInst *I) { using namespace PatternMatch; LLVM_DEBUG(dbgs() << "masked scatters: checking transform preconditions\n"); // @llvm.masked.scatter.*(data, ptrs, alignment, mask) // Attempt to turn the masked scatter in I into a MVE intrinsic // Potentially optimising the addressing modes as we do so. 
Value *Input = I->getArgOperand(0); Value *Ptr = I->getArgOperand(1); Align Alignment = cast(I->getArgOperand(2))->getAlignValue(); auto *Ty = cast(Input->getType()); if (!isLegalTypeAndAlignment(Ty->getNumElements(), Ty->getScalarSizeInBits(), Alignment)) return nullptr; lookThroughBitcast(Ptr); assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type"); IRBuilder<> Builder(I->getContext()); Builder.SetInsertPoint(I); Builder.SetCurrentDebugLocation(I->getDebugLoc()); Value *Store = tryCreateMaskedScatterOffset(I, Ptr, Builder); if (!Store) Store = tryCreateMaskedScatterBase(I, Ptr, Builder); if (!Store) return nullptr; LLVM_DEBUG(dbgs() << "masked scatters: successfully built masked scatter\n"); I->eraseFromParent(); return Store; } Value *MVEGatherScatterLowering::tryCreateMaskedScatterBase( IntrinsicInst *I, Value *Ptr, IRBuilder<> &Builder, int64_t Increment) { using namespace PatternMatch; Value *Input = I->getArgOperand(0); auto *Ty = cast(Input->getType()); // Only QR variants allow truncating if (!(Ty->getNumElements() == 4 && Ty->getScalarSizeInBits() == 32)) { // Can't build an intrinsic for this return nullptr; } Value *Mask = I->getArgOperand(3); // int_arm_mve_vstr_scatter_base(_predicated) addr, offset, data(, mask) LLVM_DEBUG(dbgs() << "masked scatters: storing to a vector of pointers\n"); if (match(Mask, m_One())) return Builder.CreateIntrinsic(Intrinsic::arm_mve_vstr_scatter_base, {Ptr->getType(), Input->getType()}, {Ptr, Builder.getInt32(Increment), Input}); else return Builder.CreateIntrinsic( Intrinsic::arm_mve_vstr_scatter_base_predicated, {Ptr->getType(), Input->getType(), Mask->getType()}, {Ptr, Builder.getInt32(Increment), Input, Mask}); } Value *MVEGatherScatterLowering::tryCreateMaskedScatterBaseWB( IntrinsicInst *I, Value *Ptr, IRBuilder<> &Builder, int64_t Increment) { using namespace PatternMatch; Value *Input = I->getArgOperand(0); auto *Ty = cast(Input->getType()); LLVM_DEBUG( dbgs() << "masked scatters: storing to a vector of pointers with writeback\n"); if (Ty->getNumElements() != 4 || Ty->getScalarSizeInBits() != 32) // Can't build an intrinsic for this return nullptr; Value *Mask = I->getArgOperand(3); if (match(Mask, m_One())) return Builder.CreateIntrinsic(Intrinsic::arm_mve_vstr_scatter_base_wb, {Ptr->getType(), Input->getType()}, {Ptr, Builder.getInt32(Increment), Input}); else return Builder.CreateIntrinsic( Intrinsic::arm_mve_vstr_scatter_base_wb_predicated, {Ptr->getType(), Input->getType(), Mask->getType()}, {Ptr, Builder.getInt32(Increment), Input, Mask}); } Value *MVEGatherScatterLowering::tryCreateMaskedScatterOffset( IntrinsicInst *I, Value *Ptr, IRBuilder<> &Builder) { using namespace PatternMatch; Value *Input = I->getArgOperand(0); Value *Mask = I->getArgOperand(3); Type *InputTy = Input->getType(); Type *MemoryTy = InputTy; LLVM_DEBUG(dbgs() << "masked scatters: getelementpointer found. Storing" << " to base + vector of offsets\n"); // If the input has been truncated, try to integrate that trunc into the // scatter instruction (we don't care about alignment here) if (TruncInst *Trunc = dyn_cast(Input)) { Value *PreTrunc = Trunc->getOperand(0); Type *PreTruncTy = PreTrunc->getType(); if (PreTruncTy->getPrimitiveSizeInBits() == 128) { Input = PreTrunc; InputTy = PreTruncTy; } } if (InputTy->getPrimitiveSizeInBits() != 128) { LLVM_DEBUG( dbgs() << "masked scatters: cannot create scatters for non-standard" << " input types. 
Expanding.\n"); return nullptr; } GetElementPtrInst *GEP = dyn_cast(Ptr); Value *Offsets; Value *BasePtr = checkGEP(Offsets, cast(InputTy), GEP, Builder); if (!BasePtr) return nullptr; // Check whether the offset is a constant increment that could be merged into // a QI gather Value *Store = tryCreateIncrementingGatScat(I, BasePtr, Offsets, GEP, Builder); if (Store) return Store; int Scale = computeScale( BasePtr->getType()->getPointerElementType()->getPrimitiveSizeInBits(), MemoryTy->getScalarSizeInBits()); if (Scale == -1) return nullptr; if (!match(Mask, m_One())) return Builder.CreateIntrinsic( Intrinsic::arm_mve_vstr_scatter_offset_predicated, {BasePtr->getType(), Offsets->getType(), Input->getType(), Mask->getType()}, {BasePtr, Offsets, Input, Builder.getInt32(MemoryTy->getScalarSizeInBits()), Builder.getInt32(Scale), Mask}); else return Builder.CreateIntrinsic( Intrinsic::arm_mve_vstr_scatter_offset, {BasePtr->getType(), Offsets->getType(), Input->getType()}, {BasePtr, Offsets, Input, Builder.getInt32(MemoryTy->getScalarSizeInBits()), Builder.getInt32(Scale)}); } Value *MVEGatherScatterLowering::tryCreateIncrementingGatScat( IntrinsicInst *I, Value *BasePtr, Value *Offsets, GetElementPtrInst *GEP, IRBuilder<> &Builder) { FixedVectorType *Ty; if (I->getIntrinsicID() == Intrinsic::masked_gather) Ty = cast(I->getType()); else Ty = cast(I->getArgOperand(0)->getType()); // Incrementing gathers only exist for v4i32 if (Ty->getNumElements() != 4 || Ty->getScalarSizeInBits() != 32) return nullptr; Loop *L = LI->getLoopFor(I->getParent()); if (L == nullptr) // Incrementing gathers are not beneficial outside of a loop return nullptr; LLVM_DEBUG(dbgs() << "masked gathers/scatters: trying to build incrementing " "wb gather/scatter\n"); // The gep was in charge of making sure the offsets are scaled correctly // - calculate that factor so it can be applied by hand DataLayout DT = I->getParent()->getParent()->getParent()->getDataLayout(); int TypeScale = computeScale(DT.getTypeSizeInBits(GEP->getOperand(0)->getType()), DT.getTypeSizeInBits(GEP->getType()) / cast(GEP->getType())->getNumElements()); if (TypeScale == -1) return nullptr; if (GEP->hasOneUse()) { // Only in this case do we want to build a wb gather, because the wb will // change the phi which does affect other users of the gep (which will still // be using the phi in the old way) Value *Load = tryCreateIncrementingWBGatScat(I, BasePtr, Offsets, TypeScale, Builder); if (Load != nullptr) return Load; } LLVM_DEBUG(dbgs() << "masked gathers/scatters: trying to build incrementing " "non-wb gather/scatter\n"); std::pair Add = getVarAndConst(Offsets, TypeScale); if (Add.first == nullptr) return nullptr; Value *OffsetsIncoming = Add.first; int64_t Immediate = Add.second; // Make sure the offsets are scaled correctly Instruction *ScaledOffsets = BinaryOperator::Create( Instruction::Shl, OffsetsIncoming, Builder.CreateVectorSplat(Ty->getNumElements(), Builder.getInt32(TypeScale)), "ScaledIndex", I); // Add the base to the offsets OffsetsIncoming = BinaryOperator::Create( Instruction::Add, ScaledOffsets, Builder.CreateVectorSplat( Ty->getNumElements(), Builder.CreatePtrToInt( BasePtr, cast(ScaledOffsets->getType())->getElementType())), "StartIndex", I); if (I->getIntrinsicID() == Intrinsic::masked_gather) return cast( tryCreateMaskedGatherBase(I, OffsetsIncoming, Builder, Immediate)); else return cast( tryCreateMaskedScatterBase(I, OffsetsIncoming, Builder, Immediate)); } Value *MVEGatherScatterLowering::tryCreateIncrementingWBGatScat( 
IntrinsicInst *I, Value *BasePtr, Value *Offsets, unsigned TypeScale, IRBuilder<> &Builder) { // Check whether this gather's offset is incremented by a constant - if so, // and the load is of the right type, we can merge this into a QI gather Loop *L = LI->getLoopFor(I->getParent()); // Offsets that are worth merging into this instruction will be incremented // by a constant, thus we're looking for an add of a phi and a constant PHINode *Phi = dyn_cast(Offsets); if (Phi == nullptr || Phi->getNumIncomingValues() != 2 || Phi->getParent() != L->getHeader() || Phi->getNumUses() != 2) // No phi means no IV to write back to; if there is a phi, we expect it // to have exactly two incoming values; the only phis we are interested in // will be loop IV's and have exactly two uses, one in their increment and // one in the gather's gep return nullptr; unsigned IncrementIndex = Phi->getIncomingBlock(0) == L->getLoopLatch() ? 0 : 1; // Look through the phi to the phi increment Offsets = Phi->getIncomingValue(IncrementIndex); std::pair Add = getVarAndConst(Offsets, TypeScale); if (Add.first == nullptr) return nullptr; Value *OffsetsIncoming = Add.first; int64_t Immediate = Add.second; if (OffsetsIncoming != Phi) // Then the increment we are looking at is not an increment of the // induction variable, and we don't want to do a writeback return nullptr; Builder.SetInsertPoint(&Phi->getIncomingBlock(1 - IncrementIndex)->back()); unsigned NumElems = cast(OffsetsIncoming->getType())->getNumElements(); // Make sure the offsets are scaled correctly Instruction *ScaledOffsets = BinaryOperator::Create( Instruction::Shl, Phi->getIncomingValue(1 - IncrementIndex), Builder.CreateVectorSplat(NumElems, Builder.getInt32(TypeScale)), "ScaledIndex", &Phi->getIncomingBlock(1 - IncrementIndex)->back()); // Add the base to the offsets OffsetsIncoming = BinaryOperator::Create( Instruction::Add, ScaledOffsets, Builder.CreateVectorSplat( NumElems, Builder.CreatePtrToInt( BasePtr, cast(ScaledOffsets->getType())->getElementType())), "StartIndex", &Phi->getIncomingBlock(1 - IncrementIndex)->back()); // The gather is pre-incrementing OffsetsIncoming = BinaryOperator::Create( Instruction::Sub, OffsetsIncoming, Builder.CreateVectorSplat(NumElems, Builder.getInt32(Immediate)), "PreIncrementStartIndex", &Phi->getIncomingBlock(1 - IncrementIndex)->back()); Phi->setIncomingValue(1 - IncrementIndex, OffsetsIncoming); Builder.SetInsertPoint(I); Value *EndResult; Value *NewInduction; if (I->getIntrinsicID() == Intrinsic::masked_gather) { // Build the incrementing gather Value *Load = tryCreateMaskedGatherBaseWB(I, Phi, Builder, Immediate); // One value to be handed to whoever uses the gather, one is the loop // increment EndResult = Builder.CreateExtractValue(Load, 0, "Gather"); NewInduction = Builder.CreateExtractValue(Load, 1, "GatherIncrement"); } else { // Build the incrementing scatter NewInduction = tryCreateMaskedScatterBaseWB(I, Phi, Builder, Immediate); EndResult = NewInduction; } Instruction *AddInst = cast(Offsets); AddInst->replaceAllUsesWith(NewInduction); AddInst->eraseFromParent(); Phi->setIncomingValue(IncrementIndex, NewInduction); return EndResult; } void MVEGatherScatterLowering::pushOutAdd(PHINode *&Phi, Value *OffsSecondOperand, unsigned StartIndex) { LLVM_DEBUG(dbgs() << "masked gathers/scatters: optimising add instruction\n"); Instruction *InsertionPoint = &cast(Phi->getIncomingBlock(StartIndex)->back()); // Initialize the phi with a vector that contains a sum of the constants Instruction *NewIndex = 
BinaryOperator::Create( Instruction::Add, Phi->getIncomingValue(StartIndex), OffsSecondOperand, "PushedOutAdd", InsertionPoint); unsigned IncrementIndex = StartIndex == 0 ? 1 : 0; // Order such that start index comes first (this reduces mov's) Phi->addIncoming(NewIndex, Phi->getIncomingBlock(StartIndex)); Phi->addIncoming(Phi->getIncomingValue(IncrementIndex), Phi->getIncomingBlock(IncrementIndex)); Phi->removeIncomingValue(IncrementIndex); Phi->removeIncomingValue(StartIndex); } void MVEGatherScatterLowering::pushOutMul(PHINode *&Phi, Value *IncrementPerRound, Value *OffsSecondOperand, unsigned LoopIncrement, IRBuilder<> &Builder) { LLVM_DEBUG(dbgs() << "masked gathers/scatters: optimising mul instruction\n"); // Create a new scalar add outside of the loop and transform it to a splat // by which loop variable can be incremented Instruction *InsertionPoint = &cast( Phi->getIncomingBlock(LoopIncrement == 1 ? 0 : 1)->back()); // Create a new index Value *StartIndex = BinaryOperator::Create( Instruction::Mul, Phi->getIncomingValue(LoopIncrement == 1 ? 0 : 1), OffsSecondOperand, "PushedOutMul", InsertionPoint); Instruction *Product = BinaryOperator::Create(Instruction::Mul, IncrementPerRound, OffsSecondOperand, "Product", InsertionPoint); // Increment NewIndex by Product instead of the multiplication Instruction *NewIncrement = BinaryOperator::Create( Instruction::Add, Phi, Product, "IncrementPushedOutMul", cast(Phi->getIncomingBlock(LoopIncrement)->back()) .getPrevNode()); Phi->addIncoming(StartIndex, Phi->getIncomingBlock(LoopIncrement == 1 ? 0 : 1)); Phi->addIncoming(NewIncrement, Phi->getIncomingBlock(LoopIncrement)); Phi->removeIncomingValue((unsigned)0); Phi->removeIncomingValue((unsigned)0); } // Check whether all usages of this instruction are as offsets of // gathers/scatters or simple arithmetics only used by gathers/scatters static bool hasAllGatScatUsers(Instruction *I) { if (I->hasNUses(0)) { return false; } bool Gatscat = true; for (User *U : I->users()) { if (!isa(U)) return false; if (isa(U) || isGatherScatter(dyn_cast(U))) { return Gatscat; } else { unsigned OpCode = cast(U)->getOpcode(); if ((OpCode == Instruction::Add || OpCode == Instruction::Mul) && hasAllGatScatUsers(cast(U))) { continue; } return false; } } return Gatscat; } bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB, LoopInfo *LI) { LLVM_DEBUG(dbgs() << "masked gathers/scatters: trying to optimize\n"); // Optimise the addresses of gathers/scatters by moving invariant // calculations out of the loop if (!isa(Offsets)) return false; Instruction *Offs = cast(Offsets); if (Offs->getOpcode() != Instruction::Add && Offs->getOpcode() != Instruction::Mul) return false; Loop *L = LI->getLoopFor(BB); if (L == nullptr) return false; if (!Offs->hasOneUse()) { if (!hasAllGatScatUsers(Offs)) return false; } // Find out which, if any, operand of the instruction // is a phi node PHINode *Phi; int OffsSecondOp; if (isa(Offs->getOperand(0))) { Phi = cast(Offs->getOperand(0)); OffsSecondOp = 1; } else if (isa(Offs->getOperand(1))) { Phi = cast(Offs->getOperand(1)); OffsSecondOp = 0; } else { bool Changed = true; if (isa(Offs->getOperand(0)) && L->contains(cast(Offs->getOperand(0)))) Changed |= optimiseOffsets(Offs->getOperand(0), BB, LI); if (isa(Offs->getOperand(1)) && L->contains(cast(Offs->getOperand(1)))) Changed |= optimiseOffsets(Offs->getOperand(1), BB, LI); if (!Changed) { return false; } else { if (isa(Offs->getOperand(0))) { Phi = cast(Offs->getOperand(0)); OffsSecondOp = 1; } else if 
(isa(Offs->getOperand(1))) { Phi = cast(Offs->getOperand(1)); OffsSecondOp = 0; } else { return false; } } } // A phi node we want to perform this function on should be from the // loop header, and shouldn't have more than 2 incoming values if (Phi->getParent() != L->getHeader() || Phi->getNumIncomingValues() != 2) return false; // The phi must be an induction variable int IncrementingBlock = -1; for (int i = 0; i < 2; i++) if (auto *Op = dyn_cast(Phi->getIncomingValue(i))) if (Op->getOpcode() == Instruction::Add && (Op->getOperand(0) == Phi || Op->getOperand(1) == Phi)) IncrementingBlock = i; if (IncrementingBlock == -1) return false; Instruction *IncInstruction = cast(Phi->getIncomingValue(IncrementingBlock)); // If the phi is not used by anything else, we can just adapt it when // replacing the instruction; if it is, we'll have to duplicate it PHINode *NewPhi; Value *IncrementPerRound = IncInstruction->getOperand( (IncInstruction->getOperand(0) == Phi) ? 1 : 0); // Get the value that is added to/multiplied with the phi Value *OffsSecondOperand = Offs->getOperand(OffsSecondOp); - if (IncrementPerRound->getType() != OffsSecondOperand->getType()) + if (IncrementPerRound->getType() != OffsSecondOperand->getType() || + !L->isLoopInvariant(OffsSecondOperand)) // Something has gone wrong, abort return false; // Only proceed if the increment per round is a constant or an instruction // which does not originate from within the loop if (!isa(IncrementPerRound) && !(isa(IncrementPerRound) && !L->contains(cast(IncrementPerRound)))) return false; if (Phi->getNumUses() == 2) { // No other users -> reuse existing phi (One user is the instruction // we're looking at, the other is the phi increment) if (IncInstruction->getNumUses() != 1) { // If the incrementing instruction does have more users than // our phi, we need to copy it IncInstruction = BinaryOperator::Create( Instruction::BinaryOps(IncInstruction->getOpcode()), Phi, IncrementPerRound, "LoopIncrement", IncInstruction); Phi->setIncomingValue(IncrementingBlock, IncInstruction); } NewPhi = Phi; } else { // There are other users -> create a new phi NewPhi = PHINode::Create(Phi->getType(), 0, "NewPhi", Phi); std::vector Increases; // Copy the incoming values of the old phi NewPhi->addIncoming(Phi->getIncomingValue(IncrementingBlock == 1 ? 0 : 1), Phi->getIncomingBlock(IncrementingBlock == 1 ? 0 : 1)); IncInstruction = BinaryOperator::Create( Instruction::BinaryOps(IncInstruction->getOpcode()), NewPhi, IncrementPerRound, "LoopIncrement", IncInstruction); NewPhi->addIncoming(IncInstruction, Phi->getIncomingBlock(IncrementingBlock)); IncrementingBlock = 1; } IRBuilder<> Builder(BB->getContext()); Builder.SetInsertPoint(Phi); Builder.SetCurrentDebugLocation(Offs->getDebugLoc()); switch (Offs->getOpcode()) { case Instruction::Add: pushOutAdd(NewPhi, OffsSecondOperand, IncrementingBlock == 1 ? 
0 : 1); break; case Instruction::Mul: pushOutMul(NewPhi, IncrementPerRound, OffsSecondOperand, IncrementingBlock, Builder); break; default: return false; } LLVM_DEBUG( dbgs() << "masked gathers/scatters: simplified loop variable add/mul\n"); // The instruction has now been "absorbed" into the phi value Offs->replaceAllUsesWith(NewPhi); if (Offs->hasNUses(0)) Offs->eraseFromParent(); // Clean up the old increment in case it's unused because we built a new // one if (IncInstruction->hasNUses(0)) IncInstruction->eraseFromParent(); return true; } static Value *CheckAndCreateOffsetAdd(Value *X, Value *Y, Value *GEP, IRBuilder<> &Builder) { // Splat the non-vector value to a vector of the given type - if the value is // a constant (and its value isn't too big), we can even use this opportunity // to scale it to the size of the vector elements auto FixSummands = [&Builder](FixedVectorType *&VT, Value *&NonVectorVal) { ConstantInt *Const; if ((Const = dyn_cast(NonVectorVal)) && VT->getElementType() != NonVectorVal->getType()) { unsigned TargetElemSize = VT->getElementType()->getPrimitiveSizeInBits(); uint64_t N = Const->getZExtValue(); if (N < (unsigned)(1 << (TargetElemSize - 1))) { NonVectorVal = Builder.CreateVectorSplat( VT->getNumElements(), Builder.getIntN(TargetElemSize, N)); return; } } NonVectorVal = Builder.CreateVectorSplat(VT->getNumElements(), NonVectorVal); }; FixedVectorType *XElType = dyn_cast(X->getType()); FixedVectorType *YElType = dyn_cast(Y->getType()); // If one of X, Y is not a vector, we have to splat it in order // to add the two of them. if (XElType && !YElType) { FixSummands(XElType, Y); YElType = cast(Y->getType()); } else if (YElType && !XElType) { FixSummands(YElType, X); XElType = cast(X->getType()); } assert(XElType && YElType && "Unknown vector types"); // Check that the summands are of compatible types if (XElType != YElType) { LLVM_DEBUG(dbgs() << "masked gathers/scatters: incompatible gep offsets\n"); return nullptr; } if (XElType->getElementType()->getScalarSizeInBits() != 32) { // Check that by adding the vectors we do not accidentally // create an overflow Constant *ConstX = dyn_cast(X); Constant *ConstY = dyn_cast(Y); if (!ConstX || !ConstY) return nullptr; unsigned TargetElemSize = 128 / XElType->getNumElements(); for (unsigned i = 0; i < XElType->getNumElements(); i++) { ConstantInt *ConstXEl = dyn_cast(ConstX->getAggregateElement(i)); ConstantInt *ConstYEl = dyn_cast(ConstY->getAggregateElement(i)); if (!ConstXEl || !ConstYEl || ConstXEl->getZExtValue() + ConstYEl->getZExtValue() >= (unsigned)(1 << (TargetElemSize - 1))) return nullptr; } } Value *Add = Builder.CreateAdd(X, Y); FixedVectorType *GEPType = cast(GEP->getType()); if (checkOffsetSize(Add, GEPType->getNumElements())) return Add; else return nullptr; } Value *MVEGatherScatterLowering::foldGEP(GetElementPtrInst *GEP, Value *&Offsets, IRBuilder<> &Builder) { Value *GEPPtr = GEP->getPointerOperand(); Offsets = GEP->getOperand(1); // We only merge geps with constant offsets, because only for those // we can make sure that we do not cause an overflow if (!isa(Offsets)) return nullptr; GetElementPtrInst *BaseGEP; if ((BaseGEP = dyn_cast(GEPPtr))) { // Merge the two geps into one Value *BaseBasePtr = foldGEP(BaseGEP, Offsets, Builder); if (!BaseBasePtr) return nullptr; Offsets = CheckAndCreateOffsetAdd(Offsets, GEP->getOperand(1), GEP, Builder); if (Offsets == nullptr) return nullptr; return BaseBasePtr; } return GEPPtr; } bool MVEGatherScatterLowering::optimiseAddress(Value *Address, BasicBlock *BB, 
LoopInfo *LI) { GetElementPtrInst *GEP = dyn_cast(Address); if (!GEP) return false; bool Changed = false; if (GEP->hasOneUse() && dyn_cast(GEP->getPointerOperand())) { IRBuilder<> Builder(GEP->getContext()); Builder.SetInsertPoint(GEP); Builder.SetCurrentDebugLocation(GEP->getDebugLoc()); Value *Offsets; Value *Base = foldGEP(GEP, Offsets, Builder); // We only want to merge the geps if there is a real chance that they can be // used by an MVE gather; thus the offset has to have the correct size // (always i32 if it is not of vector type) and the base has to be a // pointer. if (Offsets && Base && Base != GEP) { PointerType *BaseType = cast(Base->getType()); GetElementPtrInst *NewAddress = GetElementPtrInst::Create( BaseType->getPointerElementType(), Base, Offsets, "gep.merged", GEP); GEP->replaceAllUsesWith(NewAddress); GEP = NewAddress; Changed = true; } } Changed |= optimiseOffsets(GEP->getOperand(1), GEP->getParent(), LI); return Changed; } bool MVEGatherScatterLowering::runOnFunction(Function &F) { if (!EnableMaskedGatherScatters) return false; auto &TPC = getAnalysis(); auto &TM = TPC.getTM(); auto *ST = &TM.getSubtarget(F); if (!ST->hasMVEIntegerOps()) return false; LI = &getAnalysis().getLoopInfo(); SmallVector Gathers; SmallVector Scatters; bool Changed = false; for (BasicBlock &BB : F) { + Changed |= SimplifyInstructionsInBlock(&BB); + for (Instruction &I : BB) { IntrinsicInst *II = dyn_cast(&I); if (II && II->getIntrinsicID() == Intrinsic::masked_gather && isa(II->getType())) { Gathers.push_back(II); Changed |= optimiseAddress(II->getArgOperand(0), II->getParent(), LI); } else if (II && II->getIntrinsicID() == Intrinsic::masked_scatter && isa(II->getArgOperand(0)->getType())) { Scatters.push_back(II); Changed |= optimiseAddress(II->getArgOperand(1), II->getParent(), LI); } } } for (unsigned i = 0; i < Gathers.size(); i++) { IntrinsicInst *I = Gathers[i]; Value *L = lowerGather(I); if (L == nullptr) continue; // Get rid of any now dead instructions SimplifyInstructionsInBlock(cast(L)->getParent()); Changed = true; } for (unsigned i = 0; i < Scatters.size(); i++) { IntrinsicInst *I = Scatters[i]; Value *S = lowerScatter(I); if (S == nullptr) continue; // Get rid of any now dead instructions SimplifyInstructionsInBlock(cast(S)->getParent()); Changed = true; } return Changed; } diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredication.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredication.cpp index b705208660df..cccac5595288 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredication.cpp +++ b/contrib/llvm-project/llvm/lib/Target/ARM/MVETailPredication.cpp @@ -1,429 +1,433 @@ //===- MVETailPredication.cpp - MVE Tail Predication ------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // /// \file /// Armv8.1m introduced MVE, M-Profile Vector Extension, and low-overhead /// branches to help accelerate DSP applications. These two extensions, /// combined with a new form of predication called tail-predication, can be used /// to provide implicit vector predication within a low-overhead loop. /// This is implicit because the predicate of active/inactive lanes is /// calculated by hardware, and thus does not need to be explicitly passed /// to vector instructions. 
The instructions responsible for this are the /// DLSTP and WLSTP instructions, which setup a tail-predicated loop and the /// the total number of data elements processed by the loop. The loop-end /// LETP instruction is responsible for decrementing and setting the remaining /// elements to be processed and generating the mask of active lanes. /// /// The HardwareLoops pass inserts intrinsics identifying loops that the /// backend will attempt to convert into a low-overhead loop. The vectorizer is /// responsible for generating a vectorized loop in which the lanes are /// predicated upon an get.active.lane.mask intrinsic. This pass looks at these /// get.active.lane.mask intrinsic and attempts to convert them to VCTP /// instructions. This will be picked up by the ARM Low-overhead loop pass later /// in the backend, which performs the final transformation to a DLSTP or WLSTP /// tail-predicated loop. // //===----------------------------------------------------------------------===// #include "ARM.h" #include "ARMSubtarget.h" #include "ARMTargetTransformInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicsARM.h" #include "llvm/IR/PatternMatch.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" using namespace llvm; #define DEBUG_TYPE "mve-tail-predication" #define DESC "Transform predicated vector loops to use MVE tail predication" cl::opt EnableTailPredication( "tail-predication", cl::desc("MVE tail-predication pass options"), cl::init(TailPredication::Enabled), cl::values(clEnumValN(TailPredication::Disabled, "disabled", "Don't tail-predicate loops"), clEnumValN(TailPredication::EnabledNoReductions, "enabled-no-reductions", "Enable tail-predication, but not for reduction loops"), clEnumValN(TailPredication::Enabled, "enabled", "Enable tail-predication, including reduction loops"), clEnumValN(TailPredication::ForceEnabledNoReductions, "force-enabled-no-reductions", "Enable tail-predication, but not for reduction loops, " "and force this which might be unsafe"), clEnumValN(TailPredication::ForceEnabled, "force-enabled", "Enable tail-predication, including reduction loops, " "and force this which might be unsafe"))); namespace { class MVETailPredication : public LoopPass { SmallVector MaskedInsts; Loop *L = nullptr; ScalarEvolution *SE = nullptr; TargetTransformInfo *TTI = nullptr; const ARMSubtarget *ST = nullptr; public: static char ID; MVETailPredication() : LoopPass(ID) { } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.setPreservesCFG(); } bool runOnLoop(Loop *L, LPPassManager&) override; private: /// Perform the relevant checks on the loop and convert active lane masks if /// possible. bool TryConvertActiveLaneMask(Value *TripCount); /// Perform several checks on the arguments of @llvm.get.active.lane.mask /// intrinsic. 
E.g., check that the loop induction variable and the element /// count are of the form we expect, and also perform overflow checks for /// the new expressions that are created. bool IsSafeActiveMask(IntrinsicInst *ActiveLaneMask, Value *TripCount); /// Insert the intrinsic to represent the effect of tail predication. void InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask, Value *TripCount); /// Rematerialize the iteration count in exit blocks, which enables /// ARMLowOverheadLoops to better optimise away loop update statements inside /// hardware-loops. void RematerializeIterCount(); }; } // end namespace bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) { if (skipLoop(L) || !EnableTailPredication) return false; MaskedInsts.clear(); Function &F = *L->getHeader()->getParent(); auto &TPC = getAnalysis(); auto &TM = TPC.getTM(); ST = &TM.getSubtarget(F); TTI = &getAnalysis().getTTI(F); SE = &getAnalysis().getSE(); this->L = L; // The MVE and LOB extensions are combined to enable tail-predication, but // there's nothing preventing us from generating VCTP instructions for v8.1m. if (!ST->hasMVEIntegerOps() || !ST->hasV8_1MMainlineOps()) { LLVM_DEBUG(dbgs() << "ARM TP: Not a v8.1m.main+mve target.\n"); return false; } BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) return false; auto FindLoopIterations = [](BasicBlock *BB) -> IntrinsicInst* { for (auto &I : *BB) { auto *Call = dyn_cast(&I); if (!Call) continue; Intrinsic::ID ID = Call->getIntrinsicID(); if (ID == Intrinsic::start_loop_iterations || ID == Intrinsic::test_set_loop_iterations) return cast(&I); } return nullptr; }; // Look for the hardware loop intrinsic that sets the iteration count. IntrinsicInst *Setup = FindLoopIterations(Preheader); // The test.set iteration could live in the pre-preheader. if (!Setup) { if (!Preheader->getSinglePredecessor()) return false; Setup = FindLoopIterations(Preheader->getSinglePredecessor()); if (!Setup) return false; } LLVM_DEBUG(dbgs() << "ARM TP: Running on Loop: " << *L << *Setup << "\n"); bool Changed = TryConvertActiveLaneMask(Setup->getArgOperand(0)); return Changed; } // The active lane intrinsic has this form: // // @llvm.get.active.lane.mask(IV, TC) // // Here we perform checks that this intrinsic behaves as expected, // which means: // // 1) Check that the TripCount (TC) belongs to this loop (originally). // 2) The element count (TC) needs to be sufficiently large that the decrement // of the element counter doesn't overflow, which means that we need to prove: // ceil(ElementCount / VectorWidth) >= TripCount // by rounding ElementCount up: // (ElementCount + (VectorWidth - 1)) / VectorWidth // and evaluating whether the following expression is known to be non-negative: // ((ElementCount + (VectorWidth - 1)) / VectorWidth) - TripCount // 3) The IV must be an induction phi with an increment equal to the // vector width.
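//
// As a small numeric sketch of the consistency check performed below (the
// values are purely illustrative): with ElementCount = 1000 and
// VectorWidth = 4, ceil(ElementCount / VectorWidth) is computed as
// (1000 + 4 - 1) / 4 = 250, so a constant tripcount carried by
// set.loop.iterations must also be 250 for the VCTP to be inserted; any other
// value is rejected as inconsistent and the get.active.lane.mask is kept.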
bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask, Value *TripCount) { bool ForceTailPredication = EnableTailPredication == TailPredication::ForceEnabledNoReductions || EnableTailPredication == TailPredication::ForceEnabled; Value *ElemCount = ActiveLaneMask->getOperand(1); + bool Changed = false; + if (!L->makeLoopInvariant(ElemCount, Changed)) + return false; + auto *EC= SE->getSCEV(ElemCount); auto *TC = SE->getSCEV(TripCount); int VectorWidth = cast(ActiveLaneMask->getType())->getNumElements(); if (VectorWidth != 4 && VectorWidth != 8 && VectorWidth != 16) return false; ConstantInt *ConstElemCount = nullptr; // 1) Smoke tests that the original scalar loop TripCount (TC) belongs to // this loop. The scalar tripcount corresponds to the number of elements // processed by the loop, so we will refer to that from this point on. if (!SE->isLoopInvariant(EC, L)) { LLVM_DEBUG(dbgs() << "ARM TP: element count must be loop invariant.\n"); return false; } if ((ConstElemCount = dyn_cast(ElemCount))) { ConstantInt *TC = dyn_cast(TripCount); if (!TC) { LLVM_DEBUG(dbgs() << "ARM TP: Constant tripcount expected in " "set.loop.iterations\n"); return false; } // Calculate 2 tripcount values and check that they are consistent with // each other. The TripCount for a predicated vector loop body is // ceil(ElementCount/Width), or floor((ElementCount+Width-1)/Width) as we // work it out here. uint64_t TC1 = TC->getZExtValue(); uint64_t TC2 = (ConstElemCount->getZExtValue() + VectorWidth - 1) / VectorWidth; // If the tripcount values are inconsistent, we can't insert the VCTP and // trigger tail-predication; keep the intrinsic as a get.active.lane.mask // and legalize this. if (TC1 != TC2) { LLVM_DEBUG(dbgs() << "ARM TP: inconsistent constant tripcount values: " << TC1 << " from set.loop.iterations, and " << TC2 << " from get.active.lane.mask\n"); return false; } } else if (!ForceTailPredication) { // 2) We need to prove that the sub expression that we create in the // tail-predicated loop body, which calculates the remaining elements to be // processed, is non-negative, i.e. it doesn't overflow: // // ((ElementCount + VectorWidth - 1) / VectorWidth) - TripCount >= 0 // // This is true if: // // TripCount == (ElementCount + VectorWidth - 1) / VectorWidth // // which is what we will be using here. // auto *VW = SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth)); // ElementCount + (VW-1): auto *ECPlusVWMinus1 = SE->getAddExpr(EC, SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth - 1))); // Ceil = (ElementCount + (VW-1)) / VW auto *Ceil = SE->getUDivExpr(ECPlusVWMinus1, VW); // Prevent unused variable warnings with TC (void)TC; LLVM_DEBUG( dbgs() << "ARM TP: Analysing overflow behaviour for:\n"; dbgs() << "ARM TP: - TripCount = "; TC->dump(); dbgs() << "ARM TP: - ElemCount = "; EC->dump(); dbgs() << "ARM TP: - VecWidth = " << VectorWidth << "\n"; dbgs() << "ARM TP: - (ElemCount+VW-1) / VW = "; Ceil->dump(); ); // As an example, almost all the tripcount expressions (produced by the // vectoriser) look like this: // // TC = ((-4 + (4 * ((3 + %N) /u 4))) /u 4) // // and "(ElementCount + (VW-1)) / VW": // // Ceil = ((3 + %N) /u 4) // // Check for equality of TC and Ceil by calculating the SCEV expression // TC - Ceil and testing it for zero.
// bool Zero = SE->getMinusSCEV( SE->getBackedgeTakenCount(L), SE->getUDivExpr(SE->getAddExpr(SE->getMulExpr(Ceil, VW), SE->getNegativeSCEV(VW)), VW)) ->isZero(); if (!Zero) { LLVM_DEBUG(dbgs() << "ARM TP: possible overflow in sub expression.\n"); return false; } } // 3) Find out if IV is an induction phi. Note that we can't use Loop // helpers here to get the induction variable, because the hardware loop is // no longer in loopsimplify form, and also the hwloop intrinsic uses a // different counter. Using SCEV, we check that the induction is of the // form i = i + 4, where the increment must be equal to the VectorWidth. auto *IV = ActiveLaneMask->getOperand(0); auto *IVExpr = SE->getSCEV(IV); auto *AddExpr = dyn_cast(IVExpr); if (!AddExpr) { LLVM_DEBUG(dbgs() << "ARM TP: induction not an add expr: "; IVExpr->dump()); return false; } // Check that this AddRec is associated with this loop. if (AddExpr->getLoop() != L) { LLVM_DEBUG(dbgs() << "ARM TP: phi not part of this loop\n"); return false; } auto *Base = dyn_cast(AddExpr->getOperand(0)); if (!Base || !Base->isZero()) { LLVM_DEBUG(dbgs() << "ARM TP: induction base is not 0\n"); return false; } auto *Step = dyn_cast(AddExpr->getOperand(1)); if (!Step) { LLVM_DEBUG(dbgs() << "ARM TP: induction step is not a constant: "; AddExpr->getOperand(1)->dump()); return false; } auto StepValue = Step->getValue()->getSExtValue(); if (VectorWidth == StepValue) return true; LLVM_DEBUG(dbgs() << "ARM TP: Step value " << StepValue << " doesn't match vector width " << VectorWidth << "\n"); return false; } void MVETailPredication::InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask, Value *TripCount) { IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); Module *M = L->getHeader()->getModule(); Type *Ty = IntegerType::get(M->getContext(), 32); unsigned VectorWidth = cast(ActiveLaneMask->getType())->getNumElements(); // Insert a phi to count the number of elements processed by the loop. Builder.SetInsertPoint(L->getHeader()->getFirstNonPHI()); PHINode *Processed = Builder.CreatePHI(Ty, 2); Processed->addIncoming(ActiveLaneMask->getOperand(1), L->getLoopPreheader()); // Replace @llvm.get.active.mask() with the ARM specific VCTP intrinic, and // thus represent the effect of tail predication. Builder.SetInsertPoint(ActiveLaneMask); ConstantInt *Factor = ConstantInt::get(cast(Ty), VectorWidth); Intrinsic::ID VCTPID; switch (VectorWidth) { default: llvm_unreachable("unexpected number of lanes"); case 4: VCTPID = Intrinsic::arm_mve_vctp32; break; case 8: VCTPID = Intrinsic::arm_mve_vctp16; break; case 16: VCTPID = Intrinsic::arm_mve_vctp8; break; // FIXME: vctp64 currently not supported because the predicate // vector wants to be <2 x i1>, but v2i1 is not a legal MVE // type, so problems happen at isel time. // Intrinsic::arm_mve_vctp64 exists for ACLE intrinsics // purposes, but takes a v4i1 instead of a v2i1. } Function *VCTP = Intrinsic::getDeclaration(M, VCTPID); Value *VCTPCall = Builder.CreateCall(VCTP, Processed); ActiveLaneMask->replaceAllUsesWith(VCTPCall); // Add the incoming value to the new phi. // TODO: This add likely already exists in the loop. 
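  // Roughly, once this function has run, the vector body is driven by an
  // element counter instead of the lane mask (schematic IR only; the value
  // and block names here are invented for illustration):
  //
  //   vector.body:
  //     %elts = phi i32 [ %element.count, %preheader ], [ %elts.rem, %vector.body ]
  //     %mask = call <4 x i1> @llvm.arm.mve.vctp32(i32 %elts)
  //     ... masked operations predicated on %mask ...
  //     %elts.rem = sub i32 %elts, 4
  //
  // The sub created just below supplies the %elts.rem value fed back into
  // the phi.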
Value *Remaining = Builder.CreateSub(Processed, Factor); Processed->addIncoming(Remaining, L->getLoopLatch()); LLVM_DEBUG(dbgs() << "ARM TP: Insert processed elements phi: " << *Processed << "\n" << "ARM TP: Inserted VCTP: " << *VCTPCall << "\n"); } bool MVETailPredication::TryConvertActiveLaneMask(Value *TripCount) { SmallVector ActiveLaneMasks; for (auto *BB : L->getBlocks()) for (auto &I : *BB) if (auto *Int = dyn_cast(&I)) if (Int->getIntrinsicID() == Intrinsic::get_active_lane_mask) ActiveLaneMasks.push_back(Int); if (ActiveLaneMasks.empty()) return false; LLVM_DEBUG(dbgs() << "ARM TP: Found predicated vector loop.\n"); for (auto *ActiveLaneMask : ActiveLaneMasks) { LLVM_DEBUG(dbgs() << "ARM TP: Found active lane mask: " << *ActiveLaneMask << "\n"); if (!IsSafeActiveMask(ActiveLaneMask, TripCount)) { LLVM_DEBUG(dbgs() << "ARM TP: Not safe to insert VCTP.\n"); return false; } LLVM_DEBUG(dbgs() << "ARM TP: Safe to insert VCTP.\n"); InsertVCTPIntrinsic(ActiveLaneMask, TripCount); } // Remove dead instructions and now dead phis. for (auto *II : ActiveLaneMasks) RecursivelyDeleteTriviallyDeadInstructions(II); for (auto I : L->blocks()) DeleteDeadPHIs(I); return true; } Pass *llvm::createMVETailPredicationPass() { return new MVETailPredication(); } char MVETailPredication::ID = 0; INITIALIZE_PASS_BEGIN(MVETailPredication, DEBUG_TYPE, DESC, false, false) INITIALIZE_PASS_END(MVETailPredication, DEBUG_TYPE, DESC, false, false) diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp index 9a710b784fd1..c7f451cba14f 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp +++ b/contrib/llvm-project/llvm/lib/Target/ARM/MVEVPTBlockPass.cpp @@ -1,326 +1,337 @@ //===-- MVEVPTBlockPass.cpp - Insert MVE VPT blocks -----------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "ARM.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "Thumb2InstrInfo.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Debug.h" #include #include using namespace llvm; #define DEBUG_TYPE "arm-mve-vpt" namespace { class MVEVPTBlock : public MachineFunctionPass { public: static char ID; const Thumb2InstrInfo *TII; const TargetRegisterInfo *TRI; MVEVPTBlock() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &Fn) override; MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( MachineFunctionProperties::Property::NoVRegs); } StringRef getPassName() const override { return "MVE VPT block insertion pass"; } private: bool InsertVPTBlocks(MachineBasicBlock &MBB); }; char MVEVPTBlock::ID = 0; } // end anonymous namespace INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false) static MachineInstr *findVCMPToFoldIntoVPST(MachineBasicBlock::iterator MI, const TargetRegisterInfo *TRI, unsigned &NewOpcode) { // Search backwards to the instruction that defines VPR. This may or not // be a VCMP, we check that after this loop. If we find another instruction // that reads cpsr, we return nullptr. MachineBasicBlock::iterator CmpMI = MI; while (CmpMI != MI->getParent()->begin()) { --CmpMI; if (CmpMI->modifiesRegister(ARM::VPR, TRI)) break; if (CmpMI->readsRegister(ARM::VPR, TRI)) break; } if (CmpMI == MI) return nullptr; NewOpcode = VCMPOpcodeToVPT(CmpMI->getOpcode()); if (NewOpcode == 0) return nullptr; // Search forward from CmpMI to MI, checking if either register was def'd if (registerDefinedBetween(CmpMI->getOperand(1).getReg(), std::next(CmpMI), MI, TRI)) return nullptr; if (registerDefinedBetween(CmpMI->getOperand(2).getReg(), std::next(CmpMI), MI, TRI)) return nullptr; return &*CmpMI; } // Advances Iter past a block of predicated instructions. // Returns true if it successfully skipped the whole block of predicated // instructions. Returns false when it stopped early (due to MaxSteps), or if // Iter didn't point to a predicated instruction. 
static bool StepOverPredicatedInstrs(MachineBasicBlock::instr_iterator &Iter, MachineBasicBlock::instr_iterator EndIter, unsigned MaxSteps, unsigned &NumInstrsSteppedOver) { ARMVCC::VPTCodes NextPred = ARMVCC::None; Register PredReg; NumInstrsSteppedOver = 0; while (Iter != EndIter) { + if (Iter->isDebugInstr()) { + // Skip debug instructions + ++Iter; + continue; + } + NextPred = getVPTInstrPredicate(*Iter, PredReg); assert(NextPred != ARMVCC::Else && "VPT block pass does not expect Else preds"); if (NextPred == ARMVCC::None || MaxSteps == 0) break; --MaxSteps; ++Iter; ++NumInstrsSteppedOver; }; return NumInstrsSteppedOver != 0 && (NextPred == ARMVCC::None || Iter == EndIter); } // Returns true if at least one instruction in the range [Iter, End) defines // or kills VPR. static bool IsVPRDefinedOrKilledByBlock(MachineBasicBlock::iterator Iter, MachineBasicBlock::iterator End) { for (; Iter != End; ++Iter) if (Iter->definesRegister(ARM::VPR) || Iter->killsRegister(ARM::VPR)) return true; return false; } // Creates a T, TT, TTT or TTTT BlockMask depending on BlockSize. static ARM::PredBlockMask GetInitialBlockMask(unsigned BlockSize) { switch (BlockSize) { case 1: return ARM::PredBlockMask::T; case 2: return ARM::PredBlockMask::TT; case 3: return ARM::PredBlockMask::TTT; case 4: return ARM::PredBlockMask::TTTT; default: llvm_unreachable("Invalid BlockSize!"); } } // Given an iterator (Iter) that points at an instruction with a "Then" // predicate, tries to create the largest block of continuous predicated // instructions possible, and returns the VPT Block Mask of that block. // // This will try to perform some minor optimization in order to maximize the // size of the block. static ARM::PredBlockMask CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, MachineBasicBlock::instr_iterator EndIter, SmallVectorImpl &DeadInstructions) { MachineBasicBlock::instr_iterator BlockBeg = Iter; (void)BlockBeg; assert(getVPTInstrPredicate(*Iter) == ARMVCC::Then && "Expected a Predicated Instruction"); LLVM_DEBUG(dbgs() << "VPT block created for: "; Iter->dump()); unsigned BlockSize; StepOverPredicatedInstrs(Iter, EndIter, 4, BlockSize); LLVM_DEBUG(for (MachineBasicBlock::instr_iterator AddedInstIter = std::next(BlockBeg); AddedInstIter != Iter; ++AddedInstIter) { + if (AddedInstIter->isDebugInstr()) + continue; dbgs() << " adding: "; AddedInstIter->dump(); }); // Generate the initial BlockMask ARM::PredBlockMask BlockMask = GetInitialBlockMask(BlockSize); // Remove VPNOTs while there's still room in the block, so we can make the // largest block possible. ARMVCC::VPTCodes CurrentPredicate = ARMVCC::Else; while (BlockSize < 4 && Iter != EndIter && Iter->getOpcode() == ARM::MVE_VPNOT) { // Try to skip all of the predicated instructions after the VPNOT, stopping // after (4 - BlockSize). If we can't skip them all, stop. unsigned ElseInstCnt = 0; MachineBasicBlock::instr_iterator VPNOTBlockEndIter = std::next(Iter); if (!StepOverPredicatedInstrs(VPNOTBlockEndIter, EndIter, (4 - BlockSize), ElseInstCnt)) break; // Check if this VPNOT can be removed or not: It can only be removed if at // least one of the predicated instruction that follows it kills or sets // VPR. if (!IsVPRDefinedOrKilledByBlock(Iter, VPNOTBlockEndIter)) break; - LLVM_DEBUG(dbgs() << " removing VPNOT: "; Iter->dump();); + LLVM_DEBUG(dbgs() << " removing VPNOT: "; Iter->dump()); // Record the new size of the block BlockSize += ElseInstCnt; assert(BlockSize <= 4 && "Block is too large!"); // Record the VPNot to remove it later. 
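    // Schematically (not exact MVE assembly), folding a VPNOT this way turns
    // "then" predication under the inverted VPR into "else" lanes of one
    // block:
    //
    //   before:  VPST (mask T) ; VADDT ... ; VPNOT ; VSUBT ...
    //   after:   VPT block (mask TE) ; VADDT ... ; VSUBE ...
    //
    // which is why the VPNOT itself becomes dead and is queued for deletion
    // below.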
DeadInstructions.push_back(&*Iter); ++Iter; // Replace the predicates of the instructions we're adding. // Note that we are using "Iter" to iterate over the block so we can update // it at the same time. for (; Iter != VPNOTBlockEndIter; ++Iter) { + if (Iter->isDebugInstr()) + continue; + // Find the register in which the predicate is int OpIdx = findFirstVPTPredOperandIdx(*Iter); assert(OpIdx != -1); // Change the predicate and update the mask Iter->getOperand(OpIdx).setImm(CurrentPredicate); BlockMask = expandPredBlockMask(BlockMask, CurrentPredicate); LLVM_DEBUG(dbgs() << " adding : "; Iter->dump()); } CurrentPredicate = (CurrentPredicate == ARMVCC::Then ? ARMVCC::Else : ARMVCC::Then); } return BlockMask; } bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) { bool Modified = false; MachineBasicBlock::instr_iterator MBIter = Block.instr_begin(); MachineBasicBlock::instr_iterator EndIter = Block.instr_end(); SmallVector DeadInstructions; while (MBIter != EndIter) { MachineInstr *MI = &*MBIter; Register PredReg; DebugLoc DL = MI->getDebugLoc(); ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg); // The idea of the predicate is that None, Then and Else are for use when // handling assembly language: they correspond to the three possible // suffixes "", "t" and "e" on the mnemonic. So when instructions are read // from assembly source or disassembled from object code, you expect to // see a mixture whenever there's a long VPT block. But in code // generation, we hope we'll never generate an Else as input to this pass. assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds"); if (Pred == ARMVCC::None) { ++MBIter; continue; } ARM::PredBlockMask BlockMask = CreateVPTBlock(MBIter, EndIter, DeadInstructions); // Search back for a VCMP that can be folded to create a VPT, or else // create a VPST directly MachineInstrBuilder MIBuilder; unsigned NewOpcode; LLVM_DEBUG(dbgs() << " final block mask: " << (unsigned)BlockMask << "\n"); if (MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode)) { LLVM_DEBUG(dbgs() << " folding VCMP into VPST: "; VCMP->dump()); MIBuilder = BuildMI(Block, MI, DL, TII->get(NewOpcode)); MIBuilder.addImm((uint64_t)BlockMask); MIBuilder.add(VCMP->getOperand(1)); MIBuilder.add(VCMP->getOperand(2)); MIBuilder.add(VCMP->getOperand(3)); // We need to remove any kill flags between the original VCMP and the new // insertion point. for (MachineInstr &MII : make_range(VCMP->getIterator(), MI->getIterator())) { MII.clearRegisterKills(VCMP->getOperand(1).getReg(), TRI); MII.clearRegisterKills(VCMP->getOperand(2).getReg(), TRI); } VCMP->eraseFromParent(); } else { MIBuilder = BuildMI(Block, MI, DL, TII->get(ARM::MVE_VPST)); MIBuilder.addImm((uint64_t)BlockMask); } // Erase all dead instructions (VPNOT's). Do that now so that they do not // mess with the bundle creation. 
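    // At this point the block header is either a VPT, when a preceding VCMP
    // could be folded in (schematically, "VCMP.s32 gt, q0, q1 ; VPST ;
    // VADDT ..." becomes "VPT.s32 gt, q0, q1 ; VADDT ..."; the operands here
    // are invented), or a plain VPST carrying the computed block mask.
    // finalizeBundle below then bundles that header with its predicated
    // instructions.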
for (MachineInstr *DeadMI : DeadInstructions) DeadMI->eraseFromParent(); DeadInstructions.clear(); finalizeBundle( Block, MachineBasicBlock::instr_iterator(MIBuilder.getInstr()), MBIter); Modified = true; } return Modified; } bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) { const ARMSubtarget &STI = static_cast(Fn.getSubtarget()); if (!STI.isThumb2() || !STI.hasMVEIntegerOps()) return false; TII = static_cast(STI.getInstrInfo()); TRI = STI.getRegisterInfo(); LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n" << "********** Function: " << Fn.getName() << '\n'); bool Modified = false; for (MachineBasicBlock &MBB : Fn) Modified |= InsertVPTBlocks(MBB); LLVM_DEBUG(dbgs() << "**************************************\n"); return Modified; } /// createMVEVPTBlock - Returns an instance of the MVE VPT block /// insertion pass. FunctionPass *llvm::createMVEVPTBlockPass() { return new MVEVPTBlock(); } diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 7833bfc1d1b6..26dc3afc899e 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1,16479 +1,16500 @@ //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements the PPCISelLowering class. // //===----------------------------------------------------------------------===// #include "PPCISelLowering.h" #include "MCTargetDesc/PPCPredicates.h" #include "PPC.h" #include "PPCCCState.h" #include "PPCCallingConv.h" #include "PPCFrameLowering.h" #include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" #include "PPCPerfectShuffle.h" #include "PPCRegisterInfo.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include 
"llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsPowerPC.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/Value.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionXCOFF.h" #include "llvm/MC/MCSymbolXCOFF.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include #include #include #include #include #include #include using namespace llvm; #define DEBUG_TYPE "ppc-lowering" static cl::opt DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); static cl::opt DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden); static cl::opt DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); static cl::opt DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden); static cl::opt DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden); static cl::opt UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden); // TODO - Remove this option if soft fp128 has been fully supported . static cl::opt EnableSoftFP128("enable-soft-fp128", cl::desc("temp option to enable soft fp128"), cl::Hidden); STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumSiblingCalls, "Number of sibling calls"); STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM"); STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed"); static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int); static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl); // FIXME: Remove this once the bug has been fixed! extern cl::opt ANDIGlueBug; PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all // arguments are at least 4/8 bytes aligned. bool isPPC64 = Subtarget.isPPC64(); setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4)); // Set up the register classes. addRegisterClass(MVT::i32, &PPC::GPRCRegClass); if (!useSoftFloat()) { if (hasSPE()) { addRegisterClass(MVT::f32, &PPC::GPRCRegClass); // EFPU2 APU only supports f32 if (!Subtarget.hasEFPU2()) addRegisterClass(MVT::f64, &PPC::SPERCRegClass); } else { addRegisterClass(MVT::f32, &PPC::F4RCRegClass); addRegisterClass(MVT::f64, &PPC::F8RCRegClass); } } // Match BITREVERSE to customized fast code sequence in the td file. 
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended. setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom); // Custom lower inline assembly to check for special registers. setOperationAction(ISD::INLINEASM, MVT::Other, Custom); setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom); // PowerPC has an i16 but no i8 (or i1) SEXTLOAD. for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand); } if (Subtarget.isISA3_0()) { setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal); setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal); setTruncStoreAction(MVT::f64, MVT::f16, Legal); setTruncStoreAction(MVT::f32, MVT::f16, Legal); } else { // No extending loads from f16 or HW conversions back and forth. setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); setTruncStoreAction(MVT::f64, MVT::f16, Expand); setTruncStoreAction(MVT::f32, MVT::f16, Expand); } setTruncStoreAction(MVT::f64, MVT::f32, Expand); // PowerPC has pre-inc load and store's. setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal); if (!Subtarget.hasSPE()) { setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal); setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal); } // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry. const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; for (MVT VT : ScalarIntVTs) { setOperationAction(ISD::ADDC, VT, Legal); setOperationAction(ISD::ADDE, VT, Legal); setOperationAction(ISD::SUBC, VT, Legal); setOperationAction(ISD::SUBE, VT, Legal); } if (Subtarget.useCRBits()) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); if (isPPC64 || Subtarget.hasFPCVT()) { setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote); AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1, isPPC64 ? MVT::i64 : MVT::i32); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote); AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1, isPPC64 ? MVT::i64 : MVT::i32); setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); AddPromotedToType (ISD::SINT_TO_FP, MVT::i1, isPPC64 ? MVT::i64 : MVT::i32); setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, isPPC64 ? MVT::i64 : MVT::i32); setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i1, Promote); AddPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::i1, isPPC64 ? MVT::i64 : MVT::i32); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i1, Promote); AddPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::i1, isPPC64 ? 
MVT::i64 : MVT::i32); setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote); AddPromotedToType(ISD::FP_TO_SINT, MVT::i1, isPPC64 ? MVT::i64 : MVT::i32); setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote); AddPromotedToType(ISD::FP_TO_UINT, MVT::i1, isPPC64 ? MVT::i64 : MVT::i32); } else { setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom); } // PowerPC does not support direct load/store of condition registers. setOperationAction(ISD::LOAD, MVT::i1, Custom); setOperationAction(ISD::STORE, MVT::i1, Custom); // FIXME: Remove this once the ANDI glue bug is fixed: if (ANDIGlueBug) setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); setTruncStoreAction(VT, MVT::i1, Expand); } addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass); } // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on // PPC (the libcall is not available). setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom); setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom); // We do not currently implement these libm ops for PowerPC. setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand); setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand); setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand); setOperationAction(ISD::FRINT, MVT::ppcf128, Expand); setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand); setOperationAction(ISD::FREM, MVT::ppcf128, Expand); // PowerPC has no SREM/UREM instructions unless we are on P9 // On P9 we may use a hardware instruction to compute the remainder. // When the result of both the remainder and the division is required it is // more efficient to compute the remainder from the result of the division // rather than use the remainder instruction. The instructions are legalized // directly because the DivRemPairsPass performs the transformation at the IR // level. if (Subtarget.isISA3_0()) { setOperationAction(ISD::SREM, MVT::i32, Legal); setOperationAction(ISD::UREM, MVT::i32, Legal); setOperationAction(ISD::SREM, MVT::i64, Legal); setOperationAction(ISD::UREM, MVT::i64, Legal); } else { setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); setOperationAction(ISD::SREM, MVT::i64, Expand); setOperationAction(ISD::UREM, MVT::i64, Expand); } // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM. setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); setOperationAction(ISD::UDIVREM, MVT::i32, Expand); setOperationAction(ISD::SDIVREM, MVT::i32, Expand); setOperationAction(ISD::UDIVREM, MVT::i64, Expand); setOperationAction(ISD::SDIVREM, MVT::i64, Expand); // Handle constrained floating-point operations of scalar. // TODO: Handle SPE specific operation. 
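  // The ISD::STRICT_* nodes configured below correspond to the
  // llvm.experimental.constrained.* intrinsics, which must preserve the
  // floating-point environment (rounding mode and exception status) and so
  // cannot be freely speculated or reordered. Marking them Legal says they
  // can be selected onto the ordinary scalar FP instructions.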
setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal); setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal); setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal); setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal); setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal); setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal); setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal); setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal); setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal); if (Subtarget.hasVSX()) { setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal); setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal); } if (Subtarget.hasFSQRT()) { setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal); setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal); } if (Subtarget.hasFPRND()) { setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal); setOperationAction(ISD::STRICT_FCEIL, MVT::f32, Legal); setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal); setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal); setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal); setOperationAction(ISD::STRICT_FCEIL, MVT::f64, Legal); setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal); setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal); } // We don't support sin/cos/sqrt/fmod/pow setOperationAction(ISD::FSIN , MVT::f64, Expand); setOperationAction(ISD::FCOS , MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand); setOperationAction(ISD::FREM , MVT::f64, Expand); setOperationAction(ISD::FPOW , MVT::f64, Expand); setOperationAction(ISD::FSIN , MVT::f32, Expand); setOperationAction(ISD::FCOS , MVT::f32, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FREM , MVT::f32, Expand); setOperationAction(ISD::FPOW , MVT::f32, Expand); if (Subtarget.hasSPE()) { setOperationAction(ISD::FMA , MVT::f64, Expand); setOperationAction(ISD::FMA , MVT::f32, Expand); } else { setOperationAction(ISD::FMA , MVT::f64, Legal); setOperationAction(ISD::FMA , MVT::f32, Legal); } if (Subtarget.hasSPE()) setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); // If we're enabling GP optimizations, use hardware square root if (!Subtarget.hasFSQRT() && !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() && Subtarget.hasFRE())) setOperationAction(ISD::FSQRT, MVT::f64, Expand); if (!Subtarget.hasFSQRT() && !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() && Subtarget.hasFRES())) setOperationAction(ISD::FSQRT, MVT::f32, Expand); if (Subtarget.hasFCPSGN()) { setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal); } else { setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); } if (Subtarget.hasFPRND()) { setOperationAction(ISD::FFLOOR, MVT::f64, Legal); setOperationAction(ISD::FCEIL, MVT::f64, Legal); setOperationAction(ISD::FTRUNC, MVT::f64, Legal); setOperationAction(ISD::FROUND, MVT::f64, Legal); setOperationAction(ISD::FFLOOR, MVT::f32, Legal); setOperationAction(ISD::FCEIL, MVT::f32, Legal); setOperationAction(ISD::FTRUNC, MVT::f32, Legal); setOperationAction(ISD::FROUND, MVT::f32, Legal); } // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd // to speed up scalar BSWAP64. 
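  // Byte swap reverses byte order, e.g. bswap(0x11223344) == 0x44332211 for
  // i32. The i64 case is marked Custom below so it can be lowered with the
  // vector xxbrd instruction when P9 vector support is available; otherwise
  // it is expanded by the legalizer.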
// CTPOP or CTTZ were introduced in P8/P9 respectively setOperationAction(ISD::BSWAP, MVT::i32 , Expand); if (Subtarget.hasP9Vector()) setOperationAction(ISD::BSWAP, MVT::i64 , Custom); else setOperationAction(ISD::BSWAP, MVT::i64 , Expand); if (Subtarget.isISA3_0()) { setOperationAction(ISD::CTTZ , MVT::i32 , Legal); setOperationAction(ISD::CTTZ , MVT::i64 , Legal); } else { setOperationAction(ISD::CTTZ , MVT::i32 , Expand); setOperationAction(ISD::CTTZ , MVT::i64 , Expand); } if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) { setOperationAction(ISD::CTPOP, MVT::i32 , Legal); setOperationAction(ISD::CTPOP, MVT::i64 , Legal); } else { setOperationAction(ISD::CTPOP, MVT::i32 , Expand); setOperationAction(ISD::CTPOP, MVT::i64 , Expand); } // PowerPC does not have ROTR setOperationAction(ISD::ROTR, MVT::i32 , Expand); setOperationAction(ISD::ROTR, MVT::i64 , Expand); if (!Subtarget.useCRBits()) { // PowerPC does not have Select setOperationAction(ISD::SELECT, MVT::i32, Expand); setOperationAction(ISD::SELECT, MVT::i64, Expand); setOperationAction(ISD::SELECT, MVT::f32, Expand); setOperationAction(ISD::SELECT, MVT::f64, Expand); } // PowerPC wants to turn select_cc of FP into fsel when possible. setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); // PowerPC wants to optimize integer setcc a bit if (!Subtarget.useCRBits()) setOperationAction(ISD::SETCC, MVT::i32, Custom); if (Subtarget.hasFPU()) { setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal); setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal); setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal); setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal); setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal); setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal); } // PowerPC does not have BRCOND which requires SetCC if (!Subtarget.useCRBits()) setOperationAction(ISD::BRCOND, MVT::Other, Expand); setOperationAction(ISD::BR_JT, MVT::Other, Expand); if (Subtarget.hasSPE()) { // SPE has built-in conversions setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal); setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); } else { // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores. 
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); // PowerPC does not have [U|S]INT_TO_FP setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand); setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); } if (Subtarget.hasDirectMove() && isPPC64) { setOperationAction(ISD::BITCAST, MVT::f32, Legal); setOperationAction(ISD::BITCAST, MVT::i32, Legal); setOperationAction(ISD::BITCAST, MVT::i64, Legal); setOperationAction(ISD::BITCAST, MVT::f64, Legal); if (TM.Options.UnsafeFPMath) { setOperationAction(ISD::LRINT, MVT::f64, Legal); setOperationAction(ISD::LRINT, MVT::f32, Legal); setOperationAction(ISD::LLRINT, MVT::f64, Legal); setOperationAction(ISD::LLRINT, MVT::f32, Legal); setOperationAction(ISD::LROUND, MVT::f64, Legal); setOperationAction(ISD::LROUND, MVT::f32, Legal); setOperationAction(ISD::LLROUND, MVT::f64, Legal); setOperationAction(ISD::LLROUND, MVT::f32, Legal); } } else { setOperationAction(ISD::BITCAST, MVT::f32, Expand); setOperationAction(ISD::BITCAST, MVT::i32, Expand); setOperationAction(ISD::BITCAST, MVT::i64, Expand); setOperationAction(ISD::BITCAST, MVT::f64, Expand); } // We cannot sextinreg(i1). Expand to shifts. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support // SjLj exception handling but a light-weight setjmp/longjmp replacement to // support continuation, user-level threading, and etc.. As a result, no // other SjLj exception interfaces are implemented and please don't build // your own exception handling based on them. // LLVM/Clang supports zero-cost DWARF exception handling. setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); // We want to legalize GlobalAddress and ConstantPool nodes into the // appropriate instructions to materialize the address. setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); setOperationAction(ISD::JumpTable, MVT::i32, Custom); setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); setOperationAction(ISD::BlockAddress, MVT::i64, Custom); setOperationAction(ISD::ConstantPool, MVT::i64, Custom); setOperationAction(ISD::JumpTable, MVT::i64, Custom); // TRAP is legal. setOperationAction(ISD::TRAP, MVT::Other, Legal); // TRAMPOLINE is custom lowered. setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); if (Subtarget.is64BitELFABI()) { // VAARG always uses double-word chunks, so promote anything smaller. 
setOperationAction(ISD::VAARG, MVT::i1, Promote); AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64); setOperationAction(ISD::VAARG, MVT::i8, Promote); AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64); setOperationAction(ISD::VAARG, MVT::i16, Promote); AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64); setOperationAction(ISD::VAARG, MVT::i32, Promote); AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64); setOperationAction(ISD::VAARG, MVT::Other, Expand); } else if (Subtarget.is32BitELFABI()) { // VAARG is custom lowered with the 32-bit SVR4 ABI. setOperationAction(ISD::VAARG, MVT::Other, Custom); setOperationAction(ISD::VAARG, MVT::i64, Custom); } else setOperationAction(ISD::VAARG, MVT::Other, Expand); // VACOPY is custom lowered with the 32-bit SVR4 ABI. if (Subtarget.is32BitELFABI()) setOperationAction(ISD::VACOPY , MVT::Other, Custom); else setOperationAction(ISD::VACOPY , MVT::Other, Expand); // Use the default implementation. setOperationAction(ISD::VAEND , MVT::Other, Expand); setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom); setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom); setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom); setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); // To handle counter-based loop conditions. setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); // Comparisons that require checking two conditions. if (Subtarget.hasSPE()) { setCondCodeAction(ISD::SETO, MVT::f32, Expand); setCondCodeAction(ISD::SETO, MVT::f64, Expand); setCondCodeAction(ISD::SETUO, MVT::f32, Expand); setCondCodeAction(ISD::SETUO, MVT::f64, Expand); } setCondCodeAction(ISD::SETULT, MVT::f32, Expand); setCondCodeAction(ISD::SETULT, MVT::f64, Expand); setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); setCondCodeAction(ISD::SETUGT, MVT::f64, Expand); setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand); setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand); setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); setCondCodeAction(ISD::SETOLE, MVT::f32, Expand); setCondCodeAction(ISD::SETOLE, MVT::f64, Expand); setCondCodeAction(ISD::SETONE, MVT::f32, Expand); setCondCodeAction(ISD::SETONE, MVT::f64, Expand); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal); if (Subtarget.has64BitSupport()) { // They also have instructions for converting between i64 and fp. 
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand); setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand); setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); // This is just the low 32 bits of a (signed) fp->i64 conversion. // We cannot do this with Promote because i64 is not a legal type. setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) { setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom); } } else { // PowerPC does not have FP_TO_UINT on 32-bit implementations. if (Subtarget.hasSPE()) { setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); } else { setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); } } // With the instructions enabled under FPCVT, we can do everything. if (Subtarget.hasFPCVT()) { if (Subtarget.has64BitSupport()) { setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom); setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); } setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); } if (Subtarget.use64BitRegs()) { // 64-bit PowerPC implementations can support i64 types directly addRegisterClass(MVT::i64, &PPC::G8RCRegClass); // BUILD_PAIR can't be handled natively, and should be expanded to shl/or setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); // 64-bit PowerPC wants to expand i128 shifts itself. setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); } else { // 32-bit PowerPC wants to expand i64 shifts itself. setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); } // PowerPC has better expansions for funnel shifts than the generic // TargetLowering::expandFunnelShift. 
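  // A funnel shift concatenates its two operands and shifts the double-width
  // value, returning one half: for bit width BW and Z % BW != 0,
  //   fshl(X, Y, Z) == (X << (Z % BW)) | (Y >> (BW - Z % BW)),
  // and fshl(X, X, Z) is a rotate left. The Custom markings below let PPC
  // emit its own sequence rather than TargetLowering::expandFunnelShift.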
if (Subtarget.has64BitSupport()) { setOperationAction(ISD::FSHL, MVT::i64, Custom); setOperationAction(ISD::FSHR, MVT::i64, Custom); } setOperationAction(ISD::FSHL, MVT::i32, Custom); setOperationAction(ISD::FSHR, MVT::i32, Custom); if (Subtarget.hasVSX()) { setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal); setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal); setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal); setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal); } if (Subtarget.hasAltivec()) { for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { setOperationAction(ISD::SADDSAT, VT, Legal); setOperationAction(ISD::SSUBSAT, VT, Legal); setOperationAction(ISD::UADDSAT, VT, Legal); setOperationAction(ISD::USUBSAT, VT, Legal); } // First set operation action for all vector types to expand. Then we // will selectively turn on ones that can be effectively codegen'd. for (MVT VT : MVT::fixedlen_vector_valuetypes()) { // add/sub are legal for all supported vector VT's. setOperationAction(ISD::ADD, VT, Legal); setOperationAction(ISD::SUB, VT, Legal); // For v2i64, these are only valid with P8Vector. This is corrected after // the loop. if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) { setOperationAction(ISD::SMAX, VT, Legal); setOperationAction(ISD::SMIN, VT, Legal); setOperationAction(ISD::UMAX, VT, Legal); setOperationAction(ISD::UMIN, VT, Legal); } else { setOperationAction(ISD::SMAX, VT, Expand); setOperationAction(ISD::SMIN, VT, Expand); setOperationAction(ISD::UMAX, VT, Expand); setOperationAction(ISD::UMIN, VT, Expand); } if (Subtarget.hasVSX()) { setOperationAction(ISD::FMAXNUM, VT, Legal); setOperationAction(ISD::FMINNUM, VT, Legal); } // Vector instructions introduced in P8 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) { setOperationAction(ISD::CTPOP, VT, Legal); setOperationAction(ISD::CTLZ, VT, Legal); } else { setOperationAction(ISD::CTPOP, VT, Expand); setOperationAction(ISD::CTLZ, VT, Expand); } // Vector instructions introduced in P9 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128)) setOperationAction(ISD::CTTZ, VT, Legal); else setOperationAction(ISD::CTTZ, VT, Expand); // We promote all shuffles to v16i8. setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote); AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8); // We promote all non-typed operations to v4i32. setOperationAction(ISD::AND , VT, Promote); AddPromotedToType (ISD::AND , VT, MVT::v4i32); setOperationAction(ISD::OR , VT, Promote); AddPromotedToType (ISD::OR , VT, MVT::v4i32); setOperationAction(ISD::XOR , VT, Promote); AddPromotedToType (ISD::XOR , VT, MVT::v4i32); setOperationAction(ISD::LOAD , VT, Promote); AddPromotedToType (ISD::LOAD , VT, MVT::v4i32); setOperationAction(ISD::SELECT, VT, Promote); AddPromotedToType (ISD::SELECT, VT, MVT::v4i32); setOperationAction(ISD::VSELECT, VT, Legal); setOperationAction(ISD::SELECT_CC, VT, Promote); AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32); setOperationAction(ISD::STORE, VT, Promote); AddPromotedToType (ISD::STORE, VT, MVT::v4i32); // No other operations are legal. 
setOperationAction(ISD::MUL , VT, Expand); setOperationAction(ISD::SDIV, VT, Expand); setOperationAction(ISD::SREM, VT, Expand); setOperationAction(ISD::UDIV, VT, Expand); setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::FDIV, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); setOperationAction(ISD::FNEG, VT, Expand); setOperationAction(ISD::FSQRT, VT, Expand); setOperationAction(ISD::FLOG, VT, Expand); setOperationAction(ISD::FLOG10, VT, Expand); setOperationAction(ISD::FLOG2, VT, Expand); setOperationAction(ISD::FEXP, VT, Expand); setOperationAction(ISD::FEXP2, VT, Expand); setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FABS, VT, Expand); setOperationAction(ISD::FFLOOR, VT, Expand); setOperationAction(ISD::FCEIL, VT, Expand); setOperationAction(ISD::FTRUNC, VT, Expand); setOperationAction(ISD::FRINT, VT, Expand); setOperationAction(ISD::FNEARBYINT, VT, Expand); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); setOperationAction(ISD::BUILD_VECTOR, VT, Expand); setOperationAction(ISD::MULHU, VT, Expand); setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::UMUL_LOHI, VT, Expand); setOperationAction(ISD::SMUL_LOHI, VT, Expand); setOperationAction(ISD::UDIVREM, VT, Expand); setOperationAction(ISD::SDIVREM, VT, Expand); setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); setOperationAction(ISD::BSWAP, VT, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); setOperationAction(ISD::ROTL, VT, Expand); setOperationAction(ISD::ROTR, VT, Expand); for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { setTruncStoreAction(VT, InnerVT, Expand); setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); } } setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand); if (!Subtarget.hasP8Vector()) { setOperationAction(ISD::SMAX, MVT::v2i64, Expand); setOperationAction(ISD::SMIN, MVT::v2i64, Expand); setOperationAction(ISD::UMAX, MVT::v2i64, Expand); setOperationAction(ISD::UMIN, MVT::v2i64, Expand); } // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle // with merges, splats, etc. setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom); // Vector truncates to sub-word integer that fit in an Altivec/VSX register // are cheap, so handle them before they get expanded to scalar. setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom); setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom); setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom); setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom); setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom); setOperationAction(ISD::AND , MVT::v4i32, Legal); setOperationAction(ISD::OR , MVT::v4i32, Legal); setOperationAction(ISD::XOR , MVT::v4i32, Legal); setOperationAction(ISD::LOAD , MVT::v4i32, Legal); setOperationAction(ISD::SELECT, MVT::v4i32, Subtarget.useCRBits() ? 
Legal : Expand); setOperationAction(ISD::STORE , MVT::v4i32, Legal); setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal); setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8. setOperationAction(ISD::ROTL, MVT::v1i128, Custom); // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w). if (Subtarget.hasAltivec()) for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8}) setOperationAction(ISD::ROTL, VT, Legal); // With hasP8Altivec set, we can lower ISD::ROTL to vrld. if (Subtarget.hasP8Altivec()) setOperationAction(ISD::ROTL, MVT::v2i64, Legal); addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass); addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass); addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass); addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass); setOperationAction(ISD::MUL, MVT::v4f32, Legal); setOperationAction(ISD::FMA, MVT::v4f32, Legal); if (Subtarget.hasVSX()) { setOperationAction(ISD::FDIV, MVT::v4f32, Legal); setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); } if (Subtarget.hasP8Altivec()) setOperationAction(ISD::MUL, MVT::v4i32, Legal); else setOperationAction(ISD::MUL, MVT::v4i32, Custom); if (Subtarget.isISA3_1()) { setOperationAction(ISD::MUL, MVT::v2i64, Legal); setOperationAction(ISD::MULHS, MVT::v2i64, Legal); setOperationAction(ISD::MULHU, MVT::v2i64, Legal); setOperationAction(ISD::MULHS, MVT::v4i32, Legal); setOperationAction(ISD::MULHU, MVT::v4i32, Legal); setOperationAction(ISD::UDIV, MVT::v2i64, Legal); setOperationAction(ISD::SDIV, MVT::v2i64, Legal); setOperationAction(ISD::UDIV, MVT::v4i32, Legal); setOperationAction(ISD::SDIV, MVT::v4i32, Legal); setOperationAction(ISD::UREM, MVT::v2i64, Legal); setOperationAction(ISD::SREM, MVT::v2i64, Legal); setOperationAction(ISD::UREM, MVT::v4i32, Legal); setOperationAction(ISD::SREM, MVT::v4i32, Legal); setOperationAction(ISD::UREM, MVT::v1i128, Legal); setOperationAction(ISD::SREM, MVT::v1i128, Legal); setOperationAction(ISD::UDIV, MVT::v1i128, Legal); setOperationAction(ISD::SDIV, MVT::v1i128, Legal); setOperationAction(ISD::ROTL, MVT::v1i128, Legal); } setOperationAction(ISD::MUL, MVT::v8i16, Legal); setOperationAction(ISD::MUL, MVT::v16i8, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); // Altivec does not contain unordered floating-point compare instructions setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand); setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand); setCondCodeAction(ISD::SETO, MVT::v4f32, Expand); setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand); if (Subtarget.hasVSX()) { setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); 
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); if (Subtarget.hasP8Vector()) { setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); } if (Subtarget.hasDirectMove() && isPPC64) { setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal); } setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); // The nearbyint variants are not allowed to raise the inexact exception // so we can only code-gen them with unsafe math. if (TM.Options.UnsafeFPMath) { setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); } setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); setOperationAction(ISD::FRINT, MVT::v2f64, Legal); setOperationAction(ISD::FROUND, MVT::v2f64, Legal); setOperationAction(ISD::FROUND, MVT::f64, Legal); setOperationAction(ISD::FRINT, MVT::f64, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); setOperationAction(ISD::FRINT, MVT::v4f32, Legal); setOperationAction(ISD::FROUND, MVT::v4f32, Legal); setOperationAction(ISD::FROUND, MVT::f32, Legal); setOperationAction(ISD::FRINT, MVT::f32, Legal); setOperationAction(ISD::MUL, MVT::v2f64, Legal); setOperationAction(ISD::FMA, MVT::v2f64, Legal); setOperationAction(ISD::FDIV, MVT::v2f64, Legal); setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); // Share the Altivec comparison restrictions. setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand); setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand); setCondCodeAction(ISD::SETO, MVT::v2f64, Expand); setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand); setOperationAction(ISD::LOAD, MVT::v2f64, Legal); setOperationAction(ISD::STORE, MVT::v2f64, Legal); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal); if (Subtarget.hasP8Vector()) addRegisterClass(MVT::f32, &PPC::VSSRCRegClass); addRegisterClass(MVT::f64, &PPC::VSFRCRegClass); addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass); addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass); addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass); if (Subtarget.hasP8Altivec()) { setOperationAction(ISD::SHL, MVT::v2i64, Legal); setOperationAction(ISD::SRA, MVT::v2i64, Legal); setOperationAction(ISD::SRL, MVT::v2i64, Legal); // 128 bit shifts can be accomplished via 3 instructions for SHL and // SRL, but not for SRA because of the instructions available: // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth // doing setOperationAction(ISD::SHL, MVT::v1i128, Expand); setOperationAction(ISD::SRL, MVT::v1i128, Expand); setOperationAction(ISD::SRA, MVT::v1i128, Expand); setOperationAction(ISD::SETCC, MVT::v2i64, Legal); } else { setOperationAction(ISD::SHL, MVT::v2i64, Expand); setOperationAction(ISD::SRA, MVT::v2i64, Expand); setOperationAction(ISD::SRL, MVT::v2i64, Expand); setOperationAction(ISD::SETCC, MVT::v2i64, Custom); // VSX v2i64 only supports non-arithmetic operations. 
setOperationAction(ISD::ADD, MVT::v2i64, Expand); setOperationAction(ISD::SUB, MVT::v2i64, Expand); } if (Subtarget.isISA3_1()) setOperationAction(ISD::SETCC, MVT::v1i128, Legal); else setOperationAction(ISD::SETCC, MVT::v1i128, Expand); setOperationAction(ISD::LOAD, MVT::v2i64, Promote); AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64); setOperationAction(ISD::STORE, MVT::v2i64, Promote); AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal); setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal); setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); // Custom handling for partial vectors of integers converted to // floating point. We already have optimal handling for v2i32 through // the DAG combine, so those aren't necessary. setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom); setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom); setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom); setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom); setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); setOperationAction(ISD::FNEG, MVT::v4f32, Legal); setOperationAction(ISD::FNEG, MVT::v2f64, Legal); setOperationAction(ISD::FABS, MVT::v4f32, Legal); setOperationAction(ISD::FABS, MVT::v2f64, Legal); setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal); setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal); if (Subtarget.hasDirectMove()) setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); // Handle constrained floating-point operations of vector. // The predictor is `hasVSX` because altivec instruction has // no exception but VSX vector instruction has. 
setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal); addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass); } if (Subtarget.hasP8Altivec()) { addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass); addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass); } if (Subtarget.hasP9Vector()) { setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); // 128 bit shifts can be accomplished via 3 instructions for SHL and // SRL, but not for SRA because of the instructions available: // VS{RL} and VS{RL}O. setOperationAction(ISD::SHL, MVT::v1i128, Legal); setOperationAction(ISD::SRL, MVT::v1i128, Legal); setOperationAction(ISD::SRA, MVT::v1i128, Expand); addRegisterClass(MVT::f128, &PPC::VRRCRegClass); setOperationAction(ISD::FADD, MVT::f128, Legal); setOperationAction(ISD::FSUB, MVT::f128, Legal); setOperationAction(ISD::FDIV, MVT::f128, Legal); setOperationAction(ISD::FMUL, MVT::f128, Legal); setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal); // No extending loads to f128 on PPC. 
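// (An extending load would load a narrower FP value, e.g. an f64, and widen
// it to f128 in a single operation; Expand splits it into a normal load
// followed by a separate fp_extend.)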
for (MVT FPT : MVT::fp_valuetypes()) setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand); setOperationAction(ISD::FMA, MVT::f128, Legal); setCondCodeAction(ISD::SETULT, MVT::f128, Expand); setCondCodeAction(ISD::SETUGT, MVT::f128, Expand); setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand); setCondCodeAction(ISD::SETOGE, MVT::f128, Expand); setCondCodeAction(ISD::SETOLE, MVT::f128, Expand); setCondCodeAction(ISD::SETONE, MVT::f128, Expand); setOperationAction(ISD::FTRUNC, MVT::f128, Legal); setOperationAction(ISD::FRINT, MVT::f128, Legal); setOperationAction(ISD::FFLOOR, MVT::f128, Legal); setOperationAction(ISD::FCEIL, MVT::f128, Legal); setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal); setOperationAction(ISD::FROUND, MVT::f128, Legal); setOperationAction(ISD::SELECT, MVT::f128, Expand); setOperationAction(ISD::FP_ROUND, MVT::f64, Legal); setOperationAction(ISD::FP_ROUND, MVT::f32, Legal); setTruncStoreAction(MVT::f128, MVT::f64, Expand); setTruncStoreAction(MVT::f128, MVT::f32, Expand); setOperationAction(ISD::BITCAST, MVT::i128, Custom); // No implementation for these ops for PowerPC. setOperationAction(ISD::FSIN, MVT::f128, Expand); setOperationAction(ISD::FCOS, MVT::f128, Expand); setOperationAction(ISD::FPOW, MVT::f128, Expand); setOperationAction(ISD::FPOWI, MVT::f128, Expand); setOperationAction(ISD::FREM, MVT::f128, Expand); // Handle constrained floating-point operations of fp128 setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal); setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal); setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal); setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal); setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal); setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal); setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal); setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal); setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal); setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal); setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal); setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal); setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal); setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); setOperationAction(ISD::BSWAP, MVT::v8i16, Legal); setOperationAction(ISD::BSWAP, MVT::v4i32, Legal); setOperationAction(ISD::BSWAP, MVT::v2i64, Legal); setOperationAction(ISD::BSWAP, MVT::v1i128, Legal); } else if (Subtarget.hasAltivec() && EnableSoftFP128) { addRegisterClass(MVT::f128, &PPC::VRRCRegClass); for (MVT FPT : MVT::fp_valuetypes()) setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand); setOperationAction(ISD::LOAD, MVT::f128, Promote); setOperationAction(ISD::STORE, MVT::f128, Promote); AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32); AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32); // Set FADD/FSUB as libcall to avoid the legalizer to expand the // fp_to_uint and int_to_fp. 
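// (The LibCall action lowers the operation directly to a runtime library
// call instead of letting the legalizer expand it inline.)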
setOperationAction(ISD::FADD, MVT::f128, LibCall); setOperationAction(ISD::FSUB, MVT::f128, LibCall); setOperationAction(ISD::FMUL, MVT::f128, Expand); setOperationAction(ISD::FDIV, MVT::f128, Expand); setOperationAction(ISD::FNEG, MVT::f128, Expand); setOperationAction(ISD::FABS, MVT::f128, Expand); setOperationAction(ISD::FSIN, MVT::f128, Expand); setOperationAction(ISD::FCOS, MVT::f128, Expand); setOperationAction(ISD::FPOW, MVT::f128, Expand); setOperationAction(ISD::FPOWI, MVT::f128, Expand); setOperationAction(ISD::FREM, MVT::f128, Expand); setOperationAction(ISD::FSQRT, MVT::f128, Expand); setOperationAction(ISD::FMA, MVT::f128, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); setTruncStoreAction(MVT::f128, MVT::f64, Expand); setTruncStoreAction(MVT::f128, MVT::f32, Expand); // Expand the fp_extend if the target type is fp128. setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Expand); // Expand the fp_round if the source type is fp128. for (MVT VT : {MVT::f32, MVT::f64}) { setOperationAction(ISD::FP_ROUND, VT, Custom); setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom); } } if (Subtarget.hasP9Altivec()) { setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal); } } if (Subtarget.pairedVectorMemops()) { addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass); setOperationAction(ISD::LOAD, MVT::v256i1, Custom); setOperationAction(ISD::STORE, MVT::v256i1, Custom); } if (Subtarget.hasMMA()) { addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass); setOperationAction(ISD::LOAD, MVT::v512i1, Custom); setOperationAction(ISD::STORE, MVT::v512i1, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom); } if (Subtarget.has64BitSupport()) setOperationAction(ISD::PREFETCH, MVT::Other, Legal); if (Subtarget.isISA3_1()) setOperationAction(ISD::SRA, MVT::v1i128, Legal); setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom); if (!isPPC64) { setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); } setBooleanContents(ZeroOrOneBooleanContent); if (Subtarget.hasAltivec()) { // Altivec instructions set fields to all zeros or all ones. setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); } if (!isPPC64) { // These libcalls are not available in 32-bit. setLibcallName(RTLIB::SHL_I128, nullptr); setLibcallName(RTLIB::SRL_I128, nullptr); setLibcallName(RTLIB::SRA_I128, nullptr); } if (!isPPC64) setMaxAtomicSizeInBitsSupported(32); setStackPointerRegisterToSaveRestore(isPPC64 ? 
PPC::X1 : PPC::R1); // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::FMA); setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::BUILD_VECTOR); if (Subtarget.hasFPCVT()) setTargetDAGCombine(ISD::UINT_TO_FP); setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::BR_CC); if (Subtarget.useCRBits()) setTargetDAGCombine(ISD::BRCOND); setTargetDAGCombine(ISD::BSWAP); setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); setTargetDAGCombine(ISD::INTRINSIC_VOID); setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::ANY_EXTEND); setTargetDAGCombine(ISD::TRUNCATE); setTargetDAGCombine(ISD::VECTOR_SHUFFLE); if (Subtarget.useCRBits()) { setTargetDAGCombine(ISD::TRUNCATE); setTargetDAGCombine(ISD::SETCC); setTargetDAGCombine(ISD::SELECT_CC); } if (Subtarget.hasP9Altivec()) { setTargetDAGCombine(ISD::ABS); setTargetDAGCombine(ISD::VSELECT); } setLibcallName(RTLIB::LOG_F128, "logf128"); setLibcallName(RTLIB::LOG2_F128, "log2f128"); setLibcallName(RTLIB::LOG10_F128, "log10f128"); setLibcallName(RTLIB::EXP_F128, "expf128"); setLibcallName(RTLIB::EXP2_F128, "exp2f128"); setLibcallName(RTLIB::SIN_F128, "sinf128"); setLibcallName(RTLIB::COS_F128, "cosf128"); setLibcallName(RTLIB::POW_F128, "powf128"); setLibcallName(RTLIB::FMIN_F128, "fminf128"); setLibcallName(RTLIB::FMAX_F128, "fmaxf128"); setLibcallName(RTLIB::REM_F128, "fmodf128"); setLibcallName(RTLIB::SQRT_F128, "sqrtf128"); setLibcallName(RTLIB::CEIL_F128, "ceilf128"); setLibcallName(RTLIB::FLOOR_F128, "floorf128"); setLibcallName(RTLIB::TRUNC_F128, "truncf128"); setLibcallName(RTLIB::ROUND_F128, "roundf128"); setLibcallName(RTLIB::LROUND_F128, "lroundf128"); setLibcallName(RTLIB::LLROUND_F128, "llroundf128"); setLibcallName(RTLIB::RINT_F128, "rintf128"); setLibcallName(RTLIB::LRINT_F128, "lrintf128"); setLibcallName(RTLIB::LLRINT_F128, "llrintf128"); setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128"); setLibcallName(RTLIB::FMA_F128, "fmaf128"); // With 32 condition bits, we don't need to sink (and duplicate) compares // aggressively in CodeGenPrep. if (Subtarget.useCRBits()) { setHasMultipleConditionRegisters(); setJumpIsExpensive(); } setMinFunctionAlignment(Align(4)); switch (Subtarget.getCPUDirective()) { default: break; case PPC::DIR_970: case PPC::DIR_A2: case PPC::DIR_E500: case PPC::DIR_E500mc: case PPC::DIR_E5500: case PPC::DIR_PWR4: case PPC::DIR_PWR5: case PPC::DIR_PWR5X: case PPC::DIR_PWR6: case PPC::DIR_PWR6X: case PPC::DIR_PWR7: case PPC::DIR_PWR8: case PPC::DIR_PWR9: case PPC::DIR_PWR10: case PPC::DIR_PWR_FUTURE: setPrefLoopAlignment(Align(16)); setPrefFunctionAlignment(Align(16)); break; } if (Subtarget.enableMachineScheduler()) setSchedulingPreference(Sched::Source); else setSchedulingPreference(Sched::Hybrid); computeRegisterProperties(STI.getRegisterInfo()); // The Freescale cores do better with aggressive inlining of memcpy and // friends. GCC uses same threshold of 128 bytes (= 32 word stores). 
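// (These MaxStoresPer*/MaxLoadsPer* limits cap how many inline scalar memory
// operations SelectionDAG will emit before falling back to a library call to
// memset/memcpy/memmove/memcmp.)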
if (Subtarget.getCPUDirective() == PPC::DIR_E500mc || Subtarget.getCPUDirective() == PPC::DIR_E5500) { MaxStoresPerMemset = 32; MaxStoresPerMemsetOptSize = 16; MaxStoresPerMemcpy = 32; MaxStoresPerMemcpyOptSize = 8; MaxStoresPerMemmove = 32; MaxStoresPerMemmoveOptSize = 8; } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) { // The A2 also benefits from (very) aggressive inlining of memcpy and // friends. The overhead of a the function call, even when warm, can be // over one hundred cycles. MaxStoresPerMemset = 128; MaxStoresPerMemcpy = 128; MaxStoresPerMemmove = 128; MaxLoadsPerMemcmp = 128; } else { MaxLoadsPerMemcmp = 8; MaxLoadsPerMemcmpOptSize = 4; } IsStrictFPEnabled = true; // Let the subtarget (CPU) decide if a predictable select is more expensive // than the corresponding branch. This information is used in CGP to decide // when to convert selects into branches. PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive(); } /// getMaxByValAlign - Helper for getByValTypeAlignment to determine /// the desired ByVal argument alignment. static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) { if (MaxAlign == MaxMaxAlign) return; if (VectorType *VTy = dyn_cast(Ty)) { if (MaxMaxAlign >= 32 && VTy->getPrimitiveSizeInBits().getFixedSize() >= 256) MaxAlign = Align(32); else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 && MaxAlign < 16) MaxAlign = Align(16); } else if (ArrayType *ATy = dyn_cast(Ty)) { Align EltAlign; getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign); if (EltAlign > MaxAlign) MaxAlign = EltAlign; } else if (StructType *STy = dyn_cast(Ty)) { for (auto *EltTy : STy->elements()) { Align EltAlign; getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign); if (EltAlign > MaxAlign) MaxAlign = EltAlign; if (MaxAlign == MaxMaxAlign) break; } } } /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty, const DataLayout &DL) const { // 16byte and wider vectors are passed on 16byte boundary. // The rest is 8 on PPC64 and 4 on PPC32 boundary. Align Alignment = Subtarget.isPPC64() ? 
Align(8) : Align(4); if (Subtarget.hasAltivec()) getMaxByValAlign(Ty, Alignment, Align(16)); return Alignment.value(); } bool PPCTargetLowering::useSoftFloat() const { return Subtarget.useSoftFloat(); } bool PPCTargetLowering::hasSPE() const { return Subtarget.hasSPE(); } bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const { return VT.isScalarInteger(); } const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((PPCISD::NodeType)Opcode) { case PPCISD::FIRST_NUMBER: break; case PPCISD::FSEL: return "PPCISD::FSEL"; case PPCISD::XSMAXCDP: return "PPCISD::XSMAXCDP"; case PPCISD::XSMINCDP: return "PPCISD::XSMINCDP"; case PPCISD::FCFID: return "PPCISD::FCFID"; case PPCISD::FCFIDU: return "PPCISD::FCFIDU"; case PPCISD::FCFIDS: return "PPCISD::FCFIDS"; case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS"; case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ"; case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ"; case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ"; case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ"; case PPCISD::FP_TO_UINT_IN_VSR: return "PPCISD::FP_TO_UINT_IN_VSR,"; case PPCISD::FP_TO_SINT_IN_VSR: return "PPCISD::FP_TO_SINT_IN_VSR"; case PPCISD::FRE: return "PPCISD::FRE"; case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE"; case PPCISD::FTSQRT: return "PPCISD::FTSQRT"; case PPCISD::FSQRT: return "PPCISD::FSQRT"; case PPCISD::STFIWX: return "PPCISD::STFIWX"; case PPCISD::VPERM: return "PPCISD::VPERM"; case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; case PPCISD::XXSPLTI_SP_TO_DP: return "PPCISD::XXSPLTI_SP_TO_DP"; case PPCISD::XXSPLTI32DX: return "PPCISD::XXSPLTI32DX"; case PPCISD::VECINSERT: return "PPCISD::VECINSERT"; case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI"; case PPCISD::VECSHL: return "PPCISD::VECSHL"; case PPCISD::CMPB: return "PPCISD::CMPB"; case PPCISD::Hi: return "PPCISD::Hi"; case PPCISD::Lo: return "PPCISD::Lo"; case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8"; case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16"; case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET"; case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA"; case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; case PPCISD::SRL: return "PPCISD::SRL"; case PPCISD::SRA: return "PPCISD::SRA"; case PPCISD::SHL: return "PPCISD::SHL"; case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE"; case PPCISD::CALL: return "PPCISD::CALL"; case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC"; case PPCISD::MTCTR: return "PPCISD::MTCTR"; case PPCISD::BCTRL: return "PPCISD::BCTRL"; case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC"; case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE"; case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; case PPCISD::MFOCRF: return "PPCISD::MFOCRF"; case PPCISD::MFVSR: return "PPCISD::MFVSR"; case PPCISD::MTVSRA: return "PPCISD::MTVSRA"; case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ"; case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP"; case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP"; case PPCISD::SCALAR_TO_VECTOR_PERMUTED: return "PPCISD::SCALAR_TO_VECTOR_PERMUTED"; case PPCISD::ANDI_rec_1_EQ_BIT: return "PPCISD::ANDI_rec_1_EQ_BIT"; case PPCISD::ANDI_rec_1_GT_BIT: return "PPCISD::ANDI_rec_1_GT_BIT"; case PPCISD::VCMP: return 
"PPCISD::VCMP"; case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec"; case PPCISD::LBRX: return "PPCISD::LBRX"; case PPCISD::STBRX: return "PPCISD::STBRX"; case PPCISD::LFIWAX: return "PPCISD::LFIWAX"; case PPCISD::LFIWZX: return "PPCISD::LFIWZX"; case PPCISD::LXSIZX: return "PPCISD::LXSIZX"; case PPCISD::STXSIX: return "PPCISD::STXSIX"; case PPCISD::VEXTS: return "PPCISD::VEXTS"; case PPCISD::LXVD2X: return "PPCISD::LXVD2X"; case PPCISD::STXVD2X: return "PPCISD::STXVD2X"; case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE"; case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE"; case PPCISD::ST_VSR_SCAL_INT: return "PPCISD::ST_VSR_SCAL_INT"; case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; case PPCISD::BDNZ: return "PPCISD::BDNZ"; case PPCISD::BDZ: return "PPCISD::BDZ"; case PPCISD::MFFS: return "PPCISD::MFFS"; case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; case PPCISD::CR6SET: return "PPCISD::CR6SET"; case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET"; case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT"; case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT"; case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA"; case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L"; case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA"; case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L"; case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR"; case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR"; case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR"; case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR"; case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L"; case PPCISD::PADDI_DTPREL: return "PPCISD::PADDI_DTPREL"; case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT"; case PPCISD::SC: return "PPCISD::SC"; case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB"; case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE"; case PPCISD::RFEBB: return "PPCISD::RFEBB"; case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD"; case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN"; case PPCISD::VABSD: return "PPCISD::VABSD"; case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128"; case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64"; case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE"; case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI"; case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH"; case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF"; case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR"; case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR: return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR"; case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR: return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR"; case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD"; case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD"; case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG"; case PPCISD::XXMFACC: return "PPCISD::XXMFACC"; case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT"; case PPCISD::FNMSUB: return "PPCISD::FNMSUB"; case PPCISD::STRICT_FADDRTZ: return "PPCISD::STRICT_FADDRTZ"; case PPCISD::STRICT_FCTIDZ: return "PPCISD::STRICT_FCTIDZ"; case PPCISD::STRICT_FCTIWZ: return "PPCISD::STRICT_FCTIWZ"; case PPCISD::STRICT_FCTIDUZ: return "PPCISD::STRICT_FCTIDUZ"; case PPCISD::STRICT_FCTIWUZ: return 
"PPCISD::STRICT_FCTIWUZ"; case PPCISD::STRICT_FCFID: return "PPCISD::STRICT_FCFID"; case PPCISD::STRICT_FCFIDU: return "PPCISD::STRICT_FCFIDU"; case PPCISD::STRICT_FCFIDS: return "PPCISD::STRICT_FCFIDS"; case PPCISD::STRICT_FCFIDUS: return "PPCISD::STRICT_FCFIDUS"; case PPCISD::LXVRZX: return "PPCISD::LXVRZX"; } return nullptr; } EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C, EVT VT) const { if (!VT.isVector()) return Subtarget.useCRBits() ? MVT::i1 : MVT::i32; return VT.changeVectorElementTypeToInteger(); } bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const { assert(VT.isFloatingPoint() && "Non-floating-point FMA?"); return true; } //===----------------------------------------------------------------------===// // Node matching predicates, for use by the tblgen matching code. //===----------------------------------------------------------------------===// /// isFloatingPointZero - Return true if this is 0.0 or -0.0. static bool isFloatingPointZero(SDValue Op) { if (ConstantFPSDNode *CFP = dyn_cast(Op)) return CFP->getValueAPF().isZero(); else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { // Maybe this has already been legalized into the constant pool? if (ConstantPoolSDNode *CP = dyn_cast(Op.getOperand(1))) if (const ConstantFP *CFP = dyn_cast(CP->getConstVal())) return CFP->getValueAPF().isZero(); } return false; } /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return /// true if Op is undef or if it matches the specified value. static bool isConstantOrUndef(int Op, int Val) { return Op < 0 || Op == Val; } /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUHUM instruction. /// The ShuffleKind distinguishes between big-endian operations with /// two different inputs (0), either-endian operations with two identical /// inputs (1), and little-endian operations with two different inputs (2). /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { bool IsLE = DAG.getDataLayout().isLittleEndian(); if (ShuffleKind == 0) { if (IsLE) return false; for (unsigned i = 0; i != 16; ++i) if (!isConstantOrUndef(N->getMaskElt(i), i*2+1)) return false; } else if (ShuffleKind == 2) { if (!IsLE) return false; for (unsigned i = 0; i != 16; ++i) if (!isConstantOrUndef(N->getMaskElt(i), i*2)) return false; } else if (ShuffleKind == 1) { unsigned j = IsLE ? 0 : 1; for (unsigned i = 0; i != 8; ++i) if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) || !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)) return false; } return true; } /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUWUM instruction. /// The ShuffleKind distinguishes between big-endian operations with /// two different inputs (0), either-endian operations with two identical /// inputs (1), and little-endian operations with two different inputs (2). /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). 
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { bool IsLE = DAG.getDataLayout().isLittleEndian(); if (ShuffleKind == 0) { if (IsLE) return false; for (unsigned i = 0; i != 16; i += 2) if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+3)) return false; } else if (ShuffleKind == 2) { if (!IsLE) return false; for (unsigned i = 0; i != 16; i += 2) if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+1)) return false; } else if (ShuffleKind == 1) { unsigned j = IsLE ? 0 : 2; for (unsigned i = 0; i != 8; i += 2) if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1)) return false; } return true; } /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the /// current subtarget. /// /// The ShuffleKind distinguishes between big-endian operations with /// two different inputs (0), either-endian operations with two identical /// inputs (1), and little-endian operations with two different inputs (2). /// For the latter, the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { const PPCSubtarget& Subtarget = static_cast(DAG.getSubtarget()); if (!Subtarget.hasP8Vector()) return false; bool IsLE = DAG.getDataLayout().isLittleEndian(); if (ShuffleKind == 0) { if (IsLE) return false; for (unsigned i = 0; i != 16; i += 4) if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) || !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) || !isConstantOrUndef(N->getMaskElt(i+3), i*2+7)) return false; } else if (ShuffleKind == 2) { if (!IsLE) return false; for (unsigned i = 0; i != 16; i += 4) if (!isConstantOrUndef(N->getMaskElt(i ), i*2) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) || !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) || !isConstantOrUndef(N->getMaskElt(i+3), i*2+3)) return false; } else if (ShuffleKind == 1) { unsigned j = IsLE ? 0 : 4; for (unsigned i = 0; i != 8; i += 4) if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) || !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) || !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) || !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) || !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) || !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3)) return false; } return true; } /// isVMerge - Common function, used to match vmrg* shuffles. /// static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart) { if (N->getValueType(0) != MVT::v16i8) return false; assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && "Unsupported merge size!"); for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j), LHSStart+j+i*UnitSize) || !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j), RHSStart+j+i*UnitSize)) return false; } return true; } /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes). 
/// The ShuffleKind distinguishes between big-endian merges with two /// different inputs (0), either-endian merges with two identical inputs (1), /// and little-endian merges with two different inputs (2). For the latter, /// the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG) { if (DAG.getDataLayout().isLittleEndian()) { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 0, 0); else if (ShuffleKind == 2) // swapped return isVMerge(N, UnitSize, 0, 16); else return false; } else { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 8, 8); else if (ShuffleKind == 0) // normal return isVMerge(N, UnitSize, 8, 24); else return false; } } /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes). /// The ShuffleKind distinguishes between big-endian merges with two /// different inputs (0), either-endian merges with two identical inputs (1), /// and little-endian merges with two different inputs (2). For the latter, /// the input operands are swapped (see PPCInstrAltivec.td). bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG) { if (DAG.getDataLayout().isLittleEndian()) { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 8, 8); else if (ShuffleKind == 2) // swapped return isVMerge(N, UnitSize, 8, 24); else return false; } else { if (ShuffleKind == 1) // unary return isVMerge(N, UnitSize, 0, 0); else if (ShuffleKind == 0) // normal return isVMerge(N, UnitSize, 0, 16); else return false; } } /** * Common function used to match vmrgew and vmrgow shuffles * * The indexOffset determines whether to look for even or odd words in * the shuffle mask. This is based on the of the endianness of the target * machine. * - Little Endian: * - Use offset of 0 to check for odd elements * - Use offset of 4 to check for even elements * - Big Endian: * - Use offset of 0 to check for even elements * - Use offset of 4 to check for odd elements * A detailed description of the vector element ordering for little endian and * big endian can be found at * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html * Targeting your applications - what little endian and big endian IBM XL C/C++ * compiler differences mean to you * * The mask to the shuffle vector instruction specifies the indices of the * elements from the two input vectors to place in the result. The elements are * numbered in array-access order, starting with the first vector. These vectors * are always of type v16i8, thus each vector will contain 16 elements of size * 8. More info on the shuffle vector can be found in the * http://llvm.org/docs/LangRef.html#shufflevector-instruction * Language Reference. * * The RHSStartValue indicates whether the same input vectors are used (unary) * or two different input vectors are used, based on the following: * - If the instruction uses the same vector for both inputs, the range of the * indices will be 0 to 15. In this case, the RHSStart value passed should * be 0. * - If the instruction has two different vectors then the range of the * indices will be 0 to 31. In this case, the RHSStart value passed should * be 16 (indices 0-15 specify elements in the first vector while indices 16 * to 31 specify elements in the second vector). 
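 *
 * For example, a big-endian even-word merge (vmrgew) of two different inputs
 * corresponds to the mask
 *   <0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27>
 * (i.e. IndexOffset = 0 and RHSStartValue = 16 in the helper below).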
* * \param[in] N The shuffle vector SD Node to analyze * \param[in] IndexOffset Specifies whether to look for even or odd elements * \param[in] RHSStartValue Specifies the starting index for the righthand input * vector to the shuffle_vector instruction * \return true iff this shuffle vector represents an even or odd word merge */ static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset, unsigned RHSStartValue) { if (N->getValueType(0) != MVT::v16i8) return false; for (unsigned i = 0; i < 2; ++i) for (unsigned j = 0; j < 4; ++j) if (!isConstantOrUndef(N->getMaskElt(i*4+j), i*RHSStartValue+j+IndexOffset) || !isConstantOrUndef(N->getMaskElt(i*4+j+8), i*RHSStartValue+j+IndexOffset+8)) return false; return true; } /** * Determine if the specified shuffle mask is suitable for the vmrgew or * vmrgow instructions. * * \param[in] N The shuffle vector SD Node to analyze * \param[in] CheckEven Check for an even merge (true) or an odd merge (false) * \param[in] ShuffleKind Identify the type of merge: * - 0 = big-endian merge with two different inputs; * - 1 = either-endian merge with two identical inputs; * - 2 = little-endian merge with two different inputs (inputs are swapped for * little-endian merges). * \param[in] DAG The current SelectionDAG * \return true iff this shuffle mask */ bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG) { if (DAG.getDataLayout().isLittleEndian()) { unsigned indexOffset = CheckEven ? 4 : 0; if (ShuffleKind == 1) // Unary return isVMerge(N, indexOffset, 0); else if (ShuffleKind == 2) // swapped return isVMerge(N, indexOffset, 16); else return false; } else { unsigned indexOffset = CheckEven ? 0 : 4; if (ShuffleKind == 1) // Unary return isVMerge(N, indexOffset, 0); else if (ShuffleKind == 0) // Normal return isVMerge(N, indexOffset, 16); else return false; } return false; } /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift /// amount, otherwise return -1. /// The ShuffleKind distinguishes between big-endian operations with two /// different inputs (0), either-endian operations with two identical inputs /// (1), and little-endian operations with two different inputs (2). For the /// latter, the input operands are swapped (see PPCInstrAltivec.td). int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG) { if (N->getValueType(0) != MVT::v16i8) return -1; ShuffleVectorSDNode *SVOp = cast(N); // Find the first non-undef value in the shuffle mask. unsigned i; for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i) /*search*/; if (i == 16) return -1; // all undef. // Otherwise, check to see if the rest of the elements are consecutively // numbered from this value. unsigned ShiftAmt = SVOp->getMaskElt(i); if (ShiftAmt < i) return -1; ShiftAmt -= i; bool isLE = DAG.getDataLayout().isLittleEndian(); if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) { // Check the rest of the elements to see if they are consecutive. for (++i; i != 16; ++i) if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) return -1; } else if (ShuffleKind == 1) { // Check the rest of the elements to see if they are consecutive. 
for (++i; i != 16; ++i) if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15)) return -1; } else return -1; if (isLE) ShiftAmt = 16 - ShiftAmt; return ShiftAmt; } /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a splat of a single element that is suitable for input to /// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.). bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) && EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes"); // The consecutive indices need to specify an element, not part of two // different elements. So abandon ship early if this isn't the case. if (N->getMaskElt(0) % EltSize != 0) return false; // This is a splat operation if each element of the permute is the same, and // if the value doesn't reference the second vector. unsigned ElementBase = N->getMaskElt(0); // FIXME: Handle UNDEF elements too! if (ElementBase >= 16) return false; // Check that the indices are consecutive, in the case of a multi-byte element // splatted with a v16i8 mask. for (unsigned i = 1; i != EltSize; ++i) if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase)) return false; for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { if (N->getMaskElt(i) < 0) continue; for (unsigned j = 0; j != EltSize; ++j) if (N->getMaskElt(i+j) != N->getMaskElt(j)) return false; } return true; } /// Check that the mask is shuffling N byte elements. Within each N byte /// element of the mask, the indices could be either in increasing or /// decreasing order as long as they are consecutive. /// \param[in] N the shuffle vector SD Node to analyze /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/ /// Word/DoubleWord/QuadWord). /// \param[in] StepLen the delta indices number among the N byte element, if /// the mask is in increasing/decreasing order then it is 1/-1. /// \return true iff the mask is shuffling N byte elements. static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width, int StepLen) { assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) && "Unexpected element width."); assert((StepLen == 1 || StepLen == -1) && "Unexpected element width."); unsigned NumOfElem = 16 / Width; unsigned MaskVal[16]; // Width is never greater than 16 for (unsigned i = 0; i < NumOfElem; ++i) { MaskVal[0] = N->getMaskElt(i * Width); if ((StepLen == 1) && (MaskVal[0] % Width)) { return false; } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) { return false; } for (unsigned int j = 1; j < Width; ++j) { MaskVal[j] = N->getMaskElt(i * Width + j); if (MaskVal[j] != MaskVal[j-1] + StepLen) { return false; } } } return true; } bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE) { if (!isNByteElemShuffleMask(N, 4, 1)) return false; // Now we look at mask elements 0,4,8,12 unsigned M0 = N->getMaskElt(0) / 4; unsigned M1 = N->getMaskElt(4) / 4; unsigned M2 = N->getMaskElt(8) / 4; unsigned M3 = N->getMaskElt(12) / 4; unsigned LittleEndianShifts[] = { 2, 1, 0, 3 }; unsigned BigEndianShifts[] = { 3, 0, 1, 2 }; // Below, let H and L be arbitrary elements of the shuffle mask // where H is in the range [4,7] and L is in the range [0,3]. // H, 1, 2, 3 or L, 5, 6, 7 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) || (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) { ShiftElts = IsLE ? 
LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3]; InsertAtByte = IsLE ? 12 : 0; Swap = M0 < 4; return true; } // 0, H, 2, 3 or 4, L, 6, 7 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) || (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) { ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3]; InsertAtByte = IsLE ? 8 : 4; Swap = M1 < 4; return true; } // 0, 1, H, 3 or 4, 5, L, 7 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) || (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) { ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3]; InsertAtByte = IsLE ? 4 : 8; Swap = M2 < 4; return true; } // 0, 1, 2, H or 4, 5, 6, L if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) || (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) { ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3]; InsertAtByte = IsLE ? 0 : 12; Swap = M3 < 4; return true; } // If both vector operands for the shuffle are the same vector, the mask will // contain only elements from the first one and the second one will be undef. if (N->getOperand(1).isUndef()) { ShiftElts = 0; Swap = true; unsigned XXINSERTWSrcElem = IsLE ? 2 : 1; if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) { InsertAtByte = IsLE ? 12 : 0; return true; } if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) { InsertAtByte = IsLE ? 8 : 4; return true; } if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) { InsertAtByte = IsLE ? 4 : 8; return true; } if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) { InsertAtByte = IsLE ? 0 : 12; return true; } } return false; } bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE) { assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); // Ensure each byte index of the word is consecutive. if (!isNByteElemShuffleMask(N, 4, 1)) return false; // Now we look at mask elements 0,4,8,12, which are the beginning of words. unsigned M0 = N->getMaskElt(0) / 4; unsigned M1 = N->getMaskElt(4) / 4; unsigned M2 = N->getMaskElt(8) / 4; unsigned M3 = N->getMaskElt(12) / 4; // If both vector operands for the shuffle are the same vector, the mask will // contain only elements from the first one and the second one will be undef. if (N->getOperand(1).isUndef()) { assert(M0 < 4 && "Indexing into an undef vector?"); if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4) return false; ShiftElts = IsLE ? (4 - M0) % 4 : M0; Swap = false; return true; } // Ensure each word index of the ShuffleVector Mask is consecutive. if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8) return false; if (IsLE) { if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) { // Input vectors don't need to be swapped if the leading element // of the result is one of the 3 left elements of the second vector // (or if there is no shift to be done at all). Swap = false; ShiftElts = (8 - M0) % 8; } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) { // Input vectors need to be swapped if the leading element // of the result is one of the 3 left elements of the first vector // (or if we're shifting by 4 - thereby simply swapping the vectors). Swap = true; ShiftElts = (4 - M0) % 4; } return true; } else { // BE if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) { // Input vectors don't need to be swapped if the leading element // of the result is one of the 4 elements of the first vector. 
Swap = false; ShiftElts = M0; } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) { // Input vectors need to be swapped if the leading element // of the result is one of the 4 elements of the right vector. Swap = true; ShiftElts = M0 - 4; } return true; } } bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) { assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); if (!isNByteElemShuffleMask(N, Width, -1)) return false; for (int i = 0; i < 16; i += Width) if (N->getMaskElt(i) != i + Width - 1) return false; return true; } bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) { return isXXBRShuffleMaskHelper(N, 2); } bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) { return isXXBRShuffleMaskHelper(N, 4); } bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) { return isXXBRShuffleMaskHelper(N, 8); } bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) { return isXXBRShuffleMaskHelper(N, 16); } /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap /// if the inputs to the instruction should be swapped and set \p DM to the /// value for the immediate. /// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI /// AND element 0 of the result comes from the first input (LE) or second input /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered. /// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle /// mask. bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM, bool &Swap, bool IsLE) { assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); // Ensure each byte index of the double word is consecutive. if (!isNByteElemShuffleMask(N, 8, 1)) return false; unsigned M0 = N->getMaskElt(0) / 8; unsigned M1 = N->getMaskElt(8) / 8; assert(((M0 | M1) < 4) && "A mask element out of bounds?"); // If both vector operands for the shuffle are the same vector, the mask will // contain only elements from the first one and the second one will be undef. if (N->getOperand(1).isUndef()) { if ((M0 | M1) < 2) { DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1); Swap = false; return true; } else return false; } if (IsLE) { if (M0 > 1 && M1 < 2) { Swap = false; } else if (M0 < 2 && M1 > 1) { M0 = (M0 + 2) % 4; M1 = (M1 + 2) % 4; Swap = true; } else return false; // Note: if control flow comes here that means Swap is already set above DM = (((~M1) & 1) << 1) + ((~M0) & 1); return true; } else { // BE if (M0 < 2 && M1 > 1) { Swap = false; } else if (M0 > 1 && M1 < 2) { M0 = (M0 + 2) % 4; M1 = (M1 + 2) % 4; Swap = true; } else return false; // Note: if control flow comes here that means Swap is already set above DM = (M0 << 1) + (M1 & 1); return true; } } /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is /// appropriate for PPC mnemonics (which have a big endian bias - namely /// elements are counted from the left of the vector register). unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG) { ShuffleVectorSDNode *SVOp = cast(N); assert(isSplatShuffleMask(SVOp, EltSize)); if (DAG.getDataLayout().isLittleEndian()) return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize); else return SVOp->getMaskElt(0) / EltSize; } /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed /// by using a vspltis[bhw] instruction of the specified element size, return /// the constant being splatted. 
The ByteSize field indicates the number of /// bytes of each element [124] -> [bhw]. SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { SDValue OpVal(nullptr, 0); // If ByteSize of the splat is bigger than the element size of the // build_vector, then we have a case where we are checking for a splat where // multiple elements of the buildvector are folded together into a single // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8). unsigned EltSize = 16/N->getNumOperands(); if (EltSize < ByteSize) { unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval. SDValue UniquedVals[4]; assert(Multiple > 1 && Multiple <= 4 && "How can this happen?"); // See if all of the elements in the buildvector agree across. for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { if (N->getOperand(i).isUndef()) continue; // If the element isn't a constant, bail fully out. if (!isa(N->getOperand(i))) return SDValue(); if (!UniquedVals[i&(Multiple-1)].getNode()) UniquedVals[i&(Multiple-1)] = N->getOperand(i); else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) return SDValue(); // no match. } // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains // either constant or undef values that are identical for each chunk. See // if these chunks can form into a larger vspltis*. // Check to see if all of the leading entries are either 0 or -1. If // neither, then this won't fit into the immediate field. bool LeadingZero = true; bool LeadingOnes = true; for (unsigned i = 0; i != Multiple-1; ++i) { if (!UniquedVals[i].getNode()) continue; // Must have been undefs. LeadingZero &= isNullConstant(UniquedVals[i]); LeadingOnes &= isAllOnesConstant(UniquedVals[i]); } // Finally, check the least significant entry. if (LeadingZero) { if (!UniquedVals[Multiple-1].getNode()) return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef int Val = cast(UniquedVals[Multiple-1])->getZExtValue(); if (Val < 16) // 0,0,0,4 -> vspltisw(4) return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); } if (LeadingOnes) { if (!UniquedVals[Multiple-1].getNode()) return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef int Val =cast(UniquedVals[Multiple-1])->getSExtValue(); if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32); } return SDValue(); } // Check to see if this buildvec has a single non-undef value in its elements. for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { if (N->getOperand(i).isUndef()) continue; if (!OpVal.getNode()) OpVal = N->getOperand(i); else if (OpVal != N->getOperand(i)) return SDValue(); } if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def. unsigned ValSizeInBytes = EltSize; uint64_t Value = 0; if (ConstantSDNode *CN = dyn_cast(OpVal)) { Value = CN->getZExtValue(); } else if (ConstantFPSDNode *CN = dyn_cast(OpVal)) { assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"); Value = FloatToBits(CN->getValueAPF().convertToFloat()); } // If the splat value is larger than the element value, then we can never do // this splat. The only case that we could fit the replicated bits into our // immediate field for would be zero, and we prefer to use vxor for it. if (ValSizeInBytes < ByteSize) return SDValue(); // If the element value is larger than the splat value, check if it consists // of a repeated bit pattern of size ByteSize. 
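// For example, a v4i32 build_vector that splats 0x03030303 is still
// acceptable for ByteSize == 1: the 32-bit value is a repetition of the
// byte 0x03, so it can be materialized with vspltisb 3.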
  if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
    return SDValue();

  // Properly sign extend the value.
  int MaskVal = SignExtend32(Value, ByteSize * 8);

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it.
  if (SignExtend32<5>(MaskVal) == MaskVal)
    return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
  return SDValue();
}

//===----------------------------------------------------------------------===//
//  Addressing Mode Selection
//===----------------------------------------------------------------------===//

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value. If so, this returns true and the
/// immediate.
bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
  if (N->getValueType(0) == MVT::i32)
    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
  else
    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
}

bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}

/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
/// be represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
                                               SDValue &Index,
                                               SelectionDAG &DAG) const {
  for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
       UI != E; ++UI) {
    if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
      if (Memop->getMemoryVT() == MVT::f64) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }
  return false;
}

/// isIntS34Immediate - This method tests whether the value of the given node
/// can be accurately represented as a sign extension from a 34-bit value. If
/// so, this returns true and the immediate.
bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  Imm = (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
  return isInt<34>(Imm);
}
bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
  return isIntS34Immediate(Op.getNode(), Imm);
}

/// SelectAddressRegReg - Given the specified address, check to see if it
/// can be represented as an indexed [r+r] operation. Returns false if it
/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
/// non-zero and N can be represented by a base register plus a signed 16-bit
/// displacement, make a more precise judgement by checking (displacement %
/// \p EncodingAlignment).
bool PPCTargetLowering::SelectAddressRegReg(
    SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
    MaybeAlign EncodingAlignment) const {
  // If we have a PC Relative target flag don't select as [reg+reg]. It will be
  // a [pc+imm].
  if (SelectAddressPCRel(N, Base))
    return false;

  int16_t Imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    // Is there any SPE load/store (f64), which can't handle a 16-bit offset?
    // SPE load/store can only handle 8-bit offsets.
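    // (SPE doubleword accesses such as evldd encode only a small scaled
    // offset, so the indexed [r+r] form is preferred for f64 here.)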
if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG)) return true; if (isIntS16Immediate(N.getOperand(1), Imm) && (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) return false; // r+i if (N.getOperand(1).getOpcode() == PPCISD::Lo) return false; // r+i Base = N.getOperand(0); Index = N.getOperand(1); return true; } else if (N.getOpcode() == ISD::OR) { if (isIntS16Immediate(N.getOperand(1), Imm) && (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) return false; // r+i can fold it if we can. // If this is an or of disjoint bitfields, we can codegen this as an add // (for better address arithmetic) if the LHS and RHS of the OR are provably // disjoint. KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0)); if (LHSKnown.Zero.getBoolValue()) { KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1)); // If all of the bits are known zero on the LHS or RHS, the add won't // carry. if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) { Base = N.getOperand(0); Index = N.getOperand(1); return true; } } } return false; } // If we happen to be doing an i64 load or store into a stack slot that has // less than a 4-byte alignment, then the frame-index elimination may need to // use an indexed load or store instruction (because the offset may not be a // multiple of 4). The extra register needed to hold the offset comes from the // register scavenger, and it is possible that the scavenger will need to use // an emergency spill slot. As a result, we need to make sure that a spill slot // is allocated when doing an i64 load/store into a less-than-4-byte-aligned // stack slot. static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) { // FIXME: This does not handle the LWA case. if (VT != MVT::i64) return; // NOTE: We'll exclude negative FIs here, which come from argument // lowering, because there are no known test cases triggering this problem // using packed structures (or similar). We can remove this exclusion if // we find such a test case. The reason why this is so test-case driven is // because this entire 'fixup' is only to prevent crashes (from the // register scavenger) on not-really-valid inputs. For example, if we have: // %a = alloca i1 // %b = bitcast i1* %a to i64* // store i64* a, i64 b // then the store should really be marked as 'align 1', but is not. If it // were marked as 'align 1' then the indexed form would have been // instruction-selected initially, and the problem this 'fixup' is preventing // won't happen regardless. if (FrameIdx < 0) return; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); if (MFI.getObjectAlign(FrameIdx) >= Align(4)) return; PPCFunctionInfo *FuncInfo = MF.getInfo(); FuncInfo->setHasNonRISpills(); } /// Returns true if the address N can be represented by a base register plus /// a signed 16-bit displacement [r+imm], and if it is not better /// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept /// displacements that are multiples of that value. bool PPCTargetLowering::SelectAddressRegImm( SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const { // FIXME dl should come from parent load or store, not from address SDLoc dl(N); // If we have a PC Relative target flag don't select as [reg+imm]. It will be // a [pc+imm]. if (SelectAddressPCRel(N, Base)) return false; // If this can be more profitably realized as r+r, fail. 
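// ([r+imm] corresponds to the D-form encoding, e.g. "lwz r3, 8(r4)", whose
// displacement is a signed 16-bit immediate.)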
if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment)) return false; if (N.getOpcode() == ISD::ADD) { int16_t imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) { Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); if (FrameIndexSDNode *FI = dyn_cast(N.getOperand(0))) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); } else { Base = N.getOperand(0); } return true; // [r+i] } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { // Match LOAD (ADD (X, Lo(G))). assert(!cast(N.getOperand(1).getOperand(1))->getZExtValue() && "Cannot handle constant offsets yet!"); Disp = N.getOperand(1).getOperand(0); // The global address. assert(Disp.getOpcode() == ISD::TargetGlobalAddress || Disp.getOpcode() == ISD::TargetGlobalTLSAddress || Disp.getOpcode() == ISD::TargetConstantPool || Disp.getOpcode() == ISD::TargetJumpTable); Base = N.getOperand(0); return true; // [&g+r] } } else if (N.getOpcode() == ISD::OR) { int16_t imm = 0; if (isIntS16Immediate(N.getOperand(1), imm) && (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) { // If this is an or of disjoint bitfields, we can codegen this as an add // (for better address arithmetic) if the LHS and RHS of the OR are // provably disjoint. KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0)); if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { // If all of the bits are known zero on the LHS or RHS, the add won't // carry. if (FrameIndexSDNode *FI = dyn_cast(N.getOperand(0))) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); } else { Base = N.getOperand(0); } Disp = DAG.getTargetConstant(imm, dl, N.getValueType()); return true; } } } else if (ConstantSDNode *CN = dyn_cast(N)) { // Loading from a constant address. // If this address fits entirely in a 16-bit sext immediate field, codegen // this as "d, 0" int16_t Imm; if (isIntS16Immediate(CN, Imm) && (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) { Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0)); Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, CN->getValueType(0)); return true; } // Handle 32-bit sext immediates with LIS + addr mode. if ((CN->getValueType(0) == MVT::i32 || (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) && (!EncodingAlignment || isAligned(*EncodingAlignment, CN->getZExtValue()))) { int Addr = (int)CN->getZExtValue(); // Otherwise, break this down into an LIS + disp. Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32); Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl, MVT::i32); unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8; Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0); return true; } } Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout())); if (FrameIndexSDNode *FI = dyn_cast(N)) { Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); fixupFuncForFI(DAG, FI->getIndex(), N.getValueType()); } else Base = N; return true; // [r+0] } /// Similar to the 16-bit case but for instructions that take a 34-bit /// displacement field (prefixed loads/stores). bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const { // Only on 64-bit targets. 
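// The 34-bit displacement is provided by the ISA 3.1 prefixed load/store
// forms (e.g. plwz/pld), which prepend a prefix word to the instruction.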
  if (N.getValueType() != MVT::i64)
    return false;

  SDLoc dl(N);
  int64_t Imm = 0;

  if (N.getOpcode() == ISD::ADD) {
    if (!isIntS34Immediate(N.getOperand(1), Imm))
      return false;
    Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    else
      Base = N.getOperand(0);
    return true;
  }

  if (N.getOpcode() == ISD::OR) {
    if (!isIntS34Immediate(N.getOperand(1), Imm))
      return false;
    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are
    // provably disjoint.
    KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
    if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
      return false;
    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    else
      Base = N.getOperand(0);
    Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
    return true;
  }

  if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
    Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
    Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
    return true;
  }

  return false;
}

/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
/// represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
                                                SDValue &Index,
                                                SelectionDAG &DAG) const {
  // Check to see if we can easily represent this as an [r+r] address.  This
  // will fail if it thinks that the address is more profitably represented as
  // reg+imm, e.g. where imm = 0.
  if (SelectAddressRegReg(N, Base, Index, DAG))
    return true;

  // If the address is the result of an add, we will utilize the fact that the
  // address calculation includes an implicit add.  However, we can reduce
  // register pressure if we do not materialize a constant just for use as the
  // index register.  We only get rid of the add if it is not an add of a
  // value and a 16-bit signed constant and both have a single use.
  int16_t imm = 0;
  if (N.getOpcode() == ISD::ADD &&
      (!isIntS16Immediate(N.getOperand(1), imm) ||
       !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  }

  // Otherwise, do it the hard way, using R0 as the base register.
  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                         N.getValueType());
  Index = N;
  return true;
}

template <typename Ty> static bool isValidPCRelNode(SDValue N) {
  Ty *PCRelCand = dyn_cast<Ty>(N);
  return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
}

/// Returns true if this address is a PC Relative address.
/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
  // This is a materialize PC Relative node. Always select this as PC Relative.
  Base = N;
  if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
    return true;
  if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
      isValidPCRelNode<GlobalAddressSDNode>(N) ||
      isValidPCRelNode<JumpTableSDNode>(N) ||
      isValidPCRelNode<BlockAddressSDNode>(N))
    return true;
  return false;
}

/// Returns true if we should use a direct load into vector instruction
/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {

  // If there are any other uses other than scalar to vector, then we should
  // keep it as a scalar load -> direct move pattern to prevent multiple
  // loads.
LoadSDNode *LD = dyn_cast(N); if (!LD) return false; EVT MemVT = LD->getMemoryVT(); if (!MemVT.isSimple()) return false; switch(MemVT.getSimpleVT().SimpleTy) { case MVT::i64: break; case MVT::i32: if (!ST.hasP8Vector()) return false; break; case MVT::i16: case MVT::i8: if (!ST.hasP9Vector()) return false; break; default: return false; } SDValue LoadedVal(N, 0); if (!LoadedVal.hasOneUse()) return false; for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); UI != UE; ++UI) if (UI.getUse().get().getResNo() == 0 && UI->getOpcode() != ISD::SCALAR_TO_VECTOR && UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED) return false; return true; } /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address /// can be legally represented as pre-indexed load / store address. bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const { if (DisablePPCPreinc) return false; bool isLoad = true; SDValue Ptr; EVT VT; unsigned Alignment; if (LoadSDNode *LD = dyn_cast(N)) { Ptr = LD->getBasePtr(); VT = LD->getMemoryVT(); Alignment = LD->getAlignment(); } else if (StoreSDNode *ST = dyn_cast(N)) { Ptr = ST->getBasePtr(); VT = ST->getMemoryVT(); Alignment = ST->getAlignment(); isLoad = false; } else return false; // Do not generate pre-inc forms for specific loads that feed scalar_to_vector // instructions because we can fold these into a more efficient instruction // instead, (such as LXSD). if (isLoad && usePartialVectorLoads(N, Subtarget)) { return false; } // PowerPC doesn't have preinc load/store instructions for vectors if (VT.isVector()) return false; if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) { // Common code will reject creating a pre-inc form if the base pointer // is a frame index, or if N is a store and the base pointer is either // the same as or a predecessor of the value being stored. Check for // those situations here, and try with swapped Base/Offset instead. bool Swap = false; if (isa(Base) || isa(Base)) Swap = true; else if (!isLoad) { SDValue Val = cast(N)->getValue(); if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode())) Swap = true; } if (Swap) std::swap(Base, Offset); AM = ISD::PRE_INC; return true; } // LDU/STU can only handle immediates that are a multiple of 4. if (VT != MVT::i64) { if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, None)) return false; } else { // LDU/STU need an address with at least 4-byte alignment. if (Alignment < 4) return false; if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4))) return false; } if (LoadSDNode *LD = dyn_cast(N)) { // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of // sext i32 to i64 when addr mode is r+i. if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 && LD->getExtensionType() == ISD::SEXTLOAD && isa(Offset)) return false; } AM = ISD::PRE_INC; return true; } //===----------------------------------------------------------------------===// // LowerOperation implementation //===----------------------------------------------------------------------===// /// Return true if we should reference labels using a PICBase, set the HiOpFlags /// and LoOpFlags to the target MO flags. 
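//
// A rough numeric sketch of the Hi/Lo split these flags request (the
// half-word arithmetic is the usual @ha/@l convention; reading MO_HA and
// MO_LO as exactly that pairing is an assumption here):
//
//   x                         = 0x12348000
//   lo(x) = (int16_t)x        = -0x8000
//   ha(x) = (x - lo(x)) >> 16 =  0x1235       // "high adjusted"
//   (ha(x) << 16) + lo(x)     =  0x12348000   // round-trips exactly
//
// The plain high half 0x1234 would not round-trip, because the sign-extended
// low half subtracts 0x8000 when it is added back in.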
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV = nullptr) { HiOpFlags = PPCII::MO_HA; LoOpFlags = PPCII::MO_LO; // Don't use the pic base if not in PIC relocation model. if (IsPIC) { HiOpFlags |= PPCII::MO_PIC_FLAG; LoOpFlags |= PPCII::MO_PIC_FLAG; } } static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG) { SDLoc DL(HiPart); EVT PtrVT = HiPart.getValueType(); SDValue Zero = DAG.getConstant(0, DL, PtrVT); SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero); SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero); // With PIC, the first instruction is actually "GR+hi(&G)". if (isPIC) Hi = DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi); // Generate non-pic code that has direct accesses to the constant pool. // The address of the global is just (hi(&g)+lo(&g)). return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo); } static void setUsesTOCBasePtr(MachineFunction &MF) { PPCFunctionInfo *FuncInfo = MF.getInfo(); FuncInfo->setUsesTOCBasePtr(); } static void setUsesTOCBasePtr(SelectionDAG &DAG) { setUsesTOCBasePtr(DAG.getMachineFunction()); } SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, SDValue GA) const { const bool Is64Bit = Subtarget.isPPC64(); EVT VT = Is64Bit ? MVT::i64 : MVT::i32; SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) : Subtarget.isAIXABI() ? DAG.getRegister(PPC::R2, VT) : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT); SDValue Ops[] = { GA, Reg }; return DAG.getMemIntrinsicNode( PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT, MachinePointerInfo::getGOT(DAG.getMachineFunction()), None, MachineMemOperand::MOLoad); } SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); ConstantPoolSDNode *CP = cast(Op); const Constant *C = CP->getConstVal(); // 64-bit SVR4 ABI and AIX ABI code are always position-independent. // The actual address of the GlobalValue is stored in the TOC. if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) { if (Subtarget.isUsingPCRelativeCalls()) { SDLoc DL(CP); EVT Ty = getPointerTy(DAG.getDataLayout()); SDValue ConstPool = DAG.getTargetConstantPool( C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG); return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool); } setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0); return getTOCEntry(DAG, SDLoc(CP), GA); } unsigned MOHiFlag, MOLoFlag; bool IsPIC = isPositionIndependent(); getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); if (IsPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG); return getTOCEntry(DAG, SDLoc(CP), GA); } SDValue CPIHi = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag); SDValue CPILo = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag); return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG); } // For 64-bit PowerPC, prefer the more compact relative encodings. // This trades 32 bits per jump table entry for one or two instructions // on the jump site. 
unsigned PPCTargetLowering::getJumpTableEncoding() const { if (isJumpTableRelative()) return MachineJumpTableInfo::EK_LabelDifference32; return TargetLowering::getJumpTableEncoding(); } bool PPCTargetLowering::isJumpTableRelative() const { if (UseAbsoluteJumpTables) return false; if (Subtarget.isPPC64() || Subtarget.isAIXABI()) return true; return TargetLowering::isJumpTableRelative(); } SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const { if (!Subtarget.isPPC64() || Subtarget.isAIXABI()) return TargetLowering::getPICJumpTableRelocBase(Table, DAG); switch (getTargetMachine().getCodeModel()) { case CodeModel::Small: case CodeModel::Medium: return TargetLowering::getPICJumpTableRelocBase(Table, DAG); default: return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(), getPointerTy(DAG.getDataLayout())); } } const MCExpr * PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const { if (!Subtarget.isPPC64() || Subtarget.isAIXABI()) return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); switch (getTargetMachine().getCodeModel()) { case CodeModel::Small: case CodeModel::Medium: return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); default: return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx); } } SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast(Op); // isUsingPCRelativeCalls() returns true when PCRelative is enabled if (Subtarget.isUsingPCRelativeCalls()) { SDLoc DL(JT); EVT Ty = getPointerTy(DAG.getDataLayout()); SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG); SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA); return MatAddr; } // 64-bit SVR4 ABI and AIX ABI code are always position-independent. // The actual address of the GlobalValue is stored in the TOC. if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); return getTOCEntry(DAG, SDLoc(JT), GA); } unsigned MOHiFlag, MOLoFlag; bool IsPIC = isPositionIndependent(); getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); if (IsPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, PPCII::MO_PIC_FLAG); return getTOCEntry(DAG, SDLoc(GA), GA); } SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag); return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG); } SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); BlockAddressSDNode *BASDN = cast(Op); const BlockAddress *BA = BASDN->getBlockAddress(); // isUsingPCRelativeCalls() returns true when PCRelative is enabled if (Subtarget.isUsingPCRelativeCalls()) { SDLoc DL(BASDN); EVT Ty = getPointerTy(DAG.getDataLayout()); SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(), PPCII::MO_PCREL_FLAG); SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA); return MatAddr; } // 64-bit SVR4 ABI and AIX ABI code are always position-independent. // The actual BlockAddress is stored in the TOC. if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) { setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); return getTOCEntry(DAG, SDLoc(BASDN), GA); } // 32-bit position-independent ELF stores the BlockAddress in the .got. 
if (Subtarget.is32BitELFABI() && isPositionIndependent()) return getTOCEntry( DAG, SDLoc(BASDN), DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset())); unsigned MOHiFlag, MOLoFlag; bool IsPIC = isPositionIndependent(); getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag); SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag); return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG); } SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // FIXME: TLS addresses currently use medium model code sequences, // which is the most useful form. Eventually support for small and // large models could be added if users need it, at the cost of // additional complexity. GlobalAddressSDNode *GA = cast(Op); if (DAG.getTarget().useEmulatedTLS()) return LowerToTLSEmulatedModel(GA, DAG); SDLoc dl(GA); const GlobalValue *GV = GA->getGlobal(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); bool is64bit = Subtarget.isPPC64(); const Module *M = DAG.getMachineFunction().getFunction().getParent(); PICLevel::Level picLevel = M->getPICLevel(); const TargetMachine &TM = getTargetMachine(); TLSModel::Model Model = TM.getTLSModel(GV); if (Model == TLSModel::LocalExec) { if (Subtarget.isUsingPCRelativeCalls()) { SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64); SDValue TGA = DAG.getTargetGlobalAddress( GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG)); SDValue MatAddr = DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA); return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr); } SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_HA); SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_LO); SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64) : DAG.getRegister(PPC::R2, MVT::i32); SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); } if (Model == TLSModel::InitialExec) { bool IsPCRel = Subtarget.isUsingPCRelativeCalls(); SDValue TGA = DAG.getTargetGlobalAddress( GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0); SDValue TGATLS = DAG.getTargetGlobalAddress( GV, dl, PtrVT, 0, IsPCRel ? 
(PPCII::MO_TLS | PPCII::MO_PCREL_FLAG) : PPCII::MO_TLS); SDValue TPOffset; if (IsPCRel) { SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA); TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel, MachinePointerInfo()); } else { SDValue GOTPtr; if (is64bit) { setUsesTOCBasePtr(DAG); SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA); } else { if (!TM.isPositionIndependent()) GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT); else if (picLevel == PICLevel::SmallPIC) GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); else GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); } TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr); } return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS); } if (Model == TLSModel::GeneralDynamic) { if (Subtarget.isUsingPCRelativeCalls()) { SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_GOT_TLSGD_PCREL_FLAG); return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA); } SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); SDValue GOTPtr; if (is64bit) { setUsesTOCBasePtr(DAG); SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, GOTReg, TGA); } else { if (picLevel == PICLevel::SmallPIC) GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); else GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); } return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT, GOTPtr, TGA, TGA); } if (Model == TLSModel::LocalDynamic) { if (Subtarget.isUsingPCRelativeCalls()) { SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_GOT_TLSLD_PCREL_FLAG); SDValue MatPCRel = DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA); return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA); } SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); SDValue GOTPtr; if (is64bit) { setUsesTOCBasePtr(DAG); SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, GOTReg, TGA); } else { if (picLevel == PICLevel::SmallPIC) GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT); else GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT); } SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl, PtrVT, GOTPtr, TGA, TGA); SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT, TLSAddr, TGA); return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); } llvm_unreachable("Unknown TLS model!"); } SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); GlobalAddressSDNode *GSDN = cast(Op); SDLoc DL(GSDN); const GlobalValue *GV = GSDN->getGlobal(); // 64-bit SVR4 ABI & AIX ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. 
if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) { if (Subtarget.isUsingPCRelativeCalls()) { EVT Ty = getPointerTy(DAG.getDataLayout()); if (isAccessedAsGotIndirect(Op)) { SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(), PPCII::MO_PCREL_FLAG | PPCII::MO_GOT_FLAG); SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA); SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel, MachinePointerInfo()); return Load; } else { SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(), PPCII::MO_PCREL_FLAG); return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA); } } setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); return getTOCEntry(DAG, DL, GA); } unsigned MOHiFlag, MOLoFlag; bool IsPIC = isPositionIndependent(); getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV); if (IsPIC && Subtarget.isSVR4ABI()) { SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), PPCII::MO_PIC_FLAG); return getTOCEntry(DAG, DL, GA); } SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag); SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag); return LowerLabelRef(GAHi, GALo, IsPIC, DAG); } SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { ISD::CondCode CC = cast(Op.getOperand(2))->get(); SDLoc dl(Op); if (Op.getValueType() == MVT::v2i64) { // When the operands themselves are v2i64 values, we need to do something // special because VSX has no underlying comparison operations for these. if (Op.getOperand(0).getValueType() == MVT::v2i64) { // Equality can be handled by casting to the legal type for Altivec // comparisons, everything else needs to be expanded. if (CC == ISD::SETEQ || CC == ISD::SETNE) { return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, DAG.getSetCC(dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)), DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)), CC)); } return SDValue(); } // We handle most of these in the usual way. return Op; } // If we're comparing for equality to zero, expose the fact that this is // implemented as a ctlz/srl pair on ppc, so that the dag combiner can // fold the new nodes. if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG)) return V; if (ConstantSDNode *C = dyn_cast(Op.getOperand(1))) { // Leave comparisons against 0 and -1 alone for now, since they're usually // optimized. FIXME: revisit this when we can custom lower all setcc // optimizations. if (C->isAllOnesValue() || C->isNullValue()) return SDValue(); } // If we have an integer seteq/setne, turn it into a compare against zero // by xor'ing the rhs with the lhs, which is faster than setting a // condition register, reading it back out, and masking the correct bit. The // normal approach here uses sub to do this instead of xor. Using xor exposes // the result to other bit-twiddling opportunities. 
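// A small worked example of the rewrite described above, using hypothetical
// 32-bit values: for SETEQ the node below becomes
//
//   (a == b)  ->  ((a ^ b) == 0)
//
// e.g. a = 0x1234, b = 0x1234 gives a ^ b = 0 and the comparison holds, while
// any differing bit survives the xor and makes the result non-zero. Using xor
// rather than sub keeps the value visible to later bit-twiddling combines, as
// noted above.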
EVT LHSVT = Op.getOperand(0).getValueType(); if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { EVT VT = Op.getValueType(); SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0), Op.getOperand(1)); return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC); } return SDValue(); } SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); EVT VT = Node->getValueType(0); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue InChain = Node->getOperand(0); SDValue VAListPtr = Node->getOperand(1); const Value *SV = cast(Node->getOperand(2))->getValue(); SDLoc dl(Node); assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only"); // gpr_index SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, VAListPtr, MachinePointerInfo(SV), MVT::i8); InChain = GprIndex.getValue(1); if (VT == MVT::i64) { // Check if GprIndex is even SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex, DAG.getConstant(1, dl, MVT::i32)); SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd, DAG.getConstant(0, dl, MVT::i32), ISD::SETNE); SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex, DAG.getConstant(1, dl, MVT::i32)); // Align GprIndex to be even if it isn't GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne, GprIndex); } // fpr index is 1 byte after gpr SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, DAG.getConstant(1, dl, MVT::i32)); // fpr SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, FprPtr, MachinePointerInfo(SV), MVT::i8); InChain = FprIndex.getValue(1); SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, DAG.getConstant(8, dl, MVT::i32)); SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, DAG.getConstant(4, dl, MVT::i32)); // areas SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo()); InChain = OverflowArea.getValue(1); SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo()); InChain = RegSaveArea.getValue(1); // select overflow_area if index > 8 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, DAG.getConstant(8, dl, MVT::i32), ISD::SETLT); // adjustment constant gpr_index * 4/8 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, DAG.getConstant(VT.isInteger() ? 4 : 8, dl, MVT::i32)); // OurReg = RegSaveArea + RegConstant SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea, RegConstant); // Floating types are 32 bytes into RegSaveArea if (VT.isFloatingPoint()) OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg, DAG.getConstant(32, dl, MVT::i32)); // increase {f,g}pr_index by 1 (or 2 if VT is i64) SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl, MVT::i32)); InChain = DAG.getTruncStore(InChain, dl, IndexPlus1, VT.isInteger() ? VAListPtr : FprPtr, MachinePointerInfo(SV), MVT::i8); // determine if we should load from reg_save_area or overflow_area SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea); // increase overflow_area by 4/8 if gpr/fpr > 8 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea, DAG.getConstant(VT.isInteger() ? 
4 : 8, dl, MVT::i32)); OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea, OverflowAreaPlusN); InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr, MachinePointerInfo(), MVT::i32); return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo()); } SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only"); // We have to copy the entire va_list struct: // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2), DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8), false, true, false, MachinePointerInfo(), MachinePointerInfo()); } SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { if (Subtarget.isAIXABI()) report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX."); return Op.getOperand(0); } SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); PPCFunctionInfo &MFI = *MF.getInfo(); assert((Op.getOpcode() == ISD::INLINEASM || Op.getOpcode() == ISD::INLINEASM_BR) && "Expecting Inline ASM node."); // If an LR store is already known to be required then there is not point in // checking this ASM as well. if (MFI.isLRStoreRequired()) return Op; // Inline ASM nodes have an optional last operand that is an incoming Flag of // type MVT::Glue. We want to ignore this last operand if that is the case. unsigned NumOps = Op.getNumOperands(); if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue) --NumOps; // Check all operands that may contain the LR. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = cast(Op.getOperand(i))->getZExtValue(); unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); ++i; // Skip the ID value. switch (InlineAsm::getKind(Flags)) { default: llvm_unreachable("Bad flags!"); case InlineAsm::Kind_RegUse: case InlineAsm::Kind_Imm: case InlineAsm::Kind_Mem: i += NumVals; break; case InlineAsm::Kind_Clobber: case InlineAsm::Kind_RegDef: case InlineAsm::Kind_RegDefEarlyClobber: { for (; NumVals; --NumVals, ++i) { Register Reg = cast(Op.getOperand(i))->getReg(); if (Reg != PPC::LR && Reg != PPC::LR8) continue; MFI.setLRStoreRequired(); return Op; } break; } } } return Op; } SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { if (Subtarget.isAIXABI()) report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX."); SDValue Chain = Op.getOperand(0); SDValue Trmp = Op.getOperand(1); // trampoline SDValue FPtr = Op.getOperand(2); // nested function SDValue Nest = Op.getOperand(3); // 'nest' parameter value SDLoc dl(Op); EVT PtrVT = getPointerTy(DAG.getDataLayout()); bool isPPC64 = (PtrVT == MVT::i64); Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Ty = IntPtrTy; Entry.Node = Trmp; Args.push_back(Entry); // TrampSize == (isPPC64 ? 48 : 40); Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl, isPPC64 ? 
MVT::i64 : MVT::i32); Args.push_back(Entry); Entry.Node = FPtr; Args.push_back(Entry); Entry.Node = Nest; Args.push_back(Entry); // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(Chain).setLibCallee( CallingConv::C, Type::getVoidTy(*DAG.getContext()), DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args)); std::pair CallResult = LowerCallTo(CLI); return CallResult.second; } SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); PPCFunctionInfo *FuncInfo = MF.getInfo(); EVT PtrVT = getPointerTy(MF.getDataLayout()); SDLoc dl(Op); if (Subtarget.isPPC64() || Subtarget.isAIXABI()) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), MachinePointerInfo(SV)); } // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. // We suppose the given va_list is already allocated. // // typedef struct { // char gpr; /* index into the array of 8 GPRs // * stored in the register save area // * gpr=0 corresponds to r3, // * gpr=1 to r4, etc. // */ // char fpr; /* index into the array of 8 FPRs // * stored in the register save area // * fpr=0 corresponds to f1, // * fpr=1 to f2, etc. // */ // char *overflow_arg_area; // /* location on stack that holds // * the next overflow argument // */ // char *reg_save_area; // /* where r3:r10 and f1:f8 (if saved) // * are stored // */ // } va_list[1]; SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32); SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32); SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), PtrVT); SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); uint64_t FrameOffset = PtrVT.getSizeInBits()/8; SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT); uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1; SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT); uint64_t FPROffset = 1; SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT); const Value *SV = cast(Op.getOperand(2))->getValue(); // Store first byte : number of int regs SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1), MachinePointerInfo(SV), MVT::i8); uint64_t nextOffset = FPROffset; SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), ConstFPROffset); // Store second byte : number of float regs SDValue secondStore = DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, MachinePointerInfo(SV, nextOffset), MVT::i8); nextOffset += StackOffset; nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); // Store second word : arguments given on stack SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, MachinePointerInfo(SV, nextOffset)); nextOffset += FrameOffset; nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); // Store third word : arguments given in registers return DAG.getStore(thirdStore, dl, FR, nextPtr, MachinePointerInfo(SV, nextOffset)); } /// FPR - The set of FP registers that should be allocated for arguments /// on Darwin and AIX. 
static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13}; /// CalculateStackSlotSize - Calculates the size reserved for this argument on /// the stack. static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize) { unsigned ArgSize = ArgVT.getStoreSize(); if (Flags.isByVal()) ArgSize = Flags.getByValSize(); // Round up to multiples of the pointer size, except for array members, // which are always packed. if (!Flags.isInConsecutiveRegs()) ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; return ArgSize; } /// CalculateStackSlotAlignment - Calculates the alignment of this argument /// on the stack. static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize) { Align Alignment(PtrByteSize); // Altivec parameters are padded to a 16 byte boundary. if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 || ArgVT == MVT::v1i128 || ArgVT == MVT::f128) Alignment = Align(16); // ByVal parameters are aligned as requested. if (Flags.isByVal()) { auto BVAlign = Flags.getNonZeroByValAlign(); if (BVAlign > PtrByteSize) { if (BVAlign.value() % PtrByteSize != 0) llvm_unreachable( "ByVal alignment is not a multiple of the pointer size"); Alignment = BVAlign; } } // Array members are always packed to their original alignment. if (Flags.isInConsecutiveRegs()) { // If the array member was split into multiple registers, the first // needs to be aligned to the size of the full type. (Except for // ppcf128, which is only aligned as its f64 components.) if (Flags.isSplit() && OrigVT != MVT::ppcf128) Alignment = Align(OrigVT.getStoreSize()); else Alignment = Align(ArgVT.getStoreSize()); } return Alignment; } /// CalculateStackSlotUsed - Return whether this argument will use its /// stack slot (instead of being passed in registers). ArgOffset, /// AvailableFPRs, and AvailableVRs must hold the current argument /// position, and will be updated to account for this argument. static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs) { bool UseMemory = false; // Respect alignment of argument on the stack. Align Alignment = CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); ArgOffset = alignTo(ArgOffset, Alignment); // If there's no space left in the argument save area, we must // use memory (this check also catches zero-sized arguments). if (ArgOffset >= LinkageSize + ParamAreaSize) UseMemory = true; // Allocate argument on the stack. ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); if (Flags.isInConsecutiveRegsLast()) ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; // If we overran the argument save area, we must use memory // (this check catches arguments passed partially in memory) if (ArgOffset > LinkageSize + ParamAreaSize) UseMemory = true; // However, if the argument is actually passed in an FPR or a VR, // we don't use memory after all. 
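// For instance (hypothetical signature, 64-bit ELF rules as modelled here):
// each f64 argument advances ArgOffset through the parameter save area, but
// as long as one of the 13 FPRs is still available the code below reports
// that no stack slot is used; with a call such as
//
//   void f(double a1, double a2, /* ... */ double a14);
//
// only the fourteenth double, after AvailableFPRs has dropped to zero, ends
// up being fetched from memory.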
if (!Flags.isByVal()) { if (ArgVT == MVT::f32 || ArgVT == MVT::f64) if (AvailableFPRs > 0) { --AvailableFPRs; return false; } if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 || ArgVT == MVT::v1i128 || ArgVT == MVT::f128) if (AvailableVRs > 0) { --AvailableVRs; return false; } } return UseMemory; } /// EnsureStackAlignment - Round stack frame size up from NumBytes to /// ensure minimum alignment required for target. static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes) { return alignTo(NumBytes, Lowering->getStackAlign()); } SDValue PPCTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { if (Subtarget.isAIXABI()) return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); if (Subtarget.is64BitELFABI()) return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); assert(Subtarget.is32BitELFABI()); return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); } SDValue PPCTargetLowering::LowerFormalArguments_32SVR4( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { // 32-bit SVR4 ABI Stack Frame Layout: // +-----------------------------------+ // +--> | Back chain | // | +-----------------------------------+ // | | Floating-point register save area | // | +-----------------------------------+ // | | General register save area | // | +-----------------------------------+ // | | CR save word | // | +-----------------------------------+ // | | VRSAVE save word | // | +-----------------------------------+ // | | Alignment padding | // | +-----------------------------------+ // | | Vector register save area | // | +-----------------------------------+ // | | Local variable space | // | +-----------------------------------+ // | | Parameter list area | // | +-----------------------------------+ // | | LR save word | // | +-----------------------------------+ // SP--> +--- | Back chain | // +-----------------------------------+ // // Specifications: // System V Application Binary Interface PowerPC Processor Supplement // AltiVec Technology Programming Interface Manual MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo(); EVT PtrVT = getPointerTy(MF.getDataLayout()); // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && (CallConv == CallingConv::Fast)); const Align PtrAlign(4); // Assign locations to all of the incoming arguments. SmallVector ArgLocs; PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); // Reserve space for the linkage area on the stack. unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); CCInfo.AllocateStack(LinkageSize, PtrAlign); if (useSoftFloat()) CCInfo.PreAnalyzeFormalArguments(Ins); CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); CCInfo.clearWasPPCF128(); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; // Arguments stored in registers. 
if (VA.isRegLoc()) { const TargetRegisterClass *RC; EVT ValVT = VA.getValVT(); switch (ValVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("ValVT not supported by formal arguments Lowering"); case MVT::i1: case MVT::i32: RC = &PPC::GPRCRegClass; break; case MVT::f32: if (Subtarget.hasP8Vector()) RC = &PPC::VSSRCRegClass; else if (Subtarget.hasSPE()) RC = &PPC::GPRCRegClass; else RC = &PPC::F4RCRegClass; break; case MVT::f64: if (Subtarget.hasVSX()) RC = &PPC::VSFRCRegClass; else if (Subtarget.hasSPE()) // SPE passes doubles in GPR pairs. RC = &PPC::GPRCRegClass; else RC = &PPC::F8RCRegClass; break; case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: RC = &PPC::VRRCRegClass; break; case MVT::v4f32: RC = &PPC::VRRCRegClass; break; case MVT::v2f64: case MVT::v2i64: RC = &PPC::VRRCRegClass; break; } SDValue ArgValue; // Transform the arguments stored in physical registers into // virtual ones. if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) { assert(i + 1 < e && "No second half of double precision argument"); unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC); unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC); SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32); SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32); if (!Subtarget.isLittleEndian()) std::swap (ArgValueLo, ArgValueHi); ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo, ArgValueHi); } else { unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT == MVT::i1 ? MVT::i32 : ValVT); if (ValVT == MVT::i1) ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue); } InVals.push_back(ArgValue); } else { // Argument stored in memory. assert(VA.isMemLoc()); // Get the extended size of the argument type in stack unsigned ArgSize = VA.getLocVT().getStoreSize(); // Get the actual size of the argument type unsigned ObjSize = VA.getValVT().getStoreSize(); unsigned ArgOffset = VA.getLocMemOffset(); // Stack objects in PPC32 are right justified. ArgOffset += ArgSize - ObjSize; int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable); // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back( DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo())); } } // Assign locations to all of the incoming aggregate by value arguments. // Aggregates passed by value are stored in the local variable space of the // caller's stack frame, right above the parameter list area. SmallVector ByValArgLocs; CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), ByValArgLocs, *DAG.getContext()); // Reserve stack space for the allocations in CCInfo. CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign); CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal); // Area that is at least reserved in the caller of this function. unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); MinReservedArea = std::max(MinReservedArea, LinkageSize); // Set the size that is at least reserved in caller of this function. Tail // call optimized function's reserved stack space needs to be aligned so that // taking the difference between two stack areas will result in an aligned // stack. MinReservedArea = EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); FuncInfo->setMinReservedArea(MinReservedArea); SmallVector MemOps; // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... 
for expansion of llvm.va_start. if (isVarArg) { static const MCPhysReg GPArgRegs[] = { PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); static const MCPhysReg FPArgRegs[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 }; unsigned NumFPArgRegs = array_lengthof(FPArgRegs); if (useSoftFloat() || hasSPE()) NumFPArgRegs = 0; FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs)); FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs)); // Make room for NumGPArgRegs and NumFPArgRegs. int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8; FuncInfo->setVarArgsStackOffset( MFI.CreateFixedObject(PtrVT.getSizeInBits()/8, CCInfo.getNextStackOffset(), true)); FuncInfo->setVarArgsFrameIndex( MFI.CreateStackObject(Depth, Align(8), false)); SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); // The fixed integer arguments of a variadic function are stored to the // VarArgsFrameIndex on the stack so that they may be loaded by // dereferencing the result of va_next. for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) { // Get an existing live-in vreg, or add a new one. unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]); if (!VReg) VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6 // is set. // The double arguments are stored to the VarArgsFrameIndex // on the stack. for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) { // Get an existing live-in vreg, or add a new one. unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]); if (!VReg) VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); MemOps.push_back(Store); // Increment the address by eight for the next argument to store SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } } if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return Chain; } // PPC64 passes i8, i16, and i32 values in i64 registers. Promote // value to MVT::i64 and then truncate to the correct register size. SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG, SDValue ArgVal, const SDLoc &dl) const { if (Flags.isSExt()) ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal, DAG.getValueType(ObjectVT)); else if (Flags.isZExt()) ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, DAG.getValueType(ObjectVT)); return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal); } SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { // TODO: add description of PPC stack frame format, or at least some docs. 
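// As a rough illustration of the register assignment performed below
// (hypothetical signature; slot-by-slot details follow the 64-bit ELF ABIs):
//
//   void callee(long a, double b, vector int c);
//   // a -> first GPR in GPR[] (X3), b -> first FPR (F1), c -> first VR (V2)
//
// Arguments that no longer fit in the remaining GPR/FPR/VR pools are instead
// loaded from the parameter save area, which is what CalculateStackSlotUsed
// models during the first pass over the formal arguments.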
// bool isELFv2ABI = Subtarget.isELFv2ABI(); bool isLittleEndian = Subtarget.isLittleEndian(); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo(); assert(!(CallConv == CallingConv::Fast && isVarArg) && "fastcc not supported on varargs functions"); EVT PtrVT = getPointerTy(MF.getDataLayout()); // Potential tail calls could cause overwriting of argument stack slots. bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && (CallConv == CallingConv::Fast)); unsigned PtrByteSize = 8; unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); static const MCPhysReg GPR[] = { PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; const unsigned Num_GPR_Regs = array_lengthof(GPR); const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13; const unsigned Num_VR_Regs = array_lengthof(VR); // Do a first pass over the arguments to determine whether the ABI // guarantees that our caller has allocated the parameter save area // on its stack frame. In the ELFv1 ABI, this is always the case; // in the ELFv2 ABI, it is true if this is a vararg function or if // any parameter is located in a stack slot. bool HasParameterArea = !isELFv2ABI || isVarArg; unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize; unsigned NumBytes = LinkageSize; unsigned AvailableFPRs = Num_FPR_Regs; unsigned AvailableVRs = Num_VR_Regs; for (unsigned i = 0, e = Ins.size(); i != e; ++i) { if (Ins[i].Flags.isNest()) continue; if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags, PtrByteSize, LinkageSize, ParamAreaSize, NumBytes, AvailableFPRs, AvailableVRs)) HasParameterArea = true; } // Add DAG nodes to load the arguments or copy them out of registers. On // entry to a function on PPC, the arguments start after the linkage area, // although the first ones are often in registers. unsigned ArgOffset = LinkageSize; unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; SmallVector MemOps; Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin(); unsigned CurArgIdx = 0; for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { SDValue ArgVal; bool needsLoad = false; EVT ObjectVT = Ins[ArgNo].VT; EVT OrigVT = Ins[ArgNo].ArgVT; unsigned ObjSize = ObjectVT.getStoreSize(); unsigned ArgSize = ObjSize; ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; if (Ins[ArgNo].isOrigArg()) { std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); CurArgIdx = Ins[ArgNo].getOrigArgIndex(); } // We re-align the argument offset for each argument, except when using the // fast calling convention, when we need to make sure we do that only when // we'll actually use a stack slot. unsigned CurArgOffset; Align Alignment; auto ComputeArgOffset = [&]() { /* Respect alignment of argument on the stack. */ Alignment = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize); ArgOffset = alignTo(ArgOffset, Alignment); CurArgOffset = ArgOffset; }; if (CallConv != CallingConv::Fast) { ComputeArgOffset(); /* Compute GPR index associated with argument offset. */ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; GPR_idx = std::min(GPR_idx, Num_GPR_Regs); } // FIXME the codegen can be much improved in some cases. // We do not have to keep everything in memory. 
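// A small numeric sketch of the GPR_idx mapping computed above, assuming the
// 32-byte ELFv2 linkage area (ELFv1 reserves 48 bytes, so the offsets shift):
//
//   ArgOffset = 32  ->  GPR_idx = (32 - 32) / 8 = 0  ->  X3
//   ArgOffset = 48  ->  GPR_idx = (48 - 32) / 8 = 2  ->  X5
//
// Each doubleword of the parameter save area corresponds to one of the eight
// registers in GPR[]; once GPR_idx reaches Num_GPR_Regs the value is only
// available from its stack slot.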
if (Flags.isByVal()) { assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); if (CallConv == CallingConv::Fast) ComputeArgOffset(); // ObjSize is the true size, ArgSize rounded up to multiple of registers. ObjSize = Flags.getByValSize(); ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; // Empty aggregate parameters do not take up registers. Examples: // struct { } a; // union { } b; // int c[0]; // etc. However, we have to provide a place-holder in InVals, so // pretend we have an 8-byte item at the current address for that // purpose. if (!ObjSize) { int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back(FIN); continue; } // Create a stack object covering all stack doublewords occupied // by the argument. If the argument is (fully or partially) on // the stack, or if the argument is fully in registers but the // caller has allocated the parameter save anyway, we can refer // directly to the caller's stack frame. Otherwise, create a // local copy in our own frame. int FI; if (HasParameterArea || ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize) FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true); else FI = MFI.CreateStackObject(ArgSize, Alignment, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); // Handle aggregates smaller than 8 bytes. if (ObjSize < PtrByteSize) { // The value of the object is its address, which differs from the // address of the enclosing doubleword on big-endian systems. SDValue Arg = FIN; if (!isLittleEndian) { SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT); Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff); } InVals.push_back(Arg); if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); FuncInfo->addLiveInAttr(VReg, Flags); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store; if (ObjSize==1 || ObjSize==2 || ObjSize==4) { EVT ObjType = (ObjSize == 1 ? MVT::i8 : (ObjSize == 2 ? MVT::i16 : MVT::i32)); Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg, MachinePointerInfo(&*FuncArg), ObjType); } else { // For sizes that don't fit a truncating store (3, 5, 6, 7), // store the whole register as-is to the parameter save area // slot. Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo(&*FuncArg)); } MemOps.push_back(Store); } // Whether we copied from a register or not, advance the offset // into the parameter save area by a full doubleword. ArgOffset += PtrByteSize; continue; } // The value of the object is its address, which is the address of // its first stack doubleword. InVals.push_back(FIN); // Store whatever pieces of the object are in registers to memory. for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { if (GPR_idx == Num_GPR_Regs) break; unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); FuncInfo->addLiveInAttr(VReg, Flags); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Addr = FIN; if (j) { SDValue Off = DAG.getConstant(j, dl, PtrVT); Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off); } SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr, MachinePointerInfo(&*FuncArg, j)); MemOps.push_back(Store); ++GPR_idx; } ArgOffset += ArgSize; continue; } switch (ObjectVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unhandled argument type!"); case MVT::i1: case MVT::i32: case MVT::i64: if (Flags.isNest()) { // The 'nest' parameter, if any, is passed in R11. 
unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); break; } // These can be scalar arguments or elements of an integer array type // passed directly. Clang may use those instead of "byval" aggregate // types to avoid forcing arguments to memory unnecessarily. if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); FuncInfo->addLiveInAttr(VReg, Flags); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) // PPC64 passes i8, i16, and i32 values in i64 registers. Promote // value to MVT::i64 and then truncate to the correct register size. ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); } else { if (CallConv == CallingConv::Fast) ComputeArgOffset(); needsLoad = true; ArgSize = PtrByteSize; } if (CallConv != CallingConv::Fast || needsLoad) ArgOffset += 8; break; case MVT::f32: case MVT::f64: // These can be scalar arguments or elements of a float array type // passed directly. The latter are used to implement ELFv2 homogenous // float aggregates. if (FPR_idx != Num_FPR_Regs) { unsigned VReg; if (ObjectVT == MVT::f32) VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasP8Vector() ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass); else VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++FPR_idx; } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) { // FIXME: We may want to re-enable this for CallingConv::Fast on the P8 // once we support fp <-> gpr moves. // This can only ever happen in the presence of f32 array types, // since otherwise we never run out of FPRs before running out // of GPRs. unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass); FuncInfo->addLiveInAttr(VReg, Flags); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::f32) { if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0)) ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal, DAG.getConstant(32, dl, MVT::i32)); ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); } ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal); } else { if (CallConv == CallingConv::Fast) ComputeArgOffset(); needsLoad = true; } // When passing an array of floats, the array occupies consecutive // space in the argument area; only round up to the next doubleword // at the end of the array. Otherwise, each float takes 8 bytes. if (CallConv != CallingConv::Fast || needsLoad) { ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize; ArgOffset += ArgSize; if (Flags.isInConsecutiveRegsLast()) ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; } break; case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: case MVT::v2f64: case MVT::v2i64: case MVT::v1i128: case MVT::f128: // These can be scalar arguments or elements of a vector array type // passed directly. The latter are used to implement ELFv2 homogenous // vector aggregates. 
if (VR_idx != Num_VR_Regs) { unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++VR_idx; } else { if (CallConv == CallingConv::Fast) ComputeArgOffset(); needsLoad = true; } if (CallConv != CallingConv::Fast || needsLoad) ArgOffset += 16; break; } // We need to load the argument to a virtual register if we determined // above that we ran out of physical registers of the appropriate type. if (needsLoad) { if (ObjSize < ArgSize && !isLittleEndian) CurArgOffset += ArgSize - ObjSize; int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo()); } InVals.push_back(ArgVal); } // Area that is at least reserved in the caller of this function. unsigned MinReservedArea; if (HasParameterArea) MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize); else MinReservedArea = LinkageSize; // Set the size that is at least reserved in caller of this function. Tail // call optimized functions' reserved stack space needs to be aligned so that // taking the difference between two stack areas will result in an aligned // stack. MinReservedArea = EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); FuncInfo->setMinReservedArea(MinReservedArea); // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. // On ELFv2ABI spec, it writes: // C programs that are intended to be *portable* across different compilers // and architectures must use the header file to deal with variable // argument lists. if (isVarArg && MFI.hasVAStart()) { int Depth = ArgOffset; FuncInfo->setVarArgsFrameIndex( MFI.CreateFixedObject(PtrByteSize, Depth, true)); SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); // If this function is vararg, store any remaining integer argument regs // to their spots on the stack so that they may be loaded by dereferencing // the result of va_next. for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; GPR_idx < Num_GPR_Regs; ++GPR_idx) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); MemOps.push_back(Store); // Increment the address by four for the next argument to store SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } } if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return Chain; } /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be /// adjusted to accommodate the arguments for the tailcall. static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, unsigned ParamSize) { if (!isTailCall) return 0; PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo(); unsigned CallerMinReservedArea = FI->getMinReservedArea(); int SPDiff = (int)CallerMinReservedArea - (int)ParamSize; // Remember only if the new adjustment is bigger. 
if (SPDiff < FI->getTailCallSPDelta()) FI->setTailCallSPDelta(SPDiff); return SPDiff; } static bool isFunctionGlobalAddress(SDValue Callee); static bool callsShareTOCBase(const Function *Caller, SDValue Callee, const TargetMachine &TM) { // It does not make sense to call callsShareTOCBase() with a caller that // is PC Relative since PC Relative callers do not have a TOC. #ifndef NDEBUG const PPCSubtarget *STICaller = &TM.getSubtarget(*Caller); assert(!STICaller->isUsingPCRelativeCalls() && "PC Relative callers do not have a TOC and cannot share a TOC Base"); #endif // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols // don't have enough information to determine if the caller and callee share // the same TOC base, so we have to pessimistically assume they don't for // correctness. GlobalAddressSDNode *G = dyn_cast(Callee); if (!G) return false; const GlobalValue *GV = G->getGlobal(); // If the callee is preemptable, then the static linker will use a plt-stub // which saves the toc to the stack, and needs a nop after the call // instruction to convert to a toc-restore. if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV)) return false; // Functions with PC Relative enabled may clobber the TOC in the same DSO. // We may need a TOC restore in the situation where the caller requires a // valid TOC but the callee is PC Relative and does not. const Function *F = dyn_cast(GV); const GlobalAlias *Alias = dyn_cast(GV); // If we have an Alias we can try to get the function from there. if (Alias) { const GlobalObject *GlobalObj = Alias->getBaseObject(); F = dyn_cast(GlobalObj); } // If we still have no valid function pointer we do not have enough // information to determine if the callee uses PC Relative calls so we must // assume that it does. if (!F) return false; // If the callee uses PC Relative we cannot guarantee that the callee won't // clobber the TOC of the caller and so we must assume that the two // functions do not share a TOC base. const PPCSubtarget *STICallee = &TM.getSubtarget(*F); if (STICallee->isUsingPCRelativeCalls()) return false; // If the GV is not a strong definition then we need to assume it can be // replaced by another function at link time. The function that replaces // it may not share the same TOC as the caller since the callee may be // replaced by a PC Relative version of the same function. if (!GV->isStrongDefinitionForLinker()) return false; // The medium and large code models are expected to provide a sufficiently // large TOC to provide all data addressing needs of a module with a // single TOC. if (CodeModel::Medium == TM.getCodeModel() || CodeModel::Large == TM.getCodeModel()) return true; // Any explicitly-specified sections and section prefixes must also match. // Also, if we're using -ffunction-sections, then each function is always in // a different section (the same is true for COMDAT functions). 
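// Editorial illustration (assumption, not upstream text): for example, a
// callee placed in its own section via __attribute__((section(".text.unlikely")))
// while the caller stays in the default text section may be assigned to a
// different TOC group by the linker, so the conservative answer below is to
// report no shared TOC base.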
if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() || GV->getSection() != Caller->getSection()) return false; if (const auto *F = dyn_cast(GV)) { if (F->getSectionPrefix() != Caller->getSectionPrefix()) return false; } return true; } static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl &Outs) { assert(Subtarget.is64BitELFABI()); const unsigned PtrByteSize = 8; const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); static const MCPhysReg GPR[] = { PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; const unsigned NumGPRs = array_lengthof(GPR); const unsigned NumFPRs = 13; const unsigned NumVRs = array_lengthof(VR); const unsigned ParamAreaSize = NumGPRs * PtrByteSize; unsigned NumBytes = LinkageSize; unsigned AvailableFPRs = NumFPRs; unsigned AvailableVRs = NumVRs; for (const ISD::OutputArg& Param : Outs) { if (Param.Flags.isNest()) continue; if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize, LinkageSize, ParamAreaSize, NumBytes, AvailableFPRs, AvailableVRs)) return true; } return false; } static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) { if (CB.arg_size() != CallerFn->arg_size()) return false; auto CalleeArgIter = CB.arg_begin(); auto CalleeArgEnd = CB.arg_end(); Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin(); for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) { const Value* CalleeArg = *CalleeArgIter; const Value* CallerArg = &(*CallerArgIter); if (CalleeArg == CallerArg) continue; // e.g. @caller([4 x i64] %a, [4 x i64] %b) { // tail call @callee([4 x i64] undef, [4 x i64] %b) // } // 1st argument of callee is undef and has the same type as caller. if (CalleeArg->getType() == CallerArg->getType() && isa(CalleeArg)) continue; return false; } return true; } // Returns true if TCO is possible between the callers and callees // calling conventions. static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC) { // Tail calls are possible with fastcc and ccc. auto isTailCallableCC = [] (CallingConv::ID CC){ return CC == CallingConv::C || CC == CallingConv::Fast; }; if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC)) return false; // We can safely tail call both fastcc and ccc callees from a c calling // convention caller. If the caller is fastcc, we may have less stack space // than a non-fastcc caller with the same signature so disable tail-calls in // that case. return CallerCC == CallingConv::C || CallerCC == CalleeCC; } bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4( SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &Ins, SelectionDAG &DAG) const { bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt; if (DisableSCO && !TailCallOpt) return false; // Variadic argument functions are not supported. if (isVarArg) return false; auto &Caller = DAG.getMachineFunction().getFunction(); // Check that the calling conventions are compatible for tco. if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC)) return false; // Caller contains any byval parameter is not supported. 
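// Editorial note (editor's understanding, not upstream text): an incoming
// byval argument has storage in the caller's own argument area, and a sibling
// call reuses that area for the callee's outgoing arguments, so allowing it
// would require proving that the callee cannot clobber the caller's incoming
// byval storage.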
if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); })) return false; // Callee contains any byval parameter is not supported, too. // Note: This is a quick work around, because in some cases, e.g. // caller's stack size > callee's stack size, we are still able to apply // sibling call optimization. For example, gcc is able to do SCO for caller1 // in the following example, but not for caller2. // struct test { // long int a; // char ary[56]; // } gTest; // __attribute__((noinline)) int callee(struct test v, struct test *b) { // b->a = v.a; // return 0; // } // void caller1(struct test a, struct test c, struct test *b) { // callee(gTest, b); } // void caller2(struct test *b) { callee(gTest, b); } if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); })) return false; // If callee and caller use different calling conventions, we cannot pass // parameters on stack since offsets for the parameter area may be different. if (Caller.getCallingConv() != CalleeCC && needStackSlotPassParameters(Subtarget, Outs)) return false; // All variants of 64-bit ELF ABIs without PC-Relative addressing require that // the caller and callee share the same TOC for TCO/SCO. If the caller and // callee potentially have different TOC bases then we cannot tail call since // we need to restore the TOC pointer after the call. // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977 // We cannot guarantee this for indirect calls or calls to external functions. // When PC-Relative addressing is used, the concept of the TOC is no longer // applicable so this check is not required. // Check first for indirect calls. if (!Subtarget.isUsingPCRelativeCalls() && !isFunctionGlobalAddress(Callee) && !isa(Callee)) return false; // Check if we share the TOC base. if (!Subtarget.isUsingPCRelativeCalls() && !callsShareTOCBase(&Caller, Callee, getTargetMachine())) return false; // TCO allows altering callee ABI, so we don't have to check further. if (CalleeCC == CallingConv::Fast && TailCallOpt) return true; if (DisableSCO) return false; // If callee use the same argument list that caller is using, then we can // apply SCO on this case. If it is not, then we need to check if callee needs // stack for passing arguments. // PC Relative tail calls may not have a CallBase. // If there is no CallBase we cannot verify if we have the same argument // list so assume that we don't have the same argument list. if (CB && !hasSameArgumentList(&Caller, *CB) && needStackSlotPassParameters(Subtarget, Outs)) return false; else if (!CB && needStackSlotPassParameters(Subtarget, Outs)) return false; return true; } /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. Targets which want to do tail call /// optimization should implement this function. bool PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, const SmallVectorImpl &Ins, SelectionDAG& DAG) const { if (!getTargetMachine().Options.GuaranteedTailCallOpt) return false; // Variable argument functions are not supported. if (isVarArg) return false; MachineFunction &MF = DAG.getMachineFunction(); CallingConv::ID CallerCC = MF.getFunction().getCallingConv(); if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) { // Functions containing by val parameters are not supported. for (unsigned i = 0; i != Ins.size(); i++) { ISD::ArgFlagsTy Flags = Ins[i].Flags; if (Flags.isByVal()) return false; } // Non-PIC/GOT tail calls are supported. 
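// Editorial note (editor's reading, not upstream text): under PIC, only
// callees that bind locally (hidden or protected visibility) are accepted
// below, since a preemptible callee would be reached through the PLT and the
// GOT/PLT setup that implies is not compatible with the plain branch emitted
// for a guaranteed tail call.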
if (getTargetMachine().getRelocationModel() != Reloc::PIC_) return true; // At the moment we can only do local tail calls (in same module, hidden // or protected) if we are generating PIC. if (GlobalAddressSDNode *G = dyn_cast(Callee)) return G->getGlobal()->hasHiddenVisibility() || G->getGlobal()->hasProtectedVisibility(); } return false; } /// isCallCompatibleAddress - Return the immediate to use if the specified /// 32-bit value is representable in the immediate field of a BxA instruction. static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { ConstantSDNode *C = dyn_cast(Op); if (!C) return nullptr; int Addr = C->getZExtValue(); if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. SignExtend32<26>(Addr) != Addr) return nullptr; // Top 6 bits have to be sext of immediate. return DAG .getConstant( (int)C->getZExtValue() >> 2, SDLoc(Op), DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())) .getNode(); } namespace { struct TailCallArgumentInfo { SDValue Arg; SDValue FrameIdxOp; int FrameIdx = 0; TailCallArgumentInfo() = default; }; } // end anonymous namespace /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. static void StoreTailCallArgumentsToStackSlot( SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl &TailCallArgs, SmallVectorImpl &MemOpChains, const SDLoc &dl) { for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { SDValue Arg = TailCallArgs[i].Arg; SDValue FIN = TailCallArgs[i].FrameIdxOp; int FI = TailCallArgs[i].FrameIdx; // Store relative to framepointer. MemOpChains.push_back(DAG.getStore( Chain, dl, Arg, FIN, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI))); } } /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to /// the appropriate stack slot for the tail call optimized function call. static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl) { if (SPDiff) { // Calculate the new stack slot for the return address. MachineFunction &MF = DAG.getMachineFunction(); const PPCSubtarget &Subtarget = MF.getSubtarget(); const PPCFrameLowering *FL = Subtarget.getFrameLowering(); bool isPPC64 = Subtarget.isPPC64(); int SlotSize = isPPC64 ? 8 : 4; int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset(); int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize, NewRetAddrLoc, true); EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, MachinePointerInfo::getFixedStack(MF, NewRetAddr)); } return Chain; } /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate /// the position of the argument. static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl& TailCallArguments) { int Offset = ArgOffset + SPDiff; uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8; int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true); EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue FIN = DAG.getFrameIndex(FI, VT); TailCallArgumentInfo Info; Info.Arg = Arg; Info.FrameIdxOp = FIN; Info.FrameIdx = FI; TailCallArguments.push_back(Info); } /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address /// stack slot. Returns the chain as result and the loaded frame pointers in /// LROpOut/FPOpout. Used when tail calling. 
SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr( SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut, SDValue &FPOpOut, const SDLoc &dl) const { if (SPDiff) { // Load the LR and FP stack slot for later adjusting. EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; LROpOut = getReturnAddrFrameIndex(DAG); LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo()); Chain = SDValue(LROpOut.getNode(), 1); } return Chain; } /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified /// by "Src" to address "Dst" of size "Size". Alignment information is /// specified by the specific parameter attribute. The copy will be passed as /// a byval function parameter. /// Sometimes what we are copying is the end of a larger object, the part that /// does not fit in registers. static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl) { SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false, false, MachinePointerInfo(), MachinePointerInfo()); } /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of /// tail calls. static void LowerMemOpCallTo( SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl &MemOpChains, SmallVectorImpl &TailCallArguments, const SDLoc &dl) { EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); if (!isTailCall) { if (isVector) { SDValue StackPtr; if (isPPC64) StackPtr = DAG.getRegister(PPC::X1, MVT::i64); else StackPtr = DAG.getRegister(PPC::R1, MVT::i32); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, DAG.getConstant(ArgOffset, dl, PtrVT)); } MemOpChains.push_back( DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo())); // Calculate and remember argument location. } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, TailCallArguments); } static void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl &TailCallArguments) { // Emit a sequence of copyto/copyfrom virtual registers for arguments that // might overwrite each other in case of tail call optimization. SmallVector MemOpChains2; // Do not flag preceding copytoreg stuff together with the following stuff. InFlag = SDValue(); StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, MemOpChains2, dl); if (!MemOpChains2.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2); // Store the return address to the appropriate stack slot. Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl); // Emit callseq_end just before tailcall node. Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), DAG.getIntPtrConstant(0, dl, true), InFlag, dl); InFlag = Chain.getValue(1); } // Is this global address that of a function that can be called by name? (as // opposed to something that must hold a descriptor for an indirect call). 
static bool isFunctionGlobalAddress(SDValue Callee) { if (GlobalAddressSDNode *G = dyn_cast(Callee)) { if (Callee.getOpcode() == ISD::GlobalTLSAddress || Callee.getOpcode() == ISD::TargetGlobalTLSAddress) return false; return G->getGlobal()->getValueType()->isFunctionTy(); } return false; } SDValue PPCTargetLowering::LowerCallResult( SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { SmallVector RVLocs; CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); CCRetInfo.AnalyzeCallResult( Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold) ? RetCC_PPC_Cold : RetCC_PPC); // Copy all of the result registers out of their specified physreg. for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); SDValue Val; if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) { SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); Chain = Lo.getValue(1); InFlag = Lo.getValue(2); VA = RVLocs[++i]; // skip ahead to next loc SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); Chain = Hi.getValue(1); InFlag = Hi.getValue(2); if (!Subtarget.isLittleEndian()) std::swap (Lo, Hi); Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi); } else { Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), InFlag); Chain = Val.getValue(1); InFlag = Val.getValue(2); } switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::AExt: Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); break; case CCValAssign::ZExt: Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val, DAG.getValueType(VA.getValVT())); Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); break; case CCValAssign::SExt: Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val, DAG.getValueType(VA.getValVT())); Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); break; } InVals.push_back(Val); } return Chain; } static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG, const PPCSubtarget &Subtarget, bool isPatchPoint) { // PatchPoint calls are not indirect. if (isPatchPoint) return false; if (isFunctionGlobalAddress(Callee) || dyn_cast(Callee)) return false; // Darwin, and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot // because the immediate function pointer points to a descriptor instead of // a function entry point. The ELFv2 ABI cannot use a BLA because the function // pointer immediate points to the global entry point, while the BLA would // need to jump to the local entry point (see rL211174). if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() && isBLACompatibleAddress(Callee, DAG)) return false; return true; } // AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls. static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) { return Subtarget.isAIXABI() || (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()); } static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags, const Function &Caller, const SDValue &Callee, const PPCSubtarget &Subtarget, const TargetMachine &TM) { if (CFlags.IsTailCall) return PPCISD::TC_RETURN; // This is a call through a function pointer.
if (CFlags.IsIndirect) { // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across // indirect calls. The save of the caller's TOC pointer to the stack will be // inserted into the DAG as part of call lowering. The restore of the TOC // pointer is modeled by using a pseudo instruction for the call opcode that // represents the 2 instruction sequence of an indirect branch and link, // immediately followed by a load of the TOC pointer from the stack save // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC // as it is not saved or used. return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC : PPCISD::BCTRL; } if (Subtarget.isUsingPCRelativeCalls()) { assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI."); return PPCISD::CALL_NOTOC; } // The ABIs that maintain a TOC pointer across calls need to have a nop // immediately following the call instruction if the caller and callee may // have different TOC bases. At link time if the linker determines the calls // may not share a TOC base, the call is redirected to a trampoline inserted // by the linker. The trampoline will (among other things) save the caller's // TOC pointer at an ABI designated offset in the linkage area and the linker // will rewrite the nop to be a load of the TOC pointer from the linkage area // into gpr2. if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL : PPCISD::CALL_NOP; return PPCISD::CALL; } static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget) { if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI()) if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) return SDValue(Dest, 0); // Returns true if the callee is local, and false otherwise. auto isLocalCallee = [&]() { const GlobalAddressSDNode *G = dyn_cast(Callee); const Module *Mod = DAG.getMachineFunction().getFunction().getParent(); const GlobalValue *GV = G ? G->getGlobal() : nullptr; return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) && !dyn_cast_or_null(GV); }; // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in // a static relocation model causes some versions of GNU LD (2.17.50, at // least) to force BSS-PLT, instead of secure-PLT, even if all objects are // built with secure-PLT. bool UsePlt = Subtarget.is32BitELFABI() && !isLocalCallee() && Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_; const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) { const TargetMachine &TM = Subtarget.getTargetMachine(); const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering(); MCSymbolXCOFF *S = cast(TLOF->getFunctionEntryPointSymbol(GV, TM)); MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); return DAG.getMCSymbol(S, PtrVT); }; if (isFunctionGlobalAddress(Callee)) { const GlobalValue *GV = cast(Callee)->getGlobal(); if (Subtarget.isAIXABI()) { assert(!isa(GV) && "IFunc is not supported on AIX."); return getAIXFuncEntryPointSymbolSDNode(GV); } return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0, UsePlt ? PPCII::MO_PLT : 0); } if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { const char *SymName = S->getSymbol(); if (Subtarget.isAIXABI()) { // If there exists a user-declared function whose name is the same as the // ExternalSymbol's, then we pick up the user-declared version.
const Module *Mod = DAG.getMachineFunction().getFunction().getParent(); if (const Function *F = dyn_cast_or_null(Mod->getNamedValue(SymName))) return getAIXFuncEntryPointSymbolSDNode(F); // On AIX, direct function calls reference the symbol for the function's // entry point, which is named by prepending a "." before the function's // C-linkage name. A Qualname is returned here because an external // function entry point is a csect with XTY_ER property. const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) { auto &Context = DAG.getMachineFunction().getMMI().getContext(); MCSectionXCOFF *Sec = Context.getXCOFFSection( (Twine(".") + Twine(SymName)).str(), XCOFF::XMC_PR, XCOFF::XTY_ER, SectionKind::getMetadata()); return Sec->getQualNameSymbol(); }; SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data(); } return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(), UsePlt ? PPCII::MO_PLT : 0); } // No transformation needed. assert(Callee.getNode() && "What no callee?"); return Callee; } static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) { assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START && "Expected a CALLSEQ_STARTSDNode."); // The last operand is the chain, except when the node has glue. If the node // has glue, then the last operand is the glue, and the chain is the second // last operand. SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1); if (LastValue.getValueType() != MVT::Glue) return LastValue; return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2); } // Creates the node that moves a functions address into the count register // to prepare for an indirect call instruction. static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl) { SDValue MTCTROps[] = {Chain, Callee, Glue}; EVT ReturnTypes[] = {MVT::Other, MVT::Glue}; Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2), makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2)); // The glue is the second value produced. Glue = Chain.getValue(1); } static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget) { // Function pointers in the 64-bit SVR4 ABI do not point to the function // entry point, but to the function descriptor (the function entry point // address is part of the function descriptor though). // The function descriptor is a three doubleword structure with the // following fields: function entry point, TOC base address and // environment pointer. // Thus for a call through a function pointer, the following actions need // to be performed: // 1. Save the TOC of the caller in the TOC save area of its stack // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). // 2. Load the address of the function entry point from the function // descriptor. // 3. Load the TOC of the callee from the function descriptor into r2. // 4. Load the environment pointer from the function descriptor into // r11. // 5. Branch to the function entry point address. // 6. On return of the callee, the TOC of the caller needs to be // restored (this is done in FinishCall()). // // The loads are scheduled at the beginning of the call sequence, and the // register copies are flagged together to ensure that no other // operations can be scheduled in between. E.g. 
without flagging the // copies together, a TOC access in the caller could be scheduled between // the assignment of the callee TOC and the branch to the callee, which leads // to incorrect code. // Start by loading the function address from the descriptor. SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart); auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors() ? (MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant) : MachineMemOperand::MONone; MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr); // Registers used in building the DAG. const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister(); const MCRegister TOCReg = Subtarget.getTOCPointerRegister(); // Offsets of descriptor members. const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset(); const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset(); const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4; // One load for the functions entry point address. SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI, Alignment, MMOFlags); // One for loading the TOC anchor for the module that contains the called // function. SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl); SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff); SDValue TOCPtr = DAG.getLoad(RegVT, dl, LDChain, AddTOC, MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags); // One for loading the environment pointer. SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff); SDValue LoadEnvPtr = DAG.getLoad(RegVT, dl, LDChain, AddPtr, MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags); // Then copy the newly loaded TOC anchor to the TOC pointer. SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue); Chain = TOCVal.getValue(0); Glue = TOCVal.getValue(1); // If the function call has an explicit 'nest' parameter, it takes the // place of the environment pointer. assert((!hasNest || !Subtarget.isAIXABI()) && "Nest parameter is not supported on AIX."); if (!hasNest) { SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue); Chain = EnvVal.getValue(0); Glue = EnvVal.getValue(1); } // The rest of the indirect call sequence is the same as the non-descriptor // DAG. prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl); } static void buildCallOperands(SmallVectorImpl &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector, 8> &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget) { const bool IsPPC64 = Subtarget.isPPC64(); // MVT for a general purpose register. const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32; // First operand is always the chain. Ops.push_back(Chain); // If it's a direct call pass the callee as the second operand. if (!CFlags.IsIndirect) Ops.push_back(Callee); else { assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect."); // For the TOC based ABIs, we have saved the TOC pointer to the linkage area // on the stack (this would have been done in `LowerCall_64SVR4` or // `LowerCall_AIX`). The call instruction is a pseudo instruction that // represents both the indirect branch and a load that restores the TOC // pointer from the linkage area. The operand for the TOC restore is an add // of the TOC save offset to the stack pointer. 
This must be the second // operand: after the chain input but before any other variadic arguments. // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not // saved or used. if (isTOCSaveRestoreRequired(Subtarget)) { const MCRegister StackPtrReg = Subtarget.getStackPointerRegister(); SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT); unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff); Ops.push_back(AddTOC); } // Add the register used for the environment pointer. if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest) Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(), RegVT)); // Add CTR register as callee so a bctr can be emitted later. if (CFlags.IsTailCall) Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT)); } // If this is a tail call add stack pointer delta. if (CFlags.IsTailCall) Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32)); // Add argument registers to the end of the list so that they are known live // into the call. for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) Ops.push_back(DAG.getRegister(RegsToPass[i].first, RegsToPass[i].second.getValueType())); // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is // no way to mark dependencies as implicit here. // We will add the R2/X2 dependency in EmitInstrWithCustomInserter. if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) && !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls()) Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT)); // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls if (CFlags.IsVarArg && Subtarget.is32BitELFABI()) Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); const uint32_t *Mask = TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); // If the glue is valid, it is the last operand. if (Glue.getNode()) Ops.push_back(Glue); } SDValue PPCTargetLowering::FinishCall( CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector, 8> &RegsToPass, SDValue Glue, SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff, unsigned NumBytes, const SmallVectorImpl &Ins, SmallVectorImpl &InVals, const CallBase *CB) const { if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) || Subtarget.isAIXABI()) setUsesTOCBasePtr(DAG); unsigned CallOpc = getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee, Subtarget, DAG.getTarget()); if (!CFlags.IsIndirect) Callee = transformCallee(Callee, DAG, dl, Subtarget); else if (Subtarget.usesFunctionDescriptors()) prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB, dl, CFlags.HasNest, Subtarget); else prepareIndirectCall(DAG, Callee, Glue, Chain, dl); // Build the operand list for the call instruction. SmallVector Ops; buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee, SPDiff, Subtarget); // Emit tail call. if (CFlags.IsTailCall) { // Indirect tail call when using PC Relative calls do not have the same // constraints. 
assert(((Callee.getOpcode() == ISD::Register && cast(Callee)->getReg() == PPC::CTR) || Callee.getOpcode() == ISD::TargetExternalSymbol || Callee.getOpcode() == ISD::TargetGlobalAddress || isa(Callee) || (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) && "Expecting a global address, external symbol, absolute value, " "register or an indirect tail call when PC Relative calls are " "used."); // PC Relative calls also use TC_RETURN as the way to mark tail calls. assert(CallOpc == PPCISD::TC_RETURN && "Unexpected call opcode for a tail call."); DAG.getMachineFunction().getFrameInfo().setHasTailCall(); return DAG.getNode(CallOpc, dl, MVT::Other, Ops); } std::array ReturnTypes = {{MVT::Other, MVT::Glue}}; Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops); DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge); Glue = Chain.getValue(1); // When performing tail call optimization the callee pops its arguments off // the stack. Account for this here so these bytes can be pushed back on in // PPCFrameLowering::eliminateCallFramePseudoInstr. int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast && getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0; Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true), DAG.getIntPtrConstant(BytesCalleePops, dl, true), Glue, dl); Glue = Chain.getValue(1); return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl, DAG, InVals); } SDValue PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc &dl = CLI.DL; SmallVectorImpl &Outs = CLI.Outs; SmallVectorImpl &OutVals = CLI.OutVals; SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &isTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; bool isVarArg = CLI.IsVarArg; bool isPatchPoint = CLI.IsPatchPoint; const CallBase *CB = CLI.CB; if (isTailCall) { if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall())) isTailCall = false; else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) isTailCall = IsEligibleForTailCallOptimization_64SVR4( Callee, CallConv, CB, isVarArg, Outs, Ins, DAG); else isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, Ins, DAG); if (isTailCall) { ++NumTailCalls; if (!getTargetMachine().Options.GuaranteedTailCallOpt) ++NumSiblingCalls; // PC Relative calls no longer guarantee that the callee is a Global // Address Node. The callee could be an indirect tail call in which // case the SDValue for the callee could be a load (to load the address // of a function pointer) or it may be a register copy (to move the // address of the callee from a function parameter into a virtual // register). It may also be an ExternalSymbolSDNode (ex memcopy). assert((Subtarget.isUsingPCRelativeCalls() || isa(Callee)) && "Callee should be an llvm::Function object."); LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName() << "\nTCO callee: "); LLVM_DEBUG(Callee.dump()); } } if (!isTailCall && CB && CB->isMustTailCall()) report_fatal_error("failed to perform tail call elimination on a call " "site marked musttail"); // When long calls (i.e. indirect calls) are always used, calls are always // made via function pointer. If we have a function name, first translate it // into a pointer. 
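// Editorial illustration (pseudo-assembly sketch, not upstream text): the
// direct form "bl callee" only reaches +/-32 MiB, so with -mlongcall the call
// is made through a pointer instead, roughly:
//   (materialize the address of callee into r12)
//   mtctr 12
//   bctrl
//   ld 2, 24(1)    # ELFv2: reload the TOC pointer afterwards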
if (Subtarget.useLongCalls() && isa(Callee) && !isTailCall) Callee = LowerGlobalAddress(Callee, DAG); CallFlags CFlags( CallConv, isTailCall, isVarArg, isPatchPoint, isIndirectCall(Callee, DAG, Subtarget, isPatchPoint), // hasNest Subtarget.is64BitELFABI() && any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }), CLI.NoMerge); if (Subtarget.isAIXABI()) return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG, InVals, CB); assert(Subtarget.isSVR4ABI()); if (Subtarget.isPPC64()) return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG, InVals, CB); return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG, InVals, CB); } SDValue PPCTargetLowering::LowerCall_32SVR4( SDValue Chain, SDValue Callee, CallFlags CFlags, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals, const CallBase *CB) const { // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description // of the 32-bit SVR4 ABI stack frame layout. const CallingConv::ID CallConv = CFlags.CallConv; const bool IsVarArg = CFlags.IsVarArg; const bool IsTailCall = CFlags.IsTailCall; assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold || CallConv == CallingConv::Fast) && "Unknown calling convention!"); const Align PtrAlign(4); MachineFunction &MF = DAG.getMachineFunction(); // Mark this function as potentially containing a function that contains a // tail call. As a consequence the frame pointer will be used for dynamicalloc // and restoring the callers stack pointer in this functions epilog. This is // done because by tail calling the called function might overwrite the value // in this function's (MF) stack pointer stack slot 0(SP). if (getTargetMachine().Options.GuaranteedTailCallOpt && CallConv == CallingConv::Fast) MF.getInfo()->setHasFastCall(); // Count how many bytes are to be pushed on the stack, including the linkage // area, parameter list area and the part of the local variable space which // contains copies of aggregates which are passed by value. // Assign locations to all of the outgoing arguments. SmallVector ArgLocs; PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); // Reserve space for the linkage area on the stack. CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(), PtrAlign); if (useSoftFloat()) CCInfo.PreAnalyzeCallOperands(Outs); if (IsVarArg) { // Handle fixed and variable vector arguments differently. // Fixed vector arguments go into registers as long as registers are // available. Variable vector arguments always go into memory. unsigned NumArgs = Outs.size(); for (unsigned i = 0; i != NumArgs; ++i) { MVT ArgVT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; bool Result; if (Outs[i].IsFixed) { Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); } else { Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); } if (Result) { #ifndef NDEBUG errs() << "Call operand #" << i << " has unhandled type " << EVT(ArgVT).getEVTString() << "\n"; #endif llvm_unreachable(nullptr); } } } else { // All arguments are treated the same. CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4); } CCInfo.clearWasPPCF128(); // Assign locations to all of the outgoing aggregate by value arguments. SmallVector ByValArgLocs; CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext()); // Reserve stack space for the allocations in CCInfo. 
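// Editorial note: the byval copies are laid out right after the ordinary
// outgoing-argument area computed above, which is why CCByValInfo starts
// allocating at CCInfo.getNextStackOffset() below.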
CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign); CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal); // Size of the linkage area, parameter list area and the part of the local // space variable where copies of aggregates which are passed by value are // stored. unsigned NumBytes = CCByValInfo.getNextStackOffset(); // Calculate by how many bytes the stack has to be adjusted in case of tail // call optimization. int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes); // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); SDValue CallSeqStart = Chain; // Load the return address and frame pointer so it can be moved somewhere else // later. SDValue LROp, FPOp; Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); // Set up a copy of the stack pointer for use loading and storing any // arguments that may not fit in the registers available for argument // passing. SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32); SmallVector, 8> RegsToPass; SmallVector TailCallArguments; SmallVector MemOpChains; bool seenFloatArg = false; // Walk the register/memloc assignments, inserting copies/loads. // i - Tracks the index into the list of registers allocated for the call // RealArgIdx - Tracks the index into the list of actual function arguments // j - Tracks the index into the list of byval arguments for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size(); i != e; ++i, ++RealArgIdx) { CCValAssign &VA = ArgLocs[i]; SDValue Arg = OutVals[RealArgIdx]; ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags; if (Flags.isByVal()) { // Argument is an aggregate which is passed by value, thus we need to // create a copy of it in the local variable space of the current stack // frame (which is the stack frame of the caller) and pass the address of // this copy to the callee. assert((j < ByValArgLocs.size()) && "Index out of bounds!"); CCValAssign &ByValVA = ByValArgLocs[j++]; assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!"); // Memory reserved in the local variable space of the callers stack frame. unsigned LocMemOffset = ByValVA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), StackPtr, PtrOff); // Create a copy of the argument in the local area of the current // stack frame. SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, CallSeqStart.getNode()->getOperand(0), Flags, DAG, dl); // This must go outside the CALLSEQ_START..END. SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0, SDLoc(MemcpyCall)); DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode()); Chain = CallSeqStart = NewCallSeqStart; // Pass the address of the aggregate copy on the stack either in a // physical register or in the parameter list area of the current stack // frame to the callee. Arg = PtrOff; } // When useCRBits() is true, there can be i1 arguments. // It is because getRegisterType(MVT::i1) => MVT::i1, // and for other integer types getRegisterType() => MVT::i32. // Extend i1 and ensure callee will get i32. if (Arg.getValueType() == MVT::i1) Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, MVT::i32, Arg); if (VA.isRegLoc()) { seenFloatArg |= VA.getLocVT().isFloatingPoint(); // Put argument in a physical register. 
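// Editorial note: on SPE targets an f64 argument has no 64-bit FPR home; the
// code below uses PPCISD::EXTRACT_SPE to split it into two i32 halves and
// hands them to the registers of the current and the following argument
// locations, swapping the halves for endianness as needed.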
if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) { bool IsLE = Subtarget.isLittleEndian(); SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg, DAG.getIntPtrConstant(IsLE ? 0 : 1, dl)); RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0))); SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg, DAG.getIntPtrConstant(IsLE ? 1 : 0, dl)); RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(), SVal.getValue(0))); } else RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else { // Put argument in the parameter list area of the current stack frame. assert(VA.isMemLoc()); unsigned LocMemOffset = VA.getLocMemOffset(); if (!IsTailCall) { SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()), StackPtr, PtrOff); MemOpChains.push_back( DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo())); } else { // Calculate and remember argument location. CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset, TailCallArguments); } } } if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } // Set CR bit 6 to true if this is a vararg call with floating args passed in // registers. if (IsVarArg) { SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, InFlag }; Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1)); InFlag = Chain.getValue(1); } if (IsTailCall) PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, TailCallArguments); return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, NumBytes, Ins, InVals, CB); } // Copy an argument into memory, being careful to do this outside the // call sequence for the call to which the argument belongs. SDValue PPCTargetLowering::createMemcpyOutsideCallSeq( SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl) const { SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, CallSeqStart.getNode()->getOperand(0), Flags, DAG, dl); // The MEMCPY must go outside the CALLSEQ_START..END. 
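// Editorial note (rationale as understood by the editor): the copy may itself
// be lowered to a memcpy libcall, and call sequences must not nest, so the
// memcpy is chained onto the node feeding the original CALLSEQ_START and a
// fresh CALLSEQ_START is rebuilt on top of it, which is what the code below
// does.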
int64_t FrameSize = CallSeqStart.getConstantOperandVal(1); SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0, SDLoc(MemcpyCall)); DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode()); return NewCallSeqStart; } SDValue PPCTargetLowering::LowerCall_64SVR4( SDValue Chain, SDValue Callee, CallFlags CFlags, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals, const CallBase *CB) const { bool isELFv2ABI = Subtarget.isELFv2ABI(); bool isLittleEndian = Subtarget.isLittleEndian(); unsigned NumOps = Outs.size(); bool IsSibCall = false; bool IsFastCall = CFlags.CallConv == CallingConv::Fast; EVT PtrVT = getPointerTy(DAG.getDataLayout()); unsigned PtrByteSize = 8; MachineFunction &MF = DAG.getMachineFunction(); if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt) IsSibCall = true; // Mark this function as potentially containing a function that contains a // tail call. As a consequence the frame pointer will be used for dynamicalloc // and restoring the callers stack pointer in this functions epilog. This is // done because by tail calling the called function might overwrite the value // in this function's (MF) stack pointer stack slot 0(SP). if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall) MF.getInfo()->setHasFastCall(); assert(!(IsFastCall && CFlags.IsVarArg) && "fastcc not supported on varargs functions"); // Count how many bytes are to be pushed on the stack, including the linkage // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage // area is 32 bytes reserved space for [SP][CR][LR][TOC]. unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); unsigned NumBytes = LinkageSize; unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; static const MCPhysReg GPR[] = { PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; const unsigned NumGPRs = array_lengthof(GPR); const unsigned NumFPRs = useSoftFloat() ? 0 : 13; const unsigned NumVRs = array_lengthof(VR); // On ELFv2, we can avoid allocating the parameter area if all the arguments // can be passed to the callee in registers. // For the fast calling convention, there is another check below. // Note: We should keep consistent with LowerFormalArguments_64SVR4() bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall; if (!HasParameterArea) { unsigned ParamAreaSize = NumGPRs * PtrByteSize; unsigned AvailableFPRs = NumFPRs; unsigned AvailableVRs = NumVRs; unsigned NumBytesTmp = NumBytes; for (unsigned i = 0; i != NumOps; ++i) { if (Outs[i].Flags.isNest()) continue; if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags, PtrByteSize, LinkageSize, ParamAreaSize, NumBytesTmp, AvailableFPRs, AvailableVRs)) HasParameterArea = true; } } // When using the fast calling convention, we don't provide backing for // arguments that will be in registers. unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0; // Avoid allocating parameter area for fastcc functions if all the arguments // can be passed in the registers. if (IsFastCall) HasParameterArea = false; // Add up all the space actually used. 
for (unsigned i = 0; i != NumOps; ++i) { ISD::ArgFlagsTy Flags = Outs[i].Flags; EVT ArgVT = Outs[i].VT; EVT OrigVT = Outs[i].ArgVT; if (Flags.isNest()) continue; if (IsFastCall) { if (Flags.isByVal()) { NumGPRsUsed += (Flags.getByValSize()+7)/8; if (NumGPRsUsed > NumGPRs) HasParameterArea = true; } else { switch (ArgVT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unexpected ValueType for argument!"); case MVT::i1: case MVT::i32: case MVT::i64: if (++NumGPRsUsed <= NumGPRs) continue; break; case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: case MVT::v2f64: case MVT::v2i64: case MVT::v1i128: case MVT::f128: if (++NumVRsUsed <= NumVRs) continue; break; case MVT::v4f32: if (++NumVRsUsed <= NumVRs) continue; break; case MVT::f32: case MVT::f64: if (++NumFPRsUsed <= NumFPRs) continue; break; } HasParameterArea = true; } } /* Respect alignment of argument on the stack. */ auto Alignement = CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); NumBytes = alignTo(NumBytes, Alignement); NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); if (Flags.isInConsecutiveRegsLast()) NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; } unsigned NumBytesActuallyUsed = NumBytes; // In the old ELFv1 ABI, // the prolog code of the callee may store up to 8 GPR argument registers to // the stack, allowing va_start to index over them in memory if its varargs. // Because we cannot tell if this is needed on the caller side, we have to // conservatively assume that it is needed. As such, make sure we have at // least enough stack space for the caller to store the 8 GPRs. // In the ELFv2 ABI, we allocate the parameter area iff a callee // really requires memory operands, e.g. a vararg function. if (HasParameterArea) NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); else NumBytes = LinkageSize; // Tail call needs the stack to be aligned. if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall) NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes); int SPDiff = 0; // Calculate by how many bytes the stack has to be adjusted in case of tail // call optimization. if (!IsSibCall) SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes); // To protect arguments on the stack from being clobbered in a tail call, // force all the loads to happen before doing any other lowering. if (CFlags.IsTailCall) Chain = DAG.getStackArgumentTokenFactor(Chain); // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass if (!IsSibCall) Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); SDValue CallSeqStart = Chain; // Load the return address and frame pointer so it can be move somewhere else // later. SDValue LROp, FPOp; Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl); // Set up a copy of the stack pointer for use loading and storing any // arguments that may not fit in the registers available for argument // passing. SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64); // Figure out which arguments are going to go in registers, and which in // memory. Also, if this is a vararg function, floating point operations // must be stored to our stack, and loaded into integer regs as well, if // any integer regs are available for argument passing. 
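// Editorial illustration (not upstream text): for a variadic callee such as
//   int printf(const char *fmt, ...);
// a double in the variable part is both assigned an FPR (if one is free) and
// stored to the parameter save area / loaded into a GPR, because the callee's
// va_arg walks the GPR/stack image of the argument list rather than the FPRs.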
unsigned ArgOffset = LinkageSize; SmallVector, 8> RegsToPass; SmallVector TailCallArguments; SmallVector MemOpChains; for (unsigned i = 0; i != NumOps; ++i) { SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; EVT ArgVT = Outs[i].VT; EVT OrigVT = Outs[i].ArgVT; // PtrOff will be used to store the current argument to the stack if a // register cannot be found for it. SDValue PtrOff; // We re-align the argument offset for each argument, except when using the // fast calling convention, when we need to make sure we do that only when // we'll actually use a stack slot. auto ComputePtrOff = [&]() { /* Respect alignment of argument on the stack. */ auto Alignment = CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); ArgOffset = alignTo(ArgOffset, Alignment); PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); }; if (!IsFastCall) { ComputePtrOff(); /* Compute GPR index associated with argument offset. */ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; GPR_idx = std::min(GPR_idx, NumGPRs); } // Promote integers to 64-bit values. if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) { // FIXME: Should this use ANY_EXTEND if neither sext nor zext? unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); } // FIXME memcpy is used way more than necessary. Correctness first. // Note: "by value" is code for passing a structure by value, not // basic types. if (Flags.isByVal()) { // Note: Size includes alignment padding, so // struct x { short a; char b; } // will have Size = 4. With #pragma pack(1), it will have Size = 3. // These are the proper values we need for right-justifying the // aggregate in a parameter register. unsigned Size = Flags.getByValSize(); // An empty aggregate parameter takes up no storage and no // registers. if (Size == 0) continue; if (IsFastCall) ComputePtrOff(); // All aggregates smaller than 8 bytes must be passed right-justified. if (Size==1 || Size==2 || Size==4) { EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32); if (GPR_idx != NumGPRs) { SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, MachinePointerInfo(), VT); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); ArgOffset += PtrByteSize; continue; } } if (GPR_idx == NumGPRs && Size < 8) { SDValue AddPtr = PtrOff; if (!isLittleEndian) { SDValue Const = DAG.getConstant(PtrByteSize - Size, dl, PtrOff.getValueType()); AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); } Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, CallSeqStart, Flags, DAG, dl); ArgOffset += PtrByteSize; continue; } // Copy entire object into memory. There are cases where gcc-generated // code assumes it is there, even if it could be put entirely into // registers. (This is not what the doc says.) // FIXME: The above statement is likely due to a misunderstanding of the // documents. All arguments must be copied into the parameter area BY // THE CALLEE in the event that the callee takes the address of any // formal argument. That has not yet been implemented. However, it is // reasonable to use the stack area as a staging area for the register // load. // Skip this for small aggregates, as we will use the same slot for a // right-justified copy, below. 
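// Editorial illustration (not upstream text): "right-justified" means the
// aggregate ends up in the low-order bytes of its doubleword on big-endian
// targets.  A 3-byte struct headed for a GPR is first copied to offset
// (8 - Size) == 5 inside its staging doubleword; loading the full doubleword
// then leaves the three payload bytes in the least-significant end of the
// register, where the ABI expects them.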
if (Size >= 8) Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, CallSeqStart, Flags, DAG, dl); // When a register is available, pass a small aggregate right-justified. if (Size < 8 && GPR_idx != NumGPRs) { // The easiest way to get this right-justified in a register // is to copy the structure into the rightmost portion of a // local variable slot, then load the whole slot into the // register. // FIXME: The memcpy seems to produce pretty awful code for // small aggregates, particularly for packed ones. // FIXME: It would be preferable to use the slot in the // parameter save area instead of a new local variable. SDValue AddPtr = PtrOff; if (!isLittleEndian) { SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType()); AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); } Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, CallSeqStart, Flags, DAG, dl); // Load the slot into the register. SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo()); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); // Done with this argument. ArgOffset += PtrByteSize; continue; } // For aggregates larger than PtrByteSize, copy the pieces of the // object that fit into registers from the parameter save area. for (unsigned j=0; j gpr moves. // In the non-vararg case, this can only ever happen in the // presence of f32 array types, since otherwise we never run // out of FPRs before running out of GPRs. SDValue ArgVal; // Double values are always passed in a single GPR. if (Arg.getValueType() != MVT::f32) { ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); // Non-array float values are extended and passed in a GPR. } else if (!Flags.isInConsecutiveRegs()) { ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); // If we have an array of floats, we collect every odd element // together with its predecessor into one GPR. } else if (ArgOffset % PtrByteSize != 0) { SDValue Lo, Hi; Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]); Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); if (!isLittleEndian) std::swap(Lo, Hi); ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); // The final element, if even, goes into the first half of a GPR. } else if (Flags.isInConsecutiveRegsLast()) { ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); if (!isLittleEndian) ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal, DAG.getConstant(32, dl, MVT::i32)); // Non-final even elements are skipped; they will be handled // together the with subsequent argument on the next go-around. } else ArgVal = SDValue(); if (ArgVal.getNode()) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal)); } else { if (IsFastCall) ComputePtrOff(); // Single-precision floating-point values are mapped to the // second (rightmost) word of the stack doubleword. 
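// [Editor's note] A minimal sketch of the packing performed just above for
// f32 array elements: each float is bitcast to its 32-bit pattern and two
// consecutive elements share one 64-bit GPR, with the pair swapped on
// big-endian before the BUILD_PAIR.  The helper name is illustrative only
// and the block is kept disabled.
#if 0
#include <cstdint>
#include <cstring>

static uint64_t packF32PairForGPR(float LowerAddrElt, float HigherAddrElt,
                                  bool IsLittleEndian) {
  uint32_t Lo, Hi;
  std::memcpy(&Lo, &LowerAddrElt, 4);  // element at the lower stack address
  std::memcpy(&Hi, &HigherAddrElt, 4); // element at the higher stack address
  // Matches BUILD_PAIR(Lo, Hi) with the std::swap(Lo, Hi) done on BE above.
  return IsLittleEndian ? (uint64_t(Hi) << 32) | Lo
                        : (uint64_t(Lo) << 32) | Hi;
}
#endif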
if (Arg.getValueType() == MVT::f32 && !isLittleEndian && !Flags.isInConsecutiveRegs()) { SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); } assert(HasParameterArea && "Parameter area must exist to pass an argument in memory."); LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, true, CFlags.IsTailCall, false, MemOpChains, TailCallArguments, dl); NeededLoad = true; } // When passing an array of floats, the array occupies consecutive // space in the argument area; only round up to the next doubleword // at the end of the array. Otherwise, each float takes 8 bytes. if (!IsFastCall || NeededLoad) { ArgOffset += (Arg.getValueType() == MVT::f32 && Flags.isInConsecutiveRegs()) ? 4 : 8; if (Flags.isInConsecutiveRegsLast()) ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; } break; } case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: case MVT::v2f64: case MVT::v2i64: case MVT::v1i128: case MVT::f128: // These can be scalar arguments or elements of a vector array type // passed directly. The latter are used to implement ELFv2 homogenous // vector aggregates. // For a varargs call, named arguments go into VRs or on the stack as // usual; unnamed arguments always go to the stack or the corresponding // GPRs when within range. For now, we always put the value in both // locations (or even all three). if (CFlags.IsVarArg) { assert(HasParameterArea && "Parameter area must exist if we have a varargs call."); // We could elide this store in the case where the object fits // entirely in R registers. Maybe later. SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()); MemOpChains.push_back(Store); if (VR_idx != NumVRs) { SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo()); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); } ArgOffset += 16; for (unsigned i=0; i<16; i+=PtrByteSize) { if (GPR_idx == NumGPRs) break; SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, DAG.getConstant(i, dl, PtrVT)); SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo()); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } break; } // Non-varargs Altivec params go into VRs or on the stack. if (VR_idx != NumVRs) { RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); } else { if (IsFastCall) ComputePtrOff(); assert(HasParameterArea && "Parameter area must exist to pass an argument in memory."); LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, true, CFlags.IsTailCall, true, MemOpChains, TailCallArguments, dl); if (IsFastCall) ArgOffset += 16; } if (!IsFastCall) ArgOffset += 16; break; } } assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) && "mismatch in size of parameter area"); (void)NumBytesActuallyUsed; if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // Check if this is an indirect call (MTCTR/BCTRL). // See prepareDescriptorIndirectCall and buildCallOperands for more // information about calls through function pointers in the 64-bit SVR4 ABI. if (CFlags.IsIndirect) { // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the // caller in the TOC save area. if (isTOCSaveRestoreRequired(Subtarget)) { assert(!CFlags.IsTailCall && "Indirect tails calls not supported"); // Load r2 into a virtual register and store it to the TOC save area. 
setUsesTOCBasePtr(DAG); SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); // TOC save area offset. unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo::getStack( DAG.getMachineFunction(), TOCSaveOffset)); } // In the ELFv2 ABI, R12 must contain the address of an indirect callee. // This does not mean the MTCTR instruction must use R12; it's easier // to model this as an extra parameter, so do that. if (isELFv2ABI && !CFlags.IsPatchPoint) RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); } // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } if (CFlags.IsTailCall && !IsSibCall) PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp, TailCallArguments); return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, NumBytes, Ins, InVals, CB); } static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { const PPCSubtarget &Subtarget = static_cast( State.getMachineFunction().getSubtarget()); const bool IsPPC64 = Subtarget.isPPC64(); const Align PtrAlign = IsPPC64 ? Align(8) : Align(4); const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32; if (ValVT.isVector() && !State.getMachineFunction() .getTarget() .Options.EnableAIXExtendedAltivecABI) report_fatal_error("the default Altivec AIX ABI is not yet supported"); if (ValVT == MVT::f128) report_fatal_error("f128 is unimplemented on AIX."); if (ArgFlags.isNest()) report_fatal_error("Nest arguments are unimplemented."); static const MCPhysReg GPR_32[] = {// 32-bit registers. PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10}; static const MCPhysReg GPR_64[] = {// 64-bit registers. PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10}; static const MCPhysReg VR[] = {// Vector registers. PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13}; if (ArgFlags.isByVal()) { if (ArgFlags.getNonZeroByValAlign() > PtrAlign) report_fatal_error("Pass-by-value arguments with alignment greater than " "register width are not supported."); const unsigned ByValSize = ArgFlags.getByValSize(); // An empty aggregate parameter takes up no storage and no registers, // but needs a MemLoc for a stack slot for the formal arguments side. if (ByValSize == 0) { State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE, State.getNextStackOffset(), RegVT, LocInfo)); return false; } const unsigned StackSize = alignTo(ByValSize, PtrAlign); unsigned Offset = State.AllocateStack(StackSize, PtrAlign); for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrAlign.value()) { if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo)); else { State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE, Offset, MVT::INVALID_SIMPLE_VALUE_TYPE, LocInfo)); break; } } return false; } // Arguments always reserve parameter save area. 
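// [Editor's note] A minimal sketch of the by-value allocation rule encoded
// above in CC_AIX: the aggregate always reserves alignTo(ByValSize, PtrAlign)
// bytes of parameter save area and consumes one GPR per pointer-sized word of
// that reservation until the argument GPRs (r3..r10 / x3..x10) run out; the
// remainder lives only in memory.  Names are illustrative, so the block is
// kept disabled.
#if 0
#include <algorithm>
#include <cstdint>

struct ByValSplit {
  unsigned RegsUsed;      // GPRs consumed by this argument
  uint64_t BytesInMemory; // bytes of the reservation covered only by the PSA
};

static ByValSplit splitByValAIX(uint64_t ByValSize, uint64_t PtrByteSize,
                                unsigned GPRsRemaining) {
  const uint64_t Reserved =
      (ByValSize + PtrByteSize - 1) / PtrByteSize * PtrByteSize;
  const unsigned Words = unsigned(Reserved / PtrByteSize);
  const unsigned InRegs = std::min(Words, GPRsRemaining);
  return {InRegs, Reserved - uint64_t(InRegs) * PtrByteSize};
}
#endif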
switch (ValVT.SimpleTy) { default: report_fatal_error("Unhandled value type for argument."); case MVT::i64: // i64 arguments should have been split to i32 for PPC32. assert(IsPPC64 && "PPC32 should have split i64 values."); LLVM_FALLTHROUGH; case MVT::i1: case MVT::i32: { const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign); // AIX integer arguments are always passed in register width. if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits()) LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt : CCValAssign::LocInfo::ZExt; if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo)); else State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo)); return false; } case MVT::f32: case MVT::f64: { // Parameter save area (PSA) is reserved even if the float passes in fpr. const unsigned StoreSize = LocVT.getStoreSize(); // Floats are always 4-byte aligned in the PSA on AIX. // This includes f64 in 64-bit mode for ABI compatibility. const unsigned Offset = State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4)); unsigned FReg = State.AllocateReg(FPR); if (FReg) State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo)); // Reserve and initialize GPRs or initialize the PSA as required. for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) { if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) { assert(FReg && "An FPR should be available when a GPR is reserved."); if (State.isVarArg()) { // Successfully reserved GPRs are only initialized for vararg calls. // Custom handling is required for: // f64 in PPC32 needs to be split into 2 GPRs. // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR. State.addLoc( CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo)); } } else { // If there are insufficient GPRs, the PSA needs to be initialized. // Initialization occurs even if an FPR was initialized for // compatibility with the AIX XL compiler. The full memory for the // argument will be initialized even if a prior word is saved in GPR. // A custom memLoc is used when the argument also passes in FPR so // that the callee handling can skip over it easily. State.addLoc( FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo) : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); break; } } return false; } case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: case MVT::v2i64: case MVT::v2f64: case MVT::v1i128: { if (State.isVarArg()) report_fatal_error( "variadic arguments for vector types are unimplemented for AIX"); if (unsigned VReg = State.AllocateReg(VR)) State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo)); else { report_fatal_error( "passing vector parameters to the stack is unimplemented for AIX"); } return false; } } return true; } static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64) { assert((IsPPC64 || SVT != MVT::i64) && "i64 should have been split for 32-bit codegen."); switch (SVT) { default: report_fatal_error("Unexpected value type for formal argument"); case MVT::i1: case MVT::i32: case MVT::i64: return IsPPC64 ? 
&PPC::G8RCRegClass : &PPC::GPRCRegClass; case MVT::f32: return &PPC::F4RCRegClass; case MVT::f64: return &PPC::F8RCRegClass; case MVT::v4f32: case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: case MVT::v2i64: case MVT::v2f64: case MVT::v1i128: return &PPC::VRRCRegClass; } } static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl) { assert(ValVT.isScalarInteger() && LocVT.isScalarInteger()); assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits()); if (Flags.isSExt()) ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue, DAG.getValueType(ValVT)); else if (Flags.isZExt()) ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue, DAG.getValueType(ValVT)); return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue); } static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) { const unsigned LASize = FL->getLinkageSize(); if (PPC::GPRCRegClass.contains(Reg)) { assert(Reg >= PPC::R3 && Reg <= PPC::R10 && "Reg must be a valid argument register!"); return LASize + 4 * (Reg - PPC::R3); } if (PPC::G8RCRegClass.contains(Reg)) { assert(Reg >= PPC::X3 && Reg <= PPC::X10 && "Reg must be a valid argument register!"); return LASize + 8 * (Reg - PPC::X3); } llvm_unreachable("Only general purpose registers expected."); } // AIX ABI Stack Frame Layout: // // Low Memory +--------------------------------------------+ // SP +---> | Back chain | ---+ // | +--------------------------------------------+ | // | | Saved Condition Register | | // | +--------------------------------------------+ | // | | Saved Linkage Register | | // | +--------------------------------------------+ | Linkage Area // | | Reserved for compilers | | // | +--------------------------------------------+ | // | | Reserved for binders | | // | +--------------------------------------------+ | // | | Saved TOC pointer | ---+ // | +--------------------------------------------+ // | | Parameter save area | // | +--------------------------------------------+ // | | Alloca space | // | +--------------------------------------------+ // | | Local variable space | // | +--------------------------------------------+ // | | Float/int conversion temporary | // | +--------------------------------------------+ // | | Save area for AltiVec registers | // | +--------------------------------------------+ // | | AltiVec alignment padding | // | +--------------------------------------------+ // | | Save area for VRSAVE register | // | +--------------------------------------------+ // | | Save area for General Purpose registers | // | +--------------------------------------------+ // | | Save area for Floating Point registers | // | +--------------------------------------------+ // +---- | Back chain | // High Memory +--------------------------------------------+ // // Specifications: // AIX 7.2 Assembler Language Reference // Subroutine linkage convention SDValue PPCTargetLowering::LowerFormalArguments_AIX( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold || CallConv == CallingConv::Fast) && "Unexpected calling convention!"); if (getTargetMachine().Options.GuaranteedTailCallOpt) report_fatal_error("Tail call support is unimplemented on AIX."); if (useSoftFloat()) report_fatal_error("Soft float support is unimplemented on AIX."); const PPCSubtarget &Subtarget = 
static_cast(DAG.getSubtarget()); const bool IsPPC64 = Subtarget.isPPC64(); const unsigned PtrByteSize = IsPPC64 ? 8 : 4; // Assign locations to all of the incoming arguments. SmallVector ArgLocs; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo(); CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); const EVT PtrVT = getPointerTy(MF.getDataLayout()); // Reserve space for the linkage area on the stack. const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize)); CCInfo.AnalyzeFormalArguments(Ins, CC_AIX); SmallVector MemOps; for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) { CCValAssign &VA = ArgLocs[I++]; MVT LocVT = VA.getLocVT(); ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags; if (VA.isMemLoc() && VA.getValVT().isVector()) report_fatal_error( "passing vector parameters to the stack is unimplemented for AIX"); // For compatibility with the AIX XL compiler, the float args in the // parameter save area are initialized even if the argument is available // in register. The caller is required to initialize both the register // and memory, however, the callee can choose to expect it in either. // The memloc is dismissed here because the argument is retrieved from // the register. if (VA.isMemLoc() && VA.needsCustom()) continue; if (VA.isRegLoc()) { if (VA.getValVT().isScalarInteger()) FuncInfo->appendParameterType(PPCFunctionInfo::FixedType); else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) FuncInfo->appendParameterType(VA.getValVT().SimpleTy == MVT::f32 ? PPCFunctionInfo::ShortFloatPoint : PPCFunctionInfo::LongFloatPoint); } if (Flags.isByVal() && VA.isMemLoc()) { const unsigned Size = alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize, PtrByteSize); const int FI = MF.getFrameInfo().CreateFixedObject( Size, VA.getLocMemOffset(), /* IsImmutable */ false, /* IsAliased */ true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back(FIN); continue; } if (Flags.isByVal()) { assert(VA.isRegLoc() && "MemLocs should already be handled."); const MCPhysReg ArgReg = VA.getLocReg(); const PPCFrameLowering *FL = Subtarget.getFrameLowering(); if (Flags.getNonZeroByValAlign() > PtrByteSize) report_fatal_error("Over aligned byvals not supported yet."); const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize); const int FI = MF.getFrameInfo().CreateFixedObject( StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false, /* IsAliased */ true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back(FIN); // Add live ins for all the RegLocs for the same ByVal. const TargetRegisterClass *RegClass = IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg, unsigned Offset) { const unsigned VReg = MF.addLiveIn(PhysReg, RegClass); // Since the callers side has left justified the aggregate in the // register, we can simply store the entire register into the stack // slot. SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT); // The store to the fixedstack object is needed becuase accessing a // field of the ByVal will use a gep and load. Ideally we will optimize // to extracting the value from the register directly, and elide the // stores when the arguments address is not taken, but that will need to // be future work. 
SDValue Store = DAG.getStore( CopyFrom.getValue(1), dl, CopyFrom, DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)), MachinePointerInfo::getFixedStack(MF, FI, Offset)); MemOps.push_back(Store); }; unsigned Offset = 0; HandleRegLoc(VA.getLocReg(), Offset); Offset += PtrByteSize; for (; Offset != StackSize && ArgLocs[I].isRegLoc(); Offset += PtrByteSize) { assert(ArgLocs[I].getValNo() == VA.getValNo() && "RegLocs should be for ByVal argument."); const CCValAssign RL = ArgLocs[I++]; HandleRegLoc(RL.getLocReg(), Offset); FuncInfo->appendParameterType(PPCFunctionInfo::FixedType); } if (Offset != StackSize) { assert(ArgLocs[I].getValNo() == VA.getValNo() && "Expected MemLoc for remaining bytes."); assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes."); // Consume the MemLoc.The InVal has already been emitted, so nothing // more needs to be done. ++I; } continue; } EVT ValVT = VA.getValVT(); if (VA.isRegLoc() && !VA.needsCustom()) { MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy; unsigned VReg = MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64)); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT); if (ValVT.isScalarInteger() && (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) { ArgValue = truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl); } InVals.push_back(ArgValue); continue; } if (VA.isMemLoc()) { const unsigned LocSize = LocVT.getStoreSize(); const unsigned ValSize = ValVT.getStoreSize(); assert((ValSize <= LocSize) && "Object size is larger than size of MemLoc"); int CurArgOffset = VA.getLocMemOffset(); // Objects are right-justified because AIX is big-endian. if (LocSize > ValSize) CurArgOffset += LocSize - ValSize; // Potential tail calls could cause overwriting of argument stack slots. const bool IsImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && (CallConv == CallingConv::Fast)); int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue ArgValue = DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo()); InVals.push_back(ArgValue); continue; } } // On AIX a minimum of 8 words is saved to the parameter save area. const unsigned MinParameterSaveArea = 8 * PtrByteSize; // Area that is at least reserved in the caller of this function. unsigned CallerReservedArea = std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea); // Set the size that is at least reserved in caller of this function. Tail // call optimized function's reserved stack space needs to be aligned so // that taking the difference between two stack areas will result in an // aligned stack. CallerReservedArea = EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea); FuncInfo->setMinReservedArea(CallerReservedArea); if (isVarArg) { FuncInfo->setVarArgsFrameIndex( MFI.CreateFixedObject(PtrByteSize, CCInfo.getNextStackOffset(), true)); SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10}; static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10}; const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32); // The fixed integer arguments of a variadic function are stored to the // VarArgsFrameIndex on the stack so that they may be loaded by // dereferencing the result of va_next. 
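// [Editor's note] A minimal sketch of the loop bound used just below: the
// first GPR that still needs a spill for va_start is the one whose home slot
// sits at the current stack offset, i.e. (NextStackOffset - LinkageSize) /
// PtrByteSize; earlier GPRs already carried named arguments.  The helper
// name is illustrative only, so the block is kept disabled.
#if 0
static unsigned firstVarArgGPRIndex(unsigned NextStackOffset,
                                    unsigned LinkageSize,
                                    unsigned PtrByteSize) {
  // e.g. on 64-bit AIX with two named integer arguments, NextStackOffset is
  // LinkageSize + 16, so spilling starts at GPR index 2 (x5).
  return (NextStackOffset - LinkageSize) / PtrByteSize;
}
#endif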
for (unsigned GPRIndex = (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize; GPRIndex < NumGPArgRegs; ++GPRIndex) { const unsigned VReg = IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass) : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo()); MemOps.push_back(Store); // Increment the address for the next argument to store. SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT); FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } } if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return Chain; } SDValue PPCTargetLowering::LowerCall_AIX( SDValue Chain, SDValue Callee, CallFlags CFlags, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals, const CallBase *CB) const { // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the // AIX ABI stack frame layout. assert((CFlags.CallConv == CallingConv::C || CFlags.CallConv == CallingConv::Cold || CFlags.CallConv == CallingConv::Fast) && "Unexpected calling convention!"); if (CFlags.IsPatchPoint) report_fatal_error("This call type is unimplemented on AIX."); const PPCSubtarget& Subtarget = static_cast(DAG.getSubtarget()); MachineFunction &MF = DAG.getMachineFunction(); SmallVector ArgLocs; CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs, *DAG.getContext()); // Reserve space for the linkage save area (LSA) on the stack. // In both PPC32 and PPC64 there are 6 reserved slots in the LSA: // [SP][CR][LR][2 x reserved][TOC]. // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64. const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize(); const bool IsPPC64 = Subtarget.isPPC64(); const EVT PtrVT = getPointerTy(DAG.getDataLayout()); const unsigned PtrByteSize = IsPPC64 ? 8 : 4; CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize)); CCInfo.AnalyzeCallOperands(Outs, CC_AIX); // The prolog code of the callee may store up to 8 GPR argument registers to // the stack, allowing va_start to index over them in memory if the callee // is variadic. // Because we cannot tell if this is needed on the caller side, we have to // conservatively assume that it is needed. As such, make sure we have at // least enough stack space for the caller to store the 8 GPRs. const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize; const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize, CCInfo.getNextStackOffset()); // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass. Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl); SDValue CallSeqStart = Chain; SmallVector, 8> RegsToPass; SmallVector MemOpChains; // Set up a copy of the stack pointer for loading and storing any // arguments that may not fit in the registers available for argument // passing. const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64) : DAG.getRegister(PPC::R1, MVT::i32); for (unsigned I = 0, E = ArgLocs.size(); I != E;) { const unsigned ValNo = ArgLocs[I].getValNo(); SDValue Arg = OutVals[ValNo]; ISD::ArgFlagsTy Flags = Outs[ValNo].Flags; if (Flags.isByVal()) { const unsigned ByValSize = Flags.getByValSize(); // Nothing to do for zero-sized ByVals on the caller side. 
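// [Editor's note] The residue handling further below loads the final
// (partial) register of a by-value argument as a sequence of power-of-two
// chunks and left-justifies each chunk before OR-ing them together.  A
// minimal sketch of that decomposition, assuming 64-bit registers; the names
// are illustrative only, so the block is kept disabled.
#if 0
#include <vector>

struct ResidueChunk {
  unsigned LoadBytes; // 4, 2 or 1
  unsigned ShiftLeft; // bits to shift so the chunk is left-justified
};

static std::vector<ResidueChunk> planResidueLoads(unsigned ResidueBytes,
                                                  unsigned PtrByteSize) {
  std::vector<ResidueChunk> Plan;
  unsigned Bytes = 0;
  while (Bytes != ResidueBytes) {
    // Largest power of two not exceeding what is left (PowerOf2Floor).
    unsigned N = 1;
    while (N * 2 <= ResidueBytes - Bytes)
      N *= 2;
    Bytes += N;
    Plan.push_back({N, PtrByteSize * 8 - Bytes * 8});
  }
  // For a 7-byte residue this yields {4,32}, {2,16}, {1,8}, matching the
  // shift amounts computed in the loop below.
  return Plan;
}
#endif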
if (!ByValSize) { ++I; continue; } auto GetLoad = [&](EVT VT, unsigned LoadOffset) { return DAG.getExtLoad( ISD::ZEXTLOAD, dl, PtrVT, Chain, (LoadOffset != 0) ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset)) : Arg, MachinePointerInfo(), VT); }; unsigned LoadOffset = 0; // Initialize registers, which are fully occupied by the by-val argument. while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) { SDValue Load = GetLoad(PtrVT, LoadOffset); MemOpChains.push_back(Load.getValue(1)); LoadOffset += PtrByteSize; const CCValAssign &ByValVA = ArgLocs[I++]; assert(ByValVA.getValNo() == ValNo && "Unexpected location for pass-by-value argument."); RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load)); } if (LoadOffset == ByValSize) continue; // There must be one more loc to handle the remainder. assert(ArgLocs[I].getValNo() == ValNo && "Expected additional location for by-value argument."); if (ArgLocs[I].isMemLoc()) { assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg."); const CCValAssign &ByValVA = ArgLocs[I++]; ISD::ArgFlagsTy MemcpyFlags = Flags; // Only memcpy the bytes that don't pass in register. MemcpyFlags.setByValSize(ByValSize - LoadOffset); Chain = CallSeqStart = createMemcpyOutsideCallSeq( (LoadOffset != 0) ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset)) : Arg, DAG.getObjectPtrOffset(dl, StackPtr, TypeSize::Fixed(ByValVA.getLocMemOffset())), CallSeqStart, MemcpyFlags, DAG, dl); continue; } // Initialize the final register residue. // Any residue that occupies the final by-val arg register must be // left-justified on AIX. Loads must be a power-of-2 size and cannot be // larger than the ByValSize. For example: a 7 byte by-val arg requires 4, // 2 and 1 byte loads. const unsigned ResidueBytes = ByValSize % PtrByteSize; assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize && "Unexpected register residue for by-value argument."); SDValue ResidueVal; for (unsigned Bytes = 0; Bytes != ResidueBytes;) { const unsigned N = PowerOf2Floor(ResidueBytes - Bytes); const MVT VT = N == 1 ? MVT::i8 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64)); SDValue Load = GetLoad(VT, LoadOffset); MemOpChains.push_back(Load.getValue(1)); LoadOffset += N; Bytes += N; // By-val arguments are passed left-justfied in register. // Every load here needs to be shifted, otherwise a full register load // should have been used. assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) && "Unexpected load emitted during handling of pass-by-value " "argument."); unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8); EVT ShiftAmountTy = getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout()); SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy); SDValue ShiftedLoad = DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt); ResidueVal = ResidueVal ? 
DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal, ShiftedLoad) : ShiftedLoad; } const CCValAssign &ByValVA = ArgLocs[I++]; RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal)); continue; } CCValAssign &VA = ArgLocs[I++]; const MVT LocVT = VA.getLocVT(); const MVT ValVT = VA.getValVT(); if (VA.isMemLoc() && VA.getValVT().isVector()) report_fatal_error( "passing vector parameters to the stack is unimplemented for AIX"); switch (VA.getLocInfo()) { default: report_fatal_error("Unexpected argument extension type."); case CCValAssign::Full: break; case CCValAssign::ZExt: Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::SExt: Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); break; } if (VA.isRegLoc() && !VA.needsCustom()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); continue; } if (VA.isMemLoc()) { SDValue PtrOff = DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); MemOpChains.push_back( DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo())); continue; } // Custom handling is used for GPR initializations for vararg float // arguments. assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg && ValVT.isFloatingPoint() && LocVT.isInteger() && "Unexpected register handling for calling convention."); SDValue ArgAsInt = DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg); if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize()) // f32 in 32-bit GPR // f64 in 64-bit GPR RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt)); else if (Arg.getValueType().getFixedSizeInBits() < LocVT.getFixedSizeInBits()) // f32 in 64-bit GPR. RegsToPass.push_back(std::make_pair( VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT))); else { // f64 in two 32-bit GPRs // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs. assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 && "Unexpected custom register for argument!"); CCValAssign &GPR1 = VA; SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt, DAG.getConstant(32, dl, MVT::i8)); RegsToPass.push_back(std::make_pair( GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32))); if (I != E) { // If only 1 GPR was available, there will only be one custom GPR and // the argument will also pass in memory. CCValAssign &PeekArg = ArgLocs[I]; if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) { assert(PeekArg.needsCustom() && "A second custom GPR is expected."); CCValAssign &GPR2 = ArgLocs[I++]; RegsToPass.push_back(std::make_pair( GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32))); } } } } if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // For indirect calls, we need to save the TOC base to the stack for // restoration after the call. if (CFlags.IsIndirect) { assert(!CFlags.IsTailCall && "Indirect tail-calls not supported."); const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister(); const MCRegister StackPtrReg = Subtarget.getStackPointerRegister(); const MVT PtrVT = Subtarget.isPPC64() ? 
MVT::i64 : MVT::i32; const unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); setUsesTOCBasePtr(DAG); SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT); SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); Chain = DAG.getStore( Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset)); } // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; for (auto Reg : RegsToPass) { Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag); InFlag = Chain.getValue(1); } const int SPDiff = 0; return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff, NumBytes, Ins, InVals, CB); } bool PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl &Outs, LLVMContext &Context) const { SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); return CCInfo.CheckReturn( Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold) ? RetCC_PPC_Cold : RetCC_PPC); } SDValue PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &dl, SelectionDAG &DAG) const { SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeReturn(Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold) ? RetCC_PPC_Cold : RetCC_PPC); SDValue Flag; SmallVector RetOps(1, Chain); // Copy the result values into the output registers. for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); SDValue Arg = OutVals[RealResIdx]; switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::AExt: Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::ZExt: Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); break; case CCValAssign::SExt: Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); break; } if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) { bool isLittleEndian = Subtarget.isLittleEndian(); // Legalize ret f64 -> ret 2 x i32. SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg, DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl)); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg, DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl)); Flag = Chain.getValue(1); VA = RVLocs[++i]; // skip ahead to next loc Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag); } else Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } RetOps[0] = Chain; // Update chain. // Add the flag if we have it. if (Flag.getNode()) RetOps.push_back(Flag); return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps); } SDValue PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); // Get the correct type for integers. 
EVT IntVT = Op.getValueType(); // Get the inputs. SDValue Chain = Op.getOperand(0); SDValue FPSIdx = getFramePointerFrameIndex(DAG); // Build a DYNAREAOFFSET node. SDValue Ops[2] = {Chain, FPSIdx}; SDVTList VTs = DAG.getVTList(IntVT); return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops); } SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const { // When we pop the dynamic allocation we need to restore the SP link. SDLoc dl(Op); // Get the correct type for pointers. EVT PtrVT = getPointerTy(DAG.getDataLayout()); // Construct the stack pointer operand. bool isPPC64 = Subtarget.isPPC64(); unsigned SP = isPPC64 ? PPC::X1 : PPC::R1; SDValue StackPtr = DAG.getRegister(SP, PtrVT); // Get the operands for the STACKRESTORE. SDValue Chain = Op.getOperand(0); SDValue SaveSP = Op.getOperand(1); // Load the old link SP. SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo()); // Restore the stack pointer. Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP); // Store the old link SP. return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo()); } SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool isPPC64 = Subtarget.isPPC64(); EVT PtrVT = getPointerTy(MF.getDataLayout()); // Get current frame pointer save index. The users of this index will be // primarily DYNALLOC instructions. PPCFunctionInfo *FI = MF.getInfo(); int RASI = FI->getReturnAddrSaveIndex(); // If the frame pointer save index hasn't been defined yet. if (!RASI) { // Find out what the fix offset of the frame pointer save area. int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset(); // Allocate the frame index for frame pointer save area. RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false); // Save the result. FI->setReturnAddrSaveIndex(RASI); } return DAG.getFrameIndex(RASI, PtrVT); } SDValue PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool isPPC64 = Subtarget.isPPC64(); EVT PtrVT = getPointerTy(MF.getDataLayout()); // Get current frame pointer save index. The users of this index will be // primarily DYNALLOC instructions. PPCFunctionInfo *FI = MF.getInfo(); int FPSI = FI->getFramePointerSaveIndex(); // If the frame pointer save index hasn't been defined yet. if (!FPSI) { // Find out what the fix offset of the frame pointer save area. int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset(); // Allocate the frame index for frame pointer save area. FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); // Save the result. FI->setFramePointerSaveIndex(FPSI); } return DAG.getFrameIndex(FPSI, PtrVT); } SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); // Get the inputs. SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); SDLoc dl(Op); // Get the correct type for pointers. EVT PtrVT = getPointerTy(DAG.getDataLayout()); // Negate the size. SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT, DAG.getConstant(0, dl, PtrVT), Size); // Construct a node for the frame pointer save index. 
SDValue FPSIdx = getFramePointerFrameIndex(DAG); SDValue Ops[3] = { Chain, NegSize, FPSIdx }; SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other); if (hasInlineStackProbe(MF)) return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops); return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops); } SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool isPPC64 = Subtarget.isPPC64(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false); return DAG.getFrameIndex(FI, PtrVT); } SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL, DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), Op.getOperand(1)); } SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0), Op.getOperand(1)); } SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { if (Op.getValueType().isVector()) return LowerVectorLoad(Op, DAG); assert(Op.getValueType() == MVT::i1 && "Custom lowering only for i1 loads"); // First, load 8 bits into 32 bits, then truncate to 1 bit. SDLoc dl(Op); LoadSDNode *LD = cast(Op); SDValue Chain = LD->getChain(); SDValue BasePtr = LD->getBasePtr(); MachineMemOperand *MMO = LD->getMemOperand(); SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain, BasePtr, MVT::i8, MMO); SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD); SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) }; return DAG.getMergeValues(Ops, dl); } SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { if (Op.getOperand(1).getValueType().isVector()) return LowerVectorStore(Op, DAG); assert(Op.getOperand(1).getValueType() == MVT::i1 && "Custom lowering only for i1 stores"); // First, zero extend to 32 bits, then use a truncating store to 8 bits. SDLoc dl(Op); StoreSDNode *ST = cast(Op); SDValue Chain = ST->getChain(); SDValue BasePtr = ST->getBasePtr(); SDValue Value = ST->getValue(); MachineMemOperand *MMO = ST->getMemOperand(); Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()), Value); return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO); } // FIXME: Remove this once the ANDI glue bug is fixed: SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { assert(Op.getValueType() == MVT::i1 && "Custom lowering only for i1 results"); SDLoc DL(Op); return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0)); } SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op, SelectionDAG &DAG) const { // Implements a vector truncate that fits in a vector register as a shuffle. // We want to legalize vector truncates down to where the source fits in // a vector register (and target is therefore smaller than vector register // size). At that point legalization will try to custom lower the sub-legal // result and get here - where we can contain the truncate as a single target // operation. 
// For example a trunc <2 x i16> to <2 x i8> could be visualized as follows: // to // // We will implement it for big-endian ordering as this (where x denotes // undefined): // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u> // // The same operation in little-endian ordering will be: // to // EVT TrgVT = Op.getValueType(); assert(TrgVT.isVector() && "Vector type expected."); unsigned TrgNumElts = TrgVT.getVectorNumElements(); EVT EltVT = TrgVT.getVectorElementType(); if (!isOperationCustom(Op.getOpcode(), TrgVT) || TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) || !isPowerOf2_32(EltVT.getSizeInBits())) return SDValue(); SDValue N1 = Op.getOperand(0); EVT SrcVT = N1.getValueType(); unsigned SrcSize = SrcVT.getSizeInBits(); if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) || !isPowerOf2_32(SrcVT.getVectorElementType().getSizeInBits())) return SDValue(); if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2) return SDValue(); unsigned WideNumElts = 128 / EltVT.getSizeInBits(); EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts); SDLoc DL(Op); SDValue Op1, Op2; if (SrcSize == 256) { EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout()); EVT SplitVT = N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext()); unsigned SplitNumElts = SplitVT.getVectorNumElements(); Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1, DAG.getConstant(0, DL, VecIdxTy)); Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1, DAG.getConstant(SplitNumElts, DL, VecIdxTy)); } else { Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL); Op2 = DAG.getUNDEF(WideVT); } // First list the elements we want to keep. unsigned SizeMult = SrcSize / TrgVT.getSizeInBits(); SmallVector ShuffV; if (Subtarget.isLittleEndian()) for (unsigned i = 0; i < TrgNumElts; ++i) ShuffV.push_back(i * SizeMult); else for (unsigned i = 1; i <= TrgNumElts; ++i) ShuffV.push_back(i * SizeMult - 1); // Populate the remaining elements with undefs. for (unsigned i = TrgNumElts; i < WideNumElts; ++i) // ShuffV.push_back(i + WideNumElts); ShuffV.push_back(WideNumElts + 1); Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1); Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2); return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV); } /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when /// possible. SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // Not FP, or using SPE? Not a fsel. if (!Op.getOperand(0).getValueType().isFloatingPoint() || !Op.getOperand(2).getValueType().isFloatingPoint() || Subtarget.hasSPE()) return Op; ISD::CondCode CC = cast(Op.getOperand(4))->get(); EVT ResVT = Op.getValueType(); EVT CmpVT = Op.getOperand(0).getValueType(); SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); SDValue TV = Op.getOperand(2), FV = Op.getOperand(3); SDLoc dl(Op); SDNodeFlags Flags = Op.getNode()->getFlags(); // We have xsmaxcdp/xsmincdp which are OK to emit even in the // presence of infinities. if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) { switch (CC) { default: break; case ISD::SETOGT: case ISD::SETGT: return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS); case ISD::SETOLT: case ISD::SETLT: return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS); } } // We might be able to do better than this under some circumstances, but in // general, fsel-based lowering of select is a finite-math-only optimization. 
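// [Editor's note] For readers unfamiliar with fsel: it computes
// (a >= 0.0) ? b : c, so the lowering below materializes the comparison as a
// subtraction and swaps operands where needed.  A minimal scalar model of the
// SETLT case; the helper names are illustrative only and deliberately ignore
// the NaN/Inf caveats that make this a finite-math-only transform, so the
// block is kept disabled.
#if 0
static double fselModel(double A, double B, double C) {
  return (A >= 0.0) ? B : C; // semantics of the PPC fsel instruction
}

// select (LHS < RHS), TV, FV  ==>  fsel(LHS - RHS, FV, TV)
static double lowerSetLT(double LHS, double RHS, double TV, double FV) {
  return fselModel(LHS - RHS, FV, TV);
}
#endif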
// For more information, see section F.3 of the 2.06 ISA specification. // With ISA 3.0 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) || (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs())) return Op; // If the RHS of the comparison is a 0.0, we don't need to do the // subtraction at all. SDValue Sel1; if (isFloatingPointZero(RHS)) switch (CC) { default: break; // SETUO etc aren't handled by fsel. case ISD::SETNE: std::swap(TV, FV); LLVM_FALLTHROUGH; case ISD::SETEQ: if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); return DAG.getNode(PPCISD::FSEL, dl, ResVT, DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV); case ISD::SETULT: case ISD::SETLT: std::swap(TV, FV); // fsel is natively setge, swap operands for setlt LLVM_FALLTHROUGH; case ISD::SETOGE: case ISD::SETGE: if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); case ISD::SETUGT: case ISD::SETGT: std::swap(TV, FV); // fsel is natively setge, swap operands for setlt LLVM_FALLTHROUGH; case ISD::SETOLE: case ISD::SETLE: if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); return DAG.getNode(PPCISD::FSEL, dl, ResVT, DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV); } SDValue Cmp; switch (CC) { default: break; // SETUO etc aren't handled by fsel. case ISD::SETNE: std::swap(TV, FV); LLVM_FALLTHROUGH; case ISD::SETEQ: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); return DAG.getNode(PPCISD::FSEL, dl, ResVT, DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); case ISD::SETULT: case ISD::SETLT: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOGE: case ISD::SETGE: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); case ISD::SETUGT: case ISD::SETGT: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOLE: case ISD::SETLE: Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); } return Op; } static unsigned getPPCStrictOpcode(unsigned Opc) { switch (Opc) { default: llvm_unreachable("No strict version of this opcode!"); case PPCISD::FCTIDZ: return PPCISD::STRICT_FCTIDZ; case PPCISD::FCTIWZ: return 
PPCISD::STRICT_FCTIWZ; case PPCISD::FCTIDUZ: return PPCISD::STRICT_FCTIDUZ; case PPCISD::FCTIWUZ: return PPCISD::STRICT_FCTIWUZ; case PPCISD::FCFID: return PPCISD::STRICT_FCFID; case PPCISD::FCFIDU: return PPCISD::STRICT_FCFIDU; case PPCISD::FCFIDS: return PPCISD::STRICT_FCFIDS; case PPCISD::FCFIDUS: return PPCISD::STRICT_FCFIDUS; } } static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) { SDLoc dl(Op); bool IsStrict = Op->isStrictFPOpcode(); bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT || Op.getOpcode() == ISD::STRICT_FP_TO_SINT; // TODO: Any other flags to propagate? SDNodeFlags Flags; Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept()); // For strict nodes, source is the second operand. SDValue Src = Op.getOperand(IsStrict ? 1 : 0); SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue(); assert(Src.getValueType().isFloatingPoint()); if (Src.getValueType() == MVT::f32) { if (IsStrict) { Src = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags); Chain = Src.getValue(1); } else Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); } SDValue Conv; unsigned Opc = ISD::DELETED_NODE; switch (Op.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); case MVT::i32: Opc = IsSigned ? PPCISD::FCTIWZ : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ); break; case MVT::i64: assert((IsSigned || Subtarget.hasFPCVT()) && "i64 FP_TO_UINT is supported only with FPCVT"); Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ; } if (IsStrict) { Opc = getPPCStrictOpcode(Opc); Conv = DAG.getNode(Opc, dl, DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags); } else { Conv = DAG.getNode(Opc, dl, MVT::f64, Src); } return Conv; } void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, SelectionDAG &DAG, const SDLoc &dl) const { SDValue Tmp = convertFPToInt(Op, DAG, Subtarget); bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT || Op.getOpcode() == ISD::STRICT_FP_TO_SINT; bool IsStrict = Op->isStrictFPOpcode(); // Convert the FP value to an int value through memory. bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() && (IsSigned || Subtarget.hasFPCVT()); SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); int FI = cast(FIPtr)->getIndex(); MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); // Emit a store to the stack slot. SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode(); Align Alignment(DAG.getEVTAlign(Tmp.getValueType())); if (i32Stack) { MachineFunction &MF = DAG.getMachineFunction(); Alignment = Align(4); MachineMemOperand *MMO = MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment); SDValue Ops[] = { Chain, Tmp, FIPtr }; Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO); } else Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment); // Result is a load from the stack slot. If loading 4 bytes, make sure to // add in a bias on big endian. if (Op.getValueType() == MVT::i32 && !i32Stack) { FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, DAG.getConstant(4, dl, FIPtr.getValueType())); MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 
0 : 4); } RLI.Chain = Chain; RLI.Ptr = FIPtr; RLI.MPI = MPI; RLI.Alignment = Alignment; } /// Custom lowers floating point to integer conversions to use /// the direct move instructions available in ISA 2.07 to avoid the /// need for load/store combinations. SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const { SDValue Conv = convertFPToInt(Op, DAG, Subtarget); SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv); if (Op->isStrictFPOpcode()) return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl); else return Mov; } SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const { bool IsStrict = Op->isStrictFPOpcode(); bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT || Op.getOpcode() == ISD::STRICT_FP_TO_SINT; SDValue Src = Op.getOperand(IsStrict ? 1 : 0); EVT SrcVT = Src.getValueType(); EVT DstVT = Op.getValueType(); // FP to INT conversions are legal for f128. if (SrcVT == MVT::f128) return Subtarget.hasP9Vector() ? Op : SDValue(); // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on // PPC (the libcall is not available). if (SrcVT == MVT::ppcf128) { if (DstVT == MVT::i32) { // TODO: Conservatively pass only nofpexcept flag here. Need to check and // set other fast-math flags to FP operations in both strict and // non-strict cases. (FP_TO_SINT, FSUB) SDNodeFlags Flags; Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept()); if (IsSigned) { SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src, DAG.getIntPtrConstant(0, dl)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src, DAG.getIntPtrConstant(1, dl)); // Add the two halves of the long double in round-to-zero mode, and use // a smaller FP_TO_SINT. if (IsStrict) { SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl, DAG.getVTList(MVT::f64, MVT::Other), {Op.getOperand(0), Lo, Hi}, Flags); return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, DAG.getVTList(MVT::i32, MVT::Other), {Res.getValue(1), Res}, Flags); } else { SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res); } } else { const uint64_t TwoE31[] = {0x41e0000000000000LL, 0}; APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31)); SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT); SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT); if (IsStrict) { // Sel = Src < 0x80000000 // FltOfs = select Sel, 0.0, 0x80000000 // IntOfs = select Sel, 0, 0x80000000 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs SDValue Chain = Op.getOperand(0); EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT); EVT DstSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT); SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, Chain, true); Chain = Sel.getValue(1); SDValue FltOfs = DAG.getSelect( dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst); Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT); SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, DAG.getVTList(SrcVT, MVT::Other), {Chain, Src, FltOfs}, Flags); Chain = Val.getValue(1); SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, DAG.getVTList(DstVT, MVT::Other), {Chain, Val}, Flags); Chain = SInt.getValue(1); SDValue IntOfs = DAG.getSelect( dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask); SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs); return DAG.getMergeValues({Result, Chain}, dl); } else { // X>=2^31 ? 
(int)(X-2^31)+0x80000000 : (int)X // FIXME: generated code sucks. SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst); True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True); True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask); SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src); return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE); } } } return SDValue(); } if (Subtarget.hasDirectMove() && Subtarget.isPPC64()) return LowerFP_TO_INTDirectMove(Op, DAG, dl); ReuseLoadInfo RLI; LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges); } // We're trying to insert a regular store, S, and then a load, L. If the // incoming value, O, is a load, we might just be able to have our load use the // address used by O. However, we don't know if anything else will store to // that address before we can load from it. To prevent this situation, we need // to insert our load, L, into the chain as a peer of O. To do this, we give L // the same chain operand as O, we create a token factor from the chain results // of O and L, and we replace all uses of O's chain result with that token // factor (see spliceIntoChain below for this last part). bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI, SelectionDAG &DAG, ISD::LoadExtType ET) const { // Conservatively skip reusing for constrained FP nodes. if (Op->isStrictFPOpcode()) return false; SDLoc dl(Op); bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT && (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32); if (ET == ISD::NON_EXTLOAD && (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) && isOperationLegalOrCustom(Op.getOpcode(), Op.getOperand(0).getValueType())) { LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); return true; } LoadSDNode *LD = dyn_cast(Op); if (!LD || LD->getExtensionType() != ET || LD->isVolatile() || LD->isNonTemporal()) return false; if (LD->getMemoryVT() != MemVT) return false; // If the result of the load is an illegal type, then we can't build a // valid chain for reuse since the legalised loads and token factor node that // ties the legalised loads together uses a different output chain then the // illegal load. if (!isTypeLegal(LD->getValueType(0))) return false; RLI.Ptr = LD->getBasePtr(); if (LD->isIndexed() && !LD->getOffset().isUndef()) { assert(LD->getAddressingMode() == ISD::PRE_INC && "Non-pre-inc AM on PPC?"); RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr, LD->getOffset()); } RLI.Chain = LD->getChain(); RLI.MPI = LD->getPointerInfo(); RLI.IsDereferenceable = LD->isDereferenceable(); RLI.IsInvariant = LD->isInvariant(); RLI.Alignment = LD->getAlign(); RLI.AAInfo = LD->getAAInfo(); RLI.Ranges = LD->getRanges(); RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1); return true; } // Given the head of the old chain, ResChain, insert a token factor containing // it and NewResChain, and make users of ResChain now be users of that token // factor. // TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead. 
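// As a rough illustration: if the original chain edge was
//   O(ch) --> U1, U2            (U1/U2 being whatever used O's chain result)
// then after giving the new load L the same chain operand as O and splicing,
//   TF = TokenFactor(O(ch), L(ch));   uses of O(ch) now consume TF
// so every former user of O's chain is ordered after both O and L. The
// function below first builds the TokenFactor with a placeholder undef
// operand and then patches the real operands in.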
void PPCTargetLowering::spliceIntoChain(SDValue ResChain, SDValue NewResChain, SelectionDAG &DAG) const { if (!ResChain) return; SDLoc dl(NewResChain); SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, NewResChain, DAG.getUNDEF(MVT::Other)); assert(TF.getNode() != NewResChain.getNode() && "A new TF really is required here"); DAG.ReplaceAllUsesOfValueWith(ResChain, TF); DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain); } /// Analyze profitability of direct move /// prefer float load to int load plus direct move /// when there is no integer use of int load bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const { SDNode *Origin = Op.getOperand(0).getNode(); if (Origin->getOpcode() != ISD::LOAD) return true; // If there is no LXSIBZX/LXSIHZX, like Power8, // prefer direct move if the memory size is 1 or 2 bytes. MachineMemOperand *MMO = cast(Origin)->getMemOperand(); if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2) return true; for (SDNode::use_iterator UI = Origin->use_begin(), UE = Origin->use_end(); UI != UE; ++UI) { // Only look at the users of the loaded value. if (UI.getUse().get().getResNo() != 0) continue; if (UI->getOpcode() != ISD::SINT_TO_FP && UI->getOpcode() != ISD::UINT_TO_FP && UI->getOpcode() != ISD::STRICT_SINT_TO_FP && UI->getOpcode() != ISD::STRICT_UINT_TO_FP) return true; } return false; } static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain = SDValue()) { bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP || Op.getOpcode() == ISD::STRICT_SINT_TO_FP; SDLoc dl(Op); // TODO: Any other flags to propagate? SDNodeFlags Flags; Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept()); // If we have FCFIDS, then use it when converting to single-precision. // Otherwise, convert to double-precision and then round. bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT(); unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS) : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU); EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64; if (Op->isStrictFPOpcode()) { if (!Chain) Chain = Op.getOperand(0); return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags); } else return DAG.getNode(ConvOpc, dl, ConvTy, Src); } /// Custom lowers integer to floating point conversions to use /// the direct move instructions available in ISA 2.07 to avoid the /// need for load/store combinations. SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const { assert((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) && "Invalid floating point type as target of conversion"); assert(Subtarget.hasFPCVT() && "Int to FP conversions with direct moves require FPCVT"); SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0); bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32; bool Signed = Op.getOpcode() == ISD::SINT_TO_FP || Op.getOpcode() == ISD::STRICT_SINT_TO_FP; unsigned MovOpc = (WordInt && !Signed) ? 
PPCISD::MTVSRZ : PPCISD::MTVSRA; SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src); return convertIntToFP(Op, Mov, DAG, Subtarget); } static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) { EVT VecVT = Vec.getValueType(); assert(VecVT.isVector() && "Expected a vector type."); assert(VecVT.getSizeInBits() < 128 && "Vector is already full width."); EVT EltVT = VecVT.getVectorElementType(); unsigned WideNumElts = 128 / EltVT.getSizeInBits(); EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts); unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements(); SmallVector Ops(NumConcat); Ops[0] = Vec; SDValue UndefVec = DAG.getUNDEF(VecVT); for (unsigned i = 1; i < NumConcat; ++i) Ops[i] = UndefVec; return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops); } SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const { bool IsStrict = Op->isStrictFPOpcode(); unsigned Opc = Op.getOpcode(); SDValue Src = Op.getOperand(IsStrict ? 1 : 0); assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) && "Unexpected conversion type"); assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) && "Supports conversions to v2f64/v4f32 only."); // TODO: Any other flags to propagate? SDNodeFlags Flags; Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept()); bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP; bool FourEltRes = Op.getValueType() == MVT::v4f32; SDValue Wide = widenVec(DAG, Src, dl); EVT WideVT = Wide.getValueType(); unsigned WideNumElts = WideVT.getVectorNumElements(); MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64; SmallVector ShuffV; for (unsigned i = 0; i < WideNumElts; ++i) ShuffV.push_back(i + WideNumElts); int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2; int SaveElts = FourEltRes ? 4 : 2; if (Subtarget.isLittleEndian()) for (int i = 0; i < SaveElts; i++) ShuffV[i * Stride] = i; else for (int i = 1; i <= SaveElts; i++) ShuffV[i * Stride - 1] = i - 1; SDValue ShuffleSrc2 = SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT); SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV); SDValue Extend; if (SignedConv) { Arrange = DAG.getBitcast(IntermediateVT, Arrange); EVT ExtVT = Src.getValueType(); if (Subtarget.hasP9Altivec()) ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(), IntermediateVT.getVectorNumElements()); Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange, DAG.getValueType(ExtVT)); } else Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange); if (IsStrict) return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other), {Op.getOperand(0), Extend}, Flags); return DAG.getNode(Opc, dl, Op.getValueType(), Extend); } SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP || Op.getOpcode() == ISD::STRICT_SINT_TO_FP; bool IsStrict = Op->isStrictFPOpcode(); SDValue Src = Op.getOperand(IsStrict ? 1 : 0); SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode(); // TODO: Any other flags to propagate? 
SDNodeFlags Flags; Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept()); EVT InVT = Src.getValueType(); EVT OutVT = Op.getValueType(); if (OutVT.isVector() && OutVT.isFloatingPoint() && isOperationCustom(Op.getOpcode(), InVT)) return LowerINT_TO_FPVector(Op, DAG, dl); // Conversions to f128 are legal. if (Op.getValueType() == MVT::f128) return Subtarget.hasP9Vector() ? Op : SDValue(); // Don't handle ppc_fp128 here; let it be lowered to a libcall. if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) return SDValue(); if (Src.getValueType() == MVT::i1) { SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src, DAG.getConstantFP(1.0, dl, Op.getValueType()), DAG.getConstantFP(0.0, dl, Op.getValueType())); if (IsStrict) return DAG.getMergeValues({Sel, Chain}, dl); else return Sel; } // If we have direct moves, we can do all the conversion, skip the store/load // however, without FPCVT we can't do most conversions. if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) && Subtarget.isPPC64() && Subtarget.hasFPCVT()) return LowerINT_TO_FPDirectMove(Op, DAG, dl); assert((IsSigned || Subtarget.hasFPCVT()) && "UINT_TO_FP is supported only with FPCVT"); if (Src.getValueType() == MVT::i64) { SDValue SINT = Src; // When converting to single-precision, we actually need to convert // to double-precision first and then round to single-precision. // To avoid double-rounding effects during that operation, we have // to prepare the input operand. Bits that might be truncated when // converting to double-precision are replaced by a bit that won't // be lost at this stage, but is below the single-precision rounding // position. // // However, if -enable-unsafe-fp-math is in effect, accept double // rounding to avoid the extra overhead. if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT() && !DAG.getTarget().Options.UnsafeFPMath) { // Twiddle input to make sure the low 11 bits are zero. (If this // is the case, we are guaranteed the value will fit into the 53 bit // mantissa of an IEEE double-precision value without rounding.) // If any of those low 11 bits were not zero originally, make sure // bit 12 (value 2048) is set instead, so that the final rounding // to single-precision gets the correct result. SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64, SINT, DAG.getConstant(2047, dl, MVT::i64)); Round = DAG.getNode(ISD::ADD, dl, MVT::i64, Round, DAG.getConstant(2047, dl, MVT::i64)); Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT); Round = DAG.getNode(ISD::AND, dl, MVT::i64, Round, DAG.getConstant(-2048, dl, MVT::i64)); // However, we cannot use that value unconditionally: if the magnitude // of the input value is small, the bit-twiddling we did above might // end up visibly changing the output. Fortunately, in that case, we // don't need to twiddle bits since the original input will convert // exactly to double-precision floating-point already. Therefore, // construct a conditional to use the original value if the top 11 // bits are all sign-bit copies, and use the rounded value computed // above otherwise. 
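      // For example, with SINT = 0x0000000120000FFF the AND/ADD/OR/AND
      // sequence above yields Round = 0x0000000120000800 (low 11 bits
      // cleared, the 2048 bit set), while SINT = 0x0000000120000000 is left
      // unchanged. The SRA/ADD/SETCC below then keeps the original value
      // whenever bits 63..53 are all sign-bit copies (the arithmetic shift
      // SINT >> 53 is 0 or -1), i.e. whenever the value already fits exactly
      // in a double's 53-bit mantissa. (Values here are illustrative only.)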
SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64, SINT, DAG.getConstant(53, dl, MVT::i32)); Cond = DAG.getNode(ISD::ADD, dl, MVT::i64, Cond, DAG.getConstant(1, dl, MVT::i64)); Cond = DAG.getSetCC( dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64), Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT); SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); } ReuseLoadInfo RLI; SDValue Bits; MachineFunction &MF = DAG.getMachineFunction(); if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) { Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges); spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); } else if (Subtarget.hasLFIWAX() && canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) { MachineMemOperand *MMO = MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, RLI.Alignment, RLI.AAInfo, RLI.Ranges); SDValue Ops[] = { RLI.Chain, RLI.Ptr }; Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl, DAG.getVTList(MVT::f64, MVT::Other), Ops, MVT::i32, MMO); spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); } else if (Subtarget.hasFPCVT() && canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) { MachineMemOperand *MMO = MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, RLI.Alignment, RLI.AAInfo, RLI.Ranges); SDValue Ops[] = { RLI.Chain, RLI.Ptr }; Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl, DAG.getVTList(MVT::f64, MVT::Other), Ops, MVT::i32, MMO); spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); } else if (((Subtarget.hasLFIWAX() && SINT.getOpcode() == ISD::SIGN_EXTEND) || (Subtarget.hasFPCVT() && SINT.getOpcode() == ISD::ZERO_EXTEND)) && SINT.getOperand(0).getValueType() == MVT::i32) { MachineFrameInfo &MFI = MF.getFrameInfo(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); int FrameIdx = MFI.CreateStackObject(4, Align(4), false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), FrameIdx)); Chain = Store; assert(cast(Store)->getMemoryVT() == MVT::i32 && "Expected an i32 store"); RLI.Ptr = FIdx; RLI.Chain = Chain; RLI.MPI = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); RLI.Alignment = Align(4); MachineMemOperand *MMO = MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, RLI.Alignment, RLI.AAInfo, RLI.Ranges); SDValue Ops[] = { RLI.Chain, RLI.Ptr }; Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ? PPCISD::LFIWZX : PPCISD::LFIWAX, dl, DAG.getVTList(MVT::f64, MVT::Other), Ops, MVT::i32, MMO); Chain = Bits.getValue(1); } else Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain); if (IsStrict) Chain = FP.getValue(1); if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) { if (IsStrict) FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other), {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags); else FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); } return FP; } assert(Src.getValueType() == MVT::i32 && "Unhandled INT_TO_FP type in custom expander!"); // Since we only generate this in 64-bit mode, we can take advantage of // 64-bit registers. In particular, sign extend the input value into the // 64-bit register with extsw, store the WHOLE 64-bit value into the stack // then lfd it and fcfid it. 
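  // Roughly the classic pre-FPCVT sequence, in illustrative assembly (the
  // register assignments and stack offset below are placeholders):
  //   extsw r4, r3        ; sign-extend the i32 input to 64 bits
  //   std   r4, off(r1)   ; store the whole doubleword to the stack
  //   lfd   f1, off(r1)   ; reload the same bits into an FPR
  //   fcfid f1, f1        ; convert the integer bit pattern to double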
MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); EVT PtrVT = getPointerTy(MF.getDataLayout()); SDValue Ld; if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) { ReuseLoadInfo RLI; bool ReusingLoad; if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) { int FrameIdx = MFI.CreateStackObject(4, Align(4), false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); SDValue Store = DAG.getStore(Chain, dl, Src, FIdx, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), FrameIdx)); Chain = Store; assert(cast(Store)->getMemoryVT() == MVT::i32 && "Expected an i32 store"); RLI.Ptr = FIdx; RLI.Chain = Chain; RLI.MPI = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); RLI.Alignment = Align(4); } MachineMemOperand *MMO = MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, RLI.Alignment, RLI.AAInfo, RLI.Ranges); SDValue Ops[] = { RLI.Chain, RLI.Ptr }; Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl, DAG.getVTList(MVT::f64, MVT::Other), Ops, MVT::i32, MMO); Chain = Ld.getValue(1); if (ReusingLoad) spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG); } else { assert(Subtarget.isPPC64() && "i32->FP without LFIWAX supported only on PPC64"); int FrameIdx = MFI.CreateStackObject(8, Align(8), false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src); // STD the extended value into the stack slot. SDValue Store = DAG.getStore( Chain, dl, Ext64, FIdx, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx)); Chain = Store; // Load the value as a double. Ld = DAG.getLoad( MVT::f64, dl, Chain, FIdx, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx)); Chain = Ld.getValue(1); } // FCFID it and return it. SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain); if (IsStrict) Chain = FP.getValue(1); if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) { if (IsStrict) FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other), {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags); else FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); } return FP; } SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); /* The rounding mode is in bits 30:31 of FPSR, and has the following settings: 00 Round to nearest 01 Round to 0 10 Round to +inf 11 Round to -inf FLT_ROUNDS, on the other hand, expects the following: -1 Undefined 0 Round to 0 1 Round to nearest 2 Round to +inf 3 Round to -inf To perform the conversion, we do: ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1)) */ MachineFunction &MF = DAG.getMachineFunction(); EVT VT = Op.getValueType(); EVT PtrVT = getPointerTy(MF.getDataLayout()); // Save FP Control Word to register SDValue Chain = Op.getOperand(0); SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain); Chain = MFFS.getValue(1); SDValue CWD; if (isTypeLegal(MVT::i64)) { CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS)); } else { // Save FP register to stack slot int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo()); // Load FP Control Word from low 32 bits of stack slot. 
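    // The f64 returned by MFFS carries the FPSCR image in its low-order 32
    // bits, so after the 8-byte store above the slot layout on a big-endian
    // target is roughly:
    //   bytes 0..3 : high-order word (not consulted here)
    //   bytes 4..7 : FPSCR contents, including the RN field
    // which is why the load below reads at offset 4.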
assert(hasBigEndianPartOrdering(MVT::i64, MF.getDataLayout()) && "Stack slot adjustment is valid only on big endian subtargets!"); SDValue Four = DAG.getConstant(4, dl, PtrVT); SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo()); Chain = CWD.getValue(1); } // Transform as necessary SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, CWD, DAG.getConstant(3, dl, MVT::i32)); SDValue CWD2 = DAG.getNode(ISD::SRL, dl, MVT::i32, DAG.getNode(ISD::AND, dl, MVT::i32, DAG.getNode(ISD::XOR, dl, MVT::i32, CWD, DAG.getConstant(3, dl, MVT::i32)), DAG.getConstant(3, dl, MVT::i32)), DAG.getConstant(1, dl, MVT::i32)); SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2); RetVal = DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); return DAG.getMergeValues({RetVal, Chain}, dl); } SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); SDLoc dl(Op); assert(Op.getNumOperands() == 3 && VT == Op.getOperand(1).getValueType() && "Unexpected SHL!"); // Expand into a bunch of logical ops. Note that these ops // depend on the PPC behavior for oversized shift amounts. SDValue Lo = Op.getOperand(0); SDValue Hi = Op.getOperand(1); SDValue Amt = Op.getOperand(2); EVT AmtVT = Amt.getValueType(); SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Amt); SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt); SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1); SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3); SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, DAG.getConstant(-BitWidth, dl, AmtVT)); SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5); SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt); SDValue OutOps[] = { OutLo, OutHi }; return DAG.getMergeValues(OutOps, dl); } SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc dl(Op); unsigned BitWidth = VT.getSizeInBits(); assert(Op.getNumOperands() == 3 && VT == Op.getOperand(1).getValueType() && "Unexpected SRL!"); // Expand into a bunch of logical ops. Note that these ops // depend on the PPC behavior for oversized shift amounts. SDValue Lo = Op.getOperand(0); SDValue Hi = Op.getOperand(1); SDValue Amt = Op.getOperand(2); EVT AmtVT = Amt.getValueType(); SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Amt); SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, DAG.getConstant(-BitWidth, dl, AmtVT)); SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5); SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt); SDValue OutOps[] = { OutLo, OutHi }; return DAG.getMergeValues(OutOps, dl); } SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); EVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); assert(Op.getNumOperands() == 3 && VT == Op.getOperand(1).getValueType() && "Unexpected SRA!"); // Expand into a bunch of logical ops, followed by a select_cc. 
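  // A sketch of the expansion, relying on PPC's behavior for shift amounts in
  // [BW, 2*BW):
  //   Amt <  BW : OutLo = (Lo >>u Amt) | (Hi << (BW - Amt)), OutHi = Hi >>s Amt
  //   Amt >= BW : OutLo = Hi >>s (Amt - BW),                 OutHi = sign fill
  // with the final select_cc on (Amt - BW) choosing between the two OutLo
  // forms.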
SDValue Lo = Op.getOperand(0); SDValue Hi = Op.getOperand(1); SDValue Amt = Op.getOperand(2); EVT AmtVT = Amt.getValueType(); SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Amt); SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, DAG.getConstant(-BitWidth, dl, AmtVT)); SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5); SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt); SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT), Tmp4, Tmp6, ISD::SETLE); SDValue OutOps[] = { OutLo, OutHi }; return DAG.getMergeValues(OutOps, dl); } SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); EVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); bool IsFSHL = Op.getOpcode() == ISD::FSHL; SDValue X = Op.getOperand(0); SDValue Y = Op.getOperand(1); SDValue Z = Op.getOperand(2); EVT AmtVT = Z.getValueType(); // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) // This is simpler than TargetLowering::expandFunnelShift because we can rely // on PowerPC shift by BW being well defined. Z = DAG.getNode(ISD::AND, dl, AmtVT, Z, DAG.getConstant(BitWidth - 1, dl, AmtVT)); SDValue SubZ = DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z); X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ); Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z); return DAG.getNode(ISD::OR, dl, VT, X, Y); } //===----------------------------------------------------------------------===// // Vector related lowering. // /// getCanonicalConstSplat - Build a canonical splat immediate of Val with an /// element size of SplatSize. Cast the result to VT. static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl) { static const MVT VTys[] = { // canonical VT to use for each size. MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 }; EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize. if (Val == ((1LLU << (SplatSize * 8)) - 1)) { SplatSize = 1; Val = 0xFF; } EVT CanonicalVT = VTys[SplatSize-1]; // Build a canonical splat for this value. return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT)); } /// BuildIntrinsicOp - Return a unary operator intrinsic node with the /// specified intrinsic ID. static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT = MVT::Other) { if (DestVT == MVT::Other) DestVT = Op.getValueType(); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, DAG.getConstant(IID, dl, MVT::i32), Op); } /// BuildIntrinsicOp - Return a binary operator intrinsic node with the /// specified intrinsic ID. static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT = MVT::Other) { if (DestVT == MVT::Other) DestVT = LHS.getValueType(); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, DAG.getConstant(IID, dl, MVT::i32), LHS, RHS); } /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the /// specified intrinsic ID. 
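/// All three BuildIntrinsicOp overloads wrap ISD::INTRINSIC_WO_CHAIN with the
/// intrinsic ID as the leading i32 constant operand; e.g. the VSLW used in
/// LowerBUILD_VECTOR below is built as
///   BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, OnesV, DAG, dl).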
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, SDValue Op2, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT = MVT::Other) { if (DestVT == MVT::Other) DestVT = Op0.getValueType(); return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2); } /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified /// amount. The result has the specified value type. static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl) { // Force LHS/RHS to be the right type. LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS); RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS); int Ops[16]; for (unsigned i = 0; i != 16; ++i) Ops[i] = i + Amt; SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, T); } /// Do we have an efficient pattern in a .td file for this node? /// /// \param V - pointer to the BuildVectorSDNode being matched /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves? /// /// There are some patterns where it is beneficial to keep a BUILD_VECTOR /// node as a BUILD_VECTOR node rather than expanding it. The patterns where /// the opposite is true (expansion is beneficial) are: /// - The node builds a vector out of integers that are not 32 or 64-bits /// - The node builds a vector out of constants /// - The node is a "load-and-splat" /// In all other cases, we will choose to keep the BUILD_VECTOR. static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector) { EVT VecVT = V->getValueType(0); bool RightType = VecVT == MVT::v2f64 || (HasP8Vector && VecVT == MVT::v4f32) || (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32)); if (!RightType) return false; bool IsSplat = true; bool IsLoad = false; SDValue Op0 = V->getOperand(0); // This function is called in a block that confirms the node is not a constant // splat. So a constant BUILD_VECTOR here means the vector is built out of // different constants. if (V->isConstant()) return false; for (int i = 0, e = V->getNumOperands(); i < e; ++i) { if (V->getOperand(i).isUndef()) return false; // We want to expand nodes that represent load-and-splat even if the // loaded value is a floating point truncation or conversion to int. if (V->getOperand(i).getOpcode() == ISD::LOAD || (V->getOperand(i).getOpcode() == ISD::FP_ROUND && V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) || (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT && V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) || (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT && V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD)) IsLoad = true; // If the operands are different or the input is not a load and has more // uses than just this BV node, then it isn't a splat. if (V->getOperand(i) != Op0 || (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode()))) IsSplat = false; } return !(IsSplat && IsLoad); } // Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128. 
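// Only the exact pattern
//   (f128 (bitcast (build_pair i64:Lo, i64:Hi))) -> (BUILD_FP128 Lo, Hi)
// is matched; anything else returns SDValue() so the node falls back to the
// generic handling.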
SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); SDValue Op0 = Op->getOperand(0); if ((Op.getValueType() != MVT::f128) || (Op0.getOpcode() != ISD::BUILD_PAIR) || (Op0.getOperand(0).getValueType() != MVT::i64) || (Op0.getOperand(1).getValueType() != MVT::i64)) return SDValue(); return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0), Op0.getOperand(1)); } static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) { const SDValue *InputLoad = &Op; if (InputLoad->getOpcode() == ISD::BITCAST) InputLoad = &InputLoad->getOperand(0); if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR || InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) { IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED; InputLoad = &InputLoad->getOperand(0); } if (InputLoad->getOpcode() != ISD::LOAD) return nullptr; LoadSDNode *LD = cast(*InputLoad); return ISD::isNormalLoad(LD) ? InputLoad : nullptr; } // Convert the argument APFloat to a single precision APFloat if there is no // loss in information during the conversion to single precision APFloat and the // resulting number is not a denormal number. Return true if successful. bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) { APFloat APFloatToConvert = ArgAPFloat; bool LosesInfo = true; APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &LosesInfo); bool Success = (!LosesInfo && !APFloatToConvert.isDenormal()); if (Success) ArgAPFloat = APFloatToConvert; return Success; } // Bitcast the argument APInt to a double and convert it to a single precision // APFloat, bitcast the APFloat to an APInt and assign it to the original // argument if there is no loss in information during the conversion from // double to single precision APFloat and the resulting number is not a denormal // number. Return true if successful. bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) { double DpValue = ArgAPInt.bitsToDouble(); APFloat APFloatDp(DpValue); bool Success = convertToNonDenormSingle(APFloatDp); if (Success) ArgAPInt = APFloatDp.bitcastToAPInt(); return Success; } // If this is a case we can't handle, return null and let the default // expansion code take care of it. If we CAN select this case, and if it // selects to a single instruction, return Op. Otherwise, if we can codegen // this case more efficiently than a constant pool load, lower it to the // sequence of ops that should be used. SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); BuildVectorSDNode *BVN = dyn_cast(Op.getNode()); assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); // Check if this is a splat of a constant value. APInt APSplatBits, APSplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; bool BVNIsConstantSplat = BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0, !Subtarget.isLittleEndian()); // If it is a splat of a double, check if we can shrink it to a 32 bit // non-denormal float which when converted back to double gives us the same // double. This is to exploit the XXSPLTIDP instruction. // If we lose precision, we use XXSPLTI32DX. if (BVNIsConstantSplat && (SplatBitSize == 64) && Subtarget.hasPrefixInstrs()) { // Check the type first to short-circuit so we don't modify APSplatBits if // this block isn't executed. 
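    // For example, a v2f64 splat of 1.0 (0x3FF0000000000000) narrows exactly
    // to the single-precision image 0x3F800000 and is emitted as XXSPLTIDP,
    // while a splat of 0.1 (0x3FB999999999999A) would lose bits and is built
    // with two XXSPLTI32DX nodes instead, inserting Hi = 0x3FB99999 at index
    // 0 and Lo = 0x9999999A at index 1.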
if ((Op->getValueType(0) == MVT::v2f64) && convertToNonDenormSingle(APSplatBits)) { SDValue SplatNode = DAG.getNode( PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64, DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32)); return DAG.getBitcast(Op.getValueType(), SplatNode); } else { // We may lose precision, so we have to use XXSPLTI32DX. uint32_t Hi = (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32); uint32_t Lo = (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF); SDValue SplatNode = DAG.getUNDEF(MVT::v2i64); if (!Hi || !Lo) // If either load is 0, then we should generate XXLXOR to set to 0. SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64); if (Hi) SplatNode = DAG.getNode( PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode, DAG.getTargetConstant(0, dl, MVT::i32), DAG.getTargetConstant(Hi, dl, MVT::i32)); if (Lo) SplatNode = DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode, DAG.getTargetConstant(1, dl, MVT::i32), DAG.getTargetConstant(Lo, dl, MVT::i32)); return DAG.getBitcast(Op.getValueType(), SplatNode); } } if (!BVNIsConstantSplat || SplatBitSize > 32) { bool IsPermutedLoad = false; const SDValue *InputLoad = getNormalLoadInput(Op.getOperand(0), IsPermutedLoad); // Handle load-and-splat patterns as we have instructions that will do this // in one go. if (InputLoad && DAG.isSplatValue(Op, true)) { LoadSDNode *LD = cast(*InputLoad); // We have handling for 4 and 8 byte elements. unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits(); // Checking for a single use of this load, we have to check for vector // width (128 bits) / ElementSize uses (since each operand of the // BUILD_VECTOR is a separate use of the value. unsigned NumUsesOfInputLD = 128 / ElementSize; for (SDValue BVInOp : Op->ops()) if (BVInOp.isUndef()) NumUsesOfInputLD--; assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?"); if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) && ((Subtarget.hasVSX() && ElementSize == 64) || (Subtarget.hasP9Vector() && ElementSize == 32))) { SDValue Ops[] = { LD->getChain(), // Chain LD->getBasePtr(), // Ptr DAG.getValueType(Op.getValueType()) // VT }; SDValue LdSplt = DAG.getMemIntrinsicNode( PPCISD::LD_SPLAT, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops, LD->getMemoryVT(), LD->getMemOperand()); // Replace all uses of the output chain of the original load with the // output chain of the new load. DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1)); return LdSplt; } } // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to // 32-bits can be lowered to VSX instructions under certain conditions. // Without VSX, there is no pattern more efficient than expanding the node. if (Subtarget.hasVSX() && Subtarget.isPPC64() && haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(), Subtarget.hasP8Vector())) return Op; return SDValue(); } uint64_t SplatBits = APSplatBits.getZExtValue(); uint64_t SplatUndef = APSplatUndef.getZExtValue(); unsigned SplatSize = SplatBitSize / 8; // First, handle single instruction cases. // All zeros? if (SplatBits == 0) { // Canonicalize all zero vectors to be v4i32. if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { SDValue Z = DAG.getConstant(0, dl, MVT::v4i32); Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z); } return Op; } // We have XXSPLTIW for constant splats four bytes wide. // Given vector length is a multiple of 4, 2-byte splats can be replaced // with 4-byte splats. 
We replicate the SplatBits in case of 2-byte splat to // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be // turned into a 4-byte splat of 0xABABABAB. if (Subtarget.hasPrefixInstrs() && SplatSize == 2) return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2, Op.getValueType(), DAG, dl); if (Subtarget.hasPrefixInstrs() && SplatSize == 4) return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG, dl); // We have XXSPLTIB for constant splats one byte wide. if (Subtarget.hasP9Vector() && SplatSize == 1) return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG, dl); // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >> (32-SplatBitSize)); if (SextVal >= -16 && SextVal <= 15) return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG, dl); // Two instruction sequences. // If this value is in the range [-32,30] and is even, use: // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2) // If this value is in the range [17,31] and is odd, use: // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16) // If this value is in the range [-31,-17] and is odd, use: // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16) // Note the last two are three-instruction sequences. if (SextVal >= -32 && SextVal <= 31) { // To avoid having these optimizations undone by constant folding, // we convert to a pseudo that will be expanded later into one of // the above forms. SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32); EVT VT = (SplatSize == 1 ? MVT::v16i8 : (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32)); SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32); SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); if (VT == Op.getValueType()) return RetVal; else return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal); } // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important // for fneg/fabs. if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) { // Make -1 and vspltisw -1: SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl); // Make the VSLW intrinsic, computing 0x8000_0000. SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, OnesV, DAG, dl); // xor by OnesV to invert it. Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV); return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } // Check to see if this is a wide variety of vsplti*, binop self cases. static const signed char SplatCsts[] = { -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16 }; for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) { // Indirect through the SplatCsts array so that we favor 'vsplti -1' for // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1' int i = SplatCsts[idx]; // Figure out what shift amount will be used by altivec if shifted by i in // this splat size. unsigned TypeShiftAmt = i & (SplatBitSize-1); // vsplti + shl self. if (SextVal == (int)((unsigned)i << TypeShiftAmt)) { SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl); static const unsigned IIDs[] = { // Intrinsic to use for each size. Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, Intrinsic::ppc_altivec_vslw }; Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } // vsplti + srl self. 
if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl); static const unsigned IIDs[] = { // Intrinsic to use for each size. Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, Intrinsic::ppc_altivec_vsrw }; Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } // vsplti + rol self. if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl); static const unsigned IIDs[] = { // Intrinsic to use for each size. Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, Intrinsic::ppc_altivec_vrlw }; Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } // t = vsplti c, result = vsldoi t, t, 1 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) { SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl); unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1; return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); } // t = vsplti c, result = vsldoi t, t, 2 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) { SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl); unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2; return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); } // t = vsplti c, result = vsldoi t, t, 3 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) { SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl); unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3; return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl); } } return SDValue(); } /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit /// the specified operations to build the shuffle. 
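/// Each perfect-shuffle table entry is decoded below as, roughly,
///   bits 26..29 : the operation to emit (OP_COPY, OP_VMRGHW, ..., OP_VSLDOI12)
///   bits 13..25 : table id of the left input, expanded recursively
///   bits  0..12 : table id of the right input, expanded recursively
/// with the remaining high bits not inspected by this helper.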
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl) { unsigned OpNum = (PFEntry >> 26) & 0x0F; unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); enum { OP_COPY = 0, // Copy, used for things like to say it is <0,1,2,3> OP_VMRGHW, OP_VMRGLW, OP_VSPLTISW0, OP_VSPLTISW1, OP_VSPLTISW2, OP_VSPLTISW3, OP_VSLDOI4, OP_VSLDOI8, OP_VSLDOI12 }; if (OpNum == OP_COPY) { if (LHSID == (1*9+2)*9+3) return LHS; assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); return RHS; } SDValue OpLHS, OpRHS; OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); int ShufIdxs[16]; switch (OpNum) { default: llvm_unreachable("Unknown i32 permute!"); case OP_VMRGHW: ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7; ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23; break; case OP_VMRGLW: ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11; ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27; ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; break; case OP_VSPLTISW0: for (unsigned i = 0; i != 16; ++i) ShufIdxs[i] = (i&3)+0; break; case OP_VSPLTISW1: for (unsigned i = 0; i != 16; ++i) ShufIdxs[i] = (i&3)+4; break; case OP_VSPLTISW2: for (unsigned i = 0; i != 16; ++i) ShufIdxs[i] = (i&3)+8; break; case OP_VSPLTISW3: for (unsigned i = 0; i != 16; ++i) ShufIdxs[i] = (i&3)+12; break; case OP_VSLDOI4: return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl); case OP_VSLDOI8: return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl); case OP_VSLDOI12: return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); } EVT VT = OpLHS.getValueType(); OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS); OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS); SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs); return DAG.getNode(ISD::BITCAST, dl, VT, T); } /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled /// by the VINSERTB instruction introduced in ISA 3.0, else just return default /// SDValue. SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N, SelectionDAG &DAG) const { const unsigned BytesInVector = 16; bool IsLE = Subtarget.isLittleEndian(); SDLoc dl(N); SDValue V1 = N->getOperand(0); SDValue V2 = N->getOperand(1); unsigned ShiftElts = 0, InsertAtByte = 0; bool Swap = false; // Shifts required to get the byte we want at element 7. unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9}; unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8}; ArrayRef Mask = N->getMask(); int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; // For each mask element, find out if we're just inserting something // from V2 into V1 or vice versa. // Possible permutations inserting an element from V2 into V1: // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 // ... 
// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X // Inserting from V1 into V2 will be similar, except mask range will be // [16,31]. bool FoundCandidate = false; // If both vector operands for the shuffle are the same vector, the mask // will contain only elements from the first one and the second one will be // undef. unsigned VINSERTBSrcElem = IsLE ? 8 : 7; // Go through the mask of half-words to find an element that's being moved // from one vector to the other. for (unsigned i = 0; i < BytesInVector; ++i) { unsigned CurrentElement = Mask[i]; // If 2nd operand is undefined, we should only look for element 7 in the // Mask. if (V2.isUndef() && CurrentElement != VINSERTBSrcElem) continue; bool OtherElementsInOrder = true; // Examine the other elements in the Mask to see if they're in original // order. for (unsigned j = 0; j < BytesInVector; ++j) { if (j == i) continue; // If CurrentElement is from V1 [0,15], then we the rest of the Mask to be // from V2 [16,31] and vice versa. Unless the 2nd operand is undefined, // in which we always assume we're always picking from the 1st operand. int MaskOffset = (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0; if (Mask[j] != OriginalOrder[j] + MaskOffset) { OtherElementsInOrder = false; break; } } // If other elements are in original order, we record the number of shifts // we need to get the element we want into element 7. Also record which byte // in the vector we should insert into. if (OtherElementsInOrder) { // If 2nd operand is undefined, we assume no shifts and no swapping. if (V2.isUndef()) { ShiftElts = 0; Swap = false; } else { // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4. ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF] : BigEndianShifts[CurrentElement & 0xF]; Swap = CurrentElement < BytesInVector; } InsertAtByte = IsLE ? BytesInVector - (i + 1) : i; FoundCandidate = true; break; } } if (!FoundCandidate) return SDValue(); // Candidate found, construct the proper SDAG sequence with VINSERTB, // optionally with VECSHL if shift is required. if (Swap) std::swap(V1, V2); if (V2.isUndef()) V2 = V1; if (ShiftElts) { SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2, DAG.getConstant(ShiftElts, dl, MVT::i32)); return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl, DAG.getConstant(InsertAtByte, dl, MVT::i32)); } return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2, DAG.getConstant(InsertAtByte, dl, MVT::i32)); } /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled /// by the VINSERTH instruction introduced in ISA 3.0, else just return default /// SDValue. SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N, SelectionDAG &DAG) const { const unsigned NumHalfWords = 8; const unsigned BytesInVector = NumHalfWords * 2; // Check that the shuffle is on half-words. if (!isNByteElemShuffleMask(N, 2, 1)) return SDValue(); bool IsLE = Subtarget.isLittleEndian(); SDLoc dl(N); SDValue V1 = N->getOperand(0); SDValue V2 = N->getOperand(1); unsigned ShiftElts = 0, InsertAtByte = 0; bool Swap = false; // Shifts required to get the half-word we want at element 3. unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5}; unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4}; uint32_t Mask = 0; uint32_t OriginalOrderLow = 0x1234567; uint32_t OriginalOrderHigh = 0x89ABCDEF; // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a // 32-bit space, only need 4-bit nibbles per element. 
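  // For example, the identity half-word order <0,1,2,3,4,5,6,7> packs to
  // 0x01234567 (OriginalOrderLow), and an order drawn entirely from the
  // second operand, <8,9,10,11,12,13,14,15>, packs to 0x89ABCDEF
  // (OriginalOrderHigh).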
for (unsigned i = 0; i < NumHalfWords; ++i) { unsigned MaskShift = (NumHalfWords - 1 - i) * 4; Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift); } // For each mask element, find out if we're just inserting something // from V2 into V1 or vice versa. Possible permutations inserting an element // from V2 into V1: // X, 1, 2, 3, 4, 5, 6, 7 // 0, X, 2, 3, 4, 5, 6, 7 // 0, 1, X, 3, 4, 5, 6, 7 // 0, 1, 2, X, 4, 5, 6, 7 // 0, 1, 2, 3, X, 5, 6, 7 // 0, 1, 2, 3, 4, X, 6, 7 // 0, 1, 2, 3, 4, 5, X, 7 // 0, 1, 2, 3, 4, 5, 6, X // Inserting from V1 into V2 will be similar, except mask range will be [8,15]. bool FoundCandidate = false; // Go through the mask of half-words to find an element that's being moved // from one vector to the other. for (unsigned i = 0; i < NumHalfWords; ++i) { unsigned MaskShift = (NumHalfWords - 1 - i) * 4; uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF; uint32_t MaskOtherElts = ~(0xF << MaskShift); uint32_t TargetOrder = 0x0; // If both vector operands for the shuffle are the same vector, the mask // will contain only elements from the first one and the second one will be // undef. if (V2.isUndef()) { ShiftElts = 0; unsigned VINSERTHSrcElem = IsLE ? 4 : 3; TargetOrder = OriginalOrderLow; Swap = false; // Skip if not the correct element or mask of other elements don't equal // to our expected order. if (MaskOneElt == VINSERTHSrcElem && (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) { InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2; FoundCandidate = true; break; } } else { // If both operands are defined. // Target order is [8,15] if the current mask is between [0,7]. TargetOrder = (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow; // Skip if mask of other elements don't equal our expected order. if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) { // We only need the last 3 bits for the number of shifts. ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7] : BigEndianShifts[MaskOneElt & 0x7]; InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2; Swap = MaskOneElt < NumHalfWords; FoundCandidate = true; break; } } } if (!FoundCandidate) return SDValue(); // Candidate found, construct the proper SDAG sequence with VINSERTH, // optionally with VECSHL if shift is required. if (Swap) std::swap(V1, V2); if (V2.isUndef()) V2 = V1; SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1); if (ShiftElts) { // Double ShiftElts because we're left shifting on v16i8 type. SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2, DAG.getConstant(2 * ShiftElts, dl, MVT::i32)); SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl); SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2, DAG.getConstant(InsertAtByte, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); } SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2); SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2, DAG.getConstant(InsertAtByte, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); } /// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be /// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise /// return the default SDValue. SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN, SelectionDAG &DAG) const { // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles // to v16i8. Peek through the bitcasts to get the actual operands. 
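  // Note that when a match is found below, a splat constant narrower than 32
  // bits is widened by repeated doubling before being fed to XXSPLTI32DX;
  // e.g. an 8-bit splat value 0xAB becomes 0xABAB and then 0xABABABAB.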
SDValue LHS = peekThroughBitcasts(SVN->getOperand(0)); SDValue RHS = peekThroughBitcasts(SVN->getOperand(1)); auto ShuffleMask = SVN->getMask(); SDValue VecShuffle(SVN, 0); SDLoc DL(SVN); // Check that we have a four byte shuffle. if (!isNByteElemShuffleMask(SVN, 4, 1)) return SDValue(); // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx. if (RHS->getOpcode() != ISD::BUILD_VECTOR) { std::swap(LHS, RHS); VecShuffle = DAG.getCommutedVectorShuffle(*SVN); ShuffleMask = cast(VecShuffle)->getMask(); } // Ensure that the RHS is a vector of constants. BuildVectorSDNode *BVN = dyn_cast(RHS.getNode()); if (!BVN) return SDValue(); // Check if RHS is a splat of 4-bytes (or smaller). APInt APSplatValue, APSplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize, HasAnyUndefs, 0, !Subtarget.isLittleEndian()) || SplatBitSize > 32) return SDValue(); // Check that the shuffle mask matches the semantics of XXSPLTI32DX. // The instruction splats a constant C into two words of the source vector // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }. // Thus we check that the shuffle mask is the equivalent of // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively. // Note: the check above of isNByteElemShuffleMask() ensures that the bytes // within each word are consecutive, so we only need to check the first byte. SDValue Index; bool IsLE = Subtarget.isLittleEndian(); if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) && (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 && ShuffleMask[4] > 15 && ShuffleMask[12] > 15)) Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32); else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) && (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 && ShuffleMask[0] > 15 && ShuffleMask[8] > 15)) Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32); else return SDValue(); // If the splat is narrower than 32-bits, we need to get the 32-bit value // for XXSPLTI32DX. unsigned SplatVal = APSplatValue.getZExtValue(); for (; SplatBitSize < 32; SplatBitSize <<= 1) SplatVal |= (SplatVal << SplatBitSize); SDValue SplatNode = DAG.getNode( PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS), Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32)); return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode); } /// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8). /// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is /// a multiple of 8. Otherwise convert it to a scalar rotation(i128) /// i.e (or (shl x, C1), (srl x, 128-C1)). 
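/// For example, a rotate amount of 32 becomes a v16i8 shuffle whose mask is
/// the identity rotated by four bytes, <4,5,...,15,0,1,2,3>, while a rotate
/// amount of 12 is expanded to (or (shl x, 12), (srl x, 116)) on i128.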
SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL"); assert(Op.getValueType() == MVT::v1i128 && "Only set v1i128 as custom, other type shouldn't reach here!"); SDLoc dl(Op); SDValue N0 = peekThroughBitcasts(Op.getOperand(0)); SDValue N1 = peekThroughBitcasts(Op.getOperand(1)); unsigned SHLAmt = N1.getConstantOperandVal(0); if (SHLAmt % 8 == 0) { SmallVector Mask(16, 0); std::iota(Mask.begin(), Mask.end(), 0); std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end()); if (SDValue Shuffle = DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0), DAG.getUNDEF(MVT::v16i8), Mask)) return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle); } SDValue ArgVal = DAG.getBitcast(MVT::i128, N0); SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal, DAG.getConstant(SHLAmt, dl, MVT::i32)); SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal, DAG.getConstant(128 - SHLAmt, dl, MVT::i32)); SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp); return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp); } /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this /// is a shuffle we can handle in a single instruction, return it. Otherwise, /// return the code it can be lowered into. Worst case, it can always be /// lowered into a vperm. SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); ShuffleVectorSDNode *SVOp = cast(Op); // Any nodes that were combined in the target-independent combiner prior // to vector legalization will not be sent to the target combine. Try to // combine it here. if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) { if (!isa(NewShuffle)) return NewShuffle; Op = NewShuffle; SVOp = cast(Op); V1 = Op.getOperand(0); V2 = Op.getOperand(1); } EVT VT = Op.getValueType(); bool isLittleEndian = Subtarget.isLittleEndian(); unsigned ShiftElts, InsertAtByte; bool Swap = false; // If this is a load-and-splat, we can do that with a single instruction // in some cases. However if the load has multiple uses, we don't want to // combine it because that will just produce multiple loads. bool IsPermutedLoad = false; const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad); if (InputLoad && Subtarget.hasVSX() && V2.isUndef() && (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) && InputLoad->hasOneUse()) { bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4); int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG); // The splat index for permuted loads will be in the left half of the vector // which is strictly wider than the loaded value by 8 bytes. So we need to // adjust the splat index to point to the correct address in memory. if (IsPermutedLoad) { assert(isLittleEndian && "Unexpected permuted load on big endian target"); SplatIdx += IsFourByte ? 2 : 1; assert((SplatIdx < (IsFourByte ? 4 : 2)) && "Splat of a value outside of the loaded memory"); } LoadSDNode *LD = cast(*InputLoad); // For 4-byte load-and-splat, we need Power9. if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) { uint64_t Offset = 0; if (IsFourByte) Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4; else Offset = isLittleEndian ? 
(1 - SplatIdx) * 8 : SplatIdx * 8; SDValue BasePtr = LD->getBasePtr(); if (Offset != 0) BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), BasePtr, DAG.getIntPtrConstant(Offset, dl)); SDValue Ops[] = { LD->getChain(), // Chain BasePtr, // BasePtr DAG.getValueType(Op.getValueType()) // VT }; SDVTList VTL = DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other); SDValue LdSplt = DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL, Ops, LD->getMemoryVT(), LD->getMemOperand()); DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1)); if (LdSplt.getValueType() != SVOp->getValueType(0)) LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt); return LdSplt; } } if (Subtarget.hasP9Vector() && PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap, isLittleEndian)) { if (Swap) std::swap(V1, V2); SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2); if (ShiftElts) { SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2, DAG.getConstant(ShiftElts, dl, MVT::i32)); SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl, DAG.getConstant(InsertAtByte, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); } SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2, DAG.getConstant(InsertAtByte, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); } if (Subtarget.hasPrefixInstrs()) { SDValue SplatInsertNode; if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG))) return SplatInsertNode; } if (Subtarget.hasP9Altivec()) { SDValue NewISDNode; if ((NewISDNode = lowerToVINSERTH(SVOp, DAG))) return NewISDNode; if ((NewISDNode = lowerToVINSERTB(SVOp, DAG))) return NewISDNode; } if (Subtarget.hasVSX() && PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) { if (Swap) std::swap(V1, V2); SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2); SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2, DAG.getConstant(ShiftElts, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl); } if (Subtarget.hasVSX() && PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) { if (Swap) std::swap(V1, V2); SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1); SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? 
V1 : V2); SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2, DAG.getConstant(ShiftElts, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI); } if (Subtarget.hasP9Vector()) { if (PPC::isXXBRHShuffleMask(SVOp)) { SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1); SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord); } else if (PPC::isXXBRWShuffleMask(SVOp)) { SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord); } else if (PPC::isXXBRDShuffleMask(SVOp)) { SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1); SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord); } else if (PPC::isXXBRQShuffleMask(SVOp)) { SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1); SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord); } } if (Subtarget.hasVSX()) { if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) { int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG); SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv, DAG.getConstant(SplatIdx, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat); } // Left shifts of 8 bytes are actually swaps. Convert accordingly. if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) { SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1); SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv); return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap); } } // Cases that are handled by instructions that take permute immediates // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be // selected by the instruction selector. if (V2.isUndef()) { if (PPC::isSplatShuffleMask(SVOp, 1) || PPC::isSplatShuffleMask(SVOp, 2) || PPC::isSplatShuffleMask(SVOp, 4) || PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) || PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) || PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 || PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) || PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) || PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) || PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) || PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) || PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) || (Subtarget.hasP8Altivec() && ( PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) || PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) || PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) { return Op; } } // Altivec has a variety of "shuffle immediates" that take two vector inputs // and produce a fixed permutation. If any of these match, do not lower to // VPERM. unsigned int ShuffleKind = isLittleEndian ? 
2 : 0; if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) || PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) || PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 || PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) || PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) || PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) || PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) || PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) || PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) || (Subtarget.hasP8Altivec() && ( PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) || PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) || PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG)))) return Op; // Check to see if this is a shuffle of 4-byte values. If so, we can use our // perfect shuffle table to emit an optimal matching sequence. ArrayRef PermMask = SVOp->getMask(); unsigned PFIndexes[4]; bool isFourElementShuffle = true; for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number unsigned EltNo = 8; // Start out undef. for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. if (PermMask[i*4+j] < 0) continue; // Undef, ignore it. unsigned ByteSource = PermMask[i*4+j]; if ((ByteSource & 3) != j) { isFourElementShuffle = false; break; } if (EltNo == 8) { EltNo = ByteSource/4; } else if (EltNo != ByteSource/4) { isFourElementShuffle = false; break; } } PFIndexes[i] = EltNo; } // If this shuffle can be expressed as a shuffle of 4-byte elements, use the // perfect shuffle vector to determine if it is cost effective to do this as // discrete instructions, or whether we should use a vperm. // For now, we skip this for little endian until such time as we have a // little-endian perfect shuffle table. if (isFourElementShuffle && !isLittleEndian) { // Compute the index in the perfect shuffle table. unsigned PFTableIndex = PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; unsigned Cost = (PFEntry >> 30); // Determining when to avoid vperm is tricky. Many things affect the cost // of vperm, particularly how many times the perm mask needs to be computed. // For example, if the perm mask can be hoisted out of a loop or is already // used (perhaps because there are multiple permutes with the same shuffle // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of // the loop requires an extra register. // // As a compromise, we only emit discrete instructions if the shuffle can be // generated in 3 or fewer operations. When we have loop information // available, if this block is within a loop, we should avoid using vperm // for 3-operation perms and use a constant pool load instead. if (Cost < 3) return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); } // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant // vector that will get spilled to the constant pool. if (V2.isUndef()) V2 = V1; // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except // that it is in input element units, not in bytes. Convert now. // For little endian, the order of the input vectors is reversed, and // the permutation mask is complemented with respect to 31. This is // necessary to produce proper semantics with the big-endian-biased vperm // instruction. 
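  // For example, with v4i32 inputs BytesPerElement is 4, so a mask entry of 5
  // expands to the control bytes 20, 21, 22, 23 on big endian; on little
  // endian the same entry is complemented to 11, 10, 9, 8 (31 - 20 ... 31 - 23).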
EVT EltVT = V1.getValueType().getVectorElementType(); unsigned BytesPerElement = EltVT.getSizeInBits()/8; SmallVector ResultMask; for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; for (unsigned j = 0; j != BytesPerElement; ++j) if (isLittleEndian) ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j), dl, MVT::i32)); else ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl, MVT::i32)); } ShufflesHandledWithVPERM++; SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask); LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n"); LLVM_DEBUG(SVOp->dump()); LLVM_DEBUG(dbgs() << "With the following permute control vector:\n"); LLVM_DEBUG(VPermMask.dump()); if (isLittleEndian) return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V2, V1, VPermMask); else return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask); } /// getVectorCompareInfo - Given an intrinsic, return false if it is not a /// vector comparison. If it is, return true and fill in Opc/isDot with /// information about the intrinsic. static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget) { unsigned IntrinsicID = cast(Intrin.getOperand(0))->getZExtValue(); CompareOpc = -1; isDot = false; switch (IntrinsicID) { default: return false; // Comparison predicates. case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = true; break; case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = true; break; case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = true; break; case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = true; break; case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = true; break; case Intrinsic::ppc_altivec_vcmpequd_p: if (Subtarget.hasP8Altivec()) { CompareOpc = 199; isDot = true; } else return false; break; case Intrinsic::ppc_altivec_vcmpneb_p: case Intrinsic::ppc_altivec_vcmpneh_p: case Intrinsic::ppc_altivec_vcmpnew_p: case Intrinsic::ppc_altivec_vcmpnezb_p: case Intrinsic::ppc_altivec_vcmpnezh_p: case Intrinsic::ppc_altivec_vcmpnezw_p: if (Subtarget.hasP9Altivec()) { switch (IntrinsicID) { default: llvm_unreachable("Unknown comparison intrinsic."); case Intrinsic::ppc_altivec_vcmpneb_p: CompareOpc = 7; break; case Intrinsic::ppc_altivec_vcmpneh_p: CompareOpc = 71; break; case Intrinsic::ppc_altivec_vcmpnew_p: CompareOpc = 135; break; case Intrinsic::ppc_altivec_vcmpnezb_p: CompareOpc = 263; break; case Intrinsic::ppc_altivec_vcmpnezh_p: CompareOpc = 327; break; case Intrinsic::ppc_altivec_vcmpnezw_p: CompareOpc = 391; break; } isDot = true; } else return false; break; case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = true; break; case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = true; break; case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = true; break; case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = true; break; case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = true; break; case Intrinsic::ppc_altivec_vcmpgtsd_p: if (Subtarget.hasP8Altivec()) { CompareOpc = 967; isDot = true; } else return false; break; case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = true; break; case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = true; break; case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = true; break; case Intrinsic::ppc_altivec_vcmpgtud_p: if 
(Subtarget.hasP8Altivec()) { CompareOpc = 711; isDot = true; } else return false; break; case Intrinsic::ppc_altivec_vcmpequq: case Intrinsic::ppc_altivec_vcmpgtsq: case Intrinsic::ppc_altivec_vcmpgtuq: if (!Subtarget.isISA3_1()) return false; switch (IntrinsicID) { default: llvm_unreachable("Unknown comparison intrinsic."); case Intrinsic::ppc_altivec_vcmpequq: CompareOpc = 455; break; case Intrinsic::ppc_altivec_vcmpgtsq: CompareOpc = 903; break; case Intrinsic::ppc_altivec_vcmpgtuq: CompareOpc = 647; break; } break; // VSX predicate comparisons use the same infrastructure case Intrinsic::ppc_vsx_xvcmpeqdp_p: case Intrinsic::ppc_vsx_xvcmpgedp_p: case Intrinsic::ppc_vsx_xvcmpgtdp_p: case Intrinsic::ppc_vsx_xvcmpeqsp_p: case Intrinsic::ppc_vsx_xvcmpgesp_p: case Intrinsic::ppc_vsx_xvcmpgtsp_p: if (Subtarget.hasVSX()) { switch (IntrinsicID) { case Intrinsic::ppc_vsx_xvcmpeqdp_p: CompareOpc = 99; break; case Intrinsic::ppc_vsx_xvcmpgedp_p: CompareOpc = 115; break; case Intrinsic::ppc_vsx_xvcmpgtdp_p: CompareOpc = 107; break; case Intrinsic::ppc_vsx_xvcmpeqsp_p: CompareOpc = 67; break; case Intrinsic::ppc_vsx_xvcmpgesp_p: CompareOpc = 83; break; case Intrinsic::ppc_vsx_xvcmpgtsp_p: CompareOpc = 75; break; } isDot = true; } else return false; break; // Normal Comparisons. case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; break; case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; break; case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; break; case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; break; case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; break; case Intrinsic::ppc_altivec_vcmpequd: if (Subtarget.hasP8Altivec()) CompareOpc = 199; else return false; break; case Intrinsic::ppc_altivec_vcmpneb: case Intrinsic::ppc_altivec_vcmpneh: case Intrinsic::ppc_altivec_vcmpnew: case Intrinsic::ppc_altivec_vcmpnezb: case Intrinsic::ppc_altivec_vcmpnezh: case Intrinsic::ppc_altivec_vcmpnezw: if (Subtarget.hasP9Altivec()) switch (IntrinsicID) { default: llvm_unreachable("Unknown comparison intrinsic."); case Intrinsic::ppc_altivec_vcmpneb: CompareOpc = 7; break; case Intrinsic::ppc_altivec_vcmpneh: CompareOpc = 71; break; case Intrinsic::ppc_altivec_vcmpnew: CompareOpc = 135; break; case Intrinsic::ppc_altivec_vcmpnezb: CompareOpc = 263; break; case Intrinsic::ppc_altivec_vcmpnezh: CompareOpc = 327; break; case Intrinsic::ppc_altivec_vcmpnezw: CompareOpc = 391; break; } else return false; break; case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; break; case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; break; case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; break; case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; break; case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; break; case Intrinsic::ppc_altivec_vcmpgtsd: if (Subtarget.hasP8Altivec()) CompareOpc = 967; else return false; break; case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; break; case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; break; case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; break; case Intrinsic::ppc_altivec_vcmpgtud: if (Subtarget.hasP8Altivec()) CompareOpc = 711; else return false; break; case Intrinsic::ppc_altivec_vcmpequq_p: case Intrinsic::ppc_altivec_vcmpgtsq_p: case Intrinsic::ppc_altivec_vcmpgtuq_p: if (!Subtarget.isISA3_1()) return false; switch (IntrinsicID) { default: llvm_unreachable("Unknown comparison intrinsic."); case Intrinsic::ppc_altivec_vcmpequq_p: CompareOpc = 455; break; case Intrinsic::ppc_altivec_vcmpgtsq_p: CompareOpc = 903; break; 
case Intrinsic::ppc_altivec_vcmpgtuq_p: CompareOpc = 647; break; } isDot = true; break; } return true; } /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom /// lower, do it, otherwise return null. SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned IntrinsicID = cast(Op.getOperand(0))->getZExtValue(); SDLoc dl(Op); switch (IntrinsicID) { case Intrinsic::thread_pointer: // Reads the thread pointer register, used for __builtin_thread_pointer. if (Subtarget.isPPC64()) return DAG.getRegister(PPC::X13, MVT::i64); return DAG.getRegister(PPC::R2, MVT::i32); case Intrinsic::ppc_mma_disassemble_acc: case Intrinsic::ppc_vsx_disassemble_pair: { int NumVecs = 2; SDValue WideVec = Op.getOperand(1); if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) { NumVecs = 4; WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec); } SmallVector RetOps; for (int VecNo = 0; VecNo < NumVecs; VecNo++) { SDValue Extract = DAG.getNode( PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec, DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo : VecNo, dl, MVT::i64)); RetOps.push_back(Extract); } return DAG.getMergeValues(RetOps, dl); } } // If this is a lowered altivec predicate compare, CompareOpc is set to the // opcode number of the comparison. int CompareOpc; bool isDot; if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget)) return SDValue(); // Don't custom lower most intrinsics. // If this is a non-dot comparison, make the VCMP node and we are done. if (!isDot) { SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(), Op.getOperand(1), Op.getOperand(2), DAG.getConstant(CompareOpc, dl, MVT::i32)); return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp); } // Create the PPCISD altivec 'dot' comparison node. SDValue Ops[] = { Op.getOperand(2), // LHS Op.getOperand(3), // RHS DAG.getConstant(CompareOpc, dl, MVT::i32) }; EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue }; SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops); // Now that we have the comparison, emit a copy from the CR to a GPR. // This is flagged to the above dot comparison. SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32, DAG.getRegister(PPC::CR6, MVT::i32), CompNode.getValue(1)); // Unpack the result based on how the target uses it. unsigned BitNo; // Bit # of CR6. bool InvertBit; // Invert result? switch (cast(Op.getOperand(1))->getZExtValue()) { default: // Can't happen, don't crash on invalid number though. case 0: // Return the value of the EQ bit of CR6. BitNo = 0; InvertBit = false; break; case 1: // Return the inverted value of the EQ bit of CR6. BitNo = 0; InvertBit = true; break; case 2: // Return the value of the LT bit of CR6. BitNo = 2; InvertBit = false; break; case 3: // Return the inverted value of the LT bit of CR6. BitNo = 2; InvertBit = true; break; } // Shift the bit into the low position. Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags, DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32)); // Isolate the bit. Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags, DAG.getConstant(1, dl, MVT::i32)); // If we are supposed to, toggle the bit. if (InvertBit) Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags, DAG.getConstant(1, dl, MVT::i32)); return Flags; } SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const { // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to // the beginning of the argument list. int ArgStart = isa(Op.getOperand(0)) ? 
                     0 : 1;
  SDLoc DL(Op);
  switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
  case Intrinsic::ppc_cfence: {
    assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
    assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
    return SDValue(
        DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
                           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
                                       Op.getOperand(ArgStart + 1)),
                           Op.getOperand(0)),
        0);
  }
  default:
    break;
  }
  return SDValue();
}

// Lower scalar BSWAP64 to xxbrd.
SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // MTVSRDD
  Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
                   Op.getOperand(0));
  // XXBRD
  Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
  // MFVSRD
  int VectorIndex = 0;
  if (Subtarget.isLittleEndian())
    VectorIndex = 1;
  Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
                   DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
  return Op;
}

// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
// compared to a value that is atomically loaded (atomic loads zero-extend).
SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
                                                SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
         "Expecting an atomic compare-and-swap here.");
  SDLoc dl(Op);
  auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
  EVT MemVT = AtomicNode->getMemoryVT();
  if (MemVT.getSizeInBits() >= 32)
    return Op;

  SDValue CmpOp = Op.getOperand(2);
  // If this is already correctly zero-extended, leave it alone.
  auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
  if (DAG.MaskedValueIsZero(CmpOp, HighBits))
    return Op;

  // Clear the high bits of the compare operand.
  unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
  SDValue NewCmpOp = DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
                                 DAG.getConstant(MaskVal, dl, MVT::i32));

  // Replace the existing compare operand with the properly zero-extended one.
  SmallVector<SDValue, 4> Ops;
  for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
    Ops.push_back(AtomicNode->getOperand(i));
  Ops[2] = NewCmpOp;
  MachineMemOperand *MMO = AtomicNode->getMemOperand();
  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
  auto NodeTy =
      (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
  return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
}

SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // Create a stack slot that is 16-byte aligned.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  // Store the input value into Value#0 of the stack slot.
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
                               MachinePointerInfo());

  // Load it out.
  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
}

SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                  SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
         "Should only be called for ISD::INSERT_VECTOR_ELT");

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  // We have legal lowering for constant indices but not for variable ones.
  if (!C)
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
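  // For example, inserting element 3 of a v8i16 (2 bytes per element) targets
  // byte 6 on big endian; on little endian the byte offset is mirrored to
  // (16 - 2) - 6 == 8, matching the computation below.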
if (VT == MVT::v8i16 || VT == MVT::v16i8) { SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2); unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8; unsigned InsertAtElement = C->getZExtValue(); unsigned InsertAtByte = InsertAtElement * BytesInEachElement; if (Subtarget.isLittleEndian()) { InsertAtByte = (16 - BytesInEachElement) - InsertAtByte; } return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz, DAG.getConstant(InsertAtByte, dl, MVT::i32)); } return Op; } SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); LoadSDNode *LN = cast(Op.getNode()); SDValue LoadChain = LN->getChain(); SDValue BasePtr = LN->getBasePtr(); EVT VT = Op.getValueType(); if (VT != MVT::v256i1 && VT != MVT::v512i1) return Op; // Type v256i1 is used for pairs and v512i1 is used for accumulators. // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in // 2 or 4 vsx registers. assert((VT != MVT::v512i1 || Subtarget.hasMMA()) && "Type unsupported without MMA"); assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) && "Type unsupported without paired vector support"); Align Alignment = LN->getAlign(); SmallVector Loads; SmallVector LoadChains; unsigned NumVecs = VT.getSizeInBits() / 128; for (unsigned Idx = 0; Idx < NumVecs; ++Idx) { SDValue Load = DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr, LN->getPointerInfo().getWithOffset(Idx * 16), commonAlignment(Alignment, Idx * 16), LN->getMemOperand()->getFlags(), LN->getAAInfo()); BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getConstant(16, dl, BasePtr.getValueType())); Loads.push_back(Load); LoadChains.push_back(Load.getValue(1)); } if (Subtarget.isLittleEndian()) { std::reverse(Loads.begin(), Loads.end()); std::reverse(LoadChains.begin(), LoadChains.end()); } SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); SDValue Value = DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD, dl, VT, Loads); SDValue RetOps[] = {Value, TF}; return DAG.getMergeValues(RetOps, dl); } SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); StoreSDNode *SN = cast(Op.getNode()); SDValue StoreChain = SN->getChain(); SDValue BasePtr = SN->getBasePtr(); SDValue Value = SN->getValue(); EVT StoreVT = Value.getValueType(); if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1) return Op; // Type v256i1 is used for pairs and v512i1 is used for accumulators. // Here we create 2 or 4 v16i8 stores to store the pair or accumulator // underlying registers individually. assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) && "Type unsupported without MMA"); assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) && "Type unsupported without paired vector support"); Align Alignment = SN->getAlign(); SmallVector Stores; unsigned NumVecs = 2; if (StoreVT == MVT::v512i1) { Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value); NumVecs = 4; } for (unsigned Idx = 0; Idx < NumVecs; ++Idx) { unsigned VecNum = Subtarget.isLittleEndian() ? 
NumVecs - 1 - Idx : Idx; SDValue Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value, DAG.getConstant(VecNum, dl, MVT::i64)); SDValue Store = DAG.getStore(StoreChain, dl, Elt, BasePtr, SN->getPointerInfo().getWithOffset(Idx * 16), commonAlignment(Alignment, Idx * 16), SN->getMemOperand()->getFlags(), SN->getAAInfo()); BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getConstant(16, dl, BasePtr.getValueType())); Stores.push_back(Store); } SDValue TF = DAG.getTokenFactor(dl, Stores); return TF; } SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); if (Op.getValueType() == MVT::v4i32) { SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl); // +16 as shift amt. SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl); SDValue RHSSwap = // = vrlw RHS, 16 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl); // Shrinkify inputs to v8i16. LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS); RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS); RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap); // Low parts multiplied together, generating 32-bit results (we ignore the // top parts). SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, LHS, RHS, DAG, dl, MVT::v4i32); SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32); // Shift the high parts up 16 bits. HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, Neg16, DAG, dl); return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd); } else if (Op.getValueType() == MVT::v16i8) { SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); bool isLittleEndian = Subtarget.isLittleEndian(); // Multiply the even 8-bit parts, producing 16-bit sums. SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, LHS, RHS, DAG, dl, MVT::v8i16); EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts); // Multiply the odd 8-bit parts, producing 16-bit sums. SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, LHS, RHS, DAG, dl, MVT::v8i16); OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts); // Merge the results together. Because vmuleub and vmuloub are // instructions with a big-endian bias, we must reverse the // element numbering and reverse the meaning of "odd" and "even" // when generating little endian code. int Ops[16]; for (unsigned i = 0; i != 8; ++i) { if (isLittleEndian) { Ops[i*2 ] = 2*i; Ops[i*2+1] = 2*i+16; } else { Ops[i*2 ] = 2*i+1; Ops[i*2+1] = 2*i+1+16; } } if (isLittleEndian) return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops); else return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); } else { llvm_unreachable("Unknown mul to lower!"); } } SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { bool IsStrict = Op->isStrictFPOpcode(); if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 && !Subtarget.hasP9Vector()) return SDValue(); return Op; } // Custom lowering for fpext vf32 to v2f64 SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::FP_EXTEND && "Should only be called for ISD::FP_EXTEND"); // FIXME: handle extends from half precision float vectors on P9. // We only want to custom lower an extend from v2f32 to v2f64. 
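  // The cases handled below are: an EXTRACT_SUBVECTOR of a v4f32 at an even
  // index (lowered directly to FP_EXTEND_HALF), an FADD/FMUL/FSUB whose
  // operands are both loads, and a plain load (both rebuilt around LD_VSX_LH
  // feeding FP_EXTEND_HALF); anything else falls back to the default lowering.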
if (Op.getValueType() != MVT::v2f64 || Op.getOperand(0).getValueType() != MVT::v2f32) return SDValue(); SDLoc dl(Op); SDValue Op0 = Op.getOperand(0); switch (Op0.getOpcode()) { default: return SDValue(); case ISD::EXTRACT_SUBVECTOR: { assert(Op0.getNumOperands() == 2 && isa(Op0->getOperand(1)) && "Node should have 2 operands with second one being a constant!"); if (Op0.getOperand(0).getValueType() != MVT::v4f32) return SDValue(); // Custom lower is only done for high or low doubleword. int Idx = cast(Op0.getOperand(1))->getZExtValue(); if (Idx % 2 != 0) return SDValue(); // Since input is v4f32, at this point Idx is either 0 or 2. // Shift to get the doubleword position we want. int DWord = Idx >> 1; // High and low word positions are different on little endian. if (Subtarget.isLittleEndian()) DWord ^= 0x1; return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32)); } case ISD::FADD: case ISD::FMUL: case ISD::FSUB: { SDValue NewLoad[2]; for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) { // Ensure both input are loads. SDValue LdOp = Op0.getOperand(i); if (LdOp.getOpcode() != ISD::LOAD) return SDValue(); // Generate new load node. LoadSDNode *LD = cast(LdOp); SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()}; NewLoad[i] = DAG.getMemIntrinsicNode( PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps, LD->getMemoryVT(), LD->getMemOperand()); } SDValue NewOp = DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0], NewLoad[1], Op0.getNode()->getFlags()); return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp, DAG.getConstant(0, dl, MVT::i32)); } case ISD::LOAD: { LoadSDNode *LD = cast(Op0); SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()}; SDValue NewLd = DAG.getMemIntrinsicNode( PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps, LD->getMemoryVT(), LD->getMemOperand()); return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd, DAG.getConstant(0, dl, MVT::i32)); } } llvm_unreachable("ERROR:Should return for all cases within swtich."); } /// LowerOperation - Provide custom lowering hooks for some operations. /// SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Wasn't expecting to be able to lower this!"); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); case ISD::INLINEASM: case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG); // Variable argument lowering. case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::VAARG: return LowerVAARG(Op, DAG); case ISD::VACOPY: return LowerVACOPY(Op, DAG); case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::GET_DYNAMIC_AREA_OFFSET: return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG); // Exception handling lowering. 
case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG); case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::STRICT_FP_TO_UINT: case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op)); case ISD::STRICT_UINT_TO_FP: case ISD::STRICT_SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); // Lower 64-bit shifts. case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG); case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG); case ISD::FSHL: return LowerFunnelShift(Op, DAG); case ISD::FSHR: return LowerFunnelShift(Op, DAG); // Vector-related lowering. case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG); case ISD::ROTL: return LowerROTL(Op, DAG); // For counter-based loop handling. case ISD::INTRINSIC_W_CHAIN: return SDValue(); case ISD::BITCAST: return LowerBITCAST(Op, DAG); // Frame & Return address. case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG); case ISD::BSWAP: return LowerBSWAP(Op, DAG); case ISD::ATOMIC_CMP_SWAP: return LowerATOMIC_CMP_SWAP(Op, DAG); } } void PPCTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl&Results, SelectionDAG &DAG) const { SDLoc dl(N); switch (N->getOpcode()) { default: llvm_unreachable("Do not know how to custom type legalize this operation!"); case ISD::READCYCLECOUNTER: { SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0)); Results.push_back( DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1))); Results.push_back(RTB.getValue(2)); break; } case ISD::INTRINSIC_W_CHAIN: { if (cast(N->getOperand(1))->getZExtValue() != Intrinsic::loop_decrement) break; assert(N->getValueType(0) == MVT::i1 && "Unexpected result type for CTR decrement intrinsic"); EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), N->getValueType(0)); SDVTList VTs = DAG.getVTList(SVT, MVT::Other); SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), N->getOperand(1)); Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt)); Results.push_back(NewInt.getValue(1)); break; } case ISD::VAARG: { if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64()) return; EVT VT = N->getValueType(0); if (VT == MVT::i64) { SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG); Results.push_back(NewNode); Results.push_back(NewNode.getValue(1)); } return; } case ISD::STRICT_FP_TO_SINT: case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: // LowerFP_TO_INT() can only handle f32 and f64. if (N->getOperand(N->isStrictFPOpcode() ? 
1 : 0).getValueType() == MVT::ppcf128) return; Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); return; case ISD::TRUNCATE: { if (!N->getValueType(0).isVector()) return; SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG); if (Lowered) Results.push_back(Lowered); return; } case ISD::FSHL: case ISD::FSHR: // Don't handle funnel shifts here. return; case ISD::BITCAST: // Don't handle bitcast here. return; case ISD::FP_EXTEND: SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG); if (Lowered) Results.push_back(Lowered); return; } } //===----------------------------------------------------------------------===// // Other Lowering Code //===----------------------------------------------------------------------===// static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); Function *Func = Intrinsic::getDeclaration(M, Id); return Builder.CreateCall(Func, {}); } // The mappings for emitLeading/TrailingFence is taken from // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const { if (Ord == AtomicOrdering::SequentiallyConsistent) return callIntrinsic(Builder, Intrinsic::ppc_sync); if (isReleaseOrStronger(Ord)) return callIntrinsic(Builder, Intrinsic::ppc_lwsync); return nullptr; } Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst, AtomicOrdering Ord) const { if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) { // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification. if (isa(Inst) && Subtarget.isPPC64()) return Builder.CreateCall( Intrinsic::getDeclaration( Builder.GetInsertBlock()->getParent()->getParent(), Intrinsic::ppc_cfence, {Inst->getType()}), {Inst}); // FIXME: Can use isync for rmw operation. return callIntrinsic(Builder, Intrinsic::ppc_lwsync); } return nullptr; } MachineBasicBlock * PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const { // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. const TargetInstrInfo *TII = Subtarget.getInstrInfo(); auto LoadMnemonic = PPC::LDARX; auto StoreMnemonic = PPC::STDCX; switch (AtomicSize) { default: llvm_unreachable("Unexpected size of atomic entity"); case 1: LoadMnemonic = PPC::LBARX; StoreMnemonic = PPC::STBCX; assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); break; case 2: LoadMnemonic = PPC::LHARX; StoreMnemonic = PPC::STHCX; assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); break; case 4: LoadMnemonic = PPC::LWARX; StoreMnemonic = PPC::STWCX; break; case 8: LoadMnemonic = PPC::LDARX; StoreMnemonic = PPC::STDCX; break; } const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); MachineFunction::iterator It = ++BB->getIterator(); Register dest = MI.getOperand(0).getReg(); Register ptrA = MI.getOperand(1).getReg(); Register ptrB = MI.getOperand(2).getReg(); Register incr = MI.getOperand(3).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = CmpOpcode ? 
F->CreateMachineBasicBlock(LLVM_BB) : nullptr; MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loopMBB); if (CmpOpcode) F->insert(It, loop2MBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); Register TmpReg = (!BinOpcode) ? incr : RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass); // thisMBB: // ... // fallthrough --> loopMBB BB->addSuccessor(loopMBB); // loopMBB: // l[wd]arx dest, ptr // add r0, dest, incr // st[wd]cx. r0, ptr // bne- loopMBB // fallthrough --> exitMBB // For max/min... // loopMBB: // l[wd]arx dest, ptr // cmpl?[wd] incr, dest // bgt exitMBB // loop2MBB: // st[wd]cx. dest, ptr // bne- loopMBB // fallthrough --> exitMBB BB = loopMBB; BuildMI(BB, dl, TII->get(LoadMnemonic), dest) .addReg(ptrA).addReg(ptrB); if (BinOpcode) BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); if (CmpOpcode) { // Signed comparisons of byte or halfword values must be sign-extended. if (CmpOpcode == PPC::CMPW && AtomicSize < 4) { Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH), ExtReg).addReg(dest); BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) .addReg(incr).addReg(ExtReg); } else BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) .addReg(incr).addReg(dest); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB); BB->addSuccessor(loop2MBB); BB->addSuccessor(exitMBB); BB = loop2MBB; } BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(TmpReg).addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); BB->addSuccessor(loopMBB); BB->addSuccessor(exitMBB); // exitMBB: // ... BB = exitMBB; return BB; } static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) { switch(MI.getOpcode()) { default: return false; case PPC::COPY: return TII->isSignExtended(MI); case PPC::LHA: case PPC::LHA8: case PPC::LHAU: case PPC::LHAU8: case PPC::LHAUX: case PPC::LHAUX8: case PPC::LHAX: case PPC::LHAX8: case PPC::LWA: case PPC::LWAUX: case PPC::LWAX: case PPC::LWAX_32: case PPC::LWA_32: case PPC::PLHA: case PPC::PLHA8: case PPC::PLHA8pc: case PPC::PLHApc: case PPC::PLWA: case PPC::PLWA8: case PPC::PLWA8pc: case PPC::PLWApc: case PPC::EXTSB: case PPC::EXTSB8: case PPC::EXTSB8_32_64: case PPC::EXTSB8_rec: case PPC::EXTSB_rec: case PPC::EXTSH: case PPC::EXTSH8: case PPC::EXTSH8_32_64: case PPC::EXTSH8_rec: case PPC::EXTSH_rec: case PPC::EXTSW: case PPC::EXTSWSLI: case PPC::EXTSWSLI_32_64: case PPC::EXTSWSLI_32_64_rec: case PPC::EXTSWSLI_rec: case PPC::EXTSW_32: case PPC::EXTSW_32_64: case PPC::EXTSW_32_64_rec: case PPC::EXTSW_rec: case PPC::SRAW: case PPC::SRAWI: case PPC::SRAWI_rec: case PPC::SRAW_rec: return true; } return false; } MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary( MachineInstr &MI, MachineBasicBlock *BB, bool is8bit, // operation unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const { // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. const PPCInstrInfo *TII = Subtarget.getInstrInfo(); // If this is a signed comparison and the value being compared is not known // to be sign extended, sign extend it here. 
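  // For example, a signed atomic max on an i16 value of 0xFFFF must see -1
  // rather than 65535, so the value is run through EXTSH (or EXTSB for i8)
  // before the signed CMPW comparison emitted below.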
DebugLoc dl = MI.getDebugLoc(); MachineFunction *F = BB->getParent(); MachineRegisterInfo &RegInfo = F->getRegInfo(); Register incr = MI.getOperand(3).getReg(); bool IsSignExtended = Register::isVirtualRegister(incr) && isSignExtended(*RegInfo.getVRegDef(incr), TII); if (CmpOpcode == PPC::CMPW && !IsSignExtended) { Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg) .addReg(MI.getOperand(3).getReg()); MI.getOperand(3).setReg(ValueReg); } // If we support part-word atomic mnemonics, just use them if (Subtarget.hasPartwordAtomics()) return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode, CmpPred); // In 64 bit mode we have to use 64 bits for addresses, even though the // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address // registers without caring whether they're 32 or 64, but here we're // doing actual arithmetic on the addresses. bool is64bit = Subtarget.isPPC64(); bool isLittleEndian = Subtarget.isLittleEndian(); unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = ++BB->getIterator(); Register dest = MI.getOperand(0).getReg(); Register ptrA = MI.getOperand(1).getReg(); Register ptrB = MI.getOperand(2).getReg(); MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr; MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loopMBB); if (CmpOpcode) F->insert(It, loop2MBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; Register PtrReg = RegInfo.createVirtualRegister(RC); Register Shift1Reg = RegInfo.createVirtualRegister(GPRC); Register ShiftReg = isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC); Register Incr2Reg = RegInfo.createVirtualRegister(GPRC); Register MaskReg = RegInfo.createVirtualRegister(GPRC); Register Mask2Reg = RegInfo.createVirtualRegister(GPRC); Register Mask3Reg = RegInfo.createVirtualRegister(GPRC); Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC); Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC); Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC); Register TmpDestReg = RegInfo.createVirtualRegister(GPRC); Register Ptr1Reg; Register TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC); // thisMBB: // ... // fallthrough --> loopMBB BB->addSuccessor(loopMBB); // The 4-byte load must be aligned, while a char or short may be // anywhere in the word. Hence all this nasty bookkeeping code. // add ptr1, ptrA, ptrB [copy if ptrA==0] // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] // xori shift, shift1, 24 [16] // rlwinm ptr, ptr1, 0, 0, 29 // slw incr2, incr, shift // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] // slw mask, mask2, shift // loopMBB: // lwarx tmpDest, ptr // add tmp, tmpDest, incr2 // andc tmp2, tmpDest, mask // and tmp3, tmp, mask // or tmp4, tmp3, tmp2 // stwcx. tmp4, ptr // bne- loopMBB // fallthrough --> exitMBB // srw dest, tmpDest, shift if (ptrA != ZeroReg) { Ptr1Reg = RegInfo.createVirtualRegister(RC); BuildMI(BB, dl, TII->get(is64bit ? 
PPC::ADD8 : PPC::ADD4), Ptr1Reg) .addReg(ptrA) .addReg(ptrB); } else { Ptr1Reg = ptrB; } // We need use 32-bit subregister to avoid mismatch register class in 64-bit // mode. BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg) .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0) .addImm(3) .addImm(27) .addImm(is8bit ? 28 : 27); if (!isLittleEndian) BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg) .addReg(Shift1Reg) .addImm(is8bit ? 24 : 16); if (is64bit) BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) .addReg(Ptr1Reg) .addImm(0) .addImm(61); else BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) .addReg(Ptr1Reg) .addImm(0) .addImm(0) .addImm(29); BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg); if (is8bit) BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); else { BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) .addReg(Mask3Reg) .addImm(65535); } BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) .addReg(Mask2Reg) .addReg(ShiftReg); BB = loopMBB; BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) .addReg(ZeroReg) .addReg(PtrReg); if (BinOpcode) BuildMI(BB, dl, TII->get(BinOpcode), TmpReg) .addReg(Incr2Reg) .addReg(TmpDestReg); BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg) .addReg(TmpDestReg) .addReg(MaskReg); BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg); if (CmpOpcode) { // For unsigned comparisons, we can directly compare the shifted values. // For signed comparisons we shift and sign extend. Register SReg = RegInfo.createVirtualRegister(GPRC); BuildMI(BB, dl, TII->get(PPC::AND), SReg) .addReg(TmpDestReg) .addReg(MaskReg); unsigned ValueReg = SReg; unsigned CmpReg = Incr2Reg; if (CmpOpcode == PPC::CMPW) { ValueReg = RegInfo.createVirtualRegister(GPRC); BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg) .addReg(SReg) .addReg(ShiftReg); Register ValueSReg = RegInfo.createVirtualRegister(GPRC); BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg) .addReg(ValueReg); ValueReg = ValueSReg; CmpReg = incr; } BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0) .addReg(CmpReg) .addReg(ValueReg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(CmpPred) .addReg(PPC::CR0) .addMBB(exitMBB); BB->addSuccessor(loop2MBB); BB->addSuccessor(exitMBB); BB = loop2MBB; } BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg); BuildMI(BB, dl, TII->get(PPC::STWCX)) .addReg(Tmp4Reg) .addReg(ZeroReg) .addReg(PtrReg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE) .addReg(PPC::CR0) .addMBB(loopMBB); BB->addSuccessor(loopMBB); BB->addSuccessor(exitMBB); // exitMBB: // ... 
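  // The srw emitted below shifts the old word returned by lwarx back down by
  // the bit offset computed above, so that the partword that was operated on
  // ends up in the low bits of dest, which is the value the atomic
  // read-modify-write returns.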
BB = exitMBB; BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest) .addReg(TmpDestReg) .addReg(ShiftReg); return BB; } llvm::MachineBasicBlock * PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI.getDebugLoc(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); const BasicBlock *BB = MBB->getBasicBlock(); MachineFunction::iterator I = ++MBB->getIterator(); Register DstReg = MI.getOperand(0).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(DstReg); assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!"); Register mainDstReg = MRI.createVirtualRegister(RC); Register restoreDstReg = MRI.createVirtualRegister(RC); MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); // For v = setjmp(buf), we generate // // thisMBB: // SjLjSetup mainMBB // bl mainMBB // v_restore = 1 // b sinkMBB // // mainMBB: // buf[LabelOffset] = LR // v_main = 0 // // sinkMBB: // v = phi(main, restore) // MachineBasicBlock *thisMBB = MBB; MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); MF->insert(I, mainMBB); MF->insert(I, sinkMBB); MachineInstrBuilder MIB; // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)), MBB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); // Note that the structure of the jmp_buf used here is not compatible // with that used by libc, and is not designed to be. Specifically, it // stores only those 'reserved' registers that LLVM does not otherwise // understand how to spill. Also, by convention, by the time this // intrinsic is called, Clang has already stored the frame address in the // first slot of the buffer and stack address in the third. Following the // X86 target code, we'll store the jump address in the second slot. We also // need to save the TOC pointer (R2) to handle jumps between shared // libraries, and that will be stored in the fourth slot. The thread // identifier (R13) is not affected. // thisMBB: const int64_t LabelOffset = 1 * PVT.getStoreSize(); const int64_t TOCOffset = 3 * PVT.getStoreSize(); const int64_t BPOffset = 4 * PVT.getStoreSize(); // Prepare IP either in reg. const TargetRegisterClass *PtrRC = getRegClassFor(PVT); Register LabelReg = MRI.createVirtualRegister(PtrRC); Register BufReg = MI.getOperand(1).getReg(); if (Subtarget.is64BitELFABI()) { setUsesTOCBasePtr(*MBB->getParent()); MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) .addReg(PPC::X2) .addImm(TOCOffset) .addReg(BufReg) .cloneMemRefs(MI); } // Naked functions never have a base pointer, and so we use r1. For all // other functions, this decision must be delayed until during PEI. unsigned BaseReg; if (MF->getFunction().hasFnAttribute(Attribute::Naked)) BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1; else BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP; MIB = BuildMI(*thisMBB, MI, DL, TII->get(Subtarget.isPPC64() ? 
PPC::STD : PPC::STW)) .addReg(BaseReg) .addImm(BPOffset) .addReg(BufReg) .cloneMemRefs(MI); // Setup MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); MIB.addRegMask(TRI->getNoPreservedMask()); BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup)) .addMBB(mainMBB); MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB); thisMBB->addSuccessor(mainMBB, BranchProbability::getZero()); thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne()); // mainMBB: // mainDstReg = 0 MIB = BuildMI(mainMBB, DL, TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg); // Store IP if (Subtarget.isPPC64()) { MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) .addReg(LabelReg) .addImm(LabelOffset) .addReg(BufReg); } else { MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW)) .addReg(LabelReg) .addImm(LabelOffset) .addReg(BufReg); } MIB.cloneMemRefs(MI); BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0); mainMBB->addSuccessor(sinkMBB); // sinkMBB: BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(PPC::PHI), DstReg) .addReg(mainDstReg).addMBB(mainMBB) .addReg(restoreDstReg).addMBB(thisMBB); MI.eraseFromParent(); return sinkMBB; } MachineBasicBlock * PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI.getDebugLoc(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); MVT PVT = getPointerTy(MF->getDataLayout()); assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); const TargetRegisterClass *RC = (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; Register Tmp = MRI.createVirtualRegister(RC); // Since FP is only updated here but NOT referenced, it's treated as GPR. unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; unsigned BP = (PVT == MVT::i64) ? PPC::X30 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29 : PPC::R30); MachineInstrBuilder MIB; const int64_t LabelOffset = 1 * PVT.getStoreSize(); const int64_t SPOffset = 2 * PVT.getStoreSize(); const int64_t TOCOffset = 3 * PVT.getStoreSize(); const int64_t BPOffset = 4 * PVT.getStoreSize(); Register BufReg = MI.getOperand(0).getReg(); // Reload FP (the jumped-to function may not have had a // frame pointer, and if so, then its r31 will be restored // as necessary). 
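  // The reloads below assume the slot layout written by emitEHSjLjSetJmp:
  // slot 0 holds the frame address, slot 1 the jump (label) address, slot 2
  // the stack pointer, slot 3 the TOC pointer and slot 4 the base pointer;
  // each slot is PVT.getStoreSize() bytes wide.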
if (PVT == MVT::i64) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP) .addImm(0) .addReg(BufReg); } else { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP) .addImm(0) .addReg(BufReg); } MIB.cloneMemRefs(MI); // Reload IP if (PVT == MVT::i64) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp) .addImm(LabelOffset) .addReg(BufReg); } else { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp) .addImm(LabelOffset) .addReg(BufReg); } MIB.cloneMemRefs(MI); // Reload SP if (PVT == MVT::i64) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP) .addImm(SPOffset) .addReg(BufReg); } else { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP) .addImm(SPOffset) .addReg(BufReg); } MIB.cloneMemRefs(MI); // Reload BP if (PVT == MVT::i64) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP) .addImm(BPOffset) .addReg(BufReg); } else { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP) .addImm(BPOffset) .addReg(BufReg); } MIB.cloneMemRefs(MI); // Reload TOC if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) { setUsesTOCBasePtr(*MBB->getParent()); MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) .addImm(TOCOffset) .addReg(BufReg) .cloneMemRefs(MI); } // Jump BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp); BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR)); MI.eraseFromParent(); return MBB; } bool PPCTargetLowering::hasInlineStackProbe(MachineFunction &MF) const { // If the function specifically requests inline stack probes, emit them. if (MF.getFunction().hasFnAttribute("probe-stack")) return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() == "inline-asm"; return false; } unsigned PPCTargetLowering::getStackProbeSize(MachineFunction &MF) const { const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); unsigned StackAlign = TFI->getStackAlignment(); assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) && "Unexpected stack alignment"); // The default stack probe size is 4096 if the function has no // stack-probe-size attribute. unsigned StackProbeSize = 4096; const Function &Fn = MF.getFunction(); if (Fn.hasFnAttribute("stack-probe-size")) Fn.getFnAttribute("stack-probe-size") .getValueAsString() .getAsInteger(0, StackProbeSize); // Round down to the stack alignment. StackProbeSize &= ~(StackAlign - 1); return StackProbeSize ? StackProbeSize : StackAlign; } // Lower dynamic stack allocation with probing. `emitProbedAlloca` is splitted // into three phases. In the first phase, it uses pseudo instruction // PREPARE_PROBED_ALLOCA to get the future result of actual FramePointer and // FinalStackPtr. In the second phase, it generates a loop for probing blocks. // At last, it uses pseudo instruction DYNAREAOFFSET to get the future result of // MaxCallFrameSize so that it can calculate correct data area pointer. 
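// For example (a rough sketch of the numbers involved): with the default
// 4096-byte probe size, a dynamic allocation of 10000 bytes first probes the
// 10000 % 4096 == 1808 residual bytes, then the loop in BlockMBB touches the
// remaining two full 4096-byte blocks before TailMBB computes the final data
// area pointer.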
MachineBasicBlock * PPCTargetLowering::emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const { const bool isPPC64 = Subtarget.isPPC64(); MachineFunction *MF = MBB->getParent(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); const unsigned ProbeSize = getStackProbeSize(*MF); const BasicBlock *ProbedBB = MBB->getBasicBlock(); MachineRegisterInfo &MRI = MF->getRegInfo(); // The CFG of probing stack looks as // +-----+ // | MBB | // +--+--+ // | // +----v----+ // +--->+ TestMBB +---+ // | +----+----+ | // | | | // | +-----v----+ | // +---+ BlockMBB | | // +----------+ | // | // +---------+ | // | TailMBB +<--+ // +---------+ // In MBB, calculate previous frame pointer and final stack pointer. // In TestMBB, test if sp is equal to final stack pointer, if so, jump to // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB. // TailMBB is spliced via \p MI. MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB); MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB); MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB); MachineFunction::iterator MBBIter = ++MBB->getIterator(); MF->insert(MBBIter, TestMBB); MF->insert(MBBIter, BlockMBB); MF->insert(MBBIter, TailMBB); const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; Register DstReg = MI.getOperand(0).getReg(); Register NegSizeReg = MI.getOperand(1).getReg(); Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC); Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC); Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC); // Since value of NegSizeReg might be realigned in prologepilog, insert a // PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and // NegSize. unsigned ProbeOpc; if (!MRI.hasOneNonDBGUse(NegSizeReg)) ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32; else // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg // and NegSizeReg will be allocated in the same phyreg to avoid // redundant copy when NegSizeReg has only one use which is current MI and // will be replaced by PREPARE_PROBED_ALLOCA then. ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32; BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer) .addDef(ActualNegSizeReg) .addReg(NegSizeReg) .add(MI.getOperand(2)) .add(MI.getOperand(3)); // Calculate final stack pointer, which equals to SP + ActualNegSize. BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), FinalStackPtr) .addReg(SPReg) .addReg(ActualNegSizeReg); // Materialize a scratch register for update. int64_t NegProbeSize = -(int64_t)ProbeSize; assert(isInt<32>(NegProbeSize) && "Unhandled probe size!"); Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC); if (!isInt<16>(NegProbeSize)) { Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC); BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg) .addImm(NegProbeSize >> 16); BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI), ScratchReg) .addReg(TempReg) .addImm(NegProbeSize & 0xFFFF); } else BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg) .addImm(NegProbeSize); { // Probing leading residual part. Register Div = MRI.createVirtualRegister(isPPC64 ? 
G8RC : GPRC); BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div) .addReg(ActualNegSizeReg) .addReg(ScratchReg); Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC); BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul) .addReg(Div) .addReg(ScratchReg); Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC); BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod) .addReg(Mul) .addReg(ActualNegSizeReg); BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg) .addReg(FramePointer) .addReg(SPReg) .addReg(NegMod); } { // Remaining part should be multiple of ProbeSize. Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass); BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult) .addReg(SPReg) .addReg(FinalStackPtr); BuildMI(TestMBB, DL, TII->get(PPC::BCC)) .addImm(PPC::PRED_EQ) .addReg(CmpResult) .addMBB(TailMBB); TestMBB->addSuccessor(BlockMBB); TestMBB->addSuccessor(TailMBB); } { // Touch the block. // |P...|P...|P... BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg) .addReg(FramePointer) .addReg(SPReg) .addReg(ScratchReg); BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB); BlockMBB->addSuccessor(TestMBB); } // Calculation of MaxCallFrameSize is deferred to prologepilog, use // DYNAREAOFFSET pseudo instruction to get the future result. Register MaxCallFrameSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC); BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET), MaxCallFrameSizeReg) .add(MI.getOperand(2)) .add(MI.getOperand(3)); BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg) .addReg(SPReg) .addReg(MaxCallFrameSizeReg); // Splice instructions after MI to TailMBB. TailMBB->splice(TailMBB->end(), MBB, std::next(MachineBasicBlock::iterator(MI)), MBB->end()); TailMBB->transferSuccessorsAndUpdatePHIs(MBB); MBB->addSuccessor(TestMBB); // Delete the pseudo instruction. MI.eraseFromParent(); ++NumDynamicAllocaProbed; return TailMBB; } MachineBasicBlock * PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { if (MI.getOpcode() == TargetOpcode::STACKMAP || MI.getOpcode() == TargetOpcode::PATCHPOINT) { if (Subtarget.is64BitELFABI() && MI.getOpcode() == TargetOpcode::PATCHPOINT && !Subtarget.isUsingPCRelativeCalls()) { // Call lowering should have added an r2 operand to indicate a dependence // on the TOC base pointer value. It can't however, because there is no // way to mark the dependence as implicit there, and so the stackmap code // will confuse it with a regular operand. Instead, add the dependence // here. MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true)); } return emitPatchPoint(MI, BB); } if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 || MI.getOpcode() == PPC::EH_SjLj_SetJmp64) { return emitEHSjLjSetJmp(MI, BB); } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 || MI.getOpcode() == PPC::EH_SjLj_LongJmp64) { return emitEHSjLjLongJmp(MI, BB); } const TargetInstrInfo *TII = Subtarget.getInstrInfo(); // To "insert" these instructions we actually have to insert their // control-flow patterns. 
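  // Each expansion below follows the same shape: create the new blocks from
  // LLVM_BB, insert them at It, splice everything after MI into the final
  // (sink/exit) block with transferSuccessorsAndUpdatePHIs, wire up the
  // branch and fallthrough edges, and, for the select expansions, join the
  // two incoming values with a PHI in the sink block.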
const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = ++BB->getIterator(); MachineFunction *F = BB->getParent(); if (MI.getOpcode() == PPC::SELECT_CC_I4 || MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8) { SmallVector Cond; if (MI.getOpcode() == PPC::SELECT_CC_I4 || MI.getOpcode() == PPC::SELECT_CC_I8) Cond.push_back(MI.getOperand(4)); else Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); Cond.push_back(MI.getOperand(1)); DebugLoc dl = MI.getDebugLoc(); TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond, MI.getOperand(2).getReg(), MI.getOperand(3).getReg()); } else if (MI.getOpcode() == PPC::SELECT_CC_F4 || MI.getOpcode() == PPC::SELECT_CC_F8 || MI.getOpcode() == PPC::SELECT_CC_F16 || MI.getOpcode() == PPC::SELECT_CC_VRRC || MI.getOpcode() == PPC::SELECT_CC_VSFRC || MI.getOpcode() == PPC::SELECT_CC_VSSRC || MI.getOpcode() == PPC::SELECT_CC_VSRC || MI.getOpcode() == PPC::SELECT_CC_SPE4 || MI.getOpcode() == PPC::SELECT_CC_SPE || MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 || MI.getOpcode() == PPC::SELECT_F16 || MI.getOpcode() == PPC::SELECT_SPE || MI.getOpcode() == PPC::SELECT_SPE4 || MI.getOpcode() == PPC::SELECT_VRRC || MI.getOpcode() == PPC::SELECT_VSFRC || MI.getOpcode() == PPC::SELECT_VSSRC || MI.getOpcode() == PPC::SELECT_VSRC) { // The incoming instruction knows the destination vreg to set, the // condition code register to branch on, the true/false values to // select between, and a branch opcode to use. // thisMBB: // ... // TrueVal = ... // cmpTY ccX, r1, r2 // bCC copy1MBB // fallthrough --> copy0MBB MachineBasicBlock *thisMBB = BB; MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); DebugLoc dl = MI.getDebugLoc(); F->insert(It, copy0MBB); F->insert(It, sinkMBB); // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(BB); // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 || MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 || MI.getOpcode() == PPC::SELECT_F16 || MI.getOpcode() == PPC::SELECT_SPE4 || MI.getOpcode() == PPC::SELECT_SPE || MI.getOpcode() == PPC::SELECT_VRRC || MI.getOpcode() == PPC::SELECT_VSFRC || MI.getOpcode() == PPC::SELECT_VSSRC || MI.getOpcode() == PPC::SELECT_VSRC) { BuildMI(BB, dl, TII->get(PPC::BC)) .addReg(MI.getOperand(1).getReg()) .addMBB(sinkMBB); } else { unsigned SelectPred = MI.getOperand(4).getImm(); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(SelectPred) .addReg(MI.getOperand(1).getReg()) .addMBB(sinkMBB); } // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB BB = copy0MBB; // Update machine-CFG edges BB->addSuccessor(sinkMBB); // sinkMBB: // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... BB = sinkMBB; BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg()) .addReg(MI.getOperand(3).getReg()) .addMBB(copy0MBB) .addReg(MI.getOperand(2).getReg()) .addMBB(thisMBB); } else if (MI.getOpcode() == PPC::ReadTB) { // To read the 64-bit time-base register on a 32-bit target, we read the // two halves. Should the counter have wrapped while it was being read, we // need to try again. // ... 
// readLoop: // mfspr Rx,TBU # load from TBU // mfspr Ry,TB # load from TB // mfspr Rz,TBU # load from TBU // cmpw crX,Rx,Rz # check if 'old'='new' // bne readLoop # branch if they're not equal // ... MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); DebugLoc dl = MI.getDebugLoc(); F->insert(It, readMBB); F->insert(It, sinkMBB); // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(BB); BB->addSuccessor(readMBB); BB = readMBB; MachineRegisterInfo &RegInfo = F->getRegInfo(); Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); Register LoReg = MI.getOperand(0).getReg(); Register HiReg = MI.getOperand(1).getReg(); BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269); BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268); BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269); Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg) .addReg(HiReg) .addReg(ReadAgainReg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE) .addReg(CmpReg) .addMBB(readMBB); BB->addSuccessor(readMBB); BB->addSuccessor(sinkMBB); } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::AND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::OR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); else if (MI.getOpcode() == 
PPC::ATOMIC_LOAD_SUB_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32) BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32) BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64) BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32) BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64) BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32) BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64) BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32) BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE); else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64) BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE); else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, 0); else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, 0); else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32) BB = EmitAtomicBinary(MI, BB, 4, 0); else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64) BB = EmitAtomicBinary(MI, BB, 8, 0); else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 || (Subtarget.hasPartwordAtomics() && MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) || (Subtarget.hasPartwordAtomics() && MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) { bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; auto LoadMnemonic = PPC::LDARX; auto StoreMnemonic = PPC::STDCX; switch (MI.getOpcode()) { default: llvm_unreachable("Compare and swap of unknown size"); case PPC::ATOMIC_CMP_SWAP_I8: LoadMnemonic = PPC::LBARX; StoreMnemonic = PPC::STBCX; assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); break; case PPC::ATOMIC_CMP_SWAP_I16: LoadMnemonic = PPC::LHARX; StoreMnemonic = PPC::STHCX; assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); break; case PPC::ATOMIC_CMP_SWAP_I32: LoadMnemonic = PPC::LWARX; StoreMnemonic = PPC::STWCX; break; case PPC::ATOMIC_CMP_SWAP_I64: LoadMnemonic = PPC::LDARX; 
StoreMnemonic = PPC::STDCX; break; } Register dest = MI.getOperand(0).getReg(); Register ptrA = MI.getOperand(1).getReg(); Register ptrB = MI.getOperand(2).getReg(); Register oldval = MI.getOperand(3).getReg(); Register newval = MI.getOperand(4).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loop1MBB); F->insert(It, loop2MBB); F->insert(It, midMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); // thisMBB: // ... // fallthrough --> loopMBB BB->addSuccessor(loop1MBB); // loop1MBB: // l[bhwd]arx dest, ptr // cmp[wd] dest, oldval // bne- midMBB // loop2MBB: // st[bhwd]cx. newval, ptr // bne- loopMBB // b exitBB // midMBB: // st[bhwd]cx. dest, ptr // exitBB: BB = loop1MBB; BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0) .addReg(oldval) .addReg(dest); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE) .addReg(PPC::CR0) .addMBB(midMBB); BB->addSuccessor(loop2MBB); BB->addSuccessor(midMBB); BB = loop2MBB; BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(newval) .addReg(ptrA) .addReg(ptrB); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE) .addReg(PPC::CR0) .addMBB(loop1MBB); BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); BB->addSuccessor(loop1MBB); BB->addSuccessor(exitMBB); BB = midMBB; BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(dest) .addReg(ptrA) .addReg(ptrB); BB->addSuccessor(exitMBB); // exitMBB: // ... BB = exitMBB; } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 || MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) { // We must use 64-bit registers for addresses when targeting 64-bit, // since we're actually doing arithmetic on them. Other registers // can be 32-bit. bool is64bit = Subtarget.isPPC64(); bool isLittleEndian = Subtarget.isLittleEndian(); bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; Register dest = MI.getOperand(0).getReg(); Register ptrA = MI.getOperand(1).getReg(); Register ptrB = MI.getOperand(2).getReg(); Register oldval = MI.getOperand(3).getReg(); Register newval = MI.getOperand(4).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loop1MBB); F->insert(It, loop2MBB); F->insert(It, midMBB); F->insert(It, exitMBB); exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; Register PtrReg = RegInfo.createVirtualRegister(RC); Register Shift1Reg = RegInfo.createVirtualRegister(GPRC); Register ShiftReg = isLittleEndian ? 
Shift1Reg : RegInfo.createVirtualRegister(GPRC); Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC); Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC); Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC); Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC); Register MaskReg = RegInfo.createVirtualRegister(GPRC); Register Mask2Reg = RegInfo.createVirtualRegister(GPRC); Register Mask3Reg = RegInfo.createVirtualRegister(GPRC); Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC); Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC); Register TmpDestReg = RegInfo.createVirtualRegister(GPRC); Register Ptr1Reg; Register TmpReg = RegInfo.createVirtualRegister(GPRC); Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; // thisMBB: // ... // fallthrough --> loopMBB BB->addSuccessor(loop1MBB); // The 4-byte load must be aligned, while a char or short may be // anywhere in the word. Hence all this nasty bookkeeping code. // add ptr1, ptrA, ptrB [copy if ptrA==0] // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] // xori shift, shift1, 24 [16] // rlwinm ptr, ptr1, 0, 0, 29 // slw newval2, newval, shift // slw oldval2, oldval,shift // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] // slw mask, mask2, shift // and newval3, newval2, mask // and oldval3, oldval2, mask // loop1MBB: // lwarx tmpDest, ptr // and tmp, tmpDest, mask // cmpw tmp, oldval3 // bne- midMBB // loop2MBB: // andc tmp2, tmpDest, mask // or tmp4, tmp2, newval3 // stwcx. tmp4, ptr // bne- loop1MBB // b exitBB // midMBB: // stwcx. tmpDest, ptr // exitBB: // srw dest, tmpDest, shift if (ptrA != ZeroReg) { Ptr1Reg = RegInfo.createVirtualRegister(RC); BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) .addReg(ptrA) .addReg(ptrB); } else { Ptr1Reg = ptrB; } // We need use 32-bit subregister to avoid mismatch register class in 64-bit // mode. BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg) .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0) .addImm(3) .addImm(27) .addImm(is8bit ? 28 : 27); if (!isLittleEndian) BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg) .addReg(Shift1Reg) .addImm(is8bit ? 
24 : 16); if (is64bit) BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) .addReg(Ptr1Reg) .addImm(0) .addImm(61); else BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) .addReg(Ptr1Reg) .addImm(0) .addImm(0) .addImm(29); BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg) .addReg(newval) .addReg(ShiftReg); BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg) .addReg(oldval) .addReg(ShiftReg); if (is8bit) BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); else { BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) .addReg(Mask3Reg) .addImm(65535); } BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) .addReg(Mask2Reg) .addReg(ShiftReg); BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg) .addReg(NewVal2Reg) .addReg(MaskReg); BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg) .addReg(OldVal2Reg) .addReg(MaskReg); BB = loop1MBB; BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) .addReg(ZeroReg) .addReg(PtrReg); BuildMI(BB, dl, TII->get(PPC::AND), TmpReg) .addReg(TmpDestReg) .addReg(MaskReg); BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0) .addReg(TmpReg) .addReg(OldVal3Reg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE) .addReg(PPC::CR0) .addMBB(midMBB); BB->addSuccessor(loop2MBB); BB->addSuccessor(midMBB); BB = loop2MBB; BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg) .addReg(TmpDestReg) .addReg(MaskReg); BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg) .addReg(Tmp2Reg) .addReg(NewVal3Reg); BuildMI(BB, dl, TII->get(PPC::STWCX)) .addReg(Tmp4Reg) .addReg(ZeroReg) .addReg(PtrReg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE) .addReg(PPC::CR0) .addMBB(loop1MBB); BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); BB->addSuccessor(loop1MBB); BB->addSuccessor(exitMBB); BB = midMBB; BuildMI(BB, dl, TII->get(PPC::STWCX)) .addReg(TmpDestReg) .addReg(ZeroReg) .addReg(PtrReg); BB->addSuccessor(exitMBB); // exitMBB: // ... BB = exitMBB; BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest) .addReg(TmpReg) .addReg(ShiftReg); } else if (MI.getOpcode() == PPC::FADDrtz) { // This pseudo performs an FADD with rounding mode temporarily forced // to round-to-zero. We emit this via custom inserter since the FPSCR // is not modeled at the SelectionDAG level. Register Dest = MI.getOperand(0).getReg(); Register Src1 = MI.getOperand(1).getReg(); Register Src2 = MI.getOperand(2).getReg(); DebugLoc dl = MI.getDebugLoc(); MachineRegisterInfo &RegInfo = F->getRegInfo(); Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); // Save FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg); // Set rounding mode to round-to-zero. BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)) .addImm(31) .addReg(PPC::RM, RegState::ImplicitDefine); BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)) .addImm(30) .addReg(PPC::RM, RegState::ImplicitDefine); // Perform addition. auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest) .addReg(Src1) .addReg(Src2); if (MI.getFlag(MachineInstr::NoFPExcept)) MIB.setMIFlag(MachineInstr::NoFPExcept); // Restore FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg); } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT || MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT || MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 || MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) { unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 || MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) ? 
PPC::ANDI8_rec : PPC::ANDI_rec; bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT || MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8); MachineRegisterInfo &RegInfo = F->getRegInfo(); Register Dest = RegInfo.createVirtualRegister( Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass); DebugLoc Dl = MI.getDebugLoc(); BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest) .addReg(MI.getOperand(1).getReg()) .addImm(1); BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg()) .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT); } else if (MI.getOpcode() == PPC::TCHECK_RET) { DebugLoc Dl = MI.getDebugLoc(); MachineRegisterInfo &RegInfo = F->getRegInfo(); Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass); BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg); BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg()) .addReg(CRReg); } else if (MI.getOpcode() == PPC::TBEGIN_RET) { DebugLoc Dl = MI.getDebugLoc(); unsigned Imm = MI.getOperand(1).getImm(); BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm); BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg()) .addReg(PPC::CR0EQ); } else if (MI.getOpcode() == PPC::SETRNDi) { DebugLoc dl = MI.getDebugLoc(); Register OldFPSCRReg = MI.getOperand(0).getReg(); // Save FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg); // The floating point rounding mode is in the bits 62:63 of FPCSR, and has // the following settings: // 00 Round to nearest // 01 Round to 0 // 10 Round to +inf // 11 Round to -inf // When the operand is immediate, using the two least significant bits of // the immediate to set the bits 62:63 of FPSCR. unsigned Mode = MI.getOperand(1).getImm(); BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0)) .addImm(31) .addReg(PPC::RM, RegState::ImplicitDefine); BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0)) .addImm(30) .addReg(PPC::RM, RegState::ImplicitDefine); } else if (MI.getOpcode() == PPC::SETRND) { DebugLoc dl = MI.getDebugLoc(); // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg. // If the target doesn't have DirectMove, we should use stack to do the // conversion, because the target doesn't have the instructions like mtvsrd // or mfvsrd to do this conversion directly. auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) { if (Subtarget.hasDirectMove()) { BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg) .addReg(SrcReg); } else { // Use stack to do the register copy. unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD; MachineRegisterInfo &RegInfo = F->getRegInfo(); const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg); if (RC == &PPC::F8RCRegClass) { // Copy register from F8RCRegClass to G8RCRegclass. assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) && "Unsupported RegClass."); StoreOp = PPC::STFD; LoadOp = PPC::LD; } else { // Copy register from G8RCRegClass to F8RCRegclass. assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) && (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) && "Unsupported RegClass."); } MachineFrameInfo &MFI = F->getFrameInfo(); int FrameIdx = MFI.CreateStackObject(8, Align(8), false); MachineMemOperand *MMOStore = F->getMachineMemOperand( MachinePointerInfo::getFixedStack(*F, FrameIdx, 0), MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx), MFI.getObjectAlign(FrameIdx)); // Store the SrcReg into the stack. 
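        // (The store below and the reload that follows move the raw 64-bit
        // bit pattern between the FP and GPR register files through the
        // 8-byte stack slot created above; this is the fallback taken when
        // the hasDirectMove() check above fails, i.e. when mtvsrd/mfvsrd are
        // unavailable.)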
BuildMI(*BB, MI, dl, TII->get(StoreOp)) .addReg(SrcReg) .addImm(0) .addFrameIndex(FrameIdx) .addMemOperand(MMOStore); MachineMemOperand *MMOLoad = F->getMachineMemOperand( MachinePointerInfo::getFixedStack(*F, FrameIdx, 0), MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx), MFI.getObjectAlign(FrameIdx)); // Load from the stack where SrcReg is stored, and save to DestReg, // so we have done the RegClass conversion from RegClass::SrcReg to // RegClass::DestReg. BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg) .addImm(0) .addFrameIndex(FrameIdx) .addMemOperand(MMOLoad); } }; Register OldFPSCRReg = MI.getOperand(0).getReg(); // Save FPSCR value. BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg); // When the operand is gprc register, use two least significant bits of the // register and mtfsf instruction to set the bits 62:63 of FPSCR. // // copy OldFPSCRTmpReg, OldFPSCRReg // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1) // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62 // copy NewFPSCRReg, NewFPSCRTmpReg // mtfsf 255, NewFPSCRReg MachineOperand SrcOp = MI.getOperand(1); MachineRegisterInfo &RegInfo = F->getRegInfo(); Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg); Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); // The first operand of INSERT_SUBREG should be a register which has // subregisters, we only care about its RegClass, so we should use an // IMPLICIT_DEF register. BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg); BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg) .addReg(ImDefReg) .add(SrcOp) .addImm(1); Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg) .addReg(OldFPSCRTmpReg) .addReg(ExtSrcReg) .addImm(0) .addImm(62); Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg); // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63 // bits of FPSCR. BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)) .addImm(255) .addReg(NewFPSCRReg) .addImm(0) .addImm(0); } else if (MI.getOpcode() == PPC::SETFLM) { DebugLoc Dl = MI.getDebugLoc(); // Result of setflm is previous FPSCR content, so we need to save it first. Register OldFPSCRReg = MI.getOperand(0).getReg(); BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg); // Put bits in 32:63 to FPSCR. Register NewFPSCRReg = MI.getOperand(1).getReg(); BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF)) .addImm(255) .addReg(NewFPSCRReg) .addImm(0) .addImm(0); } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 || MI.getOpcode() == PPC::PROBED_ALLOCA_64) { return emitProbedAlloca(MI, BB); } else { llvm_unreachable("Unexpected instr type to insert"); } MI.eraseFromParent(); // The pseudo instruction is gone now. return BB; } //===----------------------------------------------------------------------===// // Target Optimization Hooks //===----------------------------------------------------------------------===// static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) { // For the estimates, convergence is quadratic, so we essentially double the // number of digits correct after every iteration. For both FRE and FRSQRTE, // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(), // this is 2^-14. 
IEEE float has 23 digits and double has 52 digits. int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3; if (VT.getScalarType() == MVT::f64) RefinementSteps++; return RefinementSteps; } SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG, const DenormalMode &Mode) const { // We only have VSX Vector Test for software Square Root. EVT VT = Op.getValueType(); if (!isTypeLegal(MVT::i1) || (VT != MVT::f64 && ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))) return TargetLowering::getSqrtInputTest(Op, DAG, Mode); SDLoc DL(Op); // The output register of FTSQRT is CR field. SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op); // ftsqrt BF,FRB // Let e_b be the unbiased exponent of the double-precision // floating-point operand in register FRB. // fe_flag is set to 1 if either of the following conditions occurs. // - The double-precision floating-point operand in register FRB is a zero, // a NaN, or an infinity, or a negative value. // - e_b is less than or equal to -970. // Otherwise fe_flag is set to 0. // Both VSX and non-VSX versions would set EQ bit in the CR if the number is // not eligible for iteration. (zero/negative/infinity/nan or unbiased // exponent is less than -970) SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32); return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1, FTSQRT, SRIdxVal), 0); } SDValue PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op, SelectionDAG &DAG) const { // We only have VSX Vector Square Root. EVT VT = Op.getValueType(); if (VT != MVT::f64 && ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())) return TargetLowering::getSqrtResultForDenormInput(Op, DAG); return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op); } SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const { EVT VT = Operand.getValueType(); if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || (VT == MVT::v4f32 && Subtarget.hasAltivec()) || (VT == MVT::v2f64 && Subtarget.hasVSX())) { if (RefinementSteps == ReciprocalEstimate::Unspecified) RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); // The Newton-Raphson computation with a single constant does not provide // enough accuracy on some CPUs. UseOneConstNR = !Subtarget.needsTwoConstNR(); return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); } return SDValue(); } SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const { EVT VT = Operand.getValueType(); if ((VT == MVT::f32 && Subtarget.hasFRES()) || (VT == MVT::f64 && Subtarget.hasFRE()) || (VT == MVT::v4f32 && Subtarget.hasAltivec()) || (VT == MVT::v2f64 && Subtarget.hasVSX())) { if (RefinementSteps == ReciprocalEstimate::Unspecified) RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); } return SDValue(); } unsigned PPCTargetLowering::combineRepeatedFPDivisors() const { // Note: This functionality is used only when unsafe-fp-math is enabled, and // on cores with reciprocal estimates (which are used when unsafe-fp-math is // enabled for division), this functionality is redundant with the default // combiner logic (once the division -> reciprocal/multiply transformation // has taken place). As a result, this matters more for older cores than for // newer ones. 
// Combine multiple FDIVs with the same divisor into multiple FMULs by the // reciprocal if there are two or more FDIVs (for embedded cores with only // one FP pipeline) for three or more FDIVs (for generic OOO cores). switch (Subtarget.getCPUDirective()) { default: return 3; case PPC::DIR_440: case PPC::DIR_A2: case PPC::DIR_E500: case PPC::DIR_E500mc: case PPC::DIR_E5500: return 2; } } // isConsecutiveLSLoc needs to work even if all adds have not yet been // collapsed, and so we need to look through chains of them. static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base, int64_t& Offset, SelectionDAG &DAG) { if (DAG.isBaseWithConstantOffset(Loc)) { Base = Loc.getOperand(0); Offset += cast(Loc.getOperand(1))->getSExtValue(); // The base might itself be a base plus an offset, and if so, accumulate // that as well. getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG); } } static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG) { if (VT.getSizeInBits() / 8 != Bytes) return false; SDValue BaseLoc = Base->getBasePtr(); if (Loc.getOpcode() == ISD::FrameIndex) { if (BaseLoc.getOpcode() != ISD::FrameIndex) return false; const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); int FI = cast(Loc)->getIndex(); int BFI = cast(BaseLoc)->getIndex(); int FS = MFI.getObjectSize(FI); int BFS = MFI.getObjectSize(BFI); if (FS != BFS || FS != (int)Bytes) return false; return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes); } SDValue Base1 = Loc, Base2 = BaseLoc; int64_t Offset1 = 0, Offset2 = 0; getBaseWithConstantOffset(Loc, Base1, Offset1, DAG); getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG); if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes)) return true; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const GlobalValue *GV1 = nullptr; const GlobalValue *GV2 = nullptr; Offset1 = 0; Offset2 = 0; bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); if (isGA1 && isGA2 && GV1 == GV2) return Offset1 == (Offset2 + Dist*Bytes); return false; } // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does // not enforce equality of the chain operands. 
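// For example, with Bytes == 8 and Dist == 1, an access whose address folds
// (via getBaseWithConstantOffset) to x + 24 is recognized as consecutive to a
// base access at x + 16, even if one of the adds has not yet been collapsed.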
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG) { if (LSBaseSDNode *LS = dyn_cast(N)) { EVT VT = LS->getMemoryVT(); SDValue Loc = LS->getBasePtr(); return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG); } if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { EVT VT; switch (cast(N->getOperand(1))->getZExtValue()) { default: return false; case Intrinsic::ppc_altivec_lvx: case Intrinsic::ppc_altivec_lvxl: case Intrinsic::ppc_vsx_lxvw4x: case Intrinsic::ppc_vsx_lxvw4x_be: VT = MVT::v4i32; break; case Intrinsic::ppc_vsx_lxvd2x: case Intrinsic::ppc_vsx_lxvd2x_be: VT = MVT::v2f64; break; case Intrinsic::ppc_altivec_lvebx: VT = MVT::i8; break; case Intrinsic::ppc_altivec_lvehx: VT = MVT::i16; break; case Intrinsic::ppc_altivec_lvewx: VT = MVT::i32; break; } return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG); } if (N->getOpcode() == ISD::INTRINSIC_VOID) { EVT VT; switch (cast(N->getOperand(1))->getZExtValue()) { default: return false; case Intrinsic::ppc_altivec_stvx: case Intrinsic::ppc_altivec_stvxl: case Intrinsic::ppc_vsx_stxvw4x: VT = MVT::v4i32; break; case Intrinsic::ppc_vsx_stxvd2x: VT = MVT::v2f64; break; case Intrinsic::ppc_vsx_stxvw4x_be: VT = MVT::v4i32; break; case Intrinsic::ppc_vsx_stxvd2x_be: VT = MVT::v2f64; break; case Intrinsic::ppc_altivec_stvebx: VT = MVT::i8; break; case Intrinsic::ppc_altivec_stvehx: VT = MVT::i16; break; case Intrinsic::ppc_altivec_stvewx: VT = MVT::i32; break; } return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG); } return false; } // Return true is there is a nearyby consecutive load to the one provided // (regardless of alignment). We search up and down the chain, looking though // token factors and other loads (but nothing else). As a result, a true result // indicates that it is safe to create a new consecutive load adjacent to the // load provided. static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { SDValue Chain = LD->getChain(); EVT VT = LD->getMemoryVT(); SmallSet LoadRoots; SmallVector Queue(1, Chain.getNode()); SmallSet Visited; // First, search up the chain, branching to follow all token-factor operands. // If we find a consecutive load, then we're done, otherwise, record all // nodes just above the top-level loads and token factors. while (!Queue.empty()) { SDNode *ChainNext = Queue.pop_back_val(); if (!Visited.insert(ChainNext).second) continue; if (MemSDNode *ChainLD = dyn_cast(ChainNext)) { if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) return true; if (!Visited.count(ChainLD->getChain().getNode())) Queue.push_back(ChainLD->getChain().getNode()); } else if (ChainNext->getOpcode() == ISD::TokenFactor) { for (const SDUse &O : ChainNext->ops()) if (!Visited.count(O.getNode())) Queue.push_back(O.getNode()); } else LoadRoots.insert(ChainNext); } // Second, search down the chain, starting from the top-level nodes recorded // in the first phase. These top-level nodes are the nodes just above all // loads and token factors. Starting with their uses, recursively look though // all loads (just the chain uses) and token factors to find a consecutive // load. 
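  // The downward walk reuses the Visited set and the work queue, seeded this
  // time with the LoadRoots recorded above; only token factors and chain uses
  // of loads are followed, so both phases stay on memory-ordering edges.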
Visited.clear(); Queue.clear(); for (SmallSet::iterator I = LoadRoots.begin(), IE = LoadRoots.end(); I != IE; ++I) { Queue.push_back(*I); while (!Queue.empty()) { SDNode *LoadRoot = Queue.pop_back_val(); if (!Visited.insert(LoadRoot).second) continue; if (MemSDNode *ChainLD = dyn_cast(LoadRoot)) if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) return true; for (SDNode::use_iterator UI = LoadRoot->use_begin(), UE = LoadRoot->use_end(); UI != UE; ++UI) if (((isa(*UI) && cast(*UI)->getChain().getNode() == LoadRoot) || UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI)) Queue.push_back(*UI); } } return false; } /// This function is called when we have proved that a SETCC node can be replaced /// by subtraction (and other supporting instructions) so that the result of /// comparison is kept in a GPR instead of CR. This function is purely for /// codegen purposes and has some flags to guide the codegen process. static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG) { assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected."); // Zero extend the operands to the largest legal integer. Originally, they // must be of a strictly smaller size. auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0), DAG.getConstant(Size, DL, MVT::i32)); auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1), DAG.getConstant(Size, DL, MVT::i32)); // Swap if needed. Depends on the condition code. if (Swap) std::swap(Op0, Op1); // Subtract extended integers. auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1); // Move the sign bit to the least significant position and zero out the rest. // Now the least significant bit carries the result of original comparison. auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode, DAG.getConstant(Size - 1, DL, MVT::i32)); auto Final = Shifted; // Complement the result if needed. Based on the condition code. if (Complement) Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted, DAG.getConstant(1, DL, MVT::i64)); return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final); } SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N, DAGCombinerInfo &DCI) const { assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected."); SelectionDAG &DAG = DCI.DAG; SDLoc DL(N); // Size of integers being compared has a critical role in the following // analysis, so we prefer to do this when all types are legal. 
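  // Concretely (illustrative): for i32 operands on a 64-bit target, SETULT is
  // replaced by trunc((zext64(a) - zext64(b)) >> 63), i.e. the sign bit of
  // the widened subtraction, which is 1 exactly when a <u b. SETUGT swaps the
  // operands, SETUGE XORs the extracted bit with 1, and SETULE does both.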
if (!DCI.isAfterLegalizeDAG()) return SDValue(); // If all users of SETCC extend its value to a legal integer type // then we replace SETCC with a subtraction for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE; ++UI) { if (UI->getOpcode() != ISD::ZERO_EXTEND) return SDValue(); } ISD::CondCode CC = cast(N->getOperand(2))->get(); auto OpSize = N->getOperand(0).getValueSizeInBits(); unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits(); if (OpSize < Size) { switch (CC) { default: break; case ISD::SETULT: return generateEquivalentSub(N, Size, false, false, DL, DAG); case ISD::SETULE: return generateEquivalentSub(N, Size, true, true, DL, DAG); case ISD::SETUGT: return generateEquivalentSub(N, Size, false, true, DL, DAG); case ISD::SETUGE: return generateEquivalentSub(N, Size, true, false, DL, DAG); } } return SDValue(); } SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits"); // If we're tracking CR bits, we need to be careful that we don't have: // trunc(binary-ops(zext(x), zext(y))) // or // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...) // such that we're unnecessarily moving things into GPRs when it would be // better to keep them in CR bits. // Note that trunc here can be an actual i1 trunc, or can be the effective // truncation that comes from a setcc or select_cc. if (N->getOpcode() == ISD::TRUNCATE && N->getValueType(0) != MVT::i1) return SDValue(); if (N->getOperand(0).getValueType() != MVT::i32 && N->getOperand(0).getValueType() != MVT::i64) return SDValue(); if (N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) { // If we're looking at a comparison, then we need to make sure that the // high bits (all except for the first) don't matter the result. ISD::CondCode CC = cast(N->getOperand( N->getOpcode() == ISD::SETCC ? 2 : 4))->get(); unsigned OpBits = N->getOperand(0).getValueSizeInBits(); if (ISD::isSignedIntSetCC(CC)) { if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits || DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits) return SDValue(); } else if (ISD::isUnsignedIntSetCC(CC)) { if (!DAG.MaskedValueIsZero(N->getOperand(0), APInt::getHighBitsSet(OpBits, OpBits-1)) || !DAG.MaskedValueIsZero(N->getOperand(1), APInt::getHighBitsSet(OpBits, OpBits-1))) return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI) : SDValue()); } else { // This is neither a signed nor an unsigned comparison, just make sure // that the high bits are equal. KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0)); KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1)); // We don't really care about what is known about the first bit (if // anything), so pretend that it is known zero for both to ensure they can // be compared as constants. Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0); Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0); if (!Op1Known.isConstant() || !Op2Known.isConstant() || Op1Known.getConstant() != Op2Known.getConstant()) return SDValue(); } } // We now know that the higher-order bits are irrelevant, we just need to // make sure that all of the intermediate operations are bit operations, and // all inputs are extensions. 
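  // A typical shape this targets is
  //   trunc(xor(zext(a:i1), zext(b:i1))):i1
  // where the xor can be performed directly on the CR bits instead of being
  // materialized in a GPR and truncated back to i1.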
if (N->getOperand(0).getOpcode() != ISD::AND && N->getOperand(0).getOpcode() != ISD::OR && N->getOperand(0).getOpcode() != ISD::XOR && N->getOperand(0).getOpcode() != ISD::SELECT && N->getOperand(0).getOpcode() != ISD::SELECT_CC && N->getOperand(0).getOpcode() != ISD::TRUNCATE && N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND && N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND && N->getOperand(0).getOpcode() != ISD::ANY_EXTEND) return SDValue(); if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) && N->getOperand(1).getOpcode() != ISD::AND && N->getOperand(1).getOpcode() != ISD::OR && N->getOperand(1).getOpcode() != ISD::XOR && N->getOperand(1).getOpcode() != ISD::SELECT && N->getOperand(1).getOpcode() != ISD::SELECT_CC && N->getOperand(1).getOpcode() != ISD::TRUNCATE && N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND && N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND && N->getOperand(1).getOpcode() != ISD::ANY_EXTEND) return SDValue(); SmallVector Inputs; SmallVector BinOps, PromOps; SmallPtrSet Visited; for (unsigned i = 0; i < 2; ++i) { if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND || N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND || N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) && N->getOperand(i).getOperand(0).getValueType() == MVT::i1) || isa(N->getOperand(i))) Inputs.push_back(N->getOperand(i)); else BinOps.push_back(N->getOperand(i)); if (N->getOpcode() == ISD::TRUNCATE) break; } // Visit all inputs, collect all binary operations (and, or, xor and // select) that are all fed by extensions. while (!BinOps.empty()) { SDValue BinOp = BinOps.pop_back_val(); if (!Visited.insert(BinOp.getNode()).second) continue; PromOps.push_back(BinOp); for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { // The condition of the select is not promoted. if (BinOp.getOpcode() == ISD::SELECT && i == 0) continue; if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) continue; if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) && BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) || isa(BinOp.getOperand(i))) { Inputs.push_back(BinOp.getOperand(i)); } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || BinOp.getOperand(i).getOpcode() == ISD::OR || BinOp.getOperand(i).getOpcode() == ISD::XOR || BinOp.getOperand(i).getOpcode() == ISD::SELECT || BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC || BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) { BinOps.push_back(BinOp.getOperand(i)); } else { // We have an input that is not an extension or another binary // operation; we'll abort this transformation. return SDValue(); } } } // Make sure that this is a self-contained cluster of operations (which // is not quite the same thing as saying that everything has only one // use). for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { if (isa(Inputs[i])) continue; for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), UE = Inputs[i].getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User != N && !Visited.count(User)) return SDValue(); // Make sure that we're not going to promote the non-output-value // operand(s) or SELECT or SELECT_CC. 
// FIXME: Although we could sometimes handle this, and it does occur in // practice that one of the condition inputs to the select is also one of // the outputs, we currently can't deal with this. if (User->getOpcode() == ISD::SELECT) { if (User->getOperand(0) == Inputs[i]) return SDValue(); } else if (User->getOpcode() == ISD::SELECT_CC) { if (User->getOperand(0) == Inputs[i] || User->getOperand(1) == Inputs[i]) return SDValue(); } } } for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), UE = PromOps[i].getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User != N && !Visited.count(User)) return SDValue(); // Make sure that we're not going to promote the non-output-value // operand(s) or SELECT or SELECT_CC. // FIXME: Although we could sometimes handle this, and it does occur in // practice that one of the condition inputs to the select is also one of // the outputs, we currently can't deal with this. if (User->getOpcode() == ISD::SELECT) { if (User->getOperand(0) == PromOps[i]) return SDValue(); } else if (User->getOpcode() == ISD::SELECT_CC) { if (User->getOperand(0) == PromOps[i] || User->getOperand(1) == PromOps[i]) return SDValue(); } } } // Replace all inputs with the extension operand. for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { // Constants may have users outside the cluster of to-be-promoted nodes, // and so we need to replace those as we do the promotions. if (isa(Inputs[i])) continue; else DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); } std::list PromOpHandles; for (auto &PromOp : PromOps) PromOpHandles.emplace_back(PromOp); // Replace all operations (these are all the same, but have a different // (i1) return type). DAG.getNode will validate that the types of // a binary operator match, so go through the list in reverse so that // we've likely promoted both operands first. Any intermediate truncations or // extensions disappear. while (!PromOpHandles.empty()) { SDValue PromOp = PromOpHandles.back().getValue(); PromOpHandles.pop_back(); if (PromOp.getOpcode() == ISD::TRUNCATE || PromOp.getOpcode() == ISD::SIGN_EXTEND || PromOp.getOpcode() == ISD::ZERO_EXTEND || PromOp.getOpcode() == ISD::ANY_EXTEND) { if (!isa(PromOp.getOperand(0)) && PromOp.getOperand(0).getValueType() != MVT::i1) { // The operand is not yet ready (see comment below). PromOpHandles.emplace_front(PromOp); continue; } SDValue RepValue = PromOp.getOperand(0); if (isa(RepValue)) RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue); DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue); continue; } unsigned C; switch (PromOp.getOpcode()) { default: C = 0; break; case ISD::SELECT: C = 1; break; case ISD::SELECT_CC: C = 2; break; } if ((!isa(PromOp.getOperand(C)) && PromOp.getOperand(C).getValueType() != MVT::i1) || (!isa(PromOp.getOperand(C+1)) && PromOp.getOperand(C+1).getValueType() != MVT::i1)) { // The to-be-promoted operands of this node have not yet been // promoted (this should be rare because we're going through the // list backward, but if one of the operands has several users in // this cluster of to-be-promoted nodes, it is possible). PromOpHandles.emplace_front(PromOp); continue; } SmallVector Ops(PromOp.getNode()->op_begin(), PromOp.getNode()->op_end()); // If there are any constant inputs, make sure they're replaced now. 
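    // (Constants are shared nodes and were deliberately skipped when the
    // other inputs were replaced above, so they are truncated to i1 here,
    // immediately before the operation is rebuilt with an i1 result type.)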
for (unsigned i = 0; i < 2; ++i) if (isa(Ops[C+i])) Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]); DAG.ReplaceAllUsesOfValueWith(PromOp, DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops)); } // Now we're left with the initial truncation itself. if (N->getOpcode() == ISD::TRUNCATE) return N->getOperand(0); // Otherwise, this is a comparison. The operands to be compared have just // changed type (to i1), but everything else is the same. return SDValue(N, 0); } SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); // If we're tracking CR bits, we need to be careful that we don't have: // zext(binary-ops(trunc(x), trunc(y))) // or // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...) // such that we're unnecessarily moving things into CR bits that can more // efficiently stay in GPRs. Note that if we're not certain that the high // bits are set as required by the final extension, we still may need to do // some masking to get the proper behavior. // This same functionality is important on PPC64 when dealing with // 32-to-64-bit extensions; these occur often when 32-bit values are used as // the return values of functions. Because it is so similar, it is handled // here as well. if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) return SDValue(); if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) || (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64()))) return SDValue(); if (N->getOperand(0).getOpcode() != ISD::AND && N->getOperand(0).getOpcode() != ISD::OR && N->getOperand(0).getOpcode() != ISD::XOR && N->getOperand(0).getOpcode() != ISD::SELECT && N->getOperand(0).getOpcode() != ISD::SELECT_CC) return SDValue(); SmallVector Inputs; SmallVector BinOps(1, N->getOperand(0)), PromOps; SmallPtrSet Visited; // Visit all inputs, collect all binary operations (and, or, xor and // select) that are all fed by truncations. while (!BinOps.empty()) { SDValue BinOp = BinOps.pop_back_val(); if (!Visited.insert(BinOp.getNode()).second) continue; PromOps.push_back(BinOp); for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { // The condition of the select is not promoted. if (BinOp.getOpcode() == ISD::SELECT && i == 0) continue; if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) continue; if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || isa(BinOp.getOperand(i))) { Inputs.push_back(BinOp.getOperand(i)); } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || BinOp.getOperand(i).getOpcode() == ISD::OR || BinOp.getOperand(i).getOpcode() == ISD::XOR || BinOp.getOperand(i).getOpcode() == ISD::SELECT || BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) { BinOps.push_back(BinOp.getOperand(i)); } else { // We have an input that is not a truncation or another binary // operation; we'll abort this transformation. return SDValue(); } } } // The operands of a select that must be truncated when the select is // promoted because the operand is actually part of the to-be-promoted set. DenseMap SelectTruncOp[2]; // Make sure that this is a self-contained cluster of operations (which // is not quite the same thing as saying that everything has only one // use). 
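  // If any value produced inside the cluster is also used by a node outside
  // it (other than N itself), the promotion is abandoned: rewriting the
  // cluster in the wider type would change a value that external users still
  // expect in the original type.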
for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { if (isa(Inputs[i])) continue; for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), UE = Inputs[i].getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User != N && !Visited.count(User)) return SDValue(); // If we're going to promote the non-output-value operand(s) or SELECT or // SELECT_CC, record them for truncation. if (User->getOpcode() == ISD::SELECT) { if (User->getOperand(0) == Inputs[i]) SelectTruncOp[0].insert(std::make_pair(User, User->getOperand(0).getValueType())); } else if (User->getOpcode() == ISD::SELECT_CC) { if (User->getOperand(0) == Inputs[i]) SelectTruncOp[0].insert(std::make_pair(User, User->getOperand(0).getValueType())); if (User->getOperand(1) == Inputs[i]) SelectTruncOp[1].insert(std::make_pair(User, User->getOperand(1).getValueType())); } } } for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), UE = PromOps[i].getNode()->use_end(); UI != UE; ++UI) { SDNode *User = *UI; if (User != N && !Visited.count(User)) return SDValue(); // If we're going to promote the non-output-value operand(s) or SELECT or // SELECT_CC, record them for truncation. if (User->getOpcode() == ISD::SELECT) { if (User->getOperand(0) == PromOps[i]) SelectTruncOp[0].insert(std::make_pair(User, User->getOperand(0).getValueType())); } else if (User->getOpcode() == ISD::SELECT_CC) { if (User->getOperand(0) == PromOps[i]) SelectTruncOp[0].insert(std::make_pair(User, User->getOperand(0).getValueType())); if (User->getOperand(1) == PromOps[i]) SelectTruncOp[1].insert(std::make_pair(User, User->getOperand(1).getValueType())); } } } unsigned PromBits = N->getOperand(0).getValueSizeInBits(); bool ReallyNeedsExt = false; if (N->getOpcode() != ISD::ANY_EXTEND) { // If all of the inputs are not already sign/zero extended, then // we'll still need to do that at the end. for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { if (isa(Inputs[i])) continue; unsigned OpBits = Inputs[i].getOperand(0).getValueSizeInBits(); assert(PromBits < OpBits && "Truncation not to a smaller bit count?"); if ((N->getOpcode() == ISD::ZERO_EXTEND && !DAG.MaskedValueIsZero(Inputs[i].getOperand(0), APInt::getHighBitsSet(OpBits, OpBits-PromBits))) || (N->getOpcode() == ISD::SIGN_EXTEND && DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) < (OpBits-(PromBits-1)))) { ReallyNeedsExt = true; break; } } } // Replace all inputs, either with the truncation operand, or a // truncation or extension to the final output type. for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { // Constant inputs need to be replaced with the to-be-promoted nodes that // use them because they might have users outside of the cluster of // promoted nodes. if (isa(Inputs[i])) continue; SDValue InSrc = Inputs[i].getOperand(0); if (Inputs[i].getValueType() == N->getValueType(0)) DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc); else if (N->getOpcode() == ISD::SIGN_EXTEND) DAG.ReplaceAllUsesOfValueWith(Inputs[i], DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0))); else if (N->getOpcode() == ISD::ZERO_EXTEND) DAG.ReplaceAllUsesOfValueWith(Inputs[i], DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0))); else DAG.ReplaceAllUsesOfValueWith(Inputs[i], DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0))); } std::list PromOpHandles; for (auto &PromOp : PromOps) PromOpHandles.emplace_back(PromOp); // Replace all operations (these are all the same, but have a different // (promoted) return type). 
DAG.getNode will validate that the types of // a binary operator match, so go through the list in reverse so that // we've likely promoted both operands first. while (!PromOpHandles.empty()) { SDValue PromOp = PromOpHandles.back().getValue(); PromOpHandles.pop_back(); unsigned C; switch (PromOp.getOpcode()) { default: C = 0; break; case ISD::SELECT: C = 1; break; case ISD::SELECT_CC: C = 2; break; } if ((!isa(PromOp.getOperand(C)) && PromOp.getOperand(C).getValueType() != N->getValueType(0)) || (!isa(PromOp.getOperand(C+1)) && PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) { // The to-be-promoted operands of this node have not yet been // promoted (this should be rare because we're going through the // list backward, but if one of the operands has several users in // this cluster of to-be-promoted nodes, it is possible). PromOpHandles.emplace_front(PromOp); continue; } // For SELECT and SELECT_CC nodes, we do a similar check for any // to-be-promoted comparison inputs. if (PromOp.getOpcode() == ISD::SELECT || PromOp.getOpcode() == ISD::SELECT_CC) { if ((SelectTruncOp[0].count(PromOp.getNode()) && PromOp.getOperand(0).getValueType() != N->getValueType(0)) || (SelectTruncOp[1].count(PromOp.getNode()) && PromOp.getOperand(1).getValueType() != N->getValueType(0))) { PromOpHandles.emplace_front(PromOp); continue; } } SmallVector Ops(PromOp.getNode()->op_begin(), PromOp.getNode()->op_end()); // If this node has constant inputs, then they'll need to be promoted here. for (unsigned i = 0; i < 2; ++i) { if (!isa(Ops[C+i])) continue; if (Ops[C+i].getValueType() == N->getValueType(0)) continue; if (N->getOpcode() == ISD::SIGN_EXTEND) Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); else if (N->getOpcode() == ISD::ZERO_EXTEND) Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); else Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); } // If we've promoted the comparison inputs of a SELECT or SELECT_CC, // truncate them again to the original value type. if (PromOp.getOpcode() == ISD::SELECT || PromOp.getOpcode() == ISD::SELECT_CC) { auto SI0 = SelectTruncOp[0].find(PromOp.getNode()); if (SI0 != SelectTruncOp[0].end()) Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]); auto SI1 = SelectTruncOp[1].find(PromOp.getNode()); if (SI1 != SelectTruncOp[1].end()) Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]); } DAG.ReplaceAllUsesOfValueWith(PromOp, DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops)); } // Now we're left with the initial extension itself. if (!ReallyNeedsExt) return N->getOperand(0); // To zero extend, just mask off everything except for the first bit (in the // i1 case). 
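  // For the i1-in-i32 case this is simply an AND with 1. The sign-extend case
  // below instead shifts the PromBits-wide value up to the top of the
  // register and arithmetically shifts it back down (shift amount =
  // result width - PromBits), e.g. shl by 31 followed by sra by 31 for an i1
  // promoted in an i32.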
if (N->getOpcode() == ISD::ZERO_EXTEND) return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0), DAG.getConstant(APInt::getLowBitsSet( N->getValueSizeInBits(0), PromBits), dl, N->getValueType(0))); assert(N->getOpcode() == ISD::SIGN_EXTEND && "Invalid extension type"); EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout()); SDValue ShiftCst = DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy); return DAG.getNode( ISD::SRA, dl, N->getValueType(0), DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst), ShiftCst); } SDValue PPCTargetLowering::combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const { assert(N->getOpcode() == ISD::SETCC && "Should be called with a SETCC node"); ISD::CondCode CC = cast(N->getOperand(2))->get(); if (CC == ISD::SETNE || CC == ISD::SETEQ) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); // If there is a '0 - y' pattern, canonicalize the pattern to the RHS. if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) && LHS.hasOneUse()) std::swap(LHS, RHS); // x == 0-y --> x+y == 0 // x != 0-y --> x+y != 0 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) && RHS.hasOneUse()) { SDLoc DL(N); SelectionDAG &DAG = DCI.DAG; EVT VT = N->getValueType(0); EVT OpVT = LHS.getValueType(); SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1)); return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC); } } return DAGCombineTruncBoolExt(N, DCI); } // Is this an extending load from an f32 to an f64? static bool isFPExtLoad(SDValue Op) { if (LoadSDNode *LD = dyn_cast(Op.getNode())) return LD->getExtensionType() == ISD::EXTLOAD && Op.getValueType() == MVT::f64; return false; } /// Reduces the number of fp-to-int conversion when building a vector. /// /// If this vector is built out of floating to integer conversions, /// transform it to a vector built out of floating point values followed by a /// single floating to integer conversion of the vector. /// Namely (build_vector (fptosi $A), (fptosi $B), ...) /// becomes (fptosi (build_vector ($A, $B, ...))) SDValue PPCTargetLowering:: combineElementTruncationToVectorTruncation(SDNode *N, DAGCombinerInfo &DCI) const { assert(N->getOpcode() == ISD::BUILD_VECTOR && "Should be called with a BUILD_VECTOR node"); SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); SDValue FirstInput = N->getOperand(0); assert(FirstInput.getOpcode() == PPCISD::MFVSR && "The input operand must be an fp-to-int conversion."); // This combine happens after legalization so the fp_to_[su]i nodes are // already converted to PPCSISD nodes. unsigned FirstConversion = FirstInput.getOperand(0).getOpcode(); if (FirstConversion == PPCISD::FCTIDZ || FirstConversion == PPCISD::FCTIDUZ || FirstConversion == PPCISD::FCTIWZ || FirstConversion == PPCISD::FCTIWUZ) { bool IsSplat = true; bool Is32Bit = FirstConversion == PPCISD::FCTIWZ || FirstConversion == PPCISD::FCTIWUZ; EVT SrcVT = FirstInput.getOperand(0).getValueType(); SmallVector Ops; EVT TargetVT = N->getValueType(0); for (int i = 0, e = N->getNumOperands(); i < e; ++i) { SDValue NextOp = N->getOperand(i); if (NextOp.getOpcode() != PPCISD::MFVSR) return SDValue(); unsigned NextConversion = NextOp.getOperand(0).getOpcode(); if (NextConversion != FirstConversion) return SDValue(); // If we are converting to 32-bit integers, we need to add an FP_ROUND. // This is not valid if the input was originally double precision. 
It is // also not profitable to do unless this is an extending load in which // case doing this combine will allow us to combine consecutive loads. if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0))) return SDValue(); if (N->getOperand(i) != FirstInput) IsSplat = false; } // If this is a splat, we leave it as-is since there will be only a single // fp-to-int conversion followed by a splat of the integer. This is better // for 32-bit and smaller ints and neutral for 64-bit ints. if (IsSplat) return SDValue(); // Now that we know we have the right type of node, get its operands for (int i = 0, e = N->getNumOperands(); i < e; ++i) { SDValue In = N->getOperand(i).getOperand(0); if (Is32Bit) { // For 32-bit values, we need to add an FP_ROUND node (if we made it // here, we know that all inputs are extending loads so this is safe). if (In.isUndef()) Ops.push_back(DAG.getUNDEF(SrcVT)); else { SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0), DAG.getIntPtrConstant(1, dl)); Ops.push_back(Trunc); } } else Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0)); } unsigned Opcode; if (FirstConversion == PPCISD::FCTIDZ || FirstConversion == PPCISD::FCTIWZ) Opcode = ISD::FP_TO_SINT; else Opcode = ISD::FP_TO_UINT; EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32; SDValue BV = DAG.getBuildVector(NewVT, dl, Ops); return DAG.getNode(Opcode, dl, TargetVT, BV); } return SDValue(); } /// Reduce the number of loads when building a vector. /// /// Building a vector out of multiple loads can be converted to a load /// of the vector type if the loads are consecutive. If the loads are /// consecutive but in descending order, a shuffle is added at the end /// to reorder the vector. static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) { assert(N->getOpcode() == ISD::BUILD_VECTOR && "Should be called with a BUILD_VECTOR node"); SDLoc dl(N); // Return early for non byte-sized type, as they can't be consecutive. if (!N->getValueType(0).getVectorElementType().isByteSized()) return SDValue(); bool InputsAreConsecutiveLoads = true; bool InputsAreReverseConsecutive = true; unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize(); SDValue FirstInput = N->getOperand(0); bool IsRoundOfExtLoad = false; if (FirstInput.getOpcode() == ISD::FP_ROUND && FirstInput.getOperand(0).getOpcode() == ISD::LOAD) { LoadSDNode *LD = dyn_cast(FirstInput.getOperand(0)); IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD; } // Not a build vector of (possibly fp_rounded) loads. if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) || N->getNumOperands() == 1) return SDValue(); for (int i = 1, e = N->getNumOperands(); i < e; ++i) { // If any inputs are fp_round(extload), they all must be. if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND) return SDValue(); SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) : N->getOperand(i); if (NextInput.getOpcode() != ISD::LOAD) return SDValue(); SDValue PreviousInput = IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1); LoadSDNode *LD1 = dyn_cast(PreviousInput); LoadSDNode *LD2 = dyn_cast(NextInput); // If any inputs are fp_round(extload), they all must be. 
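    // Beyond the extending-load check, both load orders are tracked at once:
    // one isConsecutiveLS query keeps the forward-consecutive candidate alive
    // and the other keeps the reverse-consecutive one; the reverse case is
    // materialized below as a single wide load followed by an
    // element-reversing shuffle.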
if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD) return SDValue(); if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG)) InputsAreConsecutiveLoads = false; if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG)) InputsAreReverseConsecutive = false; // Exit early if the loads are neither consecutive nor reverse consecutive. if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive) return SDValue(); } assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) && "The loads cannot be both consecutive and reverse consecutive."); SDValue FirstLoadOp = IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput; SDValue LastLoadOp = IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) : N->getOperand(N->getNumOperands()-1); LoadSDNode *LD1 = dyn_cast(FirstLoadOp); LoadSDNode *LDL = dyn_cast(LastLoadOp); if (InputsAreConsecutiveLoads) { assert(LD1 && "Input needs to be a LoadSDNode."); return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(), LD1->getBasePtr(), LD1->getPointerInfo(), LD1->getAlignment()); } if (InputsAreReverseConsecutive) { assert(LDL && "Input needs to be a LoadSDNode."); SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(), LDL->getBasePtr(), LDL->getPointerInfo(), LDL->getAlignment()); SmallVector Ops; for (int i = N->getNumOperands() - 1; i >= 0; i--) Ops.push_back(i); return DAG.getVectorShuffle(N->getValueType(0), dl, Load, DAG.getUNDEF(N->getValueType(0)), Ops); } return SDValue(); } // This function adds the required vector_shuffle needed to get // the elements of the vector extract in the correct position // as specified by the CorrectElems encoding. static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems) { SDLoc dl(N); unsigned NumElems = Input.getValueType().getVectorNumElements(); SmallVector ShuffleMask(NumElems, -1); // Knowing the element indices being extracted from the original // vector and the order in which they're being inserted, just put // them at element indices required for the instruction. for (unsigned i = 0; i < N->getNumOperands(); i++) { if (DAG.getDataLayout().isLittleEndian()) ShuffleMask[CorrectElems & 0xF] = Elems & 0xF; else ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4; CorrectElems = CorrectElems >> 8; Elems = Elems >> 8; } SDValue Shuffle = DAG.getVectorShuffle(Input.getValueType(), dl, Input, DAG.getUNDEF(Input.getValueType()), ShuffleMask); EVT VT = N->getValueType(0); SDValue Conv = DAG.getBitcast(VT, Shuffle); EVT ExtVT = EVT::getVectorVT(*DAG.getContext(), Input.getValueType().getVectorElementType(), VT.getVectorNumElements()); return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv, DAG.getValueType(ExtVT)); } // Look for build vector patterns where input operands come from sign // extended vector_extract elements of specific indices. If the correct indices // aren't used, add a vector shuffle to fix up the indices and create // SIGN_EXTEND_INREG node which selects the vector sign extend instructions // during instruction selection. static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) { // This array encodes the indices that the vector sign extend instructions // extract from when extending from one type to another for both BE and LE. // The right nibble of each byte corresponds to the LE incides. // and the left nibble of each byte corresponds to the BE incides. 
// For example: 0x3074B8FC byte->word // For LE: the allowed indices are: 0x0,0x4,0x8,0xC // For BE: the allowed indices are: 0x3,0x7,0xB,0xF // For example: 0x000070F8 byte->double word // For LE: the allowed indices are: 0x0,0x8 // For BE: the allowed indices are: 0x7,0xF uint64_t TargetElems[] = { 0x3074B8FC, // b->w 0x000070F8, // b->d 0x10325476, // h->w 0x00003074, // h->d 0x00001032, // w->d }; uint64_t Elems = 0; int Index; SDValue Input; auto isSExtOfVecExtract = [&](SDValue Op) -> bool { if (!Op) return false; if (Op.getOpcode() != ISD::SIGN_EXTEND && Op.getOpcode() != ISD::SIGN_EXTEND_INREG) return false; // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value // of the right width. SDValue Extract = Op.getOperand(0); if (Extract.getOpcode() == ISD::ANY_EXTEND) Extract = Extract.getOperand(0); if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT) return false; ConstantSDNode *ExtOp = dyn_cast(Extract.getOperand(1)); if (!ExtOp) return false; Index = ExtOp->getZExtValue(); if (Input && Input != Extract.getOperand(0)) return false; if (!Input) Input = Extract.getOperand(0); Elems = Elems << 8; Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4; Elems |= Index; return true; }; // If the build vector operands aren't sign extended vector extracts, // of the same input vector, then return. for (unsigned i = 0; i < N->getNumOperands(); i++) { if (!isSExtOfVecExtract(N->getOperand(i))) { return SDValue(); } } // If the vector extract indicies are not correct, add the appropriate // vector_shuffle. int TgtElemArrayIdx; int InputSize = Input.getValueType().getScalarSizeInBits(); int OutputSize = N->getValueType(0).getScalarSizeInBits(); if (InputSize + OutputSize == 40) TgtElemArrayIdx = 0; else if (InputSize + OutputSize == 72) TgtElemArrayIdx = 1; else if (InputSize + OutputSize == 48) TgtElemArrayIdx = 2; else if (InputSize + OutputSize == 80) TgtElemArrayIdx = 3; else if (InputSize + OutputSize == 96) TgtElemArrayIdx = 4; else return SDValue(); uint64_t CorrectElems = TargetElems[TgtElemArrayIdx]; CorrectElems = DAG.getDataLayout().isLittleEndian() ? CorrectElems & 0x0F0F0F0F0F0F0F0F : CorrectElems & 0xF0F0F0F0F0F0F0F0; if (Elems != CorrectElems) { return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems); } // Regular lowering will catch cases where a shuffle is not needed. return SDValue(); } // Look for the pattern of a load from a narrow width to i128, feeding // into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node // (LXVRZX). This node represents a zero extending load that will be matched // to the Load VSX Vector Rightmost instructions. static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) { SDLoc DL(N); // This combine is only eligible for a BUILD_VECTOR of v1i128. if (N->getValueType(0) != MVT::v1i128) return SDValue(); SDValue Operand = N->getOperand(0); // Proceed with the transformation if the operand to the BUILD_VECTOR // is a load instruction. if (Operand.getOpcode() != ISD::LOAD) return SDValue(); LoadSDNode *LD = dyn_cast(Operand); EVT MemoryType = LD->getMemoryVT(); // This transformation is only valid if the we are loading either a byte, // halfword, word, or doubleword. bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 || MemoryType == MVT::i32 || MemoryType == MVT::i64; // Ensure that the load from the narrow width is being zero extended to i128. 
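  // EXTLOAD is accepted here as well: an any-extending load leaves the bits
  // above the memory type unspecified, so implementing it with a zero-filling
  // LXVRZX load is a legal choice.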
if (!ValidLDType || (LD->getExtensionType() != ISD::ZEXTLOAD && LD->getExtensionType() != ISD::EXTLOAD)) return SDValue(); SDValue LoadOps[] = { LD->getChain(), LD->getBasePtr(), DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)}; return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL, DAG.getVTList(MVT::v1i128, MVT::Other), LoadOps, MemoryType, LD->getMemOperand()); } SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const { assert(N->getOpcode() == ISD::BUILD_VECTOR && "Should be called with a BUILD_VECTOR node"); SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); if (!Subtarget.hasVSX()) return SDValue(); // The target independent DAG combiner will leave a build_vector of // float-to-int conversions intact. We can generate MUCH better code for // a float-to-int conversion of a vector of floats. SDValue FirstInput = N->getOperand(0); if (FirstInput.getOpcode() == PPCISD::MFVSR) { SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI); if (Reduced) return Reduced; } // If we're building a vector out of consecutive loads, just load that // vector type. SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG); if (Reduced) return Reduced; // If we're building a vector out of extended elements from another vector // we have P9 vector integer extend instructions. The code assumes legal // input types (i.e. it can't handle things like v4i16) so do not run before // legalization. if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) { Reduced = combineBVOfVecSExt(N, DAG); if (Reduced) return Reduced; } // On Power10, the Load VSX Vector Rightmost instructions can be utilized // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR // is a load from to i128. if (Subtarget.isISA3_1()) { SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG); if (BVOfZLoad) return BVOfZLoad; } if (N->getValueType(0) != MVT::v2f64) return SDValue(); // Looking for: // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1)) if (FirstInput.getOpcode() != ISD::SINT_TO_FP && FirstInput.getOpcode() != ISD::UINT_TO_FP) return SDValue(); if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP && N->getOperand(1).getOpcode() != ISD::UINT_TO_FP) return SDValue(); if (FirstInput.getOpcode() != N->getOperand(1).getOpcode()) return SDValue(); SDValue Ext1 = FirstInput.getOperand(0); SDValue Ext2 = N->getOperand(1).getOperand(0); if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT || Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT) return SDValue(); ConstantSDNode *Ext1Op = dyn_cast(Ext1.getOperand(1)); ConstantSDNode *Ext2Op = dyn_cast(Ext2.getOperand(1)); if (!Ext1Op || !Ext2Op) return SDValue(); if (Ext1.getOperand(0).getValueType() != MVT::v4i32 || Ext1.getOperand(0) != Ext2.getOperand(0)) return SDValue(); int FirstElem = Ext1Op->getZExtValue(); int SecondElem = Ext2Op->getZExtValue(); int SubvecIdx; if (FirstElem == 0 && SecondElem == 1) SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0; else if (FirstElem == 2 && SecondElem == 3) SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1; else return SDValue(); SDValue SrcVec = Ext1.getOperand(0); auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ? 
PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP; return DAG.getNode(NodeType, dl, MVT::v2f64, SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl)); } SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const { assert((N->getOpcode() == ISD::SINT_TO_FP || N->getOpcode() == ISD::UINT_TO_FP) && "Need an int -> FP conversion node here"); if (useSoftFloat() || !Subtarget.has64BitSupport()) return SDValue(); SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); SDValue Op(N, 0); // Don't handle ppc_fp128 here or conversions that are out-of-range capable // from the hardware. if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) return SDValue(); if (!Op.getOperand(0).getValueType().isSimple()) return SDValue(); if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) || Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64)) return SDValue(); SDValue FirstOperand(Op.getOperand(0)); bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD && (FirstOperand.getValueType() == MVT::i8 || FirstOperand.getValueType() == MVT::i16); if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) { bool Signed = N->getOpcode() == ISD::SINT_TO_FP; bool DstDouble = Op.getValueType() == MVT::f64; unsigned ConvOp = Signed ? (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) : (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS); SDValue WidthConst = DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2, dl, false); LoadSDNode *LDN = cast(FirstOperand.getNode()); SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst }; SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl, DAG.getVTList(MVT::f64, MVT::Other), Ops, MVT::i8, LDN->getMemOperand()); // For signed conversion, we need to sign-extend the value in the VSR if (Signed) { SDValue ExtOps[] = { Ld, WidthConst }; SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps); return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext); } else return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld); } // For i32 intermediate values, unfortunately, the conversion functions // leave the upper 32 bits of the value are undefined. Within the set of // scalar instructions, we have no method for zero- or sign-extending the // value. Thus, we cannot handle i32 intermediate values here. if (Op.getOperand(0).getValueType() == MVT::i32) return SDValue(); assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && "UINT_TO_FP is supported only with FPCVT"); // If we have FCFIDS, then use it when converting to single-precision. // Otherwise, convert to double-precision and then round. unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS : PPCISD::FCFIDS) : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU : PPCISD::FCFID); MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? MVT::f32 : MVT::f64; // If we're converting from a float, to an int, and back to a float again, // then we don't need the store/load pair at all. if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT && Subtarget.hasFPCVT()) || (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) { SDValue Src = Op.getOperand(0).getOperand(0); if (Src.getValueType() == MVT::f32) { Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); DCI.AddToWorklist(Src.getNode()); } else if (Src.getValueType() != MVT::f64) { // Make sure that we don't pick up a ppc_fp128 source value. 
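      // (ppc_fp128 is the double-double software format rather than a single
      // hardware f64, so it cannot feed the FCTIDZ/FCTIDUZ conversion that is
      // generated below.)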
return SDValue(); } unsigned FCTOp = Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ; SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src); SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp); if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) { FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0, dl)); DCI.AddToWorklist(FP.getNode()); } return FP; } return SDValue(); } // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for // builtins) into loads with swaps. SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); SDValue Chain; SDValue Base; MachineMemOperand *MMO; switch (N->getOpcode()) { default: llvm_unreachable("Unexpected opcode for little endian VSX load"); case ISD::LOAD: { LoadSDNode *LD = cast(N); Chain = LD->getChain(); Base = LD->getBasePtr(); MMO = LD->getMemOperand(); // If the MMO suggests this isn't a load of a full vector, leave // things alone. For a built-in, we have to make the change for // correctness, so if there is a size problem that will be a bug. if (MMO->getSize() < 16) return SDValue(); break; } case ISD::INTRINSIC_W_CHAIN: { MemIntrinsicSDNode *Intrin = cast(N); Chain = Intrin->getChain(); // Similarly to the store case below, Intrin->getBasePtr() doesn't get // us what we want. Get operand 2 instead. Base = Intrin->getOperand(2); MMO = Intrin->getMemOperand(); break; } } MVT VecTy = N->getValueType(0).getSimpleVT(); // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is // aligned and the type is a vector with elements up to 4 bytes if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) && VecTy.getScalarSizeInBits() <= 32) { return SDValue(); } SDValue LoadOps[] = { Chain, Base }; SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl, DAG.getVTList(MVT::v2f64, MVT::Other), LoadOps, MVT::v2f64, MMO); DCI.AddToWorklist(Load.getNode()); Chain = Load.getValue(1); SDValue Swap = DAG.getNode( PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load); DCI.AddToWorklist(Swap.getNode()); // Add a bitcast if the resulting load type doesn't match v2f64. if (VecTy != MVT::v2f64) { SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap); DCI.AddToWorklist(N.getNode()); // Package {bitcast value, swap's chain} to match Load's shape. return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other), N, Swap.getValue(1)); } return Swap; } // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for // builtins) into stores with swaps. SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); SDValue Chain; SDValue Base; unsigned SrcOpnd; MachineMemOperand *MMO; switch (N->getOpcode()) { default: llvm_unreachable("Unexpected opcode for little endian VSX store"); case ISD::STORE: { StoreSDNode *ST = cast(N); Chain = ST->getChain(); Base = ST->getBasePtr(); MMO = ST->getMemOperand(); SrcOpnd = 1; // If the MMO suggests this isn't a store of a full vector, leave // things alone. For a built-in, we have to make the change for // correctness, so if there is a size problem that will be a bug. if (MMO->getSize() < 16) return SDValue(); break; } case ISD::INTRINSIC_VOID: { MemIntrinsicSDNode *Intrin = cast(N); Chain = Intrin->getChain(); // Intrin->getBasePtr() oddly does not get what we want. 
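    // For an INTRINSIC_VOID node the operands are: 0 = chain, 1 = intrinsic
    // id, followed by the intrinsic's own arguments; for these VSX store
    // intrinsics the value to store is operand 2 and the pointer is
    // operand 3.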
Base = Intrin->getOperand(3); MMO = Intrin->getMemOperand(); SrcOpnd = 2; break; } } SDValue Src = N->getOperand(SrcOpnd); MVT VecTy = Src.getValueType().getSimpleVT(); // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is // aligned and the type is a vector with elements up to 4 bytes if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) && VecTy.getScalarSizeInBits() <= 32) { return SDValue(); } // All stores are done as v2f64 and possible bit cast. if (VecTy != MVT::v2f64) { Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src); DCI.AddToWorklist(Src.getNode()); } SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src); DCI.AddToWorklist(Swap.getNode()); Chain = Swap.getValue(1); SDValue StoreOps[] = { Chain, Swap, Base }; SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl, DAG.getVTList(MVT::Other), StoreOps, VecTy, MMO); DCI.AddToWorklist(Store.getNode()); return Store; } // Handle DAG combine for STORE (FP_TO_INT F). SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); unsigned Opcode = N->getOperand(1).getOpcode(); assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) && "Not a FP_TO_INT Instruction!"); SDValue Val = N->getOperand(1).getOperand(0); EVT Op1VT = N->getOperand(1).getValueType(); EVT ResVT = Val.getValueType(); if (!isTypeLegal(ResVT)) return SDValue(); // Only perform combine for conversion to i64/i32 or power9 i16/i8. bool ValidTypeForStoreFltAsInt = (Op1VT == MVT::i32 || Op1VT == MVT::i64 || (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8))); if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() || cast(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt) return SDValue(); // Extend f32 values to f64 if (ResVT.getScalarSizeInBits() == 32) { Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); DCI.AddToWorklist(Val.getNode()); } // Set signed or unsigned conversion opcode. unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ? PPCISD::FP_TO_SINT_IN_VSR : PPCISD::FP_TO_UINT_IN_VSR; Val = DAG.getNode(ConvOpcode, dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val); DCI.AddToWorklist(Val.getNode()); // Set number of bytes being converted. unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8; SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2), DAG.getIntPtrConstant(ByteSize, dl, false), DAG.getValueType(Op1VT) }; Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl, DAG.getVTList(MVT::Other), Ops, cast(N)->getMemoryVT(), cast(N)->getMemOperand()); DCI.AddToWorklist(Val.getNode()); return Val; } static bool isAlternatingShuffMask(const ArrayRef &Mask, int NumElts) { // Check that the source of the element keeps flipping // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts). bool PrevElemFromFirstVec = Mask[0] < NumElts; for (int i = 1, e = Mask.size(); i < e; i++) { if (PrevElemFromFirstVec && Mask[i] < NumElts) return false; if (!PrevElemFromFirstVec && Mask[i] >= NumElts) return false; PrevElemFromFirstVec = !PrevElemFromFirstVec; } return true; } static bool isSplatBV(SDValue Op) { if (Op.getOpcode() != ISD::BUILD_VECTOR) return false; SDValue FirstOp; // Find first non-undef input. for (int i = 0, e = Op.getNumOperands(); i < e; i++) { FirstOp = Op.getOperand(i); if (!FirstOp.isUndef()) break; } // All inputs are undef or the same as the first non-undef input. 
for (int i = 1, e = Op.getNumOperands(); i < e; i++) if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef()) return false; return true; } static SDValue isScalarToVec(SDValue Op) { if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR) return Op; if (Op.getOpcode() != ISD::BITCAST) return SDValue(); Op = Op.getOperand(0); if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR) return Op; return SDValue(); } static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl &ShuffV, int LHSMaxIdx, int RHSMinIdx, int RHSMaxIdx, int HalfVec) { for (int i = 0, e = ShuffV.size(); i < e; i++) { int Idx = ShuffV[i]; if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx)) ShuffV[i] += HalfVec; } } // Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if // the original is: // ( (scalar_to_vector (Ty (extract_elt %a, C)))) // In such a case, just change the shuffle mask to extract the element // from the permuted index. static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG) { SDLoc dl(OrigSToV); EVT VT = OrigSToV.getValueType(); assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR && "Expecting a SCALAR_TO_VECTOR here"); SDValue Input = OrigSToV.getOperand(0); if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { ConstantSDNode *Idx = dyn_cast(Input.getOperand(1)); SDValue OrigVector = Input.getOperand(0); // Can't handle non-const element indices or different vector types // for the input to the extract and the output of the scalar_to_vector. if (Idx && VT == OrigVector.getValueType()) { SmallVector NewMask(VT.getVectorNumElements(), -1); NewMask[VT.getVectorNumElements() / 2] = Idx->getZExtValue(); return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask); } } return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT, OrigSToV.getOperand(0)); } // On little endian subtargets, combine shuffles such as: // vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, , %b // into: // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, , %b // because the latter can be matched to a single instruction merge. // Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute // to put the value into element zero. Adjust the shuffle mask so that the // vector can remain in permuted form (to prevent a swap prior to a shuffle). SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG) const { SDValue LHS = SVN->getOperand(0); SDValue RHS = SVN->getOperand(1); auto Mask = SVN->getMask(); int NumElts = LHS.getValueType().getVectorNumElements(); SDValue Res(SVN, 0); SDLoc dl(SVN); // None of these combines are useful on big endian systems since the ISA // already has a big endian bias. if (!Subtarget.isLittleEndian() || !Subtarget.hasVSX()) return Res; // If this is not a shuffle of a shuffle and the first element comes from // the second vector, canonicalize to the commuted form. This will make it // more likely to match one of the single instruction patterns. if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE && RHS.getOpcode() != ISD::VECTOR_SHUFFLE) { std::swap(LHS, RHS); Res = DAG.getCommutedVectorShuffle(*SVN); Mask = cast(Res)->getMask(); } // Adjust the shuffle mask if either input vector comes from a // SCALAR_TO_VECTOR and keep the respective input vector in permuted // form (to prevent the need for a swap). SmallVector ShuffV(Mask.begin(), Mask.end()); SDValue SToVLHS = isScalarToVec(LHS); SDValue SToVRHS = isScalarToVec(RHS); if (SToVLHS || SToVRHS) { int NumEltsIn = SToVLHS ? 
SToVLHS.getValueType().getVectorNumElements() : SToVRHS.getValueType().getVectorNumElements(); int NumEltsOut = ShuffV.size(); // Initially assume that neither input is permuted. These will be adjusted // accordingly if either input is. int LHSMaxIdx = -1; int RHSMinIdx = -1; int RHSMaxIdx = -1; int HalfVec = LHS.getValueType().getVectorNumElements() / 2; // Get the permuted scalar to vector nodes for the source(s) that come from // ISD::SCALAR_TO_VECTOR. if (SToVLHS) { // Set up the values for the shuffle vector fixup. LHSMaxIdx = NumEltsOut / NumEltsIn; SToVLHS = getSToVPermuted(SToVLHS, DAG); if (SToVLHS.getValueType() != LHS.getValueType()) SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS); LHS = SToVLHS; } if (SToVRHS) { RHSMinIdx = NumEltsOut; RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx; SToVRHS = getSToVPermuted(SToVRHS, DAG); if (SToVRHS.getValueType() != RHS.getValueType()) SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS); RHS = SToVRHS; } // Fix up the shuffle mask to reflect where the desired element actually is. // The minimum and maximum indices that correspond to element zero for both // the LHS and RHS are computed and will control which shuffle mask entries // are to be changed. For example, if the RHS is permuted, any shuffle mask // entries in the range [RHSMinIdx,RHSMaxIdx) will be incremented by // HalfVec to refer to the corresponding element in the permuted vector. fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx, HalfVec); Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV); // We may have simplified away the shuffle. We won't be able to do anything // further with it here. if (!isa(Res)) return Res; Mask = cast(Res)->getMask(); } // The common case after we commuted the shuffle is that the RHS is a splat // and we have elements coming in from the splat at indices that are not // conducive to using a merge. // Example: // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, if (!isSplatBV(RHS)) return Res; // We are looking for a mask such that all even elements are from // one vector and all odd elements from the other. if (!isAlternatingShuffMask(Mask, NumElts)) return Res; // Adjust the mask so we are pulling in the same index from the splat // as the index from the interesting vector in consecutive elements. // Example (even elements from first vector): // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, if (Mask[0] < NumElts) for (int i = 1, e = Mask.size(); i < e; i += 2) ShuffV[i] = (ShuffV[i - 1] + NumElts); // Example (odd elements from first vector): // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, else for (int i = 0, e = Mask.size(); i < e; i += 2) ShuffV[i] = (ShuffV[i + 1] + NumElts); // If the RHS has undefs, we need to remove them since we may have created // a shuffle that adds those instead of the splat value. 
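  // Rebuilding RHS as a uniform splat build_vector below guarantees that
  // every lane the adjusted mask reads from the splat operand really does
  // contain the splat value rather than undef.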
SDValue SplatVal = cast(RHS.getNode())->getSplatValue(); RHS = DAG.getSplatBuildVector(RHS.getValueType(), dl, SplatVal); Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV); return Res; } SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN, LSBaseSDNode *LSBase, DAGCombinerInfo &DCI) const { assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) && "Not a reverse memop pattern!"); auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool { auto Mask = SVN->getMask(); int i = 0; auto I = Mask.rbegin(); auto E = Mask.rend(); for (; I != E; ++I) { if (*I != i) return false; i++; } return true; }; SelectionDAG &DAG = DCI.DAG; EVT VT = SVN->getValueType(0); if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX()) return SDValue(); // Before P9, we have PPCVSXSwapRemoval pass to hack the element order. // See comment in PPCVSXSwapRemoval.cpp. // It is conflict with PPCVSXSwapRemoval opt. So we don't do it. if (!Subtarget.hasP9Vector()) return SDValue(); if(!IsElementReverse(SVN)) return SDValue(); if (LSBase->getOpcode() == ISD::LOAD) { SDLoc dl(SVN); SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()}; return DAG.getMemIntrinsicNode( PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps, LSBase->getMemoryVT(), LSBase->getMemOperand()); } if (LSBase->getOpcode() == ISD::STORE) { SDLoc dl(LSBase); SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0), LSBase->getBasePtr()}; return DAG.getMemIntrinsicNode( PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps, LSBase->getMemoryVT(), LSBase->getMemOperand()); } llvm_unreachable("Expected a load or store node here"); } SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); switch (N->getOpcode()) { default: break; case ISD::ADD: return combineADD(N, DCI); case ISD::SHL: return combineSHL(N, DCI); case ISD::SRA: return combineSRA(N, DCI); case ISD::SRL: return combineSRL(N, DCI); case ISD::MUL: return combineMUL(N, DCI); case ISD::FMA: case PPCISD::FNMSUB: return combineFMALike(N, DCI); case PPCISD::SHL: if (isNullConstant(N->getOperand(0))) // 0 << V -> 0. return N->getOperand(0); break; case PPCISD::SRL: if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0. return N->getOperand(0); break; case PPCISD::SRA: if (ConstantSDNode *C = dyn_cast(N->getOperand(0))) { if (C->isNullValue() || // 0 >>s V -> 0. C->isAllOnesValue()) // -1 >>s V -> -1. 
return N->getOperand(0); } break; case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: return DAGCombineExtBoolTrunc(N, DCI); case ISD::TRUNCATE: return combineTRUNCATE(N, DCI); case ISD::SETCC: if (SDValue CSCC = combineSetCC(N, DCI)) return CSCC; LLVM_FALLTHROUGH; case ISD::SELECT_CC: return DAGCombineTruncBoolExt(N, DCI); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return combineFPToIntToFP(N, DCI); case ISD::VECTOR_SHUFFLE: if (ISD::isNormalLoad(N->getOperand(0).getNode())) { LSBaseSDNode* LSBase = cast(N->getOperand(0)); return combineVReverseMemOP(cast(N), LSBase, DCI); } return combineVectorShuffle(cast(N), DCI.DAG); case ISD::STORE: { EVT Op1VT = N->getOperand(1).getValueType(); unsigned Opcode = N->getOperand(1).getOpcode(); if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) { SDValue Val= combineStoreFPToInt(N, DCI); if (Val) return Val; } if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) { ShuffleVectorSDNode *SVN = cast(N->getOperand(1)); SDValue Val= combineVReverseMemOP(SVN, cast(N), DCI); if (Val) return Val; } // Turn STORE (BSWAP) -> sthbrx/stwbrx. if (cast(N)->isUnindexed() && Opcode == ISD::BSWAP && N->getOperand(1).getNode()->hasOneUse() && (Op1VT == MVT::i32 || Op1VT == MVT::i16 || (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) { // STBRX can only handle simple types and it makes no sense to store less // two bytes in byte-reversed order. EVT mVT = cast(N)->getMemoryVT(); if (mVT.isExtended() || mVT.getSizeInBits() < 16) break; SDValue BSwapOp = N->getOperand(1).getOperand(0); // Do an any-extend to 32-bits if this is a half-word input. if (BSwapOp.getValueType() == MVT::i16) BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp); // If the type of BSWAP operand is wider than stored memory width // it need to be shifted to the right side before STBRX. if (Op1VT.bitsGT(mVT)) { int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits(); BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp, DAG.getConstant(Shift, dl, MVT::i32)); // Need to truncate if this is a bswap of i64 stored as i32/i16. if (Op1VT == MVT::i64) BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp); } SDValue Ops[] = { N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT) }; return DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other), Ops, cast(N)->getMemoryVT(), cast(N)->getMemOperand()); } // STORE Constant:i32<0> -> STORE Constant:i64<0> // So it can increase the chance of CSE constant construction. if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() && isa(N->getOperand(1)) && Op1VT == MVT::i32) { // Need to sign-extended to 64-bits to handle negative values. EVT MemVT = cast(N)->getMemoryVT(); uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1), MemVT.getSizeInBits()); SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64); // DAG.getTruncStore() can't be used here because it doesn't accept // the general (base + offset) addressing mode. // So we use UpdateNodeOperands and setTruncatingStore instead. DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2), N->getOperand(3)); cast(N)->setTruncatingStore(true); return SDValue(N, 0); } // For little endian, VSX stores require generating xxswapd/lxvd2x. // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. 
if (Op1VT.isSimple()) { MVT StoreVT = Op1VT.getSimpleVT(); if (Subtarget.needsSwapsForVSXMemOps() && (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 || StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32)) return expandVSXStoreForLE(N, DCI); } break; } case ISD::LOAD: { LoadSDNode *LD = cast(N); EVT VT = LD->getValueType(0); // For little endian, VSX loads require generating lxvd2x/xxswapd. // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. if (VT.isSimple()) { MVT LoadVT = VT.getSimpleVT(); if (Subtarget.needsSwapsForVSXMemOps() && (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 || LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32)) return expandVSXLoadForLE(N, DCI); } // We sometimes end up with a 64-bit integer load, from which we extract // two single-precision floating-point numbers. This happens with // std::complex, and other similar structures, because of the way we // canonicalize structure copies. However, if we lack direct moves, // then the final bitcasts from the extracted integer values to the // floating-point numbers turn into store/load pairs. Even with direct moves, // just loading the two floating-point numbers is likely better. auto ReplaceTwoFloatLoad = [&]() { if (VT != MVT::i64) return false; if (LD->getExtensionType() != ISD::NON_EXTLOAD || LD->isVolatile()) return false; // We're looking for a sequence like this: // t13: i64,ch = load t0, t6, undef:i64 // t16: i64 = srl t13, Constant:i32<32> // t17: i32 = truncate t16 // t18: f32 = bitcast t17 // t19: i32 = truncate t13 // t20: f32 = bitcast t19 if (!LD->hasNUsesOfValue(2, 0)) return false; auto UI = LD->use_begin(); while (UI.getUse().getResNo() != 0) ++UI; SDNode *Trunc = *UI++; while (UI.getUse().getResNo() != 0) ++UI; SDNode *RightShift = *UI; if (Trunc->getOpcode() != ISD::TRUNCATE) std::swap(Trunc, RightShift); if (Trunc->getOpcode() != ISD::TRUNCATE || Trunc->getValueType(0) != MVT::i32 || !Trunc->hasOneUse()) return false; if (RightShift->getOpcode() != ISD::SRL || !isa(RightShift->getOperand(1)) || RightShift->getConstantOperandVal(1) != 32 || !RightShift->hasOneUse()) return false; SDNode *Trunc2 = *RightShift->use_begin(); if (Trunc2->getOpcode() != ISD::TRUNCATE || Trunc2->getValueType(0) != MVT::i32 || !Trunc2->hasOneUse()) return false; SDNode *Bitcast = *Trunc->use_begin(); SDNode *Bitcast2 = *Trunc2->use_begin(); if (Bitcast->getOpcode() != ISD::BITCAST || Bitcast->getValueType(0) != MVT::f32) return false; if (Bitcast2->getOpcode() != ISD::BITCAST || Bitcast2->getValueType(0) != MVT::f32) return false; if (Subtarget.isLittleEndian()) std::swap(Bitcast, Bitcast2); // Bitcast has the second float (in memory-layout order) and Bitcast2 // has the first one. 
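      // The rewrite below loads the two halves directly as f32: one load at
      // the original address and one at address+4 (chained so the second
      // load depends on the first), after which CombineTo wires Bitcast2 to
      // the low-address half and Bitcast to the high-address half.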
SDValue BasePtr = LD->getBasePtr(); if (LD->isIndexed()) { assert(LD->getAddressingMode() == ISD::PRE_INC && "Non-pre-inc AM on PPC?"); BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, LD->getOffset()); } auto MMOFlags = LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile; SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr, LD->getPointerInfo(), LD->getAlignment(), MMOFlags, LD->getAAInfo()); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(4, dl)); SDValue FloatLoad2 = DAG.getLoad( MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr, LD->getPointerInfo().getWithOffset(4), MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo()); if (LD->isIndexed()) { // Note that DAGCombine should re-form any pre-increment load(s) from // what is produced here if that makes sense. DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr); } DCI.CombineTo(Bitcast2, FloatLoad); DCI.CombineTo(Bitcast, FloatLoad2); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1), SDValue(FloatLoad2.getNode(), 1)); return true; }; if (ReplaceTwoFloatLoad()) return SDValue(N, 0); EVT MemVT = LD->getMemoryVT(); Type *Ty = MemVT.getTypeForEVT(*DAG.getContext()); Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty); if (LD->isUnindexed() && VT.isVector() && ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) && // P8 and later hardware should just use LOAD. !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v4f32))) && LD->getAlign() < ABIAlignment) { // This is a type-legal unaligned Altivec load. SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); bool isLittleEndian = Subtarget.isLittleEndian(); // This implements the loading of unaligned vectors as described in // the venerable Apple Velocity Engine overview. Specifically: // https://developer.apple.com/hardwaredrivers/ve/alignment.html // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html // // The general idea is to expand a sequence of one or more unaligned // loads into an alignment-based permutation-control instruction (lvsl // or lvsr), a series of regular vector loads (which always truncate // their input address to an aligned address), and a series of // permutations. The results of these permutations are the requested // loaded values. The trick is that the last "extra" load is not taken // from the address you might suspect (sizeof(vector) bytes after the // last requested load), but rather sizeof(vector) - 1 bytes after the // last requested vector. The point of this is to avoid a page fault if // the base address happened to be aligned. This works because if the // base address is aligned, then adding less than a full vector length // will cause the last vector in the sequence to be (re)loaded. // Otherwise, the next vector will be fetched as you might suspect was // necessary. // We might be able to reuse the permutation generation from // a different base address offset from this one by an aligned amount. // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this // optimization later. Intrinsic::ID Intr, IntrLD, IntrPerm; MVT PermCntlTy, PermTy, LDTy; Intr = isLittleEndian ? 
Intrinsic::ppc_altivec_lvsr : Intrinsic::ppc_altivec_lvsl; IntrLD = Intrinsic::ppc_altivec_lvx; IntrPerm = Intrinsic::ppc_altivec_vperm; PermCntlTy = MVT::v16i8; PermTy = MVT::v4i32; LDTy = MVT::v4i32; SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy); // Create the new MMO for the new base load. It is like the original MMO, // but represents an area in memory almost twice the vector size centered // on the original address. If the address is unaligned, we might start // reading up to (sizeof(vector)-1) bytes below the address of the // original unaligned load. MachineFunction &MF = DAG.getMachineFunction(); MachineMemOperand *BaseMMO = MF.getMachineMemOperand(LD->getMemOperand(), -(long)MemVT.getStoreSize()+1, 2*MemVT.getStoreSize()-1); // Create the new base load. SDValue LDXIntID = DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout())); SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr }; SDValue BaseLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, DAG.getVTList(PermTy, MVT::Other), BaseLoadOps, LDTy, BaseMMO); // Note that the value of IncOffset (which is provided to the next // load's pointer info offset value, and thus used to calculate the // alignment), and the value of IncValue (which is actually used to // increment the pointer value) are different! This is because we // require the next load to appear to be aligned, even though it // is actually offset from the base pointer by a lesser amount. int IncOffset = VT.getSizeInBits() / 8; int IncValue = IncOffset; // Walk (both up and down) the chain looking for another load at the real // (aligned) offset (the alignment of the other load does not matter in // this case). If found, then do not use the offset reduction trick, as // that will prevent the loads from being later combined (as they would // otherwise be duplicates). if (!findConsecutiveLoad(LD, DAG)) --IncValue; SDValue Increment = DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout())); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); MachineMemOperand *ExtraMMO = MF.getMachineMemOperand(LD->getMemOperand(), 1, 2*MemVT.getStoreSize()-1); SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr }; SDValue ExtraLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, DAG.getVTList(PermTy, MVT::Other), ExtraLoadOps, LDTy, ExtraMMO); SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, BaseLoad.getValue(1), ExtraLoad.getValue(1)); // Because vperm has a big-endian bias, we must reverse the order // of the input vectors and complement the permute control vector // when generating little endian code. We have already handled the // latter by using lvsr instead of lvsl, so just reverse BaseLoad // and ExtraLoad here. SDValue Perm; if (isLittleEndian) Perm = BuildIntrinsicOp(IntrPerm, ExtraLoad, BaseLoad, PermCntl, DAG, dl); else Perm = BuildIntrinsicOp(IntrPerm, BaseLoad, ExtraLoad, PermCntl, DAG, dl); if (VT != PermTy) Perm = Subtarget.hasAltivec() ? DAG.getNode(ISD::BITCAST, dl, VT, Perm) : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, DAG.getTargetConstant(1, dl, MVT::i64)); // second argument is 1 because this rounding // is always exact. // The output of the permutation is our loaded result, the TokenFactor is // our new chain. DCI.CombineTo(N, Perm, TF); return SDValue(N, 0); } } break; case ISD::INTRINSIC_WO_CHAIN: { bool isLittleEndian = Subtarget.isLittleEndian(); unsigned IID = cast(N->getOperand(0))->getZExtValue(); Intrinsic::ID Intr = (isLittleEndian ? 
Intrinsic::ppc_altivec_lvsr : Intrinsic::ppc_altivec_lvsl); if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) { SDValue Add = N->getOperand(1); int Bits = 4 /* 16 byte alignment */; if (DAG.MaskedValueIsZero(Add->getOperand(1), APInt::getAllOnesValue(Bits /* alignment */) .zext(Add.getScalarValueSizeInBits()))) { SDNode *BasePtr = Add->getOperand(0).getNode(); for (SDNode::use_iterator UI = BasePtr->use_begin(), UE = BasePtr->use_end(); UI != UE; ++UI) { if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && cast(UI->getOperand(0))->getZExtValue() == IID) { // We've found another LVSL/LVSR, and this address is an aligned // multiple of that one. The results will be the same, so use the // one we've just found instead. return SDValue(*UI, 0); } } } if (isa(Add->getOperand(1))) { SDNode *BasePtr = Add->getOperand(0).getNode(); for (SDNode::use_iterator UI = BasePtr->use_begin(), UE = BasePtr->use_end(); UI != UE; ++UI) { if (UI->getOpcode() == ISD::ADD && isa(UI->getOperand(1)) && (cast(Add->getOperand(1))->getZExtValue() - cast(UI->getOperand(1))->getZExtValue()) % (1ULL << Bits) == 0) { SDNode *OtherAdd = *UI; for (SDNode::use_iterator VI = OtherAdd->use_begin(), VE = OtherAdd->use_end(); VI != VE; ++VI) { if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && cast(VI->getOperand(0))->getZExtValue() == IID) { return SDValue(*VI, 0); } } } } } } // Combine vmaxsw/h/b(a, a's negation) to abs(a) // Expose the vabsduw/h/b opportunity for down stream if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() && (IID == Intrinsic::ppc_altivec_vmaxsw || IID == Intrinsic::ppc_altivec_vmaxsh || IID == Intrinsic::ppc_altivec_vmaxsb)) { SDValue V1 = N->getOperand(1); SDValue V2 = N->getOperand(2); if ((V1.getSimpleValueType() == MVT::v4i32 || V1.getSimpleValueType() == MVT::v8i16 || V1.getSimpleValueType() == MVT::v16i8) && V1.getSimpleValueType() == V2.getSimpleValueType()) { // (0-a, a) if (V1.getOpcode() == ISD::SUB && ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) && V1.getOperand(1) == V2) { return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2); } // (a, 0-a) if (V2.getOpcode() == ISD::SUB && ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) && V2.getOperand(1) == V1) { return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1); } // (x-y, y-x) if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB && V1.getOperand(0) == V2.getOperand(1) && V1.getOperand(1) == V2.getOperand(0)) { return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1); } } } } break; case ISD::INTRINSIC_W_CHAIN: // For little endian, VSX loads require generating lxvd2x/xxswapd. // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. if (Subtarget.needsSwapsForVSXMemOps()) { switch (cast(N->getOperand(1))->getZExtValue()) { default: break; case Intrinsic::ppc_vsx_lxvw4x: case Intrinsic::ppc_vsx_lxvd2x: return expandVSXLoadForLE(N, DCI); } } break; case ISD::INTRINSIC_VOID: // For little endian, VSX stores require generating xxswapd/stxvd2x. // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. if (Subtarget.needsSwapsForVSXMemOps()) { switch (cast(N->getOperand(1))->getZExtValue()) { default: break; case Intrinsic::ppc_vsx_stxvw4x: case Intrinsic::ppc_vsx_stxvd2x: return expandVSXStoreForLE(N, DCI); } } break; case ISD::BSWAP: // Turn BSWAP (LOAD) -> lhbrx/lwbrx. 
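    // A byte swap fed by a non-extending load with no other users of the
    // loaded value folds into a single byte-reversed load (PPCISD::LBRX).
    // i64 is only handled when the subtarget provides ldbrx, and an i16
    // result needs an explicit truncate because the byte-reversed load
    // produces at least an i32.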
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && N->getOperand(0).hasOneUse() && (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 || (Subtarget.hasLDBRX() && Subtarget.isPPC64() && N->getValueType(0) == MVT::i64))) { SDValue Load = N->getOperand(0); LoadSDNode *LD = cast(Load); // Create the byte-swapping load. SDValue Ops[] = { LD->getChain(), // Chain LD->getBasePtr(), // Ptr DAG.getValueType(N->getValueType(0)) // VT }; SDValue BSLoad = DAG.getMemIntrinsicNode(PPCISD::LBRX, dl, DAG.getVTList(N->getValueType(0) == MVT::i64 ? MVT::i64 : MVT::i32, MVT::Other), Ops, LD->getMemoryVT(), LD->getMemOperand()); // If this is an i16 load, insert the truncate. SDValue ResVal = BSLoad; if (N->getValueType(0) == MVT::i16) ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad); // First, combine the bswap away. This makes the value produced by the // load dead. DCI.CombineTo(N, ResVal); // Next, combine the load away, we give it a bogus result value but a real // chain result. The result value is dead because the bswap is dead. DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); // Return N so it doesn't get rechecked! return SDValue(N, 0); } break; case PPCISD::VCMP: // If a VCMP_rec node already exists with exactly the same operands as this // node, use its result instead of this node (VCMP_rec computes both a CR6 // and a normal output). // if (!N->getOperand(0).hasOneUse() && !N->getOperand(1).hasOneUse() && !N->getOperand(2).hasOneUse()) { // Scan all of the users of the LHS, looking for VCMP_rec's that match. SDNode *VCMPrecNode = nullptr; SDNode *LHSN = N->getOperand(0).getNode(); for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); UI != E; ++UI) if (UI->getOpcode() == PPCISD::VCMP_rec && UI->getOperand(1) == N->getOperand(1) && UI->getOperand(2) == N->getOperand(2) && UI->getOperand(0) == N->getOperand(0)) { VCMPrecNode = *UI; break; } // If there is no VCMP_rec node, or if the flag value has a single use, // don't transform this. if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1)) break; // Look at the (necessarily single) use of the flag value. If it has a // chain, this transformation is more complex. Note that multiple things // could use the value result, which we should ignore. SDNode *FlagUser = nullptr; for (SDNode::use_iterator UI = VCMPrecNode->use_begin(); FlagUser == nullptr; ++UI) { assert(UI != VCMPrecNode->use_end() && "Didn't find user!"); SDNode *User = *UI; for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) { FlagUser = User; break; } } } // If the user is a MFOCRF instruction, we know this is safe. // Otherwise we give up for right now. if (FlagUser->getOpcode() == PPCISD::MFOCRF) return SDValue(VCMPrecNode, 0); } break; case ISD::BRCOND: { SDValue Cond = N->getOperand(1); SDValue Target = N->getOperand(2); if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN && cast(Cond.getOperand(1))->getZExtValue() == Intrinsic::loop_decrement) { // We now need to make the intrinsic dead (it cannot be instruction // selected). DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0)); assert(Cond.getNode()->hasOneUse() && "Counter decrement has more than one use"); return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other, N->getOperand(0), Target); } } break; case ISD::BR_CC: { // If this is a branch on an altivec predicate comparison, lower this so // that we don't have to do a MFOCRF: instead, branch directly on CR6. 
This // lowering is done pre-legalize, because the legalizer lowers the predicate // compare down to code that is difficult to reassemble. ISD::CondCode CC = cast(N->getOperand(1))->get(); SDValue LHS = N->getOperand(2), RHS = N->getOperand(3); // Sometimes the promoted value of the intrinsic is ANDed by some non-zero // value. If so, pass-through the AND to get to the intrinsic. if (LHS.getOpcode() == ISD::AND && LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN && cast(LHS.getOperand(0).getOperand(1))->getZExtValue() == Intrinsic::loop_decrement && isa(LHS.getOperand(1)) && !isNullConstant(LHS.getOperand(1))) LHS = LHS.getOperand(0); if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN && cast(LHS.getOperand(1))->getZExtValue() == Intrinsic::loop_decrement && isa(RHS)) { assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Counter decrement comparison is not EQ or NE"); unsigned Val = cast(RHS)->getZExtValue(); bool isBDNZ = (CC == ISD::SETEQ && Val) || (CC == ISD::SETNE && !Val); // We now need to make the intrinsic dead (it cannot be instruction // selected). DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0)); assert(LHS.getNode()->hasOneUse() && "Counter decrement has more than one use"); return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other, N->getOperand(0), N->getOperand(4)); } int CompareOpc; bool isDot; if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && isa(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) { assert(isDot && "Can't compare against a vector result!"); // If this is a comparison against something other than 0/1, then we know // that the condition is never/always true. unsigned Val = cast(RHS)->getZExtValue(); if (Val != 0 && Val != 1) { if (CC == ISD::SETEQ) // Cond never true, remove branch. return N->getOperand(0); // Always !=, turn it into an unconditional branch. return DAG.getNode(ISD::BR, dl, MVT::Other, N->getOperand(0), N->getOperand(4)); } bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); // Create the PPCISD altivec 'dot' comparison node. SDValue Ops[] = { LHS.getOperand(2), // LHS of compare LHS.getOperand(3), // RHS of compare DAG.getConstant(CompareOpc, dl, MVT::i32) }; EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue }; SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops); // Unpack the result based on how the target uses it. PPC::Predicate CompOpc; switch (cast(LHS.getOperand(1))->getZExtValue()) { default: // Can't happen, don't crash on invalid number though. case 0: // Branch on the value of the EQ bit of CR6. CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE; break; case 1: // Branch on the inverted value of the EQ bit of CR6. CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ; break; case 2: // Branch on the value of the LT bit of CR6. CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE; break; case 3: // Branch on the inverted value of the LT bit of CR6. CompOpc = BranchOnWhenPredTrue ? 
PPC::PRED_GE : PPC::PRED_LT; break; } return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0), DAG.getConstant(CompOpc, dl, MVT::i32), DAG.getRegister(PPC::CR6, MVT::i32), N->getOperand(4), CompNode.getValue(1)); } break; } case ISD::BUILD_VECTOR: return DAGCombineBuildVector(N, DCI); case ISD::ABS: return combineABS(N, DCI); case ISD::VSELECT: return combineVSelect(N, DCI); } return SDValue(); } SDValue PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl &Created) const { // fold (sdiv X, pow2) EVT VT = N->getValueType(0); if (VT == MVT::i64 && !Subtarget.isPPC64()) return SDValue(); if ((VT != MVT::i32 && VT != MVT::i64) || !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2())) return SDValue(); SDLoc DL(N); SDValue N0 = N->getOperand(0); bool IsNegPow2 = (-Divisor).isPowerOf2(); unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros(); SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT); SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt); Created.push_back(Op.getNode()); if (IsNegPow2) { Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op); Created.push_back(Op.getNode()); } return Op; } //===----------------------------------------------------------------------===// // Inline Assembly Support //===----------------------------------------------------------------------===// void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { Known.resetAll(); switch (Op.getOpcode()) { default: break; case PPCISD::LBRX: { // lhbrx is known to have the top bits cleared out. if (cast(Op.getOperand(2))->getVT() == MVT::i16) Known.Zero = 0xFFFF0000; break; } case ISD::INTRINSIC_WO_CHAIN: { switch (cast(Op.getOperand(0))->getZExtValue()) { default: break; case Intrinsic::ppc_altivec_vcmpbfp_p: case Intrinsic::ppc_altivec_vcmpeqfp_p: case Intrinsic::ppc_altivec_vcmpequb_p: case Intrinsic::ppc_altivec_vcmpequh_p: case Intrinsic::ppc_altivec_vcmpequw_p: case Intrinsic::ppc_altivec_vcmpequd_p: case Intrinsic::ppc_altivec_vcmpequq_p: case Intrinsic::ppc_altivec_vcmpgefp_p: case Intrinsic::ppc_altivec_vcmpgtfp_p: case Intrinsic::ppc_altivec_vcmpgtsb_p: case Intrinsic::ppc_altivec_vcmpgtsh_p: case Intrinsic::ppc_altivec_vcmpgtsw_p: case Intrinsic::ppc_altivec_vcmpgtsd_p: case Intrinsic::ppc_altivec_vcmpgtsq_p: case Intrinsic::ppc_altivec_vcmpgtub_p: case Intrinsic::ppc_altivec_vcmpgtuh_p: case Intrinsic::ppc_altivec_vcmpgtuw_p: case Intrinsic::ppc_altivec_vcmpgtud_p: case Intrinsic::ppc_altivec_vcmpgtuq_p: Known.Zero = ~1U; // All bits but the low one are known to be zero. break; } } } } Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { switch (Subtarget.getCPUDirective()) { default: break; case PPC::DIR_970: case PPC::DIR_PWR4: case PPC::DIR_PWR5: case PPC::DIR_PWR5X: case PPC::DIR_PWR6: case PPC::DIR_PWR6X: case PPC::DIR_PWR7: case PPC::DIR_PWR8: case PPC::DIR_PWR9: case PPC::DIR_PWR10: case PPC::DIR_PWR_FUTURE: { if (!ML) break; if (!DisableInnermostLoopAlign32) { // If the nested loop is an innermost loop, prefer to a 32-byte alignment, // so that we can decrease cache misses and branch-prediction misses. // Actual alignment of the loop will depend on the hotness check and other // logic in alignBlocks. 
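// [Illustrative sketch -- editorial addition, not part of the upstream patch.]
// The size heuristic that follows, restated standalone: a loop whose encoded
// size falls in (16, 32] bytes gets 32-byte alignment so the whole body fits
// in a single 32-byte fetch block; anything larger keeps the default.
// DefaultAlign stands in for TargetLowering::getPrefLoopAlignment().
static unsigned prefAlignForLoop(const unsigned *InstSizes, unsigned NumInsts,
                                 unsigned DefaultAlign) {
  unsigned long long LoopSize = 0;
  for (unsigned I = 0; I < NumInsts; ++I) {
    LoopSize += InstSizes[I];
    if (LoopSize > 32)
      break;                       // Too big to fit; no need to keep summing.
  }
  return (LoopSize > 16 && LoopSize <= 32) ? 32 : DefaultAlign;
}
// [End of illustrative sketch.]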
if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty()) return Align(32); } const PPCInstrInfo *TII = Subtarget.getInstrInfo(); // For small loops (between 5 and 8 instructions), align to a 32-byte // boundary so that the entire loop fits in one instruction-cache line. uint64_t LoopSize = 0; for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I) for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) { LoopSize += TII->getInstSizeInBytes(*J); if (LoopSize > 32) break; } if (LoopSize > 16 && LoopSize <= 32) return Align(32); break; } } return TargetLowering::getPrefLoopAlignment(ML); } /// getConstraintType - Given a constraint, return the type of /// constraint it is for this target. PPCTargetLowering::ConstraintType PPCTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { default: break; case 'b': case 'r': case 'f': case 'd': case 'v': case 'y': return C_RegisterClass; case 'Z': // FIXME: While Z does indicate a memory constraint, it specifically // indicates an r+r address (used in conjunction with the 'y' modifier // in the replacement string). Currently, we're forcing the base // register to be r0 in the asm printer (which is interpreted as zero) // and forming the complete address in the second register. This is // suboptimal. return C_Memory; } } else if (Constraint == "wc") { // individual CR bits. return C_RegisterClass; } else if (Constraint == "wa" || Constraint == "wd" || Constraint == "wf" || Constraint == "ws" || Constraint == "wi" || Constraint == "ww") { return C_RegisterClass; // VSX registers. } return TargetLowering::getConstraintType(Constraint); } /// Examine constraint type and operand type and determine a weight value. /// This object must already have been set up with the operand type /// and the current alternative constraint selected. TargetLowering::ConstraintWeight PPCTargetLowering::getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const { ConstraintWeight weight = CW_Invalid; Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); // Look at the constraint type. if (StringRef(constraint) == "wc" && type->isIntegerTy(1)) return CW_Register; // an individual CR bit. else if ((StringRef(constraint) == "wa" || StringRef(constraint) == "wd" || StringRef(constraint) == "wf") && type->isVectorTy()) return CW_Register; else if (StringRef(constraint) == "wi" && type->isIntegerTy(64)) return CW_Register; // just hold 64-bit integers data. 
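// [Illustrative usage sketch -- editorial addition, not part of the upstream
// patch; the instruction choices are only examples.]  A few of the constraint
// letters classified above, as they appear in user inline asm on PowerPC:
// 'b' asks for a base register (r1-r31, never r0, which reads as zero in the
// RA slot of indexed loads), 'r' for any GPR, and 'f' for an FP register.
static unsigned loadWordIndexed(const unsigned *Base, long Off) {
  unsigned V;
  asm("lwzx %0, %1, %2" : "=r"(V) : "b"(Base), "r"(Off));
  return V;
}
static double addDouble(double A, double B) {
  double R;
  asm("fadd %0, %1, %2" : "=f"(R) : "f"(A), "f"(B));
  return R;
}
// [End of illustrative sketch.]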
else if (StringRef(constraint) == "ws" && type->isDoubleTy()) return CW_Register; else if (StringRef(constraint) == "ww" && type->isFloatTy()) return CW_Register; switch (*constraint) { default: weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); break; case 'b': if (type->isIntegerTy()) weight = CW_Register; break; case 'f': if (type->isFloatTy()) weight = CW_Register; break; case 'd': if (type->isDoubleTy()) weight = CW_Register; break; case 'v': if (type->isVectorTy()) weight = CW_Register; break; case 'y': weight = CW_Register; break; case 'Z': weight = CW_Memory; break; } return weight; } std::pair PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { // GCC RS6000 Constraint Letters switch (Constraint[0]) { case 'b': // R1-R31 if (VT == MVT::i64 && Subtarget.isPPC64()) return std::make_pair(0U, &PPC::G8RC_NOX0RegClass); return std::make_pair(0U, &PPC::GPRC_NOR0RegClass); case 'r': // R0-R31 if (VT == MVT::i64 && Subtarget.isPPC64()) return std::make_pair(0U, &PPC::G8RCRegClass); return std::make_pair(0U, &PPC::GPRCRegClass); // 'd' and 'f' constraints are both defined to be "the floating point // registers", where one is for 32-bit and the other for 64-bit. We don't // really care overly much here so just give them all the same reg classes. case 'd': case 'f': if (Subtarget.hasSPE()) { if (VT == MVT::f32 || VT == MVT::i32) return std::make_pair(0U, &PPC::GPRCRegClass); if (VT == MVT::f64 || VT == MVT::i64) return std::make_pair(0U, &PPC::SPERCRegClass); } else { if (VT == MVT::f32 || VT == MVT::i32) return std::make_pair(0U, &PPC::F4RCRegClass); if (VT == MVT::f64 || VT == MVT::i64) return std::make_pair(0U, &PPC::F8RCRegClass); } break; case 'v': if (Subtarget.hasAltivec()) return std::make_pair(0U, &PPC::VRRCRegClass); break; case 'y': // crrc return std::make_pair(0U, &PPC::CRRCRegClass); } } else if (Constraint == "wc" && Subtarget.useCRBits()) { // An individual CR bit. return std::make_pair(0U, &PPC::CRBITRCRegClass); } else if ((Constraint == "wa" || Constraint == "wd" || Constraint == "wf" || Constraint == "wi") && Subtarget.hasVSX()) { return std::make_pair(0U, &PPC::VSRCRegClass); } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) { if (VT == MVT::f32 && Subtarget.hasP8Vector()) return std::make_pair(0U, &PPC::VSSRCRegClass); else return std::make_pair(0U, &PPC::VSFRCRegClass); } else if (Constraint == "lr") { if (VT == MVT::i64) return std::make_pair(0U, &PPC::LR8RCRegClass); else return std::make_pair(0U, &PPC::LRRCRegClass); } - // If we name a VSX register, we can't defer to the base class because it - // will not recognize the correct register (their names will be VSL{0-31} - // and V{0-31} so they won't match). So we match them here. - if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') { - int VSNum = atoi(Constraint.data() + 3); - assert(VSNum >= 0 && VSNum <= 63 && - "Attempted to access a vsr out of range"); - if (VSNum < 32) - return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass); - return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass); + // Handle special cases of physical registers that are not properly handled + // by the base class. 
+ if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') { + // If we name a VSX register, we can't defer to the base class because it + // will not recognize the correct register (their names will be VSL{0-31} + // and V{0-31} so they won't match). So we match them here. + if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') { + int VSNum = atoi(Constraint.data() + 3); + assert(VSNum >= 0 && VSNum <= 63 && + "Attempted to access a vsr out of range"); + if (VSNum < 32) + return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass); + return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass); + } + + // For float registers, we can't defer to the base class as it will match + // the SPILLTOVSRRC class. + if (Constraint.size() > 3 && Constraint[1] == 'f') { + int RegNum = atoi(Constraint.data() + 2); + if (RegNum > 31 || RegNum < 0) + report_fatal_error("Invalid floating point register number"); + if (VT == MVT::f32 || VT == MVT::i32) + return Subtarget.hasSPE() + ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass) + : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass); + if (VT == MVT::f64 || VT == MVT::i64) + return Subtarget.hasSPE() + ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass) + : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass); + } } + std::pair R = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers // (which we call X[0-9]+). If a 64-bit value has been requested, and a // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent // register. // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use // the AsmName field from *RegisterInfo.td, then this would not be necessary. if (R.first && VT == MVT::i64 && Subtarget.isPPC64() && PPC::GPRCRegClass.contains(R.first)) return std::make_pair(TRI->getMatchingSuperReg(R.first, PPC::sub_32, &PPC::G8RCRegClass), &PPC::G8RCRegClass); // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same. if (!R.second && StringRef("{cc}").equals_lower(Constraint)) { R.first = PPC::CR0; R.second = &PPC::CRRCRegClass; } return R; } /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector&Ops, SelectionDAG &DAG) const { SDValue Result; // Only support length 1 constraints. if (Constraint.length() > 1) return; char Letter = Constraint[0]; switch (Letter) { default: break; case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': { ConstantSDNode *CST = dyn_cast(Op); if (!CST) return; // Must be an immediate to match. SDLoc dl(Op); int64_t Value = CST->getSExtValue(); EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative // numbers are printed as such. switch (Letter) { default: llvm_unreachable("Unknown constraint letter!"); case 'I': // "I" is a signed 16-bit constant. if (isInt<16>(Value)) Result = DAG.getTargetConstant(Value, dl, TCVT); break; case 'J': // "J" is a constant with only the high-order 16 bits nonzero. if (isShiftedUInt<16, 16>(Value)) Result = DAG.getTargetConstant(Value, dl, TCVT); break; case 'L': // "L" is a signed 16-bit constant shifted left 16 bits. if (isShiftedInt<16, 16>(Value)) Result = DAG.getTargetConstant(Value, dl, TCVT); break; case 'K': // "K" is a constant with only the low-order 16 bits nonzero. 
if (isUInt<16>(Value)) Result = DAG.getTargetConstant(Value, dl, TCVT); break; case 'M': // "M" is a constant that is greater than 31. if (Value > 31) Result = DAG.getTargetConstant(Value, dl, TCVT); break; case 'N': // "N" is a positive constant that is an exact power of two. if (Value > 0 && isPowerOf2_64(Value)) Result = DAG.getTargetConstant(Value, dl, TCVT); break; case 'O': // "O" is the constant zero. if (Value == 0) Result = DAG.getTargetConstant(Value, dl, TCVT); break; case 'P': // "P" is a constant whose negation is a signed 16-bit constant. if (isInt<16>(-Value)) Result = DAG.getTargetConstant(Value, dl, TCVT); break; } break; } } if (Result.getNode()) { Ops.push_back(Result); return; } // Handle standard constraint letters. TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } // isLegalAddressingMode - Return true if the addressing mode represented // by AM is legal for this target, for a load/store of the specified type. bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const { // Vector type r+i form is supported since power9 as DQ form. We don't check // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC, // imm form is preferred and the offset can be adjusted to use imm form later // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and // max offset to check legal addressing mode, we should be a little aggressive // to contain other offsets for that LSRUse. if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector()) return false; // PPC allows a sign-extended 16-bit immediate field. if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) return false; // No global is ever allowed as a base. if (AM.BaseGV) return false; // PPC only support r+r, switch (AM.Scale) { case 0: // "r+i" or just "i", depending on HasBaseReg. break; case 1: if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. return false; // Otherwise we have r+r or r+i. break; case 2: if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. return false; // Allow 2*r as r+r. break; default: // No other scales are supported. return false; } return true; } SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MFI.setReturnAddressIsTaken(true); if (verifyReturnAddressArgumentIsConstant(Op, DAG)) return SDValue(); SDLoc dl(Op); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); // Make sure the function does not optimize away the store of the RA to // the stack. PPCFunctionInfo *FuncInfo = MF.getInfo(); FuncInfo->setLRStoreRequired(); bool isPPC64 = Subtarget.isPPC64(); auto PtrVT = getPointerTy(MF.getDataLayout()); if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl, isPPC64 ? MVT::i64 : MVT::i32); return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset), MachinePointerInfo()); } // Just load the return address off the stack. 
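// [Illustrative usage sketch -- editorial addition, not part of the upstream
// patch.]  LowerRETURNADDR and LowerFRAMEADDR are typically reached from the
// GCC/Clang builtins below; a nonzero depth walks the saved frames, and the
// lowering above calls setLRStoreRequired() so the return address really is
// available in memory.
static void *myReturnAddress() { return __builtin_return_address(0); }
static void *myFrameAddress()  { return __builtin_frame_address(0); }
// [End of illustrative sketch.]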
SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI, MachinePointerInfo()); } SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MFI.setFrameAddressIsTaken(true); EVT PtrVT = getPointerTy(MF.getDataLayout()); bool isPPC64 = PtrVT == MVT::i64; // Naked functions never have a frame pointer, and so we use r1. For all // other functions, this decision must be delayed until during PEI. unsigned FrameReg; if (MF.getFunction().hasFnAttribute(Attribute::Naked)) FrameReg = isPPC64 ? PPC::X1 : PPC::R1; else FrameReg = isPPC64 ? PPC::FP8 : PPC::FP; SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT); while (Depth--) FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(), FrameAddr, MachinePointerInfo()); return FrameAddr; } // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const { bool isPPC64 = Subtarget.isPPC64(); bool is64Bit = isPPC64 && VT == LLT::scalar(64); if (!is64Bit && VT != LLT::scalar(32)) report_fatal_error("Invalid register global variable type"); Register Reg = StringSwitch(RegName) .Case("r1", is64Bit ? PPC::X1 : PPC::R1) .Case("r2", isPPC64 ? Register() : PPC::R2) .Case("r13", (is64Bit ? PPC::X13 : PPC::R13)) .Default(Register()); if (Reg) return Reg; report_fatal_error("Invalid register name global variable"); } bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const { // 32-bit SVR4 ABI access everything as got-indirect. if (Subtarget.is32BitELFABI()) return true; // AIX accesses everything indirectly through the TOC, which is similar to // the GOT. if (Subtarget.isAIXABI()) return true; CodeModel::Model CModel = getTargetMachine().getCodeModel(); // If it is small or large code model, module locals are accessed // indirectly by loading their address from .toc/.got. if (CModel == CodeModel::Small || CModel == CodeModel::Large) return true; // JumpTable and BlockAddress are accessed as got-indirect. if (isa(GA) || isa(GA)) return true; if (GlobalAddressSDNode *G = dyn_cast(GA)) return Subtarget.isGVIndirectSymbol(G->getGlobal()); return false; } bool PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The PowerPC target isn't yet aware of offsets. 
return false; } bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const { switch (Intrinsic) { case Intrinsic::ppc_altivec_lvx: case Intrinsic::ppc_altivec_lvxl: case Intrinsic::ppc_altivec_lvebx: case Intrinsic::ppc_altivec_lvehx: case Intrinsic::ppc_altivec_lvewx: case Intrinsic::ppc_vsx_lxvd2x: case Intrinsic::ppc_vsx_lxvw4x: case Intrinsic::ppc_vsx_lxvd2x_be: case Intrinsic::ppc_vsx_lxvw4x_be: case Intrinsic::ppc_vsx_lxvl: case Intrinsic::ppc_vsx_lxvll: { EVT VT; switch (Intrinsic) { case Intrinsic::ppc_altivec_lvebx: VT = MVT::i8; break; case Intrinsic::ppc_altivec_lvehx: VT = MVT::i16; break; case Intrinsic::ppc_altivec_lvewx: VT = MVT::i32; break; case Intrinsic::ppc_vsx_lxvd2x: case Intrinsic::ppc_vsx_lxvd2x_be: VT = MVT::v2f64; break; default: VT = MVT::v4i32; break; } Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = VT; Info.ptrVal = I.getArgOperand(0); Info.offset = -VT.getStoreSize()+1; Info.size = 2*VT.getStoreSize()-1; Info.align = Align(1); Info.flags = MachineMemOperand::MOLoad; return true; } case Intrinsic::ppc_altivec_stvx: case Intrinsic::ppc_altivec_stvxl: case Intrinsic::ppc_altivec_stvebx: case Intrinsic::ppc_altivec_stvehx: case Intrinsic::ppc_altivec_stvewx: case Intrinsic::ppc_vsx_stxvd2x: case Intrinsic::ppc_vsx_stxvw4x: case Intrinsic::ppc_vsx_stxvd2x_be: case Intrinsic::ppc_vsx_stxvw4x_be: case Intrinsic::ppc_vsx_stxvl: case Intrinsic::ppc_vsx_stxvll: { EVT VT; switch (Intrinsic) { case Intrinsic::ppc_altivec_stvebx: VT = MVT::i8; break; case Intrinsic::ppc_altivec_stvehx: VT = MVT::i16; break; case Intrinsic::ppc_altivec_stvewx: VT = MVT::i32; break; case Intrinsic::ppc_vsx_stxvd2x: case Intrinsic::ppc_vsx_stxvd2x_be: VT = MVT::v2f64; break; default: VT = MVT::v4i32; break; } Info.opc = ISD::INTRINSIC_VOID; Info.memVT = VT; Info.ptrVal = I.getArgOperand(1); Info.offset = -VT.getStoreSize()+1; Info.size = 2*VT.getStoreSize()-1; Info.align = Align(1); Info.flags = MachineMemOperand::MOStore; return true; } default: break; } return false; } /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. EVT PPCTargetLowering::getOptimalMemOpType( const MemOp &Op, const AttributeList &FuncAttributes) const { if (getTargetMachine().getOptLevel() != CodeGenOpt::None) { // We should use Altivec/VSX loads and stores when available. For unaligned // addresses, unaligned VSX loads are only fast starting with the P8. if (Subtarget.hasAltivec() && Op.size() >= 16 && (Op.isAligned(Align(16)) || ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector()))) return MVT::v4i32; } if (Subtarget.isPPC64()) { return MVT::i64; } return MVT::i32; } /// Returns true if it is beneficial to convert a load of a constant /// to just the constant itself. 
bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); return !(BitSize == 0 || BitSize > 64); } bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); return NumBits1 == 64 && NumBits2 == 32; } bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { if (!VT1.isInteger() || !VT2.isInteger()) return false; unsigned NumBits1 = VT1.getSizeInBits(); unsigned NumBits2 = VT2.getSizeInBits(); return NumBits1 == 64 && NumBits2 == 32; } bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { // Generally speaking, zexts are not free, but they are free when they can be // folded with other operations. if (LoadSDNode *LD = dyn_cast(Val)) { EVT MemVT = LD->getMemoryVT(); if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 || (Subtarget.isPPC64() && MemVT == MVT::i32)) && (LD->getExtensionType() == ISD::NON_EXTLOAD || LD->getExtensionType() == ISD::ZEXTLOAD)) return true; } // FIXME: Add other cases... // - 32-bit shifts with a zext to i64 // - zext after ctlz, bswap, etc. // - zext after and by a constant mask return TargetLowering::isZExtFree(Val, VT2); } bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const { assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() && "invalid fpext types"); // Extending to float128 is not free. if (DestVT == MVT::f128) return false; return true; } bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const { return isInt<16>(Imm) || isUInt<16>(Imm); } bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const { return isInt<16>(Imm) || isUInt<16>(Imm); } bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, unsigned, MachineMemOperand::Flags, bool *Fast) const { if (DisablePPCUnaligned) return false; // PowerPC supports unaligned memory access for simple non-vector types. // Although accessing unaligned addresses is not as efficient as accessing // aligned addresses, it is generally more efficient than manual expansion, // and generally only traps for software emulation when crossing page // boundaries. if (!VT.isSimple()) return false; if (VT.isFloatingPoint() && !VT.isVector() && !Subtarget.allowsUnalignedFPAccess()) return false; if (VT.getSimpleVT().isVector()) { if (Subtarget.hasVSX()) { if (VT != MVT::v2f64 && VT != MVT::v2i64 && VT != MVT::v4f32 && VT != MVT::v4i32) return false; } else { return false; } } if (VT == MVT::ppcf128) return false; if (Fast) *Fast = true; return true; } bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const { // Check integral scalar types. if (!VT.isScalarInteger()) return false; if (auto *ConstNode = dyn_cast(C.getNode())) { if (!ConstNode->getAPIntValue().isSignedIntN(64)) return false; // This transformation will generate >= 2 operations. But the following // cases will generate <= 2 instructions during ISEL. So exclude them. // 1. If the constant multiplier fits 16 bits, it can be handled by one // HW instruction, ie. MULLI // 2. If the multiplier after shifted fits 16 bits, an extra shift // instruction is needed than case 1, ie. 
MULLI and RLDICR int64_t Imm = ConstNode->getSExtValue(); unsigned Shift = countTrailingZeros(Imm); Imm >>= Shift; if (isInt<16>(Imm)) return false; uint64_t UImm = static_cast(Imm); if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) || isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm)) return true; } return false; } bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const { return isFMAFasterThanFMulAndFAdd( MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext())); } bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const { switch (Ty->getScalarType()->getTypeID()) { case Type::FloatTyID: case Type::DoubleTyID: return true; case Type::FP128TyID: return Subtarget.hasP9Vector(); default: return false; } } // FIXME: add more patterns which are not profitable to hoist. bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const { if (!I->hasOneUse()) return true; Instruction *User = I->user_back(); assert(User && "A single use instruction with no uses."); switch (I->getOpcode()) { case Instruction::FMul: { // Don't break FMA, PowerPC prefers FMA. if (User->getOpcode() != Instruction::FSub && User->getOpcode() != Instruction::FAdd) return true; const TargetOptions &Options = getTargetMachine().Options; const Function *F = I->getFunction(); const DataLayout &DL = F->getParent()->getDataLayout(); Type *Ty = User->getOperand(0)->getType(); return !( isFMAFasterThanFMulAndFAdd(*F, Ty) && isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) && (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath)); } case Instruction::Load: { // Don't break "store (load float*)" pattern, this pattern will be combined // to "store (load int32)" in later InstCombine pass. See function // combineLoadToOperationType. On PowerPC, loading a float point takes more // cycles than loading a 32 bit integer. LoadInst *LI = cast(I); // For the loads that combineLoadToOperationType does nothing, like // ordered load, it should be profitable to hoist them. // For swifterror load, it can only be used for pointer to pointer type, so // later type check should get rid of this case. if (!LI->isUnordered()) return true; if (User->getOpcode() != Instruction::Store) return true; if (I->getType()->getTypeID() != Type::FloatTyID) return true; return false; } default: return true; } return true; } const MCPhysReg * PPCTargetLowering::getScratchRegisters(CallingConv::ID) const { // LR is a callee-save register, but we must treat it as clobbered by any call // site. Hence we include LR in the scratch registers, which are in turn added // as implicit-defs for stackmaps and patchpoints. The same reasoning applies // to CTR, which is used by any indirect call. static const MCPhysReg ScratchRegs[] = { PPC::X12, PPC::LR8, PPC::CTR8, 0 }; return ScratchRegs; } Register PPCTargetLowering::getExceptionPointerRegister( const Constant *PersonalityFn) const { return Subtarget.isPPC64() ? PPC::X3 : PPC::R3; } Register PPCTargetLowering::getExceptionSelectorRegister( const Constant *PersonalityFn) const { return Subtarget.isPPC64() ? 
PPC::X4 : PPC::R4; } bool PPCTargetLowering::shouldExpandBuildVectorWithShuffles( EVT VT , unsigned DefinedValues) const { if (VT == MVT::v2i64) return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves if (Subtarget.hasVSX()) return true; return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues); } Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { if (DisableILPPref || Subtarget.enableMachineScheduler()) return TargetLowering::getSchedulingPreference(N); return Sched::ILP; } // Create a fast isel object. FastISel * PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const { return PPC::createFastISel(FuncInfo, LibInfo); } // 'Inverted' means the FMA opcode after negating one multiplicand. // For example, (fma -a b c) = (fnmsub a b c) static unsigned invertFMAOpcode(unsigned Opc) { switch (Opc) { default: llvm_unreachable("Invalid FMA opcode for PowerPC!"); case ISD::FMA: return PPCISD::FNMSUB; case PPCISD::FNMSUB: return ISD::FMA; } } SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth) const { if (Depth > SelectionDAG::MaxRecursionDepth) return SDValue(); unsigned Opc = Op.getOpcode(); EVT VT = Op.getValueType(); SDNodeFlags Flags = Op.getNode()->getFlags(); switch (Opc) { case PPCISD::FNMSUB: if (!Op.hasOneUse() || !isTypeLegal(VT)) break; const TargetOptions &Options = getTargetMachine().Options; SDValue N0 = Op.getOperand(0); SDValue N1 = Op.getOperand(1); SDValue N2 = Op.getOperand(2); SDLoc Loc(Op); NegatibleCost N2Cost = NegatibleCost::Expensive; SDValue NegN2 = getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1); if (!NegN2) return SDValue(); // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c)) // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c)) // These transformations may change sign of zeroes. For example, // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1. if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) { // Try and choose the cheaper one to negate. NegatibleCost N0Cost = NegatibleCost::Expensive; SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize, N0Cost, Depth + 1); NegatibleCost N1Cost = NegatibleCost::Expensive; SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize, N1Cost, Depth + 1); if (NegN0 && N0Cost <= N1Cost) { Cost = std::min(N0Cost, N2Cost); return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags); } else if (NegN1) { Cost = std::min(N1Cost, N2Cost); return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags); } } // (fneg (fnmsub a b c)) => (fma a b (fneg c)) if (isOperationLegal(ISD::FMA, VT)) { Cost = N2Cost; return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags); } break; } return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth); } // Override to enable LOAD_STACK_GUARD lowering on Linux. bool PPCTargetLowering::useLoadStackGuardNode() const { if (!Subtarget.isTargetLinux()) return TargetLowering::useLoadStackGuardNode(); return true; } // Override to disable global variable loading on Linux. 
void PPCTargetLowering::insertSSPDeclarations(Module &M) const { if (!Subtarget.isTargetLinux()) return TargetLowering::insertSSPDeclarations(M); } bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const { if (!VT.isSimple() || !Subtarget.hasVSX()) return false; switch(VT.getSimpleVT().SimpleTy) { default: // For FP types that are currently not supported by PPC backend, return // false. Examples: f16, f80. return false; case MVT::f32: case MVT::f64: if (Subtarget.hasPrefixInstrs()) { // With prefixed instructions, we can materialize anything that can be // represented with a 32-bit immediate, not just positive zero. APFloat APFloatOfImm = Imm; return convertToNonDenormSingle(APFloatOfImm); } LLVM_FALLTHROUGH; case MVT::ppcf128: return Imm.isPosZero(); } } // For vector shift operation op, fold // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y) static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarSizeInBits(); unsigned Opcode = N->getOpcode(); unsigned TargetOpcode; switch (Opcode) { default: llvm_unreachable("Unexpected shift operation"); case ISD::SHL: TargetOpcode = PPCISD::SHL; break; case ISD::SRL: TargetOpcode = PPCISD::SRL; break; case ISD::SRA: TargetOpcode = PPCISD::SRA; break; } if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) && N1->getOpcode() == ISD::AND) if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1))) if (Mask->getZExtValue() == OpSizeInBits - 1) return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0)); return SDValue(); } SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const { if (auto Value = stripModuloOnShift(*this, N, DCI.DAG)) return Value; SDValue N0 = N->getOperand(0); ConstantSDNode *CN1 = dyn_cast(N->getOperand(1)); if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() || N0.getOpcode() != ISD::SIGN_EXTEND || N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr || N->getValueType(0) != MVT::i64) return SDValue(); // We can't save an operation here if the value is already extended, and // the existing shift is easier to combine. SDValue ExtsSrc = N0.getOperand(0); if (ExtsSrc.getOpcode() == ISD::TRUNCATE && ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext) return SDValue(); SDLoc DL(N0); SDValue ShiftBy = SDValue(CN1, 0); // We want the shift amount to be i32 on the extswli, but the shift could // have an i64. 
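// [Illustrative sketch -- editorial addition, not part of the upstream patch.]
// The source-level shape combineSHL folds into a single extswsli on ISA 3.0:
// a 32-to-64-bit sign extension feeding a left shift by a constant.
static long long signExtendThenShift(int W, unsigned Sh) {
  // Assumes Sh < 64; the shift is done in unsigned arithmetic only to keep
  // the sketch free of signed-shift pitfalls.
  return (long long)((unsigned long long)(long long)W << Sh);
}
// [End of illustrative sketch.]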
if (ShiftBy.getValueType() == MVT::i64) ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32); return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0), ShiftBy); } SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const { if (auto Value = stripModuloOnShift(*this, N, DCI.DAG)) return Value; return SDValue(); } SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const { if (auto Value = stripModuloOnShift(*this, N, DCI.DAG)) return Value; return SDValue(); } // Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1)) // Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0)) // When C is zero, the equation (addi Z, -C) can be simplified to Z // Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget) { if (!Subtarget.isPPC64()) return SDValue(); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); auto isZextOfCompareWithConstant = [](SDValue Op) { if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() || Op.getValueType() != MVT::i64) return false; SDValue Cmp = Op.getOperand(0); if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() || Cmp.getOperand(0).getValueType() != MVT::i64) return false; if (auto *Constant = dyn_cast(Cmp.getOperand(1))) { int64_t NegConstant = 0 - Constant->getSExtValue(); // Due to the limitations of the addi instruction, // -C is required to be [-32768, 32767]. return isInt<16>(NegConstant); } return false; }; bool LHSHasPattern = isZextOfCompareWithConstant(LHS); bool RHSHasPattern = isZextOfCompareWithConstant(RHS); // If there is a pattern, canonicalize a zext operand to the RHS. if (LHSHasPattern && !RHSHasPattern) std::swap(LHS, RHS); else if (!LHSHasPattern && !RHSHasPattern) return SDValue(); SDLoc DL(N); SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue); SDValue Cmp = RHS.getOperand(0); SDValue Z = Cmp.getOperand(0); auto *Constant = dyn_cast(Cmp.getOperand(1)); assert(Constant && "Constant Should not be a null pointer."); int64_t NegConstant = 0 - Constant->getSExtValue(); switch(cast(Cmp.getOperand(2))->get()) { default: break; case ISD::SETNE: { // when C == 0 // --> addze X, (addic Z, -1).carry // / // add X, (zext(setne Z, C))-- // \ when -32768 <= -C <= 32767 && C != 0 // --> addze X, (addic (addi Z, -C), -1).carry SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z, DAG.getConstant(NegConstant, DL, MVT::i64)); SDValue AddOrZ = NegConstant != 0 ? Add : Z; SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue), AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64)); return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64), SDValue(Addc.getNode(), 1)); } case ISD::SETEQ: { // when C == 0 // --> addze X, (subfic Z, 0).carry // / // add X, (zext(sete Z, C))-- // \ when -32768 <= -C <= 32767 && C != 0 // --> addze X, (subfic (addi Z, -C), 0).carry SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z, DAG.getConstant(NegConstant, DL, MVT::i64)); SDValue AddOrZ = NegConstant != 0 ? Add : Z; SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue), DAG.getConstant(0, DL, MVT::i64), AddOrZ); return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64), SDValue(Subc.getNode(), 1)); } } return SDValue(); } // Transform // (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to // (MAT_PCREL_ADDR GlobalAddr+(C1+C2)) // In this case both C1 and C2 must be known constants. 
// C1+C2 must fit into a 34 bit signed integer. static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget) { if (!Subtarget.isUsingPCRelativeCalls()) return SDValue(); // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node. // If we find that node try to cast the Global Address and the Constant. SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR) std::swap(LHS, RHS); if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR) return SDValue(); // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node. GlobalAddressSDNode *GSDN = dyn_cast(LHS.getOperand(0)); ConstantSDNode* ConstNode = dyn_cast(RHS); // Check that both casts succeeded. if (!GSDN || !ConstNode) return SDValue(); int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue(); SDLoc DL(GSDN); // The signed int offset needs to fit in 34 bits. if (!isInt<34>(NewOffset)) return SDValue(); // The new global address is a copy of the old global address except // that it has the updated Offset. SDValue GA = DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0), NewOffset, GSDN->getTargetFlags()); SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA); return MatPCRel; } SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const { if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget)) return Value; if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget)) return Value; return SDValue(); } // Detect TRUNCATE operations on bitcasts of float128 values. // What we are looking for here is the situtation where we extract a subset // of bits from a 128 bit float. // This can be of two forms: // 1) BITCAST of f128 feeding TRUNCATE // 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE // The reason this is required is because we do not have a legal i128 type // and so we want to prevent having to store the f128 and then reload part // of it. SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const { // If we are using CRBits then try that first. if (Subtarget.useCRBits()) { // Check if CRBits did anything and return that if it did. if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI)) return CRTruncValue; } SDLoc dl(N); SDValue Op0 = N->getOperand(0); // fold (truncate (abs (sub (zext a), (zext b)))) -> (vabsd a, b) if (Subtarget.hasP9Altivec() && Op0.getOpcode() == ISD::ABS) { EVT VT = N->getValueType(0); if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8) return SDValue(); SDValue Sub = Op0.getOperand(0); if (Sub.getOpcode() == ISD::SUB) { SDValue SubOp0 = Sub.getOperand(0); SDValue SubOp1 = Sub.getOperand(1); if ((SubOp0.getOpcode() == ISD::ZERO_EXTEND) && (SubOp1.getOpcode() == ISD::ZERO_EXTEND)) { return DCI.DAG.getNode(PPCISD::VABSD, dl, VT, SubOp0.getOperand(0), SubOp1.getOperand(0), DCI.DAG.getTargetConstant(0, dl, MVT::i32)); } } } // Looking for a truncate of i128 to i64. if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64) return SDValue(); int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0; // SRL feeding TRUNCATE. if (Op0.getOpcode() == ISD::SRL) { ConstantSDNode *ConstNode = dyn_cast(Op0.getOperand(1)); // The right shift has to be by 64 bits. if (!ConstNode || ConstNode->getZExtValue() != 64) return SDValue(); // Switch the element number to extract. EltToExtract = EltToExtract ? 0 : 1; // Update Op0 past the SRL. 
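// [Illustrative sketch -- editorial addition, not part of the upstream patch,
// written with the GCC/Clang __int128 extension.]  The two source patterns
// the truncate combine is matching -- each extracts one 64-bit half of a
// 128-bit value.  When that i128 is really the bit pattern of an f128, the
// combine extracts the half as a vector element instead of storing the f128
// and reloading part of it.
static unsigned long long lowHalf(unsigned __int128 V) {
  return (unsigned long long)V;            // plain TRUNCATE
}
static unsigned long long highHalf(unsigned __int128 V) {
  return (unsigned long long)(V >> 64);    // SRL by 64 feeding the TRUNCATE
}
// [End of illustrative sketch.]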
Op0 = Op0.getOperand(0); } // BITCAST feeding a TRUNCATE possibly via SRL. if (Op0.getOpcode() == ISD::BITCAST && Op0.getValueType() == MVT::i128 && Op0.getOperand(0).getValueType() == MVT::f128) { SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0)); return DCI.DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast, DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32)); } return SDValue(); } SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1)); if (!ConstOpOrElement) return SDValue(); // An imul is usually smaller than the alternative sequence for legal type. if (DAG.getMachineFunction().getFunction().hasMinSize() && isOperationLegal(ISD::MUL, N->getValueType(0))) return SDValue(); auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool { switch (this->Subtarget.getCPUDirective()) { default: // TODO: enhance the condition for subtarget before pwr8 return false; case PPC::DIR_PWR8: // type mul add shl // scalar 4 1 1 // vector 7 2 2 return true; case PPC::DIR_PWR9: case PPC::DIR_PWR10: case PPC::DIR_PWR_FUTURE: // type mul add shl // scalar 5 2 2 // vector 7 2 2 // The cycle RATIO of related operations are showed as a table above. // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both // scalar and vector type. For 2 instrs patterns, add/sub + shl // are 4, it is always profitable; but for 3 instrs patterns // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6. // So we should only do it for vector type. return IsAddOne && IsNeg ? VT.isVector() : true; } }; EVT VT = N->getValueType(0); SDLoc DL(N); const APInt &MulAmt = ConstOpOrElement->getAPIntValue(); bool IsNeg = MulAmt.isNegative(); APInt MulAmtAbs = MulAmt.abs(); if ((MulAmtAbs - 1).isPowerOf2()) { // (mul x, 2^N + 1) => (add (shl x, N), x) // (mul x, -(2^N + 1)) => -(add (shl x, N), x) if (!IsProfitable(IsNeg, true, VT)) return SDValue(); SDValue Op0 = N->getOperand(0); SDValue Op1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT)); SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1); if (!IsNeg) return Res; return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res); } else if ((MulAmtAbs + 1).isPowerOf2()) { // (mul x, 2^N - 1) => (sub (shl x, N), x) // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) if (!IsProfitable(IsNeg, false, VT)) return SDValue(); SDValue Op0 = N->getOperand(0); SDValue Op1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT)); if (!IsNeg) return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0); else return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1); } else { return SDValue(); } } // Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this // in combiner since we need to check SD flags and other subtarget features. 
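// [Illustrative sketch -- editorial addition, not part of the upstream patch.]
// The sign identity combineFMALike below is built on: negation is exact, so
// under the default round-to-nearest
//     fma(-a, b, c) = c - a*b = -(a*b - c) = fnmsub(a, b, c),
// and the two sides can differ only in the sign of a zero result (when
// a*b == c exactly), which is why the combine insists on no-signed-zeros.
// __builtin_fma is the GCC/Clang fused multiply-add builtin.
static double fnmsubRef(double A, double B, double C) {
  return -__builtin_fma(A, B, -C);     // -(a*b - c)
}
static double fmaWithNegatedA(double A, double B, double C) {
  return __builtin_fma(-A, B, C);      // fma(-a, b, c)
}
// [End of illustrative sketch.]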
SDValue PPCTargetLowering::combineFMALike(SDNode *N, DAGCombinerInfo &DCI) const { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); SDNodeFlags Flags = N->getFlags(); EVT VT = N->getValueType(0); SelectionDAG &DAG = DCI.DAG; const TargetOptions &Options = getTargetMachine().Options; unsigned Opc = N->getOpcode(); bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize(); bool LegalOps = !DCI.isBeforeLegalizeOps(); SDLoc Loc(N); if (!isOperationLegal(ISD::FMA, VT)) return SDValue(); // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0 // since (fnmsub a b c)=-0 while c-ab=+0. if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath) return SDValue(); // (fma (fneg a) b c) => (fnmsub a b c) // (fnmsub (fneg a) b c) => (fma a b c) if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize)) return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags); // (fma a (fneg b) c) => (fnmsub a b c) // (fnmsub a (fneg b) c) => (fma a b c) if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize)) return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags); return SDValue(); } bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { // Only duplicate to increase tail-calls for the 64bit SysV ABIs. if (!Subtarget.is64BitELFABI()) return false; // If not a tail call then no need to proceed. if (!CI->isTailCall()) return false; // If sibling calls have been disabled and tail-calls aren't guaranteed // there is no reason to duplicate. auto &TM = getTargetMachine(); if (!TM.Options.GuaranteedTailCallOpt && DisableSCO) return false; // Can't tail call a function called indirectly, or if it has variadic args. const Function *Callee = CI->getCalledFunction(); if (!Callee || Callee->isVarArg()) return false; // Make sure the callee and caller calling conventions are eligible for tco. const Function *Caller = CI->getParent()->getParent(); if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(), CI->getCallingConv())) return false; // If the function is local then we have a good chance at tail-calling it return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee); } bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const { if (!Subtarget.hasVSX()) return false; if (Subtarget.hasP9Vector() && VT == MVT::f128) return true; return VT == MVT::f32 || VT == MVT::f64 || VT == MVT::v4f32 || VT == MVT::v2f64; } bool PPCTargetLowering:: isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const { const Value *Mask = AndI.getOperand(1); // If the mask is suitable for andi. or andis. we should sink the and. if (const ConstantInt *CI = dyn_cast(Mask)) { // Can't handle constants wider than 64-bits. if (CI->getBitWidth() > 64) return false; int64_t ConstVal = CI->getZExtValue(); return isUInt<16>(ConstVal) || (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF)); } // For non-constant masks, we can always use the record-form and. 
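// [Illustrative sketch -- editorial addition, not part of the upstream patch.]
// The constant-mask shapes accepted above: masks encodable by a single
// record-form immediate AND (andi. for the low halfword, andis. for the
// high halfword).
static bool fitsRecordFormAndImmediate(unsigned long long Mask) {
  return Mask <= 0xFFFFull ||                                      // andi.
         ((Mask & 0xFFFFull) == 0 && (Mask >> 16) <= 0xFFFFull);   // andis.
}
// [End of illustrative sketch.]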
return true; } // Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0) // Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0) // Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0) // Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0) // Transform (abs (sub a, b) to (vabsd a b 1)) if a & b of type v4i32 SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const { assert((N->getOpcode() == ISD::ABS) && "Need ABS node here"); assert(Subtarget.hasP9Altivec() && "Only combine this when P9 altivec supported!"); EVT VT = N->getValueType(0); if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8) return SDValue(); SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); if (N->getOperand(0).getOpcode() == ISD::SUB) { // Even for signed integers, if it's known to be positive (as signed // integer) due to zero-extended inputs. unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode(); unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode(); if ((SubOpcd0 == ISD::ZERO_EXTEND || SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) && (SubOpcd1 == ISD::ZERO_EXTEND || SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) { return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(), N->getOperand(0)->getOperand(0), N->getOperand(0)->getOperand(1), DAG.getTargetConstant(0, dl, MVT::i32)); } // For type v4i32, it can be optimized with xvnegsp + vabsduw if (N->getOperand(0).getValueType() == MVT::v4i32 && N->getOperand(0).hasOneUse()) { return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(), N->getOperand(0)->getOperand(0), N->getOperand(0)->getOperand(1), DAG.getTargetConstant(1, dl, MVT::i32)); } } return SDValue(); } // For type v4i32/v8ii16/v16i8, transform // from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b) // from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b) // from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b) // from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b) SDValue PPCTargetLowering::combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const { assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here"); assert(Subtarget.hasP9Altivec() && "Only combine this when P9 altivec supported!"); SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); SDValue Cond = N->getOperand(0); SDValue TrueOpnd = N->getOperand(1); SDValue FalseOpnd = N->getOperand(2); EVT VT = N->getOperand(1).getValueType(); if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB || FalseOpnd.getOpcode() != ISD::SUB) return SDValue(); // ABSD only available for type v4i32/v8i16/v16i8 if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8) return SDValue(); // At least to save one more dependent computation if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse())) return SDValue(); ISD::CondCode CC = cast(Cond.getOperand(2))->get(); // Can only handle unsigned comparison here switch (CC) { default: return SDValue(); case ISD::SETUGT: case ISD::SETUGE: break; case ISD::SETULT: case ISD::SETULE: std::swap(TrueOpnd, FalseOpnd); break; } SDValue CmpOpnd1 = Cond.getOperand(0); SDValue CmpOpnd2 = Cond.getOperand(1); // SETCC CmpOpnd1 CmpOpnd2 cond // TrueOpnd = CmpOpnd1 - CmpOpnd2 // FalseOpnd = CmpOpnd2 - CmpOpnd1 if (TrueOpnd.getOperand(0) == CmpOpnd1 && TrueOpnd.getOperand(1) == CmpOpnd2 && FalseOpnd.getOperand(0) == CmpOpnd2 && FalseOpnd.getOperand(1) == CmpOpnd1) { return DAG.getNode(PPCISD::VABSD, dl, 
N->getOperand(1).getValueType(), CmpOpnd1, CmpOpnd2, DAG.getTargetConstant(0, dl, MVT::i32)); } return SDValue(); } diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 9ace36f344a5..270134d84c61 100644 --- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1,8443 +1,8457 @@ //===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements the SystemZTargetLowering class. // //===----------------------------------------------------------------------===// #include "SystemZISelLowering.h" #include "SystemZCallingConv.h" #include "SystemZConstantPoolValue.h" #include "SystemZMachineFunctionInfo.h" #include "SystemZTargetMachine.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsS390.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/KnownBits.h" #include using namespace llvm; #define DEBUG_TYPE "systemz-lower" namespace { // Represents information about a comparison. struct Comparison { Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn) : Op0(Op0In), Op1(Op1In), Chain(ChainIn), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {} // The operands to the comparison. SDValue Op0, Op1; // Chain if this is a strict floating-point comparison. SDValue Chain; // The opcode that should be used to compare Op0 and Op1. unsigned Opcode; // A SystemZICMP value. Only used for integer comparisons. unsigned ICmpType; // The mask of CC values that Opcode can produce. unsigned CCValid; // The mask of CC values for which the original condition is true. unsigned CCMask; }; } // end anonymous namespace // Classify VT as either 32 or 64 bit. static bool is32Bit(EVT VT) { switch (VT.getSimpleVT().SimpleTy) { case MVT::i32: return true; case MVT::i64: return false; default: llvm_unreachable("Unsupported type"); } } // Return a version of MachineOperand that can be safely used before the // final use. static MachineOperand earlyUseOperand(MachineOperand Op) { if (Op.isReg()) Op.setIsKill(false); return Op; } SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0)); // Set up the register classes. 
if (Subtarget.hasHighWord()) addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass); else addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass); addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass); if (!useSoftFloat()) { if (Subtarget.hasVector()) { addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass); addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass); } else { addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); } if (Subtarget.hasVectorEnhancements1()) addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass); else addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass); if (Subtarget.hasVector()) { addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass); addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass); addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass); addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass); addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass); addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass); } } // Compute derived properties from the register classes computeRegisterProperties(Subtarget.getRegisterInfo()); // Set up special registers. setStackPointerRegisterToSaveRestore(SystemZ::R15D); // TODO: It may be better to default to latency-oriented scheduling, however // LLVM's current latency-oriented scheduler can't handle physreg definitions // such as SystemZ has with CC, so set this to the register-pressure // scheduler, because it can. setSchedulingPreference(Sched::RegPressure); setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // Instructions are strings of 2-byte aligned 2-byte values. setMinFunctionAlignment(Align(2)); // For performance reasons we prefer 16-byte alignment. setPrefFunctionAlignment(Align(16)); // Handle operations that are handled in a similar way for all types. for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE; I <= MVT::LAST_FP_VALUETYPE; ++I) { MVT VT = MVT::SimpleValueType(I); if (isTypeLegal(VT)) { // Lower SET_CC into an IPM-based sequence. setOperationAction(ISD::SETCC, VT, Custom); setOperationAction(ISD::STRICT_FSETCC, VT, Custom); setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE). setOperationAction(ISD::SELECT, VT, Expand); // Lower SELECT_CC and BR_CC into separate comparisons and branches. setOperationAction(ISD::SELECT_CC, VT, Custom); setOperationAction(ISD::BR_CC, VT, Custom); } } // Expand jump table branches as address arithmetic followed by an // indirect jump. setOperationAction(ISD::BR_JT, MVT::Other, Expand); // Expand BRCOND into a BR_CC (see above). setOperationAction(ISD::BRCOND, MVT::Other, Expand); // Handle integer types. for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE; I <= MVT::LAST_INTEGER_VALUETYPE; ++I) { MVT VT = MVT::SimpleValueType(I); if (isTypeLegal(VT)) { setOperationAction(ISD::ABS, VT, Legal); // Expand individual DIV and REMs into DIVREMs. setOperationAction(ISD::SDIV, VT, Expand); setOperationAction(ISD::UDIV, VT, Expand); setOperationAction(ISD::SREM, VT, Expand); setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::SDIVREM, VT, Custom); setOperationAction(ISD::UDIVREM, VT, Custom); // Support addition/subtraction with overflow. setOperationAction(ISD::SADDO, VT, Custom); setOperationAction(ISD::SSUBO, VT, Custom); // Support addition/subtraction with carry. setOperationAction(ISD::UADDO, VT, Custom); setOperationAction(ISD::USUBO, VT, Custom); // Support carry in as value rather than glue. 
setOperationAction(ISD::ADDCARRY, VT, Custom); setOperationAction(ISD::SUBCARRY, VT, Custom); // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and // stores, putting a serialization instruction after the stores. setOperationAction(ISD::ATOMIC_LOAD, VT, Custom); setOperationAction(ISD::ATOMIC_STORE, VT, Custom); // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are // available, or if the operand is constant. setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); // Use POPCNT on z196 and above. if (Subtarget.hasPopulationCount()) setOperationAction(ISD::CTPOP, VT, Custom); else setOperationAction(ISD::CTPOP, VT, Expand); // No special instructions for these. setOperationAction(ISD::CTTZ, VT, Expand); setOperationAction(ISD::ROTR, VT, Expand); // Use *MUL_LOHI where possible instead of MULH*. setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::MULHU, VT, Expand); setOperationAction(ISD::SMUL_LOHI, VT, Custom); setOperationAction(ISD::UMUL_LOHI, VT, Custom); // Only z196 and above have native support for conversions to unsigned. // On z10, promoting to i64 doesn't generate an inexact condition for // values that are outside the i32 range but in the i64 range, so use // the default expansion. if (!Subtarget.hasFPExtension()) setOperationAction(ISD::FP_TO_UINT, VT, Expand); // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all // default to Expand, so need to be modified to Legal where appropriate. setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal); if (Subtarget.hasFPExtension()) setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal); // And similarly for STRICT_[SU]INT_TO_FP. setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal); if (Subtarget.hasFPExtension()) setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal); } } // Type legalization will convert 8- and 16-bit atomic operations into // forms that operate on i32s (but still keeping the original memory VT). // Lower them into full i32 operations. setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom); // Even though i128 is not a legal type, we still need to custom lower // the atomic operations in order to exploit SystemZ instructions. setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom); setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom); // We can use the CC result of compare-and-swap to implement // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS. setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom); setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom); setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); // Traps are legal, as we will convert them to "j .+2". setOperationAction(ISD::TRAP, MVT::Other, Legal); // z10 has instructions for signed but not unsigned FP conversion. // Handle unsigned 32-bit types as signed 64-bit types. 
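// Illustrative sketch (hypothetical source, not taken from this file): for
//   double f(uint32_t x) { return x; }
// the i32 operand is widened to i64 by zero extension; the result is always
// non-negative when viewed as a signed 64-bit value, so the signed i64
// conversion (CDGBR) gives the correct answer. The Promote/Expand actions
// below request exactly that handling on pre-z196 models without the
// floating-point extension facility.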
if (!Subtarget.hasFPExtension()) { setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Promote); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand); } // We have native support for a 64-bit CTLZ, via FLOGR. setOperationAction(ISD::CTLZ, MVT::i32, Promote); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote); setOperationAction(ISD::CTLZ, MVT::i64, Legal); // On z15 we have native support for a 64-bit CTPOP. if (Subtarget.hasMiscellaneousExtensions3()) { setOperationAction(ISD::CTPOP, MVT::i32, Promote); setOperationAction(ISD::CTPOP, MVT::i64, Legal); } // Give LowerOperation the chance to replace 64-bit ORs with subregs. setOperationAction(ISD::OR, MVT::i64, Custom); // Expand 128 bit shifts without using a libcall. setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); setLibcallName(RTLIB::SRL_I128, nullptr); setLibcallName(RTLIB::SHL_I128, nullptr); setLibcallName(RTLIB::SRA_I128, nullptr); // We have native instructions for i8, i16 and i32 extensions, but not i1. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); } // Handle the various types of symbolic address. setOperationAction(ISD::ConstantPool, PtrVT, Custom); setOperationAction(ISD::GlobalAddress, PtrVT, Custom); setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom); setOperationAction(ISD::BlockAddress, PtrVT, Custom); setOperationAction(ISD::JumpTable, PtrVT, Custom); // We need to handle dynamic allocations specially because of the // 160-byte area at the bottom of the stack. setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom); setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom); // Use custom expanders so that we can force the function to use // a frame pointer. setOperationAction(ISD::STACKSAVE, MVT::Other, Custom); setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom); // Handle prefetches with PFD or PFDRL. setOperationAction(ISD::PREFETCH, MVT::Other, Custom); for (MVT VT : MVT::fixedlen_vector_valuetypes()) { // Assume by default that all vector operations need to be expanded. for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode) if (getOperationAction(Opcode, VT) == Legal) setOperationAction(Opcode, VT, Expand); // Likewise all truncating stores and extending loads. for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { setTruncStoreAction(VT, InnerVT, Expand); setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); } if (isTypeLegal(VT)) { // These operations are legal for anything that can be stored in a // vector register, even if there is no native support for the format // as such. In particular, we can do these for v4f32 even though there // are no specific instructions for that format. setOperationAction(ISD::LOAD, VT, Legal); setOperationAction(ISD::STORE, VT, Legal); setOperationAction(ISD::VSELECT, VT, Legal); setOperationAction(ISD::BITCAST, VT, Legal); setOperationAction(ISD::UNDEF, VT, Legal); // Likewise, except that we need to replace the nodes with something // more specific. 
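// For example (informal, see the custom lowering code later in this file):
// a BUILD_VECTOR that splats a small constant can become a single VREPI or
// VGBM via SystemZISD::REPLICATE / SystemZISD::BYTE_MASK, and many
// VECTOR_SHUFFLE patterns can map onto VPERM or the merge/pack
// instructions, which is why both are marked Custom here rather than
// Expand.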
setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); } } // Handle integer vector types. for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { if (isTypeLegal(VT)) { // These operations have direct equivalents. setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); setOperationAction(ISD::ADD, VT, Legal); setOperationAction(ISD::SUB, VT, Legal); if (VT != MVT::v2i64) setOperationAction(ISD::MUL, VT, Legal); setOperationAction(ISD::ABS, VT, Legal); setOperationAction(ISD::AND, VT, Legal); setOperationAction(ISD::OR, VT, Legal); setOperationAction(ISD::XOR, VT, Legal); if (Subtarget.hasVectorEnhancements1()) setOperationAction(ISD::CTPOP, VT, Legal); else setOperationAction(ISD::CTPOP, VT, Custom); setOperationAction(ISD::CTTZ, VT, Legal); setOperationAction(ISD::CTLZ, VT, Legal); // Convert a GPR scalar to a vector by inserting it into element 0. setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); // Use a series of unpacks for extensions. setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); // Detect shifts by a scalar amount and convert them into // V*_BY_SCALAR. setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); setOperationAction(ISD::SRL, VT, Custom); // At present ROTL isn't matched by DAGCombiner. ROTR should be // converted into ROTL. setOperationAction(ISD::ROTL, VT, Expand); setOperationAction(ISD::ROTR, VT, Expand); // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands // and inverting the result as necessary. setOperationAction(ISD::SETCC, VT, Custom); setOperationAction(ISD::STRICT_FSETCC, VT, Custom); if (Subtarget.hasVectorEnhancements1()) setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); } } if (Subtarget.hasVector()) { // There should be no need to check for float types other than v2f64 // since <2 x f32> isn't a legal type. 
setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal); setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal); setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f64, Legal); } if (Subtarget.hasVectorEnhancements2()) { setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f32, Legal); } // Handle floating-point types. for (unsigned I = MVT::FIRST_FP_VALUETYPE; I <= MVT::LAST_FP_VALUETYPE; ++I) { MVT VT = MVT::SimpleValueType(I); if (isTypeLegal(VT)) { // We can use FI for FRINT. setOperationAction(ISD::FRINT, VT, Legal); // We can use the extended form of FI for other rounding operations. if (Subtarget.hasFPExtension()) { setOperationAction(ISD::FNEARBYINT, VT, Legal); setOperationAction(ISD::FFLOOR, VT, Legal); setOperationAction(ISD::FCEIL, VT, Legal); setOperationAction(ISD::FTRUNC, VT, Legal); setOperationAction(ISD::FROUND, VT, Legal); } // No special instructions for these. setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FSINCOS, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); // Handle constrained floating-point operations. 
setOperationAction(ISD::STRICT_FADD, VT, Legal); setOperationAction(ISD::STRICT_FSUB, VT, Legal); setOperationAction(ISD::STRICT_FMUL, VT, Legal); setOperationAction(ISD::STRICT_FDIV, VT, Legal); setOperationAction(ISD::STRICT_FMA, VT, Legal); setOperationAction(ISD::STRICT_FSQRT, VT, Legal); setOperationAction(ISD::STRICT_FRINT, VT, Legal); setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal); setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal); if (Subtarget.hasFPExtension()) { setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); setOperationAction(ISD::STRICT_FFLOOR, VT, Legal); setOperationAction(ISD::STRICT_FCEIL, VT, Legal); setOperationAction(ISD::STRICT_FROUND, VT, Legal); setOperationAction(ISD::STRICT_FTRUNC, VT, Legal); } } } // Handle floating-point vector types. if (Subtarget.hasVector()) { // Scalar-to-vector conversion is just a subreg. setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); // Some insertions and extractions can be done directly but others // need to go via integers. setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); // These operations have direct equivalents. setOperationAction(ISD::FADD, MVT::v2f64, Legal); setOperationAction(ISD::FNEG, MVT::v2f64, Legal); setOperationAction(ISD::FSUB, MVT::v2f64, Legal); setOperationAction(ISD::FMUL, MVT::v2f64, Legal); setOperationAction(ISD::FMA, MVT::v2f64, Legal); setOperationAction(ISD::FDIV, MVT::v2f64, Legal); setOperationAction(ISD::FABS, MVT::v2f64, Legal); setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); setOperationAction(ISD::FRINT, MVT::v2f64, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); setOperationAction(ISD::FROUND, MVT::v2f64, Legal); // Handle constrained floating-point operations. setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal); } // The vector enhancements facility 1 has instructions for these. 
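// (Informal note: the vector enhancements facility 1 first appears on z14
// and adds, among other things, single-precision vector arithmetic, which
// is why the v4f32 operations below are only marked Legal when it is
// present.)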
if (Subtarget.hasVectorEnhancements1()) { setOperationAction(ISD::FADD, MVT::v4f32, Legal); setOperationAction(ISD::FNEG, MVT::v4f32, Legal); setOperationAction(ISD::FSUB, MVT::v4f32, Legal); setOperationAction(ISD::FMUL, MVT::v4f32, Legal); setOperationAction(ISD::FMA, MVT::v4f32, Legal); setOperationAction(ISD::FDIV, MVT::v4f32, Legal); setOperationAction(ISD::FABS, MVT::v4f32, Legal); setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); setOperationAction(ISD::FRINT, MVT::v4f32, Legal); setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); setOperationAction(ISD::FROUND, MVT::v4f32, Legal); setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal); setOperationAction(ISD::FMINNUM, MVT::f64, Legal); setOperationAction(ISD::FMINIMUM, MVT::f64, Legal); setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal); setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal); setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal); setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal); setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal); setOperationAction(ISD::FMINNUM, MVT::f32, Legal); setOperationAction(ISD::FMINIMUM, MVT::f32, Legal); setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal); setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal); setOperationAction(ISD::FMAXNUM, MVT::f128, Legal); setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal); setOperationAction(ISD::FMINNUM, MVT::f128, Legal); setOperationAction(ISD::FMINIMUM, MVT::f128, Legal); // Handle constrained floating-point operations. setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal); setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal); for (auto VT : { MVT::f32, MVT::f64, MVT::f128, MVT::v4f32, MVT::v2f64 }) { setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal); setOperationAction(ISD::STRICT_FMINNUM, VT, Legal); setOperationAction(ISD::STRICT_FMAXIMUM, VT, Legal); setOperationAction(ISD::STRICT_FMINIMUM, VT, Legal); } } // We only have fused f128 multiply-addition on vector registers. if (!Subtarget.hasVectorEnhancements1()) { setOperationAction(ISD::FMA, MVT::f128, Expand); setOperationAction(ISD::STRICT_FMA, MVT::f128, Expand); } // We don't have a copysign instruction on vector registers. if (Subtarget.hasVectorEnhancements1()) setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); // Needed so that we don't try to implement f128 constant loads using // a load-and-extend of a f80 constant (in cases where the constant // would fit in an f80). for (MVT VT : MVT::fp_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand); // We don't have extending load instruction on vector registers. 
if (Subtarget.hasVectorEnhancements1()) { setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand); } // Floating-point truncation and stores need to be done separately. setTruncStoreAction(MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::f128, MVT::f32, Expand); setTruncStoreAction(MVT::f128, MVT::f64, Expand); // We have 64-bit FPR<->GPR moves, but need special handling for // 32-bit forms. if (!Subtarget.hasVector()) { setOperationAction(ISD::BITCAST, MVT::i32, Custom); setOperationAction(ISD::BITCAST, MVT::f32, Custom); } // VASTART and VACOPY need to deal with the SystemZ-specific varargs // structure, but VAEND is a no-op. setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::VACOPY, MVT::Other, Custom); setOperationAction(ISD::VAEND, MVT::Other, Expand); // Codes for which we want to perform some z-specific combinations. setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setTargetDAGCombine(ISD::FP_ROUND); setTargetDAGCombine(ISD::STRICT_FP_ROUND); setTargetDAGCombine(ISD::FP_EXTEND); setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::UINT_TO_FP); setTargetDAGCombine(ISD::STRICT_FP_EXTEND); setTargetDAGCombine(ISD::BSWAP); setTargetDAGCombine(ISD::SDIV); setTargetDAGCombine(ISD::UDIV); setTargetDAGCombine(ISD::SREM); setTargetDAGCombine(ISD::UREM); setTargetDAGCombine(ISD::INTRINSIC_VOID); setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); // Handle intrinsics. setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); // We want to use MVC in preference to even a single load/store pair. MaxStoresPerMemcpy = 0; MaxStoresPerMemcpyOptSize = 0; // The main memset sequence is a byte store followed by an MVC. // Two STC or MV..I stores win over that, but the kind of fused stores // generated by target-independent code don't when the byte value is // variable. E.g. "STC ;MHI ,257;STH " is not better // than "STC;MVC". Handle the choice in target-specific code instead. MaxStoresPerMemset = 0; MaxStoresPerMemsetOptSize = 0; // Default to having -disable-strictnode-mutation on IsStrictFPEnabled = true; } bool SystemZTargetLowering::useSoftFloat() const { return Subtarget.hasSoftFloat(); } EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const { if (!VT.isVector()) return MVT::i32; return VT.changeVectorElementTypeToInteger(); } bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd( const MachineFunction &MF, EVT VT) const { VT = VT.getScalarType(); if (!VT.isSimple()) return false; switch (VT.getSimpleVT().SimpleTy) { case MVT::f32: case MVT::f64: return true; case MVT::f128: return Subtarget.hasVectorEnhancements1(); default: break; } return false; } // Return true if the constant can be generated with a vector instruction, // such as VGM, VGMB or VREPI. bool SystemZVectorConstantInfo::isVectorConstantLegal( const SystemZSubtarget &Subtarget) { const SystemZInstrInfo *TII = static_cast(Subtarget.getInstrInfo()); if (!Subtarget.hasVector() || (isFP128 && !Subtarget.hasVectorEnhancements1())) return false; // Try using VECTOR GENERATE BYTE MASK. 
This is the architecturally- // preferred way of creating all-zero and all-one vectors so give it // priority over other methods below. unsigned Mask = 0; unsigned I = 0; for (; I < SystemZ::VectorBytes; ++I) { uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue(); if (Byte == 0xff) Mask |= 1ULL << I; else if (Byte != 0) break; } if (I == SystemZ::VectorBytes) { Opcode = SystemZISD::BYTE_MASK; OpVals.push_back(Mask); VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16); return true; } if (SplatBitSize > 64) return false; auto tryValue = [&](uint64_t Value) -> bool { // Try VECTOR REPLICATE IMMEDIATE int64_t SignedValue = SignExtend64(Value, SplatBitSize); if (isInt<16>(SignedValue)) { OpVals.push_back(((unsigned) SignedValue)); Opcode = SystemZISD::REPLICATE; VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize), SystemZ::VectorBits / SplatBitSize); return true; } // Try VECTOR GENERATE MASK unsigned Start, End; if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) { // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for // an SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1). OpVals.push_back(Start - (64 - SplatBitSize)); OpVals.push_back(End - (64 - SplatBitSize)); Opcode = SystemZISD::ROTATE_MASK; VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize), SystemZ::VectorBits / SplatBitSize); return true; } return false; }; // First try assuming that any undefined bits above the highest set bit // and below the lowest set bit are 1s. This increases the likelihood of // being able to use a sign-extended element value in VECTOR REPLICATE // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK. uint64_t SplatBitsZ = SplatBits.getZExtValue(); uint64_t SplatUndefZ = SplatUndef.getZExtValue(); uint64_t Lower = (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1)); uint64_t Upper = (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1)); if (tryValue(SplatBitsZ | Upper | Lower)) return true; // Now try assuming that any undefined bits between the first and // last defined set bits are set. This increases the chances of // using a non-wraparound mask. uint64_t Middle = SplatUndefZ & ~Upper & ~Lower; return tryValue(SplatBitsZ | Middle); } SystemZVectorConstantInfo::SystemZVectorConstantInfo(APFloat FPImm) { IntBits = FPImm.bitcastToAPInt().zextOrSelf(128); isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad()); SplatBits = FPImm.bitcastToAPInt(); unsigned Width = SplatBits.getBitWidth(); IntBits <<= (SystemZ::VectorBits - Width); // Find the smallest splat. while (Width > 8) { unsigned HalfSize = Width / 2; APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize); APInt LowValue = SplatBits.trunc(HalfSize); // If the two halves do not match, stop here. if (HighValue != LowValue || 8 > HalfSize) break; SplatBits = HighValue; Width = HalfSize; } SplatUndef = 0; SplatBitSize = Width; } SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) { assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR"); bool HasAnyUndefs; // Get IntBits by finding the 128 bit splat. BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128, true); // Get SplatBits by finding the 8 bit or greater splat. BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8, true); } bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const { // We can load zero using LZ?R and negative zero using LZ?R;LC?BR. 
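// (Expanded note: LZER/LZDR/LZXR load a positive zero of the respective
// width, and LCEBR/LCDBR/LCXBR negate it, hence the "LZ?R;LC?BR" pair for
// -0.0. Any other immediate is accepted only if it can be synthesized by a
// vector instruction such as VGBM, VREPI or VGM, which is what the
// SystemZVectorConstantInfo check below decides.)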
if (Imm.isZero() || Imm.isNegZero()) return true; return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget); } /// Returns true if stack probing through inline assembly is requested. bool SystemZTargetLowering::hasInlineStackProbe(MachineFunction &MF) const { // If the function specifically requests inline stack probes, emit them. if (MF.getFunction().hasFnAttribute("probe-stack")) return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() == "inline-asm"; return false; } bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { // We can use CGFI or CLGFI. return isInt<32>(Imm) || isUInt<32>(Imm); } bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const { // We can use ALGFI or SLGFI. return isUInt<32>(Imm) || isUInt<32>(-Imm); } bool SystemZTargetLowering::allowsMisalignedMemoryAccesses( EVT VT, unsigned, unsigned, MachineMemOperand::Flags, bool *Fast) const { // Unaligned accesses should never be slower than the expanded version. // We check specifically for aligned accesses in the few cases where // they are required. if (Fast) *Fast = true; return true; } // Information about the addressing mode for a memory access. struct AddressingMode { // True if a long displacement is supported. bool LongDisplacement; // True if use of index register is supported. bool IndexReg; AddressingMode(bool LongDispl, bool IdxReg) : LongDisplacement(LongDispl), IndexReg(IdxReg) {} }; // Return the desired addressing mode for a Load which has only one use (in // the same block) which is a Store. static AddressingMode getLoadStoreAddrMode(bool HasVector, Type *Ty) { // With vector support a Load->Store combination may be combined to either // an MVC or vector operations and it seems to work best to allow the // vector addressing mode. if (HasVector) return AddressingMode(false/*LongDispl*/, true/*IdxReg*/); // Otherwise only the MVC case is special. bool MVC = Ty->isIntegerTy(8); return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/); } // Return the addressing mode which seems most desirable given an LLVM // Instruction pointer. static AddressingMode supportedAddressingMode(Instruction *I, bool HasVector) { if (IntrinsicInst *II = dyn_cast(I)) { switch (II->getIntrinsicID()) { default: break; case Intrinsic::memset: case Intrinsic::memmove: case Intrinsic::memcpy: return AddressingMode(false/*LongDispl*/, false/*IdxReg*/); } } if (isa(I) && I->hasOneUse()) { auto *SingleUser = cast(*I->user_begin()); if (SingleUser->getParent() == I->getParent()) { if (isa(SingleUser)) { if (auto *C = dyn_cast(SingleUser->getOperand(1))) if (C->getBitWidth() <= 64 && (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue()))) // Comparison of memory with 16 bit signed / unsigned immediate return AddressingMode(false/*LongDispl*/, false/*IdxReg*/); } else if (isa(SingleUser)) // Load->Store return getLoadStoreAddrMode(HasVector, I->getType()); } } else if (auto *StoreI = dyn_cast(I)) { if (auto *LoadI = dyn_cast(StoreI->getValueOperand())) if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent()) // Load->Store return getLoadStoreAddrMode(HasVector, LoadI->getType()); } if (HasVector && (isa(I) || isa(I))) { // * Use LDE instead of LE/LEY for z13 to avoid partial register // dependencies (LDE only supports small offsets). // * Utilize the vector registers to hold floating point // values (vector load / store instructions only support small // offsets). Type *MemAccessTy = (isa(I) ? 
I->getType() : I->getOperand(0)->getType()); bool IsFPAccess = MemAccessTy->isFloatingPointTy(); bool IsVectorAccess = MemAccessTy->isVectorTy(); // A store of an extracted vector element will be combined into a VSTE type // instruction. if (!IsVectorAccess && isa(I)) { Value *DataOp = I->getOperand(0); if (isa(DataOp)) IsVectorAccess = true; } // A load which gets inserted into a vector element will be combined into a // VLE type instruction. if (!IsVectorAccess && isa(I) && I->hasOneUse()) { User *LoadUser = *I->user_begin(); if (isa(LoadUser)) IsVectorAccess = true; } if (IsFPAccess || IsVectorAccess) return AddressingMode(false/*LongDispl*/, true/*IdxReg*/); } return AddressingMode(true/*LongDispl*/, true/*IdxReg*/); } bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const { // Punt on globals for now, although they can be used in limited // RELATIVE LONG cases. if (AM.BaseGV) return false; // Require a 20-bit signed offset. if (!isInt<20>(AM.BaseOffs)) return false; AddressingMode SupportedAM(true, true); if (I != nullptr) SupportedAM = supportedAddressingMode(I, Subtarget.hasVector()); if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs)) return false; if (!SupportedAM.IndexReg) // No indexing allowed. return AM.Scale == 0; else // Indexing is OK but no scale factor can be applied. return AM.Scale == 0 || AM.Scale == 1; } bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const { if (!FromType->isIntegerTy() || !ToType->isIntegerTy()) return false; unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedSize(); unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedSize(); return FromBits > ToBits; } bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const { if (!FromVT.isInteger() || !ToVT.isInteger()) return false; unsigned FromBits = FromVT.getFixedSizeInBits(); unsigned ToBits = ToVT.getFixedSizeInBits(); return FromBits > ToBits; } //===----------------------------------------------------------------------===// // Inline asm support //===----------------------------------------------------------------------===// TargetLowering::ConstraintType SystemZTargetLowering::getConstraintType(StringRef Constraint) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'a': // Address register case 'd': // Data register (equivalent to 'r') case 'f': // Floating-point register case 'h': // High-part register case 'r': // General-purpose register case 'v': // Vector register return C_RegisterClass; case 'Q': // Memory with base and unsigned 12-bit displacement case 'R': // Likewise, plus an index case 'S': // Memory with base and signed 20-bit displacement case 'T': // Likewise, plus an index case 'm': // Equivalent to 'T'. return C_Memory; case 'I': // Unsigned 8-bit constant case 'J': // Unsigned 12-bit constant case 'K': // Signed 16-bit constant case 'L': // Signed 20-bit displacement (on all targets we support) case 'M': // 0x7fffffff return C_Immediate; default: break; } } return TargetLowering::getConstraintType(Constraint); } TargetLowering::ConstraintWeight SystemZTargetLowering:: getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const { ConstraintWeight weight = CW_Invalid; Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. 
if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); // Look at the constraint type. switch (*constraint) { default: weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); break; case 'a': // Address register case 'd': // Data register (equivalent to 'r') case 'h': // High-part register case 'r': // General-purpose register if (CallOperandVal->getType()->isIntegerTy()) weight = CW_Register; break; case 'f': // Floating-point register if (type->isFloatingPointTy()) weight = CW_Register; break; case 'v': // Vector register if ((type->isVectorTy() || type->isFloatingPointTy()) && Subtarget.hasVector()) weight = CW_Register; break; case 'I': // Unsigned 8-bit constant if (auto *C = dyn_cast(CallOperandVal)) if (isUInt<8>(C->getZExtValue())) weight = CW_Constant; break; case 'J': // Unsigned 12-bit constant if (auto *C = dyn_cast(CallOperandVal)) if (isUInt<12>(C->getZExtValue())) weight = CW_Constant; break; case 'K': // Signed 16-bit constant if (auto *C = dyn_cast(CallOperandVal)) if (isInt<16>(C->getSExtValue())) weight = CW_Constant; break; case 'L': // Signed 20-bit displacement (on all targets we support) if (auto *C = dyn_cast(CallOperandVal)) if (isInt<20>(C->getSExtValue())) weight = CW_Constant; break; case 'M': // 0x7fffffff if (auto *C = dyn_cast(CallOperandVal)) if (C->getZExtValue() == 0x7fffffff) weight = CW_Constant; break; } return weight; } // Parse a "{tNNN}" register constraint for which the register type "t" // has already been verified. MC is the class associated with "t" and // Map maps 0-based register numbers to LLVM register numbers. static std::pair parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, const unsigned *Map, unsigned Size) { assert(*(Constraint.end()-1) == '}' && "Missing '}'"); if (isdigit(Constraint[2])) { unsigned Index; bool Failed = Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index); if (!Failed && Index < Size && Map[Index]) return std::make_pair(Map[Index], RC); } return std::make_pair(0U, nullptr); } std::pair SystemZTargetLowering::getRegForInlineAsmConstraint( const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { // GCC Constraint Letters switch (Constraint[0]) { default: break; case 'd': // Data register (equivalent to 'r') case 'r': // General-purpose register if (VT == MVT::i64) return std::make_pair(0U, &SystemZ::GR64BitRegClass); else if (VT == MVT::i128) return std::make_pair(0U, &SystemZ::GR128BitRegClass); return std::make_pair(0U, &SystemZ::GR32BitRegClass); case 'a': // Address register if (VT == MVT::i64) return std::make_pair(0U, &SystemZ::ADDR64BitRegClass); else if (VT == MVT::i128) return std::make_pair(0U, &SystemZ::ADDR128BitRegClass); return std::make_pair(0U, &SystemZ::ADDR32BitRegClass); case 'h': // High-part register (an LLVM extension) return std::make_pair(0U, &SystemZ::GRH32BitRegClass); case 'f': // Floating-point register if (!useSoftFloat()) { if (VT == MVT::f64) return std::make_pair(0U, &SystemZ::FP64BitRegClass); else if (VT == MVT::f128) return std::make_pair(0U, &SystemZ::FP128BitRegClass); return std::make_pair(0U, &SystemZ::FP32BitRegClass); } break; case 'v': // Vector register if (Subtarget.hasVector()) { if (VT == MVT::f32) return std::make_pair(0U, &SystemZ::VR32BitRegClass); if (VT == MVT::f64) return std::make_pair(0U, &SystemZ::VR64BitRegClass); return std::make_pair(0U, &SystemZ::VR128BitRegClass); } break; } } if (Constraint.size() > 0 && Constraint[0] == '{') { // We need 
to override the default register parsing for GPRs and FPRs // because the interpretation depends on VT. The internal names of // the registers are also different from the external names // (F0D and F0S instead of F0, etc.). if (Constraint[1] == 'r') { if (VT == MVT::i32) return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass, SystemZMC::GR32Regs, 16); if (VT == MVT::i128) return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass, SystemZMC::GR128Regs, 16); return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass, SystemZMC::GR64Regs, 16); } if (Constraint[1] == 'f') { if (useSoftFloat()) return std::make_pair( 0u, static_cast(nullptr)); if (VT == MVT::f32) return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass, SystemZMC::FP32Regs, 16); if (VT == MVT::f128) return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass, SystemZMC::FP128Regs, 16); return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass, SystemZMC::FP64Regs, 16); } if (Constraint[1] == 'v') { if (!Subtarget.hasVector()) return std::make_pair( 0u, static_cast(nullptr)); if (VT == MVT::f32) return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass, SystemZMC::VR32Regs, 32); if (VT == MVT::f64) return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass, SystemZMC::VR64Regs, 32); return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass, SystemZMC::VR128Regs, 32); } } return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. Register SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const { Register Reg = StringSwitch(RegName) .Case("r15", SystemZ::R15D) .Default(0); if (Reg) return Reg; report_fatal_error("Invalid register name global variable"); } void SystemZTargetLowering:: LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector &Ops, SelectionDAG &DAG) const { // Only support length 1 constraints for now. 
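// Illustrative use from C (hypothetical snippet, not part of this file):
//   int Res = 0;
//   asm("ahi %0,%1" : "+d"(Res) : "K"(200));
// Here 'K' promises a signed 16-bit immediate, so the constant reaches this
// hook as a ConstantSDNode and is emitted as a target constant by the 'K'
// case below; values outside the range produce no operand and are rejected
// by the generic inline-asm handling.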
if (Constraint.length() == 1) { switch (Constraint[0]) { case 'I': // Unsigned 8-bit constant if (auto *C = dyn_cast(Op)) if (isUInt<8>(C->getZExtValue())) Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType())); return; case 'J': // Unsigned 12-bit constant if (auto *C = dyn_cast(Op)) if (isUInt<12>(C->getZExtValue())) Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType())); return; case 'K': // Signed 16-bit constant if (auto *C = dyn_cast(Op)) if (isInt<16>(C->getSExtValue())) Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), Op.getValueType())); return; case 'L': // Signed 20-bit displacement (on all targets we support) if (auto *C = dyn_cast(Op)) if (isInt<20>(C->getSExtValue())) Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), Op.getValueType())); return; case 'M': // 0x7fffffff if (auto *C = dyn_cast(Op)) if (C->getZExtValue() == 0x7fffffff) Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), Op.getValueType())); return; } } TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } //===----------------------------------------------------------------------===// // Calling conventions //===----------------------------------------------------------------------===// #include "SystemZGenCallingConv.inc" const MCPhysReg *SystemZTargetLowering::getScratchRegisters( CallingConv::ID) const { static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D, SystemZ::R14D, 0 }; return ScratchRegs; } bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType, Type *ToType) const { return isTruncateFree(FromType, ToType); } bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { return CI->isTailCall(); } // We do not yet support 128-bit single-element vector types. If the user // attempts to use such types as function argument or return type, prefer // to error out instead of emitting code violating the ABI. static void VerifyVectorType(MVT VT, EVT ArgVT) { if (ArgVT.isVector() && !VT.isVector()) report_fatal_error("Unsupported vector argument or return type"); } static void VerifyVectorTypes(const SmallVectorImpl &Ins) { for (unsigned i = 0; i < Ins.size(); ++i) VerifyVectorType(Ins[i].VT, Ins[i].ArgVT); } static void VerifyVectorTypes(const SmallVectorImpl &Outs) { for (unsigned i = 0; i < Outs.size(); ++i) VerifyVectorType(Outs[i].VT, Outs[i].ArgVT); } // Value is a value that has been passed to us in the location described by VA // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining // any loads onto Chain. static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL, CCValAssign &VA, SDValue Chain, SDValue Value) { // If the argument has been promoted from a smaller type, insert an // assertion to capture this. if (VA.getLocInfo() == CCValAssign::SExt) Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value, DAG.getValueType(VA.getValVT())); else if (VA.getLocInfo() == CCValAssign::ZExt) Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value, DAG.getValueType(VA.getValVT())); if (VA.isExtInLoc()) Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value); else if (VA.getLocInfo() == CCValAssign::BCvt) { // If this is a short vector argument loaded from the stack, // extend from i64 to full vector size and then bitcast. 
assert(VA.getLocVT() == MVT::i64); assert(VA.getValVT().isVector()); Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)}); Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value); } else assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo"); return Value; } // Value is a value of type VA.getValVT() that we need to copy into // the location described by VA. Return a copy of Value converted to // VA.getValVT(). The caller is responsible for handling indirect values. static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL, CCValAssign &VA, SDValue Value) { switch (VA.getLocInfo()) { case CCValAssign::SExt: return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value); case CCValAssign::ZExt: return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value); case CCValAssign::AExt: return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value); case CCValAssign::BCvt: // If this is a short vector argument to be stored to the stack, // bitcast to v2i64 and then extract first element. assert(VA.getLocVT() == MVT::i64); assert(VA.getValVT().isVector()); Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value, DAG.getConstant(0, DL, MVT::i32)); case CCValAssign::Full: return Value; default: llvm_unreachable("Unhandled getLocInfo()"); } } SDValue SystemZTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); SystemZMachineFunctionInfo *FuncInfo = MF.getInfo(); auto *TFL = static_cast(Subtarget.getFrameLowering()); EVT PtrVT = getPointerTy(DAG.getDataLayout()); // Detect unsupported vector argument types. if (Subtarget.hasVector()) VerifyVectorTypes(Ins); // Assign locations to all of the incoming arguments. SmallVector ArgLocs; SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ); unsigned NumFixedGPRs = 0; unsigned NumFixedFPRs = 0; for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { SDValue ArgValue; CCValAssign &VA = ArgLocs[I]; EVT LocVT = VA.getLocVT(); if (VA.isRegLoc()) { // Arguments passed in registers const TargetRegisterClass *RC; switch (LocVT.getSimpleVT().SimpleTy) { default: // Integers smaller than i64 should be promoted to i64. llvm_unreachable("Unexpected argument type"); case MVT::i32: NumFixedGPRs += 1; RC = &SystemZ::GR32BitRegClass; break; case MVT::i64: NumFixedGPRs += 1; RC = &SystemZ::GR64BitRegClass; break; case MVT::f32: NumFixedFPRs += 1; RC = &SystemZ::FP32BitRegClass; break; case MVT::f64: NumFixedFPRs += 1; RC = &SystemZ::FP64BitRegClass; break; case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: case MVT::v4f32: case MVT::v2f64: RC = &SystemZ::VR128BitRegClass; break; } Register VReg = MRI.createVirtualRegister(RC); MRI.addLiveIn(VA.getLocReg(), VReg); ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); } else { assert(VA.isMemLoc() && "Argument not register or memory"); // Create the frame index object for this incoming parameter. int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, VA.getLocMemOffset(), true); // Create the SelectionDAG nodes corresponding to a load // from this parameter. Unpromoted ints and floats are // passed as right-justified 8-byte values. 
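// For instance (informal): an i32 or f32 passed in memory still occupies a
// full 8-byte stack slot on this big-endian target, with the 4 meaningful
// bytes right-justified in bytes 4..7 of the slot, which is why 4 is added
// to the frame-index address just below before the load is created.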
SDValue FIN = DAG.getFrameIndex(FI, PtrVT); if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4, DL)); ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN, MachinePointerInfo::getFixedStack(MF, FI)); } // Convert the value of the argument register into the value that's // being passed. if (VA.getLocInfo() == CCValAssign::Indirect) { InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, MachinePointerInfo())); // If the original argument was split (e.g. i128), we need // to load all parts of it here (using the same address). unsigned ArgIndex = Ins[I].OrigArgIndex; assert (Ins[I].PartOffset == 0); while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) { CCValAssign &PartVA = ArgLocs[I + 1]; unsigned PartOffset = Ins[I + 1].PartOffset; SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, DAG.getIntPtrConstant(PartOffset, DL)); InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, MachinePointerInfo())); ++I; } } else InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue)); } if (IsVarArg) { // Save the number of non-varargs registers for later use by va_start, etc. FuncInfo->setVarArgsFirstGPR(NumFixedGPRs); FuncInfo->setVarArgsFirstFPR(NumFixedFPRs); // Likewise the address (in the form of a frame index) of where the // first stack vararg would be. The 1-byte size here is arbitrary. int64_t StackSize = CCInfo.getNextStackOffset(); FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true)); // ...and a similar frame index for the caller-allocated save area // that will be used to store the incoming registers. int64_t RegSaveOffset = -SystemZMC::CallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16; unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true); FuncInfo->setRegSaveFrameIndex(RegSaveIndex); // Store the FPR varargs in the reserved frame slots. (We store the // GPRs as part of the prologue.) if (NumFixedFPRs < SystemZ::NumArgFPRs && !useSoftFloat()) { SDValue MemOps[SystemZ::NumArgFPRs]; for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) { unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ArgFPRs[I]); int FI = MFI.CreateFixedObject(8, -SystemZMC::CallFrameSize + Offset, true); SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I], &SystemZ::FP64BitRegClass); SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64); MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN, MachinePointerInfo::getFixedStack(MF, FI)); } // Join the stores, which are independent of one another. Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, makeArrayRef(&MemOps[NumFixedFPRs], SystemZ::NumArgFPRs-NumFixedFPRs)); } } return Chain; } static bool canUseSiblingCall(const CCState &ArgCCInfo, SmallVectorImpl &ArgLocs, SmallVectorImpl &Outs) { // Punt if there are any indirect or stack arguments, or if the call // needs the callee-saved argument register R6, or if the call uses // the callee-saved register arguments SwiftSelf and SwiftError. 
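// (In the ELF ABI r6 doubles as the fifth GPR argument register and a
// callee-saved register, so a sibling call cannot load an outgoing argument
// into it without clobbering a value the caller is required to preserve.)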
for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { CCValAssign &VA = ArgLocs[I]; if (VA.getLocInfo() == CCValAssign::Indirect) return false; if (!VA.isRegLoc()) return false; Register Reg = VA.getLocReg(); if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D) return false; if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError()) return false; } return true; } SDValue SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc &DL = CLI.DL; SmallVectorImpl &Outs = CLI.Outs; SmallVectorImpl &OutVals = CLI.OutVals; SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &IsTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; bool IsVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); EVT PtrVT = getPointerTy(MF.getDataLayout()); + LLVMContext &Ctx = *DAG.getContext(); // Detect unsupported vector argument and return types. if (Subtarget.hasVector()) { VerifyVectorTypes(Outs); VerifyVectorTypes(Ins); } // Analyze the operands of the call, assigning locations to each operand. SmallVector ArgLocs; - SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); + SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx); ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ); // We don't support GuaranteedTailCallOpt, only automatically-detected // sibling calls. if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs)) IsTailCall = false; // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = ArgCCInfo.getNextStackOffset(); // Mark the start of the call. if (!IsTailCall) Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL); // Copy argument values to their designated locations. SmallVector, 9> RegsToPass; SmallVector MemOpChains; SDValue StackPtr; for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { CCValAssign &VA = ArgLocs[I]; SDValue ArgValue = OutVals[I]; if (VA.getLocInfo() == CCValAssign::Indirect) { // Store the argument in a stack slot and pass its address. - SDValue SpillSlot = DAG.CreateStackTemporary(Outs[I].ArgVT); + unsigned ArgIndex = Outs[I].OrigArgIndex; + EVT SlotVT; + if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) { + // Allocate the full stack space for a promoted (and split) argument. + Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty; + EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType); + MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT); + unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT); + SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N); + } else { + SlotVT = Outs[I].ArgVT; + } + SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT); int FI = cast(SpillSlot)->getIndex(); MemOpChains.push_back( DAG.getStore(Chain, DL, ArgValue, SpillSlot, MachinePointerInfo::getFixedStack(MF, FI))); // If the original argument was split (e.g. i128), we need // to store all parts of it here (and pass just one address). 
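// Informal example of the SlotVT computation above: an integer argument
// wider than 64 bits that is passed indirectly gets split into several
// 64-bit register-sized parts, so the spill slot is sized from the number
// and width of those parts rather than from the first part's ArgVT; the
// assertion inside the loop below then checks that every part's store stays
// within the slot.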
- unsigned ArgIndex = Outs[I].OrigArgIndex; assert (Outs[I].PartOffset == 0); while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) { SDValue PartValue = OutVals[I + 1]; unsigned PartOffset = Outs[I + 1].PartOffset; SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, DAG.getIntPtrConstant(PartOffset, DL)); MemOpChains.push_back( DAG.getStore(Chain, DL, PartValue, Address, MachinePointerInfo::getFixedStack(MF, FI))); + assert((PartOffset + PartValue.getValueType().getStoreSize() <= + SlotVT.getStoreSize()) && "Not enough space for argument part!"); ++I; } ArgValue = SpillSlot; } else ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue); if (VA.isRegLoc()) // Queue up the argument copies and emit them at the end. RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); else { assert(VA.isMemLoc() && "Argument not register or memory"); // Work out the address of the stack slot. Unpromoted ints and // floats are passed as right-justified 8-byte values. if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT); unsigned Offset = SystemZMC::CallFrameSize + VA.getLocMemOffset(); if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) Offset += 4; SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, DAG.getIntPtrConstant(Offset, DL)); // Emit the store. MemOpChains.push_back( DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); } } // Join the stores, which are independent of one another. if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); // Accept direct calls by converting symbolic call addresses to the // associated Target* opcodes. Force %r1 to be used for indirect // tail calls. SDValue Glue; if (auto *G = dyn_cast(Callee)) { Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT); Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); } else if (auto *E = dyn_cast(Callee)) { Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT); Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); } else if (IsTailCall) { Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue); Glue = Chain.getValue(1); Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType()); } // Build a sequence of copy-to-reg nodes, chained and glued together. for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) { Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first, RegsToPass[I].second, Glue); Glue = Chain.getValue(1); } // The first call operand is the chain and the second is the target address. SmallVector Ops; Ops.push_back(Chain); Ops.push_back(Callee); // Add argument registers to the end of the list so that they are // known live into the call. for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) Ops.push_back(DAG.getRegister(RegsToPass[I].first, RegsToPass[I].second.getValueType())); // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); // Glue the call to the argument copies, if any. if (Glue.getNode()) Ops.push_back(Glue); // Emit the call. 
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); if (IsTailCall) return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops); Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops); DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); Glue = Chain.getValue(1); // Mark the end of the call, which is glued to the call itself. Chain = DAG.getCALLSEQ_END(Chain, DAG.getConstant(NumBytes, DL, PtrVT, true), DAG.getConstant(0, DL, PtrVT, true), Glue, DL); Glue = Chain.getValue(1); // Assign locations to each value returned by this call. SmallVector RetLocs; - CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); + CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx); RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ); // Copy all of the result registers out of their specified physreg. for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) { CCValAssign &VA = RetLocs[I]; // Copy the value out, gluing the copy to the end of the call sequence. SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); Chain = RetValue.getValue(1); Glue = RetValue.getValue(2); // Convert the value of the return register into the value that's // being returned. InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue)); } return Chain; } bool SystemZTargetLowering:: CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl &Outs, LLVMContext &Context) const { // Detect unsupported vector return types. if (Subtarget.hasVector()) VerifyVectorTypes(Outs); // Special case that we cannot easily detect in RetCC_SystemZ since // i128 is not a legal type. for (auto &Out : Outs) if (Out.ArgVT == MVT::i128) return false; SmallVector RetLocs; CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context); return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ); } SDValue SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &DL, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); // Detect unsupported vector return types. if (Subtarget.hasVector()) VerifyVectorTypes(Outs); // Assign locations to each returned value. SmallVector RetLocs; CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ); // Quick exit for void returns if (RetLocs.empty()) return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain); if (CallConv == CallingConv::GHC) report_fatal_error("GHC functions return void only"); // Copy the result values into the output registers. SDValue Glue; SmallVector RetOps; RetOps.push_back(Chain); for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) { CCValAssign &VA = RetLocs[I]; SDValue RetValue = OutVals[I]; // Make the return register live on exit. assert(VA.isRegLoc() && "Can only return in registers!"); // Promote the value as required. RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue); // Chain and glue the copies together. Register Reg = VA.getLocReg(); Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue); Glue = Chain.getValue(1); RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT())); } // Update chain and glue. RetOps[0] = Chain; if (Glue.getNode()) RetOps.push_back(Glue); return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps); } // Return true if Op is an intrinsic node with chain that returns the CC value // as its only (other) argument. 
Provide the associated SystemZISD opcode and // the mask of valid CC values if so. static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, unsigned &CCValid) { unsigned Id = cast(Op.getOperand(1))->getZExtValue(); switch (Id) { case Intrinsic::s390_tbegin: Opcode = SystemZISD::TBEGIN; CCValid = SystemZ::CCMASK_TBEGIN; return true; case Intrinsic::s390_tbegin_nofloat: Opcode = SystemZISD::TBEGIN_NOFLOAT; CCValid = SystemZ::CCMASK_TBEGIN; return true; case Intrinsic::s390_tend: Opcode = SystemZISD::TEND; CCValid = SystemZ::CCMASK_TEND; return true; default: return false; } } // Return true if Op is an intrinsic node without chain that returns the // CC value as its final argument. Provide the associated SystemZISD // opcode and the mask of valid CC values if so. static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) { unsigned Id = cast(Op.getOperand(0))->getZExtValue(); switch (Id) { case Intrinsic::s390_vpkshs: case Intrinsic::s390_vpksfs: case Intrinsic::s390_vpksgs: Opcode = SystemZISD::PACKS_CC; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_vpklshs: case Intrinsic::s390_vpklsfs: case Intrinsic::s390_vpklsgs: Opcode = SystemZISD::PACKLS_CC; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_vceqbs: case Intrinsic::s390_vceqhs: case Intrinsic::s390_vceqfs: case Intrinsic::s390_vceqgs: Opcode = SystemZISD::VICMPES; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_vchbs: case Intrinsic::s390_vchhs: case Intrinsic::s390_vchfs: case Intrinsic::s390_vchgs: Opcode = SystemZISD::VICMPHS; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_vchlbs: case Intrinsic::s390_vchlhs: case Intrinsic::s390_vchlfs: case Intrinsic::s390_vchlgs: Opcode = SystemZISD::VICMPHLS; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_vtm: Opcode = SystemZISD::VTM; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_vfaebs: case Intrinsic::s390_vfaehs: case Intrinsic::s390_vfaefs: Opcode = SystemZISD::VFAE_CC; CCValid = SystemZ::CCMASK_ANY; return true; case Intrinsic::s390_vfaezbs: case Intrinsic::s390_vfaezhs: case Intrinsic::s390_vfaezfs: Opcode = SystemZISD::VFAEZ_CC; CCValid = SystemZ::CCMASK_ANY; return true; case Intrinsic::s390_vfeebs: case Intrinsic::s390_vfeehs: case Intrinsic::s390_vfeefs: Opcode = SystemZISD::VFEE_CC; CCValid = SystemZ::CCMASK_ANY; return true; case Intrinsic::s390_vfeezbs: case Intrinsic::s390_vfeezhs: case Intrinsic::s390_vfeezfs: Opcode = SystemZISD::VFEEZ_CC; CCValid = SystemZ::CCMASK_ANY; return true; case Intrinsic::s390_vfenebs: case Intrinsic::s390_vfenehs: case Intrinsic::s390_vfenefs: Opcode = SystemZISD::VFENE_CC; CCValid = SystemZ::CCMASK_ANY; return true; case Intrinsic::s390_vfenezbs: case Intrinsic::s390_vfenezhs: case Intrinsic::s390_vfenezfs: Opcode = SystemZISD::VFENEZ_CC; CCValid = SystemZ::CCMASK_ANY; return true; case Intrinsic::s390_vistrbs: case Intrinsic::s390_vistrhs: case Intrinsic::s390_vistrfs: Opcode = SystemZISD::VISTR_CC; CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3; return true; case Intrinsic::s390_vstrcbs: case Intrinsic::s390_vstrchs: case Intrinsic::s390_vstrcfs: Opcode = SystemZISD::VSTRC_CC; CCValid = SystemZ::CCMASK_ANY; return true; case Intrinsic::s390_vstrczbs: case Intrinsic::s390_vstrczhs: case Intrinsic::s390_vstrczfs: Opcode = SystemZISD::VSTRCZ_CC; CCValid = SystemZ::CCMASK_ANY; return true; case Intrinsic::s390_vstrsb: case Intrinsic::s390_vstrsh: case Intrinsic::s390_vstrsf: Opcode = SystemZISD::VSTRS_CC; 
CCValid = SystemZ::CCMASK_ANY; return true; case Intrinsic::s390_vstrszb: case Intrinsic::s390_vstrszh: case Intrinsic::s390_vstrszf: Opcode = SystemZISD::VSTRSZ_CC; CCValid = SystemZ::CCMASK_ANY; return true; case Intrinsic::s390_vfcedbs: case Intrinsic::s390_vfcesbs: Opcode = SystemZISD::VFCMPES; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_vfchdbs: case Intrinsic::s390_vfchsbs: Opcode = SystemZISD::VFCMPHS; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_vfchedbs: case Intrinsic::s390_vfchesbs: Opcode = SystemZISD::VFCMPHES; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_vftcidb: case Intrinsic::s390_vftcisb: Opcode = SystemZISD::VFTCI; CCValid = SystemZ::CCMASK_VCMP; return true; case Intrinsic::s390_tdc: Opcode = SystemZISD::TDC; CCValid = SystemZ::CCMASK_TDC; return true; default: return false; } } // Emit an intrinsic with chain and an explicit CC register result. static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, unsigned Opcode) { // Copy all operands except the intrinsic ID. unsigned NumOps = Op.getNumOperands(); SmallVector Ops; Ops.reserve(NumOps - 1); Ops.push_back(Op.getOperand(0)); for (unsigned I = 2; I < NumOps; ++I) Ops.push_back(Op.getOperand(I)); assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other); SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); SDValue OldChain = SDValue(Op.getNode(), 1); SDValue NewChain = SDValue(Intr.getNode(), 1); DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain); return Intr.getNode(); } // Emit an intrinsic with an explicit CC register result. static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, unsigned Opcode) { // Copy all operands except the intrinsic ID. unsigned NumOps = Op.getNumOperands(); SmallVector Ops; Ops.reserve(NumOps - 1); for (unsigned I = 1; I < NumOps; ++I) Ops.push_back(Op.getOperand(I)); SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops); return Intr.getNode(); } // CC is a comparison that will be implemented using an integer or // floating-point comparison. Return the condition code mask for // a branch on true. In the integer case, CCMASK_CMP_UO is set for // unsigned comparisons and clear for signed ones. In the floating-point // case, CCMASK_CMP_UO has its normal mask meaning (unordered). static unsigned CCMaskForCondCode(ISD::CondCode CC) { #define CONV(X) \ case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \ case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \ case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X switch (CC) { default: llvm_unreachable("Invalid integer condition!"); CONV(EQ); CONV(NE); CONV(GT); CONV(GE); CONV(LT); CONV(LE); case ISD::SETO: return SystemZ::CCMASK_CMP_O; case ISD::SETUO: return SystemZ::CCMASK_CMP_UO; } #undef CONV } // If C can be converted to a comparison against zero, adjust the operands // as necessary. 
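// For example, a signed "x > -1" becomes "x >= 0" and a signed "x < 1"
// becomes "x <= 0" (the CCMASK_CMP_EQ toggle below), so the comparison can
// later be done against zero, e.g. with a LOAD AND TEST style instruction.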
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { if (C.ICmpType == SystemZICMP::UnsignedOnly) return; auto *ConstOp1 = dyn_cast(C.Op1.getNode()); if (!ConstOp1) return; int64_t Value = ConstOp1->getSExtValue(); if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) || (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) || (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) || (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) { C.CCMask ^= SystemZ::CCMASK_CMP_EQ; C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType()); } } // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI, // adjust the operands as necessary. static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { // For us to make any changes, it must a comparison between a single-use // load and a constant. if (!C.Op0.hasOneUse() || C.Op0.getOpcode() != ISD::LOAD || C.Op1.getOpcode() != ISD::Constant) return; // We must have an 8- or 16-bit load. auto *Load = cast(C.Op0); unsigned NumBits = Load->getMemoryVT().getSizeInBits(); if ((NumBits != 8 && NumBits != 16) || NumBits != Load->getMemoryVT().getStoreSizeInBits()) return; // The load must be an extending one and the constant must be within the // range of the unextended value. auto *ConstOp1 = cast(C.Op1); uint64_t Value = ConstOp1->getZExtValue(); uint64_t Mask = (1 << NumBits) - 1; if (Load->getExtensionType() == ISD::SEXTLOAD) { // Make sure that ConstOp1 is in range of C.Op0. int64_t SignedValue = ConstOp1->getSExtValue(); if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask) return; if (C.ICmpType != SystemZICMP::SignedOnly) { // Unsigned comparison between two sign-extended values is equivalent // to unsigned comparison between two zero-extended values. Value &= Mask; } else if (NumBits == 8) { // Try to treat the comparison as unsigned, so that we can use CLI. // Adjust CCMask and Value as necessary. if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT) // Test whether the high bit of the byte is set. Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT; else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE) // Test whether the high bit of the byte is clear. Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT; else // No instruction exists for this combination. return; C.ICmpType = SystemZICMP::UnsignedOnly; } } else if (Load->getExtensionType() == ISD::ZEXTLOAD) { if (Value > Mask) return; // If the constant is in range, we can use any comparison. C.ICmpType = SystemZICMP::Any; } else return; // Make sure that the first operand is an i32 of the right extension type. ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ? ISD::SEXTLOAD : ISD::ZEXTLOAD); if (C.Op0.getValueType() != MVT::i32 || Load->getExtensionType() != ExtType) { C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(), Load->getBasePtr(), Load->getPointerInfo(), Load->getMemoryVT(), Load->getAlignment(), Load->getMemOperand()->getFlags()); // Update the chain uses. DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1)); } // Make sure that the second operand is an i32 with the right value. if (C.Op1.getValueType() != MVT::i32 || Value != ConstOp1->getZExtValue()) C.Op1 = DAG.getConstant(Value, DL, MVT::i32); } // Return true if Op is either an unextended load, or a load suitable // for integer register-memory comparisons of type ICmpType. 
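// For instance, a sign-extending 32-bit load can feed a signed 64-bit
// comparison directly, but not an unsigned one, and a zero-extending load is
// the mirror case; plain i8 loads are rejected because there is no
// register-with-memory-byte compare.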
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) { auto *Load = dyn_cast(Op.getNode()); if (Load) { // There are no instructions to compare a register with a memory byte. if (Load->getMemoryVT() == MVT::i8) return false; // Otherwise decide on extension type. switch (Load->getExtensionType()) { case ISD::NON_EXTLOAD: return true; case ISD::SEXTLOAD: return ICmpType != SystemZICMP::UnsignedOnly; case ISD::ZEXTLOAD: return ICmpType != SystemZICMP::SignedOnly; default: break; } } return false; } // Return true if it is better to swap the operands of C. static bool shouldSwapCmpOperands(const Comparison &C) { // Leave f128 comparisons alone, since they have no memory forms. if (C.Op0.getValueType() == MVT::f128) return false; // Always keep a floating-point constant second, since comparisons with // zero can use LOAD TEST and comparisons with other constants make a // natural memory operand. if (isa(C.Op1)) return false; // Never swap comparisons with zero since there are many ways to optimize // those later. auto *ConstOp1 = dyn_cast(C.Op1); if (ConstOp1 && ConstOp1->getZExtValue() == 0) return false; // Also keep natural memory operands second if the loaded value is // only used here. Several comparisons have memory forms. if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse()) return false; // Look for cases where Cmp0 is a single-use load and Cmp1 isn't. // In that case we generally prefer the memory to be second. if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) { // The only exceptions are when the second operand is a constant and // we can use things like CHHSI. if (!ConstOp1) return true; // The unsigned memory-immediate instructions can handle 16-bit // unsigned integers. if (C.ICmpType != SystemZICMP::SignedOnly && isUInt<16>(ConstOp1->getZExtValue())) return false; // The signed memory-immediate instructions can handle 16-bit // signed integers. if (C.ICmpType != SystemZICMP::UnsignedOnly && isInt<16>(ConstOp1->getSExtValue())) return false; return true; } // Try to promote the use of CGFR and CLGFR. unsigned Opcode0 = C.Op0.getOpcode(); if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND) return true; if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND) return true; if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND && C.Op0.getOperand(1).getOpcode() == ISD::Constant && cast(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff) return true; return false; } // Check whether C tests for equality between X and Y and whether X - Y // or Y - X is also computed. In that case it's better to compare the // result of the subtraction against zero. static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { if (C.CCMask == SystemZ::CCMASK_CMP_EQ || C.CCMask == SystemZ::CCMASK_CMP_NE) { for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) { SDNode *N = *I; if (N->getOpcode() == ISD::SUB && ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) || (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) { C.Op0 = SDValue(N, 0); C.Op1 = DAG.getConstant(0, DL, N->getValueType(0)); return; } } } } // Check whether C compares a floating-point value with zero and if that // floating-point value is also negated. In this case we can use the // negation to set CC, so avoiding separate LOAD AND TEST and // LOAD (NEGATIVE/COMPLEMENT) instructions. 
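// Sketch of the idea: if the code computes y = -x and also tests "x < 0.0",
// the comparison is rewritten below to use y with the CC mask reversed
// (i.e. "-x > 0.0"), so the negation that produces y also sets CC.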
static void adjustForFNeg(Comparison &C) { // This optimization is invalid for strict comparisons, since FNEG // does not raise any exceptions. if (C.Chain) return; auto *C1 = dyn_cast(C.Op1); if (C1 && C1->isZero()) { for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) { SDNode *N = *I; if (N->getOpcode() == ISD::FNEG) { C.Op0 = SDValue(N, 0); C.CCMask = SystemZ::reverseCCMask(C.CCMask); return; } } } } // Check whether C compares (shl X, 32) with 0 and whether X is // also sign-extended. In that case it is better to test the result // of the sign extension using LTGFR. // // This case is important because InstCombine transforms a comparison // with (sext (trunc X)) into a comparison with (shl X, 32). static void adjustForLTGFR(Comparison &C) { // Check for a comparison between (shl X, 32) and 0. if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 && C.Op1.getOpcode() == ISD::Constant && cast(C.Op1)->getZExtValue() == 0) { auto *C1 = dyn_cast(C.Op0.getOperand(1)); if (C1 && C1->getZExtValue() == 32) { SDValue ShlOp0 = C.Op0.getOperand(0); // See whether X has any SIGN_EXTEND_INREG uses. for (auto I = ShlOp0->use_begin(), E = ShlOp0->use_end(); I != E; ++I) { SDNode *N = *I; if (N->getOpcode() == ISD::SIGN_EXTEND_INREG && cast(N->getOperand(1))->getVT() == MVT::i32) { C.Op0 = SDValue(N, 0); return; } } } } } // If C compares the truncation of an extending load, try to compare // the untruncated value instead. This exposes more opportunities to // reuse CC. static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { if (C.Op0.getOpcode() == ISD::TRUNCATE && C.Op0.getOperand(0).getOpcode() == ISD::LOAD && C.Op1.getOpcode() == ISD::Constant && cast(C.Op1)->getZExtValue() == 0) { auto *L = cast(C.Op0.getOperand(0)); if (L->getMemoryVT().getStoreSizeInBits().getFixedSize() <= C.Op0.getValueSizeInBits().getFixedSize()) { unsigned Type = L->getExtensionType(); if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) || (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) { C.Op0 = C.Op0.getOperand(0); C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType()); } } } } // Return true if shift operation N has an in-range constant shift value. // Store it in ShiftVal if so. static bool isSimpleShift(SDValue N, unsigned &ShiftVal) { auto *Shift = dyn_cast(N.getOperand(1)); if (!Shift) return false; uint64_t Amount = Shift->getZExtValue(); if (Amount >= N.getValueSizeInBits()) return false; ShiftVal = Amount; return true; } // Check whether an AND with Mask is suitable for a TEST UNDER MASK // instruction and whether the CC value is descriptive enough to handle // a comparison of type Opcode between the AND result and CmpVal. // CCMask says which comparison result is being tested and BitSize is // the number of bits in the operands. If TEST UNDER MASK can be used, // return the corresponding CC mask, otherwise return 0. static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, uint64_t Mask, uint64_t CmpVal, unsigned ICmpType) { assert(Mask != 0 && "ANDs with zero should have been removed by now"); // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL. if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) && !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask)) return 0; // Work out the masks for the lowest and highest bits. 
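  // (For example, the computation below gives Low == 0x0100 and
  // High == 0x8000 when Mask == 0xff00.)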
unsigned HighShift = 63 - countLeadingZeros(Mask); uint64_t High = uint64_t(1) << HighShift; uint64_t Low = uint64_t(1) << countTrailingZeros(Mask); // Signed ordered comparisons are effectively unsigned if the sign // bit is dropped. bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly); // Check for equality comparisons with 0, or the equivalent. if (CmpVal == 0) { if (CCMask == SystemZ::CCMASK_CMP_EQ) return SystemZ::CCMASK_TM_ALL_0; if (CCMask == SystemZ::CCMASK_CMP_NE) return SystemZ::CCMASK_TM_SOME_1; } if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) { if (CCMask == SystemZ::CCMASK_CMP_LT) return SystemZ::CCMASK_TM_ALL_0; if (CCMask == SystemZ::CCMASK_CMP_GE) return SystemZ::CCMASK_TM_SOME_1; } if (EffectivelyUnsigned && CmpVal < Low) { if (CCMask == SystemZ::CCMASK_CMP_LE) return SystemZ::CCMASK_TM_ALL_0; if (CCMask == SystemZ::CCMASK_CMP_GT) return SystemZ::CCMASK_TM_SOME_1; } // Check for equality comparisons with the mask, or the equivalent. if (CmpVal == Mask) { if (CCMask == SystemZ::CCMASK_CMP_EQ) return SystemZ::CCMASK_TM_ALL_1; if (CCMask == SystemZ::CCMASK_CMP_NE) return SystemZ::CCMASK_TM_SOME_0; } if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) { if (CCMask == SystemZ::CCMASK_CMP_GT) return SystemZ::CCMASK_TM_ALL_1; if (CCMask == SystemZ::CCMASK_CMP_LE) return SystemZ::CCMASK_TM_SOME_0; } if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) { if (CCMask == SystemZ::CCMASK_CMP_GE) return SystemZ::CCMASK_TM_ALL_1; if (CCMask == SystemZ::CCMASK_CMP_LT) return SystemZ::CCMASK_TM_SOME_0; } // Check for ordered comparisons with the top bit. if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) { if (CCMask == SystemZ::CCMASK_CMP_LE) return SystemZ::CCMASK_TM_MSB_0; if (CCMask == SystemZ::CCMASK_CMP_GT) return SystemZ::CCMASK_TM_MSB_1; } if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) { if (CCMask == SystemZ::CCMASK_CMP_LT) return SystemZ::CCMASK_TM_MSB_0; if (CCMask == SystemZ::CCMASK_CMP_GE) return SystemZ::CCMASK_TM_MSB_1; } // If there are just two bits, we can do equality checks for Low and High // as well. if (Mask == Low + High) { if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low) return SystemZ::CCMASK_TM_MIXED_MSB_0; if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low) return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY; if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High) return SystemZ::CCMASK_TM_MIXED_MSB_1; if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High) return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY; } // Looks like we've exhausted our options. return 0; } // See whether C can be implemented as a TEST UNDER MASK instruction. // Update the arguments with the TM version if so. static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { // Check that we have a comparison with a constant. auto *ConstOp1 = dyn_cast(C.Op1); if (!ConstOp1) return; uint64_t CmpVal = ConstOp1->getZExtValue(); // Check whether the nonconstant input is an AND with a constant mask. Comparison NewC(C); uint64_t MaskVal; ConstantSDNode *Mask = nullptr; if (C.Op0.getOpcode() == ISD::AND) { NewC.Op0 = C.Op0.getOperand(0); NewC.Op1 = C.Op0.getOperand(1); Mask = dyn_cast(NewC.Op1); if (!Mask) return; MaskVal = Mask->getZExtValue(); } else { // There is no instruction to compare with a 64-bit immediate // so use TMHH instead if possible. We need an unsigned ordered // comparison with an i64 immediate. 
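    // Rough example: an unsigned "x >= (1 << 48)" yields
    // MaskVal == 0xffff000000000000 below, and getTestUnderMaskCond then maps
    // it to a test-under-mask that branches when any of the top 16 bits of x
    // are set.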
if (NewC.Op0.getValueType() != MVT::i64 || NewC.CCMask == SystemZ::CCMASK_CMP_EQ || NewC.CCMask == SystemZ::CCMASK_CMP_NE || NewC.ICmpType == SystemZICMP::SignedOnly) return; // Convert LE and GT comparisons into LT and GE. if (NewC.CCMask == SystemZ::CCMASK_CMP_LE || NewC.CCMask == SystemZ::CCMASK_CMP_GT) { if (CmpVal == uint64_t(-1)) return; CmpVal += 1; NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ; } // If the low N bits of Op1 are zero than the low N bits of Op0 can // be masked off without changing the result. MaskVal = -(CmpVal & -CmpVal); NewC.ICmpType = SystemZICMP::UnsignedOnly; } if (!MaskVal) return; // Check whether the combination of mask, comparison value and comparison // type are suitable. unsigned BitSize = NewC.Op0.getValueSizeInBits(); unsigned NewCCMask, ShiftVal; if (NewC.ICmpType != SystemZICMP::SignedOnly && NewC.Op0.getOpcode() == ISD::SHL && isSimpleShift(NewC.Op0, ShiftVal) && (MaskVal >> ShiftVal != 0) && ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal && (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal >> ShiftVal, CmpVal >> ShiftVal, SystemZICMP::Any))) { NewC.Op0 = NewC.Op0.getOperand(0); MaskVal >>= ShiftVal; } else if (NewC.ICmpType != SystemZICMP::SignedOnly && NewC.Op0.getOpcode() == ISD::SRL && isSimpleShift(NewC.Op0, ShiftVal) && (MaskVal << ShiftVal != 0) && ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal && (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal << ShiftVal, CmpVal << ShiftVal, SystemZICMP::UnsignedOnly))) { NewC.Op0 = NewC.Op0.getOperand(0); MaskVal <<= ShiftVal; } else { NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal, NewC.ICmpType); if (!NewCCMask) return; } // Go ahead and make the change. C.Opcode = SystemZISD::TM; C.Op0 = NewC.Op0; if (Mask && Mask->getZExtValue() == MaskVal) C.Op1 = SDValue(Mask, 0); else C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType()); C.CCValid = SystemZ::CCMASK_TM; C.CCMask = NewCCMask; } // See whether the comparison argument contains a redundant AND // and remove it if so. This sometimes happens due to the generic // BRCOND expansion. static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { if (C.Op0.getOpcode() != ISD::AND) return; auto *Mask = dyn_cast(C.Op0.getOperand(1)); if (!Mask) return; KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0)); if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue()) return; C.Op0 = C.Op0.getOperand(0); } // Return a Comparison that tests the condition-code result of intrinsic // node Call against constant integer CC using comparison code Cond. // Opcode is the opcode of the SystemZISD operation for the intrinsic // and CCValid is the set of possible condition-code results. static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, SDValue Call, unsigned CCValid, uint64_t CC, ISD::CondCode Cond) { Comparison C(Call, SDValue(), SDValue()); C.Opcode = Opcode; C.CCValid = CCValid; if (Cond == ISD::SETEQ) // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3. C.CCMask = CC < 4 ? 1 << (3 - CC) : 0; else if (Cond == ISD::SETNE) // ...and the inverse of that. C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1; else if (Cond == ISD::SETLT || Cond == ISD::SETULT) // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3, // always true for CC>3. C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1; else if (Cond == ISD::SETGE || Cond == ISD::SETUGE) // ...and the inverse of that. C.CCMask = CC < 4 ? 
~(~0U << (4 - CC)) : 0; else if (Cond == ISD::SETLE || Cond == ISD::SETULE) // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true), // always true for CC>3. C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1; else if (Cond == ISD::SETGT || Cond == ISD::SETUGT) // ...and the inverse of that. C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0; else llvm_unreachable("Unexpected integer comparison type"); C.CCMask &= CCValid; return C; } // Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1. static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, ISD::CondCode Cond, const SDLoc &DL, SDValue Chain = SDValue(), bool IsSignaling = false) { if (CmpOp1.getOpcode() == ISD::Constant) { assert(!Chain); uint64_t Constant = cast(CmpOp1)->getZExtValue(); unsigned Opcode, CCValid; if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN && CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) && isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid)) return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond); if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN && CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 && isIntrinsicWithCC(CmpOp0, Opcode, CCValid)) return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond); } Comparison C(CmpOp0, CmpOp1, Chain); C.CCMask = CCMaskForCondCode(Cond); if (C.Op0.getValueType().isFloatingPoint()) { C.CCValid = SystemZ::CCMASK_FCMP; if (!C.Chain) C.Opcode = SystemZISD::FCMP; else if (!IsSignaling) C.Opcode = SystemZISD::STRICT_FCMP; else C.Opcode = SystemZISD::STRICT_FCMPS; adjustForFNeg(C); } else { assert(!C.Chain); C.CCValid = SystemZ::CCMASK_ICMP; C.Opcode = SystemZISD::ICMP; // Choose the type of comparison. Equality and inequality tests can // use either signed or unsigned comparisons. The choice also doesn't // matter if both sign bits are known to be clear. In those cases we // want to give the main isel code the freedom to choose whichever // form fits best. if (C.CCMask == SystemZ::CCMASK_CMP_EQ || C.CCMask == SystemZ::CCMASK_CMP_NE || (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1))) C.ICmpType = SystemZICMP::Any; else if (C.CCMask & SystemZ::CCMASK_CMP_UO) C.ICmpType = SystemZICMP::UnsignedOnly; else C.ICmpType = SystemZICMP::SignedOnly; C.CCMask &= ~SystemZ::CCMASK_CMP_UO; adjustForRedundantAnd(DAG, DL, C); adjustZeroCmp(DAG, DL, C); adjustSubwordCmp(DAG, DL, C); adjustForSubtraction(DAG, DL, C); adjustForLTGFR(C); adjustICmpTruncate(DAG, DL, C); } if (shouldSwapCmpOperands(C)) { std::swap(C.Op0, C.Op1); C.CCMask = SystemZ::reverseCCMask(C.CCMask); } adjustForTestUnderMask(DAG, DL, C); return C; } // Emit the comparison instruction described by C. 
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { if (!C.Op1.getNode()) { SDNode *Node; switch (C.Op0.getOpcode()) { case ISD::INTRINSIC_W_CHAIN: Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode); return SDValue(Node, 0); case ISD::INTRINSIC_WO_CHAIN: Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode); return SDValue(Node, Node->getNumValues() - 1); default: llvm_unreachable("Invalid comparison operands"); } } if (C.Opcode == SystemZISD::ICMP) return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1, DAG.getTargetConstant(C.ICmpType, DL, MVT::i32)); if (C.Opcode == SystemZISD::TM) { bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) != bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1)); return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1, DAG.getTargetConstant(RegisterOnly, DL, MVT::i32)); } if (C.Chain) { SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1); } return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1); } // Implement a 32-bit *MUL_LOHI operation by extending both operands to // 64 bits. Extend is the extension type to use. Store the high part // in Hi and the low part in Lo. static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, SDValue Op0, SDValue Op1, SDValue &Hi, SDValue &Lo) { Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0); Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1); SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1); Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, DAG.getConstant(32, DL, MVT::i64)); Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi); Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul); } // Lower a binary operation that produces two VT results, one in each // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation, // and Opcode performs the GR128 operation. Store the even register result // in Even and the odd register result in Odd. static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, unsigned Opcode, SDValue Op0, SDValue Op1, SDValue &Even, SDValue &Odd) { SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1); bool Is32Bit = is32Bit(VT); Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result); Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result); } // Return an i32 value that is 1 if the CC value produced by CCReg is // in the mask CCMask and 0 otherwise. CC is known to have a value // in CCValid, so other values can be ignored. static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, unsigned CCValid, unsigned CCMask) { SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32), DAG.getConstant(0, DL, MVT::i32), DAG.getTargetConstant(CCValid, DL, MVT::i32), DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg}; return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops); } // Return the SystemISD vector comparison operation for CC, or 0 if it cannot // be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP // for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet) // floating-point comparisons, and CmpMode::SignalingFP for strict signaling // floating-point comparisons. 
enum class CmpMode { Int, FP, StrictFP, SignalingFP }; static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) { switch (CC) { case ISD::SETOEQ: case ISD::SETEQ: switch (Mode) { case CmpMode::Int: return SystemZISD::VICMPE; case CmpMode::FP: return SystemZISD::VFCMPE; case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE; case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES; } llvm_unreachable("Bad mode"); case ISD::SETOGE: case ISD::SETGE: switch (Mode) { case CmpMode::Int: return 0; case CmpMode::FP: return SystemZISD::VFCMPHE; case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE; case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES; } llvm_unreachable("Bad mode"); case ISD::SETOGT: case ISD::SETGT: switch (Mode) { case CmpMode::Int: return SystemZISD::VICMPH; case CmpMode::FP: return SystemZISD::VFCMPH; case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH; case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS; } llvm_unreachable("Bad mode"); case ISD::SETUGT: switch (Mode) { case CmpMode::Int: return SystemZISD::VICMPHL; case CmpMode::FP: return 0; case CmpMode::StrictFP: return 0; case CmpMode::SignalingFP: return 0; } llvm_unreachable("Bad mode"); default: return 0; } } // Return the SystemZISD vector comparison operation for CC or its inverse, // or 0 if neither can be done directly. Indicate in Invert whether the // result is for the inverse of CC. Mode is as above. static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, bool &Invert) { if (unsigned Opcode = getVectorComparison(CC, Mode)) { Invert = false; return Opcode; } CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32); if (unsigned Opcode = getVectorComparison(CC, Mode)) { Invert = true; return Opcode; } return 0; } // Return a v2f64 that contains the extended form of elements Start and Start+1 // of v4f32 value Op. If Chain is nonnull, return the strict form. static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, SDValue Op, SDValue Chain) { int Mask[] = { Start, -1, Start + 1, -1 }; Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask); if (Chain) { SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other); return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op); } return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op); } // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode, // producing a result of type VT. If Chain is nonnull, return the strict form. SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &DL, EVT VT, SDValue CmpOp0, SDValue CmpOp1, SDValue Chain) const { // There is no hardware support for v4f32 (unless we have the vector // enhancements facility 1), so extend the vector into two v2f64s // and compare those. 
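  // (Sketch: each v4f32 operand is widened with VEXTEND into a low and a high
  // v2f64 half, the halves are compared separately, and the two v2i64 results
  // are PACKed back together into the requested result type.)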
if (CmpOp0.getValueType() == MVT::v4f32 && !Subtarget.hasVectorEnhancements1()) { SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain); SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain); SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain); SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain); if (Chain) { SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other); SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1); SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1); SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes); SDValue Chains[6] = { H0.getValue(1), L0.getValue(1), H1.getValue(1), L1.getValue(1), HRes.getValue(1), LRes.getValue(1) }; SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); SDValue Ops[2] = { Res, NewChain }; return DAG.getMergeValues(Ops, DL); } SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1); SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1); return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes); } if (Chain) { SDVTList VTs = DAG.getVTList(VT, MVT::Other); return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1); } return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1); } // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing // an integer mask of type VT. If Chain is nonnull, we have a strict // floating-point comparison. If in addition IsSignaling is true, we have // a strict signaling floating-point comparison. SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG, const SDLoc &DL, EVT VT, ISD::CondCode CC, SDValue CmpOp0, SDValue CmpOp1, SDValue Chain, bool IsSignaling) const { bool IsFP = CmpOp0.getValueType().isFloatingPoint(); assert (!Chain || IsFP); assert (!IsSignaling || Chain); CmpMode Mode = IsSignaling ? CmpMode::SignalingFP : Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int; bool Invert = false; SDValue Cmp; switch (CC) { // Handle tests for order using (or (ogt y x) (oge x y)). case ISD::SETUO: Invert = true; LLVM_FALLTHROUGH; case ISD::SETO: { assert(IsFP && "Unexpected integer comparison"); SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), DL, VT, CmpOp1, CmpOp0, Chain); SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode), DL, VT, CmpOp0, CmpOp1, Chain); Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE); if (Chain) Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, LT.getValue(1), GE.getValue(1)); break; } // Handle <> tests using (or (ogt y x) (ogt x y)). case ISD::SETUEQ: Invert = true; LLVM_FALLTHROUGH; case ISD::SETONE: { assert(IsFP && "Unexpected integer comparison"); SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), DL, VT, CmpOp1, CmpOp0, Chain); SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), DL, VT, CmpOp0, CmpOp1, Chain); Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT); if (Chain) Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, LT.getValue(1), GT.getValue(1)); break; } // Otherwise a single comparison is enough. It doesn't really // matter whether we try the inversion or the swap first, since // there are no cases where both work. 
default: if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert)) Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain); else { CC = ISD::getSetCCSwappedOperands(CC); if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert)) Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain); else llvm_unreachable("Unhandled comparison"); } if (Chain) Chain = Cmp.getValue(1); break; } if (Invert) { SDValue Mask = DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64)); Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask); } if (Chain && Chain.getNode() != Cmp.getNode()) { SDValue Ops[2] = { Cmp, Chain }; Cmp = DAG.getMergeValues(Ops, DL); } return Cmp; } SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue CmpOp0 = Op.getOperand(0); SDValue CmpOp1 = Op.getOperand(1); ISD::CondCode CC = cast(Op.getOperand(2))->get(); SDLoc DL(Op); EVT VT = Op.getValueType(); if (VT.isVector()) return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1); Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); SDValue CCReg = emitCmp(DAG, DL, C); return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask); } SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op, SelectionDAG &DAG, bool IsSignaling) const { SDValue Chain = Op.getOperand(0); SDValue CmpOp0 = Op.getOperand(1); SDValue CmpOp1 = Op.getOperand(2); ISD::CondCode CC = cast(Op.getOperand(3))->get(); SDLoc DL(Op); EVT VT = Op.getNode()->getValueType(0); if (VT.isVector()) { SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1, Chain, IsSignaling); return Res.getValue(Op.getResNo()); } Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling)); SDValue CCReg = emitCmp(DAG, DL, C); CCReg->setFlags(Op->getFlags()); SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask); SDValue Ops[2] = { Result, CCReg.getValue(1) }; return DAG.getMergeValues(Ops, DL); } SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { ISD::CondCode CC = cast(Op.getOperand(1))->get(); SDValue CmpOp0 = Op.getOperand(2); SDValue CmpOp1 = Op.getOperand(3); SDValue Dest = Op.getOperand(4); SDLoc DL(Op); Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); SDValue CCReg = emitCmp(DAG, DL, C); return DAG.getNode( SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0), DAG.getTargetConstant(C.CCValid, DL, MVT::i32), DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg); } // Return true if Pos is CmpOp and Neg is the negative of CmpOp, // allowing Pos and Neg to be wider than CmpOp. static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) { return (Neg.getOpcode() == ISD::SUB && Neg.getOperand(0).getOpcode() == ISD::Constant && cast(Neg.getOperand(0))->getZExtValue() == 0 && Neg.getOperand(1) == Pos && (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND && Pos.getOperand(0) == CmpOp))); } // Return the absolute or negative absolute of Op; IsNegative decides which. 
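// (Taken together with isAbsolute above, this lets lowerSELECT_CC below turn
// a pattern such as "x < 0 ? -x : x" compared against zero into a plain
// ISD::ABS, which its comment associates with LPGFR/LNGFR.)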
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, bool IsNegative) { Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op); if (IsNegative) Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(), DAG.getConstant(0, DL, Op.getValueType()), Op); return Op; } SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue CmpOp0 = Op.getOperand(0); SDValue CmpOp1 = Op.getOperand(1); SDValue TrueOp = Op.getOperand(2); SDValue FalseOp = Op.getOperand(3); ISD::CondCode CC = cast(Op.getOperand(4))->get(); SDLoc DL(Op); Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); // Check for absolute and negative-absolute selections, including those // where the comparison value is sign-extended (for LPGFR and LNGFR). // This check supplements the one in DAGCombiner. if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ && C.CCMask != SystemZ::CCMASK_CMP_NE && C.Op1.getOpcode() == ISD::Constant && cast(C.Op1)->getZExtValue() == 0) { if (isAbsolute(C.Op0, TrueOp, FalseOp)) return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT); if (isAbsolute(C.Op0, FalseOp, TrueOp)) return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT); } SDValue CCReg = emitCmp(DAG, DL, C); SDValue Ops[] = {TrueOp, FalseOp, DAG.getTargetConstant(C.CCValid, DL, MVT::i32), DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg}; return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops); } SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, SelectionDAG &DAG) const { SDLoc DL(Node); const GlobalValue *GV = Node->getGlobal(); int64_t Offset = Node->getOffset(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); CodeModel::Model CM = DAG.getTarget().getCodeModel(); SDValue Result; if (Subtarget.isPC32DBLSymbol(GV, CM)) { if (isInt<32>(Offset)) { // Assign anchors at 1<<12 byte boundaries. uint64_t Anchor = Offset & ~uint64_t(0xfff); Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor); Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); // The offset can be folded into the address if it is aligned to a // halfword. Offset -= Anchor; if (Offset != 0 && (Offset & 1) == 0) { SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset); Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result); Offset = 0; } } else { // Conservatively load a constant offset greater than 32 bits into a // register below. Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT); Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); } } else { Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT); Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo::getGOT(DAG.getMachineFunction())); } // If there was a non-zero offset that we didn't fold, create an explicit // addition for it. if (Offset != 0) Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result, DAG.getConstant(Offset, DL, PtrVT)); return Result; } SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node, SelectionDAG &DAG, unsigned Opcode, SDValue GOTOffset) const { SDLoc DL(Node); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Chain = DAG.getEntryNode(); SDValue Glue; if (DAG.getMachineFunction().getFunction().getCallingConv() == CallingConv::GHC) report_fatal_error("In GHC calling convention TLS is not supported"); // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12. 
SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue); Glue = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue); Glue = Chain.getValue(1); // The first call operand is the chain and the second is the TLS symbol. SmallVector Ops; Ops.push_back(Chain); Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL, Node->getValueType(0), 0, 0)); // Add argument registers to the end of the list so that they are // known live into the call. Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT)); Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT)); // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); const uint32_t *Mask = TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); // Glue the call to the argument copies. Ops.push_back(Glue); // Emit the call. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); Chain = DAG.getNode(Opcode, DL, NodeTys, Ops); Glue = Chain.getValue(1); // Copy the return value from %r2. return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue); } SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL, SelectionDAG &DAG) const { SDValue Chain = DAG.getEntryNode(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); // The high part of the thread pointer is in access register 0. SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32); TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi); // The low part of the thread pointer is in access register 1. SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32); TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo); // Merge them into a single 64-bit address. SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi, DAG.getConstant(32, DL, PtrVT)); return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo); } SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SelectionDAG &DAG) const { if (DAG.getTarget().useEmulatedTLS()) return LowerToTLSEmulatedModel(Node, DAG); SDLoc DL(Node); const GlobalValue *GV = Node->getGlobal(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); TLSModel::Model model = DAG.getTarget().getTLSModel(GV); if (DAG.getMachineFunction().getFunction().getCallingConv() == CallingConv::GHC) report_fatal_error("In GHC calling convention TLS is not supported"); SDValue TP = lowerThreadPointer(DL, DAG); // Get the offset of GA from the thread pointer, based on the TLS model. SDValue Offset; switch (model) { case TLSModel::GeneralDynamic: { // Load the GOT offset of the tls_index (module ID / per-symbol offset). SystemZConstantPoolValue *CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD); Offset = DAG.getConstantPool(CPV, PtrVT, Align(8)); Offset = DAG.getLoad( PtrVT, DL, DAG.getEntryNode(), Offset, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); // Call __tls_get_offset to retrieve the offset. Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset); break; } case TLSModel::LocalDynamic: { // Load the GOT offset of the module ID. 
SystemZConstantPoolValue *CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM); Offset = DAG.getConstantPool(CPV, PtrVT, Align(8)); Offset = DAG.getLoad( PtrVT, DL, DAG.getEntryNode(), Offset, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); // Call __tls_get_offset to retrieve the module base offset. Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset); // Note: The SystemZLDCleanupPass will remove redundant computations // of the module base offset. Count total number of local-dynamic // accesses to trigger execution of that pass. SystemZMachineFunctionInfo* MFI = DAG.getMachineFunction().getInfo(); MFI->incNumLocalDynamicTLSAccesses(); // Add the per-symbol offset. CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF); SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8)); DTPOffset = DAG.getLoad( PtrVT, DL, DAG.getEntryNode(), DTPOffset, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset); break; } case TLSModel::InitialExec: { // Load the offset from the GOT. Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_INDNTPOFF); Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset); Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset, MachinePointerInfo::getGOT(DAG.getMachineFunction())); break; } case TLSModel::LocalExec: { // Force the offset into the constant pool and load it from there. SystemZConstantPoolValue *CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); Offset = DAG.getConstantPool(CPV, PtrVT, Align(8)); Offset = DAG.getLoad( PtrVT, DL, DAG.getEntryNode(), Offset, MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); break; } } // Add the base and offset together. return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset); } SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node, SelectionDAG &DAG) const { SDLoc DL(Node); const BlockAddress *BA = Node->getBlockAddress(); int64_t Offset = Node->getOffset(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset); Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); return Result; } SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT, SelectionDAG &DAG) const { SDLoc DL(JT); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); // Use LARL to load the address of the table. return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); } SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP, SelectionDAG &DAG) const { SDLoc DL(CP); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Result; if (CP->isMachineConstantPoolEntry()) Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign()); else Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(), CP->getOffset()); // Use LARL to load the address of the constant pool entry. return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); } SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { auto *TFL = static_cast(Subtarget.getFrameLowering()); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MFI.setFrameAddressIsTaken(true); SDLoc DL(Op); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); // Return null if the back chain is not present. 
bool HasBackChain = MF.getFunction().hasFnAttribute("backchain"); if (TFL->usePackedStack(MF) && !HasBackChain) return DAG.getConstant(0, DL, PtrVT); // By definition, the frame address is the address of the back chain. int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF); SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT); // FIXME The frontend should detect this case. if (Depth > 0) { report_fatal_error("Unsupported stack frame traversal count"); } return BackChain; } SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MFI.setReturnAddressIsTaken(true); if (verifyReturnAddressArgumentIsConstant(Op, DAG)) return SDValue(); SDLoc DL(Op); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); // FIXME The frontend should detect this case. if (Depth > 0) { report_fatal_error("Unsupported stack frame traversal count"); } // Return R14D, which has the return address. Mark it an implicit live-in. unsigned LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass); return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT); } SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); SDValue In = Op.getOperand(0); EVT InVT = In.getValueType(); EVT ResVT = Op.getValueType(); // Convert loads directly. This is normally done by DAGCombiner, // but we need this case for bitcasts that are created during lowering // and which are then lowered themselves. if (auto *LoadN = dyn_cast(In)) if (ISD::isNormalLoad(LoadN)) { SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(), LoadN->getBasePtr(), LoadN->getMemOperand()); // Update the chain uses. DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1)); return NewLoad; } if (InVT == MVT::i32 && ResVT == MVT::f32) { SDValue In64; if (Subtarget.hasHighWord()) { SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64); In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL, MVT::i64, SDValue(U64, 0), In); } else { In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In); In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64, DAG.getConstant(32, DL, MVT::i64)); } SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64); return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL, MVT::f32, Out64); } if (InVT == MVT::f32 && ResVT == MVT::i32) { SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64); SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL, MVT::f64, SDValue(U64, 0), In); SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64); if (Subtarget.hasHighWord()) return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL, MVT::i32, Out64); SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64, DAG.getConstant(32, DL, MVT::i64)); return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift); } llvm_unreachable("Unexpected bitcast combination"); } SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); SystemZMachineFunctionInfo *FuncInfo = MF.getInfo(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Chain = Op.getOperand(0); SDValue Addr = Op.getOperand(1); const Value *SV = cast(Op.getOperand(2))->getValue(); SDLoc DL(Op); // The initial values of each field. 
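  // (These four 8-byte fields correspond to what is assumed here to be the
  // s390x va_list layout: the first unnamed GPR and FPR argument numbers,
  // followed by the overflow argument area and register save area pointers.)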
const unsigned NumFields = 4; SDValue Fields[NumFields] = { DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT), DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT), DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT), DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT) }; // Store each field into its respective slot. SDValue MemOps[NumFields]; unsigned Offset = 0; for (unsigned I = 0; I < NumFields; ++I) { SDValue FieldAddr = Addr; if (Offset != 0) FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr, DAG.getIntPtrConstant(Offset, DL)); MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr, MachinePointerInfo(SV, Offset)); Offset += 8; } return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); } SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue DstPtr = Op.getOperand(1); SDValue SrcPtr = Op.getOperand(2); const Value *DstSV = cast(Op.getOperand(3))->getValue(); const Value *SrcSV = cast(Op.getOperand(4))->getValue(); SDLoc DL(Op); return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL), Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false, /*isTailCall*/ false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } SDValue SystemZTargetLowering:: lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); MachineFunction &MF = DAG.getMachineFunction(); bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack"); bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain"); SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); SDValue Align = Op.getOperand(2); SDLoc DL(Op); // If user has set the no alignment function attribute, ignore // alloca alignments. uint64_t AlignVal = (RealignOpt ? cast(Align)->getZExtValue() : 0); uint64_t StackAlign = TFI->getStackAlignment(); uint64_t RequiredAlign = std::max(AlignVal, StackAlign); uint64_t ExtraAlignSpace = RequiredAlign - StackAlign; Register SPReg = getStackPointerRegisterToSaveRestore(); SDValue NeededSpace = Size; // Get a reference to the stack pointer. SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64); // If we need a backchain, save it now. SDValue Backchain; if (StoreBackchain) Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG), MachinePointerInfo()); // Add extra space for alignment if needed. if (ExtraAlignSpace) NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace, DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); // Get the new stack pointer value. SDValue NewSP; if (hasInlineStackProbe(MF)) { NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL, DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace); Chain = NewSP.getValue(1); } else { NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace); // Copy the new stack pointer back. Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP); } // The allocated data lives above the 160 bytes allocated for the standard // frame, plus any outgoing stack arguments. We don't know how much that // amounts to yet, so emit a special ADJDYNALLOC placeholder. SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64); SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust); // Dynamically realign if needed. 
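  // (For instance, with an 8-byte stack alignment an alloca requesting
  // 32-byte alignment gets ExtraAlignSpace == 24 extra bytes above, and the
  // result is then rounded down to a 32-byte boundary here.)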
if (RequiredAlign > StackAlign) { Result = DAG.getNode(ISD::ADD, DL, MVT::i64, Result, DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); Result = DAG.getNode(ISD::AND, DL, MVT::i64, Result, DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64)); } if (StoreBackchain) Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG), MachinePointerInfo()); SDValue Ops[2] = { Result, Chain }; return DAG.getMergeValues(Ops, DL); } SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET( SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64); } SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc DL(Op); SDValue Ops[2]; if (is32Bit(VT)) // Just do a normal 64-bit multiplication and extract the results. // We define this so that it can be used for constant division. lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); else if (Subtarget.hasMiscellaneousExtensions2()) // SystemZISD::SMUL_LOHI returns the low result in the odd register and // the high result in the even register. ISD::SMUL_LOHI is defined to // return the low half first, so the results are in reverse order. lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI, Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); else { // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI: // // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64) // // but using the fact that the upper halves are either all zeros // or all ones: // // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64) // // and grouping the right terms together since they are quicker than the // multiplication: // // (ll * rl) - (((lh & rl) + (ll & rh)) << 64) SDValue C63 = DAG.getConstant(63, DL, MVT::i64); SDValue LL = Op.getOperand(0); SDValue RL = Op.getOperand(1); SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63); SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63); // SystemZISD::UMUL_LOHI returns the low result in the odd register and // the high result in the even register. ISD::SMUL_LOHI is defined to // return the low half first, so the results are in reverse order. lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI, LL, RL, Ops[1], Ops[0]); SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH); SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL); SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL); Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum); } return DAG.getMergeValues(Ops, DL); } SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc DL(Op); SDValue Ops[2]; if (is32Bit(VT)) // Just do a normal 64-bit multiplication and extract the results. // We define this so that it can be used for constant division. lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); else // SystemZISD::UMUL_LOHI returns the low result in the odd register and // the high result in the even register. ISD::UMUL_LOHI is defined to // return the low half first, so the results are in reverse order. lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI, Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); return DAG.getMergeValues(Ops, DL); } SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op, SelectionDAG &DAG) const { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); EVT VT = Op.getValueType(); SDLoc DL(Op); // We use DSGF for 32-bit division. 
  // This means the first operand must
  // always be 64-bit, and the second operand should be 32-bit whenever
  // that is possible, to improve performance.
  if (is32Bit(VT))
    Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
  else if (DAG.ComputeNumSignBits(Op1) > 32)
    Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);

  // DSG(F) returns the remainder in the even register and the
  // quotient in the odd register.
  SDValue Ops[2];
  lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, DL);
}

SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
                                            SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);

  // DL(G) returns the remainder in the even register and the
  // quotient in the odd register.
  SDValue Ops[2];
  lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM, Op.getOperand(0),
                   Op.getOperand(1), Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, DL);
}

SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");

  // Get the known-zero masks for each operand.
  SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
  KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
                        DAG.computeKnownBits(Ops[1])};

  // See if the upper 32 bits of one operand and the lower 32 bits of the
  // other are known zero.  They are the low and high operands respectively.
  uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
                       Known[1].Zero.getZExtValue() };
  unsigned High, Low;
  if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
    High = 1, Low = 0;
  else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
    High = 0, Low = 1;
  else
    return Op;

  SDValue LowOp = Ops[Low];
  SDValue HighOp = Ops[High];

  // If the high part is a constant, we're better off using IILH.
  if (HighOp.getOpcode() == ISD::Constant)
    return Op;

  // If the low part is a constant that is outside the range of LHI,
  // then we're better off using IILF.
  if (LowOp.getOpcode() == ISD::Constant) {
    int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
    if (!isInt<16>(Value))
      return Op;
  }

  // Check whether the high part is an AND that doesn't change the
  // high 32 bits and just masks out low bits.  We can skip it if so.
  if (HighOp.getOpcode() == ISD::AND &&
      HighOp.getOperand(1).getOpcode() == ISD::Constant) {
    SDValue HighOp0 = HighOp.getOperand(0);
    uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
    if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
      HighOp = HighOp0;
  }

  // Take advantage of the fact that all GR32 operations only change the
  // low 32 bits by truncating Low to an i32 and inserting it directly
  // using a subreg.  The interesting cases are those where the truncation
  // can be folded.
  SDLoc DL(Op);
  SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
  return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
                                   MVT::i64, HighOp, Low32);
}

// Lower SADDO/SSUBO/UADDO/USUBO nodes.
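// (Each of these is mapped to a SystemZISD node that also produces the
// condition code; the CCValid/CCMask pair chosen below is what emitSETCC
// later turns into a 0/1 value.  For instance, a signed add that overflows
// sets CC to 3, which is what CCMASK_ARITH_OVERFLOW tests.)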
SDValue SystemZTargetLowering::lowerXALUO(SDValue Op, SelectionDAG &DAG) const { SDNode *N = Op.getNode(); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); SDLoc DL(N); unsigned BaseOp = 0; unsigned CCValid = 0; unsigned CCMask = 0; switch (Op.getOpcode()) { default: llvm_unreachable("Unknown instruction!"); case ISD::SADDO: BaseOp = SystemZISD::SADDO; CCValid = SystemZ::CCMASK_ARITH; CCMask = SystemZ::CCMASK_ARITH_OVERFLOW; break; case ISD::SSUBO: BaseOp = SystemZISD::SSUBO; CCValid = SystemZ::CCMASK_ARITH; CCMask = SystemZ::CCMASK_ARITH_OVERFLOW; break; case ISD::UADDO: BaseOp = SystemZISD::UADDO; CCValid = SystemZ::CCMASK_LOGICAL; CCMask = SystemZ::CCMASK_LOGICAL_CARRY; break; case ISD::USUBO: BaseOp = SystemZISD::USUBO; CCValid = SystemZ::CCMASK_LOGICAL; CCMask = SystemZ::CCMASK_LOGICAL_BORROW; break; } SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32); SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS); SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask); if (N->getValueType(1) == MVT::i1) SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC); return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC); } static bool isAddCarryChain(SDValue Carry) { while (Carry.getOpcode() == ISD::ADDCARRY) Carry = Carry.getOperand(2); return Carry.getOpcode() == ISD::UADDO; } static bool isSubBorrowChain(SDValue Carry) { while (Carry.getOpcode() == ISD::SUBCARRY) Carry = Carry.getOperand(2); return Carry.getOpcode() == ISD::USUBO; } // Lower ADDCARRY/SUBCARRY nodes. SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) const { SDNode *N = Op.getNode(); MVT VT = N->getSimpleValueType(0); // Let legalize expand this if it isn't a legal type yet. if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); SDValue Carry = Op.getOperand(2); SDLoc DL(N); unsigned BaseOp = 0; unsigned CCValid = 0; unsigned CCMask = 0; switch (Op.getOpcode()) { default: llvm_unreachable("Unknown instruction!"); case ISD::ADDCARRY: if (!isAddCarryChain(Carry)) return SDValue(); BaseOp = SystemZISD::ADDCARRY; CCValid = SystemZ::CCMASK_LOGICAL; CCMask = SystemZ::CCMASK_LOGICAL_CARRY; break; case ISD::SUBCARRY: if (!isSubBorrowChain(Carry)) return SDValue(); BaseOp = SystemZISD::SUBCARRY; CCValid = SystemZ::CCMASK_LOGICAL; CCMask = SystemZ::CCMASK_LOGICAL_BORROW; break; } // Set the condition code from the carry flag. Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry, DAG.getConstant(CCValid, DL, MVT::i32), DAG.getConstant(CCMask, DL, MVT::i32)); SDVTList VTs = DAG.getVTList(VT, MVT::i32); SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry); SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask); if (N->getValueType(1) == MVT::i1) SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC); return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC); } SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc DL(Op); Op = Op.getOperand(0); // Handle vector types via VPOPCT. 
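  // (VPOPCT counts the set bits in each byte of the v16i8 input; the cases
  // below then fold those per-byte counts into wider lanes: a shift-and-add
  // pairs adjacent bytes for i16 elements, while VSUM against a zero vector
  // adds the byte counts within each word or doubleword for i32/i64.)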
  if (VT.isVector()) {
    Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
    Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
    switch (VT.getScalarSizeInBits()) {
    case 8:
      break;
    case 16: {
      Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
      SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
      SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
      Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
      Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
      break;
    }
    case 32: {
      SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
                                            DAG.getConstant(0, DL, MVT::i32));
      Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
      break;
    }
    case 64: {
      SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
                                            DAG.getConstant(0, DL, MVT::i32));
      Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
      Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
      break;
    }
    default:
      llvm_unreachable("Unexpected type");
    }
    return Op;
  }

  // Get the known-zero mask for the operand.
  KnownBits Known = DAG.computeKnownBits(Op);
  unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
  if (NumSignificantBits == 0)
    return DAG.getConstant(0, DL, VT);

  // Skip known-zero high parts of the operand.
  int64_t OrigBitSize = VT.getSizeInBits();
  int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
  BitSize = std::min(BitSize, OrigBitSize);

  // The POPCNT instruction counts the number of bits in each byte.
  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
  Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);

  // Add up per-byte counts in a binary tree.  All bits of Op at
  // position larger than BitSize remain zero throughout.
  for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
    SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
    if (BitSize != OrigBitSize)
      Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
                        DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
    Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
  }

  // Extract overall result from high byte.
  if (BitSize > 8)
    Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                     DAG.getConstant(BitSize - 8, DL, VT));

  return Op;
}

SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
      cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
  SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
      cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());

  // The only fence that needs an instruction is a sequentially-consistent
  // cross-thread fence.
  if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
      FenceSSID == SyncScope::System) {
    return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
                                      Op.getOperand(0)),
                   0);
  }

  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
  return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
}

// Op is an atomic load.  Lower it into a normal volatile load.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
                                                SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
                        Node->getChain(), Node->getBasePtr(),
                        Node->getMemoryVT(), Node->getMemOperand());
}

// Op is an atomic store.  Lower it into a normal volatile store.
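// (This relies on z/Architecture's strong memory ordering: plain aligned
// loads and stores already give the acquire/release behaviour the C++ memory
// model asks for, so only sequentially-consistent stores need the extra
// Serialize barrier emitted below, and getTargetMMOFlags marks these accesses
// volatile so DAGCombine will not merge or reorder them.)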
SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const { auto *Node = cast(Op.getNode()); SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(), Node->getBasePtr(), Node->getMemoryVT(), Node->getMemOperand()); // We have to enforce sequential consistency by performing a // serialization operation after the store. if (Node->getOrdering() == AtomicOrdering::SequentiallyConsistent) Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op), MVT::Other, Chain), 0); return Chain; } // Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first // two into the fullword ATOMIC_LOADW_* operation given by Opcode. SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG, unsigned Opcode) const { auto *Node = cast(Op.getNode()); // 32-bit operations need no code outside the main loop. EVT NarrowVT = Node->getMemoryVT(); EVT WideVT = MVT::i32; if (NarrowVT == WideVT) return Op; int64_t BitSize = NarrowVT.getSizeInBits(); SDValue ChainIn = Node->getChain(); SDValue Addr = Node->getBasePtr(); SDValue Src2 = Node->getVal(); MachineMemOperand *MMO = Node->getMemOperand(); SDLoc DL(Node); EVT PtrVT = Addr.getValueType(); // Convert atomic subtracts of constants into additions. if (Opcode == SystemZISD::ATOMIC_LOADW_SUB) if (auto *Const = dyn_cast(Src2)) { Opcode = SystemZISD::ATOMIC_LOADW_ADD; Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType()); } // Get the address of the containing word. SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, DAG.getConstant(-4, DL, PtrVT)); // Get the number of bits that the word must be rotated left in order // to bring the field to the top bits of a GR32. SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, DAG.getConstant(3, DL, PtrVT)); BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); // Get the complementing shift amount, for rotating a field in the top // bits back to its proper position. SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, DAG.getConstant(0, DL, WideVT), BitShift); // Extend the source operand to 32 bits and prepare it for the inner loop. // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other // operations require the source to be shifted in advance. (This shift // can be folded if the source is constant.) For AND and NAND, the lower // bits must be set, while for other opcodes they should be left clear. if (Opcode != SystemZISD::ATOMIC_SWAPW) Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2, DAG.getConstant(32 - BitSize, DL, WideVT)); if (Opcode == SystemZISD::ATOMIC_LOADW_AND || Opcode == SystemZISD::ATOMIC_LOADW_NAND) Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2, DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT)); // Construct the ATOMIC_LOADW_* node. SDVTList VTList = DAG.getVTList(WideVT, MVT::Other); SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift, DAG.getConstant(BitSize, DL, WideVT) }; SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops, NarrowVT, MMO); // Rotate the result of the final CS so that the field is in the lower // bits of a GR32, then truncate it. SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift, DAG.getConstant(BitSize, DL, WideVT)); SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift); SDValue RetOps[2] = { Result, AtomicOp.getValue(1) }; return DAG.getMergeValues(RetOps, DL); } // Op is an ATOMIC_LOAD_SUB operation. 
Lower 8- and 16-bit operations // into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit // operations into additions. SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const { auto *Node = cast(Op.getNode()); EVT MemVT = Node->getMemoryVT(); if (MemVT == MVT::i32 || MemVT == MVT::i64) { // A full-width operation. assert(Op.getValueType() == MemVT && "Mismatched VTs"); SDValue Src2 = Node->getVal(); SDValue NegSrc2; SDLoc DL(Src2); if (auto *Op2 = dyn_cast(Src2)) { // Use an addition if the operand is constant and either LAA(G) is // available or the negative value is in the range of A(G)FHI. int64_t Value = (-Op2->getAPIntValue()).getSExtValue(); if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1()) NegSrc2 = DAG.getConstant(Value, DL, MemVT); } else if (Subtarget.hasInterlockedAccess1()) // Use LAA(G) if available. NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2); if (NegSrc2.getNode()) return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT, Node->getChain(), Node->getBasePtr(), NegSrc2, Node->getMemOperand()); // Use the node as-is. return Op; } return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB); } // Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node. SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const { auto *Node = cast(Op.getNode()); SDValue ChainIn = Node->getOperand(0); SDValue Addr = Node->getOperand(1); SDValue CmpVal = Node->getOperand(2); SDValue SwapVal = Node->getOperand(3); MachineMemOperand *MMO = Node->getMemOperand(); SDLoc DL(Node); // We have native support for 32-bit and 64-bit compare and swap, but we // still need to expand extracting the "success" result from the CC. EVT NarrowVT = Node->getMemoryVT(); EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32; if (NarrowVT == WideVT) { SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other); SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal }; SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP, DL, Tys, Ops, NarrowVT, MMO); SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1), SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ); DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0)); DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success); DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2)); return SDValue(); } // Convert 8-bit and 16-bit compare and swap to a loop, implemented // via a fullword ATOMIC_CMP_SWAPW operation. int64_t BitSize = NarrowVT.getSizeInBits(); EVT PtrVT = Addr.getValueType(); // Get the address of the containing word. SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, DAG.getConstant(-4, DL, PtrVT)); // Get the number of bits that the word must be rotated left in order // to bring the field to the top bits of a GR32. SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, DAG.getConstant(3, DL, PtrVT)); BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); // Get the complementing shift amount, for rotating a field in the top // bits back to its proper position. SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, DAG.getConstant(0, DL, WideVT), BitShift); // Construct the ATOMIC_CMP_SWAPW node. 
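  // (Its operands are, in order: the incoming chain, the 4-byte-aligned word
  // address, the expected value, the replacement value, the left-rotate
  // amount that brings the subword field to the top of a GR32, the
  // complementary right-rotate amount, and the field width in bits.)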
  SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
  SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
                    NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
  SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
                                             VTList, Ops, NarrowVT, MMO);
  SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
                              SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);

  DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
  return SDValue();
}

MachineMemOperand::Flags
SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
  // Because of how we convert atomic_load and atomic_store to normal loads and
  // stores in the DAG, we need to ensure that the MMOs are marked volatile
  // since DAGCombine hasn't been updated to account for atomic, but non
  // volatile loads.  (See D57601)
  if (auto *SI = dyn_cast<StoreInst>(&I))
    if (SI->isAtomic())
      return MachineMemOperand::MOVolatile;
  if (auto *LI = dyn_cast<LoadInst>(&I))
    if (LI->isAtomic())
      return MachineMemOperand::MOVolatile;
  if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
    if (AI->isAtomic())
      return MachineMemOperand::MOVolatile;
  if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
    if (AI->isAtomic())
      return MachineMemOperand::MOVolatile;
  return MachineMemOperand::MONone;
}

SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    report_fatal_error("Variable-sized stack allocations are not supported "
                       "in GHC calling convention");
  return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
                            SystemZ::R15D, Op.getValueType());
}

SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
  bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");

  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    report_fatal_error("Variable-sized stack allocations are not supported "
                       "in GHC calling convention");

  SDValue Chain = Op.getOperand(0);
  SDValue NewSP = Op.getOperand(1);
  SDValue Backchain;
  SDLoc DL(Op);

  if (StoreBackchain) {
    SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, MVT::i64);
    Backchain = DAG.getLoad(MVT::i64, DL, Chain,
                            getBackchainAddress(OldSP, DAG),
                            MachinePointerInfo());
  }

  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R15D, NewSP);

  if (StoreBackchain)
    Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
                         MachinePointerInfo());

  return Chain;
}

SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
                                             SelectionDAG &DAG) const {
  bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
  if (!IsData)
    // Just preserve the chain.
    return Op.getOperand(0);

  SDLoc DL(Op);
  bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
  unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
  auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
  SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
                   Op.getOperand(1)};
  return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
                                 Node->getVTList(), Ops,
                                 Node->getMemoryVT(), Node->getMemOperand());
}

// Convert condition code in CCReg to an i32 value.
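// (IPM deposits the 2-bit condition code at bit position 28 of the 32-bit
// result, so the logical shift right by SystemZ::IPM_CC below leaves the raw
// CC value 0-3 in the low two bits.)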
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) { SDLoc DL(CCReg); SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg); return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); } SDValue SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned Opcode, CCValid; if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) { assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode); SDValue CC = getCCResult(DAG, SDValue(Node, 0)); DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC); return SDValue(); } return SDValue(); } SDValue SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned Opcode, CCValid; if (isIntrinsicWithCC(Op, Opcode, CCValid)) { SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode); if (Op->getNumValues() == 1) return getCCResult(DAG, SDValue(Node, 0)); assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result"); return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1))); } unsigned Id = cast(Op.getOperand(0))->getZExtValue(); switch (Id) { case Intrinsic::thread_pointer: return lowerThreadPointer(SDLoc(Op), DAG); case Intrinsic::s390_vpdi: return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); case Intrinsic::s390_vperm: return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); case Intrinsic::s390_vuphb: case Intrinsic::s390_vuphh: case Intrinsic::s390_vuphf: return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(), Op.getOperand(1)); case Intrinsic::s390_vuplhb: case Intrinsic::s390_vuplhh: case Intrinsic::s390_vuplhf: return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(), Op.getOperand(1)); case Intrinsic::s390_vuplb: case Intrinsic::s390_vuplhw: case Intrinsic::s390_vuplf: return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(), Op.getOperand(1)); case Intrinsic::s390_vupllb: case Intrinsic::s390_vupllh: case Intrinsic::s390_vupllf: return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(), Op.getOperand(1)); case Intrinsic::s390_vsumb: case Intrinsic::s390_vsumh: case Intrinsic::s390_vsumgh: case Intrinsic::s390_vsumgf: case Intrinsic::s390_vsumqf: case Intrinsic::s390_vsumqg: return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); } return SDValue(); } namespace { // Says that SystemZISD operation Opcode can be used to perform the equivalent // of a VPERM with permute vector Bytes. If Opcode takes three operands, // Operand is the constant third operand, otherwise it is the number of // bytes in each element of the result. 
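// Byte selectors follow the VPERM convention: values 0-15 pick bytes from the
// first source operand and 16-31 from the second, so e.g. the VMRHG entry
// below yields the high doubleword of the first operand followed by the high
// doubleword of the second.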
struct Permute { unsigned Opcode; unsigned Operand; unsigned char Bytes[SystemZ::VectorBytes]; }; } static const Permute PermuteForms[] = { // VMRHG { SystemZISD::MERGE_HIGH, 8, { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } }, // VMRHF { SystemZISD::MERGE_HIGH, 4, { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } }, // VMRHH { SystemZISD::MERGE_HIGH, 2, { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } }, // VMRHB { SystemZISD::MERGE_HIGH, 1, { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } }, // VMRLG { SystemZISD::MERGE_LOW, 8, { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } }, // VMRLF { SystemZISD::MERGE_LOW, 4, { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }, // VMRLH { SystemZISD::MERGE_LOW, 2, { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } }, // VMRLB { SystemZISD::MERGE_LOW, 1, { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } }, // VPKG { SystemZISD::PACK, 4, { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } }, // VPKF { SystemZISD::PACK, 2, { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } }, // VPKH { SystemZISD::PACK, 1, { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } }, // VPDI V1, V2, 4 (low half of V1, high half of V2) { SystemZISD::PERMUTE_DWORDS, 4, { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } }, // VPDI V1, V2, 1 (high half of V1, low half of V2) { SystemZISD::PERMUTE_DWORDS, 1, { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } } }; // Called after matching a vector shuffle against a particular pattern. // Both the original shuffle and the pattern have two vector operands. // OpNos[0] is the operand of the original shuffle that should be used for // operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything. // OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and // set OpNo0 and OpNo1 to the shuffle operands that should actually be used // for operands 0 and 1 of the pattern. static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) { if (OpNos[0] < 0) { if (OpNos[1] < 0) return false; OpNo0 = OpNo1 = OpNos[1]; } else if (OpNos[1] < 0) { OpNo0 = OpNo1 = OpNos[0]; } else { OpNo0 = OpNos[0]; OpNo1 = OpNos[1]; } return true; } // Bytes is a VPERM-like permute vector, except that -1 is used for // undefined bytes. Return true if the VPERM can be implemented using P. // When returning true set OpNo0 to the VPERM operand that should be // used for operand 0 of P and likewise OpNo1 for operand 1 of P. // // For example, if swapping the VPERM operands allows P to match, OpNo0 // will be 1 and OpNo1 will be 0. If instead Bytes only refers to one // operand, but rewriting it to use two duplicated operands allows it to // match P, then OpNo0 and OpNo1 will be the same. static bool matchPermute(const SmallVectorImpl &Bytes, const Permute &P, unsigned &OpNo0, unsigned &OpNo1) { int OpNos[] = { -1, -1 }; for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { int Elt = Bytes[I]; if (Elt >= 0) { // Make sure that the two permute vectors use the same suboperand // byte number. Only the operand numbers (the high bits) are // allowed to differ. if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1)) return false; int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes; int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes; // Make sure that the operand mappings are consistent with previous // elements. 
if (OpNos[ModelOpNo] == 1 - RealOpNo) return false; OpNos[ModelOpNo] = RealOpNo; } } return chooseShuffleOpNos(OpNos, OpNo0, OpNo1); } // As above, but search for a matching permute. static const Permute *matchPermute(const SmallVectorImpl &Bytes, unsigned &OpNo0, unsigned &OpNo1) { for (auto &P : PermuteForms) if (matchPermute(Bytes, P, OpNo0, OpNo1)) return &P; return nullptr; } // Bytes is a VPERM-like permute vector, except that -1 is used for // undefined bytes. This permute is an operand of an outer permute. // See whether redistributing the -1 bytes gives a shuffle that can be // implemented using P. If so, set Transform to a VPERM-like permute vector // that, when applied to the result of P, gives the original permute in Bytes. static bool matchDoublePermute(const SmallVectorImpl &Bytes, const Permute &P, SmallVectorImpl &Transform) { unsigned To = 0; for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) { int Elt = Bytes[From]; if (Elt < 0) // Byte number From of the result is undefined. Transform[From] = -1; else { while (P.Bytes[To] != Elt) { To += 1; if (To == SystemZ::VectorBytes) return false; } Transform[From] = To; } } return true; } // As above, but search for a matching permute. static const Permute *matchDoublePermute(const SmallVectorImpl &Bytes, SmallVectorImpl &Transform) { for (auto &P : PermuteForms) if (matchDoublePermute(Bytes, P, Transform)) return &P; return nullptr; } // Convert the mask of the given shuffle op into a byte-level mask, // as if it had type vNi8. static bool getVPermMask(SDValue ShuffleOp, SmallVectorImpl &Bytes) { EVT VT = ShuffleOp.getValueType(); unsigned NumElements = VT.getVectorNumElements(); unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); if (auto *VSN = dyn_cast(ShuffleOp)) { Bytes.resize(NumElements * BytesPerElement, -1); for (unsigned I = 0; I < NumElements; ++I) { int Index = VSN->getMaskElt(I); if (Index >= 0) for (unsigned J = 0; J < BytesPerElement; ++J) Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J; } return true; } if (SystemZISD::SPLAT == ShuffleOp.getOpcode() && isa(ShuffleOp.getOperand(1))) { unsigned Index = ShuffleOp.getConstantOperandVal(1); Bytes.resize(NumElements * BytesPerElement, -1); for (unsigned I = 0; I < NumElements; ++I) for (unsigned J = 0; J < BytesPerElement; ++J) Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J; return true; } return false; } // Bytes is a VPERM-like permute vector, except that -1 is used for // undefined bytes. See whether bytes [Start, Start + BytesPerElement) of // the result come from a contiguous sequence of bytes from one input. // Set Base to the selector for the first byte if so. static bool getShuffleInput(const SmallVectorImpl &Bytes, unsigned Start, unsigned BytesPerElement, int &Base) { Base = -1; for (unsigned I = 0; I < BytesPerElement; ++I) { if (Bytes[Start + I] >= 0) { unsigned Elem = Bytes[Start + I]; if (Base < 0) { Base = Elem - I; // Make sure the bytes would come from one input operand. if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size()) return false; } else if (unsigned(Base) != Elem - I) return false; } } return true; } // Bytes is a VPERM-like permute vector, except that -1 is used for // undefined bytes. Return true if it can be performed using VSLDB. // When returning true, set StartIndex to the shift amount and OpNo0 // and OpNo1 to the VPERM operands that should be used as the first // and second shift operand respectively. 
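// For example, the byte mask { 3, 4, ..., 15, 16, 17, 18 } is a single VSLDB
// with a shift of 3: the last 13 bytes of the first operand followed by the
// first 3 bytes of the second.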
static bool isShlDoublePermute(const SmallVectorImpl &Bytes, unsigned &StartIndex, unsigned &OpNo0, unsigned &OpNo1) { int OpNos[] = { -1, -1 }; int Shift = -1; for (unsigned I = 0; I < 16; ++I) { int Index = Bytes[I]; if (Index >= 0) { int ExpectedShift = (Index - I) % SystemZ::VectorBytes; int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes; int RealOpNo = unsigned(Index) / SystemZ::VectorBytes; if (Shift < 0) Shift = ExpectedShift; else if (Shift != ExpectedShift) return false; // Make sure that the operand mappings are consistent with previous // elements. if (OpNos[ModelOpNo] == 1 - RealOpNo) return false; OpNos[ModelOpNo] = RealOpNo; } } StartIndex = Shift; return chooseShuffleOpNos(OpNos, OpNo0, OpNo1); } // Create a node that performs P on operands Op0 and Op1, casting the // operands to the appropriate type. The type of the result is determined by P. static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, const Permute &P, SDValue Op0, SDValue Op1) { // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input // elements of a PACK are twice as wide as the outputs. unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 : P.Opcode == SystemZISD::PACK ? P.Operand * 2 : P.Operand); // Cast both operands to the appropriate type. MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8), SystemZ::VectorBytes / InBytes); Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0); Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1); SDValue Op; if (P.Opcode == SystemZISD::PERMUTE_DWORDS) { SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32); Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2); } else if (P.Opcode == SystemZISD::PACK) { MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8), SystemZ::VectorBytes / P.Operand); Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1); } else { Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1); } return Op; } static bool isZeroVector(SDValue N) { if (N->getOpcode() == ISD::BITCAST) N = N->getOperand(0); if (N->getOpcode() == ISD::SPLAT_VECTOR) if (auto *Op = dyn_cast(N->getOperand(0))) return Op->getZExtValue() == 0; return ISD::isBuildVectorAllZeros(N.getNode()); } // Return the index of the zero/undef vector, or UINT32_MAX if not found. static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) { for (unsigned I = 0; I < Num ; I++) if (isZeroVector(Ops[I])) return I; return UINT32_MAX; } // Bytes is a VPERM-like permute vector, except that -1 is used for // undefined bytes. Implement it on operands Ops[0] and Ops[1] using // VSLDB or VPERM. static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, SDValue *Ops, const SmallVectorImpl &Bytes) { for (unsigned I = 0; I < 2; ++I) Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]); // First see whether VSLDB can be used. unsigned StartIndex, OpNo0, OpNo1; if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1)) return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0], Ops[OpNo1], DAG.getTargetConstant(StartIndex, DL, MVT::i32)); // Fall back on VPERM. Construct an SDNode for the permute vector. Try to // eliminate a zero vector by reusing any zero index in the permute vector. 
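  // (The point of the code below: the permute vector is itself a register
  // operand of VPERM, so if one data operand is known to be all zeros, the
  // mask register can double as that operand; the zero bytes of the result
  // then simply select a mask byte whose value is 0, which avoids needing a
  // separate all-zero register.)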
unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2); if (ZeroVecIdx != UINT32_MAX) { bool MaskFirst = true; int ZeroIdx = -1; for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes; if (OpNo == ZeroVecIdx && I == 0) { // If the first byte is zero, use mask as first operand. ZeroIdx = 0; break; } if (OpNo != ZeroVecIdx && Byte == 0) { // If mask contains a zero, use it by placing that vector first. ZeroIdx = I + SystemZ::VectorBytes; MaskFirst = false; break; } } if (ZeroIdx != -1) { SDValue IndexNodes[SystemZ::VectorBytes]; for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { if (Bytes[I] >= 0) { unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes; if (OpNo == ZeroVecIdx) IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32); else { unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte; IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32); } } else IndexNodes[I] = DAG.getUNDEF(MVT::i32); } SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes); SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0]; if (MaskFirst) return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src, Mask); else return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask, Mask); } } SDValue IndexNodes[SystemZ::VectorBytes]; for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) if (Bytes[I] >= 0) IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32); else IndexNodes[I] = DAG.getUNDEF(MVT::i32); SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes); return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2); } namespace { // Describes a general N-operand vector shuffle. struct GeneralShuffle { GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {} void addUndef(); bool add(SDValue, unsigned); SDValue getNode(SelectionDAG &, const SDLoc &); void tryPrepareForUnpack(); bool unpackWasPrepared() { return UnpackFromEltSize <= 4; } SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op); // The operands of the shuffle. SmallVector Ops; // Index I is -1 if byte I of the result is undefined. Otherwise the // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand // Bytes[I] / SystemZ::VectorBytes. SmallVector Bytes; // The type of the shuffle result. EVT VT; // Holds a value of 1, 2 or 4 if a final unpack has been prepared for. unsigned UnpackFromEltSize; }; } // Add an extra undefined element to the shuffle. void GeneralShuffle::addUndef() { unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); for (unsigned I = 0; I < BytesPerElement; ++I) Bytes.push_back(-1); } // Add an extra element to the shuffle, taking it from element Elem of Op. // A null Op indicates a vector input whose value will be calculated later; // there is at most one such input per shuffle and it always has the same // type as the result. Aborts and returns false if the source vector elements // of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per // LLVM they become implicitly extended, but this is rare and not optimized. bool GeneralShuffle::add(SDValue Op, unsigned Elem) { unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); // The source vector can have wider elements than the result, // either through an explicit TRUNCATE or because of type legalization. // We want the least significant part. 
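  // (SystemZ is big-endian, so the least significant part is the trailing
  // bytes of the wider element: e.g. drawing an i32 result element from a
  // v2i64 source selects bytes 4-7 of the chosen doubleword, which is what
  // the FromBytesPerElement - BytesPerElement adjustment below computes.)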
EVT FromVT = Op.getNode() ? Op.getValueType() : VT; unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize(); // Return false if the source elements are smaller than their destination // elements. if (FromBytesPerElement < BytesPerElement) return false; unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes + (FromBytesPerElement - BytesPerElement)); // Look through things like shuffles and bitcasts. while (Op.getNode()) { if (Op.getOpcode() == ISD::BITCAST) Op = Op.getOperand(0); else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) { // See whether the bytes we need come from a contiguous part of one // operand. SmallVector OpBytes; if (!getVPermMask(Op, OpBytes)) break; int NewByte; if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte)) break; if (NewByte < 0) { addUndef(); return true; } Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes); Byte = unsigned(NewByte) % SystemZ::VectorBytes; } else if (Op.isUndef()) { addUndef(); return true; } else break; } // Make sure that the source of the extraction is in Ops. unsigned OpNo = 0; for (; OpNo < Ops.size(); ++OpNo) if (Ops[OpNo] == Op) break; if (OpNo == Ops.size()) Ops.push_back(Op); // Add the element to Bytes. unsigned Base = OpNo * SystemZ::VectorBytes + Byte; for (unsigned I = 0; I < BytesPerElement; ++I) Bytes.push_back(Base + I); return true; } // Return SDNodes for the completed shuffle. SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) { assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector"); if (Ops.size() == 0) return DAG.getUNDEF(VT); // Use a single unpack if possible as the last operation. tryPrepareForUnpack(); // Make sure that there are at least two shuffle operands. if (Ops.size() == 1) Ops.push_back(DAG.getUNDEF(MVT::v16i8)); // Create a tree of shuffles, deferring root node until after the loop. // Try to redistribute the undefined elements of non-root nodes so that // the non-root shuffles match something like a pack or merge, then adjust // the parent node's permute vector to compensate for the new order. // Among other things, this copes with vectors like <2 x i16> that were // padded with undefined elements during type legalization. // // In the best case this redistribution will lead to the whole tree // using packs and merges. It should rarely be a loss in other cases. unsigned Stride = 1; for (; Stride * 2 < Ops.size(); Stride *= 2) { for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) { SDValue SubOps[] = { Ops[I], Ops[I + Stride] }; // Create a mask for just these two operands. SmallVector NewBytes(SystemZ::VectorBytes); for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) { unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes; unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes; if (OpNo == I) NewBytes[J] = Byte; else if (OpNo == I + Stride) NewBytes[J] = SystemZ::VectorBytes + Byte; else NewBytes[J] = -1; } // See if it would be better to reorganize NewMask to avoid using VPERM. SmallVector NewBytesMap(SystemZ::VectorBytes); if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) { Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]); // Applying NewBytesMap to Ops[I] gets back to NewBytes. 
for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) { if (NewBytes[J] >= 0) { assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes && "Invalid double permute"); Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J]; } else assert(NewBytesMap[J] < 0 && "Invalid double permute"); } } else { // Just use NewBytes on the operands. Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes); for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) if (NewBytes[J] >= 0) Bytes[J] = I * SystemZ::VectorBytes + J; } } } // Now we just have 2 inputs. Put the second operand in Ops[1]. if (Stride > 1) { Ops[1] = Ops[Stride]; for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) if (Bytes[I] >= int(SystemZ::VectorBytes)) Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes; } // Look for an instruction that can do the permute without resorting // to VPERM. unsigned OpNo0, OpNo1; SDValue Op; if (unpackWasPrepared() && Ops[1].isUndef()) Op = Ops[0]; else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1)) Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]); else Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes); Op = insertUnpackIfPrepared(DAG, DL, Op); return DAG.getNode(ISD::BITCAST, DL, VT, Op); } #ifndef NDEBUG static void dumpBytes(const SmallVectorImpl &Bytes, std::string Msg) { dbgs() << Msg.c_str() << " { "; for (unsigned i = 0; i < Bytes.size(); i++) dbgs() << Bytes[i] << " "; dbgs() << "}\n"; } #endif // If the Bytes vector matches an unpack operation, prepare to do the unpack // after all else by removing the zero vector and the effect of the unpack on // Bytes. void GeneralShuffle::tryPrepareForUnpack() { uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size()); if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1) return; // Only do this if removing the zero vector reduces the depth, otherwise // the critical path will increase with the final unpack. if (Ops.size() > 2 && Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1)) return; // Find an unpack that would allow removing the zero vector from Ops. UnpackFromEltSize = 1; for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) { bool MatchUnpack = true; SmallVector SrcBytes; for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) { unsigned ToEltSize = UnpackFromEltSize * 2; bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize; if (!IsZextByte) SrcBytes.push_back(Bytes[Elt]); if (Bytes[Elt] != -1) { unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes; if (IsZextByte != (OpNo == ZeroVecOpNo)) { MatchUnpack = false; break; } } } if (MatchUnpack) { if (Ops.size() == 2) { // Don't use unpack if a single source operand needs rearrangement. for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++) if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) { UnpackFromEltSize = UINT_MAX; return; } } break; } } if (UnpackFromEltSize > 4) return; LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size " << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo << ".\n"; dumpBytes(Bytes, "Original Bytes vector:");); // Apply the unpack in reverse to the Bytes array. 
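  // (For UnpackFromEltSize == 1, for instance, the even byte positions are
  // the zero-extension bytes, so the odd-position selectors are compacted
  // into the first eight entries, the tail is filled with -1, and the final
  // UNPACKL_HIGH in insertUnpackIfPrepared re-expands them with zero bytes.)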
unsigned B = 0; for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) { Elt += UnpackFromEltSize; for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++) Bytes[B] = Bytes[Elt]; } while (B < SystemZ::VectorBytes) Bytes[B++] = -1; // Remove the zero vector from Ops Ops.erase(&Ops[ZeroVecOpNo]); for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) if (Bytes[I] >= 0) { unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; if (OpNo > ZeroVecOpNo) Bytes[I] -= SystemZ::VectorBytes; } LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:"); dbgs() << "\n";); } SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op) { if (!unpackWasPrepared()) return Op; unsigned InBits = UnpackFromEltSize * 8; EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits), SystemZ::VectorBits / InBits); SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op); unsigned OutBits = InBits * 2; EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits), SystemZ::VectorBits / OutBits); return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp); } // Return true if the given BUILD_VECTOR is a scalar-to-vector conversion. static bool isScalarToVector(SDValue Op) { for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I) if (!Op.getOperand(I).isUndef()) return false; return true; } // Return a vector of type VT that contains Value in the first element. // The other elements don't matter. static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Value) { // If we have a constant, replicate it to all elements and let the // BUILD_VECTOR lowering take care of it. if (Value.getOpcode() == ISD::Constant || Value.getOpcode() == ISD::ConstantFP) { SmallVector Ops(VT.getVectorNumElements(), Value); return DAG.getBuildVector(VT, DL, Ops); } if (Value.isUndef()) return DAG.getUNDEF(VT); return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value); } // Return a vector of type VT in which Op0 is in element 0 and Op1 is in // element 1. Used for cases in which replication is cheap. static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op0, SDValue Op1) { if (Op0.isUndef()) { if (Op1.isUndef()) return DAG.getUNDEF(VT); return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1); } if (Op1.isUndef()) return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0); return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT, buildScalarToVector(DAG, DL, VT, Op0), buildScalarToVector(DAG, DL, VT, Op1)); } // Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64 // vector for them. static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, SDValue Op1) { if (Op0.isUndef() && Op1.isUndef()) return DAG.getUNDEF(MVT::v2i64); // If one of the two inputs is undefined then replicate the other one, // in order to avoid using another register unnecessarily. if (Op0.isUndef()) Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1); else if (Op1.isUndef()) Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); else { Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1); } return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1); } // If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually // better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for // the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR // would benefit from this representation and return it if so. 
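// For example, a v4i32 BUILD_VECTOR whose four operands are extracts of
// elements of two other v4i32 vectors can usually be formed with a single
// permute of those two vectors instead of four separate element insertions.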
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, BuildVectorSDNode *BVN) { EVT VT = BVN->getValueType(0); unsigned NumElements = VT.getVectorNumElements(); // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still // need a BUILD_VECTOR, add an additional placeholder operand for that // BUILD_VECTOR and store its operands in ResidueOps. GeneralShuffle GS(VT); SmallVector ResidueOps; bool FoundOne = false; for (unsigned I = 0; I < NumElements; ++I) { SDValue Op = BVN->getOperand(I); if (Op.getOpcode() == ISD::TRUNCATE) Op = Op.getOperand(0); if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && Op.getOperand(1).getOpcode() == ISD::Constant) { unsigned Elem = cast(Op.getOperand(1))->getZExtValue(); if (!GS.add(Op.getOperand(0), Elem)) return SDValue(); FoundOne = true; } else if (Op.isUndef()) { GS.addUndef(); } else { if (!GS.add(SDValue(), ResidueOps.size())) return SDValue(); ResidueOps.push_back(BVN->getOperand(I)); } } // Nothing to do if there are no EXTRACT_VECTOR_ELTs. if (!FoundOne) return SDValue(); // Create the BUILD_VECTOR for the remaining elements, if any. if (!ResidueOps.empty()) { while (ResidueOps.size() < NumElements) ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType())); for (auto &Op : GS.Ops) { if (!Op.getNode()) { Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps); break; } } } return GS.getNode(DAG, SDLoc(BVN)); } bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const { if (Op.getOpcode() == ISD::LOAD && cast(Op)->isUnindexed()) return true; if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV) return true; return false; } // Combine GPR scalar values Elems into a vector of type VT. SDValue SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SmallVectorImpl &Elems) const { // See whether there is a single replicated value. SDValue Single; unsigned int NumElements = Elems.size(); unsigned int Count = 0; for (auto Elem : Elems) { if (!Elem.isUndef()) { if (!Single.getNode()) Single = Elem; else if (Elem != Single) { Single = SDValue(); break; } Count += 1; } } // There are three cases here: // // - if the only defined element is a loaded one, the best sequence // is a replicating load. // // - otherwise, if the only defined element is an i64 value, we will // end up with the same VLVGP sequence regardless of whether we short-cut // for replication or fall through to the later code. // // - otherwise, if the only defined element is an i32 or smaller value, // we would need 2 instructions to replicate it: VLVGP followed by VREPx. // This is only a win if the single defined element is used more than once. // In other cases we're better off using a single VLVGx. if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single))) return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single); // If all elements are loads, use VLREP/VLEs (below). bool AllLoads = true; for (auto Elem : Elems) if (!isVectorElementLoad(Elem)) { AllLoads = false; break; } // The best way of building a v2i64 from two i64s is to use VLVGP. if (VT == MVT::v2i64 && !AllLoads) return joinDwords(DAG, DL, Elems[0], Elems[1]); // Use a 64-bit merge high to combine two doubles. 
if (VT == MVT::v2f64 && !AllLoads) return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); // Build v4f32 values directly from the FPRs: // // // V V VMRHF // // V VMRHG // if (VT == MVT::v4f32 && !AllLoads) { SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]); // Avoid unnecessary undefs by reusing the other operand. if (Op01.isUndef()) Op01 = Op23; else if (Op23.isUndef()) Op23 = Op01; // Merging identical replications is a no-op. if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23) return Op01; Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01); Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23); SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH, DL, MVT::v2i64, Op01, Op23); return DAG.getNode(ISD::BITCAST, DL, VT, Op); } // Collect the constant terms. SmallVector Constants(NumElements, SDValue()); SmallVector Done(NumElements, false); unsigned NumConstants = 0; for (unsigned I = 0; I < NumElements; ++I) { SDValue Elem = Elems[I]; if (Elem.getOpcode() == ISD::Constant || Elem.getOpcode() == ISD::ConstantFP) { NumConstants += 1; Constants[I] = Elem; Done[I] = true; } } // If there was at least one constant, fill in the other elements of // Constants with undefs to get a full vector constant and use that // as the starting point. SDValue Result; SDValue ReplicatedVal; if (NumConstants > 0) { for (unsigned I = 0; I < NumElements; ++I) if (!Constants[I].getNode()) Constants[I] = DAG.getUNDEF(Elems[I].getValueType()); Result = DAG.getBuildVector(VT, DL, Constants); } else { // Otherwise try to use VLREP or VLVGP to start the sequence in order to // avoid a false dependency on any previous contents of the vector // register. // Use a VLREP if at least one element is a load. Make sure to replicate // the load with the most elements having its value. std::map UseCounts; SDNode *LoadMaxUses = nullptr; for (unsigned I = 0; I < NumElements; ++I) if (isVectorElementLoad(Elems[I])) { SDNode *Ld = Elems[I].getNode(); UseCounts[Ld]++; if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld]) LoadMaxUses = Ld; } if (LoadMaxUses != nullptr) { ReplicatedVal = SDValue(LoadMaxUses, 0); Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal); } else { // Try to use VLVGP. unsigned I1 = NumElements / 2 - 1; unsigned I2 = NumElements - 1; bool Def1 = !Elems[I1].isUndef(); bool Def2 = !Elems[I2].isUndef(); if (Def1 || Def2) { SDValue Elem1 = Elems[Def1 ? I1 : I2]; SDValue Elem2 = Elems[Def2 ? I2 : I1]; Result = DAG.getNode(ISD::BITCAST, DL, VT, joinDwords(DAG, DL, Elem1, Elem2)); Done[I1] = true; Done[I2] = true; } else Result = DAG.getUNDEF(VT); } } // Use VLVGx to insert the other elements. for (unsigned I = 0; I < NumElements; ++I) if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal) Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I], DAG.getConstant(I, DL, MVT::i32)); return Result; } SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { auto *BVN = cast(Op.getNode()); SDLoc DL(Op); EVT VT = Op.getValueType(); if (BVN->isConstant()) { if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget)) return Op; // Fall back to loading it from memory. return SDValue(); } // See if we should use shuffles to construct the vector from other vectors. if (SDValue Res = tryBuildVectorShuffle(DAG, BVN)) return Res; // Detect SCALAR_TO_VECTOR conversions. 
if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op)) return buildScalarToVector(DAG, DL, VT, Op.getOperand(0)); // Otherwise use buildVector to build the vector up from GPRs. unsigned NumElements = Op.getNumOperands(); SmallVector Ops(NumElements); for (unsigned I = 0; I < NumElements; ++I) Ops[I] = Op.getOperand(I); return buildVector(DAG, DL, VT, Ops); } SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { auto *VSN = cast(Op.getNode()); SDLoc DL(Op); EVT VT = Op.getValueType(); unsigned NumElements = VT.getVectorNumElements(); if (VSN->isSplat()) { SDValue Op0 = Op.getOperand(0); unsigned Index = VSN->getSplatIndex(); assert(Index < VT.getVectorNumElements() && "Splat index should be defined and in first operand"); // See whether the value we're splatting is directly available as a scalar. if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) || Op0.getOpcode() == ISD::BUILD_VECTOR) return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index)); // Otherwise keep it as a vector-to-vector operation. return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0), DAG.getTargetConstant(Index, DL, MVT::i32)); } GeneralShuffle GS(VT); for (unsigned I = 0; I < NumElements; ++I) { int Elt = VSN->getMaskElt(I); if (Elt < 0) GS.addUndef(); else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements), unsigned(Elt) % NumElements)) return SDValue(); } return GS.getNode(DAG, SDLoc(VSN)); } SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); // Just insert the scalar into element 0 of an undefined vector. return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(), DAG.getUNDEF(Op.getValueType()), Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32)); } SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { // Handle insertions of floating-point values. SDLoc DL(Op); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue Op2 = Op.getOperand(2); EVT VT = Op.getValueType(); // Insertions into constant indices of a v2f64 can be done using VPDI. // However, if the inserted value is a bitcast or a constant then it's // better to use GPRs, as below. if (VT == MVT::v2f64 && Op1.getOpcode() != ISD::BITCAST && Op1.getOpcode() != ISD::ConstantFP && Op2.getOpcode() == ISD::Constant) { uint64_t Index = cast(Op2)->getZExtValue(); unsigned Mask = VT.getVectorNumElements() - 1; if (Index <= Mask) return Op; } // Otherwise bitcast to the equivalent integer form and insert via a GPR. MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits()); MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements()); SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT, DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2); return DAG.getNode(ISD::BITCAST, DL, VT, Res); } SDValue SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { // Handle extractions of floating-point values. SDLoc DL(Op); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); EVT VT = Op.getValueType(); EVT VecVT = Op0.getValueType(); // Extractions of constant indices can be done directly. if (auto *CIndexN = dyn_cast(Op1)) { uint64_t Index = CIndexN->getZExtValue(); unsigned Mask = VecVT.getVectorNumElements() - 1; if (Index <= Mask) return Op; } // Otherwise bitcast to the equivalent integer form and extract via a GPR. 
MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits()); MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements()); SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT, DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1); return DAG.getNode(ISD::BITCAST, DL, VT, Res); } SDValue SystemZTargetLowering:: lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const { SDValue PackedOp = Op.getOperand(0); EVT OutVT = Op.getValueType(); EVT InVT = PackedOp.getValueType(); unsigned ToBits = OutVT.getScalarSizeInBits(); unsigned FromBits = InVT.getScalarSizeInBits(); do { FromBits *= 2; EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits), SystemZ::VectorBits / FromBits); PackedOp = DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp); } while (FromBits != ToBits); return PackedOp; } // Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector. SDValue SystemZTargetLowering:: lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const { SDValue PackedOp = Op.getOperand(0); SDLoc DL(Op); EVT OutVT = Op.getValueType(); EVT InVT = PackedOp.getValueType(); unsigned InNumElts = InVT.getVectorNumElements(); unsigned OutNumElts = OutVT.getVectorNumElements(); unsigned NumInPerOut = InNumElts / OutNumElts; SDValue ZeroVec = DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType())); SmallVector Mask(InNumElts); unsigned ZeroVecElt = InNumElts; for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) { unsigned MaskElt = PackedElt * NumInPerOut; unsigned End = MaskElt + NumInPerOut - 1; for (; MaskElt < End; MaskElt++) Mask[MaskElt] = ZeroVecElt++; Mask[MaskElt] = PackedElt; } SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask); return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf); } SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const { // Look for cases where a vector shift can use the *_BY_SCALAR form. SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDLoc DL(Op); EVT VT = Op.getValueType(); unsigned ElemBitSize = VT.getScalarSizeInBits(); // See whether the shift vector is a splat represented as BUILD_VECTOR. if (auto *BVN = dyn_cast(Op1)) { APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; // Check for constant splats. Use ElemBitSize as the minimum element // width and reject splats that need wider elements. if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, ElemBitSize, true) && SplatBitSize == ElemBitSize) { SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff, DL, MVT::i32); return DAG.getNode(ByScalar, DL, VT, Op0, Shift); } // Check for variable splats. BitVector UndefElements; SDValue Splat = BVN->getSplatValue(&UndefElements); if (Splat) { // Since i32 is the smallest legal type, we either need a no-op // or a truncation. SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat); return DAG.getNode(ByScalar, DL, VT, Op0, Shift); } } // See whether the shift vector is a splat represented as SHUFFLE_VECTOR, // and the shift amount is directly available in a GPR. 
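  // For example, in (shl X, (splat_shuffle (scalar_to_vector Y))) the scalar
  // amount Y can feed the *_BY_SCALAR node directly (after truncation to i32).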
if (auto *VSN = dyn_cast(Op1)) { if (VSN->isSplat()) { SDValue VSNOp0 = VSN->getOperand(0); unsigned Index = VSN->getSplatIndex(); assert(Index < VT.getVectorNumElements() && "Splat index should be defined and in first operand"); if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) || VSNOp0.getOpcode() == ISD::BUILD_VECTOR) { // Since i32 is the smallest legal type, we either need a no-op // or a truncation. SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, VSNOp0.getOperand(Index)); return DAG.getNode(ByScalar, DL, VT, Op0, Shift); } } } // Otherwise just treat the current form as legal. return Op; } SDValue SystemZTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { case ISD::FRAMEADDR: return lowerFRAMEADDR(Op, DAG); case ISD::RETURNADDR: return lowerRETURNADDR(Op, DAG); case ISD::BR_CC: return lowerBR_CC(Op, DAG); case ISD::SELECT_CC: return lowerSELECT_CC(Op, DAG); case ISD::SETCC: return lowerSETCC(Op, DAG); case ISD::STRICT_FSETCC: return lowerSTRICT_FSETCC(Op, DAG, false); case ISD::STRICT_FSETCCS: return lowerSTRICT_FSETCC(Op, DAG, true); case ISD::GlobalAddress: return lowerGlobalAddress(cast(Op), DAG); case ISD::GlobalTLSAddress: return lowerGlobalTLSAddress(cast(Op), DAG); case ISD::BlockAddress: return lowerBlockAddress(cast(Op), DAG); case ISD::JumpTable: return lowerJumpTable(cast(Op), DAG); case ISD::ConstantPool: return lowerConstantPool(cast(Op), DAG); case ISD::BITCAST: return lowerBITCAST(Op, DAG); case ISD::VASTART: return lowerVASTART(Op, DAG); case ISD::VACOPY: return lowerVACOPY(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return lowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::GET_DYNAMIC_AREA_OFFSET: return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG); case ISD::SMUL_LOHI: return lowerSMUL_LOHI(Op, DAG); case ISD::UMUL_LOHI: return lowerUMUL_LOHI(Op, DAG); case ISD::SDIVREM: return lowerSDIVREM(Op, DAG); case ISD::UDIVREM: return lowerUDIVREM(Op, DAG); case ISD::SADDO: case ISD::SSUBO: case ISD::UADDO: case ISD::USUBO: return lowerXALUO(Op, DAG); case ISD::ADDCARRY: case ISD::SUBCARRY: return lowerADDSUBCARRY(Op, DAG); case ISD::OR: return lowerOR(Op, DAG); case ISD::CTPOP: return lowerCTPOP(Op, DAG); case ISD::ATOMIC_FENCE: return lowerATOMIC_FENCE(Op, DAG); case ISD::ATOMIC_SWAP: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW); case ISD::ATOMIC_STORE: return lowerATOMIC_STORE(Op, DAG); case ISD::ATOMIC_LOAD: return lowerATOMIC_LOAD(Op, DAG); case ISD::ATOMIC_LOAD_ADD: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD); case ISD::ATOMIC_LOAD_SUB: return lowerATOMIC_LOAD_SUB(Op, DAG); case ISD::ATOMIC_LOAD_AND: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND); case ISD::ATOMIC_LOAD_OR: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR); case ISD::ATOMIC_LOAD_XOR: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR); case ISD::ATOMIC_LOAD_NAND: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND); case ISD::ATOMIC_LOAD_MIN: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN); case ISD::ATOMIC_LOAD_MAX: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX); case ISD::ATOMIC_LOAD_UMIN: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN); case ISD::ATOMIC_LOAD_UMAX: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX); case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: return lowerATOMIC_CMP_SWAP(Op, DAG); case ISD::STACKSAVE: return lowerSTACKSAVE(Op, DAG); case ISD::STACKRESTORE: return 
lowerSTACKRESTORE(Op, DAG); case ISD::PREFETCH: return lowerPREFETCH(Op, DAG); case ISD::INTRINSIC_W_CHAIN: return lowerINTRINSIC_W_CHAIN(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG); case ISD::SCALAR_TO_VECTOR: return lowerSCALAR_TO_VECTOR(Op, DAG); case ISD::INSERT_VECTOR_ELT: return lowerINSERT_VECTOR_ELT(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::SIGN_EXTEND_VECTOR_INREG: return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG); case ISD::ZERO_EXTEND_VECTOR_INREG: return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG); case ISD::SHL: return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR); case ISD::SRL: return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR); case ISD::SRA: return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR); default: llvm_unreachable("Unexpected node to lower"); } } // Lower operations with invalid operand or result types (currently used // only for 128-bit integer types). static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) { SDLoc DL(In); SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In, DAG.getIntPtrConstant(0, DL)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In, DAG.getIntPtrConstant(1, DL)); SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL, MVT::Untyped, Hi, Lo); return SDValue(Pair, 0); } static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) { SDLoc DL(In); SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64, DL, MVT::i64, In); SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64, DL, MVT::i64, In); return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi); } void SystemZTargetLowering::LowerOperationWrapper(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { switch (N->getOpcode()) { case ISD::ATOMIC_LOAD: { SDLoc DL(N); SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other); SDValue Ops[] = { N->getOperand(0), N->getOperand(1) }; MachineMemOperand *MMO = cast(N)->getMemOperand(); SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128, DL, Tys, Ops, MVT::i128, MMO); Results.push_back(lowerGR128ToI128(DAG, Res)); Results.push_back(Res.getValue(1)); break; } case ISD::ATOMIC_STORE: { SDLoc DL(N); SDVTList Tys = DAG.getVTList(MVT::Other); SDValue Ops[] = { N->getOperand(0), lowerI128ToGR128(DAG, N->getOperand(2)), N->getOperand(1) }; MachineMemOperand *MMO = cast(N)->getMemOperand(); SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128, DL, Tys, Ops, MVT::i128, MMO); // We have to enforce sequential consistency by performing a // serialization operation after the store. 
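    // Only a sequentially consistent store gets the extra SystemZ::Serialize
    // node below; weaker orderings leave the store as-is.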
if (cast(N)->getOrdering() == AtomicOrdering::SequentiallyConsistent) Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other, Res), 0); Results.push_back(Res); break; } case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { SDLoc DL(N); SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other); SDValue Ops[] = { N->getOperand(0), N->getOperand(1), lowerI128ToGR128(DAG, N->getOperand(2)), lowerI128ToGR128(DAG, N->getOperand(3)) }; MachineMemOperand *MMO = cast(N)->getMemOperand(); SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128, DL, Tys, Ops, MVT::i128, MMO); SDValue Success = emitSETCC(DAG, DL, Res.getValue(1), SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ); Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1)); Results.push_back(lowerGR128ToI128(DAG, Res)); Results.push_back(Success); Results.push_back(Res.getValue(2)); break; } default: llvm_unreachable("Unexpected node to lower"); } } void SystemZTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { return LowerOperationWrapper(N, Results, DAG); } const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { #define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME switch ((SystemZISD::NodeType)Opcode) { case SystemZISD::FIRST_NUMBER: break; OPCODE(RET_FLAG); OPCODE(CALL); OPCODE(SIBCALL); OPCODE(TLS_GDCALL); OPCODE(TLS_LDCALL); OPCODE(PCREL_WRAPPER); OPCODE(PCREL_OFFSET); OPCODE(ICMP); OPCODE(FCMP); OPCODE(STRICT_FCMP); OPCODE(STRICT_FCMPS); OPCODE(TM); OPCODE(BR_CCMASK); OPCODE(SELECT_CCMASK); OPCODE(ADJDYNALLOC); OPCODE(PROBED_ALLOCA); OPCODE(POPCNT); OPCODE(SMUL_LOHI); OPCODE(UMUL_LOHI); OPCODE(SDIVREM); OPCODE(UDIVREM); OPCODE(SADDO); OPCODE(SSUBO); OPCODE(UADDO); OPCODE(USUBO); OPCODE(ADDCARRY); OPCODE(SUBCARRY); OPCODE(GET_CCMASK); OPCODE(MVC); OPCODE(MVC_LOOP); OPCODE(NC); OPCODE(NC_LOOP); OPCODE(OC); OPCODE(OC_LOOP); OPCODE(XC); OPCODE(XC_LOOP); OPCODE(CLC); OPCODE(CLC_LOOP); OPCODE(STPCPY); OPCODE(STRCMP); OPCODE(SEARCH_STRING); OPCODE(IPM); OPCODE(MEMBARRIER); OPCODE(TBEGIN); OPCODE(TBEGIN_NOFLOAT); OPCODE(TEND); OPCODE(BYTE_MASK); OPCODE(ROTATE_MASK); OPCODE(REPLICATE); OPCODE(JOIN_DWORDS); OPCODE(SPLAT); OPCODE(MERGE_HIGH); OPCODE(MERGE_LOW); OPCODE(SHL_DOUBLE); OPCODE(PERMUTE_DWORDS); OPCODE(PERMUTE); OPCODE(PACK); OPCODE(PACKS_CC); OPCODE(PACKLS_CC); OPCODE(UNPACK_HIGH); OPCODE(UNPACKL_HIGH); OPCODE(UNPACK_LOW); OPCODE(UNPACKL_LOW); OPCODE(VSHL_BY_SCALAR); OPCODE(VSRL_BY_SCALAR); OPCODE(VSRA_BY_SCALAR); OPCODE(VSUM); OPCODE(VICMPE); OPCODE(VICMPH); OPCODE(VICMPHL); OPCODE(VICMPES); OPCODE(VICMPHS); OPCODE(VICMPHLS); OPCODE(VFCMPE); OPCODE(STRICT_VFCMPE); OPCODE(STRICT_VFCMPES); OPCODE(VFCMPH); OPCODE(STRICT_VFCMPH); OPCODE(STRICT_VFCMPHS); OPCODE(VFCMPHE); OPCODE(STRICT_VFCMPHE); OPCODE(STRICT_VFCMPHES); OPCODE(VFCMPES); OPCODE(VFCMPHS); OPCODE(VFCMPHES); OPCODE(VFTCI); OPCODE(VEXTEND); OPCODE(STRICT_VEXTEND); OPCODE(VROUND); OPCODE(STRICT_VROUND); OPCODE(VTM); OPCODE(VFAE_CC); OPCODE(VFAEZ_CC); OPCODE(VFEE_CC); OPCODE(VFEEZ_CC); OPCODE(VFENE_CC); OPCODE(VFENEZ_CC); OPCODE(VISTR_CC); OPCODE(VSTRC_CC); OPCODE(VSTRCZ_CC); OPCODE(VSTRS_CC); OPCODE(VSTRSZ_CC); OPCODE(TDC); OPCODE(ATOMIC_SWAPW); OPCODE(ATOMIC_LOADW_ADD); OPCODE(ATOMIC_LOADW_SUB); OPCODE(ATOMIC_LOADW_AND); OPCODE(ATOMIC_LOADW_OR); OPCODE(ATOMIC_LOADW_XOR); OPCODE(ATOMIC_LOADW_NAND); OPCODE(ATOMIC_LOADW_MIN); OPCODE(ATOMIC_LOADW_MAX); OPCODE(ATOMIC_LOADW_UMIN); OPCODE(ATOMIC_LOADW_UMAX); OPCODE(ATOMIC_CMP_SWAPW); OPCODE(ATOMIC_CMP_SWAP); 
OPCODE(ATOMIC_LOAD_128); OPCODE(ATOMIC_STORE_128); OPCODE(ATOMIC_CMP_SWAP_128); OPCODE(LRV); OPCODE(STRV); OPCODE(VLER); OPCODE(VSTER); OPCODE(PREFETCH); } return nullptr; #undef OPCODE } // Return true if VT is a vector whose elements are a whole number of bytes // in width. Also check for presence of vector support. bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const { if (!Subtarget.hasVector()) return false; return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple(); } // Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT // producing a result of type ResVT. Op is a possibly bitcast version // of the input vector and Index is the index (based on type VecVT) that // should be extracted. Return the new extraction if a simplification // was possible or if Force is true. SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT, EVT VecVT, SDValue Op, unsigned Index, DAGCombinerInfo &DCI, bool Force) const { SelectionDAG &DAG = DCI.DAG; // The number of bytes being extracted. unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize(); for (;;) { unsigned Opcode = Op.getOpcode(); if (Opcode == ISD::BITCAST) // Look through bitcasts. Op = Op.getOperand(0); else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) && canTreatAsByteVector(Op.getValueType())) { // Get a VPERM-like permute mask and see whether the bytes covered // by the extracted element are a contiguous sequence from one // source operand. SmallVector Bytes; if (!getVPermMask(Op, Bytes)) break; int First; if (!getShuffleInput(Bytes, Index * BytesPerElement, BytesPerElement, First)) break; if (First < 0) return DAG.getUNDEF(ResVT); // Make sure the contiguous sequence starts at a multiple of the // original element size. unsigned Byte = unsigned(First) % Bytes.size(); if (Byte % BytesPerElement != 0) break; // We can get the extracted value directly from an input. Index = Byte / BytesPerElement; Op = Op.getOperand(unsigned(First) / Bytes.size()); Force = true; } else if (Opcode == ISD::BUILD_VECTOR && canTreatAsByteVector(Op.getValueType())) { // We can only optimize this case if the BUILD_VECTOR elements are // at least as wide as the extracted value. EVT OpVT = Op.getValueType(); unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize(); if (OpBytesPerElement < BytesPerElement) break; // Make sure that the least-significant bit of the extracted value // is the least significant bit of an input. unsigned End = (Index + 1) * BytesPerElement; if (End % OpBytesPerElement != 0) break; // We're extracting the low part of one operand of the BUILD_VECTOR. Op = Op.getOperand(End / OpBytesPerElement - 1); if (!Op.getValueType().isInteger()) { EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits()); Op = DAG.getNode(ISD::BITCAST, DL, VT, Op); DCI.AddToWorklist(Op.getNode()); } EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits()); Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); if (VT != ResVT) { DCI.AddToWorklist(Op.getNode()); Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op); } return Op; } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG || Opcode == ISD::ANY_EXTEND_VECTOR_INREG) && canTreatAsByteVector(Op.getValueType()) && canTreatAsByteVector(Op.getOperand(0).getValueType())) { // Make sure that only the unextended bits are significant. 
EVT ExtVT = Op.getValueType(); EVT OpVT = Op.getOperand(0).getValueType(); unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize(); unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize(); unsigned Byte = Index * BytesPerElement; unsigned SubByte = Byte % ExtBytesPerElement; unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement; if (SubByte < MinSubByte || SubByte + BytesPerElement > ExtBytesPerElement) break; // Get the byte offset of the unextended element Byte = Byte / ExtBytesPerElement * OpBytesPerElement; // ...then add the byte offset relative to that element. Byte += SubByte - MinSubByte; if (Byte % BytesPerElement != 0) break; Op = Op.getOperand(0); Index = Byte / BytesPerElement; Force = true; } else break; } if (Force) { if (Op.getValueType() != VecVT) { Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op); DCI.AddToWorklist(Op.getNode()); } return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op, DAG.getConstant(Index, DL, MVT::i32)); } return SDValue(); } // Optimize vector operations in scalar value Op on the basis that Op // is truncated to TruncVT. SDValue SystemZTargetLowering::combineTruncateExtract( const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const { // If we have (trunc (extract_vector_elt X, Y)), try to turn it into // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements // of type TruncVT. if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && TruncVT.getSizeInBits() % 8 == 0) { SDValue Vec = Op.getOperand(0); EVT VecVT = Vec.getValueType(); if (canTreatAsByteVector(VecVT)) { if (auto *IndexN = dyn_cast(Op.getOperand(1))) { unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize(); unsigned TruncBytes = TruncVT.getStoreSize(); if (BytesPerElement % TruncBytes == 0) { // Calculate the value of Y' in the above description. We are // splitting the original elements into Scale equal-sized pieces // and for truncation purposes want the last (least-significant) // of these pieces for IndexN. This is easiest to do by calculating // the start index of the following element and then subtracting 1. unsigned Scale = BytesPerElement / TruncBytes; unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1; // Defer the creation of the bitcast from X to combineExtract, // which might be able to optimize the extraction. VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8), VecVT.getStoreSize() / TruncBytes); EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT); return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true); } } } } return SDValue(); } SDValue SystemZTargetLowering::combineZERO_EXTEND( SDNode *N, DAGCombinerInfo &DCI) const { // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2') SelectionDAG &DAG = DCI.DAG; SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) { auto *TrueOp = dyn_cast(N0.getOperand(0)); auto *FalseOp = dyn_cast(N0.getOperand(1)); if (TrueOp && FalseOp) { SDLoc DL(N0); SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT), DAG.getConstant(FalseOp->getZExtValue(), DL, VT), N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) }; SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops); // If N0 has multiple uses, change other uses as well. 
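      // Those other uses still expect the original narrow type, so they are
      // fed a truncate of the widened select.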
if (!N0.hasOneUse()) { SDValue TruncSelect = DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect); DCI.CombineTo(N0.getNode(), TruncSelect); } return NewSelect; } } return SDValue(); } SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG( SDNode *N, DAGCombinerInfo &DCI) const { // Convert (sext_in_reg (setcc LHS, RHS, COND), i1) // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1) // into (select_cc LHS, RHS, -1, 0, COND) SelectionDAG &DAG = DCI.DAG; SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); EVT EVT = cast(N->getOperand(1))->getVT(); if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND) N0 = N0.getOperand(0); if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) { SDLoc DL(N0); SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), DAG.getConstant(-1, DL, VT), DAG.getConstant(0, DL, VT), N0.getOperand(2) }; return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); } return SDValue(); } SDValue SystemZTargetLowering::combineSIGN_EXTEND( SDNode *N, DAGCombinerInfo &DCI) const { // Convert (sext (ashr (shl X, C1), C2)) to // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as // cheap as narrower ones. SelectionDAG &DAG = DCI.DAG; SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) { auto *SraAmt = dyn_cast(N0.getOperand(1)); SDValue Inner = N0.getOperand(0); if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) { if (auto *ShlAmt = dyn_cast(Inner.getOperand(1))) { unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits()); unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra; unsigned NewSraAmt = SraAmt->getZExtValue() + Extra; EVT ShiftVT = N0.getOperand(1).getValueType(); SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT, Inner.getOperand(0)); SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext, DAG.getConstant(NewShlAmt, SDLoc(Inner), ShiftVT)); return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT)); } } } return SDValue(); } SDValue SystemZTargetLowering::combineMERGE( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; unsigned Opcode = N->getOpcode(); SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); if (Op0.getOpcode() == ISD::BITCAST) Op0 = Op0.getOperand(0); if (ISD::isBuildVectorAllZeros(Op0.getNode())) { // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF // for v4f32. if (Op1 == N->getOperand(0)) return Op1; // (z_merge_? 0, X) -> (z_unpackl_? 0, X). EVT VT = Op1.getValueType(); unsigned ElemBytes = VT.getVectorElementType().getStoreSize(); if (ElemBytes <= 4) { Opcode = (Opcode == SystemZISD::MERGE_HIGH ? SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW); EVT InVT = VT.changeVectorElementTypeToInteger(); EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16), SystemZ::VectorBytes / ElemBytes / 2); if (VT != InVT) { Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1); DCI.AddToWorklist(Op1.getNode()); } SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1); DCI.AddToWorklist(Op.getNode()); return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); } } return SDValue(); } SDValue SystemZTargetLowering::combineLOAD( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; EVT LdVT = N->getValueType(0); if (LdVT.isVector() || LdVT.isInteger()) return SDValue(); // Transform a scalar load that is REPLICATEd as well as having other // use(s) to the form where the other use(s) use the first element of the // REPLICATE instead of the load. 
  // Otherwise instruction selection will not produce a VLREP. Avoid
  // extracting to a GPR, so only do this for floating point loads.
  SDValue Replicate;
  SmallVector<SDNode*, 8> OtherUses;
  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
       UI != UE; ++UI) {
    if (UI->getOpcode() == SystemZISD::REPLICATE) {
      if (Replicate)
        return SDValue(); // Should never happen
      Replicate = SDValue(*UI, 0);
    }
    else if (UI.getUse().getResNo() == 0)
      OtherUses.push_back(*UI);
  }
  if (!Replicate || OtherUses.empty())
    return SDValue();

  SDLoc DL(N);
  SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
                                 Replicate, DAG.getConstant(0, DL, MVT::i32));
  // Update uses of the loaded Value while preserving old chains.
  for (SDNode *U : OtherUses) {
    SmallVector<SDValue, 8> Ops;
    for (SDValue Op : U->ops())
      Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
    DAG.UpdateNodeOperands(U, Ops);
  }
  return SDValue(N, 0);
}

bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
  if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
    return true;
  if (Subtarget.hasVectorEnhancements2())
    if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64)
      return true;
  return false;
}

static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
  if (!VT.isVector() || !VT.isSimple() ||
      VT.getSizeInBits() != 128 ||
      VT.getScalarSizeInBits() % 8 != 0)
    return false;

  unsigned NumElts = VT.getVectorNumElements();
  for (unsigned i = 0; i < NumElts; ++i) {
    if (M[i] < 0) continue; // ignore UNDEF indices
    if ((unsigned) M[i] != NumElts - 1 - i)
      return false;
  }

  return true;
}

SDValue SystemZTargetLowering::combineSTORE(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  auto *SN = cast<StoreSDNode>(N);
  auto &Op1 = N->getOperand(1);
  EVT MemVT = SN->getMemoryVT();
  // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
  // for the extraction to be done on a vMiN value, so that we can use VSTE.
  // If X has wider elements then convert it to:
  // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
  if (MemVT.isInteger() && SN->isTruncatingStore()) {
    if (SDValue Value =
            combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
      DCI.AddToWorklist(Value.getNode());

      // Rewrite the store with the new form of stored value.
return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value, SN->getBasePtr(), SN->getMemoryVT(), SN->getMemOperand()); } } // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR if (!SN->isTruncatingStore() && Op1.getOpcode() == ISD::BSWAP && Op1.getNode()->hasOneUse() && canLoadStoreByteSwapped(Op1.getValueType())) { SDValue BSwapOp = Op1.getOperand(0); if (BSwapOp.getValueType() == MVT::i16) BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp); SDValue Ops[] = { N->getOperand(0), BSwapOp, N->getOperand(2) }; return DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other), Ops, MemVT, SN->getMemOperand()); } // Combine STORE (element-swap) into VSTER if (!SN->isTruncatingStore() && Op1.getOpcode() == ISD::VECTOR_SHUFFLE && Op1.getNode()->hasOneUse() && Subtarget.hasVectorEnhancements2()) { ShuffleVectorSDNode *SVN = cast(Op1.getNode()); ArrayRef ShuffleMask = SVN->getMask(); if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) { SDValue Ops[] = { N->getOperand(0), Op1.getOperand(0), N->getOperand(2) }; return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N), DAG.getVTList(MVT::Other), Ops, MemVT, SN->getMemOperand()); } } return SDValue(); } SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; // Combine element-swap (LOAD) into VLER if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && N->getOperand(0).hasOneUse() && Subtarget.hasVectorEnhancements2()) { ShuffleVectorSDNode *SVN = cast(N); ArrayRef ShuffleMask = SVN->getMask(); if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) { SDValue Load = N->getOperand(0); LoadSDNode *LD = cast(Load); // Create the element-swapping load. SDValue Ops[] = { LD->getChain(), // Chain LD->getBasePtr() // Ptr }; SDValue ESLoad = DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N), DAG.getVTList(LD->getValueType(0), MVT::Other), Ops, LD->getMemoryVT(), LD->getMemOperand()); // First, combine the VECTOR_SHUFFLE away. This makes the value produced // by the load dead. DCI.CombineTo(N, ESLoad); // Next, combine the load away, we give it a bogus result value but a real // chain result. The result value is dead because the shuffle is dead. DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1)); // Return N so it doesn't get rechecked! return SDValue(N, 0); } } return SDValue(); } SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; if (!Subtarget.hasVector()) return SDValue(); // Look through bitcasts that retain the number of vector elements. SDValue Op = N->getOperand(0); if (Op.getOpcode() == ISD::BITCAST && Op.getValueType().isVector() && Op.getOperand(0).getValueType().isVector() && Op.getValueType().getVectorNumElements() == Op.getOperand(0).getValueType().getVectorNumElements()) Op = Op.getOperand(0); // Pull BSWAP out of a vector extraction. if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) { EVT VecVT = Op.getValueType(); EVT EltVT = VecVT.getVectorElementType(); Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT, Op.getOperand(0), N->getOperand(1)); DCI.AddToWorklist(Op.getNode()); Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op); if (EltVT != N->getValueType(0)) { DCI.AddToWorklist(Op.getNode()); Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op); } return Op; } // Try to simplify a vector extraction. 
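  // combineExtract can look through bitcasts, shuffles and BUILD_VECTORs when
  // the index is a constant.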
if (auto *IndexN = dyn_cast(N->getOperand(1))) { SDValue Op0 = N->getOperand(0); EVT VecVT = Op0.getValueType(); return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0, IndexN->getZExtValue(), DCI, false); } return SDValue(); } SDValue SystemZTargetLowering::combineJOIN_DWORDS( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; // (join_dwords X, X) == (replicate X) if (N->getOperand(0) == N->getOperand(1)) return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0), N->getOperand(0)); return SDValue(); } static SDValue MergeInputChains(SDNode *N1, SDNode *N2) { SDValue Chain1 = N1->getOperand(0); SDValue Chain2 = N2->getOperand(0); // Trivial case: both nodes take the same chain. if (Chain1 == Chain2) return Chain1; // FIXME - we could handle more complex cases via TokenFactor, // assuming we can verify that this would not create a cycle. return SDValue(); } SDValue SystemZTargetLowering::combineFP_ROUND( SDNode *N, DAGCombinerInfo &DCI) const { if (!Subtarget.hasVector()) return SDValue(); // (fpround (extract_vector_elt X 0)) // (fpround (extract_vector_elt X 1)) -> // (extract_vector_elt (VROUND X) 0) // (extract_vector_elt (VROUND X) 2) // // This is a special case since the target doesn't really support v2f32s. unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0; SelectionDAG &DAG = DCI.DAG; SDValue Op0 = N->getOperand(OpNo); if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() && Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && Op0.getOperand(0).getValueType() == MVT::v2f64 && Op0.getOperand(1).getOpcode() == ISD::Constant && cast(Op0.getOperand(1))->getZExtValue() == 0) { SDValue Vec = Op0.getOperand(0); for (auto *U : Vec->uses()) { if (U != Op0.getNode() && U->hasOneUse() && U->getOpcode() == ISD::EXTRACT_VECTOR_ELT && U->getOperand(0) == Vec && U->getOperand(1).getOpcode() == ISD::Constant && cast(U->getOperand(1))->getZExtValue() == 1) { SDValue OtherRound = SDValue(*U->use_begin(), 0); if (OtherRound.getOpcode() == N->getOpcode() && OtherRound.getOperand(OpNo) == SDValue(U, 0) && OtherRound.getValueType() == MVT::f32) { SDValue VRound, Chain; if (N->isStrictFPOpcode()) { Chain = MergeInputChains(N, OtherRound.getNode()); if (!Chain) continue; VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N), {MVT::v4f32, MVT::Other}, {Chain, Vec}); Chain = VRound.getValue(1); } else VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N), MVT::v4f32, Vec); DCI.AddToWorklist(VRound.getNode()); SDValue Extract1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32, VRound, DAG.getConstant(2, SDLoc(U), MVT::i32)); DCI.AddToWorklist(Extract1.getNode()); DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1); if (Chain) DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain); SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32)); if (Chain) return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0), N->getVTList(), Extract0, Chain); return Extract0; } } } } return SDValue(); } SDValue SystemZTargetLowering::combineFP_EXTEND( SDNode *N, DAGCombinerInfo &DCI) const { if (!Subtarget.hasVector()) return SDValue(); // (fpextend (extract_vector_elt X 0)) // (fpextend (extract_vector_elt X 2)) -> // (extract_vector_elt (VEXTEND X) 0) // (extract_vector_elt (VEXTEND X) 1) // // This is a special case since the target doesn't really support v2f32s. unsigned OpNo = N->isStrictFPOpcode() ? 
1 : 0; SelectionDAG &DAG = DCI.DAG; SDValue Op0 = N->getOperand(OpNo); if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() && Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && Op0.getOperand(0).getValueType() == MVT::v4f32 && Op0.getOperand(1).getOpcode() == ISD::Constant && cast(Op0.getOperand(1))->getZExtValue() == 0) { SDValue Vec = Op0.getOperand(0); for (auto *U : Vec->uses()) { if (U != Op0.getNode() && U->hasOneUse() && U->getOpcode() == ISD::EXTRACT_VECTOR_ELT && U->getOperand(0) == Vec && U->getOperand(1).getOpcode() == ISD::Constant && cast(U->getOperand(1))->getZExtValue() == 2) { SDValue OtherExtend = SDValue(*U->use_begin(), 0); if (OtherExtend.getOpcode() == N->getOpcode() && OtherExtend.getOperand(OpNo) == SDValue(U, 0) && OtherExtend.getValueType() == MVT::f64) { SDValue VExtend, Chain; if (N->isStrictFPOpcode()) { Chain = MergeInputChains(N, OtherExtend.getNode()); if (!Chain) continue; VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N), {MVT::v2f64, MVT::Other}, {Chain, Vec}); Chain = VExtend.getValue(1); } else VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N), MVT::v2f64, Vec); DCI.AddToWorklist(VExtend.getNode()); SDValue Extract1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64, VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32)); DCI.AddToWorklist(Extract1.getNode()); DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1); if (Chain) DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain); SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64, VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32)); if (Chain) return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0), N->getVTList(), Extract0, Chain); return Extract0; } } } } return SDValue(); } SDValue SystemZTargetLowering::combineINT_TO_FP( SDNode *N, DAGCombinerInfo &DCI) const { if (DCI.Level != BeforeLegalizeTypes) return SDValue(); unsigned Opcode = N->getOpcode(); EVT OutVT = N->getValueType(0); SelectionDAG &DAG = DCI.DAG; SDValue Op = N->getOperand(0); unsigned OutScalarBits = OutVT.getScalarSizeInBits(); unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits(); // Insert an extension before type-legalization to avoid scalarization, e.g.: // v2f64 = uint_to_fp v2i16 // => // v2f64 = uint_to_fp (v2i64 zero_extend v2i16) if (OutVT.isVector() && OutScalarBits > InScalarBits) { MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(OutVT.getScalarSizeInBits()), OutVT.getVectorNumElements()); unsigned ExtOpcode = (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND); SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op); return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp); } return SDValue(); } SDValue SystemZTargetLowering::combineBSWAP( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && N->getOperand(0).hasOneUse() && canLoadStoreByteSwapped(N->getValueType(0))) { SDValue Load = N->getOperand(0); LoadSDNode *LD = cast(Load); // Create the byte-swapping load. SDValue Ops[] = { LD->getChain(), // Chain LD->getBasePtr() // Ptr }; EVT LoadVT = N->getValueType(0); if (LoadVT == MVT::i16) LoadVT = MVT::i32; SDValue BSLoad = DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N), DAG.getVTList(LoadVT, MVT::Other), Ops, LD->getMemoryVT(), LD->getMemOperand()); // If this is an i16 load, insert the truncate. 
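    // The byte-swapping load was created with an i32 result above, so an i16
    // BSWAP needs a truncate back to i16.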
SDValue ResVal = BSLoad; if (N->getValueType(0) == MVT::i16) ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad); // First, combine the bswap away. This makes the value produced by the // load dead. DCI.CombineTo(N, ResVal); // Next, combine the load away, we give it a bogus result value but a real // chain result. The result value is dead because the bswap is dead. DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); // Return N so it doesn't get rechecked! return SDValue(N, 0); } // Look through bitcasts that retain the number of vector elements. SDValue Op = N->getOperand(0); if (Op.getOpcode() == ISD::BITCAST && Op.getValueType().isVector() && Op.getOperand(0).getValueType().isVector() && Op.getValueType().getVectorNumElements() == Op.getOperand(0).getValueType().getVectorNumElements()) Op = Op.getOperand(0); // Push BSWAP into a vector insertion if at least one side then simplifies. if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) { SDValue Vec = Op.getOperand(0); SDValue Elt = Op.getOperand(1); SDValue Idx = Op.getOperand(2); if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) || Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() || DAG.isConstantIntBuildVectorOrConstantInt(Elt) || Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() || (canLoadStoreByteSwapped(N->getValueType(0)) && ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) { EVT VecVT = N->getValueType(0); EVT EltVT = N->getValueType(0).getVectorElementType(); if (VecVT != Vec.getValueType()) { Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec); DCI.AddToWorklist(Vec.getNode()); } if (EltVT != Elt.getValueType()) { Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt); DCI.AddToWorklist(Elt.getNode()); } Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec); DCI.AddToWorklist(Vec.getNode()); Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt); DCI.AddToWorklist(Elt.getNode()); return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT, Vec, Elt, Idx); } } // Push BSWAP into a vector shuffle if at least one side then simplifies. ShuffleVectorSDNode *SV = dyn_cast(Op); if (SV && Op.hasOneUse()) { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) || Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() || DAG.isConstantIntBuildVectorOrConstantInt(Op1) || Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) { EVT VecVT = N->getValueType(0); if (VecVT != Op0.getValueType()) { Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0); DCI.AddToWorklist(Op0.getNode()); } if (VecVT != Op1.getValueType()) { Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1); DCI.AddToWorklist(Op1.getNode()); } Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0); DCI.AddToWorklist(Op0.getNode()); Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1); DCI.AddToWorklist(Op1.getNode()); return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask()); } } return SDValue(); } static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code // set by the CCReg instruction using the CCValid / CCMask masks, // If the CCReg instruction is itself a ICMP testing the condition // code set by some other instruction, see whether we can directly // use that condition code. // Verify that we have an ICMP against some constant. 
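  // For example, (br_ccmask (icmp (select_ccmask 1, 0, Valid, Mask), 0), NE)
  // can branch on Valid/Mask directly.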
if (CCValid != SystemZ::CCMASK_ICMP) return false; auto *ICmp = CCReg.getNode(); if (ICmp->getOpcode() != SystemZISD::ICMP) return false; auto *CompareLHS = ICmp->getOperand(0).getNode(); auto *CompareRHS = dyn_cast(ICmp->getOperand(1)); if (!CompareRHS) return false; // Optimize the case where CompareLHS is a SELECT_CCMASK. if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) { // Verify that we have an appropriate mask for a EQ or NE comparison. bool Invert = false; if (CCMask == SystemZ::CCMASK_CMP_NE) Invert = !Invert; else if (CCMask != SystemZ::CCMASK_CMP_EQ) return false; // Verify that the ICMP compares against one of select values. auto *TrueVal = dyn_cast(CompareLHS->getOperand(0)); if (!TrueVal) return false; auto *FalseVal = dyn_cast(CompareLHS->getOperand(1)); if (!FalseVal) return false; if (CompareRHS->getZExtValue() == FalseVal->getZExtValue()) Invert = !Invert; else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue()) return false; // Compute the effective CC mask for the new branch or select. auto *NewCCValid = dyn_cast(CompareLHS->getOperand(2)); auto *NewCCMask = dyn_cast(CompareLHS->getOperand(3)); if (!NewCCValid || !NewCCMask) return false; CCValid = NewCCValid->getZExtValue(); CCMask = NewCCMask->getZExtValue(); if (Invert) CCMask ^= CCValid; // Return the updated CCReg link. CCReg = CompareLHS->getOperand(4); return true; } // Optimize the case where CompareRHS is (SRA (SHL (IPM))). if (CompareLHS->getOpcode() == ISD::SRA) { auto *SRACount = dyn_cast(CompareLHS->getOperand(1)); if (!SRACount || SRACount->getZExtValue() != 30) return false; auto *SHL = CompareLHS->getOperand(0).getNode(); if (SHL->getOpcode() != ISD::SHL) return false; auto *SHLCount = dyn_cast(SHL->getOperand(1)); if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC) return false; auto *IPM = SHL->getOperand(0).getNode(); if (IPM->getOpcode() != SystemZISD::IPM) return false; // Avoid introducing CC spills (because SRA would clobber CC). if (!CompareLHS->hasOneUse()) return false; // Verify that the ICMP compares against zero. if (CompareRHS->getZExtValue() != 0) return false; // Compute the effective CC mask for the new branch or select. CCMask = SystemZ::reverseCCMask(CCMask); // Return the updated CCReg link. CCReg = IPM->getOperand(0); return true; } return false; } SDValue SystemZTargetLowering::combineBR_CCMASK( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK. auto *CCValid = dyn_cast(N->getOperand(1)); auto *CCMask = dyn_cast(N->getOperand(2)); if (!CCValid || !CCMask) return SDValue(); int CCValidVal = CCValid->getZExtValue(); int CCMaskVal = CCMask->getZExtValue(); SDValue Chain = N->getOperand(0); SDValue CCReg = N->getOperand(4); if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0), Chain, DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), N->getOperand(3), CCReg); return SDValue(); } SDValue SystemZTargetLowering::combineSELECT_CCMASK( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK. 
auto *CCValid = dyn_cast(N->getOperand(2)); auto *CCMask = dyn_cast(N->getOperand(3)); if (!CCValid || !CCMask) return SDValue(); int CCValidVal = CCValid->getZExtValue(); int CCMaskVal = CCMask->getZExtValue(); SDValue CCReg = N->getOperand(4); if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), N->getOperand(0), N->getOperand(1), DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), CCReg); return SDValue(); } SDValue SystemZTargetLowering::combineGET_CCMASK( SDNode *N, DAGCombinerInfo &DCI) const { // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible auto *CCValid = dyn_cast(N->getOperand(1)); auto *CCMask = dyn_cast(N->getOperand(2)); if (!CCValid || !CCMask) return SDValue(); int CCValidVal = CCValid->getZExtValue(); int CCMaskVal = CCMask->getZExtValue(); SDValue Select = N->getOperand(0); if (Select->getOpcode() != SystemZISD::SELECT_CCMASK) return SDValue(); auto *SelectCCValid = dyn_cast(Select->getOperand(2)); auto *SelectCCMask = dyn_cast(Select->getOperand(3)); if (!SelectCCValid || !SelectCCMask) return SDValue(); int SelectCCValidVal = SelectCCValid->getZExtValue(); int SelectCCMaskVal = SelectCCMask->getZExtValue(); auto *TrueVal = dyn_cast(Select->getOperand(0)); auto *FalseVal = dyn_cast(Select->getOperand(1)); if (!TrueVal || !FalseVal) return SDValue(); if (TrueVal->getZExtValue() != 0 && FalseVal->getZExtValue() == 0) ; else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() != 0) SelectCCMaskVal ^= SelectCCValidVal; else return SDValue(); if (SelectCCValidVal & ~CCValidVal) return SDValue(); if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal)) return SDValue(); return Select->getOperand(4); } SDValue SystemZTargetLowering::combineIntDIVREM( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; EVT VT = N->getValueType(0); // In the case where the divisor is a vector of constants a cheaper // sequence of instructions can replace the divide. BuildSDIV is called to // do this during DAG combining, but it only succeeds when it can build a // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and // since it is not Legal but Custom it can only happen before // legalization. Therefore we must scalarize this early before Combine // 1. For widened vectors, this is already the result of type legalization. if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) && DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1))) return DAG.UnrollVectorOp(N); return SDValue(); } SDValue SystemZTargetLowering::combineINTRINSIC( SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; unsigned Id = cast(N->getOperand(1))->getZExtValue(); switch (Id) { // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15 // or larger is simply a vector load. case Intrinsic::s390_vll: case Intrinsic::s390_vlrl: if (auto *C = dyn_cast(N->getOperand(2))) if (C->getZExtValue() >= 15) return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0), N->getOperand(3), MachinePointerInfo()); break; // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH. 
case Intrinsic::s390_vstl: case Intrinsic::s390_vstrl: if (auto *C = dyn_cast(N->getOperand(3))) if (C->getZExtValue() >= 15) return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2), N->getOperand(4), MachinePointerInfo()); break; } return SDValue(); } SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const { if (N->getOpcode() == SystemZISD::PCREL_WRAPPER) return N->getOperand(0); return N; } SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch(N->getOpcode()) { default: break; case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI); case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI); case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI); case SystemZISD::MERGE_HIGH: case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI); case ISD::LOAD: return combineLOAD(N, DCI); case ISD::STORE: return combineSTORE(N, DCI); case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI); case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI); case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI); case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: return combineFP_ROUND(N, DCI); case ISD::STRICT_FP_EXTEND: case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI); case ISD::BSWAP: return combineBSWAP(N, DCI); case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI); case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI); case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI); case ISD::SDIV: case ISD::UDIV: case ISD::SREM: case ISD::UREM: return combineIntDIVREM(N, DCI); case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI); } return SDValue(); } // Return the demanded elements for the OpNo source operand of Op. DemandedElts // are for Op. static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, unsigned OpNo) { EVT VT = Op.getValueType(); unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1); APInt SrcDemE; unsigned Opcode = Op.getOpcode(); if (Opcode == ISD::INTRINSIC_WO_CHAIN) { unsigned Id = cast(Op.getOperand(0))->getZExtValue(); switch (Id) { case Intrinsic::s390_vpksh: // PACKS case Intrinsic::s390_vpksf: case Intrinsic::s390_vpksg: case Intrinsic::s390_vpkshs: // PACKS_CC case Intrinsic::s390_vpksfs: case Intrinsic::s390_vpksgs: case Intrinsic::s390_vpklsh: // PACKLS case Intrinsic::s390_vpklsf: case Intrinsic::s390_vpklsg: case Intrinsic::s390_vpklshs: // PACKLS_CC case Intrinsic::s390_vpklsfs: case Intrinsic::s390_vpklsgs: // VECTOR PACK truncates the elements of two source vectors into one. SrcDemE = DemandedElts; if (OpNo == 2) SrcDemE.lshrInPlace(NumElts / 2); SrcDemE = SrcDemE.trunc(NumElts / 2); break; // VECTOR UNPACK extends half the elements of the source vector. 
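    // For example, when unpacking a v8i16 source to v4i32, UNPACK HIGH reads
    // source elements 0-3 and UNPACK LOW reads source elements 4-7.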
case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH case Intrinsic::s390_vuphh: case Intrinsic::s390_vuphf: case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH case Intrinsic::s390_vuplhh: case Intrinsic::s390_vuplhf: SrcDemE = APInt(NumElts * 2, 0); SrcDemE.insertBits(DemandedElts, 0); break; case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW case Intrinsic::s390_vuplhw: case Intrinsic::s390_vuplf: case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW case Intrinsic::s390_vupllh: case Intrinsic::s390_vupllf: SrcDemE = APInt(NumElts * 2, 0); SrcDemE.insertBits(DemandedElts, NumElts); break; case Intrinsic::s390_vpdi: { // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source. SrcDemE = APInt(NumElts, 0); if (!DemandedElts[OpNo - 1]) break; unsigned Mask = cast(Op.getOperand(3))->getZExtValue(); unsigned MaskBit = ((OpNo - 1) ? 1 : 4); // Demand input element 0 or 1, given by the mask bit value. SrcDemE.setBit((Mask & MaskBit)? 1 : 0); break; } case Intrinsic::s390_vsldb: { // VECTOR SHIFT LEFT DOUBLE BY BYTE assert(VT == MVT::v16i8 && "Unexpected type."); unsigned FirstIdx = cast(Op.getOperand(3))->getZExtValue(); assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand."); unsigned NumSrc0Els = 16 - FirstIdx; SrcDemE = APInt(NumElts, 0); if (OpNo == 1) { APInt DemEls = DemandedElts.trunc(NumSrc0Els); SrcDemE.insertBits(DemEls, FirstIdx); } else { APInt DemEls = DemandedElts.lshr(NumSrc0Els); SrcDemE.insertBits(DemEls, 0); } break; } case Intrinsic::s390_vperm: SrcDemE = APInt(NumElts, 1); break; default: llvm_unreachable("Unhandled intrinsic."); break; } } else { switch (Opcode) { case SystemZISD::JOIN_DWORDS: // Scalar operand. SrcDemE = APInt(1, 1); break; case SystemZISD::SELECT_CCMASK: SrcDemE = DemandedElts; break; default: llvm_unreachable("Unhandled opcode."); break; } } return SrcDemE; } static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo) { APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo); APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1); KnownBits LHSKnown = DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1); KnownBits RHSKnown = DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1); Known = KnownBits::commonBits(LHSKnown, RHSKnown); } void SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { Known.resetAll(); // Intrinsic CC result is returned in the two low bits. 
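  // CC is in the range 0-3, so every bit above the low two is known zero.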
unsigned tmp0, tmp1; // not used if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) { Known.Zero.setBitsFrom(2); return; } EVT VT = Op.getValueType(); if (Op.getResNo() != 0 || VT == MVT::Untyped) return; assert (Known.getBitWidth() == VT.getScalarSizeInBits() && "KnownBits does not match VT in bitwidth"); assert ((!VT.isVector() || (DemandedElts.getBitWidth() == VT.getVectorNumElements())) && "DemandedElts does not match VT number of elements"); unsigned BitWidth = Known.getBitWidth(); unsigned Opcode = Op.getOpcode(); if (Opcode == ISD::INTRINSIC_WO_CHAIN) { bool IsLogical = false; unsigned Id = cast(Op.getOperand(0))->getZExtValue(); switch (Id) { case Intrinsic::s390_vpksh: // PACKS case Intrinsic::s390_vpksf: case Intrinsic::s390_vpksg: case Intrinsic::s390_vpkshs: // PACKS_CC case Intrinsic::s390_vpksfs: case Intrinsic::s390_vpksgs: case Intrinsic::s390_vpklsh: // PACKLS case Intrinsic::s390_vpklsf: case Intrinsic::s390_vpklsg: case Intrinsic::s390_vpklshs: // PACKLS_CC case Intrinsic::s390_vpklsfs: case Intrinsic::s390_vpklsgs: case Intrinsic::s390_vpdi: case Intrinsic::s390_vsldb: case Intrinsic::s390_vperm: computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1); break; case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH case Intrinsic::s390_vuplhh: case Intrinsic::s390_vuplhf: case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW case Intrinsic::s390_vupllh: case Intrinsic::s390_vupllf: IsLogical = true; LLVM_FALLTHROUGH; case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH case Intrinsic::s390_vuphh: case Intrinsic::s390_vuphf: case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW case Intrinsic::s390_vuplhw: case Intrinsic::s390_vuplf: { SDValue SrcOp = Op.getOperand(1); APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0); Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1); if (IsLogical) { Known = Known.zext(BitWidth); } else Known = Known.sext(BitWidth); break; } default: break; } } else { switch (Opcode) { case SystemZISD::JOIN_DWORDS: case SystemZISD::SELECT_CCMASK: computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0); break; case SystemZISD::REPLICATE: { SDValue SrcOp = Op.getOperand(0); Known = DAG.computeKnownBits(SrcOp, Depth + 1); if (Known.getBitWidth() < BitWidth && isa(SrcOp)) Known = Known.sext(BitWidth); // VREPI sign extends the immedate. break; } default: break; } } // Known has the width of the source operand(s). Adjust if needed to match // the passed bitwidth. if (Known.getBitWidth() != BitWidth) Known = Known.anyextOrTrunc(BitWidth); } static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo) { APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo); unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1); if (LHS == 1) return 1; // Early out. APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1); unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1); if (RHS == 1) return 1; // Early out. 
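  // The result cannot be assumed to have more sign bits than either source
  // provides; PACK-style nodes additionally lose the truncated bits
  // (handled below via SrcExtraBits).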
unsigned Common = std::min(LHS, RHS); unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits(); EVT VT = Op.getValueType(); unsigned VTBits = VT.getScalarSizeInBits(); if (SrcBitWidth > VTBits) { // PACK unsigned SrcExtraBits = SrcBitWidth - VTBits; if (Common > SrcExtraBits) return (Common - SrcExtraBits); return 1; } assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth."); return Common; } unsigned SystemZTargetLowering::ComputeNumSignBitsForTargetNode( SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { if (Op.getResNo() != 0) return 1; unsigned Opcode = Op.getOpcode(); if (Opcode == ISD::INTRINSIC_WO_CHAIN) { unsigned Id = cast(Op.getOperand(0))->getZExtValue(); switch (Id) { case Intrinsic::s390_vpksh: // PACKS case Intrinsic::s390_vpksf: case Intrinsic::s390_vpksg: case Intrinsic::s390_vpkshs: // PACKS_CC case Intrinsic::s390_vpksfs: case Intrinsic::s390_vpksgs: case Intrinsic::s390_vpklsh: // PACKLS case Intrinsic::s390_vpklsf: case Intrinsic::s390_vpklsg: case Intrinsic::s390_vpklshs: // PACKLS_CC case Intrinsic::s390_vpklsfs: case Intrinsic::s390_vpklsgs: case Intrinsic::s390_vpdi: case Intrinsic::s390_vsldb: case Intrinsic::s390_vperm: return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1); case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH case Intrinsic::s390_vuphh: case Intrinsic::s390_vuphf: case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW case Intrinsic::s390_vuplhw: case Intrinsic::s390_vuplf: { SDValue PackedOp = Op.getOperand(1); APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1); unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1); EVT VT = Op.getValueType(); unsigned VTBits = VT.getScalarSizeInBits(); Tmp += VTBits - PackedOp.getScalarValueSizeInBits(); return Tmp; } default: break; } } else { switch (Opcode) { case SystemZISD::SELECT_CCMASK: return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0); default: break; } } return 1; } unsigned SystemZTargetLowering::getStackProbeSize(MachineFunction &MF) const { const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); unsigned StackAlign = TFI->getStackAlignment(); assert(StackAlign >=1 && isPowerOf2_32(StackAlign) && "Unexpected stack alignment"); // The default stack probe size is 4096 if the function has no // stack-probe-size attribute. unsigned StackProbeSize = 4096; const Function &Fn = MF.getFunction(); if (Fn.hasFnAttribute("stack-probe-size")) Fn.getFnAttribute("stack-probe-size") .getValueAsString() .getAsInteger(0, StackProbeSize); // Round down to the stack alignment. StackProbeSize &= ~(StackAlign - 1); return StackProbeSize ? StackProbeSize : StackAlign; } //===----------------------------------------------------------------------===// // Custom insertion //===----------------------------------------------------------------------===// // Force base value Base into a register before MI. Return the register. static Register forceReg(MachineInstr &MI, MachineOperand &Base, const SystemZInstrInfo *TII) { if (Base.isReg()) return Base.getReg(); MachineBasicBlock *MBB = MI.getParent(); MachineFunction &MF = *MBB->getParent(); MachineRegisterInfo &MRI = MF.getRegInfo(); Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg) .add(Base) .addImm(0) .addReg(0); return Reg; } // The CC operand of MI might be missing a kill marker because there // were multiple uses of CC, and ISel didn't know which to mark. 
// Figure out whether MI should have had a kill marker. static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) { // Scan forward through BB for a use/def of CC. MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI))); for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) { const MachineInstr& mi = *miI; if (mi.readsRegister(SystemZ::CC)) return false; if (mi.definesRegister(SystemZ::CC)) break; // Should have kill-flag - update below. } // If we hit the end of the block, check whether CC is live into a // successor. if (miI == MBB->end()) { for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) if ((*SI)->isLiveIn(SystemZ::CC)) return false; } return true; } // Return true if it is OK for this Select pseudo-opcode to be cascaded // together with other Select pseudo-opcodes into a single basic-block with // a conditional jump around it. static bool isSelectPseudo(MachineInstr &MI) { switch (MI.getOpcode()) { case SystemZ::Select32: case SystemZ::Select64: case SystemZ::SelectF32: case SystemZ::SelectF64: case SystemZ::SelectF128: case SystemZ::SelectVR32: case SystemZ::SelectVR64: case SystemZ::SelectVR128: return true; default: return false; } } // Helper function, which inserts PHI functions into SinkMBB: // %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ], // where %FalseValue(i) and %TrueValue(i) are taken from Selects. static void createPHIsForSelects(SmallVector &Selects, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *SinkMBB) { MachineFunction *MF = TrueMBB->getParent(); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); MachineInstr *FirstMI = Selects.front(); unsigned CCValid = FirstMI->getOperand(3).getImm(); unsigned CCMask = FirstMI->getOperand(4).getImm(); MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin(); // As we are creating the PHIs, we have to be careful if there is more than // one. Later Selects may reference the results of earlier Selects, but later // PHIs have to reference the individual true/false inputs from earlier PHIs. // That also means that PHI construction must work forward from earlier to // later, and that the code must maintain a mapping from earlier PHI's // destination registers, and the registers that went into the PHI. DenseMap> RegRewriteTable; for (auto MI : Selects) { Register DestReg = MI->getOperand(0).getReg(); Register TrueReg = MI->getOperand(1).getReg(); Register FalseReg = MI->getOperand(2).getReg(); // If this Select we are generating is the opposite condition from // the jump we generated, then we have to swap the operands for the // PHI that is going to be generated. if (MI->getOperand(4).getImm() == (CCValid ^ CCMask)) std::swap(TrueReg, FalseReg); if (RegRewriteTable.find(TrueReg) != RegRewriteTable.end()) TrueReg = RegRewriteTable[TrueReg].first; if (RegRewriteTable.find(FalseReg) != RegRewriteTable.end()) FalseReg = RegRewriteTable[FalseReg].second; DebugLoc DL = MI->getDebugLoc(); BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg) .addReg(TrueReg).addMBB(TrueMBB) .addReg(FalseReg).addMBB(FalseMBB); // Add this PHI to the rewrite table. RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg); } MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); } // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI. 
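// Added illustrative sketch (not part of the original patch): a cascaded
// select may use either the branch mask or its negation within CCValid; the
// negated mask is formed by XOR-ing with CCValid, and such a select simply
// swaps its PHI operands (see the std::swap in createPHIsForSelects above).
// Assuming the usual SystemZ encoding in which an integer compare leaves
// CC 0/1/2 valid (mask 0xE) and "equal" is CC 0 (mask 0x8):
static_assert((0xEu ^ 0x8u) == 0x6u,
              "inverting within CCValid yields the CC1|CC2 (less/greater) mask");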
MachineBasicBlock * SystemZTargetLowering::emitSelect(MachineInstr &MI, MachineBasicBlock *MBB) const { assert(isSelectPseudo(MI) && "Bad call to emitSelect()"); const SystemZInstrInfo *TII = static_cast(Subtarget.getInstrInfo()); unsigned CCValid = MI.getOperand(3).getImm(); unsigned CCMask = MI.getOperand(4).getImm(); // If we have a sequence of Select* pseudo instructions using the // same condition code value, we want to expand all of them into // a single pair of basic blocks using the same condition. SmallVector Selects; SmallVector DbgValues; Selects.push_back(&MI); unsigned Count = 0; for (MachineBasicBlock::iterator NextMIIt = std::next(MachineBasicBlock::iterator(MI)); NextMIIt != MBB->end(); ++NextMIIt) { if (isSelectPseudo(*NextMIIt)) { assert(NextMIIt->getOperand(3).getImm() == CCValid && "Bad CCValid operands since CC was not redefined."); if (NextMIIt->getOperand(4).getImm() == CCMask || NextMIIt->getOperand(4).getImm() == (CCValid ^ CCMask)) { Selects.push_back(&*NextMIIt); continue; } break; } if (NextMIIt->definesRegister(SystemZ::CC) || NextMIIt->usesCustomInsertionHook()) break; bool User = false; for (auto SelMI : Selects) if (NextMIIt->readsVirtualRegister(SelMI->getOperand(0).getReg())) { User = true; break; } if (NextMIIt->isDebugInstr()) { if (User) { assert(NextMIIt->isDebugValue() && "Unhandled debug opcode."); DbgValues.push_back(&*NextMIIt); } } else if (User || ++Count > 20) break; } MachineInstr *LastMI = Selects.back(); bool CCKilled = (LastMI->killsRegister(SystemZ::CC) || checkCCKill(*LastMI, MBB)); MachineBasicBlock *StartMBB = MBB; MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB); MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB); // Unless CC was killed in the last Select instruction, mark it as // live-in to both FalseMBB and JoinMBB. if (!CCKilled) { FalseMBB->addLiveIn(SystemZ::CC); JoinMBB->addLiveIn(SystemZ::CC); } // StartMBB: // BRC CCMask, JoinMBB // # fallthrough to FalseMBB MBB = StartMBB; BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC)) .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB); MBB->addSuccessor(JoinMBB); MBB->addSuccessor(FalseMBB); // FalseMBB: // # fallthrough to JoinMBB MBB = FalseMBB; MBB->addSuccessor(JoinMBB); // JoinMBB: // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ] // ... MBB = JoinMBB; createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB); for (auto SelMI : Selects) SelMI->eraseFromParent(); MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI(); for (auto DbgMI : DbgValues) MBB->splice(InsertPos, StartMBB, DbgMI); return JoinMBB; } // Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI. // StoreOpcode is the store to use and Invert says whether the store should // happen when the condition is false rather than true. If a STORE ON // CONDITION is available, STOCOpcode is its opcode, otherwise it is 0. 
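// Added illustrative sketch (not part of the original patch): when no STORE ON
// CONDITION instruction is available, the expansion below is just a branch
// around a plain store.  The hypothetical helper condStoreFallback shows the
// equivalent shape in ordinary C++, which is why the code first flips CCMask
// (so the BRC can jump over the store) and only then emits an unconditional
// store in FalseMBB.
static inline void condStoreFallback(int *Addr, int Src, bool CCMatches,
                                     bool Invert) {
  // Store when the condition holds, or when it fails and Invert is set.
  if (CCMatches != Invert)
    *Addr = Src;
}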
MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI, MachineBasicBlock *MBB, unsigned StoreOpcode, unsigned STOCOpcode, bool Invert) const { const SystemZInstrInfo *TII = static_cast(Subtarget.getInstrInfo()); Register SrcReg = MI.getOperand(0).getReg(); MachineOperand Base = MI.getOperand(1); int64_t Disp = MI.getOperand(2).getImm(); Register IndexReg = MI.getOperand(3).getReg(); unsigned CCValid = MI.getOperand(4).getImm(); unsigned CCMask = MI.getOperand(5).getImm(); DebugLoc DL = MI.getDebugLoc(); StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp); // ISel pattern matching also adds a load memory operand of the same // address, so take special care to find the storing memory operand. MachineMemOperand *MMO = nullptr; for (auto *I : MI.memoperands()) if (I->isStore()) { MMO = I; break; } // Use STOCOpcode if possible. We could use different store patterns in // order to avoid matching the index register, but the performance trade-offs // might be more complicated in that case. if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) { if (Invert) CCMask ^= CCValid; BuildMI(*MBB, MI, DL, TII->get(STOCOpcode)) .addReg(SrcReg) .add(Base) .addImm(Disp) .addImm(CCValid) .addImm(CCMask) .addMemOperand(MMO); MI.eraseFromParent(); return MBB; } // Get the condition needed to branch around the store. if (!Invert) CCMask ^= CCValid; MachineBasicBlock *StartMBB = MBB; MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB); MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB); // Unless CC was killed in the CondStore instruction, mark it as // live-in to both FalseMBB and JoinMBB. if (!MI.killsRegister(SystemZ::CC) && !checkCCKill(MI, JoinMBB)) { FalseMBB->addLiveIn(SystemZ::CC); JoinMBB->addLiveIn(SystemZ::CC); } // StartMBB: // BRC CCMask, JoinMBB // # fallthrough to FalseMBB MBB = StartMBB; BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB); MBB->addSuccessor(JoinMBB); MBB->addSuccessor(FalseMBB); // FalseMBB: // store %SrcReg, %Disp(%Index,%Base) // # fallthrough to JoinMBB MBB = FalseMBB; BuildMI(MBB, DL, TII->get(StoreOpcode)) .addReg(SrcReg) .add(Base) .addImm(Disp) .addReg(IndexReg) .addMemOperand(MMO); MBB->addSuccessor(JoinMBB); MI.eraseFromParent(); return JoinMBB; } // Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_* // or ATOMIC_SWAP{,W} instruction MI. BinOpcode is the instruction that // performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}. // BitSize is the width of the field in bits, or 0 if this is a partword // ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize // is one of the operands. Invert says whether the field should be // inverted after performing BinOpcode (e.g. for NAND). MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary( MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode, unsigned BitSize, bool Invert) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = static_cast(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); bool IsSubWord = (BitSize < 32); // Extract the operands. Base can be a register or a frame index. // Src2 can be a register or immediate. Register Dest = MI.getOperand(0).getReg(); MachineOperand Base = earlyUseOperand(MI.getOperand(1)); int64_t Disp = MI.getOperand(2).getImm(); MachineOperand Src2 = earlyUseOperand(MI.getOperand(3)); Register BitShift = IsSubWord ? MI.getOperand(4).getReg() : Register(); Register NegBitShift = IsSubWord ? 
MI.getOperand(5).getReg() : Register(); DebugLoc DL = MI.getDebugLoc(); if (IsSubWord) BitSize = MI.getOperand(6).getImm(); // Subword operations use 32-bit registers. const TargetRegisterClass *RC = (BitSize <= 32 ? &SystemZ::GR32BitRegClass : &SystemZ::GR64BitRegClass); unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG; unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG; // Get the right opcodes for the displacement. LOpcode = TII->getOpcodeForOffset(LOpcode, Disp); CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp); assert(LOpcode && CSOpcode && "Displacement out of range"); // Create virtual registers for temporary results. Register OrigVal = MRI.createVirtualRegister(RC); Register OldVal = MRI.createVirtualRegister(RC); Register NewVal = (BinOpcode || IsSubWord ? MRI.createVirtualRegister(RC) : Src2.getReg()); Register RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal); Register RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal); // Insert a basic block for the main loop. MachineBasicBlock *StartMBB = MBB; MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); // StartMBB: // ... // %OrigVal = L Disp(%Base) // # fall through to LoopMMB MBB = StartMBB; BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0); MBB->addSuccessor(LoopMBB); // LoopMBB: // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ] // %RotatedOldVal = RLL %OldVal, 0(%BitShift) // %RotatedNewVal = OP %RotatedOldVal, %Src2 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift) // %Dest = CS %OldVal, %NewVal, Disp(%Base) // JNE LoopMBB // # fall through to DoneMMB MBB = LoopMBB; BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) .addReg(OrigVal).addMBB(StartMBB) .addReg(Dest).addMBB(LoopMBB); if (IsSubWord) BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal) .addReg(OldVal).addReg(BitShift).addImm(0); if (Invert) { // Perform the operation normally and then invert every bit of the field. Register Tmp = MRI.createVirtualRegister(RC); BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2); if (BitSize <= 32) // XILF with the upper BitSize bits set. BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal) .addReg(Tmp).addImm(-1U << (32 - BitSize)); else { // Use LCGR and add -1 to the result, which is more compact than // an XILF, XILH pair. Register Tmp2 = MRI.createVirtualRegister(RC); BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp); BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal) .addReg(Tmp2).addImm(-1); } } else if (BinOpcode) // A simply binary operation. BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal) .addReg(RotatedOldVal) .add(Src2); else if (IsSubWord) // Use RISBG to rotate Src2 into position and use it to replace the // field in RotatedOldVal. 
BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal) .addReg(RotatedOldVal).addReg(Src2.getReg()) .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize); if (IsSubWord) BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal) .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); BuildMI(MBB, DL, TII->get(CSOpcode), Dest) .addReg(OldVal) .addReg(NewVal) .add(Base) .addImm(Disp); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); MBB->addSuccessor(LoopMBB); MBB->addSuccessor(DoneMBB); MI.eraseFromParent(); return DoneMBB; } // Implement EmitInstrWithCustomInserter for pseudo // ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI. CompareOpcode is the // instruction that should be used to compare the current field with the // minimum or maximum value. KeepOldMask is the BRC condition-code mask // for when the current field should be kept. BitSize is the width of // the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction. MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax( MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode, unsigned KeepOldMask, unsigned BitSize) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = static_cast(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); bool IsSubWord = (BitSize < 32); // Extract the operands. Base can be a register or a frame index. Register Dest = MI.getOperand(0).getReg(); MachineOperand Base = earlyUseOperand(MI.getOperand(1)); int64_t Disp = MI.getOperand(2).getImm(); Register Src2 = MI.getOperand(3).getReg(); Register BitShift = (IsSubWord ? MI.getOperand(4).getReg() : Register()); Register NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : Register()); DebugLoc DL = MI.getDebugLoc(); if (IsSubWord) BitSize = MI.getOperand(6).getImm(); // Subword operations use 32-bit registers. const TargetRegisterClass *RC = (BitSize <= 32 ? &SystemZ::GR32BitRegClass : &SystemZ::GR64BitRegClass); unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG; unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG; // Get the right opcodes for the displacement. LOpcode = TII->getOpcodeForOffset(LOpcode, Disp); CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp); assert(LOpcode && CSOpcode && "Displacement out of range"); // Create virtual registers for temporary results. Register OrigVal = MRI.createVirtualRegister(RC); Register OldVal = MRI.createVirtualRegister(RC); Register NewVal = MRI.createVirtualRegister(RC); Register RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal); Register RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2); Register RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal); // Insert 3 basic blocks for the loop. MachineBasicBlock *StartMBB = MBB; MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB); MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB); // StartMBB: // ... 
// %OrigVal = L Disp(%Base) // # fall through to LoopMMB MBB = StartMBB; BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0); MBB->addSuccessor(LoopMBB); // LoopMBB: // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ] // %RotatedOldVal = RLL %OldVal, 0(%BitShift) // CompareOpcode %RotatedOldVal, %Src2 // BRC KeepOldMask, UpdateMBB MBB = LoopMBB; BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) .addReg(OrigVal).addMBB(StartMBB) .addReg(Dest).addMBB(UpdateMBB); if (IsSubWord) BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal) .addReg(OldVal).addReg(BitShift).addImm(0); BuildMI(MBB, DL, TII->get(CompareOpcode)) .addReg(RotatedOldVal).addReg(Src2); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB); MBB->addSuccessor(UpdateMBB); MBB->addSuccessor(UseAltMBB); // UseAltMBB: // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0 // # fall through to UpdateMMB MBB = UseAltMBB; if (IsSubWord) BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal) .addReg(RotatedOldVal).addReg(Src2) .addImm(32).addImm(31 + BitSize).addImm(0); MBB->addSuccessor(UpdateMBB); // UpdateMBB: // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ], // [ %RotatedAltVal, UseAltMBB ] // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift) // %Dest = CS %OldVal, %NewVal, Disp(%Base) // JNE LoopMBB // # fall through to DoneMMB MBB = UpdateMBB; BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal) .addReg(RotatedOldVal).addMBB(LoopMBB) .addReg(RotatedAltVal).addMBB(UseAltMBB); if (IsSubWord) BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal) .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); BuildMI(MBB, DL, TII->get(CSOpcode), Dest) .addReg(OldVal) .addReg(NewVal) .add(Base) .addImm(Disp); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); MBB->addSuccessor(LoopMBB); MBB->addSuccessor(DoneMBB); MI.eraseFromParent(); return DoneMBB; } // Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW // instruction MI. MachineBasicBlock * SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI, MachineBasicBlock *MBB) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = static_cast(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); // Extract the operands. Base can be a register or a frame index. Register Dest = MI.getOperand(0).getReg(); MachineOperand Base = earlyUseOperand(MI.getOperand(1)); int64_t Disp = MI.getOperand(2).getImm(); Register OrigCmpVal = MI.getOperand(3).getReg(); Register OrigSwapVal = MI.getOperand(4).getReg(); Register BitShift = MI.getOperand(5).getReg(); Register NegBitShift = MI.getOperand(6).getReg(); int64_t BitSize = MI.getOperand(7).getImm(); DebugLoc DL = MI.getDebugLoc(); const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass; // Get the right opcodes for the displacement. unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp); unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp); assert(LOpcode && CSOpcode && "Displacement out of range"); // Create virtual registers for temporary results. 
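// (Added commentary.)  The "Retry" registers created below carry the values
// for the next loop iteration: RetryCmpVal and RetrySwapVal are the expected
// and replacement values with their upper 32-BitSize bits refreshed from the
// word that was just loaded, and RetryOldVal receives the first operand of the
// CS, i.e. the currently stored word when the compare fails, which feeds back
// into the OldVal PHI for the retry.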
Register OrigOldVal = MRI.createVirtualRegister(RC); Register OldVal = MRI.createVirtualRegister(RC); Register CmpVal = MRI.createVirtualRegister(RC); Register SwapVal = MRI.createVirtualRegister(RC); Register StoreVal = MRI.createVirtualRegister(RC); Register RetryOldVal = MRI.createVirtualRegister(RC); Register RetryCmpVal = MRI.createVirtualRegister(RC); Register RetrySwapVal = MRI.createVirtualRegister(RC); // Insert 2 basic blocks for the loop. MachineBasicBlock *StartMBB = MBB; MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB); // StartMBB: // ... // %OrigOldVal = L Disp(%Base) // # fall through to LoopMMB MBB = StartMBB; BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal) .add(Base) .addImm(Disp) .addReg(0); MBB->addSuccessor(LoopMBB); // LoopMBB: // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ] // %CmpVal = phi [ %OrigCmpVal, EntryBB ], [ %RetryCmpVal, SetMBB ] // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ] // %Dest = RLL %OldVal, BitSize(%BitShift) // ^^ The low BitSize bits contain the field // of interest. // %RetryCmpVal = RISBG32 %CmpVal, %Dest, 32, 63-BitSize, 0 // ^^ Replace the upper 32-BitSize bits of the // comparison value with those that we loaded, // so that we can use a full word comparison. // CR %Dest, %RetryCmpVal // JNE DoneMBB // # Fall through to SetMBB MBB = LoopMBB; BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) .addReg(OrigOldVal).addMBB(StartMBB) .addReg(RetryOldVal).addMBB(SetMBB); BuildMI(MBB, DL, TII->get(SystemZ::PHI), CmpVal) .addReg(OrigCmpVal).addMBB(StartMBB) .addReg(RetryCmpVal).addMBB(SetMBB); BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal) .addReg(OrigSwapVal).addMBB(StartMBB) .addReg(RetrySwapVal).addMBB(SetMBB); BuildMI(MBB, DL, TII->get(SystemZ::RLL), Dest) .addReg(OldVal).addReg(BitShift).addImm(BitSize); BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetryCmpVal) .addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0); BuildMI(MBB, DL, TII->get(SystemZ::CR)) .addReg(Dest).addReg(RetryCmpVal); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_ICMP) .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB); MBB->addSuccessor(DoneMBB); MBB->addSuccessor(SetMBB); // SetMBB: // %RetrySwapVal = RISBG32 %SwapVal, %Dest, 32, 63-BitSize, 0 // ^^ Replace the upper 32-BitSize bits of the new // value with those that we loaded. // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift) // ^^ Rotate the new field to its proper position. // %RetryOldVal = CS %Dest, %StoreVal, Disp(%Base) // JNE LoopMBB // # fall through to ExitMMB MBB = SetMBB; BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal) .addReg(SwapVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0); BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal) .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize); BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal) .addReg(OldVal) .addReg(StoreVal) .add(Base) .addImm(Disp); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); MBB->addSuccessor(LoopMBB); MBB->addSuccessor(DoneMBB); // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in // to the block after the loop. At this point, CC may have been defined // either by the CR in LoopMBB or by the CS in SetMBB. 
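// (Added commentary, illustrative only.)  A live CC def typically means the
// cmpxchg's success bit is consumed, e.g. for IR along the lines of
//   %res = cmpxchg i16* %p, i16 %cmp, i16 %new seq_cst seq_cst
//   %ok  = extractvalue { i16, i1 } %res, 1
// the flag is later derived from CC in DoneMBB, so CC must be recorded as
// live-in there.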
if (!MI.registerDefIsDead(SystemZ::CC)) DoneMBB->addLiveIn(SystemZ::CC); MI.eraseFromParent(); return DoneMBB; } // Emit a move from two GR64s to a GR128. MachineBasicBlock * SystemZTargetLowering::emitPair128(MachineInstr &MI, MachineBasicBlock *MBB) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = static_cast(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI.getDebugLoc(); Register Dest = MI.getOperand(0).getReg(); Register Hi = MI.getOperand(1).getReg(); Register Lo = MI.getOperand(2).getReg(); Register Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); Register Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Tmp1); BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Tmp2) .addReg(Tmp1).addReg(Hi).addImm(SystemZ::subreg_h64); BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest) .addReg(Tmp2).addReg(Lo).addImm(SystemZ::subreg_l64); MI.eraseFromParent(); return MBB; } // Emit an extension from a GR64 to a GR128. ClearEven is true // if the high register of the GR128 value must be cleared or false if // it's "don't care". MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI, MachineBasicBlock *MBB, bool ClearEven) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = static_cast(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI.getDebugLoc(); Register Dest = MI.getOperand(0).getReg(); Register Src = MI.getOperand(1).getReg(); Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128); if (ClearEven) { Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64) .addImm(0); BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128) .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64); In128 = NewIn128; } BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest) .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64); MI.eraseFromParent(); return MBB; } MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper( MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = static_cast(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI.getDebugLoc(); MachineOperand DestBase = earlyUseOperand(MI.getOperand(0)); uint64_t DestDisp = MI.getOperand(1).getImm(); MachineOperand SrcBase = earlyUseOperand(MI.getOperand(2)); uint64_t SrcDisp = MI.getOperand(3).getImm(); uint64_t Length = MI.getOperand(4).getImm(); // When generating more than one CLC, all but the last will need to // branch to the end when a difference is found. MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ? SystemZ::splitBlockAfter(MI, MBB) : nullptr); // Check for the loop form, in which operand 5 is the trip count. if (MI.getNumExplicitOperands() > 5) { bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase); Register StartCountReg = MI.getOperand(5).getReg(); Register StartSrcReg = forceReg(MI, SrcBase, TII); Register StartDestReg = (HaveSingleBase ? 
StartSrcReg : forceReg(MI, DestBase, TII)); const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass; Register ThisSrcReg = MRI.createVirtualRegister(RC); Register ThisDestReg = (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC)); Register NextSrcReg = MRI.createVirtualRegister(RC); Register NextDestReg = (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC)); RC = &SystemZ::GR64BitRegClass; Register ThisCountReg = MRI.createVirtualRegister(RC); Register NextCountReg = MRI.createVirtualRegister(RC); MachineBasicBlock *StartMBB = MBB; MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); MachineBasicBlock *NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB); // StartMBB: // # fall through to LoopMMB MBB->addSuccessor(LoopMBB); // LoopMBB: // %ThisDestReg = phi [ %StartDestReg, StartMBB ], // [ %NextDestReg, NextMBB ] // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ], // [ %NextSrcReg, NextMBB ] // %ThisCountReg = phi [ %StartCountReg, StartMBB ], // [ %NextCountReg, NextMBB ] // ( PFD 2, 768+DestDisp(%ThisDestReg) ) // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg) // ( JLH EndMBB ) // // The prefetch is used only for MVC. The JLH is used only for CLC. MBB = LoopMBB; BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg) .addReg(StartDestReg).addMBB(StartMBB) .addReg(NextDestReg).addMBB(NextMBB); if (!HaveSingleBase) BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg) .addReg(StartSrcReg).addMBB(StartMBB) .addReg(NextSrcReg).addMBB(NextMBB); BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg) .addReg(StartCountReg).addMBB(StartMBB) .addReg(NextCountReg).addMBB(NextMBB); if (Opcode == SystemZ::MVC) BuildMI(MBB, DL, TII->get(SystemZ::PFD)) .addImm(SystemZ::PFD_WRITE) .addReg(ThisDestReg).addImm(DestDisp + 768).addReg(0); BuildMI(MBB, DL, TII->get(Opcode)) .addReg(ThisDestReg).addImm(DestDisp).addImm(256) .addReg(ThisSrcReg).addImm(SrcDisp); if (EndMBB) { BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) .addMBB(EndMBB); MBB->addSuccessor(EndMBB); MBB->addSuccessor(NextMBB); } // NextMBB: // %NextDestReg = LA 256(%ThisDestReg) // %NextSrcReg = LA 256(%ThisSrcReg) // %NextCountReg = AGHI %ThisCountReg, -1 // CGHI %NextCountReg, 0 // JLH LoopMBB // # fall through to DoneMMB // // The AGHI, CGHI and JLH should be converted to BRCTG by later passes. MBB = NextMBB; BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg) .addReg(ThisDestReg).addImm(256).addReg(0); if (!HaveSingleBase) BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg) .addReg(ThisSrcReg).addImm(256).addReg(0); BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg) .addReg(ThisCountReg).addImm(-1); BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) .addReg(NextCountReg).addImm(0); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) .addMBB(LoopMBB); MBB->addSuccessor(LoopMBB); MBB->addSuccessor(DoneMBB); DestBase = MachineOperand::CreateReg(NextDestReg, false); SrcBase = MachineOperand::CreateReg(NextSrcReg, false); Length &= 255; if (EndMBB && !Length) // If the loop handled the whole CLC range, DoneMBB will be empty with // CC live-through into EndMBB, so add it as live-in. DoneMBB->addLiveIn(SystemZ::CC); MBB = DoneMBB; } // Handle any remaining bytes with straight-line code. while (Length > 0) { uint64_t ThisLength = std::min(Length, uint64_t(256)); // The previous iteration might have created out-of-range displacements. 
// Apply them using LAY if so. if (!isUInt<12>(DestDisp)) { Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg) .add(DestBase) .addImm(DestDisp) .addReg(0); DestBase = MachineOperand::CreateReg(Reg, false); DestDisp = 0; } if (!isUInt<12>(SrcDisp)) { Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LAY), Reg) .add(SrcBase) .addImm(SrcDisp) .addReg(0); SrcBase = MachineOperand::CreateReg(Reg, false); SrcDisp = 0; } BuildMI(*MBB, MI, DL, TII->get(Opcode)) .add(DestBase) .addImm(DestDisp) .addImm(ThisLength) .add(SrcBase) .addImm(SrcDisp) .setMemRefs(MI.memoperands()); DestDisp += ThisLength; SrcDisp += ThisLength; Length -= ThisLength; // If there's another CLC to go, branch to the end if a difference // was found. if (EndMBB && Length > 0) { MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) .addMBB(EndMBB); MBB->addSuccessor(EndMBB); MBB->addSuccessor(NextMBB); MBB = NextMBB; } } if (EndMBB) { MBB->addSuccessor(EndMBB); MBB = EndMBB; MBB->addLiveIn(SystemZ::CC); } MI.eraseFromParent(); return MBB; } // Decompose string pseudo-instruction MI into a loop that continually performs // Opcode until CC != 3. MachineBasicBlock *SystemZTargetLowering::emitStringWrapper( MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const { MachineFunction &MF = *MBB->getParent(); const SystemZInstrInfo *TII = static_cast(Subtarget.getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI.getDebugLoc(); uint64_t End1Reg = MI.getOperand(0).getReg(); uint64_t Start1Reg = MI.getOperand(1).getReg(); uint64_t Start2Reg = MI.getOperand(2).getReg(); uint64_t CharReg = MI.getOperand(3).getReg(); const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass; uint64_t This1Reg = MRI.createVirtualRegister(RC); uint64_t This2Reg = MRI.createVirtualRegister(RC); uint64_t End2Reg = MRI.createVirtualRegister(RC); MachineBasicBlock *StartMBB = MBB; MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); // StartMBB: // # fall through to LoopMMB MBB->addSuccessor(LoopMBB); // LoopMBB: // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ] // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ] // R0L = %CharReg // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L // JO LoopMBB // # fall through to DoneMMB // // The load of R0L can be hoisted by post-RA LICM. MBB = LoopMBB; BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg) .addReg(Start1Reg).addMBB(StartMBB) .addReg(End1Reg).addMBB(LoopMBB); BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg) .addReg(Start2Reg).addMBB(StartMBB) .addReg(End2Reg).addMBB(LoopMBB); BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg); BuildMI(MBB, DL, TII->get(Opcode)) .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define) .addReg(This1Reg).addReg(This2Reg); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB); MBB->addSuccessor(LoopMBB); MBB->addSuccessor(DoneMBB); DoneMBB->addLiveIn(SystemZ::CC); MI.eraseFromParent(); return DoneMBB; } // Update TBEGIN instruction with final opcode and register clobbers. 
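// (Added commentary, not from the original patch.)  Each entry of
// GPRControlBit below is the TBEGIN general-register-save-mask bit of the
// even/odd pair containing that GPR, so adjacent registers share a bit:
// GPRControlBit[0] == GPRControlBit[1] == 0x8000 (the r0/r1 pair) and
// GPRControlBit[14] == GPRControlBit[15] == 0x0100 (the r14/r15 pair).  The
// pair holding the stack pointer is always added to the mask, and the r10/r11
// pair is added when a frame pointer is in use, so those registers are
// restored if the transaction aborts; any register whose pair bit is not set
// is instead recorded as a clobber.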
MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin( MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode, bool NoFloat) const { MachineFunction &MF = *MBB->getParent(); const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); // Update opcode. MI.setDesc(TII->get(Opcode)); // We cannot handle a TBEGIN that clobbers the stack or frame pointer. // Make sure to add the corresponding GRSM bits if they are missing. uint64_t Control = MI.getOperand(2).getImm(); static const unsigned GPRControlBit[16] = { 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000, 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100 }; Control |= GPRControlBit[15]; if (TFI->hasFP(MF)) Control |= GPRControlBit[11]; MI.getOperand(2).setImm(Control); // Add GPR clobbers. for (int I = 0; I < 16; I++) { if ((Control & GPRControlBit[I]) == 0) { unsigned Reg = SystemZMC::GR64Regs[I]; MI.addOperand(MachineOperand::CreateReg(Reg, true, true)); } } // Add FPR/VR clobbers. if (!NoFloat && (Control & 4) != 0) { if (Subtarget.hasVector()) { for (int I = 0; I < 32; I++) { unsigned Reg = SystemZMC::VR128Regs[I]; MI.addOperand(MachineOperand::CreateReg(Reg, true, true)); } } else { for (int I = 0; I < 16; I++) { unsigned Reg = SystemZMC::FP64Regs[I]; MI.addOperand(MachineOperand::CreateReg(Reg, true, true)); } } } return MBB; } MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0( MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const { MachineFunction &MF = *MBB->getParent(); MachineRegisterInfo *MRI = &MF.getRegInfo(); const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); DebugLoc DL = MI.getDebugLoc(); Register SrcReg = MI.getOperand(0).getReg(); // Create new virtual register of the same class as source. const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); Register DstReg = MRI->createVirtualRegister(RC); // Replace pseudo with a normal load-and-test that models the def as // well.
BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg) .addReg(SrcReg) .setMIFlags(MI.getFlags()); MI.eraseFromParent(); return MBB; } MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca( MachineInstr &MI, MachineBasicBlock *MBB) const { MachineFunction &MF = *MBB->getParent(); MachineRegisterInfo *MRI = &MF.getRegInfo(); const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); DebugLoc DL = MI.getDebugLoc(); const unsigned ProbeSize = getStackProbeSize(MF); Register DstReg = MI.getOperand(0).getReg(); Register SizeReg = MI.getOperand(2).getReg(); MachineBasicBlock *StartMBB = MBB; MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB); MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB); MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB); MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB); MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB); MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(), MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1)); Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass); Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass); // LoopTestMBB // BRC TailTestMBB // # fallthrough to LoopBodyMBB StartMBB->addSuccessor(LoopTestMBB); MBB = LoopTestMBB; BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg) .addReg(SizeReg) .addMBB(StartMBB) .addReg(IncReg) .addMBB(LoopBodyMBB); BuildMI(MBB, DL, TII->get(SystemZ::CLGFI)) .addReg(PHIReg) .addImm(ProbeSize); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT) .addMBB(TailTestMBB); MBB->addSuccessor(LoopBodyMBB); MBB->addSuccessor(TailTestMBB); // LoopBodyMBB: Allocate and probe by means of a volatile compare.
// J LoopTestMBB MBB = LoopBodyMBB; BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg) .addReg(PHIReg) .addImm(ProbeSize); BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D) .addReg(SystemZ::R15D) .addImm(ProbeSize); BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D) .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0) .setMemRefs(VolLdMMO); BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB); MBB->addSuccessor(LoopTestMBB); // TailTestMBB // BRC DoneMBB // # fallthrough to TailMBB MBB = TailTestMBB; BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) .addReg(PHIReg) .addImm(0); BuildMI(MBB, DL, TII->get(SystemZ::BRC)) .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ) .addMBB(DoneMBB); MBB->addSuccessor(TailMBB); MBB->addSuccessor(DoneMBB); // TailMBB // # fallthrough to DoneMBB MBB = TailMBB; BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D) .addReg(SystemZ::R15D) .addReg(PHIReg); BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D) .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg) .setMemRefs(VolLdMMO); MBB->addSuccessor(DoneMBB); // DoneMBB MBB = DoneMBB; BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg) .addReg(SystemZ::R15D); MI.eraseFromParent(); return DoneMBB; } SDValue SystemZTargetLowering:: getBackchainAddress(SDValue SP, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); auto *TFL = static_cast(Subtarget.getFrameLowering()); SDLoc DL(SP); return DAG.getNode(ISD::ADD, DL, MVT::i64, SP, DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL)); } MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( MachineInstr &MI, MachineBasicBlock *MBB) const { switch (MI.getOpcode()) { case SystemZ::Select32: case SystemZ::Select64: case SystemZ::SelectF32: case SystemZ::SelectF64: case SystemZ::SelectF128: case SystemZ::SelectVR32: case SystemZ::SelectVR64: case SystemZ::SelectVR128: return emitSelect(MI, MBB); case SystemZ::CondStore8Mux: return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false); case SystemZ::CondStore8MuxInv: return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true); case SystemZ::CondStore16Mux: return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false); case SystemZ::CondStore16MuxInv: return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true); case SystemZ::CondStore32Mux: return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false); case SystemZ::CondStore32MuxInv: return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true); case SystemZ::CondStore8: return emitCondStore(MI, MBB, SystemZ::STC, 0, false); case SystemZ::CondStore8Inv: return emitCondStore(MI, MBB, SystemZ::STC, 0, true); case SystemZ::CondStore16: return emitCondStore(MI, MBB, SystemZ::STH, 0, false); case SystemZ::CondStore16Inv: return emitCondStore(MI, MBB, SystemZ::STH, 0, true); case SystemZ::CondStore32: return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false); case SystemZ::CondStore32Inv: return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true); case SystemZ::CondStore64: return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false); case SystemZ::CondStore64Inv: return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true); case SystemZ::CondStoreF32: return emitCondStore(MI, MBB, SystemZ::STE, 0, false); case SystemZ::CondStoreF32Inv: return emitCondStore(MI, MBB, SystemZ::STE, 0, true); case SystemZ::CondStoreF64: return emitCondStore(MI, MBB, SystemZ::STD, 0, false); case SystemZ::CondStoreF64Inv: return emitCondStore(MI, MBB, SystemZ::STD, 0, true); case 
SystemZ::PAIR128: return emitPair128(MI, MBB); case SystemZ::AEXT128: return emitExt128(MI, MBB, false); case SystemZ::ZEXT128: return emitExt128(MI, MBB, true); case SystemZ::ATOMIC_SWAPW: return emitAtomicLoadBinary(MI, MBB, 0, 0); case SystemZ::ATOMIC_SWAP_32: return emitAtomicLoadBinary(MI, MBB, 0, 32); case SystemZ::ATOMIC_SWAP_64: return emitAtomicLoadBinary(MI, MBB, 0, 64); case SystemZ::ATOMIC_LOADW_AR: return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 0); case SystemZ::ATOMIC_LOADW_AFI: return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 0); case SystemZ::ATOMIC_LOAD_AR: return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 32); case SystemZ::ATOMIC_LOAD_AHI: return emitAtomicLoadBinary(MI, MBB, SystemZ::AHI, 32); case SystemZ::ATOMIC_LOAD_AFI: return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 32); case SystemZ::ATOMIC_LOAD_AGR: return emitAtomicLoadBinary(MI, MBB, SystemZ::AGR, 64); case SystemZ::ATOMIC_LOAD_AGHI: return emitAtomicLoadBinary(MI, MBB, SystemZ::AGHI, 64); case SystemZ::ATOMIC_LOAD_AGFI: return emitAtomicLoadBinary(MI, MBB, SystemZ::AGFI, 64); case SystemZ::ATOMIC_LOADW_SR: return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 0); case SystemZ::ATOMIC_LOAD_SR: return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 32); case SystemZ::ATOMIC_LOAD_SGR: return emitAtomicLoadBinary(MI, MBB, SystemZ::SGR, 64); case SystemZ::ATOMIC_LOADW_NR: return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0); case SystemZ::ATOMIC_LOADW_NILH: return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0); case SystemZ::ATOMIC_LOAD_NR: return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32); case SystemZ::ATOMIC_LOAD_NILL: return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32); case SystemZ::ATOMIC_LOAD_NILH: return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32); case SystemZ::ATOMIC_LOAD_NILF: return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32); case SystemZ::ATOMIC_LOAD_NGR: return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64); case SystemZ::ATOMIC_LOAD_NILL64: return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64); case SystemZ::ATOMIC_LOAD_NILH64: return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64); case SystemZ::ATOMIC_LOAD_NIHL64: return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64); case SystemZ::ATOMIC_LOAD_NIHH64: return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64); case SystemZ::ATOMIC_LOAD_NILF64: return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64); case SystemZ::ATOMIC_LOAD_NIHF64: return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64); case SystemZ::ATOMIC_LOADW_OR: return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0); case SystemZ::ATOMIC_LOADW_OILH: return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 0); case SystemZ::ATOMIC_LOAD_OR: return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32); case SystemZ::ATOMIC_LOAD_OILL: return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 32); case SystemZ::ATOMIC_LOAD_OILH: return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 32); case SystemZ::ATOMIC_LOAD_OILF: return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 32); case SystemZ::ATOMIC_LOAD_OGR: return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64); case SystemZ::ATOMIC_LOAD_OILL64: return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL64, 64); case SystemZ::ATOMIC_LOAD_OILH64: return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH64, 64); case SystemZ::ATOMIC_LOAD_OIHL64: return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL64, 64); case SystemZ::ATOMIC_LOAD_OIHH64: return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH64, 64); case SystemZ::ATOMIC_LOAD_OILF64: return 
emitAtomicLoadBinary(MI, MBB, SystemZ::OILF64, 64); case SystemZ::ATOMIC_LOAD_OIHF64: return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF64, 64); case SystemZ::ATOMIC_LOADW_XR: return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0); case SystemZ::ATOMIC_LOADW_XILF: return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 0); case SystemZ::ATOMIC_LOAD_XR: return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32); case SystemZ::ATOMIC_LOAD_XILF: return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 32); case SystemZ::ATOMIC_LOAD_XGR: return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64); case SystemZ::ATOMIC_LOAD_XILF64: return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF64, 64); case SystemZ::ATOMIC_LOAD_XIHF64: return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF64, 64); case SystemZ::ATOMIC_LOADW_NRi: return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true); case SystemZ::ATOMIC_LOADW_NILHi: return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0, true); case SystemZ::ATOMIC_LOAD_NRi: return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true); case SystemZ::ATOMIC_LOAD_NILLi: return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32, true); case SystemZ::ATOMIC_LOAD_NILHi: return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32, true); case SystemZ::ATOMIC_LOAD_NILFi: return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32, true); case SystemZ::ATOMIC_LOAD_NGRi: return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true); case SystemZ::ATOMIC_LOAD_NILL64i: return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64, true); case SystemZ::ATOMIC_LOAD_NILH64i: return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64, true); case SystemZ::ATOMIC_LOAD_NIHL64i: return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64, true); case SystemZ::ATOMIC_LOAD_NIHH64i: return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64, true); case SystemZ::ATOMIC_LOAD_NILF64i: return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64, true); case SystemZ::ATOMIC_LOAD_NIHF64i: return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64, true); case SystemZ::ATOMIC_LOADW_MIN: return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE, 0); case SystemZ::ATOMIC_LOAD_MIN_32: return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE, 32); case SystemZ::ATOMIC_LOAD_MIN_64: return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR, SystemZ::CCMASK_CMP_LE, 64); case SystemZ::ATOMIC_LOADW_MAX: return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE, 0); case SystemZ::ATOMIC_LOAD_MAX_32: return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE, 32); case SystemZ::ATOMIC_LOAD_MAX_64: return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR, SystemZ::CCMASK_CMP_GE, 64); case SystemZ::ATOMIC_LOADW_UMIN: return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE, 0); case SystemZ::ATOMIC_LOAD_UMIN_32: return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE, 32); case SystemZ::ATOMIC_LOAD_UMIN_64: return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR, SystemZ::CCMASK_CMP_LE, 64); case SystemZ::ATOMIC_LOADW_UMAX: return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE, 0); case SystemZ::ATOMIC_LOAD_UMAX_32: return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE, 32); case SystemZ::ATOMIC_LOAD_UMAX_64: return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR, SystemZ::CCMASK_CMP_GE, 64); case SystemZ::ATOMIC_CMP_SWAPW: return emitAtomicCmpSwapW(MI, MBB); case SystemZ::MVCSequence: case SystemZ::MVCLoop: return emitMemMemWrapper(MI, MBB, SystemZ::MVC); 
case SystemZ::NCSequence: case SystemZ::NCLoop: return emitMemMemWrapper(MI, MBB, SystemZ::NC); case SystemZ::OCSequence: case SystemZ::OCLoop: return emitMemMemWrapper(MI, MBB, SystemZ::OC); case SystemZ::XCSequence: case SystemZ::XCLoop: return emitMemMemWrapper(MI, MBB, SystemZ::XC); case SystemZ::CLCSequence: case SystemZ::CLCLoop: return emitMemMemWrapper(MI, MBB, SystemZ::CLC); case SystemZ::CLSTLoop: return emitStringWrapper(MI, MBB, SystemZ::CLST); case SystemZ::MVSTLoop: return emitStringWrapper(MI, MBB, SystemZ::MVST); case SystemZ::SRSTLoop: return emitStringWrapper(MI, MBB, SystemZ::SRST); case SystemZ::TBEGIN: return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false); case SystemZ::TBEGIN_nofloat: return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true); case SystemZ::TBEGINC: return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true); case SystemZ::LTEBRCompare_VecPseudo: return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR); case SystemZ::LTDBRCompare_VecPseudo: return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR); case SystemZ::LTXBRCompare_VecPseudo: return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR); case SystemZ::PROBED_ALLOCA: return emitProbedAlloca(MI, MBB); case TargetOpcode::STACKMAP: case TargetOpcode::PATCHPOINT: return emitPatchPoint(MI, MBB); default: llvm_unreachable("Unexpected instr type to insert"); } } // This is only used by the isel schedulers, and is needed only to prevent // compiler from crashing when list-ilp is used. const TargetRegisterClass * SystemZTargetLowering::getRepRegClassFor(MVT VT) const { if (VT == MVT::Untyped) return &SystemZ::ADDR128BitRegClass; return TargetLowering::getRepRegClassFor(VT); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroEarly.cpp index 1660e41ba830..5e5e513cdfda 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroEarly.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroEarly.cpp @@ -1,276 +1,285 @@ //===- CoroEarly.cpp - Coroutine Early Function Pass ----------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "llvm/Transforms/Coroutines/CoroEarly.h" #include "CoroInternal.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" using namespace llvm; #define DEBUG_TYPE "coro-early" namespace { // Created on demand if the coro-early pass has work to do. class Lowerer : public coro::LowererBase { IRBuilder<> Builder; PointerType *const AnyResumeFnPtrTy; Constant *NoopCoro = nullptr; void lowerResumeOrDestroy(CallBase &CB, CoroSubFnInst::ResumeKind); void lowerCoroPromise(CoroPromiseInst *Intrin); void lowerCoroDone(IntrinsicInst *II); void lowerCoroNoop(IntrinsicInst *II); public: Lowerer(Module &M) : LowererBase(M), Builder(Context), AnyResumeFnPtrTy(FunctionType::get(Type::getVoidTy(Context), Int8Ptr, /*isVarArg=*/false) ->getPointerTo()) {} bool lowerEarlyIntrinsics(Function &F); }; } // Replace a direct call to coro.resume or coro.destroy with an indirect call to // an address returned by coro.subfn.addr intrinsic. 
This is done so that // CGPassManager recognizes devirtualization when CoroElide pass replaces a call // to coro.subfn.addr with an appropriate function address. void Lowerer::lowerResumeOrDestroy(CallBase &CB, CoroSubFnInst::ResumeKind Index) { Value *ResumeAddr = makeSubFnCall(CB.getArgOperand(0), Index, &CB); CB.setCalledOperand(ResumeAddr); CB.setCallingConv(CallingConv::Fast); } // Coroutine promise field is always at the fixed offset from the beginning of // the coroutine frame. i8* coro.promise(i8*, i1 from) intrinsic adds an offset // to a passed pointer to move from coroutine frame to coroutine promise and // vice versa. Since we don't know exactly which coroutine frame it is, we build // a coroutine frame mock up starting with two function pointers, followed by a // properly aligned coroutine promise field. // TODO: Handle the case when coroutine promise alloca has align override. void Lowerer::lowerCoroPromise(CoroPromiseInst *Intrin) { Value *Operand = Intrin->getArgOperand(0); Align Alignment = Intrin->getAlignment(); Type *Int8Ty = Builder.getInt8Ty(); auto *SampleStruct = StructType::get(Context, {AnyResumeFnPtrTy, AnyResumeFnPtrTy, Int8Ty}); const DataLayout &DL = TheModule.getDataLayout(); int64_t Offset = alignTo( DL.getStructLayout(SampleStruct)->getElementOffset(2), Alignment); if (Intrin->isFromPromise()) Offset = -Offset; Builder.SetInsertPoint(Intrin); Value *Replacement = Builder.CreateConstInBoundsGEP1_32(Int8Ty, Operand, Offset); Intrin->replaceAllUsesWith(Replacement); Intrin->eraseFromParent(); } // When a coroutine reaches final suspend point, it zeros out ResumeFnAddr in // the coroutine frame (it is UB to resume from a final suspend point). // The llvm.coro.done intrinsic is used to check whether a coroutine is // suspended at the final suspend point or not. void Lowerer::lowerCoroDone(IntrinsicInst *II) { Value *Operand = II->getArgOperand(0); // ResumeFnAddr is the first pointer sized element of the coroutine frame. static_assert(coro::Shape::SwitchFieldIndex::Resume == 0, "resume function not at offset zero"); auto *FrameTy = Int8Ptr; PointerType *FramePtrTy = FrameTy->getPointerTo(); Builder.SetInsertPoint(II); auto *BCI = Builder.CreateBitCast(Operand, FramePtrTy); auto *Load = Builder.CreateLoad(FrameTy, BCI); auto *Cond = Builder.CreateICmpEQ(Load, NullPtr); II->replaceAllUsesWith(Cond); II->eraseFromParent(); } void Lowerer::lowerCoroNoop(IntrinsicInst *II) { if (!NoopCoro) { LLVMContext &C = Builder.getContext(); Module &M = *II->getModule(); // Create a noop.frame struct type. StructType *FrameTy = StructType::create(C, "NoopCoro.Frame"); auto *FramePtrTy = FrameTy->getPointerTo(); auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy, /*isVarArg=*/false); auto *FnPtrTy = FnTy->getPointerTo(); FrameTy->setBody({FnPtrTy, FnPtrTy}); // Create a Noop function that does nothing. Function *NoopFn = Function::Create(FnTy, GlobalValue::LinkageTypes::PrivateLinkage, "NoopCoro.ResumeDestroy", &M); NoopFn->setCallingConv(CallingConv::Fast); auto *Entry = BasicBlock::Create(C, "entry", NoopFn); ReturnInst::Create(C, Entry); // Create a constant struct for the frame. 
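// (Added commentary.)  The constant mirrors the two-slot frame header used by
// switch-based coroutine lowering, { resume, destroy }, with both slots
// pointing at the no-op function created above, so resuming or destroying the
// noop coroutine handle does nothing.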
Constant* Values[] = {NoopFn, NoopFn}; Constant* NoopCoroConst = ConstantStruct::get(FrameTy, Values); NoopCoro = new GlobalVariable(M, NoopCoroConst->getType(), /*isConstant=*/true, GlobalVariable::PrivateLinkage, NoopCoroConst, "NoopCoro.Frame.Const"); } Builder.SetInsertPoint(II); auto *NoopCoroVoidPtr = Builder.CreateBitCast(NoopCoro, Int8Ptr); II->replaceAllUsesWith(NoopCoroVoidPtr); II->eraseFromParent(); } // Prior to CoroSplit, calls to coro.begin needs to be marked as NoDuplicate, // as CoroSplit assumes there is exactly one coro.begin. After CoroSplit, // NoDuplicate attribute will be removed from coro.begin otherwise, it will // interfere with inlining. static void setCannotDuplicate(CoroIdInst *CoroId) { for (User *U : CoroId->users()) if (auto *CB = dyn_cast(U)) CB->setCannotDuplicate(); } bool Lowerer::lowerEarlyIntrinsics(Function &F) { bool Changed = false; CoroIdInst *CoroId = nullptr; SmallVector CoroFrees; + bool HasCoroSuspend = false; for (auto IB = inst_begin(F), IE = inst_end(F); IB != IE;) { Instruction &I = *IB++; if (auto *CB = dyn_cast(&I)) { switch (CB->getIntrinsicID()) { default: continue; case Intrinsic::coro_free: CoroFrees.push_back(cast(&I)); break; case Intrinsic::coro_suspend: // Make sure that final suspend point is not duplicated as CoroSplit // pass expects that there is at most one final suspend point. if (cast(&I)->isFinal()) CB->setCannotDuplicate(); + HasCoroSuspend = true; break; case Intrinsic::coro_end_async: case Intrinsic::coro_end: // Make sure that fallthrough coro.end is not duplicated as CoroSplit // pass expects that there is at most one fallthrough coro.end. if (cast(&I)->isFallthrough()) CB->setCannotDuplicate(); break; case Intrinsic::coro_noop: lowerCoroNoop(cast(&I)); break; case Intrinsic::coro_id: // Mark a function that comes out of the frontend that has a coro.id // with a coroutine attribute. if (auto *CII = cast(&I)) { if (CII->getInfo().isPreSplit()) { F.addFnAttr(CORO_PRESPLIT_ATTR, UNPREPARED_FOR_SPLIT); setCannotDuplicate(CII); CII->setCoroutineSelf(); CoroId = cast(&I); } } break; case Intrinsic::coro_id_retcon: case Intrinsic::coro_id_retcon_once: case Intrinsic::coro_id_async: F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT); break; case Intrinsic::coro_resume: lowerResumeOrDestroy(*CB, CoroSubFnInst::ResumeIndex); break; case Intrinsic::coro_destroy: lowerResumeOrDestroy(*CB, CoroSubFnInst::DestroyIndex); break; case Intrinsic::coro_promise: lowerCoroPromise(cast(&I)); break; case Intrinsic::coro_done: lowerCoroDone(cast(&I)); break; } Changed = true; } } // Make sure that all CoroFree reference the coro.id intrinsic. // Token type is not exposed through coroutine C/C++ builtins to plain C, so // we allow specifying none and fixing it up here. if (CoroId) for (CoroFreeInst *CF : CoroFrees) CF->setArgOperand(0, CoroId); + // Coroutine suspention could potentially lead to any argument modified + // outside of the function, hence arguments should not have noalias + // attributes. 
+ if (HasCoroSuspend) + for (Argument &A : F.args()) + if (A.hasNoAliasAttr()) + A.removeAttr(Attribute::NoAlias); return Changed; } static bool declaresCoroEarlyIntrinsics(const Module &M) { return coro::declaresIntrinsics( M, {"llvm.coro.id", "llvm.coro.id.retcon", "llvm.coro.id.retcon.once", "llvm.coro.id.async", "llvm.coro.destroy", "llvm.coro.done", "llvm.coro.end", "llvm.coro.end.async", "llvm.coro.noop", "llvm.coro.free", "llvm.coro.promise", "llvm.coro.resume", "llvm.coro.suspend"}); } PreservedAnalyses CoroEarlyPass::run(Function &F, FunctionAnalysisManager &) { Module &M = *F.getParent(); if (!declaresCoroEarlyIntrinsics(M) || !Lowerer(M).lowerEarlyIntrinsics(F)) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserveSet(); return PA; } namespace { struct CoroEarlyLegacy : public FunctionPass { static char ID; // Pass identification, replacement for typeid. CoroEarlyLegacy() : FunctionPass(ID) { initializeCoroEarlyLegacyPass(*PassRegistry::getPassRegistry()); } std::unique_ptr L; // This pass has work to do only if we find intrinsics we are going to lower // in the module. bool doInitialization(Module &M) override { if (declaresCoroEarlyIntrinsics(M)) L = std::make_unique(M); return false; } bool runOnFunction(Function &F) override { if (!L) return false; return L->lowerEarlyIntrinsics(F); } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); } StringRef getPassName() const override { return "Lower early coroutine intrinsics"; } }; } char CoroEarlyLegacy::ID = 0; INITIALIZE_PASS(CoroEarlyLegacy, "coro-early", "Lower early coroutine intrinsics", false, false) Pass *llvm::createCoroEarlyLegacyPass() { return new CoroEarlyLegacy(); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index e53e7605b254..e1e0d50979dc 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -1,2352 +1,2373 @@ //===- CoroFrame.cpp - Builds and manipulates coroutine frame -------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // This file contains classes used to discover if for a particular value // there from sue to definition that crosses a suspend block. // // Using the information discovered we form a Coroutine Frame structure to // contain those values. All uses of those values are replaced with appropriate // GEP + load from the coroutine frame. At the point of the definition we spill // the value into the coroutine frame. // // TODO: pack values tightly using liveness info. 
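The noalias stripping added to lowerEarlyIntrinsics above exists because, once a coroutine suspends, the caller may legitimately write through another alias of a pointer argument before resuming it. The C++20 program below is a minimal, self-contained illustration of that situation; the Task type is a made-up skeleton for the example and has nothing to do with this patch.

#include <coroutine>
#include <cstdio>

struct Task {
  struct promise_type {
    Task get_return_object() {
      return {std::coroutine_handle<promise_type>::from_promise(*this)};
    }
    std::suspend_always initial_suspend() { return {}; }
    std::suspend_always final_suspend() noexcept { return {}; }
    void return_void() {}
    void unhandled_exception() {}
  };
  std::coroutine_handle<promise_type> handle;
};

Task reader(int *p) {
  int before = *p;                 // read before the suspend point
  co_await std::suspend_always{};
  // If p were still treated as noalias across the whole body, *p could be
  // assumed unchanged here, yet the caller wrote to it while we were suspended.
  std::printf("%d -> %d\n", before, *p);
}

int main() {
  int shared = 1;
  Task t = reader(&shared);
  t.handle.resume();               // run up to the co_await
  shared = 2;                      // caller writes through another alias
  t.handle.resume();               // prints "1 -> 2"
  t.handle.destroy();
  return 0;
}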
//===----------------------------------------------------------------------===// #include "CoroInternal.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/Analysis/PtrUseVisitor.h" #include "llvm/Analysis/StackLifetime.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/OptimizedStructLayout.h" #include "llvm/Support/circular_raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include using namespace llvm; // The "coro-suspend-crossing" flag is very noisy. There is another debug type, // "coro-frame", which results in leaner debug spew. #define DEBUG_TYPE "coro-suspend-crossing" static cl::opt EnableReuseStorageInFrame( "reuse-storage-in-coroutine-frame", cl::Hidden, cl::desc( "Enable the optimization which would reuse the storage in the coroutine \ frame for allocas whose liferanges are not overlapped, for testing purposes"), llvm::cl::init(false)); enum { SmallVectorThreshold = 32 }; // Provides two way mapping between the blocks and numbers. namespace { class BlockToIndexMapping { SmallVector V; public: size_t size() const { return V.size(); } BlockToIndexMapping(Function &F) { for (BasicBlock &BB : F) V.push_back(&BB); llvm::sort(V); } size_t blockToIndex(BasicBlock *BB) const { auto *I = llvm::lower_bound(V, BB); assert(I != V.end() && *I == BB && "BasicBlockNumberng: Unknown block"); return I - V.begin(); } BasicBlock *indexToBlock(unsigned Index) const { return V[Index]; } }; } // end anonymous namespace // The SuspendCrossingInfo maintains data that allows to answer a question // whether given two BasicBlocks A and B there is a path from A to B that // passes through a suspend point. // // For every basic block 'i' it maintains a BlockData that consists of: // Consumes: a bit vector which contains a set of indices of blocks that can // reach block 'i' // Kills: a bit vector which contains a set of indices of blocks that can // reach block 'i', but one of the path will cross a suspend point // Suspend: a boolean indicating whether block 'i' contains a suspend point. // End: a boolean indicating whether block 'i' contains a coro.end intrinsic. 
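BlockToIndexMapping above depends only on block addresses: collect them, sort them, and binary-search with lower_bound. A small standalone sketch of the same pattern, with plain pointers standing in for BasicBlock pointers:

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <vector>

int main() {
  // Stand-ins for basic block addresses, in whatever order the function lists them.
  int Blocks[4] = {};
  std::vector<int *> V = {&Blocks[2], &Blocks[0], &Blocks[3], &Blocks[1]};

  // The constructor keeps the pointers sorted so blockToIndex can binary-search.
  std::sort(V.begin(), V.end());

  auto blockToIndex = [&](int *BB) {
    auto I = std::lower_bound(V.begin(), V.end(), BB);
    assert(I != V.end() && *I == BB && "unknown block");
    return static_cast<size_t>(I - V.begin());
  };

  size_t Idx = blockToIndex(&Blocks[3]);
  // indexToBlock is simply V[Index], so the round trip is the identity.
  std::printf("round trip ok: %d\n", V[Idx] == &Blocks[3]);
  return 0;
}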
// namespace { struct SuspendCrossingInfo { BlockToIndexMapping Mapping; struct BlockData { BitVector Consumes; BitVector Kills; bool Suspend = false; bool End = false; }; SmallVector Block; iterator_range successors(BlockData const &BD) const { BasicBlock *BB = Mapping.indexToBlock(&BD - &Block[0]); return llvm::successors(BB); } BlockData &getBlockData(BasicBlock *BB) { return Block[Mapping.blockToIndex(BB)]; } void dump() const; void dump(StringRef Label, BitVector const &BV) const; SuspendCrossingInfo(Function &F, coro::Shape &Shape); bool hasPathCrossingSuspendPoint(BasicBlock *DefBB, BasicBlock *UseBB) const { size_t const DefIndex = Mapping.blockToIndex(DefBB); size_t const UseIndex = Mapping.blockToIndex(UseBB); bool const Result = Block[UseIndex].Kills[DefIndex]; LLVM_DEBUG(dbgs() << UseBB->getName() << " => " << DefBB->getName() << " answer is " << Result << "\n"); return Result; } bool isDefinitionAcrossSuspend(BasicBlock *DefBB, User *U) const { auto *I = cast(U); // We rewrote PHINodes, so that only the ones with exactly one incoming // value need to be analyzed. if (auto *PN = dyn_cast(I)) if (PN->getNumIncomingValues() > 1) return false; BasicBlock *UseBB = I->getParent(); // As a special case, treat uses by an llvm.coro.suspend.retcon or an // llvm.coro.suspend.async as if they were uses in the suspend's single // predecessor: the uses conceptually occur before the suspend. if (isa(I) || isa(I)) { UseBB = UseBB->getSinglePredecessor(); assert(UseBB && "should have split coro.suspend into its own block"); } return hasPathCrossingSuspendPoint(DefBB, UseBB); } bool isDefinitionAcrossSuspend(Argument &A, User *U) const { return isDefinitionAcrossSuspend(&A.getParent()->getEntryBlock(), U); } bool isDefinitionAcrossSuspend(Instruction &I, User *U) const { auto *DefBB = I.getParent(); // As a special case, treat values produced by an llvm.coro.suspend.* // as if they were defined in the single successor: the uses // conceptually occur after the suspend. if (isa(I)) { DefBB = DefBB->getSingleSuccessor(); assert(DefBB && "should have split coro.suspend into its own block"); } return isDefinitionAcrossSuspend(DefBB, U); } }; } // end anonymous namespace #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void SuspendCrossingInfo::dump(StringRef Label, BitVector const &BV) const { dbgs() << Label << ":"; for (size_t I = 0, N = BV.size(); I < N; ++I) if (BV[I]) dbgs() << " " << Mapping.indexToBlock(I)->getName(); dbgs() << "\n"; } LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const { for (size_t I = 0, N = Block.size(); I < N; ++I) { BasicBlock *const B = Mapping.indexToBlock(I); dbgs() << B->getName() << ":\n"; dump(" Consumes", Block[I].Consumes); dump(" Kills", Block[I].Kills); } dbgs() << "\n"; } #endif SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape) : Mapping(F) { const size_t N = Mapping.size(); Block.resize(N); // Initialize every block so that it consumes itself for (size_t I = 0; I < N; ++I) { auto &B = Block[I]; B.Consumes.resize(N); B.Kills.resize(N); B.Consumes.set(I); } // Mark all CoroEnd Blocks. We do not propagate Kills beyond coro.ends as // the code beyond coro.end is reachable during initial invocation of the // coroutine. for (auto *CE : Shape.CoroEnds) getBlockData(CE->getParent()).End = true; // Mark all suspend blocks and indicate that they kill everything they // consume. 
Note, that crossing coro.save also requires a spill, as any code // between coro.save and coro.suspend may resume the coroutine and all of the // state needs to be saved by that time. auto markSuspendBlock = [&](IntrinsicInst *BarrierInst) { BasicBlock *SuspendBlock = BarrierInst->getParent(); auto &B = getBlockData(SuspendBlock); B.Suspend = true; B.Kills |= B.Consumes; }; for (auto *CSI : Shape.CoroSuspends) { markSuspendBlock(CSI); if (auto *Save = CSI->getCoroSave()) markSuspendBlock(Save); } // Iterate propagating consumes and kills until they stop changing. int Iteration = 0; (void)Iteration; bool Changed; do { LLVM_DEBUG(dbgs() << "iteration " << ++Iteration); LLVM_DEBUG(dbgs() << "==============\n"); Changed = false; for (size_t I = 0; I < N; ++I) { auto &B = Block[I]; for (BasicBlock *SI : successors(B)) { auto SuccNo = Mapping.blockToIndex(SI); // Saved Consumes and Kills bitsets so that it is easy to see // if anything changed after propagation. auto &S = Block[SuccNo]; auto SavedConsumes = S.Consumes; auto SavedKills = S.Kills; // Propagate Kills and Consumes from block B into its successor S. S.Consumes |= B.Consumes; S.Kills |= B.Kills; // If block B is a suspend block, it should propagate kills into the // its successor for every block B consumes. if (B.Suspend) { S.Kills |= B.Consumes; } if (S.Suspend) { // If block S is a suspend block, it should kill all of the blocks it // consumes. S.Kills |= S.Consumes; } else if (S.End) { // If block S is an end block, it should not propagate kills as the // blocks following coro.end() are reached during initial invocation // of the coroutine while all the data are still available on the // stack or in the registers. S.Kills.reset(); } else { // This is reached when S block it not Suspend nor coro.end and it // need to make sure that it is not in the kill set. S.Kills.reset(SuccNo); } // See if anything changed. Changed |= (S.Kills != SavedKills) || (S.Consumes != SavedConsumes); if (S.Kills != SavedKills) { LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI->getName() << "\n"); LLVM_DEBUG(dump("S.Kills", S.Kills)); LLVM_DEBUG(dump("SavedKills", SavedKills)); } if (S.Consumes != SavedConsumes) { LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI << "\n"); LLVM_DEBUG(dump("S.Consume", S.Consumes)); LLVM_DEBUG(dump("SavedCons", SavedConsumes)); } } } } while (Changed); LLVM_DEBUG(dump()); } #undef DEBUG_TYPE // "coro-suspend-crossing" #define DEBUG_TYPE "coro-frame" namespace { class FrameTypeBuilder; // Mapping from the to-be-spilled value to all the users that need reload. using SpillInfo = SmallMapVector, 8>; struct AllocaInfo { AllocaInst *Alloca; DenseMap> Aliases; bool MayWriteBeforeCoroBegin; AllocaInfo(AllocaInst *Alloca, DenseMap> Aliases, bool MayWriteBeforeCoroBegin) : Alloca(Alloca), Aliases(std::move(Aliases)), MayWriteBeforeCoroBegin(MayWriteBeforeCoroBegin) {} }; struct FrameDataInfo { // All the values (that are not allocas) that needs to be spilled to the // frame. SpillInfo Spills; // Allocas contains all values defined as allocas that need to live in the // frame. 
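The propagation loop in SuspendCrossingInfo's constructor above can be exercised on a toy CFG. The sketch below mirrors the per-successor update rules (including the suspend and coro.end special cases) for three blocks, entry(0) -> suspend(1) -> use(2), where only block 1 contains a suspend point; the CFG is invented for the example.

#include <bitset>
#include <cstdio>
#include <vector>

int main() {
  constexpr int N = 3;
  struct BlockData {
    std::bitset<N> Consumes, Kills;
    bool Suspend = false, End = false;
  };
  std::vector<BlockData> Block(N);
  std::vector<std::vector<int>> Succs = {{1}, {2}, {}};
  for (int I = 0; I < N; ++I)
    Block[I].Consumes.set(I);            // every block consumes itself
  Block[1].Suspend = true;
  Block[1].Kills |= Block[1].Consumes;   // a suspend block kills what it consumes

  bool Changed;
  do {
    Changed = false;
    for (int I = 0; I < N; ++I)
      for (int SuccNo : Succs[I]) {
        BlockData &B = Block[I];
        BlockData &S = Block[SuccNo];
        auto SavedConsumes = S.Consumes;
        auto SavedKills = S.Kills;
        S.Consumes |= B.Consumes;
        S.Kills |= B.Kills;
        if (B.Suspend)                   // crossing B's suspend kills B's consumes
          S.Kills |= B.Consumes;
        if (S.Suspend)
          S.Kills |= S.Consumes;
        else if (S.End)
          S.Kills.reset();               // past coro.end nothing is killed
        else
          S.Kills.reset(SuccNo);
        Changed |= S.Kills != SavedKills || S.Consumes != SavedConsumes;
      }
  } while (Changed);

  // Kills[DefIndex] in the use block is what hasPathCrossingSuspendPoint reads.
  std::printf("def in entry, use in block 2 crosses a suspend: %d\n",
              (int)Block[2].Kills.test(0));  // prints 1
  return 0;
}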
SmallVector Allocas; SmallVector getAllDefs() const { SmallVector Defs; for (const auto &P : Spills) Defs.push_back(P.first); for (const auto &A : Allocas) Defs.push_back(A.Alloca); return Defs; } uint32_t getFieldIndex(Value *V) const { auto Itr = FieldIndexMap.find(V); assert(Itr != FieldIndexMap.end() && "Value does not have a frame field index"); return Itr->second; } void setFieldIndex(Value *V, uint32_t Index) { assert((LayoutIndexUpdateStarted || FieldIndexMap.count(V) == 0) && "Cannot set the index for the same field twice."); FieldIndexMap[V] = Index; } // Remap the index of every field in the frame, using the final layout index. void updateLayoutIndex(FrameTypeBuilder &B); private: // LayoutIndexUpdateStarted is used to avoid updating the index of any field // twice by mistake. bool LayoutIndexUpdateStarted = false; // Map from values to their slot indexes on the frame. They will be first set // with their original insertion field index. After the frame is built, their // indexes will be updated into the final layout index. DenseMap FieldIndexMap; }; } // namespace #ifndef NDEBUG static void dumpSpills(StringRef Title, const SpillInfo &Spills) { dbgs() << "------------- " << Title << "--------------\n"; for (const auto &E : Spills) { E.first->dump(); dbgs() << " user: "; for (auto *I : E.second) I->dump(); } } static void dumpAllocas(const SmallVectorImpl &Allocas) { dbgs() << "------------- Allocas --------------\n"; for (const auto &A : Allocas) { A.Alloca->dump(); } } #endif namespace { using FieldIDType = size_t; // We cannot rely solely on natural alignment of a type when building a // coroutine frame and if the alignment specified on the Alloca instruction // differs from the natural alignment of the alloca type we will need to insert // padding. class FrameTypeBuilder { private: struct Field { uint64_t Size; uint64_t Offset; Type *Ty; FieldIDType LayoutFieldIndex; Align Alignment; Align TyAlignment; }; const DataLayout &DL; LLVMContext &Context; uint64_t StructSize = 0; Align StructAlign; bool IsFinished = false; SmallVector Fields; DenseMap FieldIndexByKey; public: FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL) : DL(DL), Context(Context) {} /// Add a field to this structure for the storage of an `alloca` /// instruction. LLVM_NODISCARD FieldIDType addFieldForAlloca(AllocaInst *AI, bool IsHeader = false) { Type *Ty = AI->getAllocatedType(); // Make an array type if this is a static array allocation. if (AI->isArrayAllocation()) { if (auto *CI = dyn_cast(AI->getArraySize())) Ty = ArrayType::get(Ty, CI->getValue().getZExtValue()); else report_fatal_error("Coroutines cannot handle non static allocas yet"); } return addField(Ty, AI->getAlign(), IsHeader); } /// We want to put the allocas whose lifetime-ranges are not overlapped /// into one slot of coroutine frame. /// Consider the example at:https://bugs.llvm.org/show_bug.cgi?id=45566 /// /// cppcoro::task alternative_paths(bool cond) { /// if (cond) { /// big_structure a; /// process(a); /// co_await something(); /// } else { /// big_structure b; /// process2(b); /// co_await something(); /// } /// } /// /// We want to put variable a and variable b in the same slot to /// reduce the size of coroutine frame. /// /// This function use StackLifetime algorithm to partition the AllocaInsts in /// Spills to non-overlapped sets in order to put Alloca in the same /// non-overlapped set into the same slot in the Coroutine Frame. 
Then add /// field for the allocas in the same non-overlapped set by using the largest /// type as the field type. /// /// Side Effects: Because We sort the allocas, the order of allocas in the /// frame may be different with the order in the source code. void addFieldForAllocas(const Function &F, FrameDataInfo &FrameData, coro::Shape &Shape); /// Add a field to this structure. LLVM_NODISCARD FieldIDType addField(Type *Ty, MaybeAlign FieldAlignment, bool IsHeader = false) { assert(!IsFinished && "adding fields to a finished builder"); assert(Ty && "must provide a type for a field"); // The field size is always the alloc size of the type. uint64_t FieldSize = DL.getTypeAllocSize(Ty); // The field alignment might not be the type alignment, but we need // to remember the type alignment anyway to build the type. Align TyAlignment = DL.getABITypeAlign(Ty); if (!FieldAlignment) FieldAlignment = TyAlignment; // Lay out header fields immediately. uint64_t Offset; if (IsHeader) { Offset = alignTo(StructSize, FieldAlignment); StructSize = Offset + FieldSize; // Everything else has a flexible offset. } else { Offset = OptimizedStructLayoutField::FlexibleOffset; } Fields.push_back({FieldSize, Offset, Ty, 0, *FieldAlignment, TyAlignment}); return Fields.size() - 1; } /// Finish the layout and set the body on the given type. void finish(StructType *Ty); uint64_t getStructSize() const { assert(IsFinished && "not yet finished!"); return StructSize; } Align getStructAlign() const { assert(IsFinished && "not yet finished!"); return StructAlign; } FieldIDType getLayoutFieldIndex(FieldIDType Id) const { assert(IsFinished && "not yet finished!"); return Fields[Id].LayoutFieldIndex; } }; } // namespace void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) { auto Updater = [&](Value *I) { setFieldIndex(I, B.getLayoutFieldIndex(getFieldIndex(I))); }; LayoutIndexUpdateStarted = true; for (auto &S : Spills) Updater(S.first); for (const auto &A : Allocas) Updater(A.Alloca); LayoutIndexUpdateStarted = false; } void FrameTypeBuilder::addFieldForAllocas(const Function &F, FrameDataInfo &FrameData, coro::Shape &Shape) { DenseMap AllocaIndex; using AllocaSetType = SmallVector; SmallVector NonOverlapedAllocas; // We need to add field for allocas at the end of this function. However, this // function has multiple exits, so we use this helper to avoid redundant code. struct RTTIHelper { std::function func; RTTIHelper(std::function &&func) : func(func) {} ~RTTIHelper() { func(); } } Helper([&]() { for (auto AllocaList : NonOverlapedAllocas) { auto *LargestAI = *AllocaList.begin(); FieldIDType Id = addFieldForAlloca(LargestAI); for (auto *Alloca : AllocaList) FrameData.setFieldIndex(Alloca, Id); } }); if (!Shape.ReuseFrameSlot && !EnableReuseStorageInFrame) { for (const auto &A : FrameData.Allocas) { AllocaInst *Alloca = A.Alloca; AllocaIndex[Alloca] = NonOverlapedAllocas.size(); NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca)); } return; } // Because there are pathes from the lifetime.start to coro.end // for each alloca, the liferanges for every alloca is overlaped // in the blocks who contain coro.end and the successor blocks. // So we choose to skip there blocks when we calculates the liferange // for each alloca. It should be reasonable since there shouldn't be uses // in these blocks and the coroutine frame shouldn't be used outside the // coroutine body. // // Note that the user of coro.suspend may not be SwitchInst. However, this // case seems too complex to handle. 
And it is harmless to skip these // patterns since it just prevend putting the allocas to live in the same // slot. DenseMap DefaultSuspendDest; for (auto CoroSuspendInst : Shape.CoroSuspends) { for (auto U : CoroSuspendInst->users()) { if (auto *ConstSWI = dyn_cast(U)) { auto *SWI = const_cast(ConstSWI); DefaultSuspendDest[SWI] = SWI->getDefaultDest(); SWI->setDefaultDest(SWI->getSuccessor(1)); } } } auto ExtractAllocas = [&]() { AllocaSetType Allocas; Allocas.reserve(FrameData.Allocas.size()); for (const auto &A : FrameData.Allocas) Allocas.push_back(A.Alloca); return Allocas; }; StackLifetime StackLifetimeAnalyzer(F, ExtractAllocas(), StackLifetime::LivenessType::May); StackLifetimeAnalyzer.run(); auto IsAllocaInferenre = [&](const AllocaInst *AI1, const AllocaInst *AI2) { return StackLifetimeAnalyzer.getLiveRange(AI1).overlaps( StackLifetimeAnalyzer.getLiveRange(AI2)); }; auto GetAllocaSize = [&](const AllocaInfo &A) { Optional RetSize = A.Alloca->getAllocationSizeInBits(DL); assert(RetSize && "Variable Length Arrays (VLA) are not supported.\n"); assert(!RetSize->isScalable() && "Scalable vectors are not yet supported"); return RetSize->getFixedSize(); }; // Put larger allocas in the front. So the larger allocas have higher // priority to merge, which can save more space potentially. Also each // AllocaSet would be ordered. So we can get the largest Alloca in one // AllocaSet easily. sort(FrameData.Allocas, [&](const auto &Iter1, const auto &Iter2) { return GetAllocaSize(Iter1) > GetAllocaSize(Iter2); }); for (const auto &A : FrameData.Allocas) { AllocaInst *Alloca = A.Alloca; bool Merged = false; // Try to find if the Alloca is not inferenced with any existing // NonOverlappedAllocaSet. If it is true, insert the alloca to that // NonOverlappedAllocaSet. for (auto &AllocaSet : NonOverlapedAllocas) { assert(!AllocaSet.empty() && "Processing Alloca Set is not empty.\n"); bool NoInference = none_of(AllocaSet, [&](auto Iter) { return IsAllocaInferenre(Alloca, Iter); }); // If the alignment of A is multiple of the alignment of B, the address // of A should satisfy the requirement for aligning for B. // // There may be other more fine-grained strategies to handle the alignment // infomation during the merging process. But it seems hard to handle // these strategies and benefit little. bool Alignable = [&]() -> bool { auto *LargestAlloca = *AllocaSet.begin(); return LargestAlloca->getAlign().value() % Alloca->getAlign().value() == 0; }(); bool CouldMerge = NoInference && Alignable; if (!CouldMerge) continue; AllocaIndex[Alloca] = AllocaIndex[*AllocaSet.begin()]; AllocaSet.push_back(Alloca); Merged = true; break; } if (!Merged) { AllocaIndex[Alloca] = NonOverlapedAllocas.size(); NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca)); } } // Recover the default target destination for each Switch statement // reserved. for (auto SwitchAndDefaultDest : DefaultSuspendDest) { SwitchInst *SWI = SwitchAndDefaultDest.first; BasicBlock *DestBB = SwitchAndDefaultDest.second; SWI->setDefaultDest(DestBB); } // This Debug Info could tell us which allocas are merged into one slot. LLVM_DEBUG(for (auto &AllocaSet : NonOverlapedAllocas) { if (AllocaSet.size() > 1) { dbgs() << "In Function:" << F.getName() << "\n"; dbgs() << "Find Union Set " << "\n"; dbgs() << "\tAllocas are \n"; for (auto Alloca : AllocaSet) dbgs() << "\t\t" << *Alloca << "\n"; } }); } void FrameTypeBuilder::finish(StructType *Ty) { assert(!IsFinished && "already finished!"); // Prepare the optimal-layout field array. 
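addFieldForAllocas above reduces to a greedy partitioning: visit allocas from largest to smallest and drop each into the first slot whose members it does not interfere with and whose largest member's alignment is a multiple of its own. A standalone sketch with invented sizes and live ranges (plain half-open intervals standing in for StackLifetime's answers):

#include <algorithm>
#include <cstdio>
#include <vector>

struct Alloca {
  const char *Name;
  unsigned Size, Align;
  unsigned LiveBegin, LiveEnd;  // half-open live range
};

static bool overlaps(const Alloca &A, const Alloca &B) {
  return A.LiveBegin < B.LiveEnd && B.LiveBegin < A.LiveEnd;
}

int main() {
  std::vector<Alloca> Allocas = {{"a", 64, 8, 0, 10},
                                 {"b", 64, 8, 10, 20},   // disjoint from "a"
                                 {"c", 16, 4, 5, 15}};   // overlaps both
  // Largest first, so each slot's first member is also its largest.
  std::sort(Allocas.begin(), Allocas.end(),
            [](const Alloca &L, const Alloca &R) { return L.Size > R.Size; });

  std::vector<std::vector<Alloca>> Slots;
  for (const Alloca &A : Allocas) {
    bool Merged = false;
    for (auto &Slot : Slots) {
      bool NoInterference = std::none_of(
          Slot.begin(), Slot.end(),
          [&](const Alloca &Other) { return overlaps(A, Other); });
      bool Alignable = Slot.front().Align % A.Align == 0;
      if (NoInterference && Alignable) {
        Slot.push_back(A);
        Merged = true;
        break;
      }
    }
    if (!Merged)
      Slots.push_back({A});
  }
  for (size_t I = 0; I < Slots.size(); ++I)
    for (const Alloca &A : Slots[I])
      std::printf("slot %zu: %s\n", I, A.Name);  // "a" and "b" share slot 0
  return 0;
}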
// The Id in the layout field is a pointer to our Field for it. SmallVector LayoutFields; LayoutFields.reserve(Fields.size()); for (auto &Field : Fields) { LayoutFields.emplace_back(&Field, Field.Size, Field.Alignment, Field.Offset); } // Perform layout. auto SizeAndAlign = performOptimizedStructLayout(LayoutFields); StructSize = SizeAndAlign.first; StructAlign = SizeAndAlign.second; auto getField = [](const OptimizedStructLayoutField &LayoutField) -> Field & { return *static_cast(const_cast(LayoutField.Id)); }; // We need to produce a packed struct type if there's a field whose // assigned offset isn't a multiple of its natural type alignment. bool Packed = [&] { for (auto &LayoutField : LayoutFields) { auto &F = getField(LayoutField); if (!isAligned(F.TyAlignment, LayoutField.Offset)) return true; } return false; }(); // Build the struct body. SmallVector FieldTypes; FieldTypes.reserve(LayoutFields.size() * 3 / 2); uint64_t LastOffset = 0; for (auto &LayoutField : LayoutFields) { auto &F = getField(LayoutField); auto Offset = LayoutField.Offset; // Add a padding field if there's a padding gap and we're either // building a packed struct or the padding gap is more than we'd // get from aligning to the field type's natural alignment. assert(Offset >= LastOffset); if (Offset != LastOffset) { if (Packed || alignTo(LastOffset, F.TyAlignment) != Offset) FieldTypes.push_back(ArrayType::get(Type::getInt8Ty(Context), Offset - LastOffset)); } F.Offset = Offset; F.LayoutFieldIndex = FieldTypes.size(); FieldTypes.push_back(F.Ty); LastOffset = Offset + F.Size; } Ty->setBody(FieldTypes, Packed); #ifndef NDEBUG // Check that the IR layout matches the offsets we expect. auto Layout = DL.getStructLayout(Ty); for (auto &F : Fields) { assert(Ty->getElementType(F.LayoutFieldIndex) == F.Ty); assert(Layout->getElementOffset(F.LayoutFieldIndex) == F.Offset); } #endif IsFinished = true; } // Build a struct that will keep state for an active coroutine. // struct f.frame { // ResumeFnTy ResumeFnAddr; // ResumeFnTy DestroyFnAddr; // int ResumeIndex; // ... promise (if present) ... // ... spills ... // }; static StructType *buildFrameType(Function &F, coro::Shape &Shape, FrameDataInfo &FrameData) { LLVMContext &C = F.getContext(); const DataLayout &DL = F.getParent()->getDataLayout(); StructType *FrameTy = [&] { SmallString<32> Name(F.getName()); Name.append(".Frame"); return StructType::create(C, Name); }(); FrameTypeBuilder B(C, DL); AllocaInst *PromiseAlloca = Shape.getPromiseAlloca(); Optional SwitchIndexFieldId; if (Shape.ABI == coro::ABI::Switch) { auto *FramePtrTy = FrameTy->getPointerTo(); auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy, /*IsVarArg=*/false); auto *FnPtrTy = FnTy->getPointerTo(); // Add header fields for the resume and destroy functions. // We can rely on these being perfectly packed. (void)B.addField(FnPtrTy, None, /*header*/ true); (void)B.addField(FnPtrTy, None, /*header*/ true); // PromiseAlloca field needs to be explicitly added here because it's // a header field with a fixed offset based on its alignment. Hence it // needs special handling and cannot be added to FrameData.Allocas. if (PromiseAlloca) FrameData.setFieldIndex( PromiseAlloca, B.addFieldForAlloca(PromiseAlloca, /*header*/ true)); // Add a field to store the suspend index. This doesn't need to // be in the header. 
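The statement that follows sizes the suspend-index field: distinguishing K suspend points takes ceil(log2(K)) bits, with a floor of one bit. A quick standalone check of that arithmetic, where log2Ceil is a local stand-in for llvm::Log2_64_Ceil:

#include <algorithm>
#include <cstdint>
#include <cstdio>

static unsigned log2Ceil(uint64_t N) {  // local stand-in, not the LLVM helper
  unsigned Bits = 0;
  while ((uint64_t(1) << Bits) < N)
    ++Bits;
  return Bits;
}

int main() {
  for (uint64_t Suspends : {1, 2, 3, 4, 5})
    std::printf("%llu suspend point(s) -> %u index bit(s)\n",
                (unsigned long long)Suspends,
                std::max(1u, log2Ceil(Suspends)));
  // Prints 1, 1, 2, 2 and 3 bits respectively.
  return 0;
}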
unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size())); Type *IndexType = Type::getIntNTy(C, IndexBits); SwitchIndexFieldId = B.addField(IndexType, None); } else { assert(PromiseAlloca == nullptr && "lowering doesn't support promises"); } // Because multiple allocas may own the same field slot, // we add allocas to field here. B.addFieldForAllocas(F, FrameData, Shape); // Add PromiseAlloca to Allocas list so that // 1. updateLayoutIndex could update its index after // `performOptimizedStructLayout` // 2. it is processed in insertSpills. if (Shape.ABI == coro::ABI::Switch && PromiseAlloca) // We assume that the promise alloca won't be modified before // CoroBegin and no alias will be create before CoroBegin. FrameData.Allocas.emplace_back( PromiseAlloca, DenseMap>{}, false); // Create an entry for every spilled value. for (auto &S : FrameData.Spills) { - FieldIDType Id = B.addField(S.first->getType(), None); + Type *FieldType = S.first->getType(); + // For byval arguments, we need to store the pointed value in the frame, + // instead of the pointer itself. + if (const Argument *A = dyn_cast(S.first)) + if (A->hasByValAttr()) + FieldType = FieldType->getPointerElementType(); + FieldIDType Id = B.addField(FieldType, None); FrameData.setFieldIndex(S.first, Id); } B.finish(FrameTy); FrameData.updateLayoutIndex(B); Shape.FrameAlign = B.getStructAlign(); Shape.FrameSize = B.getStructSize(); switch (Shape.ABI) { case coro::ABI::Switch: // In the switch ABI, remember the switch-index field. Shape.SwitchLowering.IndexField = B.getLayoutFieldIndex(*SwitchIndexFieldId); // Also round the frame size up to a multiple of its alignment, as is // generally expected in C/C++. Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign); break; // In the retcon ABI, remember whether the frame is inline in the storage. case coro::ABI::Retcon: case coro::ABI::RetconOnce: { auto Id = Shape.getRetconCoroId(); Shape.RetconLowering.IsFrameInlineInStorage = (B.getStructSize() <= Id->getStorageSize() && B.getStructAlign() <= Id->getStorageAlignment()); break; } case coro::ABI::Async: { Shape.AsyncLowering.FrameOffset = alignTo(Shape.AsyncLowering.ContextHeaderSize, Shape.FrameAlign); // Also make the final context size a multiple of the context alignment to // make allocation easier for allocators. Shape.AsyncLowering.ContextSize = alignTo(Shape.AsyncLowering.FrameOffset + Shape.FrameSize, Shape.AsyncLowering.getContextAlignment()); if (Shape.AsyncLowering.getContextAlignment() < Shape.FrameAlign) { report_fatal_error( "The alignment requirment of frame variables cannot be higher than " "the alignment of the async function context"); } break; } } return FrameTy; } // We use a pointer use visitor to track how an alloca is being used. // The goal is to be able to answer the following three questions: // 1. Should this alloca be allocated on the frame instead. // 2. Could the content of the alloca be modified prior to CoroBegn, which would // require copying the data from alloca to the frame after CoroBegin. // 3. Is there any alias created for this alloca prior to CoroBegin, but used // after CoroBegin. In that case, we will need to recreate the alias after // CoroBegin based off the frame. To answer question 1, we track two things: // a. List of all BasicBlocks that use this alloca or any of the aliases of // the alloca. In the end, we check if there exists any two basic blocks that // cross suspension points. If so, this alloca must be put on the frame. b. 
// Whether the alloca or any alias of the alloca is escaped at some point, // either by storing the address somewhere, or the address is used in a // function call that might capture. If it's ever escaped, this alloca must be // put on the frame conservatively. // To answer quetion 2, we track through the variable MayWriteBeforeCoroBegin. // Whenever a potential write happens, either through a store instruction, a // function call or any of the memory intrinsics, we check whether this // instruction is prior to CoroBegin. To answer question 3, we track the offsets // of all aliases created for the alloca prior to CoroBegin but used after // CoroBegin. llvm::Optional is used to be able to represent the case when the // offset is unknown (e.g. when you have a PHINode that takes in different // offset values). We cannot handle unknown offsets and will assert. This is the // potential issue left out. An ideal solution would likely require a // significant redesign. namespace { struct AllocaUseVisitor : PtrUseVisitor { using Base = PtrUseVisitor; AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT, const CoroBeginInst &CB, const SuspendCrossingInfo &Checker) : PtrUseVisitor(DL), DT(DT), CoroBegin(CB), Checker(Checker) {} void visit(Instruction &I) { UserBBs.insert(I.getParent()); Base::visit(I); // If the pointer is escaped prior to CoroBegin, we have to assume it would // be written into before CoroBegin as well. if (PI.isEscaped() && !DT.dominates(&CoroBegin, PI.getEscapingInst())) { MayWriteBeforeCoroBegin = true; } } // We need to provide this overload as PtrUseVisitor uses a pointer based // visiting function. void visit(Instruction *I) { return visit(*I); } void visitPHINode(PHINode &I) { enqueueUsers(I); handleAlias(I); } void visitSelectInst(SelectInst &I) { enqueueUsers(I); handleAlias(I); } void visitStoreInst(StoreInst &SI) { // Regardless whether the alias of the alloca is the value operand or the // pointer operand, we need to assume the alloca is been written. handleMayWrite(SI); if (SI.getValueOperand() != U->get()) return; // We are storing the pointer into a memory location, potentially escaping. // As an optimization, we try to detect simple cases where it doesn't // actually escape, for example: // %ptr = alloca .. // %addr = alloca .. // store %ptr, %addr // %x = load %addr // .. // If %addr is only used by loading from it, we could simply treat %x as // another alias of %ptr, and not considering %ptr being escaped. auto IsSimpleStoreThenLoad = [&]() { auto *AI = dyn_cast(SI.getPointerOperand()); // If the memory location we are storing to is not an alloca, it // could be an alias of some other memory locations, which is difficult // to analyze. if (!AI) return false; // StoreAliases contains aliases of the memory location stored into. SmallVector StoreAliases = {AI}; while (!StoreAliases.empty()) { Instruction *I = StoreAliases.pop_back_val(); for (User *U : I->users()) { // If we are loading from the memory location, we are creating an // alias of the original pointer. if (auto *LI = dyn_cast(U)) { enqueueUsers(*LI); handleAlias(*LI); continue; } // If we are overriding the memory location, the pointer certainly // won't escape. if (auto *S = dyn_cast(U)) if (S->getPointerOperand() == I) continue; if (auto *II = dyn_cast(U)) if (II->isLifetimeStartOrEnd()) continue; // BitCastInst creats aliases of the memory location being stored // into. 
if (auto *BI = dyn_cast(U)) { StoreAliases.push_back(BI); continue; } return false; } } return true; }; if (!IsSimpleStoreThenLoad()) PI.setEscaped(&SI); } // All mem intrinsics modify the data. void visitMemIntrinsic(MemIntrinsic &MI) { handleMayWrite(MI); } void visitBitCastInst(BitCastInst &BC) { Base::visitBitCastInst(BC); handleAlias(BC); } void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) { Base::visitAddrSpaceCastInst(ASC); handleAlias(ASC); } void visitGetElementPtrInst(GetElementPtrInst &GEPI) { // The base visitor will adjust Offset accordingly. Base::visitGetElementPtrInst(GEPI); handleAlias(GEPI); } void visitCallBase(CallBase &CB) { for (unsigned Op = 0, OpCount = CB.getNumArgOperands(); Op < OpCount; ++Op) if (U->get() == CB.getArgOperand(Op) && !CB.doesNotCapture(Op)) PI.setEscaped(&CB); handleMayWrite(CB); } bool getShouldLiveOnFrame() const { if (!ShouldLiveOnFrame) ShouldLiveOnFrame = computeShouldLiveOnFrame(); return ShouldLiveOnFrame.getValue(); } bool getMayWriteBeforeCoroBegin() const { return MayWriteBeforeCoroBegin; } DenseMap> getAliasesCopy() const { assert(getShouldLiveOnFrame() && "This method should only be called if the " "alloca needs to live on the frame."); for (const auto &P : AliasOffetMap) if (!P.second) report_fatal_error("Unable to handle an alias with unknown offset " "created before CoroBegin."); return AliasOffetMap; } private: const DominatorTree &DT; const CoroBeginInst &CoroBegin; const SuspendCrossingInfo &Checker; // All alias to the original AllocaInst, created before CoroBegin and used // after CoroBegin. Each entry contains the instruction and the offset in the // original Alloca. They need to be recreated after CoroBegin off the frame. DenseMap> AliasOffetMap{}; SmallPtrSet UserBBs{}; bool MayWriteBeforeCoroBegin{false}; mutable llvm::Optional ShouldLiveOnFrame{}; bool computeShouldLiveOnFrame() const { if (PI.isEscaped()) return true; for (auto *BB1 : UserBBs) for (auto *BB2 : UserBBs) if (Checker.hasPathCrossingSuspendPoint(BB1, BB2)) return true; return false; } void handleMayWrite(const Instruction &I) { if (!DT.dominates(&CoroBegin, &I)) MayWriteBeforeCoroBegin = true; } bool usedAfterCoroBegin(Instruction &I) { for (auto &U : I.uses()) if (DT.dominates(&CoroBegin, U)) return true; return false; } void handleAlias(Instruction &I) { // We track all aliases created prior to CoroBegin but used after. // These aliases may need to be recreated after CoroBegin if the alloca // need to live on the frame. if (DT.dominates(&CoroBegin, &I) || !usedAfterCoroBegin(I)) return; if (!IsOffsetKnown) { AliasOffetMap[&I].reset(); } else { auto Itr = AliasOffetMap.find(&I); if (Itr == AliasOffetMap.end()) { AliasOffetMap[&I] = Offset; } else if (Itr->second.hasValue() && Itr->second.getValue() != Offset) { // If we have seen two different possible values for this alias, we set // it to empty. AliasOffetMap[&I].reset(); } } } }; } // namespace // We need to make room to insert a spill after initial PHIs, but before // catchswitch instruction. Placing it before violates the requirement that // catchswitch, like all other EHPads must be the first nonPHI in a block. // // Split away catchswitch into a separate block and insert in its place: // // cleanuppad cleanupret. // // cleanupret instruction will act as an insert point for the spill. 
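handleAlias above records, per alias created before CoroBegin, either one known constant offset into the alloca or an explicit unknown state; a second, conflicting offset (or an unknown one) poisons the entry, and getAliasesCopy later refuses to proceed on unknowns. A compact sketch of that merge rule using std::optional and invented alias names:

#include <cstdint>
#include <cstdio>
#include <map>
#include <optional>
#include <string>

int main() {
  std::map<std::string, std::optional<int64_t>> AliasOffsetMap;
  auto handleAlias = [&](const std::string &Alias,
                         std::optional<int64_t> Offset) {
    auto It = AliasOffsetMap.find(Alias);
    if (!Offset)
      AliasOffsetMap[Alias].reset();            // offset not known at all
    else if (It == AliasOffsetMap.end())
      AliasOffsetMap[Alias] = Offset;           // first sighting: remember it
    else if (It->second && *It->second != *Offset)
      It->second.reset();                       // two different offsets: unknown
  };
  handleAlias("gep1", 8);
  handleAlias("gep1", 8);    // same offset again, still known
  handleAlias("phi", 8);
  handleAlias("phi", 16);    // conflicting offsets collapse to unknown
  for (const auto &P : AliasOffsetMap)
    std::printf("%s: %s\n", P.first.c_str(), P.second ? "known" : "unknown");
  return 0;
}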
static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) { BasicBlock *CurrentBlock = CatchSwitch->getParent(); BasicBlock *NewBlock = CurrentBlock->splitBasicBlock(CatchSwitch); CurrentBlock->getTerminator()->eraseFromParent(); auto *CleanupPad = CleanupPadInst::Create(CatchSwitch->getParentPad(), {}, "", CurrentBlock); auto *CleanupRet = CleanupReturnInst::Create(CleanupPad, NewBlock, CurrentBlock); return CleanupRet; } // Replace all alloca and SSA values that are accessed across suspend points // with GetElementPointer from coroutine frame + loads and stores. Create an // AllocaSpillBB that will become the new entry block for the resume parts of // the coroutine: // // %hdl = coro.begin(...) // whatever // // becomes: // // %hdl = coro.begin(...) // %FramePtr = bitcast i8* hdl to %f.frame* // br label %AllocaSpillBB // // AllocaSpillBB: // ; geps corresponding to allocas that were moved to coroutine frame // br label PostSpill // // PostSpill: // whatever // // static Instruction *insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) { auto *CB = Shape.CoroBegin; LLVMContext &C = CB->getContext(); IRBuilder<> Builder(CB->getNextNode()); StructType *FrameTy = Shape.FrameTy; PointerType *FramePtrTy = FrameTy->getPointerTo(); auto *FramePtr = cast(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr")); DominatorTree DT(*CB->getFunction()); SmallDenseMap DbgPtrAllocaCache; // Create a GEP with the given index into the coroutine frame for the original // value Orig. Appends an extra 0 index for array-allocas, preserving the // original type. auto GetFramePointer = [&](Value *Orig) -> Value * { FieldIDType Index = FrameData.getFieldIndex(Orig); SmallVector Indices = { ConstantInt::get(Type::getInt32Ty(C), 0), ConstantInt::get(Type::getInt32Ty(C), Index), }; if (auto *AI = dyn_cast(Orig)) { if (auto *CI = dyn_cast(AI->getArraySize())) { auto Count = CI->getValue().getZExtValue(); if (Count > 1) { Indices.push_back(ConstantInt::get(Type::getInt32Ty(C), 0)); } } else { report_fatal_error("Coroutines cannot handle non static allocas yet"); } } auto GEP = cast( Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices)); if (isa(Orig)) { // If the type of GEP is not equal to the type of AllocaInst, it implies // that the AllocaInst may be reused in the Frame slot of other // AllocaInst. So We cast GEP to the AllocaInst here to re-use // the Frame storage. // // Note: If we change the strategy dealing with alignment, we need to refine // this casting. if (GEP->getResultElementType() != Orig->getType()) return Builder.CreateBitCast(GEP, Orig->getType(), Orig->getName() + Twine(".cast")); } return GEP; }; for (auto const &E : FrameData.Spills) { Value *Def = E.first; // Create a store instruction storing the value into the // coroutine frame. Instruction *InsertPt = nullptr; + bool NeedToCopyArgPtrValue = false; if (auto *Arg = dyn_cast(Def)) { // For arguments, we will place the store instruction right after // the coroutine frame pointer instruction, i.e. bitcast of // coro.begin from i8* to %f.frame*. InsertPt = FramePtr->getNextNode(); // If we're spilling an Argument, make sure we clear 'nocapture' // from the coroutine function. Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture); + if (Arg->hasByValAttr()) + NeedToCopyArgPtrValue = true; + } else if (auto *CSI = dyn_cast(Def)) { // Don't spill immediately after a suspend; splitting assumes // that the suspend will be followed by a branch. 
InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHI(); } else { auto *I = cast(Def); if (!DT.dominates(CB, I)) { // If it is not dominated by CoroBegin, then spill should be // inserted immediately after CoroFrame is computed. InsertPt = FramePtr->getNextNode(); } else if (auto *II = dyn_cast(I)) { // If we are spilling the result of the invoke instruction, split // the normal edge and insert the spill in the new block. auto *NewBB = SplitEdge(II->getParent(), II->getNormalDest()); InsertPt = NewBB->getTerminator(); } else if (isa(I)) { // Skip the PHINodes and EH pads instructions. BasicBlock *DefBlock = I->getParent(); if (auto *CSI = dyn_cast(DefBlock->getTerminator())) InsertPt = splitBeforeCatchSwitch(CSI); else InsertPt = &*DefBlock->getFirstInsertionPt(); } else { assert(!I->isTerminator() && "unexpected terminator"); // For all other values, the spill is placed immediately after // the definition. InsertPt = I->getNextNode(); } } auto Index = FrameData.getFieldIndex(Def); Builder.SetInsertPoint(InsertPt); auto *G = Builder.CreateConstInBoundsGEP2_32( FrameTy, FramePtr, 0, Index, Def->getName() + Twine(".spill.addr")); - Builder.CreateStore(Def, G); + if (NeedToCopyArgPtrValue) { + // For byval arguments, we need to store the pointed value in the frame, + // instead of the pointer itself. + auto *Value = + Builder.CreateLoad(Def->getType()->getPointerElementType(), Def); + Builder.CreateStore(Value, G); + } else { + Builder.CreateStore(Def, G); + } BasicBlock *CurrentBlock = nullptr; Value *CurrentReload = nullptr; for (auto *U : E.second) { // If we have not seen the use block, create a load instruction to reload // the spilled value from the coroutine frame. Populates the Value pointer // reference provided with the frame GEP. if (CurrentBlock != U->getParent()) { CurrentBlock = U->getParent(); Builder.SetInsertPoint(&*CurrentBlock->getFirstInsertionPt()); auto *GEP = GetFramePointer(E.first); GEP->setName(E.first->getName() + Twine(".reload.addr")); - CurrentReload = Builder.CreateLoad( - FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP, - E.first->getName() + Twine(".reload")); + if (NeedToCopyArgPtrValue) + CurrentReload = GEP; + else + CurrentReload = Builder.CreateLoad( + FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP, + E.first->getName() + Twine(".reload")); TinyPtrVector DIs = FindDbgDeclareUses(Def); for (DbgDeclareInst *DDI : DIs) { bool AllowUnresolved = false; // This dbg.declare is preserved for all coro-split function // fragments. It will be unreachable in the main function, and // processed by coro::salvageDebugInfo() by CoroCloner. DIBuilder(*CurrentBlock->getParent()->getParent(), AllowUnresolved) .insertDeclare(CurrentReload, DDI->getVariable(), DDI->getExpression(), DDI->getDebugLoc(), &*Builder.GetInsertPoint()); // This dbg.declare is for the main function entry point. It // will be deleted in all coro-split functions. coro::salvageDebugInfo(DbgPtrAllocaCache, DDI); } } // If we have a single edge PHINode, remove it and replace it with a // reload from the coroutine frame. (We already took care of multi edge // PHINodes by rewriting them in the rewritePHIs function). if (auto *PN = dyn_cast(U)) { assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming " "values in the PHINode"); PN->replaceAllUsesWith(CurrentReload); PN->eraseFromParent(); continue; } // Replace all uses of CurrentValue in the current instruction with // reload. 
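The NeedToCopyArgPtrValue handling added above changes what the frame holds for a byval argument: the field is typed as the pointee, the spill becomes a load of the pointee followed by a store into the frame, and the later reload simply reuses the field's address as the argument pointer. A plain C++ analogy of that shape (not LLVM API; types and names invented):

#include <cstdio>

struct Big { int data[4]; };

struct Frame {
  Big ArgCopy;               // the field holds the pointed-to value, not a Big *
};

static void spill(Frame &F, const Big *ByValArg) {
  F.ArgCopy = *ByValArg;     // load through the pointer, store into the frame
}

static Big *reload(Frame &F) {
  return &F.ArgCopy;         // the frame field's address stands in for the argument
}

int main() {
  Big Arg = {{1, 2, 3, 4}};
  Frame F;
  spill(F, &Arg);
  Arg.data[2] = 99;          // later changes to the original no longer matter
  std::printf("%d\n", reload(F)->data[2]);  // prints 3, read from the frame copy
  return 0;
}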
U->replaceUsesOfWith(Def, CurrentReload); } } BasicBlock *FramePtrBB = FramePtr->getParent(); auto SpillBlock = FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB"); SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill"); Shape.AllocaSpillBlock = SpillBlock; // retcon and retcon.once lowering assumes all uses have been sunk. if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce || Shape.ABI == coro::ABI::Async) { // If we found any allocas, replace all of their remaining uses with Geps. Builder.SetInsertPoint(&SpillBlock->front()); for (const auto &P : FrameData.Allocas) { AllocaInst *Alloca = P.Alloca; auto *G = GetFramePointer(Alloca); // We are not using ReplaceInstWithInst(P.first, cast(G)) // here, as we are changing location of the instruction. G->takeName(Alloca); Alloca->replaceAllUsesWith(G); Alloca->eraseFromParent(); } return FramePtr; } // If we found any alloca, replace all of their remaining uses with GEP // instructions. Because new dbg.declare have been created for these alloca, // we also delete the original dbg.declare and replace other uses with undef. // Note: We cannot replace the alloca with GEP instructions indiscriminately, // as some of the uses may not be dominated by CoroBegin. Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front()); SmallVector UsersToUpdate; for (const auto &A : FrameData.Allocas) { AllocaInst *Alloca = A.Alloca; UsersToUpdate.clear(); for (User *U : Alloca->users()) { auto *I = cast(U); if (DT.dominates(CB, I)) UsersToUpdate.push_back(I); } if (UsersToUpdate.empty()) continue; auto *G = GetFramePointer(Alloca); G->setName(Alloca->getName() + Twine(".reload.addr")); SmallPtrSet SeenDbgBBs; TinyPtrVector DIs = FindDbgDeclareUses(Alloca); if (!DIs.empty()) DIBuilder(*Alloca->getModule(), /*AllowUnresolved*/ false) .insertDeclare(G, DIs.front()->getVariable(), DIs.front()->getExpression(), DIs.front()->getDebugLoc(), DIs.front()); for (auto *DI : FindDbgDeclareUses(Alloca)) DI->eraseFromParent(); replaceDbgUsesWithUndef(Alloca); for (Instruction *I : UsersToUpdate) I->replaceUsesOfWith(Alloca, G); } Builder.SetInsertPoint(FramePtr->getNextNode()); for (const auto &A : FrameData.Allocas) { AllocaInst *Alloca = A.Alloca; if (A.MayWriteBeforeCoroBegin) { // isEscaped really means potentially modified before CoroBegin. if (Alloca->isArrayAllocation()) report_fatal_error( "Coroutines cannot handle copying of array allocas yet"); auto *G = GetFramePointer(Alloca); auto *Value = Builder.CreateLoad(Alloca->getAllocatedType(), Alloca); Builder.CreateStore(Value, G); } // For each alias to Alloca created before CoroBegin but used after // CoroBegin, we recreate them after CoroBegin by appplying the offset // to the pointer in the frame. for (const auto &Alias : A.Aliases) { auto *FramePtr = GetFramePointer(Alloca); auto *FramePtrRaw = Builder.CreateBitCast(FramePtr, Type::getInt8PtrTy(C)); auto *AliasPtr = Builder.CreateGEP( FramePtrRaw, ConstantInt::get(Type::getInt64Ty(C), Alias.second.getValue())); auto *AliasPtrTyped = Builder.CreateBitCast(AliasPtr, Alias.first->getType()); Alias.first->replaceUsesWithIf( AliasPtrTyped, [&](Use &U) { return DT.dominates(CB, U); }); } } return FramePtr; } // Sets the unwind edge of an instruction to a particular successor. 
static void setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) { if (auto *II = dyn_cast(TI)) II->setUnwindDest(Succ); else if (auto *CS = dyn_cast(TI)) CS->setUnwindDest(Succ); else if (auto *CR = dyn_cast(TI)) CR->setUnwindDest(Succ); else llvm_unreachable("unexpected terminator instruction"); } // Replaces all uses of OldPred with the NewPred block in all PHINodes in a // block. static void updatePhiNodes(BasicBlock *DestBB, BasicBlock *OldPred, BasicBlock *NewPred, PHINode *Until = nullptr) { unsigned BBIdx = 0; for (BasicBlock::iterator I = DestBB->begin(); isa(I); ++I) { PHINode *PN = cast(I); // We manually update the LandingPadReplacement PHINode and it is the last // PHI Node. So, if we find it, we are done. if (Until == PN) break; // Reuse the previous value of BBIdx if it lines up. In cases where we // have multiple phi nodes with *lots* of predecessors, this is a speed // win because we don't have to scan the PHI looking for TIBB. This // happens because the BB list of PHI nodes are usually in the same // order. if (PN->getIncomingBlock(BBIdx) != OldPred) BBIdx = PN->getBasicBlockIndex(OldPred); assert(BBIdx != (unsigned)-1 && "Invalid PHI Index!"); PN->setIncomingBlock(BBIdx, NewPred); } } // Uses SplitEdge unless the successor block is an EHPad, in which case do EH // specific handling. static BasicBlock *ehAwareSplitEdge(BasicBlock *BB, BasicBlock *Succ, LandingPadInst *OriginalPad, PHINode *LandingPadReplacement) { auto *PadInst = Succ->getFirstNonPHI(); if (!LandingPadReplacement && !PadInst->isEHPad()) return SplitEdge(BB, Succ); auto *NewBB = BasicBlock::Create(BB->getContext(), "", BB->getParent(), Succ); setUnwindEdgeTo(BB->getTerminator(), NewBB); updatePhiNodes(Succ, BB, NewBB, LandingPadReplacement); if (LandingPadReplacement) { auto *NewLP = OriginalPad->clone(); auto *Terminator = BranchInst::Create(Succ, NewBB); NewLP->insertBefore(Terminator); LandingPadReplacement->addIncoming(NewLP, NewBB); return NewBB; } Value *ParentPad = nullptr; if (auto *FuncletPad = dyn_cast(PadInst)) ParentPad = FuncletPad->getParentPad(); else if (auto *CatchSwitch = dyn_cast(PadInst)) ParentPad = CatchSwitch->getParentPad(); else llvm_unreachable("handling for other EHPads not implemented yet"); auto *NewCleanupPad = CleanupPadInst::Create(ParentPad, {}, "", NewBB); CleanupReturnInst::Create(NewCleanupPad, Succ, NewBB); return NewBB; } // Moves the values in the PHIs in SuccBB that correspong to PredBB into a new // PHI in InsertedBB. static void movePHIValuesToInsertedBlock(BasicBlock *SuccBB, BasicBlock *InsertedBB, BasicBlock *PredBB, PHINode *UntilPHI = nullptr) { auto *PN = cast(&SuccBB->front()); do { int Index = PN->getBasicBlockIndex(InsertedBB); Value *V = PN->getIncomingValue(Index); PHINode *InputV = PHINode::Create( V->getType(), 1, V->getName() + Twine(".") + SuccBB->getName(), &InsertedBB->front()); InputV->addIncoming(V, PredBB); PN->setIncomingValue(Index, InputV); PN = dyn_cast(PN->getNextNode()); } while (PN != UntilPHI); } // Rewrites the PHI Nodes in a cleanuppad. static void rewritePHIsForCleanupPad(BasicBlock *CleanupPadBB, CleanupPadInst *CleanupPad) { // For every incoming edge to a CleanupPad we will create a new block holding // all incoming values in single-value PHI nodes. We will then create another // block to act as a dispather (as all unwind edges for related EH blocks // must be the same). 
// // cleanuppad: // %2 = phi i32[%0, %catchswitch], [%1, %catch.1] // %3 = cleanuppad within none [] // // It will create: // // cleanuppad.corodispatch // %2 = phi i8[0, %catchswitch], [1, %catch.1] // %3 = cleanuppad within none [] // switch i8 % 2, label %unreachable // [i8 0, label %cleanuppad.from.catchswitch // i8 1, label %cleanuppad.from.catch.1] // cleanuppad.from.catchswitch: // %4 = phi i32 [%0, %catchswitch] // br %label cleanuppad // cleanuppad.from.catch.1: // %6 = phi i32 [%1, %catch.1] // br %label cleanuppad // cleanuppad: // %8 = phi i32 [%4, %cleanuppad.from.catchswitch], // [%6, %cleanuppad.from.catch.1] // Unreachable BB, in case switching on an invalid value in the dispatcher. auto *UnreachBB = BasicBlock::Create( CleanupPadBB->getContext(), "unreachable", CleanupPadBB->getParent()); IRBuilder<> Builder(UnreachBB); Builder.CreateUnreachable(); // Create a new cleanuppad which will be the dispatcher. auto *NewCleanupPadBB = BasicBlock::Create(CleanupPadBB->getContext(), CleanupPadBB->getName() + Twine(".corodispatch"), CleanupPadBB->getParent(), CleanupPadBB); Builder.SetInsertPoint(NewCleanupPadBB); auto *SwitchType = Builder.getInt8Ty(); auto *SetDispatchValuePN = Builder.CreatePHI(SwitchType, pred_size(CleanupPadBB)); CleanupPad->removeFromParent(); CleanupPad->insertAfter(SetDispatchValuePN); auto *SwitchOnDispatch = Builder.CreateSwitch(SetDispatchValuePN, UnreachBB, pred_size(CleanupPadBB)); int SwitchIndex = 0; SmallVector Preds(predecessors(CleanupPadBB)); for (BasicBlock *Pred : Preds) { // Create a new cleanuppad and move the PHI values to there. auto *CaseBB = BasicBlock::Create(CleanupPadBB->getContext(), CleanupPadBB->getName() + Twine(".from.") + Pred->getName(), CleanupPadBB->getParent(), CleanupPadBB); updatePhiNodes(CleanupPadBB, Pred, CaseBB); CaseBB->setName(CleanupPadBB->getName() + Twine(".from.") + Pred->getName()); Builder.SetInsertPoint(CaseBB); Builder.CreateBr(CleanupPadBB); movePHIValuesToInsertedBlock(CleanupPadBB, CaseBB, NewCleanupPadBB); // Update this Pred to the new unwind point. setUnwindEdgeTo(Pred->getTerminator(), NewCleanupPadBB); // Setup the switch in the dispatcher. auto *SwitchConstant = ConstantInt::get(SwitchType, SwitchIndex); SetDispatchValuePN->addIncoming(SwitchConstant, Pred); SwitchOnDispatch->addCase(SwitchConstant, CaseBB); SwitchIndex++; } } static void rewritePHIs(BasicBlock &BB) { // For every incoming edge we will create a block holding all // incoming values in a single PHI nodes. // // loop: // %n.val = phi i32[%n, %entry], [%inc, %loop] // // It will create: // // loop.from.entry: // %n.loop.pre = phi i32 [%n, %entry] // br %label loop // loop.from.loop: // %inc.loop.pre = phi i32 [%inc, %loop] // br %label loop // // After this rewrite, further analysis will ignore any phi nodes with more // than one incoming edge. // TODO: Simplify PHINodes in the basic block to remove duplicate // predecessors. // Special case for CleanupPad: all EH blocks must have the same unwind edge // so we need to create an additional "dispatcher" block. if (auto *CleanupPad = dyn_cast_or_null(BB.getFirstNonPHI())) { SmallVector Preds(predecessors(&BB)); for (BasicBlock *Pred : Preds) { if (CatchSwitchInst *CS = dyn_cast(Pred->getTerminator())) { // CleanupPad with a CatchSwitch predecessor: therefore this is an // unwind destination that needs to be handle specially. 
assert(CS->getUnwindDest() == &BB); (void)CS; rewritePHIsForCleanupPad(&BB, CleanupPad); return; } } } LandingPadInst *LandingPad = nullptr; PHINode *ReplPHI = nullptr; if ((LandingPad = dyn_cast_or_null(BB.getFirstNonPHI()))) { // ehAwareSplitEdge will clone the LandingPad in all the edge blocks. // We replace the original landing pad with a PHINode that will collect the // results from all of them. ReplPHI = PHINode::Create(LandingPad->getType(), 1, "", LandingPad); ReplPHI->takeName(LandingPad); LandingPad->replaceAllUsesWith(ReplPHI); // We will erase the original landing pad at the end of this function after // ehAwareSplitEdge cloned it in the transition blocks. } SmallVector Preds(predecessors(&BB)); for (BasicBlock *Pred : Preds) { auto *IncomingBB = ehAwareSplitEdge(Pred, &BB, LandingPad, ReplPHI); IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName()); // Stop the moving of values at ReplPHI, as this is either null or the PHI // that replaced the landing pad. movePHIValuesToInsertedBlock(&BB, IncomingBB, Pred, ReplPHI); } if (LandingPad) { // Calls to ehAwareSplitEdge function cloned the original lading pad. // No longer need it. LandingPad->eraseFromParent(); } } static void rewritePHIs(Function &F) { SmallVector WorkList; for (BasicBlock &BB : F) if (auto *PN = dyn_cast(&BB.front())) if (PN->getNumIncomingValues() > 1) WorkList.push_back(&BB); for (BasicBlock *BB : WorkList) rewritePHIs(*BB); } // Check for instructions that we can recreate on resume as opposed to spill // the result into a coroutine frame. static bool materializable(Instruction &V) { return isa(&V) || isa(&V) || isa(&V) || isa(&V) || isa(&V); } // Check for structural coroutine intrinsics that should not be spilled into // the coroutine frame. static bool isCoroutineStructureIntrinsic(Instruction &I) { return isa(&I) || isa(&I) || isa(&I); } // For every use of the value that is across suspend point, recreate that value // after a suspend point. static void rewriteMaterializableInstructions(IRBuilder<> &IRB, const SpillInfo &Spills) { for (const auto &E : Spills) { Value *Def = E.first; BasicBlock *CurrentBlock = nullptr; Instruction *CurrentMaterialization = nullptr; for (Instruction *U : E.second) { // If we have not seen this block, materialize the value. if (CurrentBlock != U->getParent()) { CurrentBlock = U->getParent(); CurrentMaterialization = cast(Def)->clone(); CurrentMaterialization->setName(Def->getName()); CurrentMaterialization->insertBefore( &*CurrentBlock->getFirstInsertionPt()); } if (auto *PN = dyn_cast(U)) { assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming " "values in the PHINode"); PN->replaceAllUsesWith(CurrentMaterialization); PN->eraseFromParent(); continue; } // Replace all uses of Def in the current instruction with the // CurrentMaterialization for the block. U->replaceUsesOfWith(Def, CurrentMaterialization); } } } // Splits the block at a particular instruction unless it is the first // instruction in the block with a single predecessor. static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) { auto *BB = I->getParent(); if (&BB->front() == I) { if (BB->getSinglePredecessor()) { BB->setName(Name); return BB; } } return BB->splitBasicBlock(I, Name); } // Split above and below a particular instruction so that it // will be all alone by itself in a block. 
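rewriteMaterializableInstructions above avoids a frame field for cheap, recomputable definitions (casts, GEPs, arithmetic, comparisons, selects): it clones the defining instruction once per block that uses it across a suspend and redirects those uses to the clone. The toy below models only that bookkeeping, with strings standing in for instructions:

#include <cstdio>
#include <map>
#include <string>
#include <utility>
#include <vector>

int main() {
  // Users of the materializable value "%idx", paired with the block they live
  // in, in visitation order (names invented).
  std::vector<std::pair<std::string, std::string>> Uses = {
      {"resume.0", "%load.a"}, {"resume.0", "%load.b"}, {"resume.1", "%store.c"}};

  std::string CurrentBlock;
  std::map<std::string, std::string> CloneInBlock;
  for (const auto &U : Uses) {
    if (U.first != CurrentBlock) {            // first use seen in this block
      CurrentBlock = U.first;
      CloneInBlock[CurrentBlock] = "%idx.remat." + CurrentBlock;
      std::printf("materialize %s at the top of %s\n",
                  CloneInBlock[CurrentBlock].c_str(), CurrentBlock.c_str());
    }
    std::printf("  %s now uses %s\n", U.second.c_str(),
                CloneInBlock[CurrentBlock].c_str());
  }
  return 0;
}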
static void splitAround(Instruction *I, const Twine &Name) { splitBlockIfNotFirst(I, Name); splitBlockIfNotFirst(I->getNextNode(), "After" + Name); } static bool isSuspendBlock(BasicBlock *BB) { return isa(BB->front()); } typedef SmallPtrSet VisitedBlocksSet; /// Does control flow starting at the given block ever reach a suspend /// instruction before reaching a block in VisitedOrFreeBBs? static bool isSuspendReachableFrom(BasicBlock *From, VisitedBlocksSet &VisitedOrFreeBBs) { // Eagerly try to add this block to the visited set. If it's already // there, stop recursing; this path doesn't reach a suspend before // either looping or reaching a freeing block. if (!VisitedOrFreeBBs.insert(From).second) return false; // We assume that we'll already have split suspends into their own blocks. if (isSuspendBlock(From)) return true; // Recurse on the successors. for (auto Succ : successors(From)) { if (isSuspendReachableFrom(Succ, VisitedOrFreeBBs)) return true; } return false; } /// Is the given alloca "local", i.e. bounded in lifetime to not cross a /// suspend point? static bool isLocalAlloca(CoroAllocaAllocInst *AI) { // Seed the visited set with all the basic blocks containing a free // so that we won't pass them up. VisitedBlocksSet VisitedOrFreeBBs; for (auto User : AI->users()) { if (auto FI = dyn_cast(User)) VisitedOrFreeBBs.insert(FI->getParent()); } return !isSuspendReachableFrom(AI->getParent(), VisitedOrFreeBBs); } /// After we split the coroutine, will the given basic block be along /// an obvious exit path for the resumption function? static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB, unsigned depth = 3) { // If we've bottomed out our depth count, stop searching and assume // that the path might loop back. if (depth == 0) return false; // If this is a suspend block, we're about to exit the resumption function. if (isSuspendBlock(BB)) return true; // Recurse into the successors. for (auto Succ : successors(BB)) { if (!willLeaveFunctionImmediatelyAfter(Succ, depth - 1)) return false; } // If none of the successors leads back in a loop, we're on an exit/abort. return true; } static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) { // Look for a free that isn't sufficiently obviously followed by // either a suspend or a termination, i.e. something that will leave // the coro resumption frame. for (auto U : AI->users()) { auto FI = dyn_cast(U); if (!FI) continue; if (!willLeaveFunctionImmediatelyAfter(FI->getParent())) return true; } // If we never found one, we don't need a stack save. return false; } /// Turn each of the given local allocas into a normal (dynamic) alloca /// instruction. static void lowerLocalAllocas(ArrayRef LocalAllocas, SmallVectorImpl &DeadInsts) { for (auto AI : LocalAllocas) { auto M = AI->getModule(); IRBuilder<> Builder(AI); // Save the stack depth. Try to avoid doing this if the stackrestore // is going to immediately precede a return or something. Value *StackSave = nullptr; if (localAllocaNeedsStackSave(AI)) StackSave = Builder.CreateCall( Intrinsic::getDeclaration(M, Intrinsic::stacksave)); // Allocate memory. auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize()); Alloca->setAlignment(Align(AI->getAlignment())); for (auto U : AI->users()) { // Replace gets with the allocation. if (isa(U)) { U->replaceAllUsesWith(Alloca); // Replace frees with stackrestores. This is safe because // alloca.alloc is required to obey a stack discipline, although we // don't enforce that structurally. 
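willLeaveFunctionImmediatelyAfter above is a depth-bounded walk that only answers yes when every successor path reaches a suspend block within the budget; running out of depth is treated as "might loop back". A standalone sketch on an invented CFG:

#include <cstdio>
#include <vector>

struct Block {
  bool Suspend = false;
  std::vector<int> Succs;
};

static bool willLeaveImmediately(const std::vector<Block> &CFG, int BB,
                                 unsigned Depth = 3) {
  if (Depth == 0)
    return false;                     // out of budget: assume it may loop back
  if (CFG[BB].Suspend)
    return true;                      // about to leave the resume function
  for (int Succ : CFG[BB].Succs)
    if (!willLeaveImmediately(CFG, Succ, Depth - 1))
      return false;
  return true;                        // every path out is an exit/abort
}

int main() {
  // 0 -> 1 -> 2 (suspend), and 0 -> 3 -> 0 (a loop back to the entry).
  std::vector<Block> CFG(4);
  CFG[0].Succs = {1, 3};
  CFG[1].Succs = {2};
  CFG[2].Suspend = true;
  CFG[3].Succs = {0};
  std::printf("from block 1: %d\n", willLeaveImmediately(CFG, 1));  // prints 1
  std::printf("from block 0: %d\n", willLeaveImmediately(CFG, 0));  // prints 0
  return 0;
}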
} else { auto FI = cast(U); if (StackSave) { Builder.SetInsertPoint(FI); Builder.CreateCall( Intrinsic::getDeclaration(M, Intrinsic::stackrestore), StackSave); } } DeadInsts.push_back(cast(U)); } DeadInsts.push_back(AI); } } /// Turn the given coro.alloca.alloc call into a dynamic allocation. /// This happens during the all-instructions iteration, so it must not /// delete the call. static Instruction *lowerNonLocalAlloca(CoroAllocaAllocInst *AI, coro::Shape &Shape, SmallVectorImpl &DeadInsts) { IRBuilder<> Builder(AI); auto Alloc = Shape.emitAlloc(Builder, AI->getSize(), nullptr); for (User *U : AI->users()) { if (isa(U)) { U->replaceAllUsesWith(Alloc); } else { auto FI = cast(U); Builder.SetInsertPoint(FI); Shape.emitDealloc(Builder, Alloc, nullptr); } DeadInsts.push_back(cast(U)); } // Push this on last so that it gets deleted after all the others. DeadInsts.push_back(AI); // Return the new allocation value so that we can check for needed spills. return cast(Alloc); } /// Get the current swifterror value. static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy, coro::Shape &Shape) { // Make a fake function pointer as a sort of intrinsic. auto FnTy = FunctionType::get(ValueTy, {}, false); auto Fn = ConstantPointerNull::get(FnTy->getPointerTo()); auto Call = Builder.CreateCall(FnTy, Fn, {}); Shape.SwiftErrorOps.push_back(Call); return Call; } /// Set the given value as the current swifterror value. /// /// Returns a slot that can be used as a swifterror slot. static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V, coro::Shape &Shape) { // Make a fake function pointer as a sort of intrinsic. auto FnTy = FunctionType::get(V->getType()->getPointerTo(), {V->getType()}, false); auto Fn = ConstantPointerNull::get(FnTy->getPointerTo()); auto Call = Builder.CreateCall(FnTy, Fn, { V }); Shape.SwiftErrorOps.push_back(Call); return Call; } /// Set the swifterror value from the given alloca before a call, /// then put in back in the alloca afterwards. /// /// Returns an address that will stand in for the swifterror slot /// until splitting. static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call, AllocaInst *Alloca, coro::Shape &Shape) { auto ValueTy = Alloca->getAllocatedType(); IRBuilder<> Builder(Call); // Load the current value from the alloca and set it as the // swifterror value. auto ValueBeforeCall = Builder.CreateLoad(ValueTy, Alloca); auto Addr = emitSetSwiftErrorValue(Builder, ValueBeforeCall, Shape); // Move to after the call. Since swifterror only has a guaranteed // value on normal exits, we can ignore implicit and explicit unwind // edges. if (isa(Call)) { Builder.SetInsertPoint(Call->getNextNode()); } else { auto Invoke = cast(Call); Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstNonPHIOrDbg()); } // Get the current swifterror value and store it to the alloca. auto ValueAfterCall = emitGetSwiftErrorValue(Builder, ValueTy, Shape); Builder.CreateStore(ValueAfterCall, Alloca); return Addr; } /// Eliminate a formerly-swifterror alloca by inserting the get/set /// intrinsics and attempting to MemToReg the alloca away. static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca, coro::Shape &Shape) { for (auto UI = Alloca->use_begin(), UE = Alloca->use_end(); UI != UE; ) { // We're likely changing the use list, so use a mutation-safe // iteration pattern. auto &Use = *UI; ++UI; // swifterror values can only be used in very specific ways. // We take advantage of that here. 
auto User = Use.getUser(); if (isa(User) || isa(User)) continue; assert(isa(User) || isa(User)); auto Call = cast(User); auto Addr = emitSetAndGetSwiftErrorValueAround(Call, Alloca, Shape); // Use the returned slot address as the call argument. Use.set(Addr); } // All the uses should be loads and stores now. assert(isAllocaPromotable(Alloca)); } /// "Eliminate" a swifterror argument by reducing it to the alloca case /// and then loading and storing in the prologue and epilog. /// /// The argument keeps the swifterror flag. static void eliminateSwiftErrorArgument(Function &F, Argument &Arg, coro::Shape &Shape, SmallVectorImpl &AllocasToPromote) { IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg()); auto ArgTy = cast(Arg.getType()); auto ValueTy = ArgTy->getElementType(); // Reduce to the alloca case: // Create an alloca and replace all uses of the arg with it. auto Alloca = Builder.CreateAlloca(ValueTy, ArgTy->getAddressSpace()); Arg.replaceAllUsesWith(Alloca); // Set an initial value in the alloca. swifterror is always null on entry. auto InitialValue = Constant::getNullValue(ValueTy); Builder.CreateStore(InitialValue, Alloca); // Find all the suspends in the function and save and restore around them. for (auto Suspend : Shape.CoroSuspends) { (void) emitSetAndGetSwiftErrorValueAround(Suspend, Alloca, Shape); } // Find all the coro.ends in the function and restore the error value. for (auto End : Shape.CoroEnds) { Builder.SetInsertPoint(End); auto FinalValue = Builder.CreateLoad(ValueTy, Alloca); (void) emitSetSwiftErrorValue(Builder, FinalValue, Shape); } // Now we can use the alloca logic. AllocasToPromote.push_back(Alloca); eliminateSwiftErrorAlloca(F, Alloca, Shape); } /// Eliminate all problematic uses of swifterror arguments and allocas /// from the function. We'll fix them up later when splitting the function. static void eliminateSwiftError(Function &F, coro::Shape &Shape) { SmallVector AllocasToPromote; // Look for a swifterror argument. for (auto &Arg : F.args()) { if (!Arg.hasSwiftErrorAttr()) continue; eliminateSwiftErrorArgument(F, Arg, Shape, AllocasToPromote); break; } // Look for swifterror allocas. for (auto &Inst : F.getEntryBlock()) { auto Alloca = dyn_cast(&Inst); if (!Alloca || !Alloca->isSwiftError()) continue; // Clear the swifterror flag. Alloca->setSwiftError(false); AllocasToPromote.push_back(Alloca); eliminateSwiftErrorAlloca(F, Alloca, Shape); } // If we have any allocas to promote, compute a dominator tree and // promote them en masse. if (!AllocasToPromote.empty()) { DominatorTree DT(F); PromoteMemToReg(AllocasToPromote, DT); } } /// retcon and retcon.once conventions assume that all spill uses can be sunk /// after the coro.begin intrinsic. static void sinkSpillUsesAfterCoroBegin(Function &F, const FrameDataInfo &FrameData, CoroBeginInst *CoroBegin) { DominatorTree Dom(F); SmallSetVector ToMove; SmallVector Worklist; // Collect all users that precede coro.begin. for (auto *Def : FrameData.getAllDefs()) { for (User *U : Def->users()) { auto Inst = cast(U); if (Inst->getParent() != CoroBegin->getParent() || Dom.dominates(CoroBegin, Inst)) continue; if (ToMove.insert(Inst)) Worklist.push_back(Inst); } } // Recursively collect users before coro.begin. while (!Worklist.empty()) { auto *Def = Worklist.pop_back_val(); for (User *U : Def->users()) { auto Inst = cast(U); if (Dom.dominates(CoroBegin, Inst)) continue; if (ToMove.insert(Inst)) Worklist.push_back(Inst); } } // Sort by dominance. 
SmallVector InsertionList(ToMove.begin(), ToMove.end()); llvm::sort(InsertionList, [&Dom](Instruction *A, Instruction *B) -> bool { // If a dominates b it should preceed (<) b. return Dom.dominates(A, B); }); Instruction *InsertPt = CoroBegin->getNextNode(); for (Instruction *Inst : InsertionList) Inst->moveBefore(InsertPt); } /// For each local variable that all of its user are only used inside one of /// suspended region, we sink their lifetime.start markers to the place where /// after the suspend block. Doing so minimizes the lifetime of each variable, /// hence minimizing the amount of data we end up putting on the frame. static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape, SuspendCrossingInfo &Checker) { DominatorTree DT(F); // Collect all possible basic blocks which may dominate all uses of allocas. SmallPtrSet DomSet; DomSet.insert(&F.getEntryBlock()); for (auto *CSI : Shape.CoroSuspends) { BasicBlock *SuspendBlock = CSI->getParent(); assert(isSuspendBlock(SuspendBlock) && SuspendBlock->getSingleSuccessor() && "should have split coro.suspend into its own block"); DomSet.insert(SuspendBlock->getSingleSuccessor()); } for (Instruction &I : instructions(F)) { AllocaInst* AI = dyn_cast(&I); if (!AI) continue; for (BasicBlock *DomBB : DomSet) { bool Valid = true; SmallVector Lifetimes; auto isLifetimeStart = [](Instruction* I) { if (auto* II = dyn_cast(I)) return II->getIntrinsicID() == Intrinsic::lifetime_start; return false; }; auto collectLifetimeStart = [&](Instruction *U, AllocaInst *AI) { if (isLifetimeStart(U)) { Lifetimes.push_back(U); return true; } if (!U->hasOneUse() || U->stripPointerCasts() != AI) return false; if (isLifetimeStart(U->user_back())) { Lifetimes.push_back(U->user_back()); return true; } return false; }; for (User *U : AI->users()) { Instruction *UI = cast(U); // For all users except lifetime.start markers, if they are all // dominated by one of the basic blocks and do not cross // suspend points as well, then there is no need to spill the // instruction. if (!DT.dominates(DomBB, UI->getParent()) || Checker.isDefinitionAcrossSuspend(DomBB, UI)) { // Skip lifetime.start, GEP and bitcast used by lifetime.start // markers. if (collectLifetimeStart(UI, AI)) continue; Valid = false; break; } } // Sink lifetime.start markers to dominate block when they are // only used outside the region. if (Valid && Lifetimes.size() != 0) { // May be AI itself, when the type of AI is i8* auto *NewBitCast = [&](AllocaInst *AI) -> Value* { if (isa(Lifetimes[0]->getOperand(1))) return AI; auto *Int8PtrTy = Type::getInt8PtrTy(F.getContext()); return CastInst::Create(Instruction::BitCast, AI, Int8PtrTy, "", DomBB->getTerminator()); }(AI); auto *NewLifetime = Lifetimes[0]->clone(); NewLifetime->replaceUsesOfWith(NewLifetime->getOperand(1), NewBitCast); NewLifetime->insertBefore(DomBB->getTerminator()); // All the outsided lifetime.start markers are no longer necessary. for (Instruction *S : Lifetimes) S->eraseFromParent(); break; } } } } static void collectFrameAllocas(Function &F, coro::Shape &Shape, const SuspendCrossingInfo &Checker, SmallVectorImpl &Allocas) { // Collect lifetime.start info for each alloca. 
using LifetimeStart = SmallPtrSet; llvm::DenseMap> LifetimeMap; for (Instruction &I : instructions(F)) { auto *II = dyn_cast(&I); if (!II || II->getIntrinsicID() != Intrinsic::lifetime_start) continue; if (auto *OpInst = dyn_cast(II->getOperand(1))) { if (auto *AI = dyn_cast(OpInst->stripPointerCasts())) { if (LifetimeMap.find(AI) == LifetimeMap.end()) LifetimeMap[AI] = std::make_unique(); LifetimeMap[AI]->insert(isa(OpInst) ? II : OpInst); } } } for (Instruction &I : instructions(F)) { auto *AI = dyn_cast(&I); if (!AI) continue; // The PromiseAlloca will be specially handled since it needs to be in a // fixed position in the frame. if (AI == Shape.SwitchLowering.PromiseAlloca) { continue; } bool ShouldLiveOnFrame = false; auto Iter = LifetimeMap.find(AI); if (Iter != LifetimeMap.end()) { // Check against lifetime.start if the instruction has the info. for (User *U : I.users()) { for (auto *S : *Iter->second) if ((ShouldLiveOnFrame = Checker.isDefinitionAcrossSuspend(*S, U))) break; if (ShouldLiveOnFrame) break; } if (!ShouldLiveOnFrame) continue; } // At this point, either ShouldLiveOnFrame is true or we didn't have // lifetime information. We will need to rely on more precise pointer // tracking. DominatorTree DT(F); AllocaUseVisitor Visitor{F.getParent()->getDataLayout(), DT, *Shape.CoroBegin, Checker}; Visitor.visitPtr(*AI); if (!Visitor.getShouldLiveOnFrame()) continue; Allocas.emplace_back(AI, Visitor.getAliasesCopy(), Visitor.getMayWriteBeforeCoroBegin()); } } void coro::salvageDebugInfo( SmallDenseMap &DbgPtrAllocaCache, DbgDeclareInst *DDI, bool LoadFromFramePtr) { Function *F = DDI->getFunction(); IRBuilder<> Builder(F->getContext()); auto InsertPt = F->getEntryBlock().getFirstInsertionPt(); while (isa(InsertPt)) ++InsertPt; Builder.SetInsertPoint(&F->getEntryBlock(), InsertPt); DIExpression *Expr = DDI->getExpression(); // Follow the pointer arithmetic all the way to the incoming // function argument and convert into a DIExpression. Value *Storage = DDI->getAddress(); while (Storage) { if (auto *LdInst = dyn_cast(Storage)) { Storage = LdInst->getOperand(0); } else if (auto *StInst = dyn_cast(Storage)) { Storage = StInst->getOperand(0); } else if (auto *GEPInst = dyn_cast(Storage)) { Expr = llvm::salvageDebugInfoImpl(*GEPInst, Expr, /*WithStackValue=*/false); Storage = GEPInst->getOperand(0); } else if (auto *BCInst = dyn_cast(Storage)) Storage = BCInst->getOperand(0); else break; } // Store a pointer to the coroutine frame object in an alloca so it // is available throughout the function when producing unoptimized // code. Extending the lifetime this way is correct because the // variable has been declared by a dbg.declare intrinsic. if (auto Arg = dyn_cast_or_null(Storage)) { auto &Cached = DbgPtrAllocaCache[Storage]; if (!Cached) { Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr, Arg->getName() + ".debug"); Builder.CreateStore(Storage, Cached); } Storage = Cached; } // The FramePtr object adds one extra layer of indirection that // needs to be unwrapped. 
if (LoadFromFramePtr) Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore); auto &VMContext = DDI->getFunction()->getContext(); DDI->setOperand( 0, MetadataAsValue::get(VMContext, ValueAsMetadata::get(Storage))); DDI->setOperand(2, MetadataAsValue::get(VMContext, Expr)); if (auto *InsertPt = dyn_cast_or_null(Storage)) DDI->moveAfter(InsertPt); } void coro::buildCoroutineFrame(Function &F, Shape &Shape) { eliminateSwiftError(F, Shape); if (Shape.ABI == coro::ABI::Switch && Shape.SwitchLowering.PromiseAlloca) { Shape.getSwitchCoroId()->clearPromise(); } // Make sure that all coro.save, coro.suspend and the fallthrough coro.end // intrinsics are in their own blocks to simplify the logic of building up // SuspendCrossing data. for (auto *CSI : Shape.CoroSuspends) { if (auto *Save = CSI->getCoroSave()) splitAround(Save, "CoroSave"); splitAround(CSI, "CoroSuspend"); } // Put CoroEnds into their own blocks. for (AnyCoroEndInst *CE : Shape.CoroEnds) { splitAround(CE, "CoroEnd"); // Emit the musttail call function in a new block before the CoroEnd. // We do this here so that the right suspend crossing info is computed for // the uses of the musttail call function call. (Arguments to the coro.end // instructions would be ignored) if (auto *AsyncEnd = dyn_cast(CE)) { auto *MustTailCallFn = AsyncEnd->getMustTailCallFunction(); if (!MustTailCallFn) continue; IRBuilder<> Builder(AsyncEnd); SmallVector Args(AsyncEnd->args()); auto Arguments = ArrayRef(Args).drop_front(3); auto *Call = createMustTailCall(AsyncEnd->getDebugLoc(), MustTailCallFn, Arguments, Builder); splitAround(Call, "MustTailCall.Before.CoroEnd"); } } // Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will // never has its definition separated from the PHI by the suspend point. rewritePHIs(F); // Build suspend crossing info. SuspendCrossingInfo Checker(F, Shape); IRBuilder<> Builder(F.getContext()); FrameDataInfo FrameData; SmallVector LocalAllocas; SmallVector DeadInstructions; { SpillInfo Spills; for (int Repeat = 0; Repeat < 4; ++Repeat) { // See if there are materializable instructions across suspend points. for (Instruction &I : instructions(F)) if (materializable(I)) for (User *U : I.users()) if (Checker.isDefinitionAcrossSuspend(I, U)) Spills[&I].push_back(cast(U)); if (Spills.empty()) break; // Rewrite materializable instructions to be materialized at the use // point. LLVM_DEBUG(dumpSpills("Materializations", Spills)); rewriteMaterializableInstructions(Builder, Spills); Spills.clear(); } } sinkLifetimeStartMarkers(F, Shape, Checker); collectFrameAllocas(F, Shape, Checker, FrameData.Allocas); LLVM_DEBUG(dumpAllocas(FrameData.Allocas)); // Collect the spills for arguments and other not-materializable values. for (Argument &A : F.args()) for (User *U : A.users()) if (Checker.isDefinitionAcrossSuspend(A, U)) FrameData.Spills[&A].push_back(cast(U)); for (Instruction &I : instructions(F)) { // Values returned from coroutine structure intrinsics should not be part // of the Coroutine Frame. if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin) continue; // The Coroutine Promise always included into coroutine frame, no need to // check for suspend crossing. if (Shape.ABI == coro::ABI::Switch && Shape.SwitchLowering.PromiseAlloca == &I) continue; // Handle alloca.alloc specially here. if (auto AI = dyn_cast(&I)) { // Check whether the alloca's lifetime is bounded by suspend points. 
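// Illustration (hypothetical pseudo-IR, not from this file): if
//
//   %p = llvm.coro.alloca.alloc(%n) ... uses ... llvm.coro.alloca.free(%p)
//
// frees %p on every path before any suspend is reachable, the allocation is
// "local" and can be lowered to an ordinary stack alloca; if a suspend can be
// reached while %p is still live, it is instead rewritten into a dynamic
// allocation tied to the coroutine frame (see lowerNonLocalAlloca above).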
if (isLocalAlloca(AI)) { LocalAllocas.push_back(AI); continue; } // If not, do a quick rewrite of the alloca and then add spills of // the rewritten value. The rewrite doesn't invalidate anything in // Spills because the other alloca intrinsics have no other operands // besides AI, and it doesn't invalidate the iteration because we delay // erasing AI. auto Alloc = lowerNonLocalAlloca(AI, Shape, DeadInstructions); for (User *U : Alloc->users()) { if (Checker.isDefinitionAcrossSuspend(*Alloc, U)) FrameData.Spills[Alloc].push_back(cast(U)); } continue; } // Ignore alloca.get; we process this as part of coro.alloca.alloc. if (isa(I)) continue; if (isa(I)) continue; for (User *U : I.users()) if (Checker.isDefinitionAcrossSuspend(I, U)) { // We cannot spill a token. if (I.getType()->isTokenTy()) report_fatal_error( "token definition is separated from the use by a suspend point"); FrameData.Spills[&I].push_back(cast(U)); } } LLVM_DEBUG(dumpSpills("Spills", FrameData.Spills)); if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce || Shape.ABI == coro::ABI::Async) sinkSpillUsesAfterCoroBegin(F, FrameData, Shape.CoroBegin); Shape.FrameTy = buildFrameType(F, Shape, FrameData); Shape.FramePtr = insertSpills(FrameData, Shape); lowerLocalAllocas(LocalAllocas, DeadInstructions); for (auto I : DeadInstructions) I->eraseFromParent(); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/ConstantMerge.cpp index 67f1438b9b6a..8e81f4bad4af 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/ConstantMerge.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/ConstantMerge.cpp @@ -1,286 +1,288 @@ //===- ConstantMerge.cpp - Merge duplicate global constants ---------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file defines the interface to a pass that merges duplicate global // constants together into a single constant that is shared. This is useful // because some passes (ie TraceValues) insert a lot of string constants into // the program, regardless of whether or not an existing string is available. // // Algorithm: ConstantMerge is designed to build up a map of available constants // and eliminate duplicates when it is initialized. // //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/ConstantMerge.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Transforms/IPO.h" #include #include #include using namespace llvm; #define DEBUG_TYPE "constmerge" STATISTIC(NumIdenticalMerged, "Number of identical global constants merged"); /// Find values that are marked as llvm.used. 
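// Aside: a self-contained toy sketch of the merging idea described in the
// header comment above (the types and the std::map are invented for the
// sketch and are not how this pass stores things; the real code below works
// on Constant initializers and GlobalVariables, iterates to a fixed point,
// and respects linkage, unnamed_addr, alignment and llvm.used).
#include <map>
#include <string>
#include <vector>

struct ToyGlobal {
  std::string Name;
  std::string Init;          // stand-in for the Constant* initializer
  std::string CanonicalName; // filled in by mergeToyConstants()
};

// Map each distinct initializer to the first global carrying it, then point
// every duplicate at that canonical copy.
static void mergeToyConstants(std::vector<ToyGlobal> &Globals) {
  std::map<std::string, std::string> Canonical; // initializer -> name
  for (const ToyGlobal &G : Globals)
    Canonical.emplace(G.Init, G.Name); // keeps the first global seen
  for (ToyGlobal &G : Globals)
    G.CanonicalName = Canonical[G.Init];
}
// The real helpers follow, starting with FindUsedValues().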
static void FindUsedValues(GlobalVariable *LLVMUsed, SmallPtrSetImpl &UsedValues) { if (!LLVMUsed) return; ConstantArray *Inits = cast(LLVMUsed->getInitializer()); for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) { Value *Operand = Inits->getOperand(i)->stripPointerCasts(); GlobalValue *GV = cast(Operand); UsedValues.insert(GV); } } // True if A is better than B. static bool IsBetterCanonical(const GlobalVariable &A, const GlobalVariable &B) { if (!A.hasLocalLinkage() && B.hasLocalLinkage()) return true; if (A.hasLocalLinkage() && !B.hasLocalLinkage()) return false; return A.hasGlobalUnnamedAddr(); } static bool hasMetadataOtherThanDebugLoc(const GlobalVariable *GV) { SmallVector, 4> MDs; GV->getAllMetadata(MDs); for (const auto &V : MDs) if (V.first != LLVMContext::MD_dbg) return true; return false; } static void copyDebugLocMetadata(const GlobalVariable *From, GlobalVariable *To) { SmallVector MDs; From->getDebugInfo(MDs); for (auto MD : MDs) To->addDebugInfo(MD); } static Align getAlign(GlobalVariable *GV) { return GV->getAlign().getValueOr( GV->getParent()->getDataLayout().getPreferredAlign(GV)); } static bool isUnmergeableGlobal(GlobalVariable *GV, const SmallPtrSetImpl &UsedGlobals) { // Only process constants with initializers in the default address space. return !GV->isConstant() || !GV->hasDefinitiveInitializer() || GV->getType()->getAddressSpace() != 0 || GV->hasSection() || + // Don't touch thread-local variables. + GV->isThreadLocal() || // Don't touch values marked with attribute(used). UsedGlobals.count(GV); } enum class CanMerge { No, Yes }; static CanMerge makeMergeable(GlobalVariable *Old, GlobalVariable *New) { if (!Old->hasGlobalUnnamedAddr() && !New->hasGlobalUnnamedAddr()) return CanMerge::No; if (hasMetadataOtherThanDebugLoc(Old)) return CanMerge::No; assert(!hasMetadataOtherThanDebugLoc(New)); if (!Old->hasGlobalUnnamedAddr()) New->setUnnamedAddr(GlobalValue::UnnamedAddr::None); return CanMerge::Yes; } static void replace(Module &M, GlobalVariable *Old, GlobalVariable *New) { Constant *NewConstant = New; LLVM_DEBUG(dbgs() << "Replacing global: @" << Old->getName() << " -> @" << New->getName() << "\n"); // Bump the alignment if necessary. if (Old->getAlign() || New->getAlign()) New->setAlignment(std::max(getAlign(Old), getAlign(New))); copyDebugLocMetadata(Old, New); Old->replaceAllUsesWith(NewConstant); // Delete the global value from the module. assert(Old->hasLocalLinkage() && "Refusing to delete an externally visible global variable."); Old->eraseFromParent(); } static bool mergeConstants(Module &M) { // Find all the globals that are marked "used". These cannot be merged. SmallPtrSet UsedGlobals; FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals); FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals); // Map unique constants to globals. DenseMap CMap; SmallVector, 32> SameContentReplacements; size_t ChangesMade = 0; size_t OldChangesMade = 0; // Iterate constant merging while we are still making progress. Merging two // constants together may allow us to merge other constants together if the // second level constants have initializers which point to the globals that // were just merged. while (true) { // Find the canonical constants others will be merged with. for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); GVI != E; ) { GlobalVariable *GV = &*GVI++; // If this GV is dead, remove it. 
GV->removeDeadConstantUsers(); if (GV->use_empty() && GV->hasLocalLinkage()) { GV->eraseFromParent(); ++ChangesMade; continue; } if (isUnmergeableGlobal(GV, UsedGlobals)) continue; // This transformation is legal for weak ODR globals in the sense it // doesn't change semantics, but we really don't want to perform it // anyway; it's likely to pessimize code generation, and some tools // (like the Darwin linker in cases involving CFString) don't expect it. if (GV->isWeakForLinker()) continue; // Don't touch globals with metadata other then !dbg. if (hasMetadataOtherThanDebugLoc(GV)) continue; Constant *Init = GV->getInitializer(); // Check to see if the initializer is already known. GlobalVariable *&Slot = CMap[Init]; // If this is the first constant we find or if the old one is local, // replace with the current one. If the current is externally visible // it cannot be replace, but can be the canonical constant we merge with. bool FirstConstantFound = !Slot; if (FirstConstantFound || IsBetterCanonical(*GV, *Slot)) { Slot = GV; LLVM_DEBUG(dbgs() << "Cmap[" << *Init << "] = " << GV->getName() << (FirstConstantFound ? "\n" : " (updated)\n")); } } // Identify all globals that can be merged together, filling in the // SameContentReplacements vector. We cannot do the replacement in this pass // because doing so may cause initializers of other globals to be rewritten, // invalidating the Constant* pointers in CMap. for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); GVI != E; ) { GlobalVariable *GV = &*GVI++; if (isUnmergeableGlobal(GV, UsedGlobals)) continue; // We can only replace constant with local linkage. if (!GV->hasLocalLinkage()) continue; Constant *Init = GV->getInitializer(); // Check to see if the initializer is already known. auto Found = CMap.find(Init); if (Found == CMap.end()) continue; GlobalVariable *Slot = Found->second; if (Slot == GV) continue; if (makeMergeable(GV, Slot) == CanMerge::No) continue; // Make all uses of the duplicate constant use the canonical version. LLVM_DEBUG(dbgs() << "Will replace: @" << GV->getName() << " -> @" << Slot->getName() << "\n"); SameContentReplacements.push_back(std::make_pair(GV, Slot)); } // Now that we have figured out which replacements must be made, do them all // now. This avoid invalidating the pointers in CMap, which are unneeded // now. for (unsigned i = 0, e = SameContentReplacements.size(); i != e; ++i) { GlobalVariable *Old = SameContentReplacements[i].first; GlobalVariable *New = SameContentReplacements[i].second; replace(M, Old, New); ++ChangesMade; ++NumIdenticalMerged; } if (ChangesMade == OldChangesMade) break; OldChangesMade = ChangesMade; SameContentReplacements.clear(); CMap.clear(); } return ChangesMade; } PreservedAnalyses ConstantMergePass::run(Module &M, ModuleAnalysisManager &) { if (!mergeConstants(M)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); } namespace { struct ConstantMergeLegacyPass : public ModulePass { static char ID; // Pass identification, replacement for typeid ConstantMergeLegacyPass() : ModulePass(ID) { initializeConstantMergeLegacyPassPass(*PassRegistry::getPassRegistry()); } // For this pass, process all of the globals in the module, eliminating // duplicate constants. 
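// For example (illustrative IR, not from this module), given
//
//   @a = private unnamed_addr constant [4 x i8] c"abc\00"
//   @b = constant [4 x i8] c"abc\00"
//
// @b is the better canonical copy because it is externally visible and must
// not be deleted, while @a is unnamed_addr so its address identity does not
// matter: all uses of @a are rewritten to @b and @a is erased. Merging in
// the other direction would not be legal.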
bool runOnModule(Module &M) override { if (skipModule(M)) return false; return mergeConstants(M); } }; } // end anonymous namespace char ConstantMergeLegacyPass::ID = 0; INITIALIZE_PASS(ConstantMergeLegacyPass, "constmerge", "Merge Duplicate Global Constants", false, false) ModulePass *llvm::createConstantMergePass() { return new ConstantMergeLegacyPass(); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 2ab0848193f6..b8c21a0e1cd3 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -1,1295 +1,1287 @@ //===- LoopVectorizationLegality.cpp --------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file provides loop vectorization legality analysis. Original code // resided in LoopVectorize.cpp for a long time. // // At this point, it is implemented as a utility class, not as an analysis // pass. It should be easy to create an analysis pass around it if there // is a need (but D45420 needs to happen first). // #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Transforms/Utils/SizeOpts.h" #include "llvm/Transforms/Vectorize/LoopVectorize.h" using namespace llvm; using namespace PatternMatch; #define LV_NAME "loop-vectorize" #define DEBUG_TYPE LV_NAME extern cl::opt<bool> EnableVPlanPredication; static cl::opt<bool> EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, cl::desc("Enable if-conversion during vectorization.")); static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold( "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum allowed number of runtime memory checks with a " "vectorize(enable) pragma.")); static cl::opt<unsigned> VectorizeSCEVCheckThreshold( "vectorize-scev-check-threshold", cl::init(16), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed.")); static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold( "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, cl::desc("The maximum number of SCEV checks allowed with a " "vectorize(enable) pragma")); /// Maximum vectorization interleave count.
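// For reference (illustrative IR): the hints consumed by this class arrive
// as metadata attached to the loop's latch terminator, e.g.
//
//   br i1 %cond, label %header, label %exit, !llvm.loop !0
//   !0 = distinct !{!0, !1, !2}
//   !1 = !{!"llvm.loop.vectorize.width", i32 8}
//   !2 = !{!"llvm.loop.interleave.count", i32 2}
//
// The interleave count accepted by Hint::validate() below is capped by
// MaxInterleaveFactor.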
static const unsigned MaxInterleaveFactor = 16; namespace llvm { bool LoopVectorizeHints::Hint::validate(unsigned Val) { switch (Kind) { case HK_WIDTH: return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth; case HK_UNROLL: return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor; case HK_FORCE: return (Val <= 1); case HK_ISVECTORIZED: case HK_PREDICATE: case HK_SCALABLE: return (Val == 0 || Val == 1); } return false; } LoopVectorizeHints::LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE) : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH), Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL), Force("vectorize.enable", FK_Undefined, HK_FORCE), IsVectorized("isvectorized", 0, HK_ISVECTORIZED), Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), Scalable("vectorize.scalable.enable", false, HK_SCALABLE), TheLoop(L), ORE(ORE) { // Populate values with existing loop metadata. getHintsFromMetadata(); // force-vector-interleave overrides DisableInterleaving. if (VectorizerParams::isInterleaveForced()) Interleave.Value = VectorizerParams::VectorizationInterleave; if (IsVectorized.Value != 1) // If the vectorization width and interleaving count are both 1 then // consider the loop to have been already vectorized because there's // nothing more that we can do. IsVectorized.Value = getWidth() == ElementCount::getFixed(1) && Interleave.Value == 1; LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs() << "LV: Interleaving disabled by the pass manager\n"); } void LoopVectorizeHints::setAlreadyVectorized() { LLVMContext &Context = TheLoop->getHeader()->getContext(); MDNode *IsVectorizedMD = MDNode::get( Context, {MDString::get(Context, "llvm.loop.isvectorized"), ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))}); MDNode *LoopID = TheLoop->getLoopID(); MDNode *NewLoopID = makePostTransformationMetadata(Context, LoopID, {Twine(Prefix(), "vectorize.").str(), Twine(Prefix(), "interleave.").str()}, {IsVectorizedMD}); TheLoop->setLoopID(NewLoopID); // Update internal cache. IsVectorized.Value = 1; } bool LoopVectorizeHints::allowVectorization( Function *F, Loop *L, bool VectorizeOnlyWhenForced) const { if (getForce() == LoopVectorizeHints::FK_Disabled) { LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n"); emitRemarkWithHints(); return false; } if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) { LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n"); emitRemarkWithHints(); return false; } if (getIsVectorized() == 1) { LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n"); // FIXME: Add interleave.disable metadata. This will allow // vectorize.disable to be used without disabling the pass and errors // to differentiate between disabled vectorization and a width of 1. 
ORE.emit([&]() { return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(), "AllDisabled", L->getStartLoc(), L->getHeader()) << "loop not vectorized: vectorization and interleaving are " "explicitly disabled, or the loop has already been " "vectorized"; }); return false; } return true; } void LoopVectorizeHints::emitRemarkWithHints() const { using namespace ore; ORE.emit([&]() { if (Force.Value == LoopVectorizeHints::FK_Disabled) return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled", TheLoop->getStartLoc(), TheLoop->getHeader()) << "loop not vectorized: vectorization is explicitly disabled"; else { OptimizationRemarkMissed R(LV_NAME, "MissedDetails", TheLoop->getStartLoc(), TheLoop->getHeader()); R << "loop not vectorized"; if (Force.Value == LoopVectorizeHints::FK_Enabled) { R << " (Force=" << NV("Force", true); if (Width.Value != 0) R << ", Vector Width=" << NV("VectorWidth", getWidth()); if (Interleave.Value != 0) R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value); R << ")"; } return R; } }); } const char *LoopVectorizeHints::vectorizeAnalysisPassName() const { if (getWidth() == ElementCount::getFixed(1)) return LV_NAME; if (getForce() == LoopVectorizeHints::FK_Disabled) return LV_NAME; if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth().isZero()) return LV_NAME; return OptimizationRemarkAnalysis::AlwaysPrint; } void LoopVectorizeHints::getHintsFromMetadata() { MDNode *LoopID = TheLoop->getLoopID(); if (!LoopID) return; // First operand should refer to the loop id itself. assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { const MDString *S = nullptr; SmallVector Args; // The expected hint is either a MDString or a MDNode with the first // operand a MDString. if (const MDNode *MD = dyn_cast(LoopID->getOperand(i))) { if (!MD || MD->getNumOperands() == 0) continue; S = dyn_cast(MD->getOperand(0)); for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i) Args.push_back(MD->getOperand(i)); } else { S = dyn_cast(LoopID->getOperand(i)); assert(Args.size() == 0 && "too many arguments for MDString"); } if (!S) continue; // Check if the hint starts with the loop metadata prefix. StringRef Name = S->getString(); if (Args.size() == 1) setHint(Name, Args[0]); } } void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) { if (!Name.startswith(Prefix())) return; Name = Name.substr(Prefix().size(), StringRef::npos); const ConstantInt *C = mdconst::dyn_extract(Arg); if (!C) return; unsigned Val = C->getZExtValue(); Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate, &Scalable}; for (auto H : Hints) { if (Name == H->Name) { if (H->validate(Val)) H->Value = Val; else LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n"); break; } } } bool LoopVectorizationRequirements::doesNotMeet( Function *F, Loop *L, const LoopVectorizeHints &Hints) { const char *PassName = Hints.vectorizeAnalysisPassName(); bool Failed = false; if (UnsafeAlgebraInst && !Hints.allowReordering()) { ORE.emit([&]() { return OptimizationRemarkAnalysisFPCommute( PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(), UnsafeAlgebraInst->getParent()) << "loop not vectorized: cannot prove it is safe to reorder " "floating-point operations"; }); Failed = true; } // Test if runtime memcheck thresholds are exceeded. 
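// Note (summarizing the checks that follow): a loop annotated with e.g.
//
//   #pragma clang loop vectorize(enable)
//
// may use up to PragmaVectorizeMemoryCheckThreshold runtime pointer checks,
// while an unannotated loop is limited by
// VectorizerParams::RuntimeMemoryCheckThreshold unless reordering is
// explicitly allowed by the hints.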
bool PragmaThresholdReached = NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold; bool ThresholdReached = NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold; if ((ThresholdReached && !Hints.allowReordering()) || PragmaThresholdReached) { ORE.emit([&]() { return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps", L->getStartLoc(), L->getHeader()) << "loop not vectorized: cannot prove it is safe to reorder " "memory operations"; }); LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n"); Failed = true; } return Failed; } // Return true if the inner loop \p Lp is uniform with regard to the outer loop // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes // executing the inner loop will execute the same iterations). This check is // very constrained for now but it will be relaxed in the future. \p Lp is // considered uniform if it meets all the following conditions: // 1) it has a canonical IV (starting from 0 and with stride 1), // 2) its latch terminator is a conditional branch and, // 3) its latch condition is a compare instruction whose operands are the // canonical IV and an OuterLp invariant. // This check doesn't take into account the uniformity of other conditions not // related to the loop latch because they don't affect the loop uniformity. // // NOTE: We decided to keep all these checks and its associated documentation // together so that we can easily have a picture of the current supported loop // nests. However, some of the current checks don't depend on \p OuterLp and // would be redundantly executed for each \p Lp if we invoked this function for // different candidate outer loops. This is not the case for now because we // don't currently have the infrastructure to evaluate multiple candidate outer // loops and \p OuterLp will be a fixed parameter while we only support explicit // outer loop vectorization. It's also very likely that these checks go away // before introducing the aforementioned infrastructure. However, if this is not // the case, we should move the \p OuterLp independent checks to a separate // function that is only executed once for each \p Lp. static bool isUniformLoop(Loop *Lp, Loop *OuterLp) { assert(Lp->getLoopLatch() && "Expected loop with a single latch."); // If Lp is the outer loop, it's uniform by definition. if (Lp == OuterLp) return true; assert(OuterLp->contains(Lp) && "OuterLp must contain Lp."); // 1. PHINode *IV = Lp->getCanonicalInductionVariable(); if (!IV) { LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n"); return false; } // 2. BasicBlock *Latch = Lp->getLoopLatch(); auto *LatchBr = dyn_cast(Latch->getTerminator()); if (!LatchBr || LatchBr->isUnconditional()) { LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n"); return false; } // 3. auto *LatchCmp = dyn_cast(LatchBr->getCondition()); if (!LatchCmp) { LLVM_DEBUG( dbgs() << "LV: Loop latch condition is not a compare instruction.\n"); return false; } Value *CondOp0 = LatchCmp->getOperand(0); Value *CondOp1 = LatchCmp->getOperand(1); Value *IVUpdate = IV->getIncomingValueForBlock(Latch); if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) && !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) { LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n"); return false; } return true; } // Return true if \p Lp and all its nested loops are uniform with regard to \p // OuterLp. 
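// A source-level picture (hypothetical example, not from this file): when
// the outer j-loop is the vectorization candidate, the inner i-loop below is
// "uniform" because its bound n is invariant in the j-loop and its latch
// compares the canonical induction variable against that invariant bound, so
// every vector lane of the j-loop executes the same i iterations:
static void uniformNestExample(float *a, const float *b, long m, long n) {
#pragma clang loop vectorize(enable)
  for (long j = 0; j < m; ++j)
    for (long i = 0; i < n; ++i)
      a[j * n + i] += b[i];
}
// An inner bound that depends on j (e.g. "i < j") would make the inner loop
// divergent across the lanes of the outer loop and fail this check. The
// recursive walk over the nest follows.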
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) { if (!isUniformLoop(Lp, OuterLp)) return false; // Check if nested loops are uniform. for (Loop *SubLp : *Lp) if (!isUniformLoopNest(SubLp, OuterLp)) return false; return true; } /// Check whether it is safe to if-convert this phi node. /// /// Phi nodes with constant expressions that can trap are not safe to if /// convert. static bool canIfConvertPHINodes(BasicBlock *BB) { for (PHINode &Phi : BB->phis()) { for (Value *V : Phi.incoming_values()) if (auto *C = dyn_cast(V)) if (C->canTrap()) return false; } return true; } static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) { if (Ty->isPointerTy()) return DL.getIntPtrType(Ty); // It is possible that char's or short's overflow when we ask for the loop's // trip count, work around this by changing the type size. if (Ty->getScalarSizeInBits() < 32) return Type::getInt32Ty(Ty->getContext()); return Ty; } static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) { Ty0 = convertPointerToIntegerType(DL, Ty0); Ty1 = convertPointerToIntegerType(DL, Ty1); if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits()) return Ty0; return Ty1; } /// Check that the instruction has outside loop users and is not an /// identified reduction variable. static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, SmallPtrSetImpl &AllowedExit) { // Reductions, Inductions and non-header phis are allowed to have exit users. All // other instructions must not have external users. if (!AllowedExit.count(Inst)) // Check that all of the users of the loop are inside the BB. for (User *U : Inst->users()) { Instruction *UI = cast(U); // This user may be a reduction exit value. if (!TheLoop->contains(UI)) { LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n'); return true; } } return false; } int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { const ValueToValueMap &Strides = getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap(); Function *F = TheLoop->getHeader()->getParent(); bool OptForSize = F->hasOptSize() || llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI, PGSOQueryType::IRPass); bool CanAddPredicate = !OptForSize; int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false); if (Stride == 1 || Stride == -1) return Stride; return 0; } bool LoopVectorizationLegality::isUniform(Value *V) { return LAI->isUniform(V); } bool LoopVectorizationLegality::canVectorizeOuterLoop() { assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop."); // Store the result and return it at the end instead of exiting early, in case // allowExtraAnalysis is used to report multiple reasons for not vectorizing. bool Result = true; bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); for (BasicBlock *BB : TheLoop->blocks()) { // Check whether the BB terminator is a BranchInst. Any other terminator is // not supported yet. auto *Br = dyn_cast(BB->getTerminator()); if (!Br) { reportVectorizationFailure("Unsupported basic block terminator", "loop control flow is not understood by vectorizer", "CFGNotUnderstood", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else return false; } // Check whether the BranchInst is a supported one. Only unconditional // branches, conditional branches with an outer loop invariant condition or // backedges are supported. // FIXME: We skip these checks when VPlan predication is enabled as we // want to allow divergent branches. 
This whole check will be removed // once VPlan predication is on by default. if (!EnableVPlanPredication && Br && Br->isConditional() && !TheLoop->isLoopInvariant(Br->getCondition()) && !LI->isLoopHeader(Br->getSuccessor(0)) && !LI->isLoopHeader(Br->getSuccessor(1))) { reportVectorizationFailure("Unsupported conditional branch", "loop control flow is not understood by vectorizer", "CFGNotUnderstood", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else return false; } } // Check whether inner loops are uniform. At this point, we only support // simple outer loops scenarios with uniform nested loops. if (!isUniformLoopNest(TheLoop /*loop nest*/, TheLoop /*context outer loop*/)) { reportVectorizationFailure("Outer loop contains divergent loops", "loop control flow is not understood by vectorizer", "CFGNotUnderstood", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else return false; } // Check whether we are able to set up outer loop induction. if (!setupOuterLoopInductions()) { reportVectorizationFailure("Unsupported outer loop Phi(s)", "Unsupported outer loop Phi(s)", "UnsupportedPhi", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else return false; } return Result; } void LoopVectorizationLegality::addInductionPhi( PHINode *Phi, const InductionDescriptor &ID, SmallPtrSetImpl &AllowedExit) { Inductions[Phi] = ID; // In case this induction also comes with casts that we know we can ignore // in the vectorized loop body, record them here. All casts could be recorded // here for ignoring, but suffices to record only the first (as it is the // only one that may bw used outside the cast sequence). const SmallVectorImpl &Casts = ID.getCastInsts(); if (!Casts.empty()) InductionCastsToIgnore.insert(*Casts.begin()); Type *PhiTy = Phi->getType(); const DataLayout &DL = Phi->getModule()->getDataLayout(); // Get the widest type. if (!PhiTy->isFloatingPointTy()) { if (!WidestIndTy) WidestIndTy = convertPointerToIntegerType(DL, PhiTy); else WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy); } // Int inductions are special because we only allow one IV. if (ID.getKind() == InductionDescriptor::IK_IntInduction && ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() && isa(ID.getStartValue()) && cast(ID.getStartValue())->isNullValue()) { // Use the phi node with the widest type as induction. Use the last // one if there are multiple (no good reason for doing this other // than it is expedient). We've checked that it begins at zero and // steps by one, so this is a canonical induction variable. if (!PrimaryInduction || PhiTy == WidestIndTy) PrimaryInduction = Phi; } // Both the PHI node itself, and the "post-increment" value feeding // back into the PHI node may have external users. // We can allow those uses, except if the SCEVs we have for them rely // on predicates that only hold within the loop, since allowing the exit // currently means re-using this SCEV outside the loop (see PR33706 for more // details). if (PSE.getUnionPredicate().isAlwaysTrue()) { AllowedExit.insert(Phi); AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch())); } LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n"); } bool LoopVectorizationLegality::setupOuterLoopInductions() { BasicBlock *Header = TheLoop->getHeader(); // Returns true if a given Phi is a supported induction. 
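// E.g. (illustrative IR): an outer-loop header phi such as
//
//   %iv = phi i64 [ 0, %preheader ], [ %iv.next, %latch ]
//   %iv.next = add nuw nsw i64 %iv, 1
//
// is an integer induction (IK_IntInduction) and is accepted; a pointer or
// floating-point recurrence in the outer-loop header makes us bail out here.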
auto isSupportedPhi = [&](PHINode &Phi) -> bool { InductionDescriptor ID; if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) && ID.getKind() == InductionDescriptor::IK_IntInduction) { addInductionPhi(&Phi, ID, AllowedExit); return true; } else { // Bail out for any Phi in the outer loop header that is not a supported // induction. LLVM_DEBUG( dbgs() << "LV: Found unsupported PHI for outer loop vectorization.\n"); return false; } }; if (llvm::all_of(Header->phis(), isSupportedPhi)) return true; else return false; } /// Checks if a function is scalarizable according to the TLI, in /// the sense that it should be vectorized and then expanded in /// multiple scalarcalls. This is represented in the /// TLI via mappings that do not specify a vector name, as in the /// following example: /// /// const VecDesc VecIntrinsics[] = { /// {"llvm.phx.abs.i32", "", 4} /// }; static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) { const StringRef ScalarName = CI.getCalledFunction()->getName(); bool Scalarize = TLI.isFunctionVectorizable(ScalarName); // Check that all known VFs are not associated to a vector // function, i.e. the vector name is emty. if (Scalarize) for (unsigned VF = 2, WidestVF = TLI.getWidestVF(ScalarName); VF <= WidestVF; VF *= 2) { Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF); } return Scalarize; } bool LoopVectorizationLegality::canVectorizeInstrs() { BasicBlock *Header = TheLoop->getHeader(); // Look for the attribute signaling the absence of NaNs. Function &F = *Header->getParent(); HasFunNoNaNAttr = F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; // For each block in the loop. for (BasicBlock *BB : TheLoop->blocks()) { // Scan the instructions in the block and look for hazards. for (Instruction &I : *BB) { if (auto *Phi = dyn_cast(&I)) { Type *PhiTy = Phi->getType(); // Check that this PHI type is allowed. if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() && !PhiTy->isPointerTy()) { reportVectorizationFailure("Found a non-int non-pointer PHI", "loop control flow is not understood by vectorizer", "CFGNotUnderstood", ORE, TheLoop); return false; } // If this PHINode is not in the header block, then we know that we // can convert it to select during if-conversion. No need to check if // the PHIs in this block are induction or reduction variables. if (BB != Header) { // Non-header phi nodes that have outside uses can be vectorized. Add // them to the list of allowed exits. // Unsafe cyclic dependencies with header phis are identified during // legalization for reduction, induction and first order // recurrences. AllowedExit.insert(&I); continue; } // We only allow if-converted PHIs with exactly two incoming values. if (Phi->getNumIncomingValues() != 2) { reportVectorizationFailure("Found an invalid PHI", "loop control flow is not understood by vectorizer", "CFGNotUnderstood", ORE, TheLoop, Phi); return false; } RecurrenceDescriptor RedDes; if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC, DT)) { if (RedDes.hasUnsafeAlgebra()) Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst()); AllowedExit.insert(RedDes.getLoopExitInstr()); Reductions[Phi] = RedDes; continue; } // TODO: Instead of recording the AllowedExit, it would be good to record the // complementary set: NotAllowedExit. These include (but may not be // limited to): // 1. Reduction phis as they represent the one-before-last value, which // is not available when vectorized // 2. 
Induction phis and increment when SCEV predicates cannot be used // outside the loop - see addInductionPhi // 3. Non-Phis with outside uses when SCEV predicates cannot be used // outside the loop - see call to hasOutsideLoopUser in the non-phi // handling below // 4. FirstOrderRecurrence phis that can possibly be handled by // extraction. // By recording these, we can then reason about ways to vectorize each // of these NotAllowedExit. InductionDescriptor ID; if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) { addInductionPhi(Phi, ID, AllowedExit); if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr) Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst()); continue; } if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop, SinkAfter, DT)) { AllowedExit.insert(Phi); FirstOrderRecurrences.insert(Phi); continue; } // As a last resort, coerce the PHI to a AddRec expression // and re-try classifying it a an induction PHI. if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) { addInductionPhi(Phi, ID, AllowedExit); continue; } reportVectorizationFailure("Found an unidentified PHI", "value that could not be identified as " "reduction is used outside the loop", "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi); return false; } // end of PHI handling // We handle calls that: // * Are debug info intrinsics. // * Have a mapping to an IR intrinsic. // * Have a vector version available. auto *CI = dyn_cast(&I); if (CI && !getVectorIntrinsicIDForCall(CI, TLI) && !isa(CI) && !(CI->getCalledFunction() && TLI && (!VFDatabase::getMappings(*CI).empty() || isTLIScalarize(*TLI, *CI)))) { // If the call is a recognized math libary call, it is likely that // we can vectorize it given loosened floating-point constraints. LibFunc Func; bool IsMathLibCall = TLI && CI->getCalledFunction() && CI->getType()->isFloatingPointTy() && TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) && TLI->hasOptimizedCodeGen(Func); if (IsMathLibCall) { // TODO: Ideally, we should not use clang-specific language here, // but it's hard to provide meaningful yet generic advice. // Also, should this be guarded by allowExtraAnalysis() and/or be part // of the returned info from isFunctionVectorizable()? reportVectorizationFailure( "Found a non-intrinsic callsite", "library call cannot be vectorized. " "Try compiling with -fno-math-errno, -ffast-math, " "or similar flags", "CantVectorizeLibcall", ORE, TheLoop, CI); } else { reportVectorizationFailure("Found a non-intrinsic callsite", "call instruction cannot be vectorized", "CantVectorizeLibcall", ORE, TheLoop, CI); } return false; } // Some intrinsics have scalar arguments and should be same in order for // them to be vectorized (i.e. loop invariant). if (CI) { auto *SE = PSE.getSE(); Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI); for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) if (hasVectorInstrinsicScalarOpd(IntrinID, i)) { if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) { reportVectorizationFailure("Found unvectorizable intrinsic", "intrinsic instruction cannot be vectorized", "CantVectorizeIntrinsic", ORE, TheLoop, CI); return false; } } } // Check that the instruction return type is vectorizable. // Also, we can't vectorize extractelement instructions. 
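// E.g. (illustrative): i32, float and i8* results are fine because vector
// types such as <4 x i32> can be formed from them, whereas an aggregate
// result like {i32, i1} (say, from an overflow intrinsic) has no vector form
// and causes a bail-out; void-typed instructions such as stores are exempt
// from this particular check.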
if ((!VectorType::isValidElementType(I.getType()) && !I.getType()->isVoidTy()) || isa(I)) { reportVectorizationFailure("Found unvectorizable type", "instruction return type cannot be vectorized", "CantVectorizeInstructionReturnType", ORE, TheLoop, &I); return false; } // Check that the stored type is vectorizable. if (auto *ST = dyn_cast(&I)) { Type *T = ST->getValueOperand()->getType(); if (!VectorType::isValidElementType(T)) { reportVectorizationFailure("Store instruction cannot be vectorized", "store instruction cannot be vectorized", "CantVectorizeStore", ORE, TheLoop, ST); return false; } // For nontemporal stores, check that a nontemporal vector version is // supported on the target. if (ST->getMetadata(LLVMContext::MD_nontemporal)) { // Arbitrarily try a vector of 2 elements. auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2); assert(VecTy && "did not find vectorized version of stored type"); if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) { reportVectorizationFailure( "nontemporal store instruction cannot be vectorized", "nontemporal store instruction cannot be vectorized", "CantVectorizeNontemporalStore", ORE, TheLoop, ST); return false; } } } else if (auto *LD = dyn_cast(&I)) { if (LD->getMetadata(LLVMContext::MD_nontemporal)) { // For nontemporal loads, check that a nontemporal vector version is // supported on the target (arbitrarily try a vector of 2 elements). auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2); assert(VecTy && "did not find vectorized version of load type"); if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) { reportVectorizationFailure( "nontemporal load instruction cannot be vectorized", "nontemporal load instruction cannot be vectorized", "CantVectorizeNontemporalLoad", ORE, TheLoop, LD); return false; } } // FP instructions can allow unsafe algebra, thus vectorizable by // non-IEEE-754 compliant SIMD units. // This applies to floating-point math operations and calls, not memory // operations, shuffles, or casts, as they don't change precision or // semantics. } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) && !I.isFast()) { LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n"); Hints->setPotentiallyUnsafe(); } // Reduction instructions are allowed to have exit users. // All other instructions must not have external users. if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) { // We can safely vectorize loops where instructions within the loop are // used outside the loop only if the SCEV predicates within the loop is // same as outside the loop. Allowing the exit means reusing the SCEV // outside the loop. if (PSE.getUnionPredicate().isAlwaysTrue()) { AllowedExit.insert(&I); continue; } reportVectorizationFailure("Value cannot be used outside the loop", "value cannot be used outside the loop", "ValueUsedOutsideLoop", ORE, TheLoop, &I); return false; } } // next instr. 
} if (!PrimaryInduction) { if (Inductions.empty()) { reportVectorizationFailure("Did not find one integer induction var", "loop induction variable could not be identified", "NoInductionVariable", ORE, TheLoop); return false; } else if (!WidestIndTy) { reportVectorizationFailure("Did not find one integer induction var", "integer loop induction variable could not be identified", "NoIntegerInductionVariable", ORE, TheLoop); return false; } else { LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n"); } } // For first order recurrences, we use the previous value (incoming value from // the latch) to check if it dominates all users of the recurrence. Bail out // if we have to sink such an instruction for another recurrence, as the // dominance requirement may not hold after sinking. BasicBlock *LoopLatch = TheLoop->getLoopLatch(); if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) { Instruction *V = cast(Phi->getIncomingValueForBlock(LoopLatch)); return SinkAfter.find(V) != SinkAfter.end(); })) return false; // Now we know the widest induction type, check if our found induction // is the same size. If it's not, unset it here and InnerLoopVectorizer // will create another. if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType()) PrimaryInduction = nullptr; return true; } bool LoopVectorizationLegality::canVectorizeMemory() { LAI = &(*GetLAA)(*TheLoop); const OptimizationRemarkAnalysis *LAR = LAI->getReport(); if (LAR) { ORE->emit([&]() { return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(), "loop not vectorized: ", *LAR); }); } if (!LAI->canVectorizeMemory()) return false; if (LAI->hasDependenceInvolvingLoopInvariantAddress()) { reportVectorizationFailure("Stores to a uniform address", "write to a loop invariant address could not be vectorized", "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop); return false; } Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks()); PSE.addPredicate(LAI->getPSE().getUnionPredicate()); return true; } bool LoopVectorizationLegality::isInductionPhi(const Value *V) { Value *In0 = const_cast(V); PHINode *PN = dyn_cast_or_null(In0); if (!PN) return false; return Inductions.count(PN); } bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) { auto *Inst = dyn_cast(V); return (Inst && InductionCastsToIgnore.count(Inst)); } bool LoopVectorizationLegality::isInductionVariable(const Value *V) { return isInductionPhi(V) || isCastedInductionVariable(V); } bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) { return FirstOrderRecurrences.count(Phi); } bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) { return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT); } bool LoopVectorizationLegality::blockCanBePredicated( BasicBlock *BB, SmallPtrSetImpl &SafePtrs, SmallPtrSetImpl &MaskedOp, - SmallPtrSetImpl &ConditionalAssumes, - bool PreserveGuards) const { - const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); - + SmallPtrSetImpl &ConditionalAssumes) const { for (Instruction &I : *BB) { // Check that we don't have a constant expression that can trap as operand. for (Value *Operand : I.operands()) { if (auto *C = dyn_cast(Operand)) if (C->canTrap()) return false; } // We can predicate blocks with calls to assume, as long as we drop them in // case we flatten the CFG via predication. 
if (match(&I, m_Intrinsic())) { ConditionalAssumes.insert(&I); continue; } // Do not let llvm.experimental.noalias.scope.decl block the vectorization. // TODO: there might be cases that it should block the vectorization. Let's // ignore those for now. if (isa(&I)) continue; // We might be able to hoist the load. if (I.mayReadFromMemory()) { auto *LI = dyn_cast(&I); if (!LI) return false; if (!SafePtrs.count(LI->getPointerOperand())) { - // !llvm.mem.parallel_loop_access implies if-conversion safety. - // Otherwise, record that the load needs (real or emulated) masking - // and let the cost model decide. - if (!IsAnnotatedParallel || PreserveGuards) - MaskedOp.insert(LI); + MaskedOp.insert(LI); continue; } } if (I.mayWriteToMemory()) { auto *SI = dyn_cast(&I); if (!SI) return false; // Predicated store requires some form of masking: // 1) masked store HW instruction, // 2) emulation via load-blend-store (only if safe and legal to do so, // be aware on the race conditions), or // 3) element-by-element predicate check and scalar store. MaskedOp.insert(SI); continue; } if (I.mayThrow()) return false; } return true; } bool LoopVectorizationLegality::canVectorizeWithIfConvert() { if (!EnableIfConversion) { reportVectorizationFailure("If-conversion is disabled", "if-conversion is disabled", "IfConversionDisabled", ORE, TheLoop); return false; } assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable"); // A list of pointers which are known to be dereferenceable within scope of // the loop body for each iteration of the loop which executes. That is, // the memory pointed to can be dereferenced (with the access size implied by // the value's type) unconditionally within the loop header without // introducing a new fault. SmallPtrSet SafePointers; // Collect safe addresses. for (BasicBlock *BB : TheLoop->blocks()) { if (!blockNeedsPredication(BB)) { for (Instruction &I : *BB) if (auto *Ptr = getLoadStorePointerOperand(&I)) SafePointers.insert(Ptr); continue; } // For a block which requires predication, a address may be safe to access // in the loop w/o predication if we can prove dereferenceability facts // sufficient to ensure it'll never fault within the loop. For the moment, // we restrict this to loads; stores are more complicated due to // concurrency restrictions. ScalarEvolution &SE = *PSE.getSE(); for (Instruction &I : *BB) { LoadInst *LI = dyn_cast(&I); if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) && isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT)) SafePointers.insert(LI->getPointerOperand()); } } // Collect the blocks that need predication. BasicBlock *Header = TheLoop->getHeader(); for (BasicBlock *BB : TheLoop->blocks()) { // We don't support switch statements inside loops. if (!isa(BB->getTerminator())) { reportVectorizationFailure("Loop contains a switch statement", "loop contains a switch statement", "LoopContainsSwitch", ORE, TheLoop, BB->getTerminator()); return false; } // We must be able to predicate all blocks that need to be predicated. 
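// --- Illustrative aside (standalone sketch, not part of the patch above) ---
// What predicating a store means when the target has no masked-store
// instruction: option (2) from the comment above, emulation via
// load-blend-store. A scalar model of that emulation, with made-up names,
// and ignoring the concurrency caveats the comment mentions:
#include <cstddef>

void blendStore(int *Out, const int *NewVal, const bool *Mask, size_t N) {
  for (size_t I = 0; I < N; ++I) {
    // Read the old value, select per element, and write back unconditionally.
    // Elements whose mask bit is false keep their previous contents.
    int Old = Out[I];
    Out[I] = Mask[I] ? NewVal[I] : Old;
  }
}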
if (blockNeedsPredication(BB)) { if (!blockCanBePredicated(BB, SafePointers, MaskedOp, ConditionalAssumes)) { reportVectorizationFailure( "Control flow cannot be substituted for a select", "control flow cannot be substituted for a select", "NoCFGForSelect", ORE, TheLoop, BB->getTerminator()); return false; } } else if (BB != Header && !canIfConvertPHINodes(BB)) { reportVectorizationFailure( "Control flow cannot be substituted for a select", "control flow cannot be substituted for a select", "NoCFGForSelect", ORE, TheLoop, BB->getTerminator()); return false; } } // We can if-convert this loop. return true; } // Helper function to canVectorizeLoopNestCFG. bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath) { assert((UseVPlanNativePath || Lp->isInnermost()) && "VPlan-native path is not enabled."); // TODO: ORE should be improved to show more accurate information when an // outer loop can't be vectorized because a nested loop is not understood or // legal. Something like: "outer_loop_location: loop not vectorized: // (inner_loop_location) loop control flow is not understood by vectorizer". // Store the result and return it at the end instead of exiting early, in case // allowExtraAnalysis is used to report multiple reasons for not vectorizing. bool Result = true; bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); // We must have a loop in canonical form. Loops with indirectbr in them cannot // be canonicalized. if (!Lp->getLoopPreheader()) { reportVectorizationFailure("Loop doesn't have a legal pre-header", "loop control flow is not understood by vectorizer", "CFGNotUnderstood", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else return false; } // We must have a single backedge. if (Lp->getNumBackEdges() != 1) { reportVectorizationFailure("The loop must have a single backedge", "loop control flow is not understood by vectorizer", "CFGNotUnderstood", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else return false; } // We currently must have a single "exit block" after the loop. Note that // multiple "exiting blocks" inside the loop are allowed, provided they all // reach the single exit block. // TODO: This restriction can be relaxed in the near future, it's here solely // to allow separation of changes for review. We need to generalize the phi // update logic in a number of places. if (!Lp->getUniqueExitBlock()) { reportVectorizationFailure("The loop must have a unique exit block", "loop control flow is not understood by vectorizer", "CFGNotUnderstood", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else return false; } return Result; } bool LoopVectorizationLegality::canVectorizeLoopNestCFG( Loop *Lp, bool UseVPlanNativePath) { // Store the result and return it at the end instead of exiting early, in case // allowExtraAnalysis is used to report multiple reasons for not vectorizing. bool Result = true; bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) { if (DoExtraAnalysis) Result = false; else return false; } // Recursively check whether the loop control flow of nested loops is // understood. for (Loop *SubLp : *Lp) if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) { if (DoExtraAnalysis) Result = false; else return false; } return Result; } bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { // Store the result and return it at the end instead of exiting early, in case // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 
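// --- Illustrative aside (standalone sketch, not part of the patch above) ---
// The shape of the "DoExtraAnalysis" pattern used below: instead of returning
// at the first failed check, record the failure and keep going so every
// reason for rejecting the loop can be reported. The checks here are
// placeholders standing in for the real pre-header/backedge/exit-block tests.
#include <cstdio>

bool runChecks(bool ReportAllFailures) {
  bool Result = true;
  const struct { const char *Name; bool Passed; } Checks[] = {
      {"has pre-header", true},
      {"single backedge", false},
      {"unique exit block", false},
  };
  for (const auto &C : Checks) {
    if (C.Passed)
      continue;
    std::printf("not vectorized: %s check failed\n", C.Name);
    if (!ReportAllFailures)
      return false; // early exit: only the first reason is reported
    Result = false; // extra analysis: remember the failure, keep checking
  }
  return Result;
}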
bool Result = true; bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); // Check whether the loop-related control flow in the loop nest is expected by // vectorizer. if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) { if (DoExtraAnalysis) Result = false; else return false; } // We need to have a loop header. LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName() << '\n'); // Specific checks for outer loops. We skip the remaining legal checks at this // point because they don't support outer loops. if (!TheLoop->isInnermost()) { assert(UseVPlanNativePath && "VPlan-native path is not enabled."); if (!canVectorizeOuterLoop()) { reportVectorizationFailure("Unsupported outer loop", "unsupported outer loop", "UnsupportedOuterLoop", ORE, TheLoop); // TODO: Implement DoExtraAnalysis when subsequent legal checks support // outer loops. return false; } LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n"); return Result; } assert(TheLoop->isInnermost() && "Inner loop expected."); // Check if we can if-convert non-single-bb loops. unsigned NumBlocks = TheLoop->getNumBlocks(); if (NumBlocks != 1 && !canVectorizeWithIfConvert()) { LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n"); if (DoExtraAnalysis) Result = false; else return false; } // Check if we can vectorize the instructions and CFG in this loop. if (!canVectorizeInstrs()) { LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n"); if (DoExtraAnalysis) Result = false; else return false; } // Go over each instruction and look at memory deps. if (!canVectorizeMemory()) { LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n"); if (DoExtraAnalysis) Result = false; else return false; } LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop" << (LAI->getRuntimePointerChecking()->Need ? " (with a runtime bound check)" : "") << "!\n"); unsigned SCEVThreshold = VectorizeSCEVCheckThreshold; if (Hints->getForce() == LoopVectorizeHints::FK_Enabled) SCEVThreshold = PragmaVectorizeSCEVCheckThreshold; if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) { reportVectorizationFailure("Too many SCEV checks needed", "Too many SCEV assumptions need to be made and checked at runtime", "TooManySCEVRunTimeChecks", ORE, TheLoop); if (DoExtraAnalysis) Result = false; else return false; } // Okay! We've done all the tests. If any have failed, return false. Otherwise // we can vectorize, and at this point we don't have any other mem analysis // which may limit our maximum vectorization factor, so just return true with // no restrictions. return Result; } bool LoopVectorizationLegality::prepareToFoldTailByMasking() { LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n"); SmallPtrSet ReductionLiveOuts; for (auto &Reduction : getReductionVars()) ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr()); // TODO: handle non-reduction outside users when tail is folded by masking. for (auto *AE : AllowedExit) { // Check that all users of allowed exit values are inside the loop or // are the live-out of a reduction. if (ReductionLiveOuts.count(AE)) continue; for (User *U : AE->users()) { Instruction *UI = cast(U); if (TheLoop->contains(UI)) continue; LLVM_DEBUG( dbgs() << "LV: Cannot fold tail by masking, loop has an outside user for " << *UI << "\n"); return false; } } // The list of pointers that we can safely read and write to remains empty. 
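// --- Illustrative aside (standalone sketch, not part of the patch above) ---
// What "folding the tail by masking" means: instead of a vector body plus a
// scalar remainder loop, every block is predicated so the final, partial
// iteration runs through the same masked vector code. A scalar model of that
// execution; VF and the array names are assumptions for illustration.
#include <cstddef>

void maskedTail(int *Out, const int *In, size_t N) {
  const size_t VF = 4;                        // assumed vectorization factor
  for (size_t Base = 0; Base < N; Base += VF) {
    for (size_t Lane = 0; Lane < VF; ++Lane) {
      bool Active = Base + Lane < N;          // the per-lane mask
      if (Active)                             // inactive lanes do nothing,
        Out[Base + Lane] = In[Base + Lane];   // so no out-of-bounds access
    }
  }
}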
SmallPtrSet SafePointers; SmallPtrSet TmpMaskedOp; SmallPtrSet TmpConditionalAssumes; // Check and mark all blocks for predication, including those that ordinarily // do not need predication such as the header block. for (BasicBlock *BB : TheLoop->blocks()) { if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp, - TmpConditionalAssumes, - /* MaskAllLoads= */ true)) { + TmpConditionalAssumes)) { LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as requested.\n"); return false; } } LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n"); MaskedOp.insert(TmpMaskedOp.begin(), TmpMaskedOp.end()); ConditionalAssumes.insert(TmpConditionalAssumes.begin(), TmpConditionalAssumes.end()); return true; } } // namespace llvm diff --git a/contrib/llvm-project/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp b/contrib/llvm-project/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp index 4a0a86168908..10e059adeb7d 100644 --- a/contrib/llvm-project/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp +++ b/contrib/llvm-project/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp @@ -1,110 +1,106 @@ //===- CoverageSummaryInfo.cpp - Coverage summary for function/file -------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // These structures are used to represent code coverage metrics // for functions/files. // //===----------------------------------------------------------------------===// #include "CoverageSummaryInfo.h" using namespace llvm; using namespace coverage; static void sumBranches(size_t &NumBranches, size_t &CoveredBranches, const ArrayRef &Branches) { for (const auto &BR : Branches) { // Skip folded branches. if (BR.Folded) continue; // "True" Condition Branches. ++NumBranches; if (BR.ExecutionCount > 0) ++CoveredBranches; // "False" Condition Branches. ++NumBranches; if (BR.FalseExecutionCount > 0) ++CoveredBranches; } } static void sumBranchExpansions(size_t &NumBranches, size_t &CoveredBranches, const CoverageMapping &CM, ArrayRef Expansions) { for (const auto &Expansion : Expansions) { auto CE = CM.getCoverageForExpansion(Expansion); sumBranches(NumBranches, CoveredBranches, CE.getBranches()); sumBranchExpansions(NumBranches, CoveredBranches, CM, CE.getExpansions()); } } FunctionCoverageSummary FunctionCoverageSummary::get(const CoverageMapping &CM, const coverage::FunctionRecord &Function) { // Compute the region coverage. size_t NumCodeRegions = 0, CoveredRegions = 0; for (auto &CR : Function.CountedRegions) { if (CR.Kind != CounterMappingRegion::CodeRegion) continue; ++NumCodeRegions; if (CR.ExecutionCount != 0) ++CoveredRegions; } // Compute the line coverage size_t NumLines = 0, CoveredLines = 0; CoverageData CD = CM.getCoverageForFunction(Function); for (const auto &LCS : getLineCoverageStats(CD)) { if (!LCS.isMapped()) continue; ++NumLines; if (LCS.getExecutionCount()) ++CoveredLines; } // Compute the branch coverage, including branches from expansions. 
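// --- Illustrative aside (standalone sketch, not part of the patch above) ---
// Two points behind the summary code here. First, sumBranches counts two
// outcomes per non-folded branch (a "true" arm and a "false" arm), so a
// branch is fully covered only when both arms executed. Second, when the
// summaries of a template's instantiations are combined, the patch uses
// merge() (take the maximum) instead of summing, mirroring how region and
// line coverage are combined. The struct below is a simplified stand-in, not
// llvm-cov's BranchCoverageInfo.
#include <algorithm>
#include <cstdio>

struct Branches {
  size_t Covered = 0, Num = 0;
  void merge(const Branches &R) {            // max, as in the patch
    Covered = std::max(Covered, R.Covered);
    Num = std::max(Num, R.Num);
  }
};

int main() {
  // One source-level `if` (two arms) per instantiation: instantiation A
  // executed only the true arm, instantiation B only the false arm.
  Branches A{1, 2}, B{1, 2};

  Branches Merged = A;
  Merged.merge(B);                           // reports 1 of 2 arms
  std::printf("merged: %zu/%zu\n", Merged.Covered, Merged.Num);

  // Summing, the behaviour the patch replaces, would report 2 of 4 arms.
  std::printf("summed: %zu/%zu\n", A.Covered + B.Covered, A.Num + B.Num);
  return 0;
}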
  size_t NumBranches = 0, CoveredBranches = 0;
  sumBranches(NumBranches, CoveredBranches, CD.getBranches());
  sumBranchExpansions(NumBranches, CoveredBranches, CM, CD.getExpansions());

  return FunctionCoverageSummary(
      Function.Name, Function.ExecutionCount,
      RegionCoverageInfo(CoveredRegions, NumCodeRegions),
      LineCoverageInfo(CoveredLines, NumLines),
      BranchCoverageInfo(CoveredBranches, NumBranches));
}

FunctionCoverageSummary
FunctionCoverageSummary::get(const InstantiationGroup &Group,
                             ArrayRef<FunctionCoverageSummary> Summaries) {
  std::string Name;
  if (Group.hasName()) {
    Name = std::string(Group.getName());
  } else {
    llvm::raw_string_ostream OS(Name);
    OS << "Definition at line " << Group.getLine() << ", column "
       << Group.getColumn();
  }

  FunctionCoverageSummary Summary(Name);
  Summary.ExecutionCount = Group.getTotalExecutionCount();
  Summary.RegionCoverage = Summaries[0].RegionCoverage;
  Summary.LineCoverage = Summaries[0].LineCoverage;
  Summary.BranchCoverage = Summaries[0].BranchCoverage;
  for (const auto &FCS : Summaries.drop_front()) {
    Summary.RegionCoverage.merge(FCS.RegionCoverage);
    Summary.LineCoverage.merge(FCS.LineCoverage);
-
-    // Sum branch coverage across instantiation groups for the summary rather
-    // than "merge" the maximum count. This is a clearer view into whether all
-    // created branches are covered.
-    Summary.BranchCoverage += FCS.BranchCoverage;
+    Summary.BranchCoverage.merge(FCS.BranchCoverage);
  }
  return Summary;
}
diff --git a/contrib/llvm-project/llvm/tools/llvm-cov/CoverageSummaryInfo.h b/contrib/llvm-project/llvm/tools/llvm-cov/CoverageSummaryInfo.h
index 4bc1c24a079f..62e7cad1012b 100644
--- a/contrib/llvm-project/llvm/tools/llvm-cov/CoverageSummaryInfo.h
+++ b/contrib/llvm-project/llvm/tools/llvm-cov/CoverageSummaryInfo.h
@@ -1,261 +1,266 @@
//===- CoverageSummaryInfo.h - Coverage summary for function/file ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// These structures are used to represent code coverage metrics
// for functions/files.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_COV_COVERAGESUMMARYINFO_H
#define LLVM_COV_COVERAGESUMMARYINFO_H

#include "llvm/ProfileData/Coverage/CoverageMapping.h"
#include "llvm/Support/raw_ostream.h"

namespace llvm {

/// Provides information about region coverage for a function/file.
class RegionCoverageInfo {
  /// The number of regions that were executed at least once.
  size_t Covered;

  /// The total number of regions in a function/file.
  size_t NumRegions;

public:
  RegionCoverageInfo() : Covered(0), NumRegions(0) {}

  RegionCoverageInfo(size_t Covered, size_t NumRegions)
      : Covered(Covered), NumRegions(NumRegions) {
    assert(Covered <= NumRegions && "Covered regions over-counted");
  }

  RegionCoverageInfo &operator+=(const RegionCoverageInfo &RHS) {
    Covered += RHS.Covered;
    NumRegions += RHS.NumRegions;
    return *this;
  }

  void merge(const RegionCoverageInfo &RHS) {
    Covered = std::max(Covered, RHS.Covered);
    NumRegions = std::max(NumRegions, RHS.NumRegions);
  }

  size_t getCovered() const { return Covered; }

  size_t getNumRegions() const { return NumRegions; }

  bool isFullyCovered() const { return Covered == NumRegions; }

  double getPercentCovered() const {
    assert(Covered <= NumRegions && "Covered regions over-counted");
    if (NumRegions == 0)
      return 0.0;
    return double(Covered) / double(NumRegions) * 100.0;
  }
};

/// Provides information about line coverage for a function/file.
class LineCoverageInfo {
  /// The number of lines that were executed at least once.
  size_t Covered;

  /// The total number of lines in a function/file.
  size_t NumLines;

public:
  LineCoverageInfo() : Covered(0), NumLines(0) {}

  LineCoverageInfo(size_t Covered, size_t NumLines)
      : Covered(Covered), NumLines(NumLines) {
    assert(Covered <= NumLines && "Covered lines over-counted");
  }

  LineCoverageInfo &operator+=(const LineCoverageInfo &RHS) {
    Covered += RHS.Covered;
    NumLines += RHS.NumLines;
    return *this;
  }

  void merge(const LineCoverageInfo &RHS) {
    Covered = std::max(Covered, RHS.Covered);
    NumLines = std::max(NumLines, RHS.NumLines);
  }

  size_t getCovered() const { return Covered; }

  size_t getNumLines() const { return NumLines; }

  bool isFullyCovered() const { return Covered == NumLines; }

  double getPercentCovered() const {
    assert(Covered <= NumLines && "Covered lines over-counted");
    if (NumLines == 0)
      return 0.0;
    return double(Covered) / double(NumLines) * 100.0;
  }
};

/// Provides information about branches coverage for a function/file.
class BranchCoverageInfo {
  /// The number of branches that were executed at least once.
  size_t Covered;

  /// The total number of branches in a function/file.
  size_t NumBranches;

public:
  BranchCoverageInfo() : Covered(0), NumBranches(0) {}

  BranchCoverageInfo(size_t Covered, size_t NumBranches)
      : Covered(Covered), NumBranches(NumBranches) {
    assert(Covered <= NumBranches && "Covered branches over-counted");
  }

  BranchCoverageInfo &operator+=(const BranchCoverageInfo &RHS) {
    Covered += RHS.Covered;
    NumBranches += RHS.NumBranches;
    return *this;
  }

+  void merge(const BranchCoverageInfo &RHS) {
+    Covered = std::max(Covered, RHS.Covered);
+    NumBranches = std::max(NumBranches, RHS.NumBranches);
+  }
+
  size_t getCovered() const { return Covered; }

  size_t getNumBranches() const { return NumBranches; }

  bool isFullyCovered() const { return Covered == NumBranches; }

  double getPercentCovered() const {
    assert(Covered <= NumBranches && "Covered branches over-counted");
    if (NumBranches == 0)
      return 0.0;
    return double(Covered) / double(NumBranches) * 100.0;
  }
};

/// Provides information about function coverage for a file.
class FunctionCoverageInfo {
  /// The number of functions that were executed.
  size_t Executed;

  /// The total number of functions in this file.
size_t NumFunctions; public: FunctionCoverageInfo() : Executed(0), NumFunctions(0) {} FunctionCoverageInfo(size_t Executed, size_t NumFunctions) : Executed(Executed), NumFunctions(NumFunctions) {} FunctionCoverageInfo &operator+=(const FunctionCoverageInfo &RHS) { Executed += RHS.Executed; NumFunctions += RHS.NumFunctions; return *this; } void addFunction(bool Covered) { if (Covered) ++Executed; ++NumFunctions; } size_t getExecuted() const { return Executed; } size_t getNumFunctions() const { return NumFunctions; } bool isFullyCovered() const { return Executed == NumFunctions; } double getPercentCovered() const { assert(Executed <= NumFunctions && "Covered functions over-counted"); if (NumFunctions == 0) return 0.0; return double(Executed) / double(NumFunctions) * 100.0; } }; /// A summary of function's code coverage. struct FunctionCoverageSummary { std::string Name; uint64_t ExecutionCount; RegionCoverageInfo RegionCoverage; LineCoverageInfo LineCoverage; BranchCoverageInfo BranchCoverage; FunctionCoverageSummary(const std::string &Name) : Name(Name), ExecutionCount(0), RegionCoverage(), LineCoverage(), BranchCoverage() {} FunctionCoverageSummary(const std::string &Name, uint64_t ExecutionCount, const RegionCoverageInfo &RegionCoverage, const LineCoverageInfo &LineCoverage, const BranchCoverageInfo &BranchCoverage) : Name(Name), ExecutionCount(ExecutionCount), RegionCoverage(RegionCoverage), LineCoverage(LineCoverage), BranchCoverage(BranchCoverage) {} /// Compute the code coverage summary for the given function coverage /// mapping record. static FunctionCoverageSummary get(const coverage::CoverageMapping &CM, const coverage::FunctionRecord &Function); /// Compute the code coverage summary for an instantiation group \p Group, /// given a list of summaries for each instantiation in \p Summaries. static FunctionCoverageSummary get(const coverage::InstantiationGroup &Group, ArrayRef Summaries); }; /// A summary of file's code coverage. struct FileCoverageSummary { StringRef Name; RegionCoverageInfo RegionCoverage; LineCoverageInfo LineCoverage; BranchCoverageInfo BranchCoverage; FunctionCoverageInfo FunctionCoverage; FunctionCoverageInfo InstantiationCoverage; FileCoverageSummary(StringRef Name) : Name(Name), RegionCoverage(), LineCoverage(), FunctionCoverage(), InstantiationCoverage() {} FileCoverageSummary &operator+=(const FileCoverageSummary &RHS) { RegionCoverage += RHS.RegionCoverage; LineCoverage += RHS.LineCoverage; FunctionCoverage += RHS.FunctionCoverage; BranchCoverage += RHS.BranchCoverage; InstantiationCoverage += RHS.InstantiationCoverage; return *this; } void addFunction(const FunctionCoverageSummary &Function) { RegionCoverage += Function.RegionCoverage; LineCoverage += Function.LineCoverage; BranchCoverage += Function.BranchCoverage; FunctionCoverage.addFunction(/*Covered=*/Function.ExecutionCount > 0); } void addInstantiation(const FunctionCoverageSummary &Function) { InstantiationCoverage.addFunction(/*Covered=*/Function.ExecutionCount > 0); } }; /// A cache for demangled symbols. struct DemangleCache { StringMap DemangledNames; /// Demangle \p Sym if possible. Otherwise, just return \p Sym. 
StringRef demangle(StringRef Sym) const { const auto DemangledName = DemangledNames.find(Sym); if (DemangledName == DemangledNames.end()) return Sym; return DemangledName->getValue(); } }; } // namespace llvm #endif // LLVM_COV_COVERAGESUMMARYINFO_H diff --git a/lib/clang/include/VCSVersion.inc b/lib/clang/include/VCSVersion.inc index fc50df3ad24e..cb7736e7d7de 100644 --- a/lib/clang/include/VCSVersion.inc +++ b/lib/clang/include/VCSVersion.inc @@ -1,14 +1,14 @@ // $FreeBSD$ -#define LLVM_REVISION "llvmorg-12.0.1-rc2-0-ge7dac564cd0e" +#define LLVM_REVISION "llvmorg-12.0.1-0-gfed41342a82f" #define LLVM_REPOSITORY "git@github.com:llvm/llvm-project.git" -#define CLANG_REVISION "llvmorg-12.0.1-rc2-0-ge7dac564cd0e" +#define CLANG_REVISION "llvmorg-12.0.1-0-gfed41342a82f" #define CLANG_REPOSITORY "git@github.com:llvm/llvm-project.git" // - -#define LLD_REVISION "llvmorg-12.0.1-rc2-0-ge7dac564cd0e-1400001" +#define LLD_REVISION "llvmorg-12.0.1-0-gfed41342a82f-1400001" #define LLD_REPOSITORY "FreeBSD" -#define LLDB_REVISION "llvmorg-12.0.1-rc2-0-ge7dac564cd0e" +#define LLDB_REVISION "llvmorg-12.0.1-0-gfed41342a82f" #define LLDB_REPOSITORY "git@github.com:llvm/llvm-project.git" diff --git a/lib/clang/include/llvm/Config/llvm-config.h b/lib/clang/include/llvm/Config/llvm-config.h index 9d95bb2c68dc..11299564108c 100644 --- a/lib/clang/include/llvm/Config/llvm-config.h +++ b/lib/clang/include/llvm/Config/llvm-config.h @@ -1,99 +1,98 @@ - /* $FreeBSD$ */ /*===------- llvm/Config/llvm-config.h - llvm configuration -------*- C -*-===*/ /* */ /* Part of the LLVM Project, under the Apache License v2.0 with LLVM */ /* Exceptions. */ /* See https://llvm.org/LICENSE.txt for license information. */ /* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception */ /* */ /*===----------------------------------------------------------------------===*/ /* This file enumerates variables from the LLVM configuration so that they can be in exported headers and won't override package specific directives. This is a C header that can be included in the llvm-c headers. 
*/ #ifndef LLVM_CONFIG_H #define LLVM_CONFIG_H /* Define if LLVM_ENABLE_DUMP is enabled */ /* #undef LLVM_ENABLE_DUMP */ /* Target triple LLVM will generate code for by default */ /* #undef LLVM_DEFAULT_TARGET_TRIPLE */ /* Define if threads enabled */ #define LLVM_ENABLE_THREADS 1 /* Has gcc/MSVC atomic intrinsics */ #define LLVM_HAS_ATOMICS 1 /* Host triple LLVM will be executed on */ /* #undef LLVM_HOST_TRIPLE */ /* LLVM architecture name for the native architecture, if available */ /* #undef LLVM_NATIVE_ARCH */ /* LLVM name for the native AsmParser init function, if available */ /* #undef LLVM_NATIVE_ASMPARSER */ /* LLVM name for the native AsmPrinter init function, if available */ /* #undef LLVM_NATIVE_ASMPRINTER */ /* LLVM name for the native Disassembler init function, if available */ /* #undef LLVM_NATIVE_DISASSEMBLER */ /* LLVM name for the native Target init function, if available */ /* #undef LLVM_NATIVE_TARGET */ /* LLVM name for the native TargetInfo init function, if available */ /* #undef LLVM_NATIVE_TARGETINFO */ /* LLVM name for the native target MC init function, if available */ /* #undef LLVM_NATIVE_TARGETMC */ /* Define if this is Unixish platform */ #define LLVM_ON_UNIX 1 /* Define if we have the Intel JIT API runtime support library */ #define LLVM_USE_INTEL_JITEVENTS 0 /* Define if we have the oprofile JIT-support library */ #define LLVM_USE_OPROFILE 0 /* Define if we have the perf JIT-support library */ #define LLVM_USE_PERF 0 /* Major version of the LLVM API */ #define LLVM_VERSION_MAJOR 12 /* Minor version of the LLVM API */ #define LLVM_VERSION_MINOR 0 /* Patch version of the LLVM API */ #define LLVM_VERSION_PATCH 1 /* LLVM version string */ #define LLVM_VERSION_STRING "12.0.1" /* Whether LLVM records statistics for use with GetStatistics(), * PrintStatistics() or PrintStatisticsJSON() */ #define LLVM_FORCE_ENABLE_STATS 0 /* Define if we have z3 and want to build it */ /* #undef LLVM_WITH_Z3 */ /* Define if LLVM was built with a dependency to the libtensorflow dynamic library */ /* #undef LLVM_HAVE_TF_API */ /* Define if LLVM was built with a dependency to the tensorflow compiler */ /* #undef LLVM_HAVE_TF_AOT */ /* Define to 1 if you have the header file. */ #define HAVE_SYSEXITS_H 1 /* Define to 1 to enable the experimental new pass manager by default */ #define LLVM_ENABLE_NEW_PASS_MANAGER 0 #endif diff --git a/lib/clang/include/llvm/Support/VCSRevision.h b/lib/clang/include/llvm/Support/VCSRevision.h index f3cccac9f545..ef3416c7ffcd 100644 --- a/lib/clang/include/llvm/Support/VCSRevision.h +++ b/lib/clang/include/llvm/Support/VCSRevision.h @@ -1,3 +1,3 @@ /* $FreeBSD$ */ -#define LLVM_REVISION "llvmorg-12.0.1-rc2-0-ge7dac564cd0e" +#define LLVM_REVISION "llvmorg-12.0.1-0-gfed41342a82f" #define LLVM_REPOSITORY "git@github.com:llvm/llvm-project.git"
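// --- Illustrative aside (standalone sketch, not part of the patch above) ---
// How downstream code typically consumes the macros from llvm-config.h whose
// revision/version values are bumped in this patch: gate features on the
// numeric version at compile time and report the version string at runtime.
// Assumes LLVM's installed headers are on the include path.
#include "llvm/Config/llvm-config.h"
#include <cstdio>

int main() {
#if LLVM_VERSION_MAJOR >= 12
  // Code that requires LLVM 12 or newer would be enabled here.
#endif
  std::printf("built against LLVM %d.%d.%d (%s)\n", LLVM_VERSION_MAJOR,
              LLVM_VERSION_MINOR, LLVM_VERSION_PATCH, LLVM_VERSION_STRING);
  return 0;
}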