diff --git a/contrib/llvm-project/clang/include/clang/Basic/LangOptions.h b/contrib/llvm-project/clang/include/clang/Basic/LangOptions.h
index 50c7f038fc6b..09afa641acf9 100644
--- a/contrib/llvm-project/clang/include/clang/Basic/LangOptions.h
+++ b/contrib/llvm-project/clang/include/clang/Basic/LangOptions.h
@@ -1,727 +1,723 @@
 //===- LangOptions.h - C Language Family Language Options -------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 /// \file
 /// Defines the clang::LangOptions interface.
 //
 //===----------------------------------------------------------------------===//

 #ifndef LLVM_CLANG_BASIC_LANGOPTIONS_H
 #define LLVM_CLANG_BASIC_LANGOPTIONS_H

 #include "clang/Basic/CommentOptions.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/LangStandard.h"
 #include "clang/Basic/ObjCRuntime.h"
 #include "clang/Basic/Sanitizers.h"
 #include "clang/Basic/TargetCXXABI.h"
 #include "clang/Basic/Visibility.h"
 #include "llvm/ADT/FloatingPointMode.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include <string>
 #include <vector>

 namespace clang {

 /// Bitfields of LangOptions, split out from LangOptions in order to ensure that
 /// this large collection of bitfields is a trivial class type.
 class LangOptionsBase {
   friend class CompilerInvocation;

 public:
   // Define simple language options (with no accessors).
 #define LANGOPT(Name, Bits, Default, Description) unsigned Name : Bits;
 #define ENUM_LANGOPT(Name, Type, Bits, Default, Description)
 #include "clang/Basic/LangOptions.def"

 protected:
   // Define language options of enumeration type. These are private, and will
   // have accessors (below).
 #define LANGOPT(Name, Bits, Default, Description)
 #define ENUM_LANGOPT(Name, Type, Bits, Default, Description) \
   unsigned Name : Bits;
 #include "clang/Basic/LangOptions.def"
 };

 /// In the Microsoft ABI, this controls the placement of virtual displacement
 /// members used to implement virtual inheritance.
 enum class MSVtorDispMode { Never, ForVBaseOverride, ForVFTable };

 /// Keeps track of the various options that can be
 /// enabled, which controls the dialect of C or C++ that is accepted.
 class LangOptions : public LangOptionsBase {
 public:
   using Visibility = clang::Visibility;
   using RoundingMode = llvm::RoundingMode;

   enum GCMode { NonGC, GCOnly, HybridGC };
   enum StackProtectorMode { SSPOff, SSPOn, SSPStrong, SSPReq };

   // Automatic variables live on the stack, and when trivial they're usually
   // uninitialized because it's undefined behavior to use them without
   // initializing them.
   enum class TrivialAutoVarInitKind { Uninitialized, Zero, Pattern };

   enum SignedOverflowBehaviorTy {
     // Default C standard behavior.
     SOB_Undefined,

     // -fwrapv
     SOB_Defined,

     // -ftrapv
     SOB_Trapping
   };

   // FIXME: Unify with TUKind.
   enum CompilingModuleKind {
     /// Not compiling a module interface at all.
     CMK_None,

     /// Compiling a module from a module map.
     CMK_ModuleMap,

     /// Compiling a module from a list of header files.
     CMK_HeaderModule,

     /// Compiling a C++ modules TS module interface unit.
     CMK_ModuleInterface,
   };

   enum PragmaMSPointersToMembersKind {
     PPTMK_BestCase,
     PPTMK_FullGeneralitySingleInheritance,
     PPTMK_FullGeneralityMultipleInheritance,
     PPTMK_FullGeneralityVirtualInheritance
   };

   using MSVtorDispMode = clang::MSVtorDispMode;

   enum DefaultCallingConvention {
     DCC_None,
     DCC_CDecl,
     DCC_FastCall,
     DCC_StdCall,
     DCC_VectorCall,
     DCC_RegCall
   };

   enum AddrSpaceMapMangling { ASMM_Target, ASMM_On, ASMM_Off };

   // Corresponds to _MSC_VER
   enum MSVCMajorVersion {
     MSVC2010 = 1600,
     MSVC2012 = 1700,
     MSVC2013 = 1800,
     MSVC2015 = 1900,
     MSVC2017 = 1910,
     MSVC2017_5 = 1912,
     MSVC2017_7 = 1914,
     MSVC2019 = 1920,
     MSVC2019_5 = 1925,
     MSVC2019_8 = 1928,
   };

   enum SYCLMajorVersion {
     SYCL_None,
     SYCL_2017,
     SYCL_2020,
     // The "default" SYCL version to be used when none is specified on the
     // frontend command line.
     SYCL_Default = SYCL_2020
   };

   /// Clang versions with different platform ABI conformance.
   enum class ClangABI {
     /// Attempt to be ABI-compatible with code generated by Clang 3.8.x
     /// (SVN r257626). This causes <1 x long long> to be passed in an
     /// integer register instead of an SSE register on x86_64.
     Ver3_8,

     /// Attempt to be ABI-compatible with code generated by Clang 4.0.x
     /// (SVN r291814). This causes move operations to be ignored when
     /// determining whether a class type can be passed or returned directly.
     Ver4,

     /// Attempt to be ABI-compatible with code generated by Clang 6.0.x
     /// (SVN r321711). This causes determination of whether a type is
     /// standard-layout to ignore collisions between empty base classes
     /// and between base classes and member subobjects, which affects
     /// whether we reuse base class tail padding in some ABIs.
     Ver6,

     /// Attempt to be ABI-compatible with code generated by Clang 7.0.x
     /// (SVN r338536). This causes alignof (C++) and _Alignof (C11) to be
     /// compatible with __alignof (i.e., return the preferred alignment)
     /// rather than returning the required alignment.
     Ver7,

     /// Attempt to be ABI-compatible with code generated by Clang 9.0.x
     /// (SVN r351319). This causes vectors of __int128 to be passed in memory
     /// instead of passing in multiple scalar registers on x86_64 on Linux and
     /// NetBSD.
     Ver9,

     /// Attempt to be ABI-compatible with code generated by Clang 11.0.x
     /// (git 2e10b7a39b93). This causes clang to pass unions with a 256-bit
     /// vector member on the stack instead of using registers, to not properly
     /// mangle substitutions for template names in some cases, and to mangle
     /// declaration template arguments without a cast to the parameter type
     /// even when that can lead to mangling collisions.
     Ver11,

     /// Attempt to be ABI-compatible with code generated by Clang 12.0.x
     /// (git 8e464dd76bef). This causes clang to mangle lambdas within
     /// global-scope inline variables incorrectly.
     Ver12,

-    /// Attempt to be ABI-compatible with code generated by Clang 13.0.x.
-    /// This causes clang to not pack non-POD members of packed structs.
-    Ver13,
-
     /// Conform to the underlying platform's C and C++ ABIs as closely
     /// as we can.
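The lines removed above are the whole substance of this hunk: the Ver13 ABI-compat entry whose comment says it "causes clang to not pack non-POD members of packed structs". Below is a small standalone illustration of the layout question involved; the struct names are invented for the example, and which offset you actually see depends on the compiler release and any -fclang-abi-compat setting.

#include <cstddef>
#include <cstdio>

// NonPOD has a user-provided constructor, which is what makes this case
// interesting: does the packed attribute on the enclosing struct pack it too?
struct NonPOD {
  NonPOD() {}
  int x;
};

struct __attribute__((packed)) Packed {
  char c;
  NonPOD n;
};

int main() {
  // Clang 13 and earlier keep the non-POD member at its natural alignment
  // (offset 4, sizeof 8), which is the layout the removed Ver13 entry let
  // -fclang-abi-compat select; the newer default packs it (offset 1, sizeof 5).
  std::printf("offsetof(Packed, n) = %zu, sizeof(Packed) = %zu\n",
              offsetof(Packed, n), sizeof(Packed));
  return 0;
}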
Latest }; enum class CoreFoundationABI { /// No interoperability ABI has been specified Unspecified, /// CoreFoundation does not have any language interoperability Standalone, /// Interoperability with the ObjectiveC runtime ObjectiveC, /// Interoperability with the latest known version of the Swift runtime Swift, /// Interoperability with the Swift 5.0 runtime Swift5_0, /// Interoperability with the Swift 4.2 runtime Swift4_2, /// Interoperability with the Swift 4.1 runtime Swift4_1, }; enum FPModeKind { // Disable the floating point pragma FPM_Off, // Enable the floating point pragma FPM_On, // Aggressively fuse FP ops (E.g. FMA) disregarding pragmas. FPM_Fast, // Aggressively fuse FP ops and honor pragmas. FPM_FastHonorPragmas }; /// Alias for RoundingMode::NearestTiesToEven. static constexpr unsigned FPR_ToNearest = static_cast(llvm::RoundingMode::NearestTiesToEven); /// Possible floating point exception behavior. enum FPExceptionModeKind { /// Assume that floating-point exceptions are masked. FPE_Ignore, /// Transformations do not cause new exceptions but may hide some. FPE_MayTrap, /// Strictly preserve the floating-point exception semantics. FPE_Strict }; /// Possible exception handling behavior. enum class ExceptionHandlingKind { None, SjLj, WinEH, DwarfCFI, Wasm }; enum class LaxVectorConversionKind { /// Permit no implicit vector bitcasts. None, /// Permit vector bitcasts between integer vectors with different numbers /// of elements but the same total bit-width. Integer, /// Permit vector bitcasts between all vectors with the same total /// bit-width. All, }; enum class AltivecSrcCompatKind { // All vector compares produce scalars except vector pixel and vector bool. // The types vector pixel and vector bool return vector results. Mixed, // All vector compares produce vector results as in GCC. GCC, // All vector compares produce scalars as in XL. XL, // Default clang behaviour. Default = Mixed, }; enum class SignReturnAddressScopeKind { /// No signing for any function. None, /// Sign the return address of functions that spill LR. NonLeaf, /// Sign the return address of all functions, All }; enum class SignReturnAddressKeyKind { /// Return address signing uses APIA key. AKey, /// Return address signing uses APIB key. BKey }; enum class ThreadModelKind { /// POSIX Threads. POSIX, /// Single Threaded Environment. Single }; enum class ExtendArgsKind { /// Integer arguments are sign or zero extended to 32/64 bits /// during default argument promotions. ExtendTo32, ExtendTo64 }; public: /// The used language standard. LangStandard::Kind LangStd; /// Set of enabled sanitizers. SanitizerSet Sanitize; /// Is at least one coverage instrumentation type enabled. bool SanitizeCoverage = false; /// Paths to files specifying which objects /// (files, functions, variables) should not be instrumented. std::vector NoSanitizeFiles; /// Paths to the XRay "always instrument" files specifying which /// objects (files, functions, variables) should be imbued with the XRay /// "always instrument" attribute. /// WARNING: This is a deprecated field and will go away in the future. std::vector XRayAlwaysInstrumentFiles; /// Paths to the XRay "never instrument" files specifying which /// objects (files, functions, variables) should be imbued with the XRay /// "never instrument" attribute. /// WARNING: This is a deprecated field and will go away in the future. 
std::vector XRayNeverInstrumentFiles; /// Paths to the XRay attribute list files, specifying which objects /// (files, functions, variables) should be imbued with the appropriate XRay /// attribute(s). std::vector XRayAttrListFiles; /// Paths to special case list files specifying which entities /// (files, functions) should or should not be instrumented. std::vector ProfileListFiles; clang::ObjCRuntime ObjCRuntime; CoreFoundationABI CFRuntime = CoreFoundationABI::Unspecified; std::string ObjCConstantStringClass; /// The name of the handler function to be called when -ftrapv is /// specified. /// /// If none is specified, abort (GCC-compatible behaviour). std::string OverflowHandler; /// The module currently being compiled as specified by -fmodule-name. std::string ModuleName; /// The name of the current module, of which the main source file /// is a part. If CompilingModule is set, we are compiling the interface /// of this module, otherwise we are compiling an implementation file of /// it. This starts as ModuleName in case -fmodule-name is provided and /// changes during compilation to reflect the current module. std::string CurrentModule; /// The names of any features to enable in module 'requires' decls /// in addition to the hard-coded list in Module.cpp and the target features. /// /// This list is sorted. std::vector ModuleFeatures; /// Options for parsing comments. CommentOptions CommentOpts; /// A list of all -fno-builtin-* function names (e.g., memset). std::vector NoBuiltinFuncs; /// A prefix map for __FILE__, __BASE_FILE__ and __builtin_FILE(). std::map> MacroPrefixMap; /// Triples of the OpenMP targets that the host code codegen should /// take into account in order to generate accurate offloading descriptors. std::vector OMPTargetTriples; /// Name of the IR file that contains the result of the OpenMP target /// host code generation. std::string OMPHostIRFile; /// The user provided compilation unit ID, if non-empty. This is used to /// externalize static variables which is needed to support accessing static /// device variables in host code for single source offloading languages /// like CUDA/HIP. std::string CUID; /// C++ ABI to compile with, if specified by the frontend through -fc++-abi=. /// This overrides the default ABI used by the target. llvm::Optional CXXABI; /// Indicates whether the front-end is explicitly told that the /// input is a header file (i.e. -x c-header). bool IsHeaderFile = false; LangOptions(); // Define accessors/mutators for language options of enumeration type. #define LANGOPT(Name, Bits, Default, Description) #define ENUM_LANGOPT(Name, Type, Bits, Default, Description) \ Type get##Name() const { return static_cast(Name); } \ void set##Name(Type Value) { Name = static_cast(Value); } #include "clang/Basic/LangOptions.def" /// Are we compiling a module interface (.cppm or module map)? bool isCompilingModule() const { return getCompilingModule() != CMK_None; } /// Do we need to track the owning module for a local declaration? bool trackLocalOwningModule() const { return isCompilingModule() || ModulesLocalVisibility; } bool isSignedOverflowDefined() const { return getSignedOverflowBehavior() == SOB_Defined; } bool isSubscriptPointerArithmetic() const { return ObjCRuntime.isSubscriptPointerArithmetic() && !ObjCSubscriptingLegacyRuntime; } bool isCompatibleWithMSVC(MSVCMajorVersion MajorVersion) const { return MSCompatibilityVersion >= MajorVersion * 100000U; } /// Reset all of the options that are not considered when building a /// module. 
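The LANGOPT/ENUM_LANGOPT accessors generated just above, and the bit-field declarations in LangOptionsBase near the top of the file, are both produced by expanding the same LangOptions.def list with different macro definitions. Here is a self-contained miniature of that X-macro technique; the option list and macro names are invented for illustration.

#include <cstdio>

// Hypothetical option list standing in for LangOptions.def.
#define MY_LANGOPTS(X)                                                         \
  X(CPlusPlus, 1, 0)                                                           \
  X(GCMode, 2, 0)

// First expansion: declare one bit-field per option, the way LangOptionsBase
// expands LANGOPT. No constructors or virtuals, so the type stays trivial.
struct MiniOptionsBase {
#define LANGOPT_DECL(Name, Bits, Default) unsigned Name : Bits;
  MY_LANGOPTS(LANGOPT_DECL)
#undef LANGOPT_DECL
};

// Second expansion of the same list for a different purpose (printing here;
// clang reuses the trick for typed accessors, defaults, serialization, ...).
void dump(const MiniOptionsBase &O) {
#define LANGOPT_PRINT(Name, Bits, Default)                                     \
  std::printf(#Name " = %u\n", (unsigned)O.Name);
  MY_LANGOPTS(LANGOPT_PRINT)
#undef LANGOPT_PRINT
}

int main() {
  MiniOptionsBase Opts{}; // value-initialization zeroes the bit-fields
  Opts.GCMode = 2;        // still fits in the 2-bit field
  dump(Opts);
  return 0;
}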
void resetNonModularOptions(); /// Is this a libc/libm function that is no longer recognized as a /// builtin because a -fno-builtin-* option has been specified? bool isNoBuiltinFunc(StringRef Name) const; /// True if any ObjC types may have non-trivial lifetime qualifiers. bool allowsNonTrivialObjCLifetimeQualifiers() const { return ObjCAutoRefCount || ObjCWeak; } bool assumeFunctionsAreConvergent() const { return ConvergentFunctions; } /// Return the OpenCL C or C++ version as a VersionTuple. VersionTuple getOpenCLVersionTuple() const; /// Return the OpenCL version that kernel language is compatible with unsigned getOpenCLCompatibleVersion() const; /// Return the OpenCL C or C++ for OpenCL language name and version /// as a string. std::string getOpenCLVersionString() const; /// Check if return address signing is enabled. bool hasSignReturnAddress() const { return getSignReturnAddressScope() != SignReturnAddressScopeKind::None; } /// Check if return address signing uses AKey. bool isSignReturnAddressWithAKey() const { return getSignReturnAddressKey() == SignReturnAddressKeyKind::AKey; } /// Check if leaf functions are also signed. bool isSignReturnAddressScopeAll() const { return getSignReturnAddressScope() == SignReturnAddressScopeKind::All; } bool hasSjLjExceptions() const { return getExceptionHandling() == ExceptionHandlingKind::SjLj; } bool hasSEHExceptions() const { return getExceptionHandling() == ExceptionHandlingKind::WinEH; } bool hasDWARFExceptions() const { return getExceptionHandling() == ExceptionHandlingKind::DwarfCFI; } bool hasWasmExceptions() const { return getExceptionHandling() == ExceptionHandlingKind::Wasm; } bool isSYCL() const { return SYCLIsDevice || SYCLIsHost; } /// Remap path prefix according to -fmacro-prefix-path option. void remapPathPrefix(SmallString<256> &Path) const; }; /// Floating point control options class FPOptionsOverride; class FPOptions { public: // We start by defining the layout. using storage_type = uint16_t; using RoundingMode = llvm::RoundingMode; static constexpr unsigned StorageBitSize = 8 * sizeof(storage_type); // Define a fake option named "First" so that we have a PREVIOUS even for the // real first option. static constexpr storage_type FirstShift = 0, FirstWidth = 0; #define OPTION(NAME, TYPE, WIDTH, PREVIOUS) \ static constexpr storage_type NAME##Shift = \ PREVIOUS##Shift + PREVIOUS##Width; \ static constexpr storage_type NAME##Width = WIDTH; \ static constexpr storage_type NAME##Mask = ((1 << NAME##Width) - 1) \ << NAME##Shift; #include "clang/Basic/FPOptions.def" static constexpr storage_type TotalWidth = 0 #define OPTION(NAME, TYPE, WIDTH, PREVIOUS) +WIDTH #include "clang/Basic/FPOptions.def" ; static_assert(TotalWidth <= StorageBitSize, "Too short type for FPOptions"); private: storage_type Value; public: FPOptions() : Value(0) { setFPContractMode(LangOptions::FPM_Off); setRoundingMode(static_cast(LangOptions::FPR_ToNearest)); setFPExceptionMode(LangOptions::FPE_Ignore); } explicit FPOptions(const LangOptions &LO) { Value = 0; // The language fp contract option FPM_FastHonorPragmas has the same effect // as FPM_Fast in frontend. For simplicity, use FPM_Fast uniformly in // frontend. 
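The OPTION chain above packs every entry of FPOptions.def into one 16-bit integer by deriving each field's shift from the previous field's shift and width. A self-contained miniature of the same scheme follows; the option names and widths are invented, not clang's actual list.

#include <cassert>
#include <cstdint>

using storage_type = uint16_t;

#define MINI_FPOPTS(X)                                                         \
  X(ContractMode, 2, First)                                                    \
  X(Rounding, 3, ContractMode)                                                 \
  X(ExceptionMode, 2, Rounding)

// Fake "First" option so even the real first entry has a PREVIOUS.
static constexpr storage_type FirstShift = 0, FirstWidth = 0;
#define OPTION(NAME, WIDTH, PREVIOUS)                                          \
  static constexpr storage_type NAME##Shift = PREVIOUS##Shift + PREVIOUS##Width; \
  static constexpr storage_type NAME##Width = WIDTH;                           \
  static constexpr storage_type NAME##Mask = ((1 << NAME##Width) - 1)          \
                                             << NAME##Shift;
MINI_FPOPTS(OPTION)
#undef OPTION

struct MiniFPOptions {
  storage_type Value = 0;
  // Generated accessors: shift and mask in, shift and mask out.
#define OPTION(NAME, WIDTH, PREVIOUS)                                          \
  unsigned get##NAME() const { return (Value & NAME##Mask) >> NAME##Shift; }   \
  void set##NAME(unsigned V) {                                                 \
    Value = (Value & ~NAME##Mask) | (storage_type(V) << NAME##Shift);          \
  }
  MINI_FPOPTS(OPTION)
#undef OPTION
};

int main() {
  static_assert(ContractModeShift == 0 && RoundingShift == 2 &&
                    ExceptionModeShift == 5,
                "fields are packed back to back");
  MiniFPOptions O;
  O.setRounding(5);
  O.setContractMode(2);
  assert(O.getRounding() == 5 && O.getContractMode() == 2);
  return 0;
}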
auto LangOptContractMode = LO.getDefaultFPContractMode(); if (LangOptContractMode == LangOptions::FPM_FastHonorPragmas) LangOptContractMode = LangOptions::FPM_Fast; setFPContractMode(LangOptContractMode); setRoundingMode(LO.getFPRoundingMode()); setFPExceptionMode(LO.getFPExceptionMode()); setAllowFPReassociate(LO.AllowFPReassoc); setNoHonorNaNs(LO.NoHonorNaNs); setNoHonorInfs(LO.NoHonorInfs); setNoSignedZero(LO.NoSignedZero); setAllowReciprocal(LO.AllowRecip); setAllowApproxFunc(LO.ApproxFunc); if (getFPContractMode() == LangOptions::FPM_On && getRoundingMode() == llvm::RoundingMode::Dynamic && getFPExceptionMode() == LangOptions::FPE_Strict) // If the FP settings are set to the "strict" model, then // FENV access is set to true. (ffp-model=strict) setAllowFEnvAccess(true); else setAllowFEnvAccess(LangOptions::FPM_Off); } bool allowFPContractWithinStatement() const { return getFPContractMode() == LangOptions::FPM_On; } void setAllowFPContractWithinStatement() { setFPContractMode(LangOptions::FPM_On); } bool allowFPContractAcrossStatement() const { return getFPContractMode() == LangOptions::FPM_Fast; } void setAllowFPContractAcrossStatement() { setFPContractMode(LangOptions::FPM_Fast); } bool isFPConstrained() const { return getRoundingMode() != llvm::RoundingMode::NearestTiesToEven || getFPExceptionMode() != LangOptions::FPE_Ignore || getAllowFEnvAccess(); } bool operator==(FPOptions other) const { return Value == other.Value; } /// Return the default value of FPOptions that's used when trailing /// storage isn't required. static FPOptions defaultWithoutTrailingStorage(const LangOptions &LO); storage_type getAsOpaqueInt() const { return Value; } static FPOptions getFromOpaqueInt(storage_type Value) { FPOptions Opts; Opts.Value = Value; return Opts; } // We can define most of the accessors automatically: #define OPTION(NAME, TYPE, WIDTH, PREVIOUS) \ TYPE get##NAME() const { \ return static_cast((Value & NAME##Mask) >> NAME##Shift); \ } \ void set##NAME(TYPE value) { \ Value = (Value & ~NAME##Mask) | (storage_type(value) << NAME##Shift); \ } #include "clang/Basic/FPOptions.def" LLVM_DUMP_METHOD void dump(); }; /// Represents difference between two FPOptions values. /// /// The effect of language constructs changing the set of floating point options /// is usually a change of some FP properties while leaving others intact. This /// class describes such changes by keeping information about what FP options /// are overridden. /// /// The integral set of FP options, described by the class FPOptions, may be /// represented as a default FP option set, defined by language standard and /// command line options, with the overrides introduced by pragmas. /// /// The is implemented as a value of the new FPOptions plus a mask showing which /// fields are actually set in it. class FPOptionsOverride { FPOptions Options = FPOptions::getFromOpaqueInt(0); FPOptions::storage_type OverrideMask = 0; public: using RoundingMode = llvm::RoundingMode; /// The type suitable for storing values of FPOptionsOverride. Must be twice /// as wide as bit size of FPOption. using storage_type = uint32_t; static_assert(sizeof(storage_type) >= 2 * sizeof(FPOptions::storage_type), "Too short type for FPOptionsOverride"); /// Bit mask selecting bits of OverrideMask in serialized representation of /// FPOptionsOverride. 
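The comment above describes FPOptionsOverride as the new FPOptions value plus a mask of which fields were actually overridden; serialization puts the value in the high half and the mask in the low half, and applyOverrides (further down) merges it into a base value. A plain-integer sketch of both operations, with purely illustrative bit patterns:

#include <cassert>
#include <cstdint>

// Bits set in Mask are taken from Override, everything else from Base,
// the same merge applyOverrides performs on the packed FPOptions words.
uint16_t applyOverrides(uint16_t Base, uint16_t Override, uint16_t Mask) {
  return (Base & ~Mask) | (Override & Mask);
}

// Serialized form: override value in the high half, mask in the low half,
// mirroring the getAsOpaqueInt()/getFromOpaqueInt() pair below.
uint32_t asOpaqueInt(uint16_t Override, uint16_t Mask) {
  return (uint32_t(Override) << 16) | Mask;
}

int main() {
  uint16_t Base     = 0b0101'0101'0101'0101;
  uint16_t Override = 0b0000'0000'0000'1111;
  uint16_t Mask     = 0b0000'0000'0000'1100; // only these two bits are set
  assert(applyOverrides(Base, Override, Mask) == 0b0101'0101'0101'1101);

  uint32_t Opaque = asOpaqueInt(Override, Mask);
  assert((Opaque >> 16) == Override && (Opaque & 0xFFFF) == Mask);
  return 0;
}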
static constexpr storage_type OverrideMaskBits = (static_cast(1) << FPOptions::StorageBitSize) - 1; FPOptionsOverride() {} FPOptionsOverride(const LangOptions &LO) : Options(LO), OverrideMask(OverrideMaskBits) {} FPOptionsOverride(FPOptions FPO) : Options(FPO), OverrideMask(OverrideMaskBits) {} bool requiresTrailingStorage() const { return OverrideMask != 0; } void setAllowFPContractWithinStatement() { setFPContractModeOverride(LangOptions::FPM_On); } void setAllowFPContractAcrossStatement() { setFPContractModeOverride(LangOptions::FPM_Fast); } void setDisallowFPContract() { setFPContractModeOverride(LangOptions::FPM_Off); } void setFPPreciseEnabled(bool Value) { setAllowFPReassociateOverride(!Value); setNoHonorNaNsOverride(!Value); setNoHonorInfsOverride(!Value); setNoSignedZeroOverride(!Value); setAllowReciprocalOverride(!Value); setAllowApproxFuncOverride(!Value); if (Value) /* Precise mode implies fp_contract=on and disables ffast-math */ setAllowFPContractWithinStatement(); else /* Precise mode disabled sets fp_contract=fast and enables ffast-math */ setAllowFPContractAcrossStatement(); } storage_type getAsOpaqueInt() const { return (static_cast(Options.getAsOpaqueInt()) << FPOptions::StorageBitSize) | OverrideMask; } static FPOptionsOverride getFromOpaqueInt(storage_type I) { FPOptionsOverride Opts; Opts.OverrideMask = I & OverrideMaskBits; Opts.Options = FPOptions::getFromOpaqueInt(I >> FPOptions::StorageBitSize); return Opts; } FPOptions applyOverrides(FPOptions Base) { FPOptions Result = FPOptions::getFromOpaqueInt((Base.getAsOpaqueInt() & ~OverrideMask) | (Options.getAsOpaqueInt() & OverrideMask)); return Result; } FPOptions applyOverrides(const LangOptions &LO) { return applyOverrides(FPOptions(LO)); } bool operator==(FPOptionsOverride other) const { return Options == other.Options && OverrideMask == other.OverrideMask; } bool operator!=(FPOptionsOverride other) const { return !(*this == other); } #define OPTION(NAME, TYPE, WIDTH, PREVIOUS) \ bool has##NAME##Override() const { \ return OverrideMask & FPOptions::NAME##Mask; \ } \ TYPE get##NAME##Override() const { \ assert(has##NAME##Override()); \ return Options.get##NAME(); \ } \ void clear##NAME##Override() { \ /* Clear the actual value so that we don't have spurious differences when \ * testing equality. */ \ Options.set##NAME(TYPE(0)); \ OverrideMask &= ~FPOptions::NAME##Mask; \ } \ void set##NAME##Override(TYPE value) { \ Options.set##NAME(value); \ OverrideMask |= FPOptions::NAME##Mask; \ } #include "clang/Basic/FPOptions.def" LLVM_DUMP_METHOD void dump(); }; /// Describes the kind of translation unit being processed. enum TranslationUnitKind { /// The translation unit is a complete translation unit. TU_Complete, /// The translation unit is a prefix to a translation unit, and is /// not complete. TU_Prefix, /// The translation unit is a module. TU_Module, /// The translation unit is a is a complete translation unit that we might /// incrementally extend later. TU_Incremental }; } // namespace clang #endif // LLVM_CLANG_BASIC_LANGOPTIONS_H diff --git a/contrib/llvm-project/clang/lib/AST/RecordLayoutBuilder.cpp b/contrib/llvm-project/clang/lib/AST/RecordLayoutBuilder.cpp index 709e05716a56..61a30ead165e 100644 --- a/contrib/llvm-project/clang/lib/AST/RecordLayoutBuilder.cpp +++ b/contrib/llvm-project/clang/lib/AST/RecordLayoutBuilder.cpp @@ -1,3716 +1,3711 @@ //=== RecordLayoutBuilder.cpp - Helper class for building record layouts ---==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "clang/AST/ASTContext.h" #include "clang/AST/ASTDiagnostic.h" #include "clang/AST/Attr.h" #include "clang/AST/CXXInheritance.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/Expr.h" #include "clang/AST/VTableBuilder.h" #include "clang/AST/RecordLayout.h" #include "clang/Basic/TargetInfo.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Support/Format.h" #include "llvm/Support/MathExtras.h" using namespace clang; namespace { /// BaseSubobjectInfo - Represents a single base subobject in a complete class. /// For a class hierarchy like /// /// class A { }; /// class B : A { }; /// class C : A, B { }; /// /// The BaseSubobjectInfo graph for C will have three BaseSubobjectInfo /// instances, one for B and two for A. /// /// If a base is virtual, it will only have one BaseSubobjectInfo allocated. struct BaseSubobjectInfo { /// Class - The class for this base info. const CXXRecordDecl *Class; /// IsVirtual - Whether the BaseInfo represents a virtual base or not. bool IsVirtual; /// Bases - Information about the base subobjects. SmallVector Bases; /// PrimaryVirtualBaseInfo - Holds the base info for the primary virtual base /// of this base info (if one exists). BaseSubobjectInfo *PrimaryVirtualBaseInfo; // FIXME: Document. const BaseSubobjectInfo *Derived; }; /// Externally provided layout. Typically used when the AST source, such /// as DWARF, lacks all the information that was available at compile time, such /// as alignment attributes on fields and pragmas in effect. struct ExternalLayout { ExternalLayout() : Size(0), Align(0) {} /// Overall record size in bits. uint64_t Size; /// Overall record alignment in bits. uint64_t Align; /// Record field offsets in bits. llvm::DenseMap FieldOffsets; /// Direct, non-virtual base offsets. llvm::DenseMap BaseOffsets; /// Virtual base offsets. llvm::DenseMap VirtualBaseOffsets; /// Get the offset of the given field. The external source must provide /// entries for all fields in the record. uint64_t getExternalFieldOffset(const FieldDecl *FD) { assert(FieldOffsets.count(FD) && "Field does not have an external offset"); return FieldOffsets[FD]; } bool getExternalNVBaseOffset(const CXXRecordDecl *RD, CharUnits &BaseOffset) { auto Known = BaseOffsets.find(RD); if (Known == BaseOffsets.end()) return false; BaseOffset = Known->second; return true; } bool getExternalVBaseOffset(const CXXRecordDecl *RD, CharUnits &BaseOffset) { auto Known = VirtualBaseOffsets.find(RD); if (Known == VirtualBaseOffsets.end()) return false; BaseOffset = Known->second; return true; } }; /// EmptySubobjectMap - Keeps track of which empty subobjects exist at different /// offsets while laying out a C++ class. class EmptySubobjectMap { const ASTContext &Context; uint64_t CharWidth; /// Class - The class whose empty entries we're keeping track of. const CXXRecordDecl *Class; /// EmptyClassOffsets - A map from offsets to empty record decls. typedef llvm::TinyPtrVector ClassVectorTy; typedef llvm::DenseMap EmptyClassOffsetsMapTy; EmptyClassOffsetsMapTy EmptyClassOffsets; /// MaxEmptyClassOffset - The highest offset known to contain an empty /// base subobject. CharUnits MaxEmptyClassOffset; /// ComputeEmptySubobjectSizes - Compute the size of the largest base or /// member subobject that is empty. 
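EmptySubobjectMap exists to enforce the rule that two subobjects of the same empty class type may not end up at the same offset, which is what occasionally defeats the empty-base optimization. A standalone illustration follows; the types are invented, and the sizes in the comments are what typical Itanium-ABI targets produce.

#include <cstdio>

struct Empty {};
struct WithEmptyBase : Empty {
  char c; // the Empty base overlaps 'c', so this stays one byte
};
struct TwoEmpties : Empty {
  WithEmptyBase m; // m's own Empty base would also want offset 0, so the
                   // layout builder moves m up and the struct grows
};

int main() {
  std::printf("sizeof(WithEmptyBase) = %zu\n", sizeof(WithEmptyBase)); // typically 1
  std::printf("sizeof(TwoEmpties)    = %zu\n", sizeof(TwoEmpties));    // typically 2
  return 0;
}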
void ComputeEmptySubobjectSizes(); void AddSubobjectAtOffset(const CXXRecordDecl *RD, CharUnits Offset); void UpdateEmptyBaseSubobjects(const BaseSubobjectInfo *Info, CharUnits Offset, bool PlacingEmptyBase); void UpdateEmptyFieldSubobjects(const CXXRecordDecl *RD, const CXXRecordDecl *Class, CharUnits Offset, bool PlacingOverlappingField); void UpdateEmptyFieldSubobjects(const FieldDecl *FD, CharUnits Offset, bool PlacingOverlappingField); /// AnyEmptySubobjectsBeyondOffset - Returns whether there are any empty /// subobjects beyond the given offset. bool AnyEmptySubobjectsBeyondOffset(CharUnits Offset) const { return Offset <= MaxEmptyClassOffset; } CharUnits getFieldOffset(const ASTRecordLayout &Layout, unsigned FieldNo) const { uint64_t FieldOffset = Layout.getFieldOffset(FieldNo); assert(FieldOffset % CharWidth == 0 && "Field offset not at char boundary!"); return Context.toCharUnitsFromBits(FieldOffset); } protected: bool CanPlaceSubobjectAtOffset(const CXXRecordDecl *RD, CharUnits Offset) const; bool CanPlaceBaseSubobjectAtOffset(const BaseSubobjectInfo *Info, CharUnits Offset); bool CanPlaceFieldSubobjectAtOffset(const CXXRecordDecl *RD, const CXXRecordDecl *Class, CharUnits Offset) const; bool CanPlaceFieldSubobjectAtOffset(const FieldDecl *FD, CharUnits Offset) const; public: /// This holds the size of the largest empty subobject (either a base /// or a member). Will be zero if the record being built doesn't contain /// any empty classes. CharUnits SizeOfLargestEmptySubobject; EmptySubobjectMap(const ASTContext &Context, const CXXRecordDecl *Class) : Context(Context), CharWidth(Context.getCharWidth()), Class(Class) { ComputeEmptySubobjectSizes(); } /// CanPlaceBaseAtOffset - Return whether the given base class can be placed /// at the given offset. /// Returns false if placing the record will result in two components /// (direct or indirect) of the same type having the same offset. bool CanPlaceBaseAtOffset(const BaseSubobjectInfo *Info, CharUnits Offset); /// CanPlaceFieldAtOffset - Return whether a field can be placed at the given /// offset. bool CanPlaceFieldAtOffset(const FieldDecl *FD, CharUnits Offset); }; void EmptySubobjectMap::ComputeEmptySubobjectSizes() { // Check the bases. for (const CXXBaseSpecifier &Base : Class->bases()) { const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); CharUnits EmptySize; const ASTRecordLayout &Layout = Context.getASTRecordLayout(BaseDecl); if (BaseDecl->isEmpty()) { // If the class decl is empty, get its size. EmptySize = Layout.getSize(); } else { // Otherwise, we get the largest empty subobject for the decl. EmptySize = Layout.getSizeOfLargestEmptySubobject(); } if (EmptySize > SizeOfLargestEmptySubobject) SizeOfLargestEmptySubobject = EmptySize; } // Check the fields. for (const FieldDecl *FD : Class->fields()) { const RecordType *RT = Context.getBaseElementType(FD->getType())->getAs(); // We only care about record types. if (!RT) continue; CharUnits EmptySize; const CXXRecordDecl *MemberDecl = RT->getAsCXXRecordDecl(); const ASTRecordLayout &Layout = Context.getASTRecordLayout(MemberDecl); if (MemberDecl->isEmpty()) { // If the class decl is empty, get its size. EmptySize = Layout.getSize(); } else { // Otherwise, we get the largest empty subobject for the decl. 
EmptySize = Layout.getSizeOfLargestEmptySubobject(); } if (EmptySize > SizeOfLargestEmptySubobject) SizeOfLargestEmptySubobject = EmptySize; } } bool EmptySubobjectMap::CanPlaceSubobjectAtOffset(const CXXRecordDecl *RD, CharUnits Offset) const { // We only need to check empty bases. if (!RD->isEmpty()) return true; EmptyClassOffsetsMapTy::const_iterator I = EmptyClassOffsets.find(Offset); if (I == EmptyClassOffsets.end()) return true; const ClassVectorTy &Classes = I->second; if (!llvm::is_contained(Classes, RD)) return true; // There is already an empty class of the same type at this offset. return false; } void EmptySubobjectMap::AddSubobjectAtOffset(const CXXRecordDecl *RD, CharUnits Offset) { // We only care about empty bases. if (!RD->isEmpty()) return; // If we have empty structures inside a union, we can assign both // the same offset. Just avoid pushing them twice in the list. ClassVectorTy &Classes = EmptyClassOffsets[Offset]; if (llvm::is_contained(Classes, RD)) return; Classes.push_back(RD); // Update the empty class offset. if (Offset > MaxEmptyClassOffset) MaxEmptyClassOffset = Offset; } bool EmptySubobjectMap::CanPlaceBaseSubobjectAtOffset(const BaseSubobjectInfo *Info, CharUnits Offset) { // We don't have to keep looking past the maximum offset that's known to // contain an empty class. if (!AnyEmptySubobjectsBeyondOffset(Offset)) return true; if (!CanPlaceSubobjectAtOffset(Info->Class, Offset)) return false; // Traverse all non-virtual bases. const ASTRecordLayout &Layout = Context.getASTRecordLayout(Info->Class); for (const BaseSubobjectInfo *Base : Info->Bases) { if (Base->IsVirtual) continue; CharUnits BaseOffset = Offset + Layout.getBaseClassOffset(Base->Class); if (!CanPlaceBaseSubobjectAtOffset(Base, BaseOffset)) return false; } if (Info->PrimaryVirtualBaseInfo) { BaseSubobjectInfo *PrimaryVirtualBaseInfo = Info->PrimaryVirtualBaseInfo; if (Info == PrimaryVirtualBaseInfo->Derived) { if (!CanPlaceBaseSubobjectAtOffset(PrimaryVirtualBaseInfo, Offset)) return false; } } // Traverse all member variables. unsigned FieldNo = 0; for (CXXRecordDecl::field_iterator I = Info->Class->field_begin(), E = Info->Class->field_end(); I != E; ++I, ++FieldNo) { if (I->isBitField()) continue; CharUnits FieldOffset = Offset + getFieldOffset(Layout, FieldNo); if (!CanPlaceFieldSubobjectAtOffset(*I, FieldOffset)) return false; } return true; } void EmptySubobjectMap::UpdateEmptyBaseSubobjects(const BaseSubobjectInfo *Info, CharUnits Offset, bool PlacingEmptyBase) { if (!PlacingEmptyBase && Offset >= SizeOfLargestEmptySubobject) { // We know that the only empty subobjects that can conflict with empty // subobject of non-empty bases, are empty bases that can be placed at // offset zero. Because of this, we only need to keep track of empty base // subobjects with offsets less than the size of the largest empty // subobject for our class. return; } AddSubobjectAtOffset(Info->Class, Offset); // Traverse all non-virtual bases. const ASTRecordLayout &Layout = Context.getASTRecordLayout(Info->Class); for (const BaseSubobjectInfo *Base : Info->Bases) { if (Base->IsVirtual) continue; CharUnits BaseOffset = Offset + Layout.getBaseClassOffset(Base->Class); UpdateEmptyBaseSubobjects(Base, BaseOffset, PlacingEmptyBase); } if (Info->PrimaryVirtualBaseInfo) { BaseSubobjectInfo *PrimaryVirtualBaseInfo = Info->PrimaryVirtualBaseInfo; if (Info == PrimaryVirtualBaseInfo->Derived) UpdateEmptyBaseSubobjects(PrimaryVirtualBaseInfo, Offset, PlacingEmptyBase); } // Traverse all member variables. 
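CanPlaceSubobjectAtOffset and AddSubobjectAtOffset above maintain a map from offsets to the empty classes already recorded there. A toy version of that bookkeeping, using std::map and strings in place of clang's DenseMap of CXXRecordDecls (all names invented):

#include <algorithm>
#include <cassert>
#include <map>
#include <string>
#include <vector>

class ToyEmptySubobjectMap {
  // Offset -> empty class types already placed at that offset.
  std::map<long, std::vector<std::string>> EmptyClassOffsets;

public:
  bool canPlaceAtOffset(const std::string &Type, long Offset) const {
    auto It = EmptyClassOffsets.find(Offset);
    if (It == EmptyClassOffsets.end())
      return true;
    // Reject only an exact repeat: the same empty type at the same offset.
    return std::find(It->second.begin(), It->second.end(), Type) ==
           It->second.end();
  }

  void addAtOffset(const std::string &Type, long Offset) {
    auto &Classes = EmptyClassOffsets[Offset];
    if (std::find(Classes.begin(), Classes.end(), Type) == Classes.end())
      Classes.push_back(Type);
  }
};

int main() {
  ToyEmptySubobjectMap M;
  assert(M.canPlaceAtOffset("Empty", 0));
  M.addAtOffset("Empty", 0);
  assert(!M.canPlaceAtOffset("Empty", 0));     // same type, same offset: no
  assert(M.canPlaceAtOffset("OtherEmpty", 0)); // different empty type: fine
  assert(M.canPlaceAtOffset("Empty", 1));      // same type, new offset: fine
  return 0;
}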
unsigned FieldNo = 0; for (CXXRecordDecl::field_iterator I = Info->Class->field_begin(), E = Info->Class->field_end(); I != E; ++I, ++FieldNo) { if (I->isBitField()) continue; CharUnits FieldOffset = Offset + getFieldOffset(Layout, FieldNo); UpdateEmptyFieldSubobjects(*I, FieldOffset, PlacingEmptyBase); } } bool EmptySubobjectMap::CanPlaceBaseAtOffset(const BaseSubobjectInfo *Info, CharUnits Offset) { // If we know this class doesn't have any empty subobjects we don't need to // bother checking. if (SizeOfLargestEmptySubobject.isZero()) return true; if (!CanPlaceBaseSubobjectAtOffset(Info, Offset)) return false; // We are able to place the base at this offset. Make sure to update the // empty base subobject map. UpdateEmptyBaseSubobjects(Info, Offset, Info->Class->isEmpty()); return true; } bool EmptySubobjectMap::CanPlaceFieldSubobjectAtOffset(const CXXRecordDecl *RD, const CXXRecordDecl *Class, CharUnits Offset) const { // We don't have to keep looking past the maximum offset that's known to // contain an empty class. if (!AnyEmptySubobjectsBeyondOffset(Offset)) return true; if (!CanPlaceSubobjectAtOffset(RD, Offset)) return false; const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); // Traverse all non-virtual bases. for (const CXXBaseSpecifier &Base : RD->bases()) { if (Base.isVirtual()) continue; const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); CharUnits BaseOffset = Offset + Layout.getBaseClassOffset(BaseDecl); if (!CanPlaceFieldSubobjectAtOffset(BaseDecl, Class, BaseOffset)) return false; } if (RD == Class) { // This is the most derived class, traverse virtual bases as well. for (const CXXBaseSpecifier &Base : RD->vbases()) { const CXXRecordDecl *VBaseDecl = Base.getType()->getAsCXXRecordDecl(); CharUnits VBaseOffset = Offset + Layout.getVBaseClassOffset(VBaseDecl); if (!CanPlaceFieldSubobjectAtOffset(VBaseDecl, Class, VBaseOffset)) return false; } } // Traverse all member variables. unsigned FieldNo = 0; for (CXXRecordDecl::field_iterator I = RD->field_begin(), E = RD->field_end(); I != E; ++I, ++FieldNo) { if (I->isBitField()) continue; CharUnits FieldOffset = Offset + getFieldOffset(Layout, FieldNo); if (!CanPlaceFieldSubobjectAtOffset(*I, FieldOffset)) return false; } return true; } bool EmptySubobjectMap::CanPlaceFieldSubobjectAtOffset(const FieldDecl *FD, CharUnits Offset) const { // We don't have to keep looking past the maximum offset that's known to // contain an empty class. if (!AnyEmptySubobjectsBeyondOffset(Offset)) return true; QualType T = FD->getType(); if (const CXXRecordDecl *RD = T->getAsCXXRecordDecl()) return CanPlaceFieldSubobjectAtOffset(RD, RD, Offset); // If we have an array type we need to look at every element. if (const ConstantArrayType *AT = Context.getAsConstantArrayType(T)) { QualType ElemTy = Context.getBaseElementType(AT); const RecordType *RT = ElemTy->getAs(); if (!RT) return true; const CXXRecordDecl *RD = RT->getAsCXXRecordDecl(); const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); uint64_t NumElements = Context.getConstantArrayElementCount(AT); CharUnits ElementOffset = Offset; for (uint64_t I = 0; I != NumElements; ++I) { // We don't have to keep looking past the maximum offset that's known to // contain an empty class. 
if (!AnyEmptySubobjectsBeyondOffset(ElementOffset)) return true; if (!CanPlaceFieldSubobjectAtOffset(RD, RD, ElementOffset)) return false; ElementOffset += Layout.getSize(); } } return true; } bool EmptySubobjectMap::CanPlaceFieldAtOffset(const FieldDecl *FD, CharUnits Offset) { if (!CanPlaceFieldSubobjectAtOffset(FD, Offset)) return false; // We are able to place the member variable at this offset. // Make sure to update the empty field subobject map. UpdateEmptyFieldSubobjects(FD, Offset, FD->hasAttr()); return true; } void EmptySubobjectMap::UpdateEmptyFieldSubobjects( const CXXRecordDecl *RD, const CXXRecordDecl *Class, CharUnits Offset, bool PlacingOverlappingField) { // We know that the only empty subobjects that can conflict with empty // field subobjects are subobjects of empty bases and potentially-overlapping // fields that can be placed at offset zero. Because of this, we only need to // keep track of empty field subobjects with offsets less than the size of // the largest empty subobject for our class. // // (Proof: we will only consider placing a subobject at offset zero or at // >= the current dsize. The only cases where the earlier subobject can be // placed beyond the end of dsize is if it's an empty base or a // potentially-overlapping field.) if (!PlacingOverlappingField && Offset >= SizeOfLargestEmptySubobject) return; AddSubobjectAtOffset(RD, Offset); const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); // Traverse all non-virtual bases. for (const CXXBaseSpecifier &Base : RD->bases()) { if (Base.isVirtual()) continue; const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); CharUnits BaseOffset = Offset + Layout.getBaseClassOffset(BaseDecl); UpdateEmptyFieldSubobjects(BaseDecl, Class, BaseOffset, PlacingOverlappingField); } if (RD == Class) { // This is the most derived class, traverse virtual bases as well. for (const CXXBaseSpecifier &Base : RD->vbases()) { const CXXRecordDecl *VBaseDecl = Base.getType()->getAsCXXRecordDecl(); CharUnits VBaseOffset = Offset + Layout.getVBaseClassOffset(VBaseDecl); UpdateEmptyFieldSubobjects(VBaseDecl, Class, VBaseOffset, PlacingOverlappingField); } } // Traverse all member variables. unsigned FieldNo = 0; for (CXXRecordDecl::field_iterator I = RD->field_begin(), E = RD->field_end(); I != E; ++I, ++FieldNo) { if (I->isBitField()) continue; CharUnits FieldOffset = Offset + getFieldOffset(Layout, FieldNo); UpdateEmptyFieldSubobjects(*I, FieldOffset, PlacingOverlappingField); } } void EmptySubobjectMap::UpdateEmptyFieldSubobjects( const FieldDecl *FD, CharUnits Offset, bool PlacingOverlappingField) { QualType T = FD->getType(); if (const CXXRecordDecl *RD = T->getAsCXXRecordDecl()) { UpdateEmptyFieldSubobjects(RD, RD, Offset, PlacingOverlappingField); return; } // If we have an array type we need to update every element. if (const ConstantArrayType *AT = Context.getAsConstantArrayType(T)) { QualType ElemTy = Context.getBaseElementType(AT); const RecordType *RT = ElemTy->getAs(); if (!RT) return; const CXXRecordDecl *RD = RT->getAsCXXRecordDecl(); const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); uint64_t NumElements = Context.getConstantArrayElementCount(AT); CharUnits ElementOffset = Offset; for (uint64_t I = 0; I != NumElements; ++I) { // We know that the only empty subobjects that can conflict with empty // field subobjects are subobjects of empty bases that can be placed at // offset zero. 
Because of this, we only need to keep track of empty field // subobjects with offsets less than the size of the largest empty // subobject for our class. if (!PlacingOverlappingField && ElementOffset >= SizeOfLargestEmptySubobject) return; UpdateEmptyFieldSubobjects(RD, RD, ElementOffset, PlacingOverlappingField); ElementOffset += Layout.getSize(); } } } typedef llvm::SmallPtrSet ClassSetTy; class ItaniumRecordLayoutBuilder { protected: // FIXME: Remove this and make the appropriate fields public. friend class clang::ASTContext; const ASTContext &Context; EmptySubobjectMap *EmptySubobjects; /// Size - The current size of the record layout. uint64_t Size; /// Alignment - The current alignment of the record layout. CharUnits Alignment; /// PreferredAlignment - The preferred alignment of the record layout. CharUnits PreferredAlignment; /// The alignment if attribute packed is not used. CharUnits UnpackedAlignment; /// \brief The maximum of the alignments of top-level members. CharUnits UnadjustedAlignment; SmallVector FieldOffsets; /// Whether the external AST source has provided a layout for this /// record. unsigned UseExternalLayout : 1; /// Whether we need to infer alignment, even when we have an /// externally-provided layout. unsigned InferAlignment : 1; /// Packed - Whether the record is packed or not. unsigned Packed : 1; unsigned IsUnion : 1; unsigned IsMac68kAlign : 1; unsigned IsNaturalAlign : 1; unsigned IsMsStruct : 1; /// UnfilledBitsInLastUnit - If the last field laid out was a bitfield, /// this contains the number of bits in the last unit that can be used for /// an adjacent bitfield if necessary. The unit in question is usually /// a byte, but larger units are used if IsMsStruct. unsigned char UnfilledBitsInLastUnit; /// LastBitfieldStorageUnitSize - If IsMsStruct, represents the size of the /// storage unit of the previous field if it was a bitfield. unsigned char LastBitfieldStorageUnitSize; /// MaxFieldAlignment - The maximum allowed field alignment. This is set by /// #pragma pack. CharUnits MaxFieldAlignment; /// DataSize - The data size of the record being laid out. uint64_t DataSize; CharUnits NonVirtualSize; CharUnits NonVirtualAlignment; CharUnits PreferredNVAlignment; /// If we've laid out a field but not included its tail padding in Size yet, /// this is the size up to the end of that field. CharUnits PaddedFieldSize; /// PrimaryBase - the primary base class (if one exists) of the class /// we're laying out. const CXXRecordDecl *PrimaryBase; /// PrimaryBaseIsVirtual - Whether the primary base of the class we're laying /// out is virtual. bool PrimaryBaseIsVirtual; /// HasOwnVFPtr - Whether the class provides its own vtable/vftbl /// pointer, as opposed to inheriting one from a primary base class. bool HasOwnVFPtr; /// the flag of field offset changing due to packed attribute. bool HasPackedField; /// HandledFirstNonOverlappingEmptyField - An auxiliary field used for AIX. /// When there are OverlappingEmptyFields existing in the aggregate, the /// flag shows if the following first non-empty or empty-but-non-overlapping /// field has been handled, if any. bool HandledFirstNonOverlappingEmptyField; typedef llvm::DenseMap BaseOffsetsMapTy; /// Bases - base classes and their offsets in the record. BaseOffsetsMapTy Bases; // VBases - virtual base classes and their offsets in the record. 
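UnfilledBitsInLastUnit and LastBitfieldStorageUnitSize above track how much of the current storage unit an earlier bit-field left available. A quick illustration of the effect; the sizes noted are the usual Itanium-ABI result, and other ABIs may differ.

#include <cstdio>

struct Bits {
  unsigned a : 3;
  unsigned b : 5;  // fits in the bits 'a' left unfilled, same storage unit
  unsigned c : 30; // does not fit in the remaining 24 bits, starts a new unit
};

int main() {
  std::printf("sizeof(Bits) = %zu\n", sizeof(Bits)); // typically 8
  return 0;
}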
ASTRecordLayout::VBaseOffsetsMapTy VBases; /// IndirectPrimaryBases - Virtual base classes, direct or indirect, that are /// primary base classes for some other direct or indirect base class. CXXIndirectPrimaryBaseSet IndirectPrimaryBases; /// FirstNearlyEmptyVBase - The first nearly empty virtual base class in /// inheritance graph order. Used for determining the primary base class. const CXXRecordDecl *FirstNearlyEmptyVBase; /// VisitedVirtualBases - A set of all the visited virtual bases, used to /// avoid visiting virtual bases more than once. llvm::SmallPtrSet VisitedVirtualBases; /// Valid if UseExternalLayout is true. ExternalLayout External; ItaniumRecordLayoutBuilder(const ASTContext &Context, EmptySubobjectMap *EmptySubobjects) : Context(Context), EmptySubobjects(EmptySubobjects), Size(0), Alignment(CharUnits::One()), PreferredAlignment(CharUnits::One()), UnpackedAlignment(CharUnits::One()), UnadjustedAlignment(CharUnits::One()), UseExternalLayout(false), InferAlignment(false), Packed(false), IsUnion(false), IsMac68kAlign(false), IsNaturalAlign(!Context.getTargetInfo().getTriple().isOSAIX()), IsMsStruct(false), UnfilledBitsInLastUnit(0), LastBitfieldStorageUnitSize(0), MaxFieldAlignment(CharUnits::Zero()), DataSize(0), NonVirtualSize(CharUnits::Zero()), NonVirtualAlignment(CharUnits::One()), PreferredNVAlignment(CharUnits::One()), PaddedFieldSize(CharUnits::Zero()), PrimaryBase(nullptr), PrimaryBaseIsVirtual(false), HasOwnVFPtr(false), HasPackedField(false), HandledFirstNonOverlappingEmptyField(false), FirstNearlyEmptyVBase(nullptr) {} void Layout(const RecordDecl *D); void Layout(const CXXRecordDecl *D); void Layout(const ObjCInterfaceDecl *D); void LayoutFields(const RecordDecl *D); void LayoutField(const FieldDecl *D, bool InsertExtraPadding); void LayoutWideBitField(uint64_t FieldSize, uint64_t StorageUnitSize, bool FieldPacked, const FieldDecl *D); void LayoutBitField(const FieldDecl *D); TargetCXXABI getCXXABI() const { return Context.getTargetInfo().getCXXABI(); } /// BaseSubobjectInfoAllocator - Allocator for BaseSubobjectInfo objects. llvm::SpecificBumpPtrAllocator BaseSubobjectInfoAllocator; typedef llvm::DenseMap BaseSubobjectInfoMapTy; /// VirtualBaseInfo - Map from all the (direct or indirect) virtual bases /// of the class we're laying out to their base subobject info. BaseSubobjectInfoMapTy VirtualBaseInfo; /// NonVirtualBaseInfo - Map from all the direct non-virtual bases of the /// class we're laying out to their base subobject info. BaseSubobjectInfoMapTy NonVirtualBaseInfo; /// ComputeBaseSubobjectInfo - Compute the base subobject information for the /// bases of the given class. void ComputeBaseSubobjectInfo(const CXXRecordDecl *RD); /// ComputeBaseSubobjectInfo - Compute the base subobject information for a /// single class and all of its base classes. BaseSubobjectInfo *ComputeBaseSubobjectInfo(const CXXRecordDecl *RD, bool IsVirtual, BaseSubobjectInfo *Derived); /// DeterminePrimaryBase - Determine the primary base of the given class. void DeterminePrimaryBase(const CXXRecordDecl *RD); void SelectPrimaryVBase(const CXXRecordDecl *RD); void EnsureVTablePointerAlignment(CharUnits UnpackedBaseAlign); /// LayoutNonVirtualBases - Determines the primary base class (if any) and /// lays it out. Will then proceed to lay out all non-virtual base clasess. void LayoutNonVirtualBases(const CXXRecordDecl *RD); /// LayoutNonVirtualBase - Lays out a single non-virtual base. 
void LayoutNonVirtualBase(const BaseSubobjectInfo *Base); void AddPrimaryVirtualBaseOffsets(const BaseSubobjectInfo *Info, CharUnits Offset); /// LayoutVirtualBases - Lays out all the virtual bases. void LayoutVirtualBases(const CXXRecordDecl *RD, const CXXRecordDecl *MostDerivedClass); /// LayoutVirtualBase - Lays out a single virtual base. void LayoutVirtualBase(const BaseSubobjectInfo *Base); /// LayoutBase - Will lay out a base and return the offset where it was /// placed, in chars. CharUnits LayoutBase(const BaseSubobjectInfo *Base); /// InitializeLayout - Initialize record layout for the given record decl. void InitializeLayout(const Decl *D); /// FinishLayout - Finalize record layout. Adjust record size based on the /// alignment. void FinishLayout(const NamedDecl *D); void UpdateAlignment(CharUnits NewAlignment, CharUnits UnpackedNewAlignment, CharUnits PreferredAlignment); void UpdateAlignment(CharUnits NewAlignment, CharUnits UnpackedNewAlignment) { UpdateAlignment(NewAlignment, UnpackedNewAlignment, NewAlignment); } void UpdateAlignment(CharUnits NewAlignment) { UpdateAlignment(NewAlignment, NewAlignment, NewAlignment); } /// Retrieve the externally-supplied field offset for the given /// field. /// /// \param Field The field whose offset is being queried. /// \param ComputedOffset The offset that we've computed for this field. uint64_t updateExternalFieldOffset(const FieldDecl *Field, uint64_t ComputedOffset); void CheckFieldPadding(uint64_t Offset, uint64_t UnpaddedOffset, uint64_t UnpackedOffset, unsigned UnpackedAlign, bool isPacked, const FieldDecl *D); DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID); CharUnits getSize() const { assert(Size % Context.getCharWidth() == 0); return Context.toCharUnitsFromBits(Size); } uint64_t getSizeInBits() const { return Size; } void setSize(CharUnits NewSize) { Size = Context.toBits(NewSize); } void setSize(uint64_t NewSize) { Size = NewSize; } CharUnits getAligment() const { return Alignment; } CharUnits getDataSize() const { assert(DataSize % Context.getCharWidth() == 0); return Context.toCharUnitsFromBits(DataSize); } uint64_t getDataSizeInBits() const { return DataSize; } void setDataSize(CharUnits NewSize) { DataSize = Context.toBits(NewSize); } void setDataSize(uint64_t NewSize) { DataSize = NewSize; } ItaniumRecordLayoutBuilder(const ItaniumRecordLayoutBuilder &) = delete; void operator=(const ItaniumRecordLayoutBuilder &) = delete; }; } // end anonymous namespace void ItaniumRecordLayoutBuilder::SelectPrimaryVBase(const CXXRecordDecl *RD) { for (const auto &I : RD->bases()) { assert(!I.getType()->isDependentType() && "Cannot layout class with dependent bases."); const CXXRecordDecl *Base = I.getType()->getAsCXXRecordDecl(); // Check if this is a nearly empty virtual base. if (I.isVirtual() && Context.isNearlyEmpty(Base)) { // If it's not an indirect primary base, then we've found our primary // base. if (!IndirectPrimaryBases.count(Base)) { PrimaryBase = Base; PrimaryBaseIsVirtual = true; return; } // Is this the first nearly empty virtual base? if (!FirstNearlyEmptyVBase) FirstNearlyEmptyVBase = Base; } SelectPrimaryVBase(Base); if (PrimaryBase) return; } } /// DeterminePrimaryBase - Determine the primary base of the given class. void ItaniumRecordLayoutBuilder::DeterminePrimaryBase(const CXXRecordDecl *RD) { // If the class isn't dynamic, it won't have a primary base. 
if (!RD->isDynamicClass()) return; // Compute all the primary virtual bases for all of our direct and // indirect bases, and record all their primary virtual base classes. RD->getIndirectPrimaryBases(IndirectPrimaryBases); // If the record has a dynamic base class, attempt to choose a primary base // class. It is the first (in direct base class order) non-virtual dynamic // base class, if one exists. for (const auto &I : RD->bases()) { // Ignore virtual bases. if (I.isVirtual()) continue; const CXXRecordDecl *Base = I.getType()->getAsCXXRecordDecl(); if (Base->isDynamicClass()) { // We found it. PrimaryBase = Base; PrimaryBaseIsVirtual = false; return; } } // Under the Itanium ABI, if there is no non-virtual primary base class, // try to compute the primary virtual base. The primary virtual base is // the first nearly empty virtual base that is not an indirect primary // virtual base class, if one exists. if (RD->getNumVBases() != 0) { SelectPrimaryVBase(RD); if (PrimaryBase) return; } // Otherwise, it is the first indirect primary base class, if one exists. if (FirstNearlyEmptyVBase) { PrimaryBase = FirstNearlyEmptyVBase; PrimaryBaseIsVirtual = true; return; } assert(!PrimaryBase && "Should not get here with a primary base!"); } BaseSubobjectInfo *ItaniumRecordLayoutBuilder::ComputeBaseSubobjectInfo( const CXXRecordDecl *RD, bool IsVirtual, BaseSubobjectInfo *Derived) { BaseSubobjectInfo *Info; if (IsVirtual) { // Check if we already have info about this virtual base. BaseSubobjectInfo *&InfoSlot = VirtualBaseInfo[RD]; if (InfoSlot) { assert(InfoSlot->Class == RD && "Wrong class for virtual base info!"); return InfoSlot; } // We don't, create it. InfoSlot = new (BaseSubobjectInfoAllocator.Allocate()) BaseSubobjectInfo; Info = InfoSlot; } else { Info = new (BaseSubobjectInfoAllocator.Allocate()) BaseSubobjectInfo; } Info->Class = RD; Info->IsVirtual = IsVirtual; Info->Derived = nullptr; Info->PrimaryVirtualBaseInfo = nullptr; const CXXRecordDecl *PrimaryVirtualBase = nullptr; BaseSubobjectInfo *PrimaryVirtualBaseInfo = nullptr; // Check if this base has a primary virtual base. if (RD->getNumVBases()) { const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); if (Layout.isPrimaryBaseVirtual()) { // This base does have a primary virtual base. PrimaryVirtualBase = Layout.getPrimaryBase(); assert(PrimaryVirtualBase && "Didn't have a primary virtual base!"); // Now check if we have base subobject info about this primary base. PrimaryVirtualBaseInfo = VirtualBaseInfo.lookup(PrimaryVirtualBase); if (PrimaryVirtualBaseInfo) { if (PrimaryVirtualBaseInfo->Derived) { // We did have info about this primary base, and it turns out that it // has already been claimed as a primary virtual base for another // base. PrimaryVirtualBase = nullptr; } else { // We can claim this base as our primary base. Info->PrimaryVirtualBaseInfo = PrimaryVirtualBaseInfo; PrimaryVirtualBaseInfo->Derived = Info; } } } } // Now go through all direct bases. for (const auto &I : RD->bases()) { bool IsVirtual = I.isVirtual(); const CXXRecordDecl *BaseDecl = I.getType()->getAsCXXRecordDecl(); Info->Bases.push_back(ComputeBaseSubobjectInfo(BaseDecl, IsVirtual, Info)); } if (PrimaryVirtualBase && !PrimaryVirtualBaseInfo) { // Traversing the bases must have created the base info for our primary // virtual base. PrimaryVirtualBaseInfo = VirtualBaseInfo.lookup(PrimaryVirtualBase); assert(PrimaryVirtualBaseInfo && "Did not create a primary virtual base!"); // Claim the primary virtual base as our primary virtual base. 
Info->PrimaryVirtualBaseInfo = PrimaryVirtualBaseInfo; PrimaryVirtualBaseInfo->Derived = Info; } return Info; } void ItaniumRecordLayoutBuilder::ComputeBaseSubobjectInfo( const CXXRecordDecl *RD) { for (const auto &I : RD->bases()) { bool IsVirtual = I.isVirtual(); const CXXRecordDecl *BaseDecl = I.getType()->getAsCXXRecordDecl(); // Compute the base subobject info for this base. BaseSubobjectInfo *Info = ComputeBaseSubobjectInfo(BaseDecl, IsVirtual, nullptr); if (IsVirtual) { // ComputeBaseInfo has already added this base for us. assert(VirtualBaseInfo.count(BaseDecl) && "Did not add virtual base!"); } else { // Add the base info to the map of non-virtual bases. assert(!NonVirtualBaseInfo.count(BaseDecl) && "Non-virtual base already exists!"); NonVirtualBaseInfo.insert(std::make_pair(BaseDecl, Info)); } } } void ItaniumRecordLayoutBuilder::EnsureVTablePointerAlignment( CharUnits UnpackedBaseAlign) { CharUnits BaseAlign = Packed ? CharUnits::One() : UnpackedBaseAlign; // The maximum field alignment overrides base align. if (!MaxFieldAlignment.isZero()) { BaseAlign = std::min(BaseAlign, MaxFieldAlignment); UnpackedBaseAlign = std::min(UnpackedBaseAlign, MaxFieldAlignment); } // Round up the current record size to pointer alignment. setSize(getSize().alignTo(BaseAlign)); // Update the alignment. UpdateAlignment(BaseAlign, UnpackedBaseAlign, BaseAlign); } void ItaniumRecordLayoutBuilder::LayoutNonVirtualBases( const CXXRecordDecl *RD) { // Then, determine the primary base class. DeterminePrimaryBase(RD); // Compute base subobject info. ComputeBaseSubobjectInfo(RD); // If we have a primary base class, lay it out. if (PrimaryBase) { if (PrimaryBaseIsVirtual) { // If the primary virtual base was a primary virtual base of some other // base class we'll have to steal it. BaseSubobjectInfo *PrimaryBaseInfo = VirtualBaseInfo.lookup(PrimaryBase); PrimaryBaseInfo->Derived = nullptr; // We have a virtual primary base, insert it as an indirect primary base. IndirectPrimaryBases.insert(PrimaryBase); assert(!VisitedVirtualBases.count(PrimaryBase) && "vbase already visited!"); VisitedVirtualBases.insert(PrimaryBase); LayoutVirtualBase(PrimaryBaseInfo); } else { BaseSubobjectInfo *PrimaryBaseInfo = NonVirtualBaseInfo.lookup(PrimaryBase); assert(PrimaryBaseInfo && "Did not find base info for non-virtual primary base!"); LayoutNonVirtualBase(PrimaryBaseInfo); } // If this class needs a vtable/vf-table and didn't get one from a // primary base, add it in now. } else if (RD->isDynamicClass()) { assert(DataSize == 0 && "Vtable pointer must be at offset zero!"); CharUnits PtrWidth = Context.toCharUnitsFromBits(Context.getTargetInfo().getPointerWidth(0)); CharUnits PtrAlign = Context.toCharUnitsFromBits(Context.getTargetInfo().getPointerAlign(0)); EnsureVTablePointerAlignment(PtrAlign); HasOwnVFPtr = true; assert(!IsUnion && "Unions cannot be dynamic classes."); HandledFirstNonOverlappingEmptyField = true; setSize(getSize() + PtrWidth); setDataSize(getSize()); } // Now lay out the non-virtual bases. for (const auto &I : RD->bases()) { // Ignore virtual bases. if (I.isVirtual()) continue; const CXXRecordDecl *BaseDecl = I.getType()->getAsCXXRecordDecl(); // Skip the primary base, because we've already laid it out. The // !PrimaryBaseIsVirtual check is required because we might have a // non-virtual base of the same type as a primary virtual base. if (BaseDecl == PrimaryBase && !PrimaryBaseIsVirtual) continue; // Lay out the base. 
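This is the split the code above is making: either the class shares the vtable pointer of a primary base, or it gets its own (HasOwnVFPtr) at offset zero. A small illustration of the size effect under the Itanium ABI, with invented types and typical 64-bit numbers in the comments:

#include <cstdio>

// 'Dyn' is dynamic, so it gets a vtable pointer; 'Derived' picks Dyn as its
// primary base and shares that vptr instead of adding a second one.
struct Dyn {
  virtual ~Dyn() = default;
  int a;
};
struct Derived : Dyn {
  int b; // typically placed right after Dyn's data, no extra vptr
};

int main() {
  // On a typical 64-bit Itanium-ABI target both print 16.
  std::printf("sizeof(Dyn) = %zu, sizeof(Derived) = %zu\n",
              sizeof(Dyn), sizeof(Derived));
  return 0;
}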
BaseSubobjectInfo *BaseInfo = NonVirtualBaseInfo.lookup(BaseDecl); assert(BaseInfo && "Did not find base info for non-virtual base!"); LayoutNonVirtualBase(BaseInfo); } } void ItaniumRecordLayoutBuilder::LayoutNonVirtualBase( const BaseSubobjectInfo *Base) { // Layout the base. CharUnits Offset = LayoutBase(Base); // Add its base class offset. assert(!Bases.count(Base->Class) && "base offset already exists!"); Bases.insert(std::make_pair(Base->Class, Offset)); AddPrimaryVirtualBaseOffsets(Base, Offset); } void ItaniumRecordLayoutBuilder::AddPrimaryVirtualBaseOffsets( const BaseSubobjectInfo *Info, CharUnits Offset) { // This base isn't interesting, it has no virtual bases. if (!Info->Class->getNumVBases()) return; // First, check if we have a virtual primary base to add offsets for. if (Info->PrimaryVirtualBaseInfo) { assert(Info->PrimaryVirtualBaseInfo->IsVirtual && "Primary virtual base is not virtual!"); if (Info->PrimaryVirtualBaseInfo->Derived == Info) { // Add the offset. assert(!VBases.count(Info->PrimaryVirtualBaseInfo->Class) && "primary vbase offset already exists!"); VBases.insert(std::make_pair(Info->PrimaryVirtualBaseInfo->Class, ASTRecordLayout::VBaseInfo(Offset, false))); // Traverse the primary virtual base. AddPrimaryVirtualBaseOffsets(Info->PrimaryVirtualBaseInfo, Offset); } } // Now go through all direct non-virtual bases. const ASTRecordLayout &Layout = Context.getASTRecordLayout(Info->Class); for (const BaseSubobjectInfo *Base : Info->Bases) { if (Base->IsVirtual) continue; CharUnits BaseOffset = Offset + Layout.getBaseClassOffset(Base->Class); AddPrimaryVirtualBaseOffsets(Base, BaseOffset); } } void ItaniumRecordLayoutBuilder::LayoutVirtualBases( const CXXRecordDecl *RD, const CXXRecordDecl *MostDerivedClass) { const CXXRecordDecl *PrimaryBase; bool PrimaryBaseIsVirtual; if (MostDerivedClass == RD) { PrimaryBase = this->PrimaryBase; PrimaryBaseIsVirtual = this->PrimaryBaseIsVirtual; } else { const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); PrimaryBase = Layout.getPrimaryBase(); PrimaryBaseIsVirtual = Layout.isPrimaryBaseVirtual(); } for (const CXXBaseSpecifier &Base : RD->bases()) { assert(!Base.getType()->isDependentType() && "Cannot layout class with dependent bases."); const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); if (Base.isVirtual()) { if (PrimaryBase != BaseDecl || !PrimaryBaseIsVirtual) { bool IndirectPrimaryBase = IndirectPrimaryBases.count(BaseDecl); // Only lay out the virtual base if it's not an indirect primary base. if (!IndirectPrimaryBase) { // Only visit virtual bases once. if (!VisitedVirtualBases.insert(BaseDecl).second) continue; const BaseSubobjectInfo *BaseInfo = VirtualBaseInfo.lookup(BaseDecl); assert(BaseInfo && "Did not find virtual base info!"); LayoutVirtualBase(BaseInfo); } } } if (!BaseDecl->getNumVBases()) { // This base isn't interesting since it doesn't have any virtual bases. continue; } LayoutVirtualBases(BaseDecl, MostDerivedClass); } } void ItaniumRecordLayoutBuilder::LayoutVirtualBase( const BaseSubobjectInfo *Base) { assert(!Base->Derived && "Trying to lay out a primary virtual base!"); // Layout the base. CharUnits Offset = LayoutBase(Base); // Add its base class offset. 
assert(!VBases.count(Base->Class) && "vbase offset already exists!"); VBases.insert(std::make_pair(Base->Class, ASTRecordLayout::VBaseInfo(Offset, false))); AddPrimaryVirtualBaseOffsets(Base, Offset); } CharUnits ItaniumRecordLayoutBuilder::LayoutBase(const BaseSubobjectInfo *Base) { assert(!IsUnion && "Unions cannot have base classes."); const ASTRecordLayout &Layout = Context.getASTRecordLayout(Base->Class); CharUnits Offset; // Query the external layout to see if it provides an offset. bool HasExternalLayout = false; if (UseExternalLayout) { if (Base->IsVirtual) HasExternalLayout = External.getExternalVBaseOffset(Base->Class, Offset); else HasExternalLayout = External.getExternalNVBaseOffset(Base->Class, Offset); } auto getBaseOrPreferredBaseAlignFromUnpacked = [&](CharUnits UnpackedAlign) { // Clang <= 6 incorrectly applied the 'packed' attribute to base classes. // Per GCC's documentation, it only applies to non-static data members. return (Packed && ((Context.getLangOpts().getClangABICompat() <= LangOptions::ClangABI::Ver6) || Context.getTargetInfo().getTriple().isPS4() || Context.getTargetInfo().getTriple().isOSAIX())) ? CharUnits::One() : UnpackedAlign; }; CharUnits UnpackedBaseAlign = Layout.getNonVirtualAlignment(); CharUnits UnpackedPreferredBaseAlign = Layout.getPreferredNVAlignment(); CharUnits BaseAlign = getBaseOrPreferredBaseAlignFromUnpacked(UnpackedBaseAlign); CharUnits PreferredBaseAlign = getBaseOrPreferredBaseAlignFromUnpacked(UnpackedPreferredBaseAlign); const bool DefaultsToAIXPowerAlignment = Context.getTargetInfo().defaultsToAIXPowerAlignment(); if (DefaultsToAIXPowerAlignment) { // AIX `power` alignment does not apply the preferred alignment for // non-union classes if the source of the alignment (the current base in // this context) follows introduction of the first subobject with // exclusively allocated space or zero-extent array. if (!Base->Class->isEmpty() && !HandledFirstNonOverlappingEmptyField) { // By handling a base class that is not empty, we're handling the // "first (inherited) member". HandledFirstNonOverlappingEmptyField = true; } else if (!IsNaturalAlign) { UnpackedPreferredBaseAlign = UnpackedBaseAlign; PreferredBaseAlign = BaseAlign; } } CharUnits UnpackedAlignTo = !DefaultsToAIXPowerAlignment ? UnpackedBaseAlign : UnpackedPreferredBaseAlign; // If we have an empty base class, try to place it at offset 0. if (Base->Class->isEmpty() && (!HasExternalLayout || Offset == CharUnits::Zero()) && EmptySubobjects->CanPlaceBaseAtOffset(Base, CharUnits::Zero())) { setSize(std::max(getSize(), Layout.getSize())); UpdateAlignment(BaseAlign, UnpackedAlignTo, PreferredBaseAlign); return CharUnits::Zero(); } // The maximum field alignment overrides the base align/(AIX-only) preferred // base align. if (!MaxFieldAlignment.isZero()) { BaseAlign = std::min(BaseAlign, MaxFieldAlignment); PreferredBaseAlign = std::min(PreferredBaseAlign, MaxFieldAlignment); UnpackedAlignTo = std::min(UnpackedAlignTo, MaxFieldAlignment); } CharUnits AlignTo = !DefaultsToAIXPowerAlignment ? BaseAlign : PreferredBaseAlign; if (!HasExternalLayout) { // Round up the current record size to the base's alignment boundary. Offset = getDataSize().alignTo(AlignTo); // Try to place the base. 
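  // Illustrative sketch (not part of the builder; assumes a typical x86-64
  // Itanium target): the empty-base handling above, combined with the
  // CanPlaceBaseAtOffset / CanPlaceFieldAtOffset checks, gives:
  //
  //   struct Empty {};
  //   struct D1 : Empty { int x; };          // Empty at offset 0, sizeof == 4
  //   struct D2 : Empty { Empty e; int x; }; // the Empty base and the Empty
  //                                          // member cannot share offset 0,
  //                                          // so 'e' lands at offset 1 and
  //                                          // sizeof(D2) == 8.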
while (!EmptySubobjects->CanPlaceBaseAtOffset(Base, Offset)) Offset += AlignTo; } else { bool Allowed = EmptySubobjects->CanPlaceBaseAtOffset(Base, Offset); (void)Allowed; assert(Allowed && "Base subobject externally placed at overlapping offset"); if (InferAlignment && Offset < getDataSize().alignTo(AlignTo)) { // The externally-supplied base offset is before the base offset we // computed. Assume that the structure is packed. Alignment = CharUnits::One(); InferAlignment = false; } } if (!Base->Class->isEmpty()) { // Update the data size. setDataSize(Offset + Layout.getNonVirtualSize()); setSize(std::max(getSize(), getDataSize())); } else setSize(std::max(getSize(), Offset + Layout.getSize())); // Remember max struct/class alignment. UpdateAlignment(BaseAlign, UnpackedAlignTo, PreferredBaseAlign); return Offset; } void ItaniumRecordLayoutBuilder::InitializeLayout(const Decl *D) { if (const RecordDecl *RD = dyn_cast(D)) { IsUnion = RD->isUnion(); IsMsStruct = RD->isMsStruct(Context); } Packed = D->hasAttr(); // Honor the default struct packing maximum alignment flag. if (unsigned DefaultMaxFieldAlignment = Context.getLangOpts().PackStruct) { MaxFieldAlignment = CharUnits::fromQuantity(DefaultMaxFieldAlignment); } // mac68k alignment supersedes maximum field alignment and attribute aligned, // and forces all structures to have 2-byte alignment. The IBM docs on it // allude to additional (more complicated) semantics, especially with regard // to bit-fields, but gcc appears not to follow that. if (D->hasAttr()) { assert( !D->hasAttr() && "Having both mac68k and natural alignment on a decl is not allowed."); IsMac68kAlign = true; MaxFieldAlignment = CharUnits::fromQuantity(2); Alignment = CharUnits::fromQuantity(2); PreferredAlignment = CharUnits::fromQuantity(2); } else { if (D->hasAttr()) IsNaturalAlign = true; if (const MaxFieldAlignmentAttr *MFAA = D->getAttr()) MaxFieldAlignment = Context.toCharUnitsFromBits(MFAA->getAlignment()); if (unsigned MaxAlign = D->getMaxAlignment()) UpdateAlignment(Context.toCharUnitsFromBits(MaxAlign)); } HandledFirstNonOverlappingEmptyField = !Context.getTargetInfo().defaultsToAIXPowerAlignment() || IsNaturalAlign; // If there is an external AST source, ask it for the various offsets. if (const RecordDecl *RD = dyn_cast(D)) if (ExternalASTSource *Source = Context.getExternalSource()) { UseExternalLayout = Source->layoutRecordType( RD, External.Size, External.Align, External.FieldOffsets, External.BaseOffsets, External.VirtualBaseOffsets); // Update based on external alignment. if (UseExternalLayout) { if (External.Align > 0) { Alignment = Context.toCharUnitsFromBits(External.Align); PreferredAlignment = Context.toCharUnitsFromBits(External.Align); } else { // The external source didn't have alignment information; infer it. InferAlignment = true; } } } } void ItaniumRecordLayoutBuilder::Layout(const RecordDecl *D) { InitializeLayout(D); LayoutFields(D); // Finally, round the size of the total struct up to the alignment of the // struct itself. FinishLayout(D); } void ItaniumRecordLayoutBuilder::Layout(const CXXRecordDecl *RD) { InitializeLayout(RD); // Lay out the vtable and the non-virtual bases. LayoutNonVirtualBases(RD); LayoutFields(RD); NonVirtualSize = Context.toCharUnitsFromBits( llvm::alignTo(getSizeInBits(), Context.getTargetInfo().getCharAlign())); NonVirtualAlignment = Alignment; PreferredNVAlignment = PreferredAlignment; // Lay out the virtual bases and add the primary virtual base offsets. 
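  // Illustrative sketch (not part of the builder; assumes a typical x86-64
  // target): the MaxFieldAlignment computed in InitializeLayout above (from
  // #pragma pack or the -fpack-struct default) caps every field's alignment:
  //
  //   #pragma pack(push, 2)
  //   struct P { char c; double d; };  // d is capped at 2-byte alignment, so
  //   #pragma pack(pop)                // it lands at offset 2 and
  //                                    // sizeof(P) == 10.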
LayoutVirtualBases(RD, RD); // Finally, round the size of the total struct up to the alignment // of the struct itself. FinishLayout(RD); #ifndef NDEBUG // Check that we have base offsets for all bases. for (const CXXBaseSpecifier &Base : RD->bases()) { if (Base.isVirtual()) continue; const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); assert(Bases.count(BaseDecl) && "Did not find base offset!"); } // And all virtual bases. for (const CXXBaseSpecifier &Base : RD->vbases()) { const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); assert(VBases.count(BaseDecl) && "Did not find base offset!"); } #endif } void ItaniumRecordLayoutBuilder::Layout(const ObjCInterfaceDecl *D) { if (ObjCInterfaceDecl *SD = D->getSuperClass()) { const ASTRecordLayout &SL = Context.getASTObjCInterfaceLayout(SD); UpdateAlignment(SL.getAlignment()); // We start laying out ivars not at the end of the superclass // structure, but at the next byte following the last field. setDataSize(SL.getDataSize()); setSize(getDataSize()); } InitializeLayout(D); // Layout each ivar sequentially. for (const ObjCIvarDecl *IVD = D->all_declared_ivar_begin(); IVD; IVD = IVD->getNextIvar()) LayoutField(IVD, false); // Finally, round the size of the total struct up to the alignment of the // struct itself. FinishLayout(D); } void ItaniumRecordLayoutBuilder::LayoutFields(const RecordDecl *D) { // Layout each field, for now, just sequentially, respecting alignment. In // the future, this will need to be tweakable by targets. bool InsertExtraPadding = D->mayInsertExtraPadding(/*EmitRemark=*/true); bool HasFlexibleArrayMember = D->hasFlexibleArrayMember(); for (auto I = D->field_begin(), End = D->field_end(); I != End; ++I) { auto Next(I); ++Next; LayoutField(*I, InsertExtraPadding && (Next != End || !HasFlexibleArrayMember)); } } // Rounds the specified size to have it a multiple of the char size. static uint64_t roundUpSizeToCharAlignment(uint64_t Size, const ASTContext &Context) { uint64_t CharAlignment = Context.getTargetInfo().getCharAlign(); return llvm::alignTo(Size, CharAlignment); } void ItaniumRecordLayoutBuilder::LayoutWideBitField(uint64_t FieldSize, uint64_t StorageUnitSize, bool FieldPacked, const FieldDecl *D) { assert(Context.getLangOpts().CPlusPlus && "Can only have wide bit-fields in C++!"); // Itanium C++ ABI 2.4: // If sizeof(T)*8 < n, let T' be the largest integral POD type with // sizeof(T')*8 <= n. QualType IntegralPODTypes[] = { Context.UnsignedCharTy, Context.UnsignedShortTy, Context.UnsignedIntTy, Context.UnsignedLongTy, Context.UnsignedLongLongTy }; QualType Type; for (const QualType &QT : IntegralPODTypes) { uint64_t Size = Context.getTypeSize(QT); if (Size > FieldSize) break; Type = QT; } assert(!Type.isNull() && "Did not find a type!"); CharUnits TypeAlign = Context.getTypeAlignInChars(Type); // We're not going to use any of the unfilled bits in the last byte. UnfilledBitsInLastUnit = 0; LastBitfieldStorageUnitSize = 0; uint64_t FieldOffset; uint64_t UnpaddedFieldOffset = getDataSizeInBits() - UnfilledBitsInLastUnit; if (IsUnion) { uint64_t RoundedFieldSize = roundUpSizeToCharAlignment(FieldSize, Context); setDataSize(std::max(getDataSizeInBits(), RoundedFieldSize)); FieldOffset = 0; } else { // The bitfield is allocated starting at the next offset aligned // appropriately for T', with length n bits. 
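  // Illustrative sketch (not part of the builder; assumes a 64-bit Itanium
  // target with 32-bit int and 64-bit long): for
  //
  //   struct W { int bf : 48; };  // declared width exceeds sizeof(int) * 8
  //
  // the loop above selects T' = unsigned int, the widest type in
  // IntegralPODTypes whose size (32 bits) does not exceed 48, so the
  // allocation step below starts the bit-field at the next 32-bit-aligned bit
  // offset and the field contributes only 4-byte alignment to W.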
FieldOffset = llvm::alignTo(getDataSizeInBits(), Context.toBits(TypeAlign)); uint64_t NewSizeInBits = FieldOffset + FieldSize; setDataSize( llvm::alignTo(NewSizeInBits, Context.getTargetInfo().getCharAlign())); UnfilledBitsInLastUnit = getDataSizeInBits() - NewSizeInBits; } // Place this field at the current location. FieldOffsets.push_back(FieldOffset); CheckFieldPadding(FieldOffset, UnpaddedFieldOffset, FieldOffset, Context.toBits(TypeAlign), FieldPacked, D); // Update the size. setSize(std::max(getSizeInBits(), getDataSizeInBits())); // Remember max struct/class alignment. UpdateAlignment(TypeAlign); } static bool isAIXLayout(const ASTContext &Context) { return Context.getTargetInfo().getTriple().getOS() == llvm::Triple::AIX; } void ItaniumRecordLayoutBuilder::LayoutBitField(const FieldDecl *D) { bool FieldPacked = Packed || D->hasAttr(); uint64_t FieldSize = D->getBitWidthValue(Context); TypeInfo FieldInfo = Context.getTypeInfo(D->getType()); uint64_t StorageUnitSize = FieldInfo.Width; unsigned FieldAlign = FieldInfo.Align; bool AlignIsRequired = FieldInfo.isAlignRequired(); // UnfilledBitsInLastUnit is the difference between the end of the // last allocated bitfield (i.e. the first bit offset available for // bitfields) and the end of the current data size in bits (i.e. the // first bit offset available for non-bitfields). The current data // size in bits is always a multiple of the char size; additionally, // for ms_struct records it's also a multiple of the // LastBitfieldStorageUnitSize (if set). // The struct-layout algorithm is dictated by the platform ABI, // which in principle could use almost any rules it likes. In // practice, UNIXy targets tend to inherit the algorithm described // in the System V generic ABI. The basic bitfield layout rule in // System V is to place bitfields at the next available bit offset // where the entire bitfield would fit in an aligned storage unit of // the declared type; it's okay if an earlier or later non-bitfield // is allocated in the same storage unit. However, some targets // (those that !useBitFieldTypeAlignment(), e.g. ARM APCS) don't // require this storage unit to be aligned, and therefore always put // the bitfield at the next available bit offset. // ms_struct basically requests a complete replacement of the // platform ABI's struct-layout algorithm, with the high-level goal // of duplicating MSVC's layout. For non-bitfields, this follows // the standard algorithm. The basic bitfield layout rule is to // allocate an entire unit of the bitfield's declared type // (e.g. 'unsigned long'), then parcel it up among successive // bitfields whose declared types have the same size, making a new // unit as soon as the last can no longer store the whole value. // Since it completely replaces the platform ABI's algorithm, // settings like !useBitFieldTypeAlignment() do not apply. // A zero-width bitfield forces the use of a new storage unit for // later bitfields. In general, this occurs by rounding up the // current size of the struct as if the algorithm were about to // place a non-bitfield of the field's formal type. Usually this // does not change the alignment of the struct itself, but it does // on some targets (those that useZeroLengthBitfieldAlignment(), // e.g. ARM). In ms_struct layout, zero-width bitfields are // ignored unless they follow a non-zero-width bitfield. // A field alignment restriction (e.g. from #pragma pack) or // specification (e.g. from __attribute__((aligned))) changes the // formal alignment of the field. 
For System V, this alters the // required alignment of the notional storage unit that must contain // the bitfield. For ms_struct, this only affects the placement of // new storage units. In both cases, the effect of #pragma pack is // ignored on zero-width bitfields. // On System V, a packed field (e.g. from #pragma pack or // __attribute__((packed))) always uses the next available bit // offset. // In an ms_struct struct, the alignment of a fundamental type is // always equal to its size. This is necessary in order to mimic // the i386 alignment rules on targets which might not fully align // all types (e.g. Darwin PPC32, where alignof(long long) == 4). // First, some simple bookkeeping to perform for ms_struct structs. if (IsMsStruct) { // The field alignment for integer types is always the size. FieldAlign = StorageUnitSize; // If the previous field was not a bitfield, or was a bitfield // with a different storage unit size, or if this field doesn't fit into // the current storage unit, we're done with that storage unit. if (LastBitfieldStorageUnitSize != StorageUnitSize || UnfilledBitsInLastUnit < FieldSize) { // Also, ignore zero-length bitfields after non-bitfields. if (!LastBitfieldStorageUnitSize && !FieldSize) FieldAlign = 1; UnfilledBitsInLastUnit = 0; LastBitfieldStorageUnitSize = 0; } } if (isAIXLayout(Context)) { if (StorageUnitSize < Context.getTypeSize(Context.UnsignedIntTy)) { // On AIX, [bool, char, short] bitfields have the same alignment // as [unsigned]. StorageUnitSize = Context.getTypeSize(Context.UnsignedIntTy); } else if (StorageUnitSize > Context.getTypeSize(Context.UnsignedIntTy) && Context.getTargetInfo().getTriple().isArch32Bit() && FieldSize <= 32) { // Under 32-bit compile mode, the bitcontainer is 32 bits if a single // long long bitfield has length no greater than 32 bits. StorageUnitSize = 32; if (!AlignIsRequired) FieldAlign = 32; } if (FieldAlign < StorageUnitSize) { // The bitfield alignment should always be greater than or equal to // bitcontainer size. FieldAlign = StorageUnitSize; } } // If the field is wider than its declared type, it follows // different rules in all cases, except on AIX. // On AIX, wide bitfield follows the same rules as normal bitfield. if (FieldSize > StorageUnitSize && !isAIXLayout(Context)) { LayoutWideBitField(FieldSize, StorageUnitSize, FieldPacked, D); return; } // Compute the next available bit offset. uint64_t FieldOffset = IsUnion ? 0 : (getDataSizeInBits() - UnfilledBitsInLastUnit); // Handle targets that don't honor bitfield type alignment. if (!IsMsStruct && !Context.getTargetInfo().useBitFieldTypeAlignment()) { // Some such targets do honor it on zero-width bitfields. if (FieldSize == 0 && Context.getTargetInfo().useZeroLengthBitfieldAlignment()) { // Some targets don't honor leading zero-width bitfield. if (!IsUnion && FieldOffset == 0 && !Context.getTargetInfo().useLeadingZeroLengthBitfield()) FieldAlign = 1; else { // The alignment to round up to is the max of the field's natural // alignment and a target-specific fixed value (sometimes zero). unsigned ZeroLengthBitfieldBoundary = Context.getTargetInfo().getZeroLengthBitfieldBoundary(); FieldAlign = std::max(FieldAlign, ZeroLengthBitfieldBoundary); } // If that doesn't apply, just ignore the field alignment. } else { FieldAlign = 1; } } // Remember the alignment we would have used if the field were not packed. unsigned UnpackedFieldAlign = FieldAlign; // Ignore the field alignment if the field is packed unless it has zero-size. 
if (!IsMsStruct && FieldPacked && FieldSize != 0) FieldAlign = 1; // But, if there's an 'aligned' attribute on the field, honor that. unsigned ExplicitFieldAlign = D->getMaxAlignment(); if (ExplicitFieldAlign) { FieldAlign = std::max(FieldAlign, ExplicitFieldAlign); UnpackedFieldAlign = std::max(UnpackedFieldAlign, ExplicitFieldAlign); } // But, if there's a #pragma pack in play, that takes precedent over // even the 'aligned' attribute, for non-zero-width bitfields. unsigned MaxFieldAlignmentInBits = Context.toBits(MaxFieldAlignment); if (!MaxFieldAlignment.isZero() && FieldSize) { UnpackedFieldAlign = std::min(UnpackedFieldAlign, MaxFieldAlignmentInBits); if (FieldPacked) FieldAlign = UnpackedFieldAlign; else FieldAlign = std::min(FieldAlign, MaxFieldAlignmentInBits); } // But, ms_struct just ignores all of that in unions, even explicit // alignment attributes. if (IsMsStruct && IsUnion) { FieldAlign = UnpackedFieldAlign = 1; } // For purposes of diagnostics, we're going to simultaneously // compute the field offsets that we would have used if we weren't // adding any alignment padding or if the field weren't packed. uint64_t UnpaddedFieldOffset = FieldOffset; uint64_t UnpackedFieldOffset = FieldOffset; // Check if we need to add padding to fit the bitfield within an // allocation unit with the right size and alignment. The rules are // somewhat different here for ms_struct structs. if (IsMsStruct) { // If it's not a zero-width bitfield, and we can fit the bitfield // into the active storage unit (and we haven't already decided to // start a new storage unit), just do so, regardless of any other // other consideration. Otherwise, round up to the right alignment. if (FieldSize == 0 || FieldSize > UnfilledBitsInLastUnit) { FieldOffset = llvm::alignTo(FieldOffset, FieldAlign); UnpackedFieldOffset = llvm::alignTo(UnpackedFieldOffset, UnpackedFieldAlign); UnfilledBitsInLastUnit = 0; } } else { // #pragma pack, with any value, suppresses the insertion of padding. bool AllowPadding = MaxFieldAlignment.isZero(); // Compute the real offset. if (FieldSize == 0 || (AllowPadding && (FieldOffset & (FieldAlign - 1)) + FieldSize > StorageUnitSize)) { FieldOffset = llvm::alignTo(FieldOffset, FieldAlign); } else if (ExplicitFieldAlign && (MaxFieldAlignmentInBits == 0 || ExplicitFieldAlign <= MaxFieldAlignmentInBits) && Context.getTargetInfo().useExplicitBitFieldAlignment()) { // TODO: figure it out what needs to be done on targets that don't honor // bit-field type alignment like ARM APCS ABI. FieldOffset = llvm::alignTo(FieldOffset, ExplicitFieldAlign); } // Repeat the computation for diagnostic purposes. if (FieldSize == 0 || (AllowPadding && (UnpackedFieldOffset & (UnpackedFieldAlign - 1)) + FieldSize > StorageUnitSize)) UnpackedFieldOffset = llvm::alignTo(UnpackedFieldOffset, UnpackedFieldAlign); else if (ExplicitFieldAlign && (MaxFieldAlignmentInBits == 0 || ExplicitFieldAlign <= MaxFieldAlignmentInBits) && Context.getTargetInfo().useExplicitBitFieldAlignment()) UnpackedFieldOffset = llvm::alignTo(UnpackedFieldOffset, ExplicitFieldAlign); } // If we're using external layout, give the external layout a chance // to override this information. if (UseExternalLayout) FieldOffset = updateExternalFieldOffset(D, FieldOffset); // Okay, place the bitfield at the calculated offset. FieldOffsets.push_back(FieldOffset); // Bookkeeping: // Anonymous members don't affect the overall record alignment, // except on targets where they do. 
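  // Illustrative sketch (not part of the builder; assumes a typical x86-64
  // System V target with no #pragma pack, i.e. AllowPadding == true): the
  // storage-unit test above behaves as follows.
  //
  //   struct S1 { char c; int b : 20; };  // 8 + 20 <= 32: b stays in the
  //                                       // aligned int unit at offset 0,
  //                                       // starting at bit 8; sizeof == 4
  //   struct S2 { char c; int b : 30; };  // 8 + 30 > 32: b is pushed to the
  //                                       // next unit at bit 32; sizeof == 8
  //
  // Under #pragma pack (AllowPadding == false) or with the packed attribute,
  // S2's 'b' would instead start at bit 8.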
if (!IsMsStruct && !Context.getTargetInfo().useZeroLengthBitfieldAlignment() && !D->getIdentifier()) FieldAlign = UnpackedFieldAlign = 1; // On AIX, zero-width bitfields pad out to the natural alignment boundary, // but do not increase the alignment greater than the MaxFieldAlignment, or 1 // if packed. if (isAIXLayout(Context) && !FieldSize) { if (FieldPacked) FieldAlign = 1; if (!MaxFieldAlignment.isZero()) { UnpackedFieldAlign = std::min(UnpackedFieldAlign, MaxFieldAlignmentInBits); FieldAlign = std::min(FieldAlign, MaxFieldAlignmentInBits); } } // Diagnose differences in layout due to padding or packing. if (!UseExternalLayout) CheckFieldPadding(FieldOffset, UnpaddedFieldOffset, UnpackedFieldOffset, UnpackedFieldAlign, FieldPacked, D); // Update DataSize to include the last byte containing (part of) the bitfield. // For unions, this is just a max operation, as usual. if (IsUnion) { // For ms_struct, allocate the entire storage unit --- unless this // is a zero-width bitfield, in which case just use a size of 1. uint64_t RoundedFieldSize; if (IsMsStruct) { RoundedFieldSize = (FieldSize ? StorageUnitSize : Context.getTargetInfo().getCharWidth()); // Otherwise, allocate just the number of bytes required to store // the bitfield. } else { RoundedFieldSize = roundUpSizeToCharAlignment(FieldSize, Context); } setDataSize(std::max(getDataSizeInBits(), RoundedFieldSize)); // For non-zero-width bitfields in ms_struct structs, allocate a new // storage unit if necessary. } else if (IsMsStruct && FieldSize) { // We should have cleared UnfilledBitsInLastUnit in every case // where we changed storage units. if (!UnfilledBitsInLastUnit) { setDataSize(FieldOffset + StorageUnitSize); UnfilledBitsInLastUnit = StorageUnitSize; } UnfilledBitsInLastUnit -= FieldSize; LastBitfieldStorageUnitSize = StorageUnitSize; // Otherwise, bump the data size up to include the bitfield, // including padding up to char alignment, and then remember how // bits we didn't use. } else { uint64_t NewSizeInBits = FieldOffset + FieldSize; uint64_t CharAlignment = Context.getTargetInfo().getCharAlign(); setDataSize(llvm::alignTo(NewSizeInBits, CharAlignment)); UnfilledBitsInLastUnit = getDataSizeInBits() - NewSizeInBits; // The only time we can get here for an ms_struct is if this is a // zero-width bitfield, which doesn't count as anything for the // purposes of unfilled bits. LastBitfieldStorageUnitSize = 0; } // Update the size. setSize(std::max(getSizeInBits(), getDataSizeInBits())); // Remember max struct/class alignment. UnadjustedAlignment = std::max(UnadjustedAlignment, Context.toCharUnitsFromBits(FieldAlign)); UpdateAlignment(Context.toCharUnitsFromBits(FieldAlign), Context.toCharUnitsFromBits(UnpackedFieldAlign)); } void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D, bool InsertExtraPadding) { auto *FieldClass = D->getType()->getAsCXXRecordDecl(); bool PotentiallyOverlapping = D->hasAttr() && FieldClass; bool IsOverlappingEmptyField = PotentiallyOverlapping && FieldClass->isEmpty(); CharUnits FieldOffset = (IsUnion || IsOverlappingEmptyField) ? 
CharUnits::Zero() : getDataSize(); const bool DefaultsToAIXPowerAlignment = Context.getTargetInfo().defaultsToAIXPowerAlignment(); bool FoundFirstNonOverlappingEmptyFieldForAIX = false; if (DefaultsToAIXPowerAlignment && !HandledFirstNonOverlappingEmptyField) { assert(FieldOffset == CharUnits::Zero() && "The first non-overlapping empty field should have been handled."); if (!IsOverlappingEmptyField) { FoundFirstNonOverlappingEmptyFieldForAIX = true; // We're going to handle the "first member" based on // `FoundFirstNonOverlappingEmptyFieldForAIX` during the current // invocation of this function; record it as handled for future // invocations (except for unions, because the current field does not // represent all "firsts"). HandledFirstNonOverlappingEmptyField = !IsUnion; } } if (D->isBitField()) { LayoutBitField(D); return; } uint64_t UnpaddedFieldOffset = getDataSizeInBits() - UnfilledBitsInLastUnit; // Reset the unfilled bits. UnfilledBitsInLastUnit = 0; LastBitfieldStorageUnitSize = 0; - llvm::Triple Target = Context.getTargetInfo().getTriple(); - bool FieldPacked = (Packed && (!FieldClass || FieldClass->isPOD() || - Context.getLangOpts().getClangABICompat() <= - LangOptions::ClangABI::Ver13 || - Target.isPS4() || Target.isOSDarwin())) || - D->hasAttr(); + bool FieldPacked = Packed || D->hasAttr(); AlignRequirementKind AlignRequirement = AlignRequirementKind::None; CharUnits FieldSize; CharUnits FieldAlign; // The amount of this class's dsize occupied by the field. // This is equal to FieldSize unless we're permitted to pack // into the field's tail padding. CharUnits EffectiveFieldSize; auto setDeclInfo = [&](bool IsIncompleteArrayType) { auto TI = Context.getTypeInfoInChars(D->getType()); FieldAlign = TI.Align; // Flexible array members don't have any size, but they have to be // aligned appropriately for their element type. EffectiveFieldSize = FieldSize = IsIncompleteArrayType ? CharUnits::Zero() : TI.Width; AlignRequirement = TI.AlignRequirement; }; if (D->getType()->isIncompleteArrayType()) { setDeclInfo(true /* IsIncompleteArrayType */); } else if (const ReferenceType *RT = D->getType()->getAs()) { unsigned AS = Context.getTargetAddressSpace(RT->getPointeeType()); EffectiveFieldSize = FieldSize = Context.toCharUnitsFromBits( Context.getTargetInfo().getPointerWidth(AS)); FieldAlign = Context.toCharUnitsFromBits( Context.getTargetInfo().getPointerAlign(AS)); } else { setDeclInfo(false /* IsIncompleteArrayType */); // A potentially-overlapping field occupies its dsize or nvsize, whichever // is larger. if (PotentiallyOverlapping) { const ASTRecordLayout &Layout = Context.getASTRecordLayout(FieldClass); EffectiveFieldSize = std::max(Layout.getNonVirtualSize(), Layout.getDataSize()); } if (IsMsStruct) { // If MS bitfield layout is required, figure out what type is being // laid out and align the field to the width of that type. // Resolve all typedefs down to their base type and round up the field // alignment if necessary. QualType T = Context.getBaseElementType(D->getType()); if (const BuiltinType *BTy = T->getAs()) { CharUnits TypeSize = Context.getTypeSizeInChars(BTy); if (!llvm::isPowerOf2_64(TypeSize.getQuantity())) { assert( !Context.getTargetInfo().getTriple().isWindowsMSVCEnvironment() && "Non PowerOf2 size in MSVC mode"); // Base types with sizes that aren't a power of two don't work // with the layout rules for MS structs. This isn't an issue in // MSVC itself since there are no such base data types there. // On e.g. x86_32 mingw and linux, long double is 12 bytes though. 
// Any structs involving that data type obviously can't be ABI // compatible with MSVC regardless of how it is laid out. // Since ms_struct can be mass enabled (via a pragma or via the // -mms-bitfields command line parameter), this can trigger for // structs that don't actually need MSVC compatibility, so we // need to be able to sidestep the ms_struct layout for these types. // Since the combination of -mms-bitfields together with structs // like max_align_t (which contains a long double) for mingw is // quite common (and GCC handles it silently), just handle it // silently there. For other targets that have ms_struct enabled // (most probably via a pragma or attribute), trigger a diagnostic // that defaults to an error. if (!Context.getTargetInfo().getTriple().isWindowsGNUEnvironment()) Diag(D->getLocation(), diag::warn_npot_ms_struct); } if (TypeSize > FieldAlign && llvm::isPowerOf2_64(TypeSize.getQuantity())) FieldAlign = TypeSize; } } } // When used as part of a typedef, or together with a 'packed' attribute, the // 'aligned' attribute can be used to decrease alignment. In that case, it // overrides any computed alignment we have, and there is no need to upgrade // the alignment. auto alignedAttrCanDecreaseAIXAlignment = [AlignRequirement, FieldPacked] { // Enum alignment sources can be safely ignored here, because this only // helps decide whether we need the AIX alignment upgrade, which only // applies to floating-point types. return AlignRequirement == AlignRequirementKind::RequiredByTypedef || (AlignRequirement == AlignRequirementKind::RequiredByRecord && FieldPacked); }; // The AIX `power` alignment rules apply the natural alignment of the // "first member" if it is of a floating-point data type (or is an aggregate // whose recursively "first" member or element is such a type). The alignment // associated with these types for subsequent members use an alignment value // where the floating-point data type is considered to have 4-byte alignment. // // For the purposes of the foregoing: vtable pointers, non-empty base classes, // and zero-width bit-fields count as prior members; members of empty class // types marked `no_unique_address` are not considered to be prior members. CharUnits PreferredAlign = FieldAlign; if (DefaultsToAIXPowerAlignment && !alignedAttrCanDecreaseAIXAlignment() && (FoundFirstNonOverlappingEmptyFieldForAIX || IsNaturalAlign)) { auto performBuiltinTypeAlignmentUpgrade = [&](const BuiltinType *BTy) { if (BTy->getKind() == BuiltinType::Double || BTy->getKind() == BuiltinType::LongDouble) { assert(PreferredAlign == CharUnits::fromQuantity(4) && "No need to upgrade the alignment value."); PreferredAlign = CharUnits::fromQuantity(8); } }; const Type *BaseTy = D->getType()->getBaseElementTypeUnsafe(); if (const ComplexType *CTy = BaseTy->getAs()) { performBuiltinTypeAlignmentUpgrade( CTy->getElementType()->castAs()); } else if (const BuiltinType *BTy = BaseTy->getAs()) { performBuiltinTypeAlignmentUpgrade(BTy); } else if (const RecordType *RT = BaseTy->getAs()) { const RecordDecl *RD = RT->getDecl(); assert(RD && "Expected non-null RecordDecl."); const ASTRecordLayout &FieldRecord = Context.getASTRecordLayout(RD); PreferredAlign = FieldRecord.getPreferredAlignment(); } } // The align if the field is not packed. This is to check if the attribute // was unnecessary (-Wpacked). CharUnits UnpackedFieldAlign = !DefaultsToAIXPowerAlignment ? 
FieldAlign : PreferredAlign; CharUnits UnpackedFieldOffset = FieldOffset; CharUnits OriginalFieldAlign = UnpackedFieldAlign; if (FieldPacked) { FieldAlign = CharUnits::One(); PreferredAlign = CharUnits::One(); } CharUnits MaxAlignmentInChars = Context.toCharUnitsFromBits(D->getMaxAlignment()); FieldAlign = std::max(FieldAlign, MaxAlignmentInChars); PreferredAlign = std::max(PreferredAlign, MaxAlignmentInChars); UnpackedFieldAlign = std::max(UnpackedFieldAlign, MaxAlignmentInChars); // The maximum field alignment overrides the aligned attribute. if (!MaxFieldAlignment.isZero()) { FieldAlign = std::min(FieldAlign, MaxFieldAlignment); PreferredAlign = std::min(PreferredAlign, MaxFieldAlignment); UnpackedFieldAlign = std::min(UnpackedFieldAlign, MaxFieldAlignment); } CharUnits AlignTo = !DefaultsToAIXPowerAlignment ? FieldAlign : PreferredAlign; // Round up the current record size to the field's alignment boundary. FieldOffset = FieldOffset.alignTo(AlignTo); UnpackedFieldOffset = UnpackedFieldOffset.alignTo(UnpackedFieldAlign); if (UseExternalLayout) { FieldOffset = Context.toCharUnitsFromBits( updateExternalFieldOffset(D, Context.toBits(FieldOffset))); if (!IsUnion && EmptySubobjects) { // Record the fact that we're placing a field at this offset. bool Allowed = EmptySubobjects->CanPlaceFieldAtOffset(D, FieldOffset); (void)Allowed; assert(Allowed && "Externally-placed field cannot be placed here"); } } else { if (!IsUnion && EmptySubobjects) { // Check if we can place the field at this offset. while (!EmptySubobjects->CanPlaceFieldAtOffset(D, FieldOffset)) { // We couldn't place the field at the offset. Try again at a new offset. // We try offset 0 (for an empty field) and then dsize(C) onwards. if (FieldOffset == CharUnits::Zero() && getDataSize() != CharUnits::Zero()) FieldOffset = getDataSize().alignTo(AlignTo); else FieldOffset += AlignTo; } } } // Place this field at the current location. FieldOffsets.push_back(Context.toBits(FieldOffset)); if (!UseExternalLayout) CheckFieldPadding(Context.toBits(FieldOffset), UnpaddedFieldOffset, Context.toBits(UnpackedFieldOffset), Context.toBits(UnpackedFieldAlign), FieldPacked, D); if (InsertExtraPadding) { CharUnits ASanAlignment = CharUnits::fromQuantity(8); CharUnits ExtraSizeForAsan = ASanAlignment; if (FieldSize % ASanAlignment) ExtraSizeForAsan += ASanAlignment - CharUnits::fromQuantity(FieldSize % ASanAlignment); EffectiveFieldSize = FieldSize = FieldSize + ExtraSizeForAsan; } // Reserve space for this field. if (!IsOverlappingEmptyField) { uint64_t EffectiveFieldSizeInBits = Context.toBits(EffectiveFieldSize); if (IsUnion) setDataSize(std::max(getDataSizeInBits(), EffectiveFieldSizeInBits)); else setDataSize(FieldOffset + EffectiveFieldSize); PaddedFieldSize = std::max(PaddedFieldSize, FieldOffset + FieldSize); setSize(std::max(getSizeInBits(), getDataSizeInBits())); } else { setSize(std::max(getSizeInBits(), (uint64_t)Context.toBits(FieldOffset + FieldSize))); } // Remember max struct/class ABI-specified alignment. UnadjustedAlignment = std::max(UnadjustedAlignment, FieldAlign); UpdateAlignment(FieldAlign, UnpackedFieldAlign, PreferredAlign); // For checking the alignment of inner fields against // the alignment of its parent record. if (const RecordDecl *RD = D->getParent()) { // Check if packed attribute or pragma pack is present. if (RD->hasAttr() || !MaxFieldAlignment.isZero()) if (FieldAlign < OriginalFieldAlign) if (D->getType()->isRecordType()) { // If the offset is a multiple of the alignment of // the type, raise the warning. 
// TODO: Takes no account the alignment of the outer struct if (FieldOffset % OriginalFieldAlign != 0) Diag(D->getLocation(), diag::warn_unaligned_access) << Context.getTypeDeclType(RD) << D->getName() << D->getType(); } } } void ItaniumRecordLayoutBuilder::FinishLayout(const NamedDecl *D) { // In C++, records cannot be of size 0. if (Context.getLangOpts().CPlusPlus && getSizeInBits() == 0) { if (const CXXRecordDecl *RD = dyn_cast(D)) { // Compatibility with gcc requires a class (pod or non-pod) // which is not empty but of size 0; such as having fields of // array of zero-length, remains of Size 0 if (RD->isEmpty()) setSize(CharUnits::One()); } else setSize(CharUnits::One()); } // If we have any remaining field tail padding, include that in the overall // size. setSize(std::max(getSizeInBits(), (uint64_t)Context.toBits(PaddedFieldSize))); // Finally, round the size of the record up to the alignment of the // record itself. uint64_t UnpaddedSize = getSizeInBits() - UnfilledBitsInLastUnit; uint64_t UnpackedSizeInBits = llvm::alignTo(getSizeInBits(), Context.toBits(UnpackedAlignment)); uint64_t RoundedSize = llvm::alignTo( getSizeInBits(), Context.toBits(!Context.getTargetInfo().defaultsToAIXPowerAlignment() ? Alignment : PreferredAlignment)); if (UseExternalLayout) { // If we're inferring alignment, and the external size is smaller than // our size after we've rounded up to alignment, conservatively set the // alignment to 1. if (InferAlignment && External.Size < RoundedSize) { Alignment = CharUnits::One(); PreferredAlignment = CharUnits::One(); InferAlignment = false; } setSize(External.Size); return; } // Set the size to the final size. setSize(RoundedSize); unsigned CharBitNum = Context.getTargetInfo().getCharWidth(); if (const RecordDecl *RD = dyn_cast(D)) { // Warn if padding was introduced to the struct/class/union. if (getSizeInBits() > UnpaddedSize) { unsigned PadSize = getSizeInBits() - UnpaddedSize; bool InBits = true; if (PadSize % CharBitNum == 0) { PadSize = PadSize / CharBitNum; InBits = false; } Diag(RD->getLocation(), diag::warn_padded_struct_size) << Context.getTypeDeclType(RD) << PadSize << (InBits ? 1 : 0); // (byte|bit) } // Warn if we packed it unnecessarily, when the unpacked alignment is not // greater than the one after packing, the size in bits doesn't change and // the offset of each field is identical. if (Packed && UnpackedAlignment <= Alignment && UnpackedSizeInBits == getSizeInBits() && !HasPackedField) Diag(D->getLocation(), diag::warn_unnecessary_packed) << Context.getTypeDeclType(RD); } } void ItaniumRecordLayoutBuilder::UpdateAlignment( CharUnits NewAlignment, CharUnits UnpackedNewAlignment, CharUnits PreferredNewAlignment) { // The alignment is not modified when using 'mac68k' alignment or when // we have an externally-supplied layout that also provides overall alignment. 
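  // Illustrative sketch (not part of the builder; assumes a typical x86-64
  // target): the final rounding in FinishLayout above is what turns
  //
  //   struct T { int i; char c; };
  //
  // into an 8-byte type: the unpadded size is 5 bytes, the record alignment
  // is 4, so 3 bytes of tail padding are added and -Wpadded reports them via
  // warn_padded_struct_size.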
if (IsMac68kAlign || (UseExternalLayout && !InferAlignment)) return; if (NewAlignment > Alignment) { assert(llvm::isPowerOf2_64(NewAlignment.getQuantity()) && "Alignment not a power of 2"); Alignment = NewAlignment; } if (UnpackedNewAlignment > UnpackedAlignment) { assert(llvm::isPowerOf2_64(UnpackedNewAlignment.getQuantity()) && "Alignment not a power of 2"); UnpackedAlignment = UnpackedNewAlignment; } if (PreferredNewAlignment > PreferredAlignment) { assert(llvm::isPowerOf2_64(PreferredNewAlignment.getQuantity()) && "Alignment not a power of 2"); PreferredAlignment = PreferredNewAlignment; } } uint64_t ItaniumRecordLayoutBuilder::updateExternalFieldOffset(const FieldDecl *Field, uint64_t ComputedOffset) { uint64_t ExternalFieldOffset = External.getExternalFieldOffset(Field); if (InferAlignment && ExternalFieldOffset < ComputedOffset) { // The externally-supplied field offset is before the field offset we // computed. Assume that the structure is packed. Alignment = CharUnits::One(); PreferredAlignment = CharUnits::One(); InferAlignment = false; } // Use the externally-supplied field offset. return ExternalFieldOffset; } /// Get diagnostic %select index for tag kind for /// field padding diagnostic message. /// WARNING: Indexes apply to particular diagnostics only! /// /// \returns diagnostic %select index. static unsigned getPaddingDiagFromTagKind(TagTypeKind Tag) { switch (Tag) { case TTK_Struct: return 0; case TTK_Interface: return 1; case TTK_Class: return 2; default: llvm_unreachable("Invalid tag kind for field padding diagnostic!"); } } void ItaniumRecordLayoutBuilder::CheckFieldPadding( uint64_t Offset, uint64_t UnpaddedOffset, uint64_t UnpackedOffset, unsigned UnpackedAlign, bool isPacked, const FieldDecl *D) { // We let objc ivars without warning, objc interfaces generally are not used // for padding tricks. if (isa(D)) return; // Don't warn about structs created without a SourceLocation. This can // be done by clients of the AST, such as codegen. if (D->getLocation().isInvalid()) return; unsigned CharBitNum = Context.getTargetInfo().getCharWidth(); // Warn if padding was introduced to the struct/class. if (!IsUnion && Offset > UnpaddedOffset) { unsigned PadSize = Offset - UnpaddedOffset; bool InBits = true; if (PadSize % CharBitNum == 0) { PadSize = PadSize / CharBitNum; InBits = false; } if (D->getIdentifier()) Diag(D->getLocation(), diag::warn_padded_struct_field) << getPaddingDiagFromTagKind(D->getParent()->getTagKind()) << Context.getTypeDeclType(D->getParent()) << PadSize << (InBits ? 1 : 0) // (byte|bit) << D->getIdentifier(); else Diag(D->getLocation(), diag::warn_padded_struct_anon_field) << getPaddingDiagFromTagKind(D->getParent()->getTagKind()) << Context.getTypeDeclType(D->getParent()) << PadSize << (InBits ? 1 : 0); // (byte|bit) } if (isPacked && Offset != UnpackedOffset) { HasPackedField = true; } } static const CXXMethodDecl *computeKeyFunction(ASTContext &Context, const CXXRecordDecl *RD) { // If a class isn't polymorphic it doesn't have a key function. if (!RD->isPolymorphic()) return nullptr; // A class that is not externally visible doesn't have a key function. (Or // at least, there's no point to assigning a key function to such a class; // this doesn't affect the ABI.) if (!RD->isExternallyVisible()) return nullptr; // Template instantiations don't have key functions per Itanium C++ ABI 5.2.6. // Same behavior as GCC. 
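  // Illustrative sketch (not part of the builder): for
  //
  //   struct K {
  //     virtual void f();      // declared here, defined out of line
  //     virtual void g() {}    // has an inline body: skipped by the loop below
  //   };
  //
  // the loop below returns K::f as the key function, so K's vtable is emitted
  // only in the translation unit that defines K::f.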
  TemplateSpecializationKind TSK = RD->getTemplateSpecializationKind();
  if (TSK == TSK_ImplicitInstantiation ||
      TSK == TSK_ExplicitInstantiationDeclaration ||
      TSK == TSK_ExplicitInstantiationDefinition)
    return nullptr;

  bool allowInlineFunctions =
      Context.getTargetInfo().getCXXABI().canKeyFunctionBeInline();

  for (const CXXMethodDecl *MD : RD->methods()) {
    if (!MD->isVirtual())
      continue;

    if (MD->isPure())
      continue;

    // Ignore implicit member functions, they are always marked as inline, but
    // they don't have a body until they're defined.
    if (MD->isImplicit())
      continue;

    if (MD->isInlineSpecified() || MD->isConstexpr())
      continue;

    if (MD->hasInlineBody())
      continue;

    // Ignore inline deleted or defaulted functions.
    if (!MD->isUserProvided())
      continue;

    // In certain ABIs, ignore functions with out-of-line inline definitions.
    if (!allowInlineFunctions) {
      const FunctionDecl *Def;
      if (MD->hasBody(Def) && Def->isInlineSpecified())
        continue;
    }

    if (Context.getLangOpts().CUDA) {
      // While compiler may see key method in this TU, during CUDA
      // compilation we should ignore methods that are not accessible
      // on this side of compilation.
      if (Context.getLangOpts().CUDAIsDevice) {
        // In device mode ignore methods without __device__ attribute.
        if (!MD->hasAttr<CUDADeviceAttr>())
          continue;
      } else {
        // In host mode ignore __device__-only methods.
        if (!MD->hasAttr<CUDAHostAttr>() && MD->hasAttr<CUDADeviceAttr>())
          continue;
      }
    }

    // If the key function is dllimport but the class isn't, then the class has
    // no key function. The DLL that exports the key function won't export the
    // vtable in this case.
    if (MD->hasAttr<DLLImportAttr>() && !RD->hasAttr<DLLImportAttr>() &&
        !Context.getTargetInfo().hasPS4DLLImportExport())
      return nullptr;

    // We found it.
    return MD;
  }

  return nullptr;
}

DiagnosticBuilder ItaniumRecordLayoutBuilder::Diag(SourceLocation Loc,
                                                   unsigned DiagID) {
  return Context.getDiagnostics().Report(Loc, DiagID);
}

/// Does the target C++ ABI require us to skip over the tail-padding
/// of the given class (considering it as a base class) when allocating
/// objects?
static bool mustSkipTailPadding(TargetCXXABI ABI, const CXXRecordDecl *RD) {
  switch (ABI.getTailPaddingUseRules()) {
  case TargetCXXABI::AlwaysUseTailPadding:
    return false;

  case TargetCXXABI::UseTailPaddingUnlessPOD03:
    // FIXME: To the extent that this is meant to cover the Itanium ABI
    // rules, we should implement the restrictions about over-sized
    // bitfields:
    //
    // http://itanium-cxx-abi.github.io/cxx-abi/abi.html#POD :
    //   In general, a type is considered a POD for the purposes of
    //   layout if it is a POD type (in the sense of ISO C++
    //   [basic.types]). However, a POD-struct or POD-union (in the
    //   sense of ISO C++ [class]) with a bitfield member whose
    //   declared width is wider than the declared type of the
    //   bitfield is not a POD for the purpose of layout.  Similarly,
    //   an array type is not a POD for the purpose of layout if the
    //   element type of the array is not a POD for the purpose of
    //   layout.
    //
    //   Where references to the ISO C++ are made in this paragraph,
    //   the Technical Corrigendum 1 version of the standard is
    //   intended.
    return RD->isPOD();

  case TargetCXXABI::UseTailPaddingUnlessPOD11:
    // This is equivalent to RD->getTypeForDecl().isCXX11PODType(),
    // but with a lot of abstraction penalty stripped off.  This does
    // assume that these properties are set correctly even in C++98
    // mode; fortunately, that is true because we want to assign
    // consistent semantics to the type-traits intrinsics (or at
    // least as many of them as possible).
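    // Illustrative sketch (not part of the builder; assumes a typical x86-64
    // Itanium target): under the UseTailPaddingUnlessPOD03 rule above,
    //
    //   struct P  { int i; char c; };                  // POD
    //   struct NP { int i; char c; NP(const NP &); };  // not a POD
    //   struct DP  : P  { char d; };  // tail padding skipped: d at offset 8,
    //                                 // sizeof(DP) == 12
    //   struct DNP : NP { char d; };  // tail padding reused: d at offset 5,
    //                                 // sizeof(DNP) == 8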
return RD->isTrivial() && RD->isCXX11StandardLayout(); } llvm_unreachable("bad tail-padding use kind"); } static bool isMsLayout(const ASTContext &Context) { return Context.getTargetInfo().getCXXABI().isMicrosoft(); } // This section contains an implementation of struct layout that is, up to the // included tests, compatible with cl.exe (2013). The layout produced is // significantly different than those produced by the Itanium ABI. Here we note // the most important differences. // // * The alignment of bitfields in unions is ignored when computing the // alignment of the union. // * The existence of zero-width bitfield that occurs after anything other than // a non-zero length bitfield is ignored. // * There is no explicit primary base for the purposes of layout. All bases // with vfptrs are laid out first, followed by all bases without vfptrs. // * The Itanium equivalent vtable pointers are split into a vfptr (virtual // function pointer) and a vbptr (virtual base pointer). They can each be // shared with a, non-virtual bases. These bases need not be the same. vfptrs // always occur at offset 0. vbptrs can occur at an arbitrary offset and are // placed after the lexicographically last non-virtual base. This placement // is always before fields but can be in the middle of the non-virtual bases // due to the two-pass layout scheme for non-virtual-bases. // * Virtual bases sometimes require a 'vtordisp' field that is laid out before // the virtual base and is used in conjunction with virtual overrides during // construction and destruction. This is always a 4 byte value and is used as // an alternative to constructor vtables. // * vtordisps are allocated in a block of memory with size and alignment equal // to the alignment of the completed structure (before applying __declspec( // align())). The vtordisp always occur at the end of the allocation block, // immediately prior to the virtual base. // * vfptrs are injected after all bases and fields have been laid out. In // order to guarantee proper alignment of all fields, the vfptr injection // pushes all bases and fields back by the alignment imposed by those bases // and fields. This can potentially add a significant amount of padding. // vfptrs are always injected at offset 0. // * vbptrs are injected after all bases and fields have been laid out. In // order to guarantee proper alignment of all fields, the vfptr injection // pushes all bases and fields back by the alignment imposed by those bases // and fields. This can potentially add a significant amount of padding. // vbptrs are injected immediately after the last non-virtual base as // lexicographically ordered in the code. If this site isn't pointer aligned // the vbptr is placed at the next properly aligned location. Enough padding // is added to guarantee a fit. // * The last zero sized non-virtual base can be placed at the end of the // struct (potentially aliasing another object), or may alias with the first // field, even if they are of the same type. // * The last zero size virtual base may be placed at the end of the struct // potentially aliasing another object. // * The ABI attempts to avoid aliasing of zero sized bases by adding padding // between bases or vbases with specific properties. 
// The criteria for additional padding between two bases is that the first
// base is zero sized or ends with a zero sized subobject and the second base
// is zero sized or trails with a zero sized base or field (sharing of vfptrs
// can reorder the layout of the class, so the leading base is not always the
// first one declared). This rule does take into account fields that are not
// records, so padding will occur even if the last field is, e.g. an int. The
// padding added for bases is 1 byte. The padding added between vbases depends
// on the alignment of the object but is at least 4 bytes (in both 32 and 64
// bit modes).
// * There is no concept of non-virtual alignment; non-virtual alignment and
// alignment are always identical.
// * There is a distinction between alignment and required alignment.
// __declspec(align) changes the required alignment of a struct. This
// alignment is _always_ obeyed, even in the presence of #pragma pack. A
// record inherits required alignment from all of its fields and bases.
// * __declspec(align) on bitfields has the effect of changing the bitfield's
// alignment instead of its required alignment. This is the only known way
// to make the alignment of a struct bigger than 8. Interestingly enough
// this alignment is also immune to the effects of #pragma pack and can be
// used to create structures with large alignment under #pragma pack.
// However, because it does not impact required alignment, such a structure,
// when used as a field or base, will not be aligned if #pragma pack is
// still active at the time of use.
//
// Known incompatibilities:
// * all: #pragma pack between fields in a record
// * 2010 and back: If the last field in a record is a bitfield, every object
// laid out after the record will have extra padding inserted before it. The
// extra padding will have size equal to the size of the storage class of the
// bitfield. 0 sized bitfields don't exhibit this behavior and the extra
// padding can be avoided by adding a 0 sized bitfield after the non-zero-
// sized bitfield.
// * 2012 and back: In 64-bit mode, if the alignment of a record is 16 or
// greater due to __declspec(align()) then a second layout phase occurs after
// the locations of the vf and vb pointers are known. This layout phase
// suffers from the "last field is a bitfield" bug in 2010 and results in
// _every_ field getting padding put in front of it, potentially including the
// vfptr, leaving the vfptr at a non-zero location which results in a fault if
// anything tries to read the vftbl. The second layout phase also treats
// bitfields as separate entities and gives them each storage rather than
// packing them. Additionally, because this phase appears to perform an
// (unstable) sort on the members before laying them out and because merged
// bitfields have the same address, the bitfields end up in whatever order
// the sort left them in, a behavior we could never hope to replicate.

namespace {
struct MicrosoftRecordLayoutBuilder {
  struct ElementInfo {
    CharUnits Size;
    CharUnits Alignment;
  };
  typedef llvm::DenseMap<const CXXRecordDecl *, CharUnits> BaseOffsetsMapTy;
  MicrosoftRecordLayoutBuilder(const ASTContext &Context) : Context(Context) {}

private:
  MicrosoftRecordLayoutBuilder(const MicrosoftRecordLayoutBuilder &) = delete;
  void operator=(const MicrosoftRecordLayoutBuilder &) = delete;

public:
  void layout(const RecordDecl *RD);
  void cxxLayout(const CXXRecordDecl *RD);
  /// Initializes size and alignment and honors some flags.
void initializeLayout(const RecordDecl *RD); /// Initialized C++ layout, compute alignment and virtual alignment and /// existence of vfptrs and vbptrs. Alignment is needed before the vfptr is /// laid out. void initializeCXXLayout(const CXXRecordDecl *RD); void layoutNonVirtualBases(const CXXRecordDecl *RD); void layoutNonVirtualBase(const CXXRecordDecl *RD, const CXXRecordDecl *BaseDecl, const ASTRecordLayout &BaseLayout, const ASTRecordLayout *&PreviousBaseLayout); void injectVFPtr(const CXXRecordDecl *RD); void injectVBPtr(const CXXRecordDecl *RD); /// Lays out the fields of the record. Also rounds size up to /// alignment. void layoutFields(const RecordDecl *RD); void layoutField(const FieldDecl *FD); void layoutBitField(const FieldDecl *FD); /// Lays out a single zero-width bit-field in the record and handles /// special cases associated with zero-width bit-fields. void layoutZeroWidthBitField(const FieldDecl *FD); void layoutVirtualBases(const CXXRecordDecl *RD); void finalizeLayout(const RecordDecl *RD); /// Gets the size and alignment of a base taking pragma pack and /// __declspec(align) into account. ElementInfo getAdjustedElementInfo(const ASTRecordLayout &Layout); /// Gets the size and alignment of a field taking pragma pack and /// __declspec(align) into account. It also updates RequiredAlignment as a /// side effect because it is most convenient to do so here. ElementInfo getAdjustedElementInfo(const FieldDecl *FD); /// Places a field at an offset in CharUnits. void placeFieldAtOffset(CharUnits FieldOffset) { FieldOffsets.push_back(Context.toBits(FieldOffset)); } /// Places a bitfield at a bit offset. void placeFieldAtBitOffset(uint64_t FieldOffset) { FieldOffsets.push_back(FieldOffset); } /// Compute the set of virtual bases for which vtordisps are required. void computeVtorDispSet( llvm::SmallPtrSetImpl &HasVtorDispSet, const CXXRecordDecl *RD) const; const ASTContext &Context; /// The size of the record being laid out. CharUnits Size; /// The non-virtual size of the record layout. CharUnits NonVirtualSize; /// The data size of the record layout. CharUnits DataSize; /// The current alignment of the record layout. CharUnits Alignment; /// The maximum allowed field alignment. This is set by #pragma pack. CharUnits MaxFieldAlignment; /// The alignment that this record must obey. This is imposed by /// __declspec(align()) on the record itself or one of its fields or bases. CharUnits RequiredAlignment; /// The size of the allocation of the currently active bitfield. /// This value isn't meaningful unless LastFieldIsNonZeroWidthBitfield /// is true. CharUnits CurrentBitfieldSize; /// Offset to the virtual base table pointer (if one exists). CharUnits VBPtrOffset; /// Minimum record size possible. CharUnits MinEmptyStructSize; /// The size and alignment info of a pointer. ElementInfo PointerInfo; /// The primary base class (if one exists). const CXXRecordDecl *PrimaryBase; /// The class we share our vb-pointer with. const CXXRecordDecl *SharedVBPtrBase; /// The collection of field offsets. SmallVector FieldOffsets; /// Base classes and their offsets in the record. BaseOffsetsMapTy Bases; /// virtual base classes and their offsets in the record. ASTRecordLayout::VBaseOffsetsMapTy VBases; /// The number of remaining bits in our last bitfield allocation. /// This value isn't meaningful unless LastFieldIsNonZeroWidthBitfield is /// true. unsigned RemainingBitsInField; bool IsUnion : 1; /// True if the last field laid out was a bitfield and was not 0 /// width. 
bool LastFieldIsNonZeroWidthBitfield : 1; /// True if the class has its own vftable pointer. bool HasOwnVFPtr : 1; /// True if the class has a vbtable pointer. bool HasVBPtr : 1; /// True if the last sub-object within the type is zero sized or the /// object itself is zero sized. This *does not* count members that are not /// records. Only used for MS-ABI. bool EndsWithZeroSizedObject : 1; /// True if this class is zero sized or first base is zero sized or /// has this property. Only used for MS-ABI. bool LeadsWithZeroSizedBase : 1; /// True if the external AST source provided a layout for this record. bool UseExternalLayout : 1; /// The layout provided by the external AST source. Only active if /// UseExternalLayout is true. ExternalLayout External; }; } // namespace MicrosoftRecordLayoutBuilder::ElementInfo MicrosoftRecordLayoutBuilder::getAdjustedElementInfo( const ASTRecordLayout &Layout) { ElementInfo Info; Info.Alignment = Layout.getAlignment(); // Respect pragma pack. if (!MaxFieldAlignment.isZero()) Info.Alignment = std::min(Info.Alignment, MaxFieldAlignment); // Track zero-sized subobjects here where it's already available. EndsWithZeroSizedObject = Layout.endsWithZeroSizedObject(); // Respect required alignment, this is necessary because we may have adjusted // the alignment in the case of pragma pack. Note that the required alignment // doesn't actually apply to the struct alignment at this point. Alignment = std::max(Alignment, Info.Alignment); RequiredAlignment = std::max(RequiredAlignment, Layout.getRequiredAlignment()); Info.Alignment = std::max(Info.Alignment, Layout.getRequiredAlignment()); Info.Size = Layout.getNonVirtualSize(); return Info; } MicrosoftRecordLayoutBuilder::ElementInfo MicrosoftRecordLayoutBuilder::getAdjustedElementInfo( const FieldDecl *FD) { // Get the alignment of the field type's natural alignment, ignore any // alignment attributes. auto TInfo = Context.getTypeInfoInChars(FD->getType()->getUnqualifiedDesugaredType()); ElementInfo Info{TInfo.Width, TInfo.Align}; // Respect align attributes on the field. CharUnits FieldRequiredAlignment = Context.toCharUnitsFromBits(FD->getMaxAlignment()); // Respect align attributes on the type. if (Context.isAlignmentRequired(FD->getType())) FieldRequiredAlignment = std::max( Context.getTypeAlignInChars(FD->getType()), FieldRequiredAlignment); // Respect attributes applied to subobjects of the field. if (FD->isBitField()) // For some reason __declspec align impacts alignment rather than required // alignment when it is applied to bitfields. Info.Alignment = std::max(Info.Alignment, FieldRequiredAlignment); else { if (auto RT = FD->getType()->getBaseElementTypeUnsafe()->getAs()) { auto const &Layout = Context.getASTRecordLayout(RT->getDecl()); EndsWithZeroSizedObject = Layout.endsWithZeroSizedObject(); FieldRequiredAlignment = std::max(FieldRequiredAlignment, Layout.getRequiredAlignment()); } // Capture required alignment as a side-effect. RequiredAlignment = std::max(RequiredAlignment, FieldRequiredAlignment); } // Respect pragma pack, attribute pack and declspec align if (!MaxFieldAlignment.isZero()) Info.Alignment = std::min(Info.Alignment, MaxFieldAlignment); if (FD->hasAttr()) Info.Alignment = CharUnits::One(); Info.Alignment = std::max(Info.Alignment, FieldRequiredAlignment); return Info; } void MicrosoftRecordLayoutBuilder::layout(const RecordDecl *RD) { // For C record layout, zero-sized records always have size 4. 
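  // Illustrative example (hedged; the sizes just follow the rule stated in
  // the comment above, and empty structs in C are a Microsoft extension):
  //
  //   struct Empty {};
  //   // C record layout:   sizeof(struct Empty) == 4 (MinEmptyStructSize here)
  //   // C++ record layout: sizeof(Empty)        == 1 (cxxLayout uses One())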
MinEmptyStructSize = CharUnits::fromQuantity(4); initializeLayout(RD); layoutFields(RD); DataSize = Size = Size.alignTo(Alignment); RequiredAlignment = std::max( RequiredAlignment, Context.toCharUnitsFromBits(RD->getMaxAlignment())); finalizeLayout(RD); } void MicrosoftRecordLayoutBuilder::cxxLayout(const CXXRecordDecl *RD) { // The C++ standard says that empty structs have size 1. MinEmptyStructSize = CharUnits::One(); initializeLayout(RD); initializeCXXLayout(RD); layoutNonVirtualBases(RD); layoutFields(RD); injectVBPtr(RD); injectVFPtr(RD); if (HasOwnVFPtr || (HasVBPtr && !SharedVBPtrBase)) Alignment = std::max(Alignment, PointerInfo.Alignment); auto RoundingAlignment = Alignment; if (!MaxFieldAlignment.isZero()) RoundingAlignment = std::min(RoundingAlignment, MaxFieldAlignment); if (!UseExternalLayout) Size = Size.alignTo(RoundingAlignment); NonVirtualSize = Size; RequiredAlignment = std::max( RequiredAlignment, Context.toCharUnitsFromBits(RD->getMaxAlignment())); layoutVirtualBases(RD); finalizeLayout(RD); } void MicrosoftRecordLayoutBuilder::initializeLayout(const RecordDecl *RD) { IsUnion = RD->isUnion(); Size = CharUnits::Zero(); Alignment = CharUnits::One(); // In 64-bit mode we always perform an alignment step after laying out vbases. // In 32-bit mode we do not. The check to see if we need to perform alignment // checks the RequiredAlignment field and performs alignment if it isn't 0. RequiredAlignment = Context.getTargetInfo().getTriple().isArch64Bit() ? CharUnits::One() : CharUnits::Zero(); // Compute the maximum field alignment. MaxFieldAlignment = CharUnits::Zero(); // Honor the default struct packing maximum alignment flag. if (unsigned DefaultMaxFieldAlignment = Context.getLangOpts().PackStruct) MaxFieldAlignment = CharUnits::fromQuantity(DefaultMaxFieldAlignment); // Honor the packing attribute. The MS-ABI ignores pragma pack if its larger // than the pointer size. if (const MaxFieldAlignmentAttr *MFAA = RD->getAttr()){ unsigned PackedAlignment = MFAA->getAlignment(); if (PackedAlignment <= Context.getTargetInfo().getPointerWidth(0)) MaxFieldAlignment = Context.toCharUnitsFromBits(PackedAlignment); } // Packed attribute forces max field alignment to be 1. if (RD->hasAttr()) MaxFieldAlignment = CharUnits::One(); // Try to respect the external layout if present. UseExternalLayout = false; if (ExternalASTSource *Source = Context.getExternalSource()) UseExternalLayout = Source->layoutRecordType( RD, External.Size, External.Align, External.FieldOffsets, External.BaseOffsets, External.VirtualBaseOffsets); } void MicrosoftRecordLayoutBuilder::initializeCXXLayout(const CXXRecordDecl *RD) { EndsWithZeroSizedObject = false; LeadsWithZeroSizedBase = false; HasOwnVFPtr = false; HasVBPtr = false; PrimaryBase = nullptr; SharedVBPtrBase = nullptr; // Calculate pointer size and alignment. These are used for vfptr and vbprt // injection. PointerInfo.Size = Context.toCharUnitsFromBits(Context.getTargetInfo().getPointerWidth(0)); PointerInfo.Alignment = Context.toCharUnitsFromBits(Context.getTargetInfo().getPointerAlign(0)); // Respect pragma pack. if (!MaxFieldAlignment.isZero()) PointerInfo.Alignment = std::min(PointerInfo.Alignment, MaxFieldAlignment); } void MicrosoftRecordLayoutBuilder::layoutNonVirtualBases(const CXXRecordDecl *RD) { // The MS-ABI lays out all bases that contain leading vfptrs before it lays // out any bases that do not contain vfptrs. We implement this as two passes // over the bases. This approach guarantees that the primary base is laid out // first. 
We use these passes to calculate some additional aggregated // information about the bases, such as required alignment and the presence of // zero sized members. const ASTRecordLayout *PreviousBaseLayout = nullptr; bool HasPolymorphicBaseClass = false; // Iterate through the bases and lay out the non-virtual ones. for (const CXXBaseSpecifier &Base : RD->bases()) { const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); HasPolymorphicBaseClass |= BaseDecl->isPolymorphic(); const ASTRecordLayout &BaseLayout = Context.getASTRecordLayout(BaseDecl); // Mark and skip virtual bases. if (Base.isVirtual()) { HasVBPtr = true; continue; } // Check for a base to share a VBPtr with. if (!SharedVBPtrBase && BaseLayout.hasVBPtr()) { SharedVBPtrBase = BaseDecl; HasVBPtr = true; } // Only lay out bases with extendable VFPtrs on the first pass. if (!BaseLayout.hasExtendableVFPtr()) continue; // If we don't have a primary base, this one qualifies. if (!PrimaryBase) { PrimaryBase = BaseDecl; LeadsWithZeroSizedBase = BaseLayout.leadsWithZeroSizedBase(); } // Lay out the base. layoutNonVirtualBase(RD, BaseDecl, BaseLayout, PreviousBaseLayout); } // Figure out if we need a fresh VFPtr for this class. if (RD->isPolymorphic()) { if (!HasPolymorphicBaseClass) // This class introduces polymorphism, so we need a vftable to store the // RTTI information. HasOwnVFPtr = true; else if (!PrimaryBase) { // We have a polymorphic base class but can't extend its vftable. Add a // new vfptr if we would use any vftable slots. for (CXXMethodDecl *M : RD->methods()) { if (MicrosoftVTableContext::hasVtableSlot(M) && M->size_overridden_methods() == 0) { HasOwnVFPtr = true; break; } } } } // If we don't have a primary base then we have a leading object that could // itself lead with a zero-sized object, something we track. bool CheckLeadingLayout = !PrimaryBase; // Iterate through the bases and lay out the non-virtual ones. for (const CXXBaseSpecifier &Base : RD->bases()) { if (Base.isVirtual()) continue; const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); const ASTRecordLayout &BaseLayout = Context.getASTRecordLayout(BaseDecl); // Only lay out bases without extendable VFPtrs on the second pass. if (BaseLayout.hasExtendableVFPtr()) { VBPtrOffset = Bases[BaseDecl] + BaseLayout.getNonVirtualSize(); continue; } // If this is the first layout, check to see if it leads with a zero sized // object. If it does, so do we. if (CheckLeadingLayout) { CheckLeadingLayout = false; LeadsWithZeroSizedBase = BaseLayout.leadsWithZeroSizedBase(); } // Lay out the base. layoutNonVirtualBase(RD, BaseDecl, BaseLayout, PreviousBaseLayout); VBPtrOffset = Bases[BaseDecl] + BaseLayout.getNonVirtualSize(); } // Set our VBPtroffset if we know it at this point. if (!HasVBPtr) VBPtrOffset = CharUnits::fromQuantity(-1); else if (SharedVBPtrBase) { const ASTRecordLayout &Layout = Context.getASTRecordLayout(SharedVBPtrBase); VBPtrOffset = Bases[SharedVBPtrBase] + Layout.getVBPtrOffset(); } } static bool recordUsesEBO(const RecordDecl *RD) { if (!isa(RD)) return false; if (RD->hasAttr()) return true; if (auto *LVA = RD->getAttr()) // TODO: Double check with the next version of MSVC. if (LVA->getVersion() <= LangOptions::MSVC2015) return false; // TODO: Some later version of MSVC will change the default behavior of the // compiler to enable EBO by default. When this happens, we will need an // additional isCompatibleWithMSVC check. 
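  // For reference, a hedged sketch of what "uses EBO" means for layout; the
  // __declspec(empty_bases) spelling is the MSVC attribute behind the
  // attribute check above:
  //
  //   struct E1 {}; struct E2 {};
  //   struct __declspec(empty_bases) D : E1, E2 { int x; };
  //   // With empty_bases, the empty bases contribute no storage and
  //   // sizeof(D) == sizeof(int); without it, the MS ABI reserves distinct
  //   // (padded) offsets for E1 and E2, so D grows (typically to 8 bytes).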
return false; } void MicrosoftRecordLayoutBuilder::layoutNonVirtualBase( const CXXRecordDecl *RD, const CXXRecordDecl *BaseDecl, const ASTRecordLayout &BaseLayout, const ASTRecordLayout *&PreviousBaseLayout) { // Insert padding between two bases if the left first one is zero sized or // contains a zero sized subobject and the right is zero sized or one leads // with a zero sized base. bool MDCUsesEBO = recordUsesEBO(RD); if (PreviousBaseLayout && PreviousBaseLayout->endsWithZeroSizedObject() && BaseLayout.leadsWithZeroSizedBase() && !MDCUsesEBO) Size++; ElementInfo Info = getAdjustedElementInfo(BaseLayout); CharUnits BaseOffset; // Respect the external AST source base offset, if present. bool FoundBase = false; if (UseExternalLayout) { FoundBase = External.getExternalNVBaseOffset(BaseDecl, BaseOffset); if (FoundBase) { assert(BaseOffset >= Size && "base offset already allocated"); Size = BaseOffset; } } if (!FoundBase) { if (MDCUsesEBO && BaseDecl->isEmpty()) { assert(BaseLayout.getNonVirtualSize() == CharUnits::Zero()); BaseOffset = CharUnits::Zero(); } else { // Otherwise, lay the base out at the end of the MDC. BaseOffset = Size = Size.alignTo(Info.Alignment); } } Bases.insert(std::make_pair(BaseDecl, BaseOffset)); Size += BaseLayout.getNonVirtualSize(); PreviousBaseLayout = &BaseLayout; } void MicrosoftRecordLayoutBuilder::layoutFields(const RecordDecl *RD) { LastFieldIsNonZeroWidthBitfield = false; for (const FieldDecl *Field : RD->fields()) layoutField(Field); } void MicrosoftRecordLayoutBuilder::layoutField(const FieldDecl *FD) { if (FD->isBitField()) { layoutBitField(FD); return; } LastFieldIsNonZeroWidthBitfield = false; ElementInfo Info = getAdjustedElementInfo(FD); Alignment = std::max(Alignment, Info.Alignment); CharUnits FieldOffset; if (UseExternalLayout) FieldOffset = Context.toCharUnitsFromBits(External.getExternalFieldOffset(FD)); else if (IsUnion) FieldOffset = CharUnits::Zero(); else FieldOffset = Size.alignTo(Info.Alignment); placeFieldAtOffset(FieldOffset); Size = std::max(Size, FieldOffset + Info.Size); } void MicrosoftRecordLayoutBuilder::layoutBitField(const FieldDecl *FD) { unsigned Width = FD->getBitWidthValue(Context); if (Width == 0) { layoutZeroWidthBitField(FD); return; } ElementInfo Info = getAdjustedElementInfo(FD); // Clamp the bitfield to a containable size for the sake of being able // to lay them out. Sema will throw an error. if (Width > Context.toBits(Info.Size)) Width = Context.toBits(Info.Size); // Check to see if this bitfield fits into an existing allocation. Note: // MSVC refuses to pack bitfields of formal types with different sizes // into the same allocation. if (!UseExternalLayout && !IsUnion && LastFieldIsNonZeroWidthBitfield && CurrentBitfieldSize == Info.Size && Width <= RemainingBitsInField) { placeFieldAtBitOffset(Context.toBits(Size) - RemainingBitsInField); RemainingBitsInField -= Width; return; } LastFieldIsNonZeroWidthBitfield = true; CurrentBitfieldSize = Info.Size; if (UseExternalLayout) { auto FieldBitOffset = External.getExternalFieldOffset(FD); placeFieldAtBitOffset(FieldBitOffset); auto NewSize = Context.toCharUnitsFromBits( llvm::alignDown(FieldBitOffset, Context.toBits(Info.Alignment)) + Context.toBits(Info.Size)); Size = std::max(Size, NewSize); Alignment = std::max(Alignment, Info.Alignment); } else if (IsUnion) { placeFieldAtOffset(CharUnits::Zero()); Size = std::max(Size, Info.Size); // TODO: Add a Sema warning that MS ignores bitfield alignment in unions. 
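  // A hedged illustration of the allocation rule enforced earlier in this
  // function (bitfields whose declared types differ in size never share an
  // allocation unit under the MS ABI):
  //
  //   struct S { short a : 4; int b : 4; };
  //   // MS ABI: 'a' gets a 2-byte unit and 'b' starts a fresh 4-byte unit,
  //   // so sizeof(S) == 8; the Itanium ABI packs both into a single 4 bytes.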
} else { // Allocate a new block of memory and place the bitfield in it. CharUnits FieldOffset = Size.alignTo(Info.Alignment); placeFieldAtOffset(FieldOffset); Size = FieldOffset + Info.Size; Alignment = std::max(Alignment, Info.Alignment); RemainingBitsInField = Context.toBits(Info.Size) - Width; } } void MicrosoftRecordLayoutBuilder::layoutZeroWidthBitField(const FieldDecl *FD) { // Zero-width bitfields are ignored unless they follow a non-zero-width // bitfield. if (!LastFieldIsNonZeroWidthBitfield) { placeFieldAtOffset(IsUnion ? CharUnits::Zero() : Size); // TODO: Add a Sema warning that MS ignores alignment for zero // sized bitfields that occur after zero-size bitfields or non-bitfields. return; } LastFieldIsNonZeroWidthBitfield = false; ElementInfo Info = getAdjustedElementInfo(FD); if (IsUnion) { placeFieldAtOffset(CharUnits::Zero()); Size = std::max(Size, Info.Size); // TODO: Add a Sema warning that MS ignores bitfield alignment in unions. } else { // Round up the current record size to the field's alignment boundary. CharUnits FieldOffset = Size.alignTo(Info.Alignment); placeFieldAtOffset(FieldOffset); Size = FieldOffset; Alignment = std::max(Alignment, Info.Alignment); } } void MicrosoftRecordLayoutBuilder::injectVBPtr(const CXXRecordDecl *RD) { if (!HasVBPtr || SharedVBPtrBase) return; // Inject the VBPointer at the injection site. CharUnits InjectionSite = VBPtrOffset; // But before we do, make sure it's properly aligned. VBPtrOffset = VBPtrOffset.alignTo(PointerInfo.Alignment); // Determine where the first field should be laid out after the vbptr. CharUnits FieldStart = VBPtrOffset + PointerInfo.Size; // Shift everything after the vbptr down, unless we're using an external // layout. if (UseExternalLayout) { // It is possible that there were no fields or bases located after vbptr, // so the size was not adjusted before. if (Size < FieldStart) Size = FieldStart; return; } // Make sure that the amount we push the fields back by is a multiple of the // alignment. CharUnits Offset = (FieldStart - InjectionSite) .alignTo(std::max(RequiredAlignment, Alignment)); Size += Offset; for (uint64_t &FieldOffset : FieldOffsets) FieldOffset += Context.toBits(Offset); for (BaseOffsetsMapTy::value_type &Base : Bases) if (Base.second >= InjectionSite) Base.second += Offset; } void MicrosoftRecordLayoutBuilder::injectVFPtr(const CXXRecordDecl *RD) { if (!HasOwnVFPtr) return; // Make sure that the amount we push the struct back by is a multiple of the // alignment. CharUnits Offset = PointerInfo.Size.alignTo(std::max(RequiredAlignment, Alignment)); // Push back the vbptr, but increase the size of the object and push back // regular fields by the offset only if not using external record layout. if (HasVBPtr) VBPtrOffset += Offset; if (UseExternalLayout) { // The class may have no bases or fields, but still have a vfptr // (e.g. it's an interface class). The size was not correctly set before // in this case. if (FieldOffsets.empty() && Bases.empty()) Size += Offset; return; } Size += Offset; // If we're using an external layout, the fields offsets have already // accounted for this adjustment. 
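  // Rough example of this push-back (assuming a 32-bit MSVC pointer size):
  //
  //   struct A { double d; virtual void f(); };
  //   // The vfptr is injected at offset 0 and the fields are shifted by the
  //   // pointer size rounded up to max(RequiredAlignment, Alignment) == 8,
  //   // not by 4, so 'd' lands at offset 8 and sizeof(A) == 16.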
for (uint64_t &FieldOffset : FieldOffsets) FieldOffset += Context.toBits(Offset); for (BaseOffsetsMapTy::value_type &Base : Bases) Base.second += Offset; } void MicrosoftRecordLayoutBuilder::layoutVirtualBases(const CXXRecordDecl *RD) { if (!HasVBPtr) return; // Vtordisps are always 4 bytes (even in 64-bit mode) CharUnits VtorDispSize = CharUnits::fromQuantity(4); CharUnits VtorDispAlignment = VtorDispSize; // vtordisps respect pragma pack. if (!MaxFieldAlignment.isZero()) VtorDispAlignment = std::min(VtorDispAlignment, MaxFieldAlignment); // The alignment of the vtordisp is at least the required alignment of the // entire record. This requirement may be present to support vtordisp // injection. for (const CXXBaseSpecifier &VBase : RD->vbases()) { const CXXRecordDecl *BaseDecl = VBase.getType()->getAsCXXRecordDecl(); const ASTRecordLayout &BaseLayout = Context.getASTRecordLayout(BaseDecl); RequiredAlignment = std::max(RequiredAlignment, BaseLayout.getRequiredAlignment()); } VtorDispAlignment = std::max(VtorDispAlignment, RequiredAlignment); // Compute the vtordisp set. llvm::SmallPtrSet HasVtorDispSet; computeVtorDispSet(HasVtorDispSet, RD); // Iterate through the virtual bases and lay them out. const ASTRecordLayout *PreviousBaseLayout = nullptr; for (const CXXBaseSpecifier &VBase : RD->vbases()) { const CXXRecordDecl *BaseDecl = VBase.getType()->getAsCXXRecordDecl(); const ASTRecordLayout &BaseLayout = Context.getASTRecordLayout(BaseDecl); bool HasVtordisp = HasVtorDispSet.contains(BaseDecl); // Insert padding between two bases if the left first one is zero sized or // contains a zero sized subobject and the right is zero sized or one leads // with a zero sized base. The padding between virtual bases is 4 // bytes (in both 32 and 64 bits modes) and always involves rounding up to // the required alignment, we don't know why. if ((PreviousBaseLayout && PreviousBaseLayout->endsWithZeroSizedObject() && BaseLayout.leadsWithZeroSizedBase() && !recordUsesEBO(RD)) || HasVtordisp) { Size = Size.alignTo(VtorDispAlignment) + VtorDispSize; Alignment = std::max(VtorDispAlignment, Alignment); } // Insert the virtual base. ElementInfo Info = getAdjustedElementInfo(BaseLayout); CharUnits BaseOffset; // Respect the external AST source base offset, if present. if (UseExternalLayout) { if (!External.getExternalVBaseOffset(BaseDecl, BaseOffset)) BaseOffset = Size; } else BaseOffset = Size.alignTo(Info.Alignment); assert(BaseOffset >= Size && "base offset already allocated"); VBases.insert(std::make_pair(BaseDecl, ASTRecordLayout::VBaseInfo(BaseOffset, HasVtordisp))); Size = BaseOffset + BaseLayout.getNonVirtualSize(); PreviousBaseLayout = &BaseLayout; } } void MicrosoftRecordLayoutBuilder::finalizeLayout(const RecordDecl *RD) { // Respect required alignment. Note that in 32-bit mode Required alignment // may be 0 and cause size not to be updated. DataSize = Size; if (!RequiredAlignment.isZero()) { Alignment = std::max(Alignment, RequiredAlignment); auto RoundingAlignment = Alignment; if (!MaxFieldAlignment.isZero()) RoundingAlignment = std::min(RoundingAlignment, MaxFieldAlignment); RoundingAlignment = std::max(RoundingAlignment, RequiredAlignment); Size = Size.alignTo(RoundingAlignment); } if (Size.isZero()) { if (!recordUsesEBO(RD) || !cast(RD)->isEmpty()) { EndsWithZeroSizedObject = true; LeadsWithZeroSizedBase = true; } // Zero-sized structures have size equal to their alignment if a // __declspec(align) came into play. 
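  // For instance (illustrative, MS ABI):
  //
  //   struct __declspec(align(16)) E {};
  //   // RequiredAlignment (16) exceeds the minimum empty-struct size, so the
  //   // branch below reports sizeof(E) == 16 instead of 1.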
if (RequiredAlignment >= MinEmptyStructSize) Size = Alignment; else Size = MinEmptyStructSize; } if (UseExternalLayout) { Size = Context.toCharUnitsFromBits(External.Size); if (External.Align) Alignment = Context.toCharUnitsFromBits(External.Align); } } // Recursively walks the non-virtual bases of a class and determines if any of // them are in the bases with overridden methods set. static bool RequiresVtordisp(const llvm::SmallPtrSetImpl & BasesWithOverriddenMethods, const CXXRecordDecl *RD) { if (BasesWithOverriddenMethods.count(RD)) return true; // If any of a virtual bases non-virtual bases (recursively) requires a // vtordisp than so does this virtual base. for (const CXXBaseSpecifier &Base : RD->bases()) if (!Base.isVirtual() && RequiresVtordisp(BasesWithOverriddenMethods, Base.getType()->getAsCXXRecordDecl())) return true; return false; } void MicrosoftRecordLayoutBuilder::computeVtorDispSet( llvm::SmallPtrSetImpl &HasVtordispSet, const CXXRecordDecl *RD) const { // /vd2 or #pragma vtordisp(2): Always use vtordisps for virtual bases with // vftables. if (RD->getMSVtorDispMode() == MSVtorDispMode::ForVFTable) { for (const CXXBaseSpecifier &Base : RD->vbases()) { const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); const ASTRecordLayout &Layout = Context.getASTRecordLayout(BaseDecl); if (Layout.hasExtendableVFPtr()) HasVtordispSet.insert(BaseDecl); } return; } // If any of our bases need a vtordisp for this type, so do we. Check our // direct bases for vtordisp requirements. for (const CXXBaseSpecifier &Base : RD->bases()) { const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); const ASTRecordLayout &Layout = Context.getASTRecordLayout(BaseDecl); for (const auto &bi : Layout.getVBaseOffsetsMap()) if (bi.second.hasVtorDisp()) HasVtordispSet.insert(bi.first); } // We don't introduce any additional vtordisps if either: // * A user declared constructor or destructor aren't declared. // * #pragma vtordisp(0) or the /vd0 flag are in use. if ((!RD->hasUserDeclaredConstructor() && !RD->hasUserDeclaredDestructor()) || RD->getMSVtorDispMode() == MSVtorDispMode::Never) return; // /vd1 or #pragma vtordisp(1): Try to guess based on whether we think it's // possible for a partially constructed object with virtual base overrides to // escape a non-trivial constructor. assert(RD->getMSVtorDispMode() == MSVtorDispMode::ForVBaseOverride); // Compute a set of base classes which define methods we override. A virtual // base in this set will require a vtordisp. A virtual base that transitively // contains one of these bases as a non-virtual base will also require a // vtordisp. llvm::SmallPtrSet Work; llvm::SmallPtrSet BasesWithOverriddenMethods; // Seed the working set with our non-destructor, non-pure virtual methods. for (const CXXMethodDecl *MD : RD->methods()) if (MicrosoftVTableContext::hasVtableSlot(MD) && !isa(MD) && !MD->isPure()) Work.insert(MD); while (!Work.empty()) { const CXXMethodDecl *MD = *Work.begin(); auto MethodRange = MD->overridden_methods(); // If a virtual method has no-overrides it lives in its parent's vtable. if (MethodRange.begin() == MethodRange.end()) BasesWithOverriddenMethods.insert(MD->getParent()); else Work.insert(MethodRange.begin(), MethodRange.end()); // We've finished processing this element, remove it from the working set. Work.erase(MD); } // For each of our virtual bases, check if it is in the set of overridden // bases or if it transitively contains a non-virtual base that is. 
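  // A small example of what this heuristic catches (illustrative):
  //
  //   struct A { virtual void f(); };
  //   struct B : virtual A { B(); void f() override; };
  //   // B has a user-declared constructor and overrides a method living in
  //   // the vftable of its virtual base A, so A receives a vtordisp in B.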
for (const CXXBaseSpecifier &Base : RD->vbases()) { const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); if (!HasVtordispSet.count(BaseDecl) && RequiresVtordisp(BasesWithOverriddenMethods, BaseDecl)) HasVtordispSet.insert(BaseDecl); } } /// getASTRecordLayout - Get or compute information about the layout of the /// specified record (struct/union/class), which indicates its size and field /// position information. const ASTRecordLayout & ASTContext::getASTRecordLayout(const RecordDecl *D) const { // These asserts test different things. A record has a definition // as soon as we begin to parse the definition. That definition is // not a complete definition (which is what isDefinition() tests) // until we *finish* parsing the definition. if (D->hasExternalLexicalStorage() && !D->getDefinition()) getExternalSource()->CompleteType(const_cast(D)); D = D->getDefinition(); assert(D && "Cannot get layout of forward declarations!"); assert(!D->isInvalidDecl() && "Cannot get layout of invalid decl!"); assert(D->isCompleteDefinition() && "Cannot layout type before complete!"); // Look up this layout, if already laid out, return what we have. // Note that we can't save a reference to the entry because this function // is recursive. const ASTRecordLayout *Entry = ASTRecordLayouts[D]; if (Entry) return *Entry; const ASTRecordLayout *NewEntry = nullptr; if (isMsLayout(*this)) { MicrosoftRecordLayoutBuilder Builder(*this); if (const auto *RD = dyn_cast(D)) { Builder.cxxLayout(RD); NewEntry = new (*this) ASTRecordLayout( *this, Builder.Size, Builder.Alignment, Builder.Alignment, Builder.Alignment, Builder.RequiredAlignment, Builder.HasOwnVFPtr, Builder.HasOwnVFPtr || Builder.PrimaryBase, Builder.VBPtrOffset, Builder.DataSize, Builder.FieldOffsets, Builder.NonVirtualSize, Builder.Alignment, Builder.Alignment, CharUnits::Zero(), Builder.PrimaryBase, false, Builder.SharedVBPtrBase, Builder.EndsWithZeroSizedObject, Builder.LeadsWithZeroSizedBase, Builder.Bases, Builder.VBases); } else { Builder.layout(D); NewEntry = new (*this) ASTRecordLayout( *this, Builder.Size, Builder.Alignment, Builder.Alignment, Builder.Alignment, Builder.RequiredAlignment, Builder.Size, Builder.FieldOffsets); } } else { if (const auto *RD = dyn_cast(D)) { EmptySubobjectMap EmptySubobjects(*this, RD); ItaniumRecordLayoutBuilder Builder(*this, &EmptySubobjects); Builder.Layout(RD); // In certain situations, we are allowed to lay out objects in the // tail-padding of base classes. This is ABI-dependent. // FIXME: this should be stored in the record layout. bool skipTailPadding = mustSkipTailPadding(getTargetInfo().getCXXABI(), RD); // FIXME: This should be done in FinalizeLayout. CharUnits DataSize = skipTailPadding ? Builder.getSize() : Builder.getDataSize(); CharUnits NonVirtualSize = skipTailPadding ? 
DataSize : Builder.NonVirtualSize; NewEntry = new (*this) ASTRecordLayout( *this, Builder.getSize(), Builder.Alignment, Builder.PreferredAlignment, Builder.UnadjustedAlignment, /*RequiredAlignment : used by MS-ABI)*/ Builder.Alignment, Builder.HasOwnVFPtr, RD->isDynamicClass(), CharUnits::fromQuantity(-1), DataSize, Builder.FieldOffsets, NonVirtualSize, Builder.NonVirtualAlignment, Builder.PreferredNVAlignment, EmptySubobjects.SizeOfLargestEmptySubobject, Builder.PrimaryBase, Builder.PrimaryBaseIsVirtual, nullptr, false, false, Builder.Bases, Builder.VBases); } else { ItaniumRecordLayoutBuilder Builder(*this, /*EmptySubobjects=*/nullptr); Builder.Layout(D); NewEntry = new (*this) ASTRecordLayout( *this, Builder.getSize(), Builder.Alignment, Builder.PreferredAlignment, Builder.UnadjustedAlignment, /*RequiredAlignment : used by MS-ABI)*/ Builder.Alignment, Builder.getSize(), Builder.FieldOffsets); } } ASTRecordLayouts[D] = NewEntry; if (getLangOpts().DumpRecordLayouts) { llvm::outs() << "\n*** Dumping AST Record Layout\n"; DumpRecordLayout(D, llvm::outs(), getLangOpts().DumpRecordLayoutsSimple); } return *NewEntry; } const CXXMethodDecl *ASTContext::getCurrentKeyFunction(const CXXRecordDecl *RD) { if (!getTargetInfo().getCXXABI().hasKeyFunctions()) return nullptr; assert(RD->getDefinition() && "Cannot get key function for forward decl!"); RD = RD->getDefinition(); // Beware: // 1) computing the key function might trigger deserialization, which might // invalidate iterators into KeyFunctions // 2) 'get' on the LazyDeclPtr might also trigger deserialization and // invalidate the LazyDeclPtr within the map itself LazyDeclPtr Entry = KeyFunctions[RD]; const Decl *Result = Entry ? Entry.get(getExternalSource()) : computeKeyFunction(*this, RD); // Store it back if it changed. if (Entry.isOffset() || Entry.isValid() != bool(Result)) KeyFunctions[RD] = const_cast(Result); return cast_or_null(Result); } void ASTContext::setNonKeyFunction(const CXXMethodDecl *Method) { assert(Method == Method->getFirstDecl() && "not working with method declaration from class definition"); // Look up the cache entry. Since we're working with the first // declaration, its parent must be the class definition, which is // the correct key for the KeyFunctions hash. const auto &Map = KeyFunctions; auto I = Map.find(Method->getParent()); // If it's not cached, there's nothing to do. if (I == Map.end()) return; // If it is cached, check whether it's the target method, and if so, // remove it from the cache. Note, the call to 'get' might invalidate // the iterator and the LazyDeclPtr object within the map. LazyDeclPtr Ptr = I->second; if (Ptr.get(getExternalSource()) == Method) { // FIXME: remember that we did this for module / chained PCH state? 
KeyFunctions.erase(Method->getParent()); } } static uint64_t getFieldOffset(const ASTContext &C, const FieldDecl *FD) { const ASTRecordLayout &Layout = C.getASTRecordLayout(FD->getParent()); return Layout.getFieldOffset(FD->getFieldIndex()); } uint64_t ASTContext::getFieldOffset(const ValueDecl *VD) const { uint64_t OffsetInBits; if (const FieldDecl *FD = dyn_cast(VD)) { OffsetInBits = ::getFieldOffset(*this, FD); } else { const IndirectFieldDecl *IFD = cast(VD); OffsetInBits = 0; for (const NamedDecl *ND : IFD->chain()) OffsetInBits += ::getFieldOffset(*this, cast(ND)); } return OffsetInBits; } uint64_t ASTContext::lookupFieldBitOffset(const ObjCInterfaceDecl *OID, const ObjCImplementationDecl *ID, const ObjCIvarDecl *Ivar) const { Ivar = Ivar->getCanonicalDecl(); const ObjCInterfaceDecl *Container = Ivar->getContainingInterface(); // FIXME: We should eliminate the need to have ObjCImplementationDecl passed // in here; it should never be necessary because that should be the lexical // decl context for the ivar. // If we know have an implementation (and the ivar is in it) then // look up in the implementation layout. const ASTRecordLayout *RL; if (ID && declaresSameEntity(ID->getClassInterface(), Container)) RL = &getASTObjCImplementationLayout(ID); else RL = &getASTObjCInterfaceLayout(Container); // Compute field index. // // FIXME: The index here is closely tied to how ASTContext::getObjCLayout is // implemented. This should be fixed to get the information from the layout // directly. unsigned Index = 0; for (const ObjCIvarDecl *IVD = Container->all_declared_ivar_begin(); IVD; IVD = IVD->getNextIvar()) { if (Ivar == IVD) break; ++Index; } assert(Index < RL->getFieldCount() && "Ivar is not inside record layout!"); return RL->getFieldOffset(Index); } /// getObjCLayout - Get or compute information about the layout of the /// given interface. /// /// \param Impl - If given, also include the layout of the interface's /// implementation. This may differ by including synthesized ivars. const ASTRecordLayout & ASTContext::getObjCLayout(const ObjCInterfaceDecl *D, const ObjCImplementationDecl *Impl) const { // Retrieve the definition if (D->hasExternalLexicalStorage() && !D->getDefinition()) getExternalSource()->CompleteType(const_cast(D)); D = D->getDefinition(); assert(D && !D->isInvalidDecl() && D->isThisDeclarationADefinition() && "Invalid interface decl!"); // Look up this layout, if already laid out, return what we have. const ObjCContainerDecl *Key = Impl ? (const ObjCContainerDecl*) Impl : (const ObjCContainerDecl*) D; if (const ASTRecordLayout *Entry = ObjCLayouts[Key]) return *Entry; // Add in synthesized ivar count if laying out an implementation. if (Impl) { unsigned SynthCount = CountNonClassIvars(D); // If there aren't any synthesized ivars then reuse the interface // entry. Note we can't cache this because we simply free all // entries later; however we shouldn't look up implementations // frequently. 
if (SynthCount == 0) return getObjCLayout(D, nullptr); } ItaniumRecordLayoutBuilder Builder(*this, /*EmptySubobjects=*/nullptr); Builder.Layout(D); const ASTRecordLayout *NewEntry = new (*this) ASTRecordLayout( *this, Builder.getSize(), Builder.Alignment, Builder.PreferredAlignment, Builder.UnadjustedAlignment, /*RequiredAlignment : used by MS-ABI)*/ Builder.Alignment, Builder.getDataSize(), Builder.FieldOffsets); ObjCLayouts[Key] = NewEntry; return *NewEntry; } static void PrintOffset(raw_ostream &OS, CharUnits Offset, unsigned IndentLevel) { OS << llvm::format("%10" PRId64 " | ", (int64_t)Offset.getQuantity()); OS.indent(IndentLevel * 2); } static void PrintBitFieldOffset(raw_ostream &OS, CharUnits Offset, unsigned Begin, unsigned Width, unsigned IndentLevel) { llvm::SmallString<10> Buffer; { llvm::raw_svector_ostream BufferOS(Buffer); BufferOS << Offset.getQuantity() << ':'; if (Width == 0) { BufferOS << '-'; } else { BufferOS << Begin << '-' << (Begin + Width - 1); } } OS << llvm::right_justify(Buffer, 10) << " | "; OS.indent(IndentLevel * 2); } static void PrintIndentNoOffset(raw_ostream &OS, unsigned IndentLevel) { OS << " | "; OS.indent(IndentLevel * 2); } static void DumpRecordLayout(raw_ostream &OS, const RecordDecl *RD, const ASTContext &C, CharUnits Offset, unsigned IndentLevel, const char* Description, bool PrintSizeInfo, bool IncludeVirtualBases) { const ASTRecordLayout &Layout = C.getASTRecordLayout(RD); auto CXXRD = dyn_cast(RD); PrintOffset(OS, Offset, IndentLevel); OS << C.getTypeDeclType(const_cast(RD)).getAsString(); if (Description) OS << ' ' << Description; if (CXXRD && CXXRD->isEmpty()) OS << " (empty)"; OS << '\n'; IndentLevel++; // Dump bases. if (CXXRD) { const CXXRecordDecl *PrimaryBase = Layout.getPrimaryBase(); bool HasOwnVFPtr = Layout.hasOwnVFPtr(); bool HasOwnVBPtr = Layout.hasOwnVBPtr(); // Vtable pointer. if (CXXRD->isDynamicClass() && !PrimaryBase && !isMsLayout(C)) { PrintOffset(OS, Offset, IndentLevel); OS << '(' << *RD << " vtable pointer)\n"; } else if (HasOwnVFPtr) { PrintOffset(OS, Offset, IndentLevel); // vfptr (for Microsoft C++ ABI) OS << '(' << *RD << " vftable pointer)\n"; } // Collect nvbases. SmallVector Bases; for (const CXXBaseSpecifier &Base : CXXRD->bases()) { assert(!Base.getType()->isDependentType() && "Cannot layout class with dependent bases."); if (!Base.isVirtual()) Bases.push_back(Base.getType()->getAsCXXRecordDecl()); } // Sort nvbases by offset. llvm::stable_sort( Bases, [&](const CXXRecordDecl *L, const CXXRecordDecl *R) { return Layout.getBaseClassOffset(L) < Layout.getBaseClassOffset(R); }); // Dump (non-virtual) bases for (const CXXRecordDecl *Base : Bases) { CharUnits BaseOffset = Offset + Layout.getBaseClassOffset(Base); DumpRecordLayout(OS, Base, C, BaseOffset, IndentLevel, Base == PrimaryBase ? "(primary base)" : "(base)", /*PrintSizeInfo=*/false, /*IncludeVirtualBases=*/false); } // vbptr (for Microsoft C++ ABI) if (HasOwnVBPtr) { PrintOffset(OS, Offset + Layout.getVBPtrOffset(), IndentLevel); OS << '(' << *RD << " vbtable pointer)\n"; } } // Dump fields. uint64_t FieldNo = 0; for (RecordDecl::field_iterator I = RD->field_begin(), E = RD->field_end(); I != E; ++I, ++FieldNo) { const FieldDecl &Field = **I; uint64_t LocalFieldOffsetInBits = Layout.getFieldOffset(FieldNo); CharUnits FieldOffset = Offset + C.toCharUnitsFromBits(LocalFieldOffsetInBits); // Recursively dump fields of record type. 
if (auto RT = Field.getType()->getAs()) { DumpRecordLayout(OS, RT->getDecl(), C, FieldOffset, IndentLevel, Field.getName().data(), /*PrintSizeInfo=*/false, /*IncludeVirtualBases=*/true); continue; } if (Field.isBitField()) { uint64_t LocalFieldByteOffsetInBits = C.toBits(FieldOffset - Offset); unsigned Begin = LocalFieldOffsetInBits - LocalFieldByteOffsetInBits; unsigned Width = Field.getBitWidthValue(C); PrintBitFieldOffset(OS, FieldOffset, Begin, Width, IndentLevel); } else { PrintOffset(OS, FieldOffset, IndentLevel); } const QualType &FieldType = C.getLangOpts().DumpRecordLayoutsCanonical ? Field.getType().getCanonicalType() : Field.getType(); OS << FieldType.getAsString() << ' ' << Field << '\n'; } // Dump virtual bases. if (CXXRD && IncludeVirtualBases) { const ASTRecordLayout::VBaseOffsetsMapTy &VtorDisps = Layout.getVBaseOffsetsMap(); for (const CXXBaseSpecifier &Base : CXXRD->vbases()) { assert(Base.isVirtual() && "Found non-virtual class!"); const CXXRecordDecl *VBase = Base.getType()->getAsCXXRecordDecl(); CharUnits VBaseOffset = Offset + Layout.getVBaseClassOffset(VBase); if (VtorDisps.find(VBase)->second.hasVtorDisp()) { PrintOffset(OS, VBaseOffset - CharUnits::fromQuantity(4), IndentLevel); OS << "(vtordisp for vbase " << *VBase << ")\n"; } DumpRecordLayout(OS, VBase, C, VBaseOffset, IndentLevel, VBase == Layout.getPrimaryBase() ? "(primary virtual base)" : "(virtual base)", /*PrintSizeInfo=*/false, /*IncludeVirtualBases=*/false); } } if (!PrintSizeInfo) return; PrintIndentNoOffset(OS, IndentLevel - 1); OS << "[sizeof=" << Layout.getSize().getQuantity(); if (CXXRD && !isMsLayout(C)) OS << ", dsize=" << Layout.getDataSize().getQuantity(); OS << ", align=" << Layout.getAlignment().getQuantity(); if (C.getTargetInfo().defaultsToAIXPowerAlignment()) OS << ", preferredalign=" << Layout.getPreferredAlignment().getQuantity(); if (CXXRD) { OS << ",\n"; PrintIndentNoOffset(OS, IndentLevel - 1); OS << " nvsize=" << Layout.getNonVirtualSize().getQuantity(); OS << ", nvalign=" << Layout.getNonVirtualAlignment().getQuantity(); if (C.getTargetInfo().defaultsToAIXPowerAlignment()) OS << ", preferrednvalign=" << Layout.getPreferredNVAlignment().getQuantity(); } OS << "]\n"; } void ASTContext::DumpRecordLayout(const RecordDecl *RD, raw_ostream &OS, bool Simple) const { if (!Simple) { ::DumpRecordLayout(OS, RD, *this, CharUnits(), 0, nullptr, /*PrintSizeInfo*/ true, /*IncludeVirtualBases=*/true); return; } // The "simple" format is designed to be parsed by the // layout-override testing code. There shouldn't be any external // uses of this format --- when LLDB overrides a layout, it sets up // the data structures directly --- so feel free to adjust this as // you like as long as you also update the rudimentary parser for it // in libFrontend. 
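  // The emitted text looks roughly like the sketch below (offsets are in
  // bits; the concrete values are made up for illustration):
  //
  //   Layout: <ASTRecordLayout
  //     Size:64
  //     DataSize:64
  //     Alignment:32
  //     FieldOffsets: [0, 32]>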
  const ASTRecordLayout &Info = getASTRecordLayout(RD);
  OS << "Type: " << getTypeDeclType(RD).getAsString() << "\n";
  OS << "\nLayout: ";
  OS << "<ASTRecordLayout\n";
  OS << "  Size:" << toBits(Info.getSize()) << "\n";
  if (!isMsLayout(*this))
    OS << "  DataSize:" << toBits(Info.getDataSize()) << "\n";
  OS << "  Alignment:" << toBits(Info.getAlignment()) << "\n";
  if (Target->defaultsToAIXPowerAlignment())
    OS << "  PreferredAlignment:" << toBits(Info.getPreferredAlignment())
       << "\n";
  OS << "  FieldOffsets: [";
  for (unsigned i = 0, e = Info.getFieldCount(); i != e; ++i) {
    if (i)
      OS << ", ";
    OS << Info.getFieldOffset(i);
  }
  OS << "]>\n";
}
diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/PPCLinux.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/PPCLinux.cpp
index e480d8bd8703..2fea262fd109 100644
--- a/contrib/llvm-project/clang/lib/Driver/ToolChains/PPCLinux.cpp
+++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/PPCLinux.cpp
@@ -1,85 +1,87 @@
//===-- PPCLinux.cpp - PowerPC ToolChain Implementations --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "PPCLinux.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/Options.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"

using namespace clang::driver;
using namespace clang::driver::toolchains;
using namespace llvm::opt;
using namespace llvm::sys;

// Glibc older than 2.32 doesn't fully support IEEE float128. Here we check
// glibc version by looking at dynamic linker name.
static bool GlibcSupportsFloat128(const std::string &Linker) {
  llvm::SmallVector Path;
  // Resolve potential symlinks to linker.
  if (fs::real_path(Linker, Path))
    return false;
  llvm::StringRef LinkerName =
      path::filename(llvm::StringRef(Path.data(), Path.size()));

  // Since glibc 2.34, the installed .so file is not symlink anymore. But we can
  // still safely assume it's newer than 2.32.
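  // Hedged examples of the dynamic linker names this handles (illustrative,
  // not exhaustive):
  //   "ld64.so.2"  -> non-symlink install name used since glibc 2.34: accept.
  //   "ld-2.31.so" -> parsed minor version 31 < 32: reject.
  //   "ld-2.33.so" -> parsed minor version 33 >= 32: accept.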
if (LinkerName.startswith("ld64.so")) return true; if (!LinkerName.startswith("ld-2.")) return false; unsigned Minor = (LinkerName[5] - '0') * 10 + (LinkerName[6] - '0'); if (Minor < 32) return false; return true; } PPCLinuxToolChain::PPCLinuxToolChain(const Driver &D, const llvm::Triple &Triple, const llvm::opt::ArgList &Args) : Linux(D, Triple, Args) { if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) { StringRef ABIName = A->getValue(); if (ABIName == "ieeelongdouble" && !SupportIEEEFloat128(D, Triple, Args)) D.Diag(diag::warn_drv_unsupported_float_abi_by_lib) << ABIName; } } void PPCLinuxToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { if (!DriverArgs.hasArg(clang::driver::options::OPT_nostdinc) && !DriverArgs.hasArg(options::OPT_nobuiltininc)) { const Driver &D = getDriver(); SmallString<128> P(D.ResourceDir); llvm::sys::path::append(P, "include", "ppc_wrappers"); addSystemInclude(DriverArgs, CC1Args, P); } Linux::AddClangSystemIncludeArgs(DriverArgs, CC1Args); } bool PPCLinuxToolChain::SupportIEEEFloat128( const Driver &D, const llvm::Triple &Triple, const llvm::opt::ArgList &Args) const { if (!Triple.isLittleEndian() || !Triple.isPPC64()) return false; if (Args.hasArg(options::OPT_nostdlib, options::OPT_nostdlibxx)) return true; + CXXStdlibType StdLib = ToolChain::GetCXXStdlibType(Args); bool HasUnsupportedCXXLib = - ToolChain::GetCXXStdlibType(Args) == CST_Libcxx && - GCCInstallation.getVersion().isOlderThan(12, 1, 0); + StdLib == CST_Libcxx || + (StdLib == CST_Libstdcxx && + GCCInstallation.getVersion().isOlderThan(12, 1, 0)); return GlibcSupportsFloat128(Linux::getDynamicLinker(Args)) && !(D.CCCIsCXX() && HasUnsupportedCXXLib); } diff --git a/contrib/llvm-project/clang/lib/Frontend/CompilerInvocation.cpp b/contrib/llvm-project/clang/lib/Frontend/CompilerInvocation.cpp index 553a0b31c0ab..7f1ce3da7e7e 100644 --- a/contrib/llvm-project/clang/lib/Frontend/CompilerInvocation.cpp +++ b/contrib/llvm-project/clang/lib/Frontend/CompilerInvocation.cpp @@ -1,4705 +1,4701 @@ //===- CompilerInvocation.cpp ---------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "clang/Frontend/CompilerInvocation.h" #include "TestModuleFileExtension.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/CharInfo.h" #include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/CommentOptions.h" #include "clang/Basic/DebugInfoOptions.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/DiagnosticDriver.h" #include "clang/Basic/DiagnosticOptions.h" #include "clang/Basic/FileSystemOptions.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/LangStandard.h" #include "clang/Basic/ObjCRuntime.h" #include "clang/Basic/Sanitizers.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/TargetOptions.h" #include "clang/Basic/Version.h" #include "clang/Basic/Visibility.h" #include "clang/Basic/XRayInstr.h" #include "clang/Config/config.h" #include "clang/Driver/Driver.h" #include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/Options.h" #include "clang/Frontend/CommandLineSourceLoc.h" #include "clang/Frontend/DependencyOutputOptions.h" #include "clang/Frontend/FrontendDiagnostic.h" #include "clang/Frontend/FrontendOptions.h" #include "clang/Frontend/FrontendPluginRegistry.h" #include "clang/Frontend/MigratorOptions.h" #include "clang/Frontend/PreprocessorOutputOptions.h" #include "clang/Frontend/TextDiagnosticBuffer.h" #include "clang/Frontend/Utils.h" #include "clang/Lex/HeaderSearchOptions.h" #include "clang/Lex/PreprocessorOptions.h" #include "clang/Sema/CodeCompleteOptions.h" #include "clang/Serialization/ASTBitCodes.h" #include "clang/Serialization/ModuleFileExtension.h" #include "clang/StaticAnalyzer/Core/AnalyzerOptions.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FloatingPointMode.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/Linker/Linker.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/OptSpecifier.h" #include "llvm/Option/OptTable.h" #include "llvm/Option/Option.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/Remarks/HotnessThresholdParser.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/HashBuilder.h" #include "llvm/Support/Host.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/Regex.h" #include "llvm/Support/VersionTuple.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" #include #include #include #include #include #include #include #include #include #include #include using namespace clang; using namespace driver; using namespace options; using namespace llvm::opt; 
//===----------------------------------------------------------------------===// // Initialization. //===----------------------------------------------------------------------===// CompilerInvocationRefBase::CompilerInvocationRefBase() : LangOpts(new LangOptions()), TargetOpts(new TargetOptions()), DiagnosticOpts(new DiagnosticOptions()), HeaderSearchOpts(new HeaderSearchOptions()), PreprocessorOpts(new PreprocessorOptions()), AnalyzerOpts(new AnalyzerOptions()) {} CompilerInvocationRefBase::CompilerInvocationRefBase( const CompilerInvocationRefBase &X) : LangOpts(new LangOptions(*X.getLangOpts())), TargetOpts(new TargetOptions(X.getTargetOpts())), DiagnosticOpts(new DiagnosticOptions(X.getDiagnosticOpts())), HeaderSearchOpts(new HeaderSearchOptions(X.getHeaderSearchOpts())), PreprocessorOpts(new PreprocessorOptions(X.getPreprocessorOpts())), AnalyzerOpts(new AnalyzerOptions(*X.getAnalyzerOpts())) {} CompilerInvocationRefBase::CompilerInvocationRefBase( CompilerInvocationRefBase &&X) = default; CompilerInvocationRefBase & CompilerInvocationRefBase::operator=(CompilerInvocationRefBase X) { LangOpts.swap(X.LangOpts); TargetOpts.swap(X.TargetOpts); DiagnosticOpts.swap(X.DiagnosticOpts); HeaderSearchOpts.swap(X.HeaderSearchOpts); PreprocessorOpts.swap(X.PreprocessorOpts); AnalyzerOpts.swap(X.AnalyzerOpts); return *this; } CompilerInvocationRefBase & CompilerInvocationRefBase::operator=(CompilerInvocationRefBase &&X) = default; CompilerInvocationRefBase::~CompilerInvocationRefBase() = default; //===----------------------------------------------------------------------===// // Normalizers //===----------------------------------------------------------------------===// #define SIMPLE_ENUM_VALUE_TABLE #include "clang/Driver/Options.inc" #undef SIMPLE_ENUM_VALUE_TABLE static llvm::Optional normalizeSimpleFlag(OptSpecifier Opt, unsigned TableIndex, const ArgList &Args, DiagnosticsEngine &Diags) { if (Args.hasArg(Opt)) return true; return None; } static Optional normalizeSimpleNegativeFlag(OptSpecifier Opt, unsigned, const ArgList &Args, DiagnosticsEngine &) { if (Args.hasArg(Opt)) return false; return None; } /// The tblgen-erated code passes in a fifth parameter of an arbitrary type, but /// denormalizeSimpleFlags never looks at it. Avoid bloating compile-time with /// unnecessary template instantiations and just ignore it with a variadic /// argument. static void denormalizeSimpleFlag(SmallVectorImpl &Args, const char *Spelling, CompilerInvocation::StringAllocator, Option::OptionClass, unsigned, /*T*/...) { Args.push_back(Spelling); } template static constexpr bool is_uint64_t_convertible() { return !std::is_same::value && llvm::is_integral_or_enum::value; } template (), bool> = false> static auto makeFlagToValueNormalizer(T Value) { return [Value](OptSpecifier Opt, unsigned, const ArgList &Args, DiagnosticsEngine &) -> Optional { if (Args.hasArg(Opt)) return Value; return None; }; } template (), bool> = false> static auto makeFlagToValueNormalizer(T Value) { return makeFlagToValueNormalizer(uint64_t(Value)); } static auto makeBooleanOptionNormalizer(bool Value, bool OtherValue, OptSpecifier OtherOpt) { return [Value, OtherValue, OtherOpt](OptSpecifier Opt, unsigned, const ArgList &Args, DiagnosticsEngine &) -> Optional { if (const Arg *A = Args.getLastArg(Opt, OtherOpt)) { return A->getOption().matches(Opt) ? 
Value : OtherValue; } return None; }; } static auto makeBooleanOptionDenormalizer(bool Value) { return [Value](SmallVectorImpl &Args, const char *Spelling, CompilerInvocation::StringAllocator, Option::OptionClass, unsigned, bool KeyPath) { if (KeyPath == Value) Args.push_back(Spelling); }; } static void denormalizeStringImpl(SmallVectorImpl &Args, const char *Spelling, CompilerInvocation::StringAllocator SA, Option::OptionClass OptClass, unsigned, const Twine &Value) { switch (OptClass) { case Option::SeparateClass: case Option::JoinedOrSeparateClass: case Option::JoinedAndSeparateClass: Args.push_back(Spelling); Args.push_back(SA(Value)); break; case Option::JoinedClass: case Option::CommaJoinedClass: Args.push_back(SA(Twine(Spelling) + Value)); break; default: llvm_unreachable("Cannot denormalize an option with option class " "incompatible with string denormalization."); } } template static void denormalizeString(SmallVectorImpl &Args, const char *Spelling, CompilerInvocation::StringAllocator SA, Option::OptionClass OptClass, unsigned TableIndex, T Value) { denormalizeStringImpl(Args, Spelling, SA, OptClass, TableIndex, Twine(Value)); } static Optional findValueTableByName(const SimpleEnumValueTable &Table, StringRef Name) { for (int I = 0, E = Table.Size; I != E; ++I) if (Name == Table.Table[I].Name) return Table.Table[I]; return None; } static Optional findValueTableByValue(const SimpleEnumValueTable &Table, unsigned Value) { for (int I = 0, E = Table.Size; I != E; ++I) if (Value == Table.Table[I].Value) return Table.Table[I]; return None; } static llvm::Optional normalizeSimpleEnum(OptSpecifier Opt, unsigned TableIndex, const ArgList &Args, DiagnosticsEngine &Diags) { assert(TableIndex < SimpleEnumValueTablesSize); const SimpleEnumValueTable &Table = SimpleEnumValueTables[TableIndex]; auto *Arg = Args.getLastArg(Opt); if (!Arg) return None; StringRef ArgValue = Arg->getValue(); if (auto MaybeEnumVal = findValueTableByName(Table, ArgValue)) return MaybeEnumVal->Value; Diags.Report(diag::err_drv_invalid_value) << Arg->getAsString(Args) << ArgValue; return None; } static void denormalizeSimpleEnumImpl(SmallVectorImpl &Args, const char *Spelling, CompilerInvocation::StringAllocator SA, Option::OptionClass OptClass, unsigned TableIndex, unsigned Value) { assert(TableIndex < SimpleEnumValueTablesSize); const SimpleEnumValueTable &Table = SimpleEnumValueTables[TableIndex]; if (auto MaybeEnumVal = findValueTableByValue(Table, Value)) { denormalizeString(Args, Spelling, SA, OptClass, TableIndex, MaybeEnumVal->Name); } else { llvm_unreachable("The simple enum value was not correctly defined in " "the tablegen option description"); } } template static void denormalizeSimpleEnum(SmallVectorImpl &Args, const char *Spelling, CompilerInvocation::StringAllocator SA, Option::OptionClass OptClass, unsigned TableIndex, T Value) { return denormalizeSimpleEnumImpl(Args, Spelling, SA, OptClass, TableIndex, static_cast(Value)); } static Optional normalizeString(OptSpecifier Opt, int TableIndex, const ArgList &Args, DiagnosticsEngine &Diags) { auto *Arg = Args.getLastArg(Opt); if (!Arg) return None; return std::string(Arg->getValue()); } template static Optional normalizeStringIntegral(OptSpecifier Opt, int, const ArgList &Args, DiagnosticsEngine &Diags) { auto *Arg = Args.getLastArg(Opt); if (!Arg) return None; IntTy Res; if (StringRef(Arg->getValue()).getAsInteger(0, Res)) { Diags.Report(diag::err_drv_invalid_int_value) << Arg->getAsString(Args) << Arg->getValue(); return None; } return Res; } static 
Optional> normalizeStringVector(OptSpecifier Opt, int, const ArgList &Args, DiagnosticsEngine &) { return Args.getAllArgValues(Opt); } static void denormalizeStringVector(SmallVectorImpl &Args, const char *Spelling, CompilerInvocation::StringAllocator SA, Option::OptionClass OptClass, unsigned TableIndex, const std::vector &Values) { switch (OptClass) { case Option::CommaJoinedClass: { std::string CommaJoinedValue; if (!Values.empty()) { CommaJoinedValue.append(Values.front()); for (const std::string &Value : llvm::drop_begin(Values, 1)) { CommaJoinedValue.append(","); CommaJoinedValue.append(Value); } } denormalizeString(Args, Spelling, SA, Option::OptionClass::JoinedClass, TableIndex, CommaJoinedValue); break; } case Option::JoinedClass: case Option::SeparateClass: case Option::JoinedOrSeparateClass: for (const std::string &Value : Values) denormalizeString(Args, Spelling, SA, OptClass, TableIndex, Value); break; default: llvm_unreachable("Cannot denormalize an option with option class " "incompatible with string vector denormalization."); } } static Optional normalizeTriple(OptSpecifier Opt, int TableIndex, const ArgList &Args, DiagnosticsEngine &Diags) { auto *Arg = Args.getLastArg(Opt); if (!Arg) return None; return llvm::Triple::normalize(Arg->getValue()); } template static T mergeForwardValue(T KeyPath, U Value) { return static_cast(Value); } template static T mergeMaskValue(T KeyPath, U Value) { return KeyPath | Value; } template static T extractForwardValue(T KeyPath) { return KeyPath; } template static T extractMaskValue(T KeyPath) { return ((KeyPath & Value) == Value) ? static_cast(Value) : T(); } #define PARSE_OPTION_WITH_MARSHALLING( \ ARGS, DIAGS, ID, FLAGS, PARAM, SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, MERGER, TABLE_INDEX) \ if ((FLAGS)&options::CC1Option) { \ KEYPATH = MERGER(KEYPATH, DEFAULT_VALUE); \ if (IMPLIED_CHECK) \ KEYPATH = MERGER(KEYPATH, IMPLIED_VALUE); \ if (SHOULD_PARSE) \ if (auto MaybeValue = NORMALIZER(OPT_##ID, TABLE_INDEX, ARGS, DIAGS)) \ KEYPATH = \ MERGER(KEYPATH, static_cast(*MaybeValue)); \ } // Capture the extracted value as a lambda argument to avoid potential issues // with lifetime extension of the reference. #define GENERATE_OPTION_WITH_MARSHALLING( \ ARGS, STRING_ALLOCATOR, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, \ TABLE_INDEX) \ if ((FLAGS)&options::CC1Option) { \ [&](const auto &Extracted) { \ if (ALWAYS_EMIT || \ (Extracted != \ static_cast((IMPLIED_CHECK) ? 
(IMPLIED_VALUE) \ : (DEFAULT_VALUE)))) \ DENORMALIZER(ARGS, SPELLING, STRING_ALLOCATOR, Option::KIND##Class, \ TABLE_INDEX, Extracted); \ }(EXTRACTOR(KEYPATH)); \ } static StringRef GetInputKindName(InputKind IK); static bool FixupInvocation(CompilerInvocation &Invocation, DiagnosticsEngine &Diags, const ArgList &Args, InputKind IK) { unsigned NumErrorsBefore = Diags.getNumErrors(); LangOptions &LangOpts = *Invocation.getLangOpts(); CodeGenOptions &CodeGenOpts = Invocation.getCodeGenOpts(); TargetOptions &TargetOpts = Invocation.getTargetOpts(); FrontendOptions &FrontendOpts = Invocation.getFrontendOpts(); CodeGenOpts.XRayInstrumentFunctions = LangOpts.XRayInstrument; CodeGenOpts.XRayAlwaysEmitCustomEvents = LangOpts.XRayAlwaysEmitCustomEvents; CodeGenOpts.XRayAlwaysEmitTypedEvents = LangOpts.XRayAlwaysEmitTypedEvents; CodeGenOpts.DisableFree = FrontendOpts.DisableFree; FrontendOpts.GenerateGlobalModuleIndex = FrontendOpts.UseGlobalModuleIndex; if (FrontendOpts.ShowStats) CodeGenOpts.ClearASTBeforeBackend = false; LangOpts.SanitizeCoverage = CodeGenOpts.hasSanitizeCoverage(); LangOpts.ForceEmitVTables = CodeGenOpts.ForceEmitVTables; LangOpts.SpeculativeLoadHardening = CodeGenOpts.SpeculativeLoadHardening; LangOpts.CurrentModule = LangOpts.ModuleName; llvm::Triple T(TargetOpts.Triple); llvm::Triple::ArchType Arch = T.getArch(); CodeGenOpts.CodeModel = TargetOpts.CodeModel; if (LangOpts.getExceptionHandling() != LangOptions::ExceptionHandlingKind::None && T.isWindowsMSVCEnvironment()) Diags.Report(diag::err_fe_invalid_exception_model) << static_cast(LangOpts.getExceptionHandling()) << T.str(); if (LangOpts.AppleKext && !LangOpts.CPlusPlus) Diags.Report(diag::warn_c_kext); if (Args.hasArg(OPT_fconcepts_ts)) Diags.Report(diag::warn_fe_concepts_ts_flag); if (LangOpts.NewAlignOverride && !llvm::isPowerOf2_32(LangOpts.NewAlignOverride)) { Arg *A = Args.getLastArg(OPT_fnew_alignment_EQ); Diags.Report(diag::err_fe_invalid_alignment) << A->getAsString(Args) << A->getValue(); LangOpts.NewAlignOverride = 0; } // Prevent the user from specifying both -fsycl-is-device and -fsycl-is-host. if (LangOpts.SYCLIsDevice && LangOpts.SYCLIsHost) Diags.Report(diag::err_drv_argument_not_allowed_with) << "-fsycl-is-device" << "-fsycl-is-host"; if (Args.hasArg(OPT_fgnu89_inline) && LangOpts.CPlusPlus) Diags.Report(diag::err_drv_argument_not_allowed_with) << "-fgnu89-inline" << GetInputKindName(IK); if (Args.hasArg(OPT_fgpu_allow_device_init) && !LangOpts.HIP) Diags.Report(diag::warn_ignored_hip_only_option) << Args.getLastArg(OPT_fgpu_allow_device_init)->getAsString(Args); if (Args.hasArg(OPT_gpu_max_threads_per_block_EQ) && !LangOpts.HIP) Diags.Report(diag::warn_ignored_hip_only_option) << Args.getLastArg(OPT_gpu_max_threads_per_block_EQ)->getAsString(Args); // -cl-strict-aliasing needs to emit diagnostic in the case where CL > 1.0. // This option should be deprecated for CL > 1.0 because // this option was added for compatibility with OpenCL 1.0. 
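  // For example (illustrative invocations): "-cl-std=CL2.0 -cl-strict-aliasing"
  // hits the warning below because the compatible version is 200, while
  // "-cl-std=CL1.0 -cl-strict-aliasing" stays silent at exactly 100.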
if (Args.getLastArg(OPT_cl_strict_aliasing) && (LangOpts.getOpenCLCompatibleVersion() > 100)) Diags.Report(diag::warn_option_invalid_ocl_version) << LangOpts.getOpenCLVersionString() << Args.getLastArg(OPT_cl_strict_aliasing)->getAsString(Args); if (Arg *A = Args.getLastArg(OPT_fdefault_calling_conv_EQ)) { auto DefaultCC = LangOpts.getDefaultCallingConv(); bool emitError = (DefaultCC == LangOptions::DCC_FastCall || DefaultCC == LangOptions::DCC_StdCall) && Arch != llvm::Triple::x86; emitError |= (DefaultCC == LangOptions::DCC_VectorCall || DefaultCC == LangOptions::DCC_RegCall) && !T.isX86(); if (emitError) Diags.Report(diag::err_drv_argument_not_allowed_with) << A->getSpelling() << T.getTriple(); } if (!CodeGenOpts.ProfileRemappingFile.empty() && CodeGenOpts.LegacyPassManager) Diags.Report(diag::err_drv_argument_only_allowed_with) << Args.getLastArg(OPT_fprofile_remapping_file_EQ)->getAsString(Args) << "-fno-legacy-pass-manager"; return Diags.getNumErrors() == NumErrorsBefore; } //===----------------------------------------------------------------------===// // Deserialization (from args) //===----------------------------------------------------------------------===// static unsigned getOptimizationLevel(ArgList &Args, InputKind IK, DiagnosticsEngine &Diags) { unsigned DefaultOpt = llvm::CodeGenOpt::None; if ((IK.getLanguage() == Language::OpenCL || IK.getLanguage() == Language::OpenCLCXX) && !Args.hasArg(OPT_cl_opt_disable)) DefaultOpt = llvm::CodeGenOpt::Default; if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { if (A->getOption().matches(options::OPT_O0)) return llvm::CodeGenOpt::None; if (A->getOption().matches(options::OPT_Ofast)) return llvm::CodeGenOpt::Aggressive; assert(A->getOption().matches(options::OPT_O)); StringRef S(A->getValue()); if (S == "s" || S == "z") return llvm::CodeGenOpt::Default; if (S == "g") return llvm::CodeGenOpt::Less; return getLastArgIntValue(Args, OPT_O, DefaultOpt, Diags); } return DefaultOpt; } static unsigned getOptimizationLevelSize(ArgList &Args) { if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { if (A->getOption().matches(options::OPT_O)) { switch (A->getValue()[0]) { default: return 0; case 's': return 1; case 'z': return 2; } } } return 0; } static void GenerateArg(SmallVectorImpl &Args, llvm::opt::OptSpecifier OptSpecifier, CompilerInvocation::StringAllocator SA) { Option Opt = getDriverOptTable().getOption(OptSpecifier); denormalizeSimpleFlag(Args, SA(Opt.getPrefix() + Opt.getName()), SA, Option::OptionClass::FlagClass, 0); } static void GenerateArg(SmallVectorImpl &Args, llvm::opt::OptSpecifier OptSpecifier, const Twine &Value, CompilerInvocation::StringAllocator SA) { Option Opt = getDriverOptTable().getOption(OptSpecifier); denormalizeString(Args, SA(Opt.getPrefix() + Opt.getName()), SA, Opt.getKind(), 0, Value); } // Parse command line arguments into CompilerInvocation. using ParseFn = llvm::function_ref, DiagnosticsEngine &, const char *)>; // Generate command line arguments from CompilerInvocation. using GenerateFn = llvm::function_ref &, CompilerInvocation::StringAllocator)>; // May perform round-trip of command line arguments. By default, the round-trip // is enabled in assert builds. This can be overwritten at run-time via the // "-round-trip-args" and "-no-round-trip-args" command line flags. // During round-trip, the command line arguments are parsed into a dummy // instance of CompilerInvocation which is used to generate the command line // arguments again. 
The real CompilerInvocation instance is then created by // parsing the generated arguments, not the original ones. static bool RoundTrip(ParseFn Parse, GenerateFn Generate, CompilerInvocation &RealInvocation, CompilerInvocation &DummyInvocation, ArrayRef CommandLineArgs, DiagnosticsEngine &Diags, const char *Argv0) { #ifndef NDEBUG bool DoRoundTripDefault = true; #else bool DoRoundTripDefault = false; #endif bool DoRoundTrip = DoRoundTripDefault; for (const auto *Arg : CommandLineArgs) { if (Arg == StringRef("-round-trip-args")) DoRoundTrip = true; if (Arg == StringRef("-no-round-trip-args")) DoRoundTrip = false; } // If round-trip was not requested, simply run the parser with the real // invocation diagnostics. if (!DoRoundTrip) return Parse(RealInvocation, CommandLineArgs, Diags, Argv0); // Serializes quoted (and potentially escaped) arguments. auto SerializeArgs = [](ArrayRef Args) { std::string Buffer; llvm::raw_string_ostream OS(Buffer); for (const char *Arg : Args) { llvm::sys::printArg(OS, Arg, /*Quote=*/true); OS << ' '; } OS.flush(); return Buffer; }; // Setup a dummy DiagnosticsEngine. DiagnosticsEngine DummyDiags(new DiagnosticIDs(), new DiagnosticOptions()); DummyDiags.setClient(new TextDiagnosticBuffer()); // Run the first parse on the original arguments with the dummy invocation and // diagnostics. if (!Parse(DummyInvocation, CommandLineArgs, DummyDiags, Argv0) || DummyDiags.getNumWarnings() != 0) { // If the first parse did not succeed, it must be user mistake (invalid // command line arguments). We won't be able to generate arguments that // would reproduce the same result. Let's fail again with the real // invocation and diagnostics, so all side-effects of parsing are visible. unsigned NumWarningsBefore = Diags.getNumWarnings(); auto Success = Parse(RealInvocation, CommandLineArgs, Diags, Argv0); if (!Success || Diags.getNumWarnings() != NumWarningsBefore) return Success; // Parse with original options and diagnostics succeeded even though it // shouldn't have. Something is off. Diags.Report(diag::err_cc1_round_trip_fail_then_ok); Diags.Report(diag::note_cc1_round_trip_original) << SerializeArgs(CommandLineArgs); return false; } // Setup string allocator. llvm::BumpPtrAllocator Alloc; llvm::StringSaver StringPool(Alloc); auto SA = [&StringPool](const Twine &Arg) { return StringPool.save(Arg).data(); }; // Generate arguments from the dummy invocation. If Generate is the // inverse of Parse, the newly generated arguments must have the same // semantics as the original. SmallVector GeneratedArgs1; Generate(DummyInvocation, GeneratedArgs1, SA); // Run the second parse, now on the generated arguments, and with the real // invocation and diagnostics. The result is what we will end up using for the // rest of compilation, so if Generate is not inverse of Parse, something down // the line will break. bool Success2 = Parse(RealInvocation, GeneratedArgs1, Diags, Argv0); // The first parse on original arguments succeeded, but second parse of // generated arguments failed. Something must be wrong with the generator. if (!Success2) { Diags.Report(diag::err_cc1_round_trip_ok_then_fail); Diags.Report(diag::note_cc1_round_trip_generated) << 1 << SerializeArgs(GeneratedArgs1); return false; } // Generate arguments again, this time from the options we will end up using // for the rest of the compilation. SmallVector GeneratedArgs2; Generate(RealInvocation, GeneratedArgs2, SA); // Compares two lists of generated arguments. 
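// A rough sketch of the round-trip performed by this function (when enabled):
//
//   Parse(DummyInvocation, OriginalArgs)   // must succeed with no warnings
//   GeneratedArgs1 = Generate(DummyInvocation)
//   Parse(RealInvocation, GeneratedArgs1)  // the invocation actually used
//   GeneratedArgs2 = Generate(RealInvocation)
//   require GeneratedArgs1 == GeneratedArgs2  // compared element-wise below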
  auto Equal = [](const ArrayRef<const char *> A,
                  const ArrayRef<const char *> B) {
    return std::equal(A.begin(), A.end(), B.begin(), B.end(),
                      [](const char *AElem, const char *BElem) {
                        return StringRef(AElem) == StringRef(BElem);
                      });
  };

  // If we generated different arguments from what we assume are two
  // semantically equivalent CompilerInvocations, the Generate function may
  // be non-deterministic.
  if (!Equal(GeneratedArgs1, GeneratedArgs2)) {
    Diags.Report(diag::err_cc1_round_trip_mismatch);
    Diags.Report(diag::note_cc1_round_trip_generated)
        << 1 << SerializeArgs(GeneratedArgs1);
    Diags.Report(diag::note_cc1_round_trip_generated)
        << 2 << SerializeArgs(GeneratedArgs2);
    return false;
  }

  Diags.Report(diag::remark_cc1_round_trip_generated)
      << 1 << SerializeArgs(GeneratedArgs1);
  Diags.Report(diag::remark_cc1_round_trip_generated)
      << 2 << SerializeArgs(GeneratedArgs2);

  return Success2;
}

static void addDiagnosticArgs(ArgList &Args, OptSpecifier Group,
                              OptSpecifier GroupWithValue,
                              std::vector<std::string> &Diagnostics) {
  for (auto *A : Args.filtered(Group)) {
    if (A->getOption().getKind() == Option::FlagClass) {
      // The argument is a pure flag (such as OPT_Wall or OPT_Wdeprecated). Add
      // its name (minus the "W" or "R" at the beginning) to the diagnostics.
      Diagnostics.push_back(
          std::string(A->getOption().getName().drop_front(1)));
    } else if (A->getOption().matches(GroupWithValue)) {
      // This is -Wfoo= or -Rfoo=, where foo is the name of the diagnostic
      // group. Add only the group name to the diagnostics.
      Diagnostics.push_back(
          std::string(A->getOption().getName().drop_front(1).rtrim("=-")));
    } else {
      // Otherwise, add its value (for OPT_W_Joined and similar).
      Diagnostics.push_back(A->getValue());
    }
  }
}

// Parse the Static Analyzer configuration. If \p Diags is set to nullptr,
// it won't verify the input.
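// For example (a sketch using keys documented for the static analyzer):
//
//   -analyzer-config mode=shallow,ipa=none
//
// is first split on ',' and '=' by ParseAnalyzerArgs, which stores the raw
// strings in AnalyzerOptions::Config; parseAnalyzerConfigs then copies each
// recognised key into its typed AnalyzerOptions field, falling back to the
// default from AnalyzerOptions.def and, when Diags is non-null, diagnosing
// invalid values.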
static void parseAnalyzerConfigs(AnalyzerOptions &AnOpts, DiagnosticsEngine *Diags); static void getAllNoBuiltinFuncValues(ArgList &Args, std::vector &Funcs) { std::vector Values = Args.getAllArgValues(OPT_fno_builtin_); auto BuiltinEnd = llvm::partition(Values, Builtin::Context::isBuiltinFunc); Funcs.insert(Funcs.end(), Values.begin(), BuiltinEnd); } static void GenerateAnalyzerArgs(AnalyzerOptions &Opts, SmallVectorImpl &Args, CompilerInvocation::StringAllocator SA) { const AnalyzerOptions *AnalyzerOpts = &Opts; #define ANALYZER_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ GENERATE_OPTION_WITH_MARSHALLING( \ Args, SA, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef ANALYZER_OPTION_WITH_MARSHALLING if (Opts.AnalysisStoreOpt != RegionStoreModel) { switch (Opts.AnalysisStoreOpt) { #define ANALYSIS_STORE(NAME, CMDFLAG, DESC, CREATFN) \ case NAME##Model: \ GenerateArg(Args, OPT_analyzer_store, CMDFLAG, SA); \ break; #include "clang/StaticAnalyzer/Core/Analyses.def" default: llvm_unreachable("Tried to generate unknown analysis store."); } } if (Opts.AnalysisConstraintsOpt != RangeConstraintsModel) { switch (Opts.AnalysisConstraintsOpt) { #define ANALYSIS_CONSTRAINTS(NAME, CMDFLAG, DESC, CREATFN) \ case NAME##Model: \ GenerateArg(Args, OPT_analyzer_constraints, CMDFLAG, SA); \ break; #include "clang/StaticAnalyzer/Core/Analyses.def" default: llvm_unreachable("Tried to generate unknown analysis constraint."); } } if (Opts.AnalysisDiagOpt != PD_HTML) { switch (Opts.AnalysisDiagOpt) { #define ANALYSIS_DIAGNOSTICS(NAME, CMDFLAG, DESC, CREATFN) \ case PD_##NAME: \ GenerateArg(Args, OPT_analyzer_output, CMDFLAG, SA); \ break; #include "clang/StaticAnalyzer/Core/Analyses.def" default: llvm_unreachable("Tried to generate unknown analysis diagnostic client."); } } if (Opts.AnalysisPurgeOpt != PurgeStmt) { switch (Opts.AnalysisPurgeOpt) { #define ANALYSIS_PURGE(NAME, CMDFLAG, DESC) \ case NAME: \ GenerateArg(Args, OPT_analyzer_purge, CMDFLAG, SA); \ break; #include "clang/StaticAnalyzer/Core/Analyses.def" default: llvm_unreachable("Tried to generate unknown analysis purge mode."); } } if (Opts.InliningMode != NoRedundancy) { switch (Opts.InliningMode) { #define ANALYSIS_INLINING_MODE(NAME, CMDFLAG, DESC) \ case NAME: \ GenerateArg(Args, OPT_analyzer_inlining_mode, CMDFLAG, SA); \ break; #include "clang/StaticAnalyzer/Core/Analyses.def" default: llvm_unreachable("Tried to generate unknown analysis inlining mode."); } } for (const auto &CP : Opts.CheckersAndPackages) { OptSpecifier Opt = CP.second ? OPT_analyzer_checker : OPT_analyzer_disable_checker; GenerateArg(Args, Opt, CP.first, SA); } AnalyzerOptions ConfigOpts; parseAnalyzerConfigs(ConfigOpts, nullptr); for (const auto &C : Opts.Config) { // Don't generate anything that came from parseAnalyzerConfigs. It would be // redundant and may not be valid on the command line. auto Entry = ConfigOpts.Config.find(C.getKey()); if (Entry != ConfigOpts.Config.end() && Entry->getValue() == C.getValue()) continue; GenerateArg(Args, OPT_analyzer_config, C.getKey() + "=" + C.getValue(), SA); } // Nothing to generate for FullCompilerInvocation. 
} static bool ParseAnalyzerArgs(AnalyzerOptions &Opts, ArgList &Args, DiagnosticsEngine &Diags) { unsigned NumErrorsBefore = Diags.getNumErrors(); AnalyzerOptions *AnalyzerOpts = &Opts; #define ANALYZER_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ PARSE_OPTION_WITH_MARSHALLING( \ Args, Diags, ID, FLAGS, PARAM, SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, MERGER, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef ANALYZER_OPTION_WITH_MARSHALLING if (Arg *A = Args.getLastArg(OPT_analyzer_store)) { StringRef Name = A->getValue(); AnalysisStores Value = llvm::StringSwitch(Name) #define ANALYSIS_STORE(NAME, CMDFLAG, DESC, CREATFN) \ .Case(CMDFLAG, NAME##Model) #include "clang/StaticAnalyzer/Core/Analyses.def" .Default(NumStores); if (Value == NumStores) { Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; } else { Opts.AnalysisStoreOpt = Value; } } if (Arg *A = Args.getLastArg(OPT_analyzer_constraints)) { StringRef Name = A->getValue(); AnalysisConstraints Value = llvm::StringSwitch(Name) #define ANALYSIS_CONSTRAINTS(NAME, CMDFLAG, DESC, CREATFN) \ .Case(CMDFLAG, NAME##Model) #include "clang/StaticAnalyzer/Core/Analyses.def" .Default(NumConstraints); if (Value == NumConstraints) { Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; } else { Opts.AnalysisConstraintsOpt = Value; } } if (Arg *A = Args.getLastArg(OPT_analyzer_output)) { StringRef Name = A->getValue(); AnalysisDiagClients Value = llvm::StringSwitch(Name) #define ANALYSIS_DIAGNOSTICS(NAME, CMDFLAG, DESC, CREATFN) \ .Case(CMDFLAG, PD_##NAME) #include "clang/StaticAnalyzer/Core/Analyses.def" .Default(NUM_ANALYSIS_DIAG_CLIENTS); if (Value == NUM_ANALYSIS_DIAG_CLIENTS) { Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; } else { Opts.AnalysisDiagOpt = Value; } } if (Arg *A = Args.getLastArg(OPT_analyzer_purge)) { StringRef Name = A->getValue(); AnalysisPurgeMode Value = llvm::StringSwitch(Name) #define ANALYSIS_PURGE(NAME, CMDFLAG, DESC) \ .Case(CMDFLAG, NAME) #include "clang/StaticAnalyzer/Core/Analyses.def" .Default(NumPurgeModes); if (Value == NumPurgeModes) { Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; } else { Opts.AnalysisPurgeOpt = Value; } } if (Arg *A = Args.getLastArg(OPT_analyzer_inlining_mode)) { StringRef Name = A->getValue(); AnalysisInliningMode Value = llvm::StringSwitch(Name) #define ANALYSIS_INLINING_MODE(NAME, CMDFLAG, DESC) \ .Case(CMDFLAG, NAME) #include "clang/StaticAnalyzer/Core/Analyses.def" .Default(NumInliningModes); if (Value == NumInliningModes) { Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; } else { Opts.InliningMode = Value; } } Opts.CheckersAndPackages.clear(); for (const Arg *A : Args.filtered(OPT_analyzer_checker, OPT_analyzer_disable_checker)) { A->claim(); bool IsEnabled = A->getOption().getID() == OPT_analyzer_checker; // We can have a list of comma separated checker names, e.g: // '-analyzer-checker=cocoa,unix' StringRef CheckerAndPackageList = A->getValue(); SmallVector CheckersAndPackages; CheckerAndPackageList.split(CheckersAndPackages, ","); for (const StringRef &CheckerOrPackage : CheckersAndPackages) Opts.CheckersAndPackages.emplace_back(std::string(CheckerOrPackage), IsEnabled); } // Go 
through the analyzer configuration options. for (const auto *A : Args.filtered(OPT_analyzer_config)) { // We can have a list of comma separated config names, e.g: // '-analyzer-config key1=val1,key2=val2' StringRef configList = A->getValue(); SmallVector configVals; configList.split(configVals, ","); for (const auto &configVal : configVals) { StringRef key, val; std::tie(key, val) = configVal.split("="); if (val.empty()) { Diags.Report(SourceLocation(), diag::err_analyzer_config_no_value) << configVal; break; } if (val.contains('=')) { Diags.Report(SourceLocation(), diag::err_analyzer_config_multiple_values) << configVal; break; } // TODO: Check checker options too, possibly in CheckerRegistry. // Leave unknown non-checker configs unclaimed. if (!key.contains(":") && Opts.isUnknownAnalyzerConfig(key)) { if (Opts.ShouldEmitErrorsOnInvalidConfigValue) Diags.Report(diag::err_analyzer_config_unknown) << key; continue; } A->claim(); Opts.Config[key] = std::string(val); } } if (Opts.ShouldEmitErrorsOnInvalidConfigValue) parseAnalyzerConfigs(Opts, &Diags); else parseAnalyzerConfigs(Opts, nullptr); llvm::raw_string_ostream os(Opts.FullCompilerInvocation); for (unsigned i = 0; i < Args.getNumInputArgStrings(); ++i) { if (i != 0) os << " "; os << Args.getArgString(i); } os.flush(); return Diags.getNumErrors() == NumErrorsBefore; } static StringRef getStringOption(AnalyzerOptions::ConfigTable &Config, StringRef OptionName, StringRef DefaultVal) { return Config.insert({OptionName, std::string(DefaultVal)}).first->second; } static void initOption(AnalyzerOptions::ConfigTable &Config, DiagnosticsEngine *Diags, StringRef &OptionField, StringRef Name, StringRef DefaultVal) { // String options may be known to invalid (e.g. if the expected string is a // file name, but the file does not exist), those will have to be checked in // parseConfigs. OptionField = getStringOption(Config, Name, DefaultVal); } static void initOption(AnalyzerOptions::ConfigTable &Config, DiagnosticsEngine *Diags, bool &OptionField, StringRef Name, bool DefaultVal) { auto PossiblyInvalidVal = llvm::StringSwitch>( getStringOption(Config, Name, (DefaultVal ? "true" : "false"))) .Case("true", true) .Case("false", false) .Default(None); if (!PossiblyInvalidVal) { if (Diags) Diags->Report(diag::err_analyzer_config_invalid_input) << Name << "a boolean"; else OptionField = DefaultVal; } else OptionField = PossiblyInvalidVal.getValue(); } static void initOption(AnalyzerOptions::ConfigTable &Config, DiagnosticsEngine *Diags, unsigned &OptionField, StringRef Name, unsigned DefaultVal) { OptionField = DefaultVal; bool HasFailed = getStringOption(Config, Name, std::to_string(DefaultVal)) .getAsInteger(0, OptionField); if (Diags && HasFailed) Diags->Report(diag::err_analyzer_config_invalid_input) << Name << "an unsigned"; } static void parseAnalyzerConfigs(AnalyzerOptions &AnOpts, DiagnosticsEngine *Diags) { // TODO: There's no need to store the entire configtable, it'd be plenty // enough tostore checker options. 
#define ANALYZER_OPTION(TYPE, NAME, CMDFLAG, DESC, DEFAULT_VAL) \ initOption(AnOpts.Config, Diags, AnOpts.NAME, CMDFLAG, DEFAULT_VAL); #define ANALYZER_OPTION_DEPENDS_ON_USER_MODE(TYPE, NAME, CMDFLAG, DESC, \ SHALLOW_VAL, DEEP_VAL) \ switch (AnOpts.getUserMode()) { \ case UMK_Shallow: \ initOption(AnOpts.Config, Diags, AnOpts.NAME, CMDFLAG, SHALLOW_VAL); \ break; \ case UMK_Deep: \ initOption(AnOpts.Config, Diags, AnOpts.NAME, CMDFLAG, DEEP_VAL); \ break; \ } \ #include "clang/StaticAnalyzer/Core/AnalyzerOptions.def" #undef ANALYZER_OPTION #undef ANALYZER_OPTION_DEPENDS_ON_USER_MODE // At this point, AnalyzerOptions is configured. Let's validate some options. // FIXME: Here we try to validate the silenced checkers or packages are valid. // The current approach only validates the registered checkers which does not // contain the runtime enabled checkers and optimally we would validate both. if (!AnOpts.RawSilencedCheckersAndPackages.empty()) { std::vector Checkers = AnOpts.getRegisteredCheckers(/*IncludeExperimental=*/true); std::vector Packages = AnOpts.getRegisteredPackages(/*IncludeExperimental=*/true); SmallVector CheckersAndPackages; AnOpts.RawSilencedCheckersAndPackages.split(CheckersAndPackages, ";"); for (const StringRef &CheckerOrPackage : CheckersAndPackages) { if (Diags) { bool IsChecker = CheckerOrPackage.contains('.'); bool IsValidName = IsChecker ? llvm::is_contained(Checkers, CheckerOrPackage) : llvm::is_contained(Packages, CheckerOrPackage); if (!IsValidName) Diags->Report(diag::err_unknown_analyzer_checker_or_package) << CheckerOrPackage; } AnOpts.SilencedCheckersAndPackages.emplace_back(CheckerOrPackage); } } if (!Diags) return; if (AnOpts.ShouldTrackConditionsDebug && !AnOpts.ShouldTrackConditions) Diags->Report(diag::err_analyzer_config_invalid_input) << "track-conditions-debug" << "'track-conditions' to also be enabled"; if (!AnOpts.CTUDir.empty() && !llvm::sys::fs::is_directory(AnOpts.CTUDir)) Diags->Report(diag::err_analyzer_config_invalid_input) << "ctu-dir" << "a filename"; if (!AnOpts.ModelPath.empty() && !llvm::sys::fs::is_directory(AnOpts.ModelPath)) Diags->Report(diag::err_analyzer_config_invalid_input) << "model-path" << "a filename"; } /// Generate a remark argument. This is an inverse of `ParseOptimizationRemark`. static void GenerateOptimizationRemark(SmallVectorImpl &Args, CompilerInvocation::StringAllocator SA, OptSpecifier OptEQ, StringRef Name, const CodeGenOptions::OptRemark &Remark) { if (Remark.hasValidPattern()) { GenerateArg(Args, OptEQ, Remark.Pattern, SA); } else if (Remark.Kind == CodeGenOptions::RK_Enabled) { GenerateArg(Args, OPT_R_Joined, Name, SA); } else if (Remark.Kind == CodeGenOptions::RK_Disabled) { GenerateArg(Args, OPT_R_Joined, StringRef("no-") + Name, SA); } } /// Parse a remark command line argument. It may be missing, disabled/enabled by /// '-R[no-]group' or specified with a regular expression by '-Rgroup=regexp'. /// On top of that, it can be disabled/enabled globally by '-R[no-]everything'. 
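/// For example (a sketch; later flags override earlier ones):
///
///   -Rpass=inline    -> RK_WithPattern, pattern "inline"
///   -Rpass           -> RK_Enabled,     pattern ".*"
///   -Rno-pass        -> RK_Disabled,    pattern cleared
///   -Rno-everything  -> RK_DisabledEverything for every remark kind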
static CodeGenOptions::OptRemark ParseOptimizationRemark(DiagnosticsEngine &Diags, ArgList &Args, OptSpecifier OptEQ, StringRef Name) { CodeGenOptions::OptRemark Result; auto InitializeResultPattern = [&Diags, &Args, &Result](const Arg *A, StringRef Pattern) { Result.Pattern = Pattern.str(); std::string RegexError; Result.Regex = std::make_shared(Result.Pattern); if (!Result.Regex->isValid(RegexError)) { Diags.Report(diag::err_drv_optimization_remark_pattern) << RegexError << A->getAsString(Args); return false; } return true; }; for (Arg *A : Args) { if (A->getOption().matches(OPT_R_Joined)) { StringRef Value = A->getValue(); if (Value == Name) Result.Kind = CodeGenOptions::RK_Enabled; else if (Value == "everything") Result.Kind = CodeGenOptions::RK_EnabledEverything; else if (Value.split('-') == std::make_pair(StringRef("no"), Name)) Result.Kind = CodeGenOptions::RK_Disabled; else if (Value == "no-everything") Result.Kind = CodeGenOptions::RK_DisabledEverything; else continue; if (Result.Kind == CodeGenOptions::RK_Disabled || Result.Kind == CodeGenOptions::RK_DisabledEverything) { Result.Pattern = ""; Result.Regex = nullptr; } else { InitializeResultPattern(A, ".*"); } } else if (A->getOption().matches(OptEQ)) { Result.Kind = CodeGenOptions::RK_WithPattern; if (!InitializeResultPattern(A, A->getValue())) return CodeGenOptions::OptRemark(); } } return Result; } static bool parseDiagnosticLevelMask(StringRef FlagName, const std::vector &Levels, DiagnosticsEngine &Diags, DiagnosticLevelMask &M) { bool Success = true; for (const auto &Level : Levels) { DiagnosticLevelMask const PM = llvm::StringSwitch(Level) .Case("note", DiagnosticLevelMask::Note) .Case("remark", DiagnosticLevelMask::Remark) .Case("warning", DiagnosticLevelMask::Warning) .Case("error", DiagnosticLevelMask::Error) .Default(DiagnosticLevelMask::None); if (PM == DiagnosticLevelMask::None) { Success = false; Diags.Report(diag::err_drv_invalid_value) << FlagName << Level; } M = M | PM; } return Success; } static void parseSanitizerKinds(StringRef FlagName, const std::vector &Sanitizers, DiagnosticsEngine &Diags, SanitizerSet &S) { for (const auto &Sanitizer : Sanitizers) { SanitizerMask K = parseSanitizerValue(Sanitizer, /*AllowGroups=*/false); if (K == SanitizerMask()) Diags.Report(diag::err_drv_invalid_value) << FlagName << Sanitizer; else S.set(K, true); } } static SmallVector serializeSanitizerKinds(SanitizerSet S) { SmallVector Values; serializeSanitizerSet(S, Values); return Values; } static void parseXRayInstrumentationBundle(StringRef FlagName, StringRef Bundle, ArgList &Args, DiagnosticsEngine &D, XRayInstrSet &S) { llvm::SmallVector BundleParts; llvm::SplitString(Bundle, BundleParts, ","); for (const auto &B : BundleParts) { auto Mask = parseXRayInstrValue(B); if (Mask == XRayInstrKind::None) if (B != "none") D.Report(diag::err_drv_invalid_value) << FlagName << Bundle; else S.Mask = Mask; else if (Mask == XRayInstrKind::All) S.Mask = Mask; else S.set(Mask, true); } } static std::string serializeXRayInstrumentationBundle(const XRayInstrSet &S) { llvm::SmallVector BundleParts; serializeXRayInstrValue(S, BundleParts); std::string Buffer; llvm::raw_string_ostream OS(Buffer); llvm::interleave(BundleParts, OS, [&OS](StringRef Part) { OS << Part; }, ","); return Buffer; } // Set the profile kind using fprofile-instrument-use-path. 
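// Rough decision tree implemented below, based on the header of the profile
// file named by -fprofile-instrument-use-path:
//
//   file unreadable                -> ProfileClangInstr (error surfaces later)
//   IR-level profile with CS data  -> ProfileCSIRInstr
//   IR-level profile               -> ProfileIRInstr
//   otherwise (frontend profile)   -> ProfileClangInstr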
static void setPGOUseInstrumentor(CodeGenOptions &Opts, const Twine &ProfileName) { auto ReaderOrErr = llvm::IndexedInstrProfReader::create(ProfileName); // In error, return silently and let Clang PGOUse report the error message. if (auto E = ReaderOrErr.takeError()) { llvm::consumeError(std::move(E)); Opts.setProfileUse(CodeGenOptions::ProfileClangInstr); return; } std::unique_ptr PGOReader = std::move(ReaderOrErr.get()); if (PGOReader->isIRLevelProfile()) { if (PGOReader->hasCSIRLevelProfile()) Opts.setProfileUse(CodeGenOptions::ProfileCSIRInstr); else Opts.setProfileUse(CodeGenOptions::ProfileIRInstr); } else Opts.setProfileUse(CodeGenOptions::ProfileClangInstr); } void CompilerInvocation::GenerateCodeGenArgs( const CodeGenOptions &Opts, SmallVectorImpl &Args, StringAllocator SA, const llvm::Triple &T, const std::string &OutputFile, const LangOptions *LangOpts) { const CodeGenOptions &CodeGenOpts = Opts; if (Opts.OptimizationLevel == 0) GenerateArg(Args, OPT_O0, SA); else GenerateArg(Args, OPT_O, Twine(Opts.OptimizationLevel), SA); #define CODEGEN_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ GENERATE_OPTION_WITH_MARSHALLING( \ Args, SA, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef CODEGEN_OPTION_WITH_MARSHALLING if (Opts.OptimizationLevel > 0) { if (Opts.Inlining == CodeGenOptions::NormalInlining) GenerateArg(Args, OPT_finline_functions, SA); else if (Opts.Inlining == CodeGenOptions::OnlyHintInlining) GenerateArg(Args, OPT_finline_hint_functions, SA); else if (Opts.Inlining == CodeGenOptions::OnlyAlwaysInlining) GenerateArg(Args, OPT_fno_inline, SA); } if (Opts.DirectAccessExternalData && LangOpts->PICLevel != 0) GenerateArg(Args, OPT_fdirect_access_external_data, SA); else if (!Opts.DirectAccessExternalData && LangOpts->PICLevel == 0) GenerateArg(Args, OPT_fno_direct_access_external_data, SA); Optional DebugInfoVal; switch (Opts.DebugInfo) { case codegenoptions::DebugLineTablesOnly: DebugInfoVal = "line-tables-only"; break; case codegenoptions::DebugDirectivesOnly: DebugInfoVal = "line-directives-only"; break; case codegenoptions::DebugInfoConstructor: DebugInfoVal = "constructor"; break; case codegenoptions::LimitedDebugInfo: DebugInfoVal = "limited"; break; case codegenoptions::FullDebugInfo: DebugInfoVal = "standalone"; break; case codegenoptions::UnusedTypeInfo: DebugInfoVal = "unused-types"; break; case codegenoptions::NoDebugInfo: // default value DebugInfoVal = None; break; case codegenoptions::LocTrackingOnly: // implied value DebugInfoVal = None; break; } if (DebugInfoVal) GenerateArg(Args, OPT_debug_info_kind_EQ, *DebugInfoVal, SA); for (const auto &Prefix : Opts.DebugPrefixMap) GenerateArg(Args, OPT_fdebug_prefix_map_EQ, Prefix.first + "=" + Prefix.second, SA); for (const auto &Prefix : Opts.CoveragePrefixMap) GenerateArg(Args, OPT_fcoverage_prefix_map_EQ, Prefix.first + "=" + Prefix.second, SA); if (Opts.NewStructPathTBAA) GenerateArg(Args, OPT_new_struct_path_tbaa, SA); if (Opts.OptimizeSize == 1) GenerateArg(Args, OPT_O, "s", SA); else if (Opts.OptimizeSize == 2) GenerateArg(Args, OPT_O, "z", SA); // SimplifyLibCalls is set only in the absence of -fno-builtin and // -ffreestanding. 
We'll consider that when generating them. // NoBuiltinFuncs are generated by LangOptions. if (Opts.UnrollLoops && Opts.OptimizationLevel <= 1) GenerateArg(Args, OPT_funroll_loops, SA); else if (!Opts.UnrollLoops && Opts.OptimizationLevel > 1) GenerateArg(Args, OPT_fno_unroll_loops, SA); if (!Opts.BinutilsVersion.empty()) GenerateArg(Args, OPT_fbinutils_version_EQ, Opts.BinutilsVersion, SA); if (Opts.DebugNameTable == static_cast(llvm::DICompileUnit::DebugNameTableKind::GNU)) GenerateArg(Args, OPT_ggnu_pubnames, SA); else if (Opts.DebugNameTable == static_cast( llvm::DICompileUnit::DebugNameTableKind::Default)) GenerateArg(Args, OPT_gpubnames, SA); auto TNK = Opts.getDebugSimpleTemplateNames(); if (TNK != codegenoptions::DebugTemplateNamesKind::Full) { if (TNK == codegenoptions::DebugTemplateNamesKind::Simple) GenerateArg(Args, OPT_gsimple_template_names_EQ, "simple", SA); else if (TNK == codegenoptions::DebugTemplateNamesKind::Mangled) GenerateArg(Args, OPT_gsimple_template_names_EQ, "mangled", SA); } // ProfileInstrumentUsePath is marshalled automatically, no need to generate // it or PGOUseInstrumentor. if (Opts.TimePasses) { if (Opts.TimePassesPerRun) GenerateArg(Args, OPT_ftime_report_EQ, "per-pass-run", SA); else GenerateArg(Args, OPT_ftime_report, SA); } if (Opts.PrepareForLTO && !Opts.PrepareForThinLTO) GenerateArg(Args, OPT_flto_EQ, "full", SA); if (Opts.PrepareForThinLTO) GenerateArg(Args, OPT_flto_EQ, "thin", SA); if (!Opts.ThinLTOIndexFile.empty()) GenerateArg(Args, OPT_fthinlto_index_EQ, Opts.ThinLTOIndexFile, SA); if (Opts.SaveTempsFilePrefix == OutputFile) GenerateArg(Args, OPT_save_temps_EQ, "obj", SA); StringRef MemProfileBasename("memprof.profraw"); if (!Opts.MemoryProfileOutput.empty()) { if (Opts.MemoryProfileOutput == MemProfileBasename) { GenerateArg(Args, OPT_fmemory_profile, SA); } else { size_t ArgLength = Opts.MemoryProfileOutput.size() - MemProfileBasename.size(); GenerateArg(Args, OPT_fmemory_profile_EQ, Opts.MemoryProfileOutput.substr(0, ArgLength), SA); } } if (memcmp(Opts.CoverageVersion, "408*", 4) != 0) GenerateArg(Args, OPT_coverage_version_EQ, StringRef(Opts.CoverageVersion, 4), SA); // TODO: Check if we need to generate arguments stored in CmdArgs. (Namely // '-fembed_bitcode', which does not map to any CompilerInvocation field and // won't be generated.) if (Opts.XRayInstrumentationBundle.Mask != XRayInstrKind::All) { std::string InstrBundle = serializeXRayInstrumentationBundle(Opts.XRayInstrumentationBundle); if (!InstrBundle.empty()) GenerateArg(Args, OPT_fxray_instrumentation_bundle, InstrBundle, SA); } if (Opts.CFProtectionReturn && Opts.CFProtectionBranch) GenerateArg(Args, OPT_fcf_protection_EQ, "full", SA); else if (Opts.CFProtectionReturn) GenerateArg(Args, OPT_fcf_protection_EQ, "return", SA); else if (Opts.CFProtectionBranch) GenerateArg(Args, OPT_fcf_protection_EQ, "branch", SA); for (const auto &F : Opts.LinkBitcodeFiles) { bool Builtint = F.LinkFlags == llvm::Linker::Flags::LinkOnlyNeeded && F.PropagateAttrs && F.Internalize; GenerateArg(Args, Builtint ? OPT_mlink_builtin_bitcode : OPT_mlink_bitcode_file, F.Filename, SA); } // TODO: Consider removing marshalling annotations from f[no_]emulated_tls. // That would make it easy to generate the option only **once** if it was // explicitly set to non-default value. if (Opts.ExplicitEmulatedTLS) { GenerateArg( Args, Opts.EmulatedTLS ? 
OPT_femulated_tls : OPT_fno_emulated_tls, SA); } if (Opts.FPDenormalMode != llvm::DenormalMode::getIEEE()) GenerateArg(Args, OPT_fdenormal_fp_math_EQ, Opts.FPDenormalMode.str(), SA); if (Opts.FP32DenormalMode != llvm::DenormalMode::getIEEE()) GenerateArg(Args, OPT_fdenormal_fp_math_f32_EQ, Opts.FP32DenormalMode.str(), SA); if (Opts.StructReturnConvention == CodeGenOptions::SRCK_OnStack) { OptSpecifier Opt = T.isPPC32() ? OPT_maix_struct_return : OPT_fpcc_struct_return; GenerateArg(Args, Opt, SA); } else if (Opts.StructReturnConvention == CodeGenOptions::SRCK_InRegs) { OptSpecifier Opt = T.isPPC32() ? OPT_msvr4_struct_return : OPT_freg_struct_return; GenerateArg(Args, Opt, SA); } if (Opts.EnableAIXExtendedAltivecABI) GenerateArg(Args, OPT_mabi_EQ_vec_extabi, SA); if (!Opts.OptRecordPasses.empty()) GenerateArg(Args, OPT_opt_record_passes, Opts.OptRecordPasses, SA); if (!Opts.OptRecordFormat.empty()) GenerateArg(Args, OPT_opt_record_format, Opts.OptRecordFormat, SA); GenerateOptimizationRemark(Args, SA, OPT_Rpass_EQ, "pass", Opts.OptimizationRemark); GenerateOptimizationRemark(Args, SA, OPT_Rpass_missed_EQ, "pass-missed", Opts.OptimizationRemarkMissed); GenerateOptimizationRemark(Args, SA, OPT_Rpass_analysis_EQ, "pass-analysis", Opts.OptimizationRemarkAnalysis); GenerateArg(Args, OPT_fdiagnostics_hotness_threshold_EQ, Opts.DiagnosticsHotnessThreshold ? Twine(*Opts.DiagnosticsHotnessThreshold) : "auto", SA); for (StringRef Sanitizer : serializeSanitizerKinds(Opts.SanitizeRecover)) GenerateArg(Args, OPT_fsanitize_recover_EQ, Sanitizer, SA); for (StringRef Sanitizer : serializeSanitizerKinds(Opts.SanitizeTrap)) GenerateArg(Args, OPT_fsanitize_trap_EQ, Sanitizer, SA); if (!Opts.EmitVersionIdentMetadata) GenerateArg(Args, OPT_Qn, SA); switch (Opts.FiniteLoops) { case CodeGenOptions::FiniteLoopsKind::Language: break; case CodeGenOptions::FiniteLoopsKind::Always: GenerateArg(Args, OPT_ffinite_loops, SA); break; case CodeGenOptions::FiniteLoopsKind::Never: GenerateArg(Args, OPT_fno_finite_loops, SA); break; } } bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, DiagnosticsEngine &Diags, const llvm::Triple &T, const std::string &OutputFile, const LangOptions &LangOptsRef) { unsigned NumErrorsBefore = Diags.getNumErrors(); unsigned OptimizationLevel = getOptimizationLevel(Args, IK, Diags); // TODO: This could be done in Driver unsigned MaxOptLevel = 3; if (OptimizationLevel > MaxOptLevel) { // If the optimization level is not supported, fall back on the default // optimization Diags.Report(diag::warn_drv_optimization_value) << Args.getLastArg(OPT_O)->getAsString(Args) << "-O" << MaxOptLevel; OptimizationLevel = MaxOptLevel; } Opts.OptimizationLevel = OptimizationLevel; // The key paths of codegen options defined in Options.td start with // "CodeGenOpts.". Let's provide the expected variable name and type. CodeGenOptions &CodeGenOpts = Opts; // Some codegen options depend on language options. Let's provide the expected // variable name and type. 
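// A rough sketch of what PARSE_OPTION_WITH_MARSHALLING expands to for a
// hypothetical boolean flag `-fexample` whose key path is CodeGenOpts.Example
// (the flag name and key path are illustrative only):
//
//   if ((Flags) & options::CC1Option) {
//     CodeGenOpts.Example = Merger(CodeGenOpts.Example, /*default*/ false);
//     if (ImpliedCheck)
//       CodeGenOpts.Example = Merger(CodeGenOpts.Example, ImpliedValue);
//     if (ShouldParse)
//       if (auto MaybeValue = Normalizer(OPT_fexample, TableIndex, Args, Diags))
//         CodeGenOpts.Example =
//             Merger(CodeGenOpts.Example, static_cast<bool>(*MaybeValue));
//   }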
const LangOptions *LangOpts = &LangOptsRef; #define CODEGEN_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ PARSE_OPTION_WITH_MARSHALLING( \ Args, Diags, ID, FLAGS, PARAM, SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, MERGER, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef CODEGEN_OPTION_WITH_MARSHALLING // At O0 we want to fully disable inlining outside of cases marked with // 'alwaysinline' that are required for correctness. Opts.setInlining((Opts.OptimizationLevel == 0) ? CodeGenOptions::OnlyAlwaysInlining : CodeGenOptions::NormalInlining); // Explicit inlining flags can disable some or all inlining even at // optimization levels above zero. if (Arg *InlineArg = Args.getLastArg( options::OPT_finline_functions, options::OPT_finline_hint_functions, options::OPT_fno_inline_functions, options::OPT_fno_inline)) { if (Opts.OptimizationLevel > 0) { const Option &InlineOpt = InlineArg->getOption(); if (InlineOpt.matches(options::OPT_finline_functions)) Opts.setInlining(CodeGenOptions::NormalInlining); else if (InlineOpt.matches(options::OPT_finline_hint_functions)) Opts.setInlining(CodeGenOptions::OnlyHintInlining); else Opts.setInlining(CodeGenOptions::OnlyAlwaysInlining); } } // PIC defaults to -fno-direct-access-external-data while non-PIC defaults to // -fdirect-access-external-data. Opts.DirectAccessExternalData = Args.hasArg(OPT_fdirect_access_external_data) || (!Args.hasArg(OPT_fno_direct_access_external_data) && LangOpts->PICLevel == 0); if (Arg *A = Args.getLastArg(OPT_debug_info_kind_EQ)) { unsigned Val = llvm::StringSwitch(A->getValue()) .Case("line-tables-only", codegenoptions::DebugLineTablesOnly) .Case("line-directives-only", codegenoptions::DebugDirectivesOnly) .Case("constructor", codegenoptions::DebugInfoConstructor) .Case("limited", codegenoptions::LimitedDebugInfo) .Case("standalone", codegenoptions::FullDebugInfo) .Case("unused-types", codegenoptions::UnusedTypeInfo) .Default(~0U); if (Val == ~0U) Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); else Opts.setDebugInfo(static_cast(Val)); } // If -fuse-ctor-homing is set and limited debug info is already on, then use // constructor homing, and vice versa for -fno-use-ctor-homing. 
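// For example (sketch): `-debug-info-kind=limited -fuse-ctor-homing` upgrades
// the effective level to DebugInfoConstructor, while
// `-debug-info-kind=constructor -fno-use-ctor-homing` lowers it back to
// LimitedDebugInfo; any other combination leaves the level unchanged.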
if (const Arg *A = Args.getLastArg(OPT_fuse_ctor_homing, OPT_fno_use_ctor_homing)) { if (A->getOption().matches(OPT_fuse_ctor_homing) && Opts.getDebugInfo() == codegenoptions::LimitedDebugInfo) Opts.setDebugInfo(codegenoptions::DebugInfoConstructor); if (A->getOption().matches(OPT_fno_use_ctor_homing) && Opts.getDebugInfo() == codegenoptions::DebugInfoConstructor) Opts.setDebugInfo(codegenoptions::LimitedDebugInfo); } for (const auto &Arg : Args.getAllArgValues(OPT_fdebug_prefix_map_EQ)) { auto Split = StringRef(Arg).split('='); Opts.DebugPrefixMap.insert( {std::string(Split.first), std::string(Split.second)}); } for (const auto &Arg : Args.getAllArgValues(OPT_fcoverage_prefix_map_EQ)) { auto Split = StringRef(Arg).split('='); Opts.CoveragePrefixMap.insert( {std::string(Split.first), std::string(Split.second)}); } const llvm::Triple::ArchType DebugEntryValueArchs[] = { llvm::Triple::x86, llvm::Triple::x86_64, llvm::Triple::aarch64, llvm::Triple::arm, llvm::Triple::armeb, llvm::Triple::mips, llvm::Triple::mipsel, llvm::Triple::mips64, llvm::Triple::mips64el}; if (Opts.OptimizationLevel > 0 && Opts.hasReducedDebugInfo() && llvm::is_contained(DebugEntryValueArchs, T.getArch())) Opts.EmitCallSiteInfo = true; if (!Opts.EnableDIPreservationVerify && Opts.DIBugsReportFilePath.size()) { Diags.Report(diag::warn_ignoring_verify_debuginfo_preserve_export) << Opts.DIBugsReportFilePath; Opts.DIBugsReportFilePath = ""; } Opts.NewStructPathTBAA = !Args.hasArg(OPT_no_struct_path_tbaa) && Args.hasArg(OPT_new_struct_path_tbaa); Opts.OptimizeSize = getOptimizationLevelSize(Args); Opts.SimplifyLibCalls = !LangOpts->NoBuiltin; if (Opts.SimplifyLibCalls) Opts.NoBuiltinFuncs = LangOpts->NoBuiltinFuncs; Opts.UnrollLoops = Args.hasFlag(OPT_funroll_loops, OPT_fno_unroll_loops, (Opts.OptimizationLevel > 1)); Opts.BinutilsVersion = std::string(Args.getLastArgValue(OPT_fbinutils_version_EQ)); Opts.DebugNameTable = static_cast( Args.hasArg(OPT_ggnu_pubnames) ? llvm::DICompileUnit::DebugNameTableKind::GNU : Args.hasArg(OPT_gpubnames) ? llvm::DICompileUnit::DebugNameTableKind::Default : llvm::DICompileUnit::DebugNameTableKind::None); if (const Arg *A = Args.getLastArg(OPT_gsimple_template_names_EQ)) { StringRef Value = A->getValue(); if (Value != "simple" && Value != "mangled") Diags.Report(diag::err_drv_unsupported_option_argument) << A->getOption().getName() << A->getValue(); Opts.setDebugSimpleTemplateNames( StringRef(A->getValue()) == "simple" ? codegenoptions::DebugTemplateNamesKind::Simple : codegenoptions::DebugTemplateNamesKind::Mangled); } if (!Opts.ProfileInstrumentUsePath.empty()) setPGOUseInstrumentor(Opts, Opts.ProfileInstrumentUsePath); if (const Arg *A = Args.getLastArg(OPT_ftime_report, OPT_ftime_report_EQ)) { Opts.TimePasses = true; // -ftime-report= is only for new pass manager. 
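// For example (sketch): `-ftime-report` enables the pass timing report,
// `-ftime-report=per-pass-run` additionally reports each pass invocation
// separately (TimePassesPerRun), and the `=` spelling is rejected below when
// the legacy pass manager is in use.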
if (A->getOption().getID() == OPT_ftime_report_EQ) { if (Opts.LegacyPassManager) Diags.Report(diag::err_drv_argument_only_allowed_with) << A->getAsString(Args) << "-fno-legacy-pass-manager"; StringRef Val = A->getValue(); if (Val == "per-pass") Opts.TimePassesPerRun = false; else if (Val == "per-pass-run") Opts.TimePassesPerRun = true; else Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); } } Opts.PrepareForLTO = false; Opts.PrepareForThinLTO = false; if (Arg *A = Args.getLastArg(OPT_flto_EQ)) { Opts.PrepareForLTO = true; StringRef S = A->getValue(); if (S == "thin") Opts.PrepareForThinLTO = true; else if (S != "full") Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << S; } if (Arg *A = Args.getLastArg(OPT_fthinlto_index_EQ)) { if (IK.getLanguage() != Language::LLVM_IR) Diags.Report(diag::err_drv_argument_only_allowed_with) << A->getAsString(Args) << "-x ir"; Opts.ThinLTOIndexFile = std::string(Args.getLastArgValue(OPT_fthinlto_index_EQ)); } if (Arg *A = Args.getLastArg(OPT_save_temps_EQ)) Opts.SaveTempsFilePrefix = llvm::StringSwitch(A->getValue()) .Case("obj", OutputFile) .Default(llvm::sys::path::filename(OutputFile).str()); // The memory profile runtime appends the pid to make this name more unique. const char *MemProfileBasename = "memprof.profraw"; if (Args.hasArg(OPT_fmemory_profile_EQ)) { SmallString<128> Path( std::string(Args.getLastArgValue(OPT_fmemory_profile_EQ))); llvm::sys::path::append(Path, MemProfileBasename); Opts.MemoryProfileOutput = std::string(Path); } else if (Args.hasArg(OPT_fmemory_profile)) Opts.MemoryProfileOutput = MemProfileBasename; memcpy(Opts.CoverageVersion, "408*", 4); if (Opts.EmitGcovArcs || Opts.EmitGcovNotes) { if (Args.hasArg(OPT_coverage_version_EQ)) { StringRef CoverageVersion = Args.getLastArgValue(OPT_coverage_version_EQ); if (CoverageVersion.size() != 4) { Diags.Report(diag::err_drv_invalid_value) << Args.getLastArg(OPT_coverage_version_EQ)->getAsString(Args) << CoverageVersion; } else { memcpy(Opts.CoverageVersion, CoverageVersion.data(), 4); } } } // FIXME: For backend options that are not yet recorded as function // attributes in the IR, keep track of them so we can embed them in a // separate data section and use them when building the bitcode. for (const auto &A : Args) { // Do not encode output and input. if (A->getOption().getID() == options::OPT_o || A->getOption().getID() == options::OPT_INPUT || A->getOption().getID() == options::OPT_x || A->getOption().getID() == options::OPT_fembed_bitcode || A->getOption().matches(options::OPT_W_Group)) continue; ArgStringList ASL; A->render(Args, ASL); for (const auto &arg : ASL) { StringRef ArgStr(arg); Opts.CmdArgs.insert(Opts.CmdArgs.end(), ArgStr.begin(), ArgStr.end()); // using \00 to separate each commandline options. 
Opts.CmdArgs.push_back('\0'); } } auto XRayInstrBundles = Args.getAllArgValues(OPT_fxray_instrumentation_bundle); if (XRayInstrBundles.empty()) Opts.XRayInstrumentationBundle.Mask = XRayInstrKind::All; else for (const auto &A : XRayInstrBundles) parseXRayInstrumentationBundle("-fxray-instrumentation-bundle=", A, Args, Diags, Opts.XRayInstrumentationBundle); if (const Arg *A = Args.getLastArg(OPT_fcf_protection_EQ)) { StringRef Name = A->getValue(); if (Name == "full") { Opts.CFProtectionReturn = 1; Opts.CFProtectionBranch = 1; } else if (Name == "return") Opts.CFProtectionReturn = 1; else if (Name == "branch") Opts.CFProtectionBranch = 1; else if (Name != "none") Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; } if (Opts.PrepareForLTO && Args.hasArg(OPT_mibt_seal)) Opts.IBTSeal = 1; for (auto *A : Args.filtered(OPT_mlink_bitcode_file, OPT_mlink_builtin_bitcode)) { CodeGenOptions::BitcodeFileToLink F; F.Filename = A->getValue(); if (A->getOption().matches(OPT_mlink_builtin_bitcode)) { F.LinkFlags = llvm::Linker::Flags::LinkOnlyNeeded; // When linking CUDA bitcode, propagate function attributes so that // e.g. libdevice gets fast-math attrs if we're building with fast-math. F.PropagateAttrs = true; F.Internalize = true; } Opts.LinkBitcodeFiles.push_back(F); } if (Args.getLastArg(OPT_femulated_tls) || Args.getLastArg(OPT_fno_emulated_tls)) { Opts.ExplicitEmulatedTLS = true; } if (Arg *A = Args.getLastArg(OPT_ftlsmodel_EQ)) { if (T.isOSAIX()) { StringRef Name = A->getValue(); if (Name != "global-dynamic") Diags.Report(diag::err_aix_unsupported_tls_model) << Name; } } if (Arg *A = Args.getLastArg(OPT_fdenormal_fp_math_EQ)) { StringRef Val = A->getValue(); Opts.FPDenormalMode = llvm::parseDenormalFPAttribute(Val); if (!Opts.FPDenormalMode.isValid()) Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Val; } if (Arg *A = Args.getLastArg(OPT_fdenormal_fp_math_f32_EQ)) { StringRef Val = A->getValue(); Opts.FP32DenormalMode = llvm::parseDenormalFPAttribute(Val); if (!Opts.FP32DenormalMode.isValid()) Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Val; } // X86_32 has -fppc-struct-return and -freg-struct-return. // PPC32 has -maix-struct-return and -msvr4-struct-return. if (Arg *A = Args.getLastArg(OPT_fpcc_struct_return, OPT_freg_struct_return, OPT_maix_struct_return, OPT_msvr4_struct_return)) { // TODO: We might want to consider enabling these options on AIX in the // future. 
if (T.isOSAIX()) Diags.Report(diag::err_drv_unsupported_opt_for_target) << A->getSpelling() << T.str(); const Option &O = A->getOption(); if (O.matches(OPT_fpcc_struct_return) || O.matches(OPT_maix_struct_return)) { Opts.setStructReturnConvention(CodeGenOptions::SRCK_OnStack); } else { assert(O.matches(OPT_freg_struct_return) || O.matches(OPT_msvr4_struct_return)); Opts.setStructReturnConvention(CodeGenOptions::SRCK_InRegs); } } if (Arg *A = Args.getLastArg(OPT_mabi_EQ_vec_default, OPT_mabi_EQ_vec_extabi)) { if (!T.isOSAIX()) Diags.Report(diag::err_drv_unsupported_opt_for_target) << A->getSpelling() << T.str(); const Option &O = A->getOption(); Opts.EnableAIXExtendedAltivecABI = O.matches(OPT_mabi_EQ_vec_extabi); } bool NeedLocTracking = false; if (!Opts.OptRecordFile.empty()) NeedLocTracking = true; if (Arg *A = Args.getLastArg(OPT_opt_record_passes)) { Opts.OptRecordPasses = A->getValue(); NeedLocTracking = true; } if (Arg *A = Args.getLastArg(OPT_opt_record_format)) { Opts.OptRecordFormat = A->getValue(); NeedLocTracking = true; } Opts.OptimizationRemark = ParseOptimizationRemark(Diags, Args, OPT_Rpass_EQ, "pass"); Opts.OptimizationRemarkMissed = ParseOptimizationRemark(Diags, Args, OPT_Rpass_missed_EQ, "pass-missed"); Opts.OptimizationRemarkAnalysis = ParseOptimizationRemark( Diags, Args, OPT_Rpass_analysis_EQ, "pass-analysis"); NeedLocTracking |= Opts.OptimizationRemark.hasValidPattern() || Opts.OptimizationRemarkMissed.hasValidPattern() || Opts.OptimizationRemarkAnalysis.hasValidPattern(); bool UsingSampleProfile = !Opts.SampleProfileFile.empty(); bool UsingProfile = UsingSampleProfile || (Opts.getProfileUse() != CodeGenOptions::ProfileNone); if (Opts.DiagnosticsWithHotness && !UsingProfile && // An IR file will contain PGO as metadata IK.getLanguage() != Language::LLVM_IR) Diags.Report(diag::warn_drv_diagnostics_hotness_requires_pgo) << "-fdiagnostics-show-hotness"; // Parse remarks hotness threshold. Valid value is either integer or 'auto'. if (auto *arg = Args.getLastArg(options::OPT_fdiagnostics_hotness_threshold_EQ)) { auto ResultOrErr = llvm::remarks::parseHotnessThresholdOption(arg->getValue()); if (!ResultOrErr) { Diags.Report(diag::err_drv_invalid_diagnotics_hotness_threshold) << "-fdiagnostics-hotness-threshold="; } else { Opts.DiagnosticsHotnessThreshold = *ResultOrErr; if ((!Opts.DiagnosticsHotnessThreshold.hasValue() || Opts.DiagnosticsHotnessThreshold.getValue() > 0) && !UsingProfile) Diags.Report(diag::warn_drv_diagnostics_hotness_requires_pgo) << "-fdiagnostics-hotness-threshold="; } } // If the user requested to use a sample profile for PGO, then the // backend will need to track source location information so the profile // can be incorporated into the IR. if (UsingSampleProfile) NeedLocTracking = true; if (!Opts.StackUsageOutput.empty()) NeedLocTracking = true; // If the user requested a flag that requires source locations available in // the backend, make sure that the backend tracks source location information. if (NeedLocTracking && Opts.getDebugInfo() == codegenoptions::NoDebugInfo) Opts.setDebugInfo(codegenoptions::LocTrackingOnly); // Parse -fsanitize-recover= arguments. // FIXME: Report unrecoverable sanitizers incorrectly specified here. 
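// For example (sketch): `-fsanitize-recover=address` sets the corresponding
// bit in Opts.SanitizeRecover, whereas an unrecognised name such as
// `-fsanitize-recover=bogus` is reported through err_drv_invalid_value by
// parseSanitizerKinds.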
parseSanitizerKinds("-fsanitize-recover=", Args.getAllArgValues(OPT_fsanitize_recover_EQ), Diags, Opts.SanitizeRecover); parseSanitizerKinds("-fsanitize-trap=", Args.getAllArgValues(OPT_fsanitize_trap_EQ), Diags, Opts.SanitizeTrap); Opts.EmitVersionIdentMetadata = Args.hasFlag(OPT_Qy, OPT_Qn, true); if (Args.hasArg(options::OPT_ffinite_loops)) Opts.FiniteLoops = CodeGenOptions::FiniteLoopsKind::Always; else if (Args.hasArg(options::OPT_fno_finite_loops)) Opts.FiniteLoops = CodeGenOptions::FiniteLoopsKind::Never; Opts.EmitIEEENaNCompliantInsts = Args.hasFlag(options::OPT_mamdgpu_ieee, options::OPT_mno_amdgpu_ieee); if (!Opts.EmitIEEENaNCompliantInsts && !LangOptsRef.NoHonorNaNs) Diags.Report(diag::err_drv_amdgpu_ieee_without_no_honor_nans); return Diags.getNumErrors() == NumErrorsBefore; } static void GenerateDependencyOutputArgs(const DependencyOutputOptions &Opts, SmallVectorImpl &Args, CompilerInvocation::StringAllocator SA) { const DependencyOutputOptions &DependencyOutputOpts = Opts; #define DEPENDENCY_OUTPUT_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ GENERATE_OPTION_WITH_MARSHALLING( \ Args, SA, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef DEPENDENCY_OUTPUT_OPTION_WITH_MARSHALLING if (Opts.ShowIncludesDest != ShowIncludesDestination::None) GenerateArg(Args, OPT_show_includes, SA); for (const auto &Dep : Opts.ExtraDeps) { switch (Dep.second) { case EDK_SanitizeIgnorelist: // Sanitizer ignorelist arguments are generated from LanguageOptions. continue; case EDK_ModuleFile: // Module file arguments are generated from FrontendOptions and // HeaderSearchOptions. continue; case EDK_ProfileList: // Profile list arguments are generated from LanguageOptions via the // marshalling infrastructure. continue; case EDK_DepFileEntry: GenerateArg(Args, OPT_fdepfile_entry, Dep.first, SA); break; } } } static bool ParseDependencyOutputArgs(DependencyOutputOptions &Opts, ArgList &Args, DiagnosticsEngine &Diags, frontend::ActionKind Action, bool ShowLineMarkers) { unsigned NumErrorsBefore = Diags.getNumErrors(); DependencyOutputOptions &DependencyOutputOpts = Opts; #define DEPENDENCY_OUTPUT_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ PARSE_OPTION_WITH_MARSHALLING( \ Args, Diags, ID, FLAGS, PARAM, SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, MERGER, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef DEPENDENCY_OUTPUT_OPTION_WITH_MARSHALLING if (Args.hasArg(OPT_show_includes)) { // Writing both /showIncludes and preprocessor output to stdout // would produce interleaved output, so use stderr for /showIncludes. // This behaves the same as cl.exe, when /E, /EP or /P are passed. if (Action == frontend::PrintPreprocessedInput || !ShowLineMarkers) Opts.ShowIncludesDest = ShowIncludesDestination::Stderr; else Opts.ShowIncludesDest = ShowIncludesDestination::Stdout; } else { Opts.ShowIncludesDest = ShowIncludesDestination::None; } // Add sanitizer ignorelists as extra dependencies. 
// They won't be discovered by the regular preprocessor, so // we let make / ninja to know about this implicit dependency. if (!Args.hasArg(OPT_fno_sanitize_ignorelist)) { for (const auto *A : Args.filtered(OPT_fsanitize_ignorelist_EQ)) { StringRef Val = A->getValue(); if (!Val.contains('=')) Opts.ExtraDeps.emplace_back(std::string(Val), EDK_SanitizeIgnorelist); } if (Opts.IncludeSystemHeaders) { for (const auto *A : Args.filtered(OPT_fsanitize_system_ignorelist_EQ)) { StringRef Val = A->getValue(); if (!Val.contains('=')) Opts.ExtraDeps.emplace_back(std::string(Val), EDK_SanitizeIgnorelist); } } } // -fprofile-list= dependencies. for (const auto &Filename : Args.getAllArgValues(OPT_fprofile_list_EQ)) Opts.ExtraDeps.emplace_back(Filename, EDK_ProfileList); // Propagate the extra dependencies. for (const auto *A : Args.filtered(OPT_fdepfile_entry)) Opts.ExtraDeps.emplace_back(A->getValue(), EDK_DepFileEntry); // Only the -fmodule-file= form. for (const auto *A : Args.filtered(OPT_fmodule_file)) { StringRef Val = A->getValue(); if (!Val.contains('=')) Opts.ExtraDeps.emplace_back(std::string(Val), EDK_ModuleFile); } return Diags.getNumErrors() == NumErrorsBefore; } static bool parseShowColorsArgs(const ArgList &Args, bool DefaultColor) { // Color diagnostics default to auto ("on" if terminal supports) in the driver // but default to off in cc1, needing an explicit OPT_fdiagnostics_color. // Support both clang's -f[no-]color-diagnostics and gcc's // -f[no-]diagnostics-colors[=never|always|auto]. enum { Colors_On, Colors_Off, Colors_Auto } ShowColors = DefaultColor ? Colors_Auto : Colors_Off; for (auto *A : Args) { const Option &O = A->getOption(); if (O.matches(options::OPT_fcolor_diagnostics) || O.matches(options::OPT_fdiagnostics_color)) { ShowColors = Colors_On; } else if (O.matches(options::OPT_fno_color_diagnostics) || O.matches(options::OPT_fno_diagnostics_color)) { ShowColors = Colors_Off; } else if (O.matches(options::OPT_fdiagnostics_color_EQ)) { StringRef Value(A->getValue()); if (Value == "always") ShowColors = Colors_On; else if (Value == "never") ShowColors = Colors_Off; else if (Value == "auto") ShowColors = Colors_Auto; } } return ShowColors == Colors_On || (ShowColors == Colors_Auto && llvm::sys::Process::StandardErrHasColors()); } static bool checkVerifyPrefixes(const std::vector &VerifyPrefixes, DiagnosticsEngine &Diags) { bool Success = true; for (const auto &Prefix : VerifyPrefixes) { // Every prefix must start with a letter and contain only alphanumeric // characters, hyphens, and underscores. 
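// For example (sketch): `-verify=expected-new` and `-verify=my_prefix` are
// accepted, while `-verify=1st` (does not start with a letter) and
// `-verify=foo.bar` (contains '.') are rejected with err_drv_invalid_value
// plus note_drv_verify_prefix_spelling.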
auto BadChar = llvm::find_if(Prefix, [](char C) { return !isAlphanumeric(C) && C != '-' && C != '_'; }); if (BadChar != Prefix.end() || !isLetter(Prefix[0])) { Success = false; Diags.Report(diag::err_drv_invalid_value) << "-verify=" << Prefix; Diags.Report(diag::note_drv_verify_prefix_spelling); } } return Success; } static void GenerateFileSystemArgs(const FileSystemOptions &Opts, SmallVectorImpl &Args, CompilerInvocation::StringAllocator SA) { const FileSystemOptions &FileSystemOpts = Opts; #define FILE_SYSTEM_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ GENERATE_OPTION_WITH_MARSHALLING( \ Args, SA, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef FILE_SYSTEM_OPTION_WITH_MARSHALLING } static bool ParseFileSystemArgs(FileSystemOptions &Opts, const ArgList &Args, DiagnosticsEngine &Diags) { unsigned NumErrorsBefore = Diags.getNumErrors(); FileSystemOptions &FileSystemOpts = Opts; #define FILE_SYSTEM_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ PARSE_OPTION_WITH_MARSHALLING( \ Args, Diags, ID, FLAGS, PARAM, SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, MERGER, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef FILE_SYSTEM_OPTION_WITH_MARSHALLING return Diags.getNumErrors() == NumErrorsBefore; } static void GenerateMigratorArgs(const MigratorOptions &Opts, SmallVectorImpl &Args, CompilerInvocation::StringAllocator SA) { const MigratorOptions &MigratorOpts = Opts; #define MIGRATOR_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ GENERATE_OPTION_WITH_MARSHALLING( \ Args, SA, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef MIGRATOR_OPTION_WITH_MARSHALLING } static bool ParseMigratorArgs(MigratorOptions &Opts, const ArgList &Args, DiagnosticsEngine &Diags) { unsigned NumErrorsBefore = Diags.getNumErrors(); MigratorOptions &MigratorOpts = Opts; #define MIGRATOR_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ PARSE_OPTION_WITH_MARSHALLING( \ Args, Diags, ID, FLAGS, PARAM, SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, MERGER, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef MIGRATOR_OPTION_WITH_MARSHALLING return Diags.getNumErrors() == NumErrorsBefore; } void CompilerInvocation::GenerateDiagnosticArgs( const DiagnosticOptions &Opts, SmallVectorImpl &Args, StringAllocator SA, bool DefaultDiagColor) { const DiagnosticOptions *DiagnosticOpts = &Opts; #define 
DIAG_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ GENERATE_OPTION_WITH_MARSHALLING( \ Args, SA, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef DIAG_OPTION_WITH_MARSHALLING if (!Opts.DiagnosticSerializationFile.empty()) GenerateArg(Args, OPT_diagnostic_serialized_file, Opts.DiagnosticSerializationFile, SA); if (Opts.ShowColors) GenerateArg(Args, OPT_fcolor_diagnostics, SA); if (Opts.VerifyDiagnostics && llvm::is_contained(Opts.VerifyPrefixes, "expected")) GenerateArg(Args, OPT_verify, SA); for (const auto &Prefix : Opts.VerifyPrefixes) if (Prefix != "expected") GenerateArg(Args, OPT_verify_EQ, Prefix, SA); DiagnosticLevelMask VIU = Opts.getVerifyIgnoreUnexpected(); if (VIU == DiagnosticLevelMask::None) { // This is the default, don't generate anything. } else if (VIU == DiagnosticLevelMask::All) { GenerateArg(Args, OPT_verify_ignore_unexpected, SA); } else { if (static_cast(VIU & DiagnosticLevelMask::Note) != 0) GenerateArg(Args, OPT_verify_ignore_unexpected_EQ, "note", SA); if (static_cast(VIU & DiagnosticLevelMask::Remark) != 0) GenerateArg(Args, OPT_verify_ignore_unexpected_EQ, "remark", SA); if (static_cast(VIU & DiagnosticLevelMask::Warning) != 0) GenerateArg(Args, OPT_verify_ignore_unexpected_EQ, "warning", SA); if (static_cast(VIU & DiagnosticLevelMask::Error) != 0) GenerateArg(Args, OPT_verify_ignore_unexpected_EQ, "error", SA); } for (const auto &Warning : Opts.Warnings) { // This option is automatically generated from UndefPrefixes. if (Warning == "undef-prefix") continue; Args.push_back(SA(StringRef("-W") + Warning)); } for (const auto &Remark : Opts.Remarks) { // These arguments are generated from OptimizationRemark fields of // CodeGenOptions. StringRef IgnoredRemarks[] = {"pass", "no-pass", "pass-analysis", "no-pass-analysis", "pass-missed", "no-pass-missed"}; if (llvm::is_contained(IgnoredRemarks, Remark)) continue; Args.push_back(SA(StringRef("-R") + Remark)); } } std::unique_ptr clang::CreateAndPopulateDiagOpts(ArrayRef Argv) { auto DiagOpts = std::make_unique(); unsigned MissingArgIndex, MissingArgCount; InputArgList Args = getDriverOptTable().ParseArgs( Argv.slice(1), MissingArgIndex, MissingArgCount); // We ignore MissingArgCount and the return value of ParseDiagnosticArgs. // Any errors that would be diagnosed here will also be diagnosed later, // when the DiagnosticsEngine actually exists. (void)ParseDiagnosticArgs(*DiagOpts, Args); return DiagOpts; } bool clang::ParseDiagnosticArgs(DiagnosticOptions &Opts, ArgList &Args, DiagnosticsEngine *Diags, bool DefaultDiagColor) { Optional IgnoringDiags; if (!Diags) { IgnoringDiags.emplace(new DiagnosticIDs(), new DiagnosticOptions(), new IgnoringDiagConsumer()); Diags = &*IgnoringDiags; } unsigned NumErrorsBefore = Diags->getNumErrors(); // The key paths of diagnostic options defined in Options.td start with // "DiagnosticOpts->". Let's provide the expected variable name and type. 
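The *_OPTION_WITH_MARSHALLING blocks in this file all rely on Options.inc expanding one record per option, and each record names a key path rooted at a pointer such as DiagnosticOpts-> or LangOpts->; that is why each parser first introduces a local variable with exactly that name, as the next line does. A reduced, self-contained sketch of the pattern, using a hypothetical two-option table (DemoOptions, DEMO_OPTIONS and the flag spellings are invented) in place of Options.inc:

#include <cassert>
#include <set>
#include <string>

struct DemoOptions {
  bool ShowColors = false;
  unsigned TabStop = 8;
};

// Hypothetical stand-in for Options.inc: one record per option, carrying the
// spelling, the key path rooted at DemoOpts->, and the value to store when
// the flag is present.
#define DEMO_OPTIONS(OPTION)                                                   \
  OPTION("-fcolor-diagnostics", DemoOpts->ShowColors, true)                    \
  OPTION("-ftabstop", DemoOpts->TabStop, 4u)

static void parseDemoArgs(const std::set<std::string> &Args,
                          DemoOptions &Opts) {
  // Same trick as "DiagnosticOpts"/"LangOpts" in the real parsers: the key
  // paths in the table only compile because this alias is in scope.
  DemoOptions *DemoOpts = &Opts;
#define PARSE(SPELLING, KEYPATH, VALUE)                                        \
  if (Args.count(SPELLING))                                                    \
    KEYPATH = VALUE;
  DEMO_OPTIONS(PARSE)
#undef PARSE
}

int main() {
  DemoOptions Opts;
  parseDemoArgs({"-fcolor-diagnostics"}, Opts);
  assert(Opts.ShowColors && Opts.TabStop == 8);
}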
DiagnosticOptions *DiagnosticOpts = &Opts; #define DIAG_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ PARSE_OPTION_WITH_MARSHALLING( \ Args, *Diags, ID, FLAGS, PARAM, SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, MERGER, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef DIAG_OPTION_WITH_MARSHALLING llvm::sys::Process::UseANSIEscapeCodes(Opts.UseANSIEscapeCodes); if (Arg *A = Args.getLastArg(OPT_diagnostic_serialized_file, OPT__serialize_diags)) Opts.DiagnosticSerializationFile = A->getValue(); Opts.ShowColors = parseShowColorsArgs(Args, DefaultDiagColor); Opts.VerifyDiagnostics = Args.hasArg(OPT_verify) || Args.hasArg(OPT_verify_EQ); Opts.VerifyPrefixes = Args.getAllArgValues(OPT_verify_EQ); if (Args.hasArg(OPT_verify)) Opts.VerifyPrefixes.push_back("expected"); // Keep VerifyPrefixes in its original order for the sake of diagnostics, and // then sort it to prepare for fast lookup using std::binary_search. if (!checkVerifyPrefixes(Opts.VerifyPrefixes, *Diags)) Opts.VerifyDiagnostics = false; else llvm::sort(Opts.VerifyPrefixes); DiagnosticLevelMask DiagMask = DiagnosticLevelMask::None; parseDiagnosticLevelMask( "-verify-ignore-unexpected=", Args.getAllArgValues(OPT_verify_ignore_unexpected_EQ), *Diags, DiagMask); if (Args.hasArg(OPT_verify_ignore_unexpected)) DiagMask = DiagnosticLevelMask::All; Opts.setVerifyIgnoreUnexpected(DiagMask); if (Opts.TabStop == 0 || Opts.TabStop > DiagnosticOptions::MaxTabStop) { Opts.TabStop = DiagnosticOptions::DefaultTabStop; Diags->Report(diag::warn_ignoring_ftabstop_value) << Opts.TabStop << DiagnosticOptions::DefaultTabStop; } addDiagnosticArgs(Args, OPT_W_Group, OPT_W_value_Group, Opts.Warnings); addDiagnosticArgs(Args, OPT_R_Group, OPT_R_value_Group, Opts.Remarks); return Diags->getNumErrors() == NumErrorsBefore; } /// Parse the argument to the -ftest-module-file-extension /// command-line argument. /// /// \returns true on error, false on success. static bool parseTestModuleFileExtensionArg(StringRef Arg, std::string &BlockName, unsigned &MajorVersion, unsigned &MinorVersion, bool &Hashed, std::string &UserInfo) { SmallVector Args; Arg.split(Args, ':', 5); if (Args.size() < 5) return true; BlockName = std::string(Args[0]); if (Args[1].getAsInteger(10, MajorVersion)) return true; if (Args[2].getAsInteger(10, MinorVersion)) return true; if (Args[3].getAsInteger(2, Hashed)) return true; if (Args.size() > 4) UserInfo = std::string(Args[4]); return false; } /// Return a table that associates command line option specifiers with the /// frontend action. Note: The pair {frontend::PluginAction, OPT_plugin} is /// intentionally missing, as this case is handled separately from other /// frontend options. 
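The table returned below is searched linearly in both directions: getFrontendAction maps an option ID to an action, and getProgramActionOpt maps an action back to the first option that spells it. A self-contained sketch of that shape, with invented enumerators (DemoAction, DemoOptID) standing in for the clang ones:

#include <optional>
#include <utility>

enum class DemoAction { ParseSyntaxOnly, EmitObj, ASTDump };
enum DemoOptID { OPT_fsyntax_only_demo, OPT_emit_obj_demo, OPT_ast_dump_demo };

static constexpr std::pair<DemoAction, DemoOptID> DemoTable[] = {
    {DemoAction::ParseSyntaxOnly, OPT_fsyntax_only_demo},
    {DemoAction::EmitObj, OPT_emit_obj_demo},
    {DemoAction::ASTDump, OPT_ast_dump_demo},
};

static std::optional<DemoAction> actionForOption(DemoOptID Opt) {
  for (const auto &Entry : DemoTable)
    if (Entry.second == Opt)
      return Entry.first;
  return std::nullopt; // option is not in the action group
}

static std::optional<DemoOptID> optionForAction(DemoAction Action) {
  for (const auto &Entry : DemoTable)
    if (Entry.first == Action)
      return Entry.second; // first spelling wins, as in the real table
  return std::nullopt;
}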
static const auto &getFrontendActionTable() { static const std::pair Table[] = { {frontend::ASTDeclList, OPT_ast_list}, {frontend::ASTDump, OPT_ast_dump_all_EQ}, {frontend::ASTDump, OPT_ast_dump_all}, {frontend::ASTDump, OPT_ast_dump_EQ}, {frontend::ASTDump, OPT_ast_dump}, {frontend::ASTDump, OPT_ast_dump_lookups}, {frontend::ASTDump, OPT_ast_dump_decl_types}, {frontend::ASTPrint, OPT_ast_print}, {frontend::ASTView, OPT_ast_view}, {frontend::DumpCompilerOptions, OPT_compiler_options_dump}, {frontend::DumpRawTokens, OPT_dump_raw_tokens}, {frontend::DumpTokens, OPT_dump_tokens}, {frontend::EmitAssembly, OPT_S}, {frontend::EmitBC, OPT_emit_llvm_bc}, {frontend::EmitHTML, OPT_emit_html}, {frontend::EmitLLVM, OPT_emit_llvm}, {frontend::EmitLLVMOnly, OPT_emit_llvm_only}, {frontend::EmitCodeGenOnly, OPT_emit_codegen_only}, {frontend::EmitCodeGenOnly, OPT_emit_codegen_only}, {frontend::EmitObj, OPT_emit_obj}, {frontend::ExtractAPI, OPT_extract_api}, {frontend::FixIt, OPT_fixit_EQ}, {frontend::FixIt, OPT_fixit}, {frontend::GenerateModule, OPT_emit_module}, {frontend::GenerateModuleInterface, OPT_emit_module_interface}, {frontend::GenerateHeaderModule, OPT_emit_header_module}, {frontend::GeneratePCH, OPT_emit_pch}, {frontend::GenerateInterfaceStubs, OPT_emit_interface_stubs}, {frontend::InitOnly, OPT_init_only}, {frontend::ParseSyntaxOnly, OPT_fsyntax_only}, {frontend::ModuleFileInfo, OPT_module_file_info}, {frontend::VerifyPCH, OPT_verify_pch}, {frontend::PrintPreamble, OPT_print_preamble}, {frontend::PrintPreprocessedInput, OPT_E}, {frontend::TemplightDump, OPT_templight_dump}, {frontend::RewriteMacros, OPT_rewrite_macros}, {frontend::RewriteObjC, OPT_rewrite_objc}, {frontend::RewriteTest, OPT_rewrite_test}, {frontend::RunAnalysis, OPT_analyze}, {frontend::MigrateSource, OPT_migrate}, {frontend::RunPreprocessorOnly, OPT_Eonly}, {frontend::PrintDependencyDirectivesSourceMinimizerOutput, OPT_print_dependency_directives_minimized_source}, }; return Table; } /// Maps command line option to frontend action. static Optional getFrontendAction(OptSpecifier &Opt) { for (const auto &ActionOpt : getFrontendActionTable()) if (ActionOpt.second == Opt.getID()) return ActionOpt.first; return None; } /// Maps frontend action to command line option. static Optional getProgramActionOpt(frontend::ActionKind ProgramAction) { for (const auto &ActionOpt : getFrontendActionTable()) if (ActionOpt.first == ProgramAction) return OptSpecifier(ActionOpt.second); return None; } static void GenerateFrontendArgs(const FrontendOptions &Opts, SmallVectorImpl &Args, CompilerInvocation::StringAllocator SA, bool IsHeader) { const FrontendOptions &FrontendOpts = Opts; #define FRONTEND_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ GENERATE_OPTION_WITH_MARSHALLING( \ Args, SA, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef FRONTEND_OPTION_WITH_MARSHALLING Optional ProgramActionOpt = getProgramActionOpt(Opts.ProgramAction); // Generating a simple flag covers most frontend actions. std::function GenerateProgramAction = [&]() { GenerateArg(Args, *ProgramActionOpt, SA); }; if (!ProgramActionOpt) { // PluginAction is the only program action handled separately. 
assert(Opts.ProgramAction == frontend::PluginAction && "Frontend action without option."); GenerateProgramAction = [&]() { GenerateArg(Args, OPT_plugin, Opts.ActionName, SA); }; } // FIXME: Simplify the complex 'AST dump' command line. if (Opts.ProgramAction == frontend::ASTDump) { GenerateProgramAction = [&]() { // ASTDumpLookups, ASTDumpDeclTypes and ASTDumpFilter are generated via // marshalling infrastructure. if (Opts.ASTDumpFormat != ADOF_Default) { StringRef Format; switch (Opts.ASTDumpFormat) { case ADOF_Default: llvm_unreachable("Default AST dump format."); case ADOF_JSON: Format = "json"; break; } if (Opts.ASTDumpAll) GenerateArg(Args, OPT_ast_dump_all_EQ, Format, SA); if (Opts.ASTDumpDecls) GenerateArg(Args, OPT_ast_dump_EQ, Format, SA); } else { if (Opts.ASTDumpAll) GenerateArg(Args, OPT_ast_dump_all, SA); if (Opts.ASTDumpDecls) GenerateArg(Args, OPT_ast_dump, SA); } }; } if (Opts.ProgramAction == frontend::FixIt && !Opts.FixItSuffix.empty()) { GenerateProgramAction = [&]() { GenerateArg(Args, OPT_fixit_EQ, Opts.FixItSuffix, SA); }; } GenerateProgramAction(); for (const auto &PluginArgs : Opts.PluginArgs) { Option Opt = getDriverOptTable().getOption(OPT_plugin_arg); const char *Spelling = SA(Opt.getPrefix() + Opt.getName() + PluginArgs.first); for (const auto &PluginArg : PluginArgs.second) denormalizeString(Args, Spelling, SA, Opt.getKind(), 0, PluginArg); } for (const auto &Ext : Opts.ModuleFileExtensions) if (auto *TestExt = dyn_cast_or_null(Ext.get())) GenerateArg(Args, OPT_ftest_module_file_extension_EQ, TestExt->str(), SA); if (!Opts.CodeCompletionAt.FileName.empty()) GenerateArg(Args, OPT_code_completion_at, Opts.CodeCompletionAt.ToString(), SA); for (const auto &Plugin : Opts.Plugins) GenerateArg(Args, OPT_load, Plugin, SA); // ASTDumpDecls and ASTDumpAll already handled with ProgramAction. for (const auto &ModuleFile : Opts.ModuleFiles) GenerateArg(Args, OPT_fmodule_file, ModuleFile, SA); if (Opts.AuxTargetCPU.hasValue()) GenerateArg(Args, OPT_aux_target_cpu, *Opts.AuxTargetCPU, SA); if (Opts.AuxTargetFeatures.hasValue()) for (const auto &Feature : *Opts.AuxTargetFeatures) GenerateArg(Args, OPT_aux_target_feature, Feature, SA); { StringRef Preprocessed = Opts.DashX.isPreprocessed() ? "-cpp-output" : ""; StringRef ModuleMap = Opts.DashX.getFormat() == InputKind::ModuleMap ? "-module-map" : ""; StringRef Header = IsHeader ? "-header" : ""; StringRef Lang; switch (Opts.DashX.getLanguage()) { case Language::C: Lang = "c"; break; case Language::OpenCL: Lang = "cl"; break; case Language::OpenCLCXX: Lang = "clcpp"; break; case Language::CUDA: Lang = "cuda"; break; case Language::HIP: Lang = "hip"; break; case Language::CXX: Lang = "c++"; break; case Language::ObjC: Lang = "objective-c"; break; case Language::ObjCXX: Lang = "objective-c++"; break; case Language::RenderScript: Lang = "renderscript"; break; case Language::Asm: Lang = "assembler-with-cpp"; break; case Language::Unknown: assert(Opts.DashX.getFormat() == InputKind::Precompiled && "Generating -x argument for unknown language (not precompiled)."); Lang = "ast"; break; case Language::LLVM_IR: Lang = "ir"; break; } GenerateArg(Args, OPT_x, Lang + Header + ModuleMap + Preprocessed, SA); } // OPT_INPUT has a unique class, generate it directly. 
for (const auto &Input : Opts.Inputs) Args.push_back(SA(Input.getFile())); } static bool ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args, DiagnosticsEngine &Diags, bool &IsHeaderFile) { unsigned NumErrorsBefore = Diags.getNumErrors(); FrontendOptions &FrontendOpts = Opts; #define FRONTEND_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ PARSE_OPTION_WITH_MARSHALLING( \ Args, Diags, ID, FLAGS, PARAM, SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, MERGER, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef FRONTEND_OPTION_WITH_MARSHALLING Opts.ProgramAction = frontend::ParseSyntaxOnly; if (const Arg *A = Args.getLastArg(OPT_Action_Group)) { OptSpecifier Opt = OptSpecifier(A->getOption().getID()); Optional ProgramAction = getFrontendAction(Opt); assert(ProgramAction && "Option specifier not in Action_Group."); if (ProgramAction == frontend::ASTDump && (Opt == OPT_ast_dump_all_EQ || Opt == OPT_ast_dump_EQ)) { unsigned Val = llvm::StringSwitch(A->getValue()) .CaseLower("default", ADOF_Default) .CaseLower("json", ADOF_JSON) .Default(std::numeric_limits::max()); if (Val != std::numeric_limits::max()) Opts.ASTDumpFormat = static_cast(Val); else { Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); Opts.ASTDumpFormat = ADOF_Default; } } if (ProgramAction == frontend::FixIt && Opt == OPT_fixit_EQ) Opts.FixItSuffix = A->getValue(); if (ProgramAction == frontend::GenerateInterfaceStubs) { StringRef ArgStr = Args.hasArg(OPT_interface_stub_version_EQ) ? Args.getLastArgValue(OPT_interface_stub_version_EQ) : "ifs-v1"; if (ArgStr == "experimental-yaml-elf-v1" || ArgStr == "experimental-ifs-v1" || ArgStr == "experimental-ifs-v2" || ArgStr == "experimental-tapi-elf-v1") { std::string ErrorMessage = "Invalid interface stub format: " + ArgStr.str() + " is deprecated."; Diags.Report(diag::err_drv_invalid_value) << "Must specify a valid interface stub format type, ie: " "-interface-stub-version=ifs-v1" << ErrorMessage; ProgramAction = frontend::ParseSyntaxOnly; } else if (!ArgStr.startswith("ifs-")) { std::string ErrorMessage = "Invalid interface stub format: " + ArgStr.str() + "."; Diags.Report(diag::err_drv_invalid_value) << "Must specify a valid interface stub format type, ie: " "-interface-stub-version=ifs-v1" << ErrorMessage; ProgramAction = frontend::ParseSyntaxOnly; } } Opts.ProgramAction = *ProgramAction; } if (const Arg* A = Args.getLastArg(OPT_plugin)) { Opts.Plugins.emplace_back(A->getValue(0)); Opts.ProgramAction = frontend::PluginAction; Opts.ActionName = A->getValue(); } for (const auto *AA : Args.filtered(OPT_plugin_arg)) Opts.PluginArgs[AA->getValue(0)].emplace_back(AA->getValue(1)); for (const std::string &Arg : Args.getAllArgValues(OPT_ftest_module_file_extension_EQ)) { std::string BlockName; unsigned MajorVersion; unsigned MinorVersion; bool Hashed; std::string UserInfo; if (parseTestModuleFileExtensionArg(Arg, BlockName, MajorVersion, MinorVersion, Hashed, UserInfo)) { Diags.Report(diag::err_test_module_file_extension_format) << Arg; continue; } // Add the testing module file extension. 
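The extension registered just below was produced by parseTestModuleFileExtensionArg earlier, which splits a spec of the form blockname:major:minor:hashed:user info on colons. A plain-STL sketch of splitting such a spec into five fields, keeping any further colons inside the last (user info) field; the helper name is invented, and the real code uses StringRef::split with a bounded split count:

#include <string>
#include <vector>

static std::vector<std::string> splitExtensionSpec(const std::string &Spec) {
  std::vector<std::string> Fields;
  std::string::size_type Pos = 0;
  while (Fields.size() < 4) {
    auto Colon = Spec.find(':', Pos);
    if (Colon == std::string::npos)
      break;
    Fields.push_back(Spec.substr(Pos, Colon - Pos));
    Pos = Colon + 1;
  }
  Fields.push_back(Spec.substr(Pos)); // remainder (may itself contain ':')
  return Fields;
}

// splitExtensionSpec("clang.test:1:5:1:user data")
//   -> {"clang.test", "1", "5", "1", "user data"}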
Opts.ModuleFileExtensions.push_back( std::make_shared( BlockName, MajorVersion, MinorVersion, Hashed, UserInfo)); } if (const Arg *A = Args.getLastArg(OPT_code_completion_at)) { Opts.CodeCompletionAt = ParsedSourceLocation::FromString(A->getValue()); if (Opts.CodeCompletionAt.FileName.empty()) Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); } Opts.Plugins = Args.getAllArgValues(OPT_load); Opts.ASTDumpDecls = Args.hasArg(OPT_ast_dump, OPT_ast_dump_EQ); Opts.ASTDumpAll = Args.hasArg(OPT_ast_dump_all, OPT_ast_dump_all_EQ); // Only the -fmodule-file= form. for (const auto *A : Args.filtered(OPT_fmodule_file)) { StringRef Val = A->getValue(); if (!Val.contains('=')) Opts.ModuleFiles.push_back(std::string(Val)); } if (Opts.ProgramAction != frontend::GenerateModule && Opts.IsSystemModule) Diags.Report(diag::err_drv_argument_only_allowed_with) << "-fsystem-module" << "-emit-module"; if (Args.hasArg(OPT_aux_target_cpu)) Opts.AuxTargetCPU = std::string(Args.getLastArgValue(OPT_aux_target_cpu)); if (Args.hasArg(OPT_aux_target_feature)) Opts.AuxTargetFeatures = Args.getAllArgValues(OPT_aux_target_feature); if (Opts.ARCMTAction != FrontendOptions::ARCMT_None && Opts.ObjCMTAction != FrontendOptions::ObjCMT_None) { Diags.Report(diag::err_drv_argument_not_allowed_with) << "ARC migration" << "ObjC migration"; } InputKind DashX(Language::Unknown); if (const Arg *A = Args.getLastArg(OPT_x)) { StringRef XValue = A->getValue(); // Parse suffixes: '(-header|[-module-map][-cpp-output])'. // FIXME: Supporting '-header-cpp-output' would be useful. bool Preprocessed = XValue.consume_back("-cpp-output"); bool ModuleMap = XValue.consume_back("-module-map"); IsHeaderFile = !Preprocessed && !ModuleMap && XValue != "precompiled-header" && XValue.consume_back("-header"); // Principal languages. DashX = llvm::StringSwitch(XValue) .Case("c", Language::C) .Case("cl", Language::OpenCL) .Case("clcpp", Language::OpenCLCXX) .Case("cuda", Language::CUDA) .Case("hip", Language::HIP) .Case("c++", Language::CXX) .Case("objective-c", Language::ObjC) .Case("objective-c++", Language::ObjCXX) .Case("renderscript", Language::RenderScript) .Default(Language::Unknown); // "objc[++]-cpp-output" is an acceptable synonym for // "objective-c[++]-cpp-output". if (DashX.isUnknown() && Preprocessed && !IsHeaderFile && !ModuleMap) DashX = llvm::StringSwitch(XValue) .Case("objc", Language::ObjC) .Case("objc++", Language::ObjCXX) .Default(Language::Unknown); // Some special cases cannot be combined with suffixes. if (DashX.isUnknown() && !Preprocessed && !ModuleMap && !IsHeaderFile) DashX = llvm::StringSwitch(XValue) .Case("cpp-output", InputKind(Language::C).getPreprocessed()) .Case("assembler-with-cpp", Language::Asm) .Cases("ast", "pcm", "precompiled-header", InputKind(Language::Unknown, InputKind::Precompiled)) .Case("ir", Language::LLVM_IR) .Default(Language::Unknown); if (DashX.isUnknown()) Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); if (Preprocessed) DashX = DashX.getPreprocessed(); if (ModuleMap) DashX = DashX.withFormat(InputKind::ModuleMap); } // '-' is the default input if none is given. std::vector Inputs = Args.getAllArgValues(OPT_INPUT); Opts.Inputs.clear(); if (Inputs.empty()) Inputs.push_back("-"); for (unsigned i = 0, e = Inputs.size(); i != e; ++i) { InputKind IK = DashX; if (IK.isUnknown()) { IK = FrontendOptions::getInputKindForExtension( StringRef(Inputs[i]).rsplit('.').second); // FIXME: Warn on this? 
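Before the StringSwitch above matches the base language, the -x value has optional suffixes peeled off with StringRef::consume_back (-cpp-output, -module-map, -header, in that order). A std::string version of that helper, as a sketch (the function name is invented):

#include <string>

// If Str ends with Suffix, remove it and return true; otherwise leave Str
// untouched and return false.
static bool consumeBack(std::string &Str, const std::string &Suffix) {
  if (Str.size() < Suffix.size() ||
      Str.compare(Str.size() - Suffix.size(), Suffix.size(), Suffix) != 0)
    return false;
  Str.erase(Str.size() - Suffix.size());
  return true;
}

// e.g. for X = "objective-c++-header":
//   consumeBack(X, "-cpp-output") -> false
//   consumeBack(X, "-module-map") -> false
//   consumeBack(X, "-header")     -> true, X == "objective-c++"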
if (IK.isUnknown()) IK = Language::C; // FIXME: Remove this hack. if (i == 0) DashX = IK; } bool IsSystem = false; // The -emit-module action implicitly takes a module map. if (Opts.ProgramAction == frontend::GenerateModule && IK.getFormat() == InputKind::Source) { IK = IK.withFormat(InputKind::ModuleMap); IsSystem = Opts.IsSystemModule; } Opts.Inputs.emplace_back(std::move(Inputs[i]), IK, IsSystem); } Opts.DashX = DashX; return Diags.getNumErrors() == NumErrorsBefore; } std::string CompilerInvocation::GetResourcesPath(const char *Argv0, void *MainAddr) { std::string ClangExecutable = llvm::sys::fs::getMainExecutable(Argv0, MainAddr); return Driver::GetResourcesPath(ClangExecutable, CLANG_RESOURCE_DIR); } static void GenerateHeaderSearchArgs(HeaderSearchOptions &Opts, SmallVectorImpl &Args, CompilerInvocation::StringAllocator SA) { const HeaderSearchOptions *HeaderSearchOpts = &Opts; #define HEADER_SEARCH_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ GENERATE_OPTION_WITH_MARSHALLING( \ Args, SA, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef HEADER_SEARCH_OPTION_WITH_MARSHALLING if (Opts.UseLibcxx) GenerateArg(Args, OPT_stdlib_EQ, "libc++", SA); if (!Opts.ModuleCachePath.empty()) GenerateArg(Args, OPT_fmodules_cache_path, Opts.ModuleCachePath, SA); for (const auto &File : Opts.PrebuiltModuleFiles) GenerateArg(Args, OPT_fmodule_file, File.first + "=" + File.second, SA); for (const auto &Path : Opts.PrebuiltModulePaths) GenerateArg(Args, OPT_fprebuilt_module_path, Path, SA); for (const auto &Macro : Opts.ModulesIgnoreMacros) GenerateArg(Args, OPT_fmodules_ignore_macro, Macro.val(), SA); auto Matches = [](const HeaderSearchOptions::Entry &Entry, llvm::ArrayRef Groups, llvm::Optional IsFramework, llvm::Optional IgnoreSysRoot) { return llvm::is_contained(Groups, Entry.Group) && (!IsFramework || (Entry.IsFramework == *IsFramework)) && (!IgnoreSysRoot || (Entry.IgnoreSysRoot == *IgnoreSysRoot)); }; auto It = Opts.UserEntries.begin(); auto End = Opts.UserEntries.end(); // Add -I..., -F..., and -index-header-map options in order. for (; It < End && Matches(*It, {frontend::IndexHeaderMap, frontend::Angled}, None, true); ++It) { OptSpecifier Opt = [It, Matches]() { if (Matches(*It, frontend::IndexHeaderMap, true, true)) return OPT_F; if (Matches(*It, frontend::IndexHeaderMap, false, true)) return OPT_I; if (Matches(*It, frontend::Angled, true, true)) return OPT_F; if (Matches(*It, frontend::Angled, false, true)) return OPT_I; llvm_unreachable("Unexpected HeaderSearchOptions::Entry."); }(); if (It->Group == frontend::IndexHeaderMap) GenerateArg(Args, OPT_index_header_map, SA); GenerateArg(Args, Opt, It->Path, SA); }; // Note: some paths that came from "[-iprefix=xx] -iwithprefixbefore=yy" may // have already been generated as "-I[xx]yy". If that's the case, their // position on command line was such that this has no semantic impact on // include paths. for (; It < End && Matches(*It, {frontend::After, frontend::Angled}, false, true); ++It) { OptSpecifier Opt = It->Group == frontend::After ? 
OPT_iwithprefix : OPT_iwithprefixbefore; GenerateArg(Args, Opt, It->Path, SA); } // Note: Some paths that came from "-idirafter=xxyy" may have already been // generated as "-iwithprefix=xxyy". If that's the case, their position on // command line was such that this has no semantic impact on include paths. for (; It < End && Matches(*It, {frontend::After}, false, true); ++It) GenerateArg(Args, OPT_idirafter, It->Path, SA); for (; It < End && Matches(*It, {frontend::Quoted}, false, true); ++It) GenerateArg(Args, OPT_iquote, It->Path, SA); for (; It < End && Matches(*It, {frontend::System}, false, None); ++It) GenerateArg(Args, It->IgnoreSysRoot ? OPT_isystem : OPT_iwithsysroot, It->Path, SA); for (; It < End && Matches(*It, {frontend::System}, true, true); ++It) GenerateArg(Args, OPT_iframework, It->Path, SA); for (; It < End && Matches(*It, {frontend::System}, true, false); ++It) GenerateArg(Args, OPT_iframeworkwithsysroot, It->Path, SA); // Add the paths for the various language specific isystem flags. for (; It < End && Matches(*It, {frontend::CSystem}, false, true); ++It) GenerateArg(Args, OPT_c_isystem, It->Path, SA); for (; It < End && Matches(*It, {frontend::CXXSystem}, false, true); ++It) GenerateArg(Args, OPT_cxx_isystem, It->Path, SA); for (; It < End && Matches(*It, {frontend::ObjCSystem}, false, true); ++It) GenerateArg(Args, OPT_objc_isystem, It->Path, SA); for (; It < End && Matches(*It, {frontend::ObjCXXSystem}, false, true); ++It) GenerateArg(Args, OPT_objcxx_isystem, It->Path, SA); // Add the internal paths from a driver that detects standard include paths. // Note: Some paths that came from "-internal-isystem" arguments may have // already been generated as "-isystem". If that's the case, their position on // command line was such that this has no semantic impact on include paths. for (; It < End && Matches(*It, {frontend::System, frontend::ExternCSystem}, false, true); ++It) { OptSpecifier Opt = It->Group == frontend::System ? OPT_internal_isystem : OPT_internal_externc_isystem; GenerateArg(Args, Opt, It->Path, SA); } assert(It == End && "Unhandled HeaderSearchOption::Entry."); // Add the path prefixes which are implicitly treated as being system headers. for (const auto &P : Opts.SystemHeaderPrefixes) { OptSpecifier Opt = P.IsSystemHeader ? OPT_system_header_prefix : OPT_no_system_header_prefix; GenerateArg(Args, Opt, P.Prefix, SA); } for (const std::string &F : Opts.VFSOverlayFiles) GenerateArg(Args, OPT_ivfsoverlay, F, SA); } static bool ParseHeaderSearchArgs(HeaderSearchOptions &Opts, ArgList &Args, DiagnosticsEngine &Diags, const std::string &WorkingDir) { unsigned NumErrorsBefore = Diags.getNumErrors(); HeaderSearchOptions *HeaderSearchOpts = &Opts; #define HEADER_SEARCH_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ PARSE_OPTION_WITH_MARSHALLING( \ Args, Diags, ID, FLAGS, PARAM, SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, MERGER, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef HEADER_SEARCH_OPTION_WITH_MARSHALLING if (const Arg *A = Args.getLastArg(OPT_stdlib_EQ)) Opts.UseLibcxx = (strcmp(A->getValue(), "libc++") == 0); // Canonicalize -fmodules-cache-path before storing it. 
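The canonicalization announced by the comment above, and performed by the code that follows, anchors a relative cache path at the working directory and then folds away dot components. A std::filesystem approximation, offered only as a sketch; the real code uses llvm::sys::fs::make_absolute and llvm::sys::path::remove_dots, whose treatment of '..' differs slightly:

#include <filesystem>
#include <string>

namespace fs = std::filesystem;

// Absolute paths are kept as-is; relative ones are anchored at WorkingDir
// (or the process CWD when it is empty), then normalized lexically.
static std::string canonicalizeCachePath(const std::string &Path,
                                         const std::string &WorkingDir) {
  fs::path P(Path);
  if (!P.empty() && !P.is_absolute())
    P = (WorkingDir.empty() ? fs::current_path() : fs::path(WorkingDir)) / P;
  return P.lexically_normal().string();
}

// canonicalizeCachePath("cache/./mods", "/tmp/build")
//   -> "/tmp/build/cache/mods"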
SmallString<128> P(Args.getLastArgValue(OPT_fmodules_cache_path)); if (!(P.empty() || llvm::sys::path::is_absolute(P))) { if (WorkingDir.empty()) llvm::sys::fs::make_absolute(P); else llvm::sys::fs::make_absolute(WorkingDir, P); } llvm::sys::path::remove_dots(P); Opts.ModuleCachePath = std::string(P.str()); // Only the -fmodule-file== form. for (const auto *A : Args.filtered(OPT_fmodule_file)) { StringRef Val = A->getValue(); if (Val.contains('=')) { auto Split = Val.split('='); Opts.PrebuiltModuleFiles.insert( {std::string(Split.first), std::string(Split.second)}); } } for (const auto *A : Args.filtered(OPT_fprebuilt_module_path)) Opts.AddPrebuiltModulePath(A->getValue()); for (const auto *A : Args.filtered(OPT_fmodules_ignore_macro)) { StringRef MacroDef = A->getValue(); Opts.ModulesIgnoreMacros.insert( llvm::CachedHashString(MacroDef.split('=').first)); } // Add -I..., -F..., and -index-header-map options in order. bool IsIndexHeaderMap = false; bool IsSysrootSpecified = Args.hasArg(OPT__sysroot_EQ) || Args.hasArg(OPT_isysroot); for (const auto *A : Args.filtered(OPT_I, OPT_F, OPT_index_header_map)) { if (A->getOption().matches(OPT_index_header_map)) { // -index-header-map applies to the next -I or -F. IsIndexHeaderMap = true; continue; } frontend::IncludeDirGroup Group = IsIndexHeaderMap ? frontend::IndexHeaderMap : frontend::Angled; bool IsFramework = A->getOption().matches(OPT_F); std::string Path = A->getValue(); if (IsSysrootSpecified && !IsFramework && A->getValue()[0] == '=') { SmallString<32> Buffer; llvm::sys::path::append(Buffer, Opts.Sysroot, llvm::StringRef(A->getValue()).substr(1)); Path = std::string(Buffer.str()); } Opts.AddPath(Path, Group, IsFramework, /*IgnoreSysroot*/ true); IsIndexHeaderMap = false; } // Add -iprefix/-iwithprefix/-iwithprefixbefore options. StringRef Prefix = ""; // FIXME: This isn't the correct default prefix. for (const auto *A : Args.filtered(OPT_iprefix, OPT_iwithprefix, OPT_iwithprefixbefore)) { if (A->getOption().matches(OPT_iprefix)) Prefix = A->getValue(); else if (A->getOption().matches(OPT_iwithprefix)) Opts.AddPath(Prefix.str() + A->getValue(), frontend::After, false, true); else Opts.AddPath(Prefix.str() + A->getValue(), frontend::Angled, false, true); } for (const auto *A : Args.filtered(OPT_idirafter)) Opts.AddPath(A->getValue(), frontend::After, false, true); for (const auto *A : Args.filtered(OPT_iquote)) Opts.AddPath(A->getValue(), frontend::Quoted, false, true); for (const auto *A : Args.filtered(OPT_isystem, OPT_iwithsysroot)) Opts.AddPath(A->getValue(), frontend::System, false, !A->getOption().matches(OPT_iwithsysroot)); for (const auto *A : Args.filtered(OPT_iframework)) Opts.AddPath(A->getValue(), frontend::System, true, true); for (const auto *A : Args.filtered(OPT_iframeworkwithsysroot)) Opts.AddPath(A->getValue(), frontend::System, /*IsFramework=*/true, /*IgnoreSysRoot=*/false); // Add the paths for the various language specific isystem flags. for (const auto *A : Args.filtered(OPT_c_isystem)) Opts.AddPath(A->getValue(), frontend::CSystem, false, true); for (const auto *A : Args.filtered(OPT_cxx_isystem)) Opts.AddPath(A->getValue(), frontend::CXXSystem, false, true); for (const auto *A : Args.filtered(OPT_objc_isystem)) Opts.AddPath(A->getValue(), frontend::ObjCSystem, false,true); for (const auto *A : Args.filtered(OPT_objcxx_isystem)) Opts.AddPath(A->getValue(), frontend::ObjCXXSystem, false, true); // Add the internal paths from a driver that detects standard include paths. 
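One detail of the -I/-F handling above: when --sysroot or -isysroot was given, a non-framework include path written as =suffix is rewritten to <sysroot>/suffix. A sketch of that rule with plain strings (the helper name and the empty-sysroot early-out are simplifications; the real code uses llvm::sys::path::append on Opts.Sysroot):

#include <string>

static std::string expandSysrootPath(const std::string &Arg,
                                     const std::string &Sysroot) {
  if (Arg.empty() || Arg[0] != '=' || Sysroot.empty())
    return Arg;
  std::string Expanded = Sysroot;
  if (Expanded.back() != '/')
    Expanded += '/';
  return Expanded + Arg.substr(1);
}

// expandSysrootPath("=usr/local/include", "/opt/sdk")
//   -> "/opt/sdk/usr/local/include"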
for (const auto *A : Args.filtered(OPT_internal_isystem, OPT_internal_externc_isystem)) { frontend::IncludeDirGroup Group = frontend::System; if (A->getOption().matches(OPT_internal_externc_isystem)) Group = frontend::ExternCSystem; Opts.AddPath(A->getValue(), Group, false, true); } // Add the path prefixes which are implicitly treated as being system headers. for (const auto *A : Args.filtered(OPT_system_header_prefix, OPT_no_system_header_prefix)) Opts.AddSystemHeaderPrefix( A->getValue(), A->getOption().matches(OPT_system_header_prefix)); for (const auto *A : Args.filtered(OPT_ivfsoverlay)) Opts.AddVFSOverlayFile(A->getValue()); return Diags.getNumErrors() == NumErrorsBefore; } void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK, const llvm::Triple &T, std::vector &Includes, LangStandard::Kind LangStd) { // Set some properties which depend solely on the input kind; it would be nice // to move these to the language standard, and have the driver resolve the // input kind + language standard. // // FIXME: Perhaps a better model would be for a single source file to have // multiple language standards (C / C++ std, ObjC std, OpenCL std, OpenMP std) // simultaneously active? if (IK.getLanguage() == Language::Asm) { Opts.AsmPreprocessor = 1; } else if (IK.isObjectiveC()) { Opts.ObjC = 1; } if (LangStd == LangStandard::lang_unspecified) { // Based on the base language, pick one. switch (IK.getLanguage()) { case Language::Unknown: case Language::LLVM_IR: llvm_unreachable("Invalid input kind!"); case Language::OpenCL: LangStd = LangStandard::lang_opencl12; break; case Language::OpenCLCXX: LangStd = LangStandard::lang_openclcpp10; break; case Language::CUDA: LangStd = LangStandard::lang_cuda; break; case Language::Asm: case Language::C: #if defined(CLANG_DEFAULT_STD_C) LangStd = CLANG_DEFAULT_STD_C; #else // The PS4 uses C99 as the default C standard. if (T.isPS4()) LangStd = LangStandard::lang_gnu99; else LangStd = LangStandard::lang_gnu17; #endif break; case Language::ObjC: #if defined(CLANG_DEFAULT_STD_C) LangStd = CLANG_DEFAULT_STD_C; #else LangStd = LangStandard::lang_gnu11; #endif break; case Language::CXX: case Language::ObjCXX: #if defined(CLANG_DEFAULT_STD_CXX) LangStd = CLANG_DEFAULT_STD_CXX; #else LangStd = LangStandard::lang_gnucxx14; #endif break; case Language::RenderScript: LangStd = LangStandard::lang_c99; break; case Language::HIP: LangStd = LangStandard::lang_hip; break; } } const LangStandard &Std = LangStandard::getLangStandardForKind(LangStd); Opts.LangStd = LangStd; Opts.LineComment = Std.hasLineComments(); Opts.C99 = Std.isC99(); Opts.C11 = Std.isC11(); Opts.C17 = Std.isC17(); Opts.C2x = Std.isC2x(); Opts.CPlusPlus = Std.isCPlusPlus(); Opts.CPlusPlus11 = Std.isCPlusPlus11(); Opts.CPlusPlus14 = Std.isCPlusPlus14(); Opts.CPlusPlus17 = Std.isCPlusPlus17(); Opts.CPlusPlus20 = Std.isCPlusPlus20(); Opts.CPlusPlus2b = Std.isCPlusPlus2b(); Opts.GNUMode = Std.isGNUMode(); Opts.GNUCVersion = 0; Opts.HexFloats = Std.hasHexFloats(); Opts.ImplicitInt = Std.hasImplicitInt(); // Set OpenCL Version. 
Opts.OpenCL = Std.isOpenCL(); if (LangStd == LangStandard::lang_opencl10) Opts.OpenCLVersion = 100; else if (LangStd == LangStandard::lang_opencl11) Opts.OpenCLVersion = 110; else if (LangStd == LangStandard::lang_opencl12) Opts.OpenCLVersion = 120; else if (LangStd == LangStandard::lang_opencl20) Opts.OpenCLVersion = 200; else if (LangStd == LangStandard::lang_opencl30) Opts.OpenCLVersion = 300; else if (LangStd == LangStandard::lang_openclcpp10) Opts.OpenCLCPlusPlusVersion = 100; else if (LangStd == LangStandard::lang_openclcpp2021) Opts.OpenCLCPlusPlusVersion = 202100; // OpenCL has some additional defaults. if (Opts.OpenCL) { Opts.AltiVec = 0; Opts.ZVector = 0; Opts.setDefaultFPContractMode(LangOptions::FPM_On); Opts.OpenCLCPlusPlus = Opts.CPlusPlus; Opts.OpenCLPipes = Opts.getOpenCLCompatibleVersion() == 200; Opts.OpenCLGenericAddressSpace = Opts.getOpenCLCompatibleVersion() == 200; // Include default header file for OpenCL. if (Opts.IncludeDefaultHeader) { if (Opts.DeclareOpenCLBuiltins) { // Only include base header file for builtin types and constants. Includes.push_back("opencl-c-base.h"); } else { Includes.push_back("opencl-c.h"); } } } Opts.HIP = IK.getLanguage() == Language::HIP; Opts.CUDA = IK.getLanguage() == Language::CUDA || Opts.HIP; if (Opts.HIP) { // HIP toolchain does not support 'Fast' FPOpFusion in backends since it // fuses multiplication/addition instructions without contract flag from // device library functions in LLVM bitcode, which causes accuracy loss in // certain math functions, e.g. tan(-1e20) becomes -0.933 instead of 0.8446. // For device library functions in bitcode to work, 'Strict' or 'Standard' // FPOpFusion options in backends is needed. Therefore 'fast-honor-pragmas' // FP contract option is used to allow fuse across statements in frontend // whereas respecting contract flag in backend. Opts.setDefaultFPContractMode(LangOptions::FPM_FastHonorPragmas); } else if (Opts.CUDA) { // Allow fuse across statements disregarding pragmas. Opts.setDefaultFPContractMode(LangOptions::FPM_Fast); } Opts.RenderScript = IK.getLanguage() == Language::RenderScript; // OpenCL and C++ both have bool, true, false keywords. Opts.Bool = Opts.OpenCL || Opts.CPlusPlus; // OpenCL has half keyword Opts.Half = Opts.OpenCL; } /// Check if input file kind and language standard are compatible. static bool IsInputCompatibleWithStandard(InputKind IK, const LangStandard &S) { switch (IK.getLanguage()) { case Language::Unknown: case Language::LLVM_IR: llvm_unreachable("should not parse language flags for this input"); case Language::C: case Language::ObjC: case Language::RenderScript: return S.getLanguage() == Language::C; case Language::OpenCL: return S.getLanguage() == Language::OpenCL || S.getLanguage() == Language::OpenCLCXX; case Language::OpenCLCXX: return S.getLanguage() == Language::OpenCLCXX; case Language::CXX: case Language::ObjCXX: return S.getLanguage() == Language::CXX; case Language::CUDA: // FIXME: What -std= values should be permitted for CUDA compilations? return S.getLanguage() == Language::CUDA || S.getLanguage() == Language::CXX; case Language::HIP: return S.getLanguage() == Language::CXX || S.getLanguage() == Language::HIP; case Language::Asm: // Accept (and ignore) all -std= values. // FIXME: The -std= value is not ignored; it affects the tokenization // and preprocessing rules if we're preprocessing this asm input. return true; } llvm_unreachable("unexpected input language"); } /// Get language name for given input kind. 
static StringRef GetInputKindName(InputKind IK) { switch (IK.getLanguage()) { case Language::C: return "C"; case Language::ObjC: return "Objective-C"; case Language::CXX: return "C++"; case Language::ObjCXX: return "Objective-C++"; case Language::OpenCL: return "OpenCL"; case Language::OpenCLCXX: return "C++ for OpenCL"; case Language::CUDA: return "CUDA"; case Language::RenderScript: return "RenderScript"; case Language::HIP: return "HIP"; case Language::Asm: return "Asm"; case Language::LLVM_IR: return "LLVM IR"; case Language::Unknown: break; } llvm_unreachable("unknown input language"); } void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts, SmallVectorImpl &Args, StringAllocator SA, const llvm::Triple &T, InputKind IK) { if (IK.getFormat() == InputKind::Precompiled || IK.getLanguage() == Language::LLVM_IR) { if (Opts.ObjCAutoRefCount) GenerateArg(Args, OPT_fobjc_arc, SA); if (Opts.PICLevel != 0) GenerateArg(Args, OPT_pic_level, Twine(Opts.PICLevel), SA); if (Opts.PIE) GenerateArg(Args, OPT_pic_is_pie, SA); for (StringRef Sanitizer : serializeSanitizerKinds(Opts.Sanitize)) GenerateArg(Args, OPT_fsanitize_EQ, Sanitizer, SA); return; } OptSpecifier StdOpt; switch (Opts.LangStd) { case LangStandard::lang_opencl10: case LangStandard::lang_opencl11: case LangStandard::lang_opencl12: case LangStandard::lang_opencl20: case LangStandard::lang_opencl30: case LangStandard::lang_openclcpp10: case LangStandard::lang_openclcpp2021: StdOpt = OPT_cl_std_EQ; break; default: StdOpt = OPT_std_EQ; break; } auto LangStandard = LangStandard::getLangStandardForKind(Opts.LangStd); GenerateArg(Args, StdOpt, LangStandard.getName(), SA); if (Opts.IncludeDefaultHeader) GenerateArg(Args, OPT_finclude_default_header, SA); if (Opts.DeclareOpenCLBuiltins) GenerateArg(Args, OPT_fdeclare_opencl_builtins, SA); const LangOptions *LangOpts = &Opts; #define LANG_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ GENERATE_OPTION_WITH_MARSHALLING( \ Args, SA, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef LANG_OPTION_WITH_MARSHALLING // The '-fcf-protection=' option is generated by CodeGenOpts generator. if (Opts.ObjC) { GenerateArg(Args, OPT_fobjc_runtime_EQ, Opts.ObjCRuntime.getAsString(), SA); if (Opts.GC == LangOptions::GCOnly) GenerateArg(Args, OPT_fobjc_gc_only, SA); else if (Opts.GC == LangOptions::HybridGC) GenerateArg(Args, OPT_fobjc_gc, SA); else if (Opts.ObjCAutoRefCount == 1) GenerateArg(Args, OPT_fobjc_arc, SA); if (Opts.ObjCWeakRuntime) GenerateArg(Args, OPT_fobjc_runtime_has_weak, SA); if (Opts.ObjCWeak) GenerateArg(Args, OPT_fobjc_weak, SA); if (Opts.ObjCSubscriptingLegacyRuntime) GenerateArg(Args, OPT_fobjc_subscripting_legacy_runtime, SA); } if (Opts.GNUCVersion != 0) { unsigned Major = Opts.GNUCVersion / 100 / 100; unsigned Minor = (Opts.GNUCVersion / 100) % 100; unsigned Patch = Opts.GNUCVersion % 100; GenerateArg(Args, OPT_fgnuc_version_EQ, Twine(Major) + "." + Twine(Minor) + "." 
+ Twine(Patch), SA); } if (Opts.IgnoreXCOFFVisibility) GenerateArg(Args, OPT_mignore_xcoff_visibility, SA); if (Opts.SignedOverflowBehavior == LangOptions::SOB_Trapping) { GenerateArg(Args, OPT_ftrapv, SA); GenerateArg(Args, OPT_ftrapv_handler, Opts.OverflowHandler, SA); } else if (Opts.SignedOverflowBehavior == LangOptions::SOB_Defined) { GenerateArg(Args, OPT_fwrapv, SA); } if (Opts.MSCompatibilityVersion != 0) { unsigned Major = Opts.MSCompatibilityVersion / 10000000; unsigned Minor = (Opts.MSCompatibilityVersion / 100000) % 100; unsigned Subminor = Opts.MSCompatibilityVersion % 100000; GenerateArg(Args, OPT_fms_compatibility_version, Twine(Major) + "." + Twine(Minor) + "." + Twine(Subminor), SA); } if ((!Opts.GNUMode && !Opts.MSVCCompat && !Opts.CPlusPlus17) || T.isOSzOS()) { if (!Opts.Trigraphs) GenerateArg(Args, OPT_fno_trigraphs, SA); } else { if (Opts.Trigraphs) GenerateArg(Args, OPT_ftrigraphs, SA); } if (Opts.Blocks && !(Opts.OpenCL && Opts.OpenCLVersion == 200)) GenerateArg(Args, OPT_fblocks, SA); if (Opts.ConvergentFunctions && !(Opts.OpenCL || (Opts.CUDA && Opts.CUDAIsDevice) || Opts.SYCLIsDevice)) GenerateArg(Args, OPT_fconvergent_functions, SA); if (Opts.NoBuiltin && !Opts.Freestanding) GenerateArg(Args, OPT_fno_builtin, SA); if (!Opts.NoBuiltin) for (const auto &Func : Opts.NoBuiltinFuncs) GenerateArg(Args, OPT_fno_builtin_, Func, SA); if (Opts.LongDoubleSize == 128) GenerateArg(Args, OPT_mlong_double_128, SA); else if (Opts.LongDoubleSize == 64) GenerateArg(Args, OPT_mlong_double_64, SA); // Not generating '-mrtd', it's just an alias for '-fdefault-calling-conv='. // OpenMP was requested via '-fopenmp', not implied by '-fopenmp-simd' or // '-fopenmp-targets='. if (Opts.OpenMP && !Opts.OpenMPSimd) { GenerateArg(Args, OPT_fopenmp, SA); if (Opts.OpenMP != 50) GenerateArg(Args, OPT_fopenmp_version_EQ, Twine(Opts.OpenMP), SA); if (!Opts.OpenMPUseTLS) GenerateArg(Args, OPT_fnoopenmp_use_tls, SA); if (Opts.OpenMPIsDevice) GenerateArg(Args, OPT_fopenmp_is_device, SA); if (Opts.OpenMPIRBuilder) GenerateArg(Args, OPT_fopenmp_enable_irbuilder, SA); } if (Opts.OpenMPSimd) { GenerateArg(Args, OPT_fopenmp_simd, SA); if (Opts.OpenMP != 50) GenerateArg(Args, OPT_fopenmp_version_EQ, Twine(Opts.OpenMP), SA); } if (Opts.OpenMPTargetNewRuntime) GenerateArg(Args, OPT_fopenmp_target_new_runtime, SA); if (Opts.OpenMPThreadSubscription) GenerateArg(Args, OPT_fopenmp_assume_threads_oversubscription, SA); if (Opts.OpenMPTeamSubscription) GenerateArg(Args, OPT_fopenmp_assume_teams_oversubscription, SA); if (Opts.OpenMPTargetDebug != 0) GenerateArg(Args, OPT_fopenmp_target_debug_EQ, Twine(Opts.OpenMPTargetDebug), SA); if (Opts.OpenMPCUDANumSMs != 0) GenerateArg(Args, OPT_fopenmp_cuda_number_of_sm_EQ, Twine(Opts.OpenMPCUDANumSMs), SA); if (Opts.OpenMPCUDABlocksPerSM != 0) GenerateArg(Args, OPT_fopenmp_cuda_blocks_per_sm_EQ, Twine(Opts.OpenMPCUDABlocksPerSM), SA); if (Opts.OpenMPCUDAReductionBufNum != 1024) GenerateArg(Args, OPT_fopenmp_cuda_teams_reduction_recs_num_EQ, Twine(Opts.OpenMPCUDAReductionBufNum), SA); if (!Opts.OMPTargetTriples.empty()) { std::string Targets; llvm::raw_string_ostream OS(Targets); llvm::interleave( Opts.OMPTargetTriples, OS, [&OS](const llvm::Triple &T) { OS << T.str(); }, ","); GenerateArg(Args, OPT_fopenmp_targets_EQ, OS.str(), SA); } if (!Opts.OMPHostIRFile.empty()) GenerateArg(Args, OPT_fopenmp_host_ir_file_path, Opts.OMPHostIRFile, SA); if (Opts.OpenMPCUDAMode) GenerateArg(Args, OPT_fopenmp_cuda_mode, SA); if (Opts.OpenMPCUDAForceFullRuntime) GenerateArg(Args, 
OPT_fopenmp_cuda_force_full_runtime, SA); // The arguments used to set Optimize, OptimizeSize and NoInlineDefine are // generated from CodeGenOptions. if (Opts.DefaultFPContractMode == LangOptions::FPM_Fast) GenerateArg(Args, OPT_ffp_contract, "fast", SA); else if (Opts.DefaultFPContractMode == LangOptions::FPM_On) GenerateArg(Args, OPT_ffp_contract, "on", SA); else if (Opts.DefaultFPContractMode == LangOptions::FPM_Off) GenerateArg(Args, OPT_ffp_contract, "off", SA); else if (Opts.DefaultFPContractMode == LangOptions::FPM_FastHonorPragmas) GenerateArg(Args, OPT_ffp_contract, "fast-honor-pragmas", SA); for (StringRef Sanitizer : serializeSanitizerKinds(Opts.Sanitize)) GenerateArg(Args, OPT_fsanitize_EQ, Sanitizer, SA); // Conflating '-fsanitize-system-ignorelist' and '-fsanitize-ignorelist'. for (const std::string &F : Opts.NoSanitizeFiles) GenerateArg(Args, OPT_fsanitize_ignorelist_EQ, F, SA); if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver3_8) GenerateArg(Args, OPT_fclang_abi_compat_EQ, "3.8", SA); else if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver4) GenerateArg(Args, OPT_fclang_abi_compat_EQ, "4.0", SA); else if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver6) GenerateArg(Args, OPT_fclang_abi_compat_EQ, "6.0", SA); else if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver7) GenerateArg(Args, OPT_fclang_abi_compat_EQ, "7.0", SA); else if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver9) GenerateArg(Args, OPT_fclang_abi_compat_EQ, "9.0", SA); else if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver11) GenerateArg(Args, OPT_fclang_abi_compat_EQ, "11.0", SA); else if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver12) GenerateArg(Args, OPT_fclang_abi_compat_EQ, "12.0", SA); - else if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver13) - GenerateArg(Args, OPT_fclang_abi_compat_EQ, "13.0", SA); if (Opts.getSignReturnAddressScope() == LangOptions::SignReturnAddressScopeKind::All) GenerateArg(Args, OPT_msign_return_address_EQ, "all", SA); else if (Opts.getSignReturnAddressScope() == LangOptions::SignReturnAddressScopeKind::NonLeaf) GenerateArg(Args, OPT_msign_return_address_EQ, "non-leaf", SA); if (Opts.getSignReturnAddressKey() == LangOptions::SignReturnAddressKeyKind::BKey) GenerateArg(Args, OPT_msign_return_address_key_EQ, "b_key", SA); if (Opts.CXXABI) GenerateArg(Args, OPT_fcxx_abi_EQ, TargetCXXABI::getSpelling(*Opts.CXXABI), SA); if (Opts.RelativeCXXABIVTables) GenerateArg(Args, OPT_fexperimental_relative_cxx_abi_vtables, SA); else GenerateArg(Args, OPT_fno_experimental_relative_cxx_abi_vtables, SA); for (const auto &MP : Opts.MacroPrefixMap) GenerateArg(Args, OPT_fmacro_prefix_map_EQ, MP.first + "=" + MP.second, SA); } bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, const llvm::Triple &T, std::vector &Includes, DiagnosticsEngine &Diags) { unsigned NumErrorsBefore = Diags.getNumErrors(); if (IK.getFormat() == InputKind::Precompiled || IK.getLanguage() == Language::LLVM_IR) { // ObjCAAutoRefCount and Sanitize LangOpts are used to setup the // PassManager in BackendUtil.cpp. They need to be initialized no matter // what the input type is. if (Args.hasArg(OPT_fobjc_arc)) Opts.ObjCAutoRefCount = 1; // PICLevel and PIELevel are needed during code generation and this should // be set regardless of the input type. 
Opts.PICLevel = getLastArgIntValue(Args, OPT_pic_level, 0, Diags); Opts.PIE = Args.hasArg(OPT_pic_is_pie); parseSanitizerKinds("-fsanitize=", Args.getAllArgValues(OPT_fsanitize_EQ), Diags, Opts.Sanitize); return Diags.getNumErrors() == NumErrorsBefore; } // Other LangOpts are only initialized when the input is not AST or LLVM IR. // FIXME: Should we really be parsing this for an Language::Asm input? // FIXME: Cleanup per-file based stuff. LangStandard::Kind LangStd = LangStandard::lang_unspecified; if (const Arg *A = Args.getLastArg(OPT_std_EQ)) { LangStd = LangStandard::getLangKind(A->getValue()); if (LangStd == LangStandard::lang_unspecified) { Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); // Report supported standards with short description. for (unsigned KindValue = 0; KindValue != LangStandard::lang_unspecified; ++KindValue) { const LangStandard &Std = LangStandard::getLangStandardForKind( static_cast(KindValue)); if (IsInputCompatibleWithStandard(IK, Std)) { auto Diag = Diags.Report(diag::note_drv_use_standard); Diag << Std.getName() << Std.getDescription(); unsigned NumAliases = 0; #define LANGSTANDARD(id, name, lang, desc, features) #define LANGSTANDARD_ALIAS(id, alias) \ if (KindValue == LangStandard::lang_##id) ++NumAliases; #define LANGSTANDARD_ALIAS_DEPR(id, alias) #include "clang/Basic/LangStandards.def" Diag << NumAliases; #define LANGSTANDARD(id, name, lang, desc, features) #define LANGSTANDARD_ALIAS(id, alias) \ if (KindValue == LangStandard::lang_##id) Diag << alias; #define LANGSTANDARD_ALIAS_DEPR(id, alias) #include "clang/Basic/LangStandards.def" } } } else { // Valid standard, check to make sure language and standard are // compatible. const LangStandard &Std = LangStandard::getLangStandardForKind(LangStd); if (!IsInputCompatibleWithStandard(IK, Std)) { Diags.Report(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args) << GetInputKindName(IK); } } } // -cl-std only applies for OpenCL language standards. // Override the -std option in this case. if (const Arg *A = Args.getLastArg(OPT_cl_std_EQ)) { LangStandard::Kind OpenCLLangStd = llvm::StringSwitch(A->getValue()) .Cases("cl", "CL", LangStandard::lang_opencl10) .Cases("cl1.0", "CL1.0", LangStandard::lang_opencl10) .Cases("cl1.1", "CL1.1", LangStandard::lang_opencl11) .Cases("cl1.2", "CL1.2", LangStandard::lang_opencl12) .Cases("cl2.0", "CL2.0", LangStandard::lang_opencl20) .Cases("cl3.0", "CL3.0", LangStandard::lang_opencl30) .Cases("clc++", "CLC++", LangStandard::lang_openclcpp10) .Cases("clc++1.0", "CLC++1.0", LangStandard::lang_openclcpp10) .Cases("clc++2021", "CLC++2021", LangStandard::lang_openclcpp2021) .Default(LangStandard::lang_unspecified); if (OpenCLLangStd == LangStandard::lang_unspecified) { Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); } else LangStd = OpenCLLangStd; } // These need to be parsed now. They are used to set OpenCL defaults. Opts.IncludeDefaultHeader = Args.hasArg(OPT_finclude_default_header); Opts.DeclareOpenCLBuiltins = Args.hasArg(OPT_fdeclare_opencl_builtins); CompilerInvocation::setLangDefaults(Opts, IK, T, Includes, LangStd); // The key paths of codegen options defined in Options.td start with // "LangOpts->". Let's provide the expected variable name and type. 
LangOptions *LangOpts = &Opts; #define LANG_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ PARSE_OPTION_WITH_MARSHALLING( \ Args, Diags, ID, FLAGS, PARAM, SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, MERGER, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef LANG_OPTION_WITH_MARSHALLING if (const Arg *A = Args.getLastArg(OPT_fcf_protection_EQ)) { StringRef Name = A->getValue(); if (Name == "full" || Name == "branch") { Opts.CFProtectionBranch = 1; } } if ((Args.hasArg(OPT_fsycl_is_device) || Args.hasArg(OPT_fsycl_is_host)) && !Args.hasArg(OPT_sycl_std_EQ)) { // If the user supplied -fsycl-is-device or -fsycl-is-host, but failed to // provide -sycl-std=, we want to default it to whatever the default SYCL // version is. I could not find a way to express this with the options // tablegen because we still want this value to be SYCL_None when the user // is not in device or host mode. Opts.setSYCLVersion(LangOptions::SYCL_Default); } if (Opts.ObjC) { if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) { StringRef value = arg->getValue(); if (Opts.ObjCRuntime.tryParse(value)) Diags.Report(diag::err_drv_unknown_objc_runtime) << value; } if (Args.hasArg(OPT_fobjc_gc_only)) Opts.setGC(LangOptions::GCOnly); else if (Args.hasArg(OPT_fobjc_gc)) Opts.setGC(LangOptions::HybridGC); else if (Args.hasArg(OPT_fobjc_arc)) { Opts.ObjCAutoRefCount = 1; if (!Opts.ObjCRuntime.allowsARC()) Diags.Report(diag::err_arc_unsupported_on_runtime); } // ObjCWeakRuntime tracks whether the runtime supports __weak, not // whether the feature is actually enabled. This is predominantly // determined by -fobjc-runtime, but we allow it to be overridden // from the command line for testing purposes. if (Args.hasArg(OPT_fobjc_runtime_has_weak)) Opts.ObjCWeakRuntime = 1; else Opts.ObjCWeakRuntime = Opts.ObjCRuntime.allowsWeak(); // ObjCWeak determines whether __weak is actually enabled. // Note that we allow -fno-objc-weak to disable this even in ARC mode. if (auto weakArg = Args.getLastArg(OPT_fobjc_weak, OPT_fno_objc_weak)) { if (!weakArg->getOption().matches(OPT_fobjc_weak)) { assert(!Opts.ObjCWeak); } else if (Opts.getGC() != LangOptions::NonGC) { Diags.Report(diag::err_objc_weak_with_gc); } else if (!Opts.ObjCWeakRuntime) { Diags.Report(diag::err_objc_weak_unsupported); } else { Opts.ObjCWeak = 1; } } else if (Opts.ObjCAutoRefCount) { Opts.ObjCWeak = Opts.ObjCWeakRuntime; } if (Args.hasArg(OPT_fobjc_subscripting_legacy_runtime)) Opts.ObjCSubscriptingLegacyRuntime = (Opts.ObjCRuntime.getKind() == ObjCRuntime::FragileMacOSX); } if (Arg *A = Args.getLastArg(options::OPT_fgnuc_version_EQ)) { // Check that the version has 1 to 3 components and the minor and patch // versions fit in two decimal digits. VersionTuple GNUCVer; bool Invalid = GNUCVer.tryParse(A->getValue()); unsigned Major = GNUCVer.getMajor(); unsigned Minor = GNUCVer.getMinor().getValueOr(0); unsigned Patch = GNUCVer.getSubminor().getValueOr(0); if (Invalid || GNUCVer.getBuild() || Minor >= 100 || Patch >= 100) { Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); } Opts.GNUCVersion = Major * 100 * 100 + Minor * 100 + Patch; } // In AIX OS, the -mignore-xcoff-visibility is enable by default if there is // no -fvisibility=* option. 
// This is the reason why '-fvisibility' needs to be always generated: // its absence implies '-mignore-xcoff-visibility'. // // Suppose the original cc1 command line does contain '-fvisibility default': // '-mignore-xcoff-visibility' should not be implied. // * If '-fvisibility' is not generated (as most options with default values // don't), its absence would imply '-mignore-xcoff-visibility'. This changes // the command line semantics. // * If '-fvisibility' is generated regardless of its presence and value, // '-mignore-xcoff-visibility' won't be implied and the command line // semantics are kept intact. // // When the original cc1 command line does **not** contain '-fvisibility', // '-mignore-xcoff-visibility' is implied. The generated command line will // contain both '-fvisibility default' and '-mignore-xcoff-visibility' and // subsequent calls to `CreateFromArgs`/`generateCC1CommandLine` will always // produce the same arguments. if (T.isOSAIX() && (Args.hasArg(OPT_mignore_xcoff_visibility) || !Args.hasArg(OPT_fvisibility))) Opts.IgnoreXCOFFVisibility = 1; if (Args.hasArg(OPT_ftrapv)) { Opts.setSignedOverflowBehavior(LangOptions::SOB_Trapping); // Set the handler, if one is specified. Opts.OverflowHandler = std::string(Args.getLastArgValue(OPT_ftrapv_handler)); } else if (Args.hasArg(OPT_fwrapv)) Opts.setSignedOverflowBehavior(LangOptions::SOB_Defined); Opts.MSCompatibilityVersion = 0; if (const Arg *A = Args.getLastArg(OPT_fms_compatibility_version)) { VersionTuple VT; if (VT.tryParse(A->getValue())) Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); Opts.MSCompatibilityVersion = VT.getMajor() * 10000000 + VT.getMinor().getValueOr(0) * 100000 + VT.getSubminor().getValueOr(0); } // Mimicking gcc's behavior, trigraphs are only enabled if -trigraphs // is specified, or -std is set to a conforming mode. // Trigraphs are disabled by default in c++1z onwards. // For z/OS, trigraphs are enabled by default (without regard to the above). Opts.Trigraphs = (!Opts.GNUMode && !Opts.MSVCCompat && !Opts.CPlusPlus17) || T.isOSzOS(); Opts.Trigraphs = Args.hasFlag(OPT_ftrigraphs, OPT_fno_trigraphs, Opts.Trigraphs); Opts.Blocks = Args.hasArg(OPT_fblocks) || (Opts.OpenCL && Opts.OpenCLVersion == 200); Opts.ConvergentFunctions = Opts.OpenCL || (Opts.CUDA && Opts.CUDAIsDevice) || Opts.SYCLIsDevice || Args.hasArg(OPT_fconvergent_functions); Opts.NoBuiltin = Args.hasArg(OPT_fno_builtin) || Opts.Freestanding; if (!Opts.NoBuiltin) getAllNoBuiltinFuncValues(Args, Opts.NoBuiltinFuncs); Opts.LongDoubleSize = Args.hasArg(OPT_mlong_double_128) ? 128 : Args.hasArg(OPT_mlong_double_64) ? 64 : 0; if (Opts.FastRelaxedMath) Opts.setDefaultFPContractMode(LangOptions::FPM_Fast); llvm::sort(Opts.ModuleFeatures); // -mrtd option if (Arg *A = Args.getLastArg(OPT_mrtd)) { if (Opts.getDefaultCallingConv() != LangOptions::DCC_None) Diags.Report(diag::err_drv_argument_not_allowed_with) << A->getSpelling() << "-fdefault-calling-conv"; else { if (T.getArch() != llvm::Triple::x86) Diags.Report(diag::err_drv_argument_not_allowed_with) << A->getSpelling() << T.getTriple(); else Opts.setDefaultCallingConv(LangOptions::DCC_StdCall); } } // Check if -fopenmp is specified and set default version to 5.0. Opts.OpenMP = Args.hasArg(OPT_fopenmp) ? 50 : 0; // Check if -fopenmp-simd is specified. 
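The -fms-compatibility-version value parsed above is packed into a single integer as major*10000000 + minor*100000 + subminor, and GenerateLangArgs earlier unpacks it with the matching divisions. A worked round trip of that encoding (the version numbers are only example values):

#include <cassert>
#include <tuple>

// 19.29.30133 becomes 19*10000000 + 29*100000 + 30133 = 192930133.
static unsigned packMSVersion(unsigned Major, unsigned Minor, unsigned Sub) {
  return Major * 10000000 + Minor * 100000 + Sub;
}

static std::tuple<unsigned, unsigned, unsigned> unpackMSVersion(unsigned V) {
  return std::make_tuple(V / 10000000, (V / 100000) % 100, V % 100000);
}

int main() {
  unsigned Packed = packMSVersion(19, 29, 30133);
  assert(Packed == 192930133);
  assert(unpackMSVersion(Packed) == std::make_tuple(19u, 29u, 30133u));
}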
bool IsSimdSpecified = Args.hasFlag(options::OPT_fopenmp_simd, options::OPT_fno_openmp_simd, /*Default=*/false); Opts.OpenMPSimd = !Opts.OpenMP && IsSimdSpecified; Opts.OpenMPUseTLS = Opts.OpenMP && !Args.hasArg(options::OPT_fnoopenmp_use_tls); Opts.OpenMPIsDevice = Opts.OpenMP && Args.hasArg(options::OPT_fopenmp_is_device); Opts.OpenMPIRBuilder = Opts.OpenMP && Args.hasArg(options::OPT_fopenmp_enable_irbuilder); bool IsTargetSpecified = Opts.OpenMPIsDevice || Args.hasArg(options::OPT_fopenmp_targets_EQ); Opts.OpenMPTargetNewRuntime = Opts.OpenMPIsDevice && Args.hasArg(options::OPT_fopenmp_target_new_runtime); Opts.ConvergentFunctions = Opts.ConvergentFunctions || Opts.OpenMPIsDevice; if (Opts.OpenMP || Opts.OpenMPSimd) { if (int Version = getLastArgIntValue( Args, OPT_fopenmp_version_EQ, (IsSimdSpecified || IsTargetSpecified) ? 50 : Opts.OpenMP, Diags)) Opts.OpenMP = Version; // Provide diagnostic when a given target is not expected to be an OpenMP // device or host. if (!Opts.OpenMPIsDevice) { switch (T.getArch()) { default: break; // Add unsupported host targets here: case llvm::Triple::nvptx: case llvm::Triple::nvptx64: Diags.Report(diag::err_drv_omp_host_target_not_supported) << T.str(); break; } } } // Set the flag to prevent the implementation from emitting device exception // handling code for those requiring so. if ((Opts.OpenMPIsDevice && (T.isNVPTX() || T.isAMDGCN())) || Opts.OpenCLCPlusPlus) { Opts.Exceptions = 0; Opts.CXXExceptions = 0; } if (Opts.OpenMPIsDevice && T.isNVPTX()) { Opts.OpenMPCUDANumSMs = getLastArgIntValue(Args, options::OPT_fopenmp_cuda_number_of_sm_EQ, Opts.OpenMPCUDANumSMs, Diags); Opts.OpenMPCUDABlocksPerSM = getLastArgIntValue(Args, options::OPT_fopenmp_cuda_blocks_per_sm_EQ, Opts.OpenMPCUDABlocksPerSM, Diags); Opts.OpenMPCUDAReductionBufNum = getLastArgIntValue( Args, options::OPT_fopenmp_cuda_teams_reduction_recs_num_EQ, Opts.OpenMPCUDAReductionBufNum, Diags); } // Set the value of the debugging flag used in the new offloading device RTL. // Set either by a specific value or to a default if not specified. if (Opts.OpenMPIsDevice && (Args.hasArg(OPT_fopenmp_target_debug) || Args.hasArg(OPT_fopenmp_target_debug_EQ))) { if (Opts.OpenMPTargetNewRuntime) { Opts.OpenMPTargetDebug = getLastArgIntValue( Args, OPT_fopenmp_target_debug_EQ, Opts.OpenMPTargetDebug, Diags); if (!Opts.OpenMPTargetDebug && Args.hasArg(OPT_fopenmp_target_debug)) Opts.OpenMPTargetDebug = 1; } else { Diags.Report(diag::err_drv_debug_no_new_runtime); } } if (Opts.OpenMPIsDevice && Opts.OpenMPTargetNewRuntime) { if (Args.hasArg(OPT_fopenmp_assume_teams_oversubscription)) Opts.OpenMPTeamSubscription = true; if (Args.hasArg(OPT_fopenmp_assume_threads_oversubscription)) Opts.OpenMPThreadSubscription = true; } // Get the OpenMP target triples if any. 
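  // Illustrative sketch, not part of the upstream patch: the -fopenmp-targets=
  // validation below only accepts device triples whose pointer width matches
  // the host triple T, e.g. an x86_64 host pairs with nvptx64/amdgcn devices,
  // while mixing a 32-bit host with a 64-bit device triple is diagnosed.
  auto SketchSamePointerWidth = [](const llvm::Triple &Host,
                                   const llvm::Triple &Device) {
    return Host.isArch64Bit() == Device.isArch64Bit() &&
           Host.isArch32Bit() == Device.isArch32Bit();
  };
  (void)SketchSamePointerWidth;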
if (Arg *A = Args.getLastArg(options::OPT_fopenmp_targets_EQ)) { enum ArchPtrSize { Arch16Bit, Arch32Bit, Arch64Bit }; auto getArchPtrSize = [](const llvm::Triple &T) { if (T.isArch16Bit()) return Arch16Bit; if (T.isArch32Bit()) return Arch32Bit; assert(T.isArch64Bit() && "Expected 64-bit architecture"); return Arch64Bit; }; for (unsigned i = 0; i < A->getNumValues(); ++i) { llvm::Triple TT(A->getValue(i)); if (TT.getArch() == llvm::Triple::UnknownArch || !(TT.getArch() == llvm::Triple::aarch64 || TT.isPPC() || TT.getArch() == llvm::Triple::nvptx || TT.getArch() == llvm::Triple::nvptx64 || TT.getArch() == llvm::Triple::amdgcn || TT.getArch() == llvm::Triple::x86 || TT.getArch() == llvm::Triple::x86_64)) Diags.Report(diag::err_drv_invalid_omp_target) << A->getValue(i); else if (getArchPtrSize(T) != getArchPtrSize(TT)) Diags.Report(diag::err_drv_incompatible_omp_arch) << A->getValue(i) << T.str(); else Opts.OMPTargetTriples.push_back(TT); } } // Get OpenMP host file path if any and report if a non existent file is // found if (Arg *A = Args.getLastArg(options::OPT_fopenmp_host_ir_file_path)) { Opts.OMPHostIRFile = A->getValue(); if (!llvm::sys::fs::exists(Opts.OMPHostIRFile)) Diags.Report(diag::err_drv_omp_host_ir_file_not_found) << Opts.OMPHostIRFile; } // Set CUDA mode for OpenMP target NVPTX/AMDGCN if specified in options Opts.OpenMPCUDAMode = Opts.OpenMPIsDevice && (T.isNVPTX() || T.isAMDGCN()) && Args.hasArg(options::OPT_fopenmp_cuda_mode); // Set CUDA mode for OpenMP target NVPTX/AMDGCN if specified in options Opts.OpenMPCUDAForceFullRuntime = Opts.OpenMPIsDevice && (T.isNVPTX() || T.isAMDGCN()) && Args.hasArg(options::OPT_fopenmp_cuda_force_full_runtime); // FIXME: Eliminate this dependency. unsigned Opt = getOptimizationLevel(Args, IK, Diags), OptSize = getOptimizationLevelSize(Args); Opts.Optimize = Opt != 0; Opts.OptimizeSize = OptSize != 0; // This is the __NO_INLINE__ define, which just depends on things like the // optimization level and -fno-inline, not actually whether the backend has // inlining enabled. Opts.NoInlineDefine = !Opts.Optimize; if (Arg *InlineArg = Args.getLastArg( options::OPT_finline_functions, options::OPT_finline_hint_functions, options::OPT_fno_inline_functions, options::OPT_fno_inline)) if (InlineArg->getOption().matches(options::OPT_fno_inline)) Opts.NoInlineDefine = true; if (Arg *A = Args.getLastArg(OPT_ffp_contract)) { StringRef Val = A->getValue(); if (Val == "fast") Opts.setDefaultFPContractMode(LangOptions::FPM_Fast); else if (Val == "on") Opts.setDefaultFPContractMode(LangOptions::FPM_On); else if (Val == "off") Opts.setDefaultFPContractMode(LangOptions::FPM_Off); else if (Val == "fast-honor-pragmas") Opts.setDefaultFPContractMode(LangOptions::FPM_FastHonorPragmas); else Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Val; } // Parse -fsanitize= arguments. 
  parseSanitizerKinds("-fsanitize=", Args.getAllArgValues(OPT_fsanitize_EQ),
                      Diags, Opts.Sanitize);
  Opts.NoSanitizeFiles = Args.getAllArgValues(OPT_fsanitize_ignorelist_EQ);
  std::vector<std::string> systemIgnorelists =
      Args.getAllArgValues(OPT_fsanitize_system_ignorelist_EQ);
  Opts.NoSanitizeFiles.insert(Opts.NoSanitizeFiles.end(),
                              systemIgnorelists.begin(),
                              systemIgnorelists.end());

  if (Arg *A = Args.getLastArg(OPT_fclang_abi_compat_EQ)) {
    Opts.setClangABICompat(LangOptions::ClangABI::Latest);

    StringRef Ver = A->getValue();
    std::pair<StringRef, StringRef> VerParts = Ver.split('.');
    unsigned Major, Minor = 0;

    // Check the version number is valid: either 3.x (0 <= x <= 9) or
    // y or y.0 (4 <= y <= current version).
    if (!VerParts.first.startswith("0") &&
        !VerParts.first.getAsInteger(10, Major) && 3 <= Major &&
        Major <= CLANG_VERSION_MAJOR &&
        (Major == 3
             ? VerParts.second.size() == 1 &&
                   !VerParts.second.getAsInteger(10, Minor)
             : VerParts.first.size() == Ver.size() || VerParts.second == "0")) {
      // Got a valid version number.
      if (Major == 3 && Minor <= 8)
        Opts.setClangABICompat(LangOptions::ClangABI::Ver3_8);
      else if (Major <= 4)
        Opts.setClangABICompat(LangOptions::ClangABI::Ver4);
      else if (Major <= 6)
        Opts.setClangABICompat(LangOptions::ClangABI::Ver6);
      else if (Major <= 7)
        Opts.setClangABICompat(LangOptions::ClangABI::Ver7);
      else if (Major <= 9)
        Opts.setClangABICompat(LangOptions::ClangABI::Ver9);
      else if (Major <= 11)
        Opts.setClangABICompat(LangOptions::ClangABI::Ver11);
      else if (Major <= 12)
        Opts.setClangABICompat(LangOptions::ClangABI::Ver12);
-      else if (Major <= 13)
-        Opts.setClangABICompat(LangOptions::ClangABI::Ver13);
    } else if (Ver != "latest") {
      Diags.Report(diag::err_drv_invalid_value)
          << A->getAsString(Args) << A->getValue();
    }
  }

  if (Arg *A = Args.getLastArg(OPT_msign_return_address_EQ)) {
    StringRef SignScope = A->getValue();

    if (SignScope.equals_insensitive("none"))
      Opts.setSignReturnAddressScope(
          LangOptions::SignReturnAddressScopeKind::None);
    else if (SignScope.equals_insensitive("all"))
      Opts.setSignReturnAddressScope(
          LangOptions::SignReturnAddressScopeKind::All);
    else if (SignScope.equals_insensitive("non-leaf"))
      Opts.setSignReturnAddressScope(
          LangOptions::SignReturnAddressScopeKind::NonLeaf);
    else
      Diags.Report(diag::err_drv_invalid_value)
          << A->getAsString(Args) << SignScope;

    if (Arg *A = Args.getLastArg(OPT_msign_return_address_key_EQ)) {
      StringRef SignKey = A->getValue();
      if (!SignScope.empty() && !SignKey.empty()) {
        if (SignKey.equals_insensitive("a_key"))
          Opts.setSignReturnAddressKey(
              LangOptions::SignReturnAddressKeyKind::AKey);
        else if (SignKey.equals_insensitive("b_key"))
          Opts.setSignReturnAddressKey(
              LangOptions::SignReturnAddressKeyKind::BKey);
        else
          Diags.Report(diag::err_drv_invalid_value)
              << A->getAsString(Args) << SignKey;
      }
    }
  }

  // The value can be empty, which indicates the system default should be used.
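  // Illustrative sketch, not part of the upstream patch: a compact restatement
  // of the -fclang-abi-compat mapping parsed above, now that the Ver13 case is
  // gone; valid versions newer than 12 (and "latest") keep ClangABI::Latest.
  auto SketchABICompatFor = [](unsigned Major, unsigned Minor) {
    if (Major == 3 && Minor <= 8)
      return LangOptions::ClangABI::Ver3_8;
    if (Major <= 4)
      return LangOptions::ClangABI::Ver4;
    if (Major <= 6)
      return LangOptions::ClangABI::Ver6;
    if (Major <= 7)
      return LangOptions::ClangABI::Ver7;
    if (Major <= 9)
      return LangOptions::ClangABI::Ver9;
    if (Major <= 11)
      return LangOptions::ClangABI::Ver11;
    if (Major <= 12)
      return LangOptions::ClangABI::Ver12;
    return LangOptions::ClangABI::Latest;
  };
  (void)SketchABICompatFor; // e.g. SketchABICompatFor(11, 0) == ClangABI::Ver11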
StringRef CXXABI = Args.getLastArgValue(OPT_fcxx_abi_EQ); if (!CXXABI.empty()) { if (!TargetCXXABI::isABI(CXXABI)) { Diags.Report(diag::err_invalid_cxx_abi) << CXXABI; } else { auto Kind = TargetCXXABI::getKind(CXXABI); if (!TargetCXXABI::isSupportedCXXABI(T, Kind)) Diags.Report(diag::err_unsupported_cxx_abi) << CXXABI << T.str(); else Opts.CXXABI = Kind; } } Opts.RelativeCXXABIVTables = Args.hasFlag(options::OPT_fexperimental_relative_cxx_abi_vtables, options::OPT_fno_experimental_relative_cxx_abi_vtables, TargetCXXABI::usesRelativeVTables(T)); for (const auto &A : Args.getAllArgValues(OPT_fmacro_prefix_map_EQ)) { auto Split = StringRef(A).split('='); Opts.MacroPrefixMap.insert( {std::string(Split.first), std::string(Split.second)}); } // Error if -mvscale-min is unbounded. if (Arg *A = Args.getLastArg(options::OPT_mvscale_min_EQ)) { unsigned VScaleMin; if (StringRef(A->getValue()).getAsInteger(10, VScaleMin) || VScaleMin == 0) Diags.Report(diag::err_cc1_unbounded_vscale_min); } return Diags.getNumErrors() == NumErrorsBefore; } static bool isStrictlyPreprocessorAction(frontend::ActionKind Action) { switch (Action) { case frontend::ASTDeclList: case frontend::ASTDump: case frontend::ASTPrint: case frontend::ASTView: case frontend::EmitAssembly: case frontend::EmitBC: case frontend::EmitHTML: case frontend::EmitLLVM: case frontend::EmitLLVMOnly: case frontend::EmitCodeGenOnly: case frontend::EmitObj: case frontend::ExtractAPI: case frontend::FixIt: case frontend::GenerateModule: case frontend::GenerateModuleInterface: case frontend::GenerateHeaderModule: case frontend::GeneratePCH: case frontend::GenerateInterfaceStubs: case frontend::ParseSyntaxOnly: case frontend::ModuleFileInfo: case frontend::VerifyPCH: case frontend::PluginAction: case frontend::RewriteObjC: case frontend::RewriteTest: case frontend::RunAnalysis: case frontend::TemplightDump: case frontend::MigrateSource: return false; case frontend::DumpCompilerOptions: case frontend::DumpRawTokens: case frontend::DumpTokens: case frontend::InitOnly: case frontend::PrintPreamble: case frontend::PrintPreprocessedInput: case frontend::RewriteMacros: case frontend::RunPreprocessorOnly: case frontend::PrintDependencyDirectivesSourceMinimizerOutput: return true; } llvm_unreachable("invalid frontend action"); } static void GeneratePreprocessorArgs(PreprocessorOptions &Opts, SmallVectorImpl &Args, CompilerInvocation::StringAllocator SA, const LangOptions &LangOpts, const FrontendOptions &FrontendOpts, const CodeGenOptions &CodeGenOpts) { PreprocessorOptions *PreprocessorOpts = &Opts; #define PREPROCESSOR_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ GENERATE_OPTION_WITH_MARSHALLING( \ Args, SA, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef PREPROCESSOR_OPTION_WITH_MARSHALLING if (Opts.PCHWithHdrStop && !Opts.PCHWithHdrStopCreate) GenerateArg(Args, OPT_pch_through_hdrstop_use, SA); for (const auto &D : Opts.DeserializedPCHDeclsToErrorOn) GenerateArg(Args, OPT_error_on_deserialized_pch_decl, D, SA); if (Opts.PrecompiledPreambleBytes != std::make_pair(0u, false)) GenerateArg(Args, OPT_preamble_bytes_EQ, Twine(Opts.PrecompiledPreambleBytes.first) + "," + (Opts.PrecompiledPreambleBytes.second 
? "1" : "0"), SA); for (const auto &M : Opts.Macros) { // Don't generate __CET__ macro definitions. They are implied by the // -fcf-protection option that is generated elsewhere. if (M.first == "__CET__=1" && !M.second && !CodeGenOpts.CFProtectionReturn && CodeGenOpts.CFProtectionBranch) continue; if (M.first == "__CET__=2" && !M.second && CodeGenOpts.CFProtectionReturn && !CodeGenOpts.CFProtectionBranch) continue; if (M.first == "__CET__=3" && !M.second && CodeGenOpts.CFProtectionReturn && CodeGenOpts.CFProtectionBranch) continue; GenerateArg(Args, M.second ? OPT_U : OPT_D, M.first, SA); } for (const auto &I : Opts.Includes) { // Don't generate OpenCL includes. They are implied by other flags that are // generated elsewhere. if (LangOpts.OpenCL && LangOpts.IncludeDefaultHeader && ((LangOpts.DeclareOpenCLBuiltins && I == "opencl-c-base.h") || I == "opencl-c.h")) continue; GenerateArg(Args, OPT_include, I, SA); } for (const auto &CI : Opts.ChainedIncludes) GenerateArg(Args, OPT_chain_include, CI, SA); for (const auto &RF : Opts.RemappedFiles) GenerateArg(Args, OPT_remap_file, RF.first + ";" + RF.second, SA); // Don't handle LexEditorPlaceholders. It is implied by the action that is // generated elsewhere. } static bool ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args, DiagnosticsEngine &Diags, frontend::ActionKind Action, const FrontendOptions &FrontendOpts) { unsigned NumErrorsBefore = Diags.getNumErrors(); PreprocessorOptions *PreprocessorOpts = &Opts; #define PREPROCESSOR_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ PARSE_OPTION_WITH_MARSHALLING( \ Args, Diags, ID, FLAGS, PARAM, SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, MERGER, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef PREPROCESSOR_OPTION_WITH_MARSHALLING Opts.PCHWithHdrStop = Args.hasArg(OPT_pch_through_hdrstop_create) || Args.hasArg(OPT_pch_through_hdrstop_use); for (const auto *A : Args.filtered(OPT_error_on_deserialized_pch_decl)) Opts.DeserializedPCHDeclsToErrorOn.insert(A->getValue()); if (const Arg *A = Args.getLastArg(OPT_preamble_bytes_EQ)) { StringRef Value(A->getValue()); size_t Comma = Value.find(','); unsigned Bytes = 0; unsigned EndOfLine = 0; if (Comma == StringRef::npos || Value.substr(0, Comma).getAsInteger(10, Bytes) || Value.substr(Comma + 1).getAsInteger(10, EndOfLine)) Diags.Report(diag::err_drv_preamble_format); else { Opts.PrecompiledPreambleBytes.first = Bytes; Opts.PrecompiledPreambleBytes.second = (EndOfLine != 0); } } // Add the __CET__ macro if a CFProtection option is set. if (const Arg *A = Args.getLastArg(OPT_fcf_protection_EQ)) { StringRef Name = A->getValue(); if (Name == "branch") Opts.addMacroDef("__CET__=1"); else if (Name == "return") Opts.addMacroDef("__CET__=2"); else if (Name == "full") Opts.addMacroDef("__CET__=3"); } // Add macros from the command line. for (const auto *A : Args.filtered(OPT_D, OPT_U)) { if (A->getOption().matches(OPT_D)) Opts.addMacroDef(A->getValue()); else Opts.addMacroUndef(A->getValue()); } // Add the ordered list of -includes. 
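  // Illustrative sketch, not part of the upstream patch: a restatement of the
  // -fcf-protection= handling above, which predefines __CET__ much like GCC
  // does (branch -> 1, return -> 2, full -> 3).
  auto SketchCETMacroFor = [](StringRef CFProtection) -> const char * {
    if (CFProtection == "branch")
      return "__CET__=1";
    if (CFProtection == "return")
      return "__CET__=2";
    if (CFProtection == "full")
      return "__CET__=3";
    return nullptr; // any other value adds no macro here
  };
  (void)SketchCETMacroFor;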
for (const auto *A : Args.filtered(OPT_include)) Opts.Includes.emplace_back(A->getValue()); for (const auto *A : Args.filtered(OPT_chain_include)) Opts.ChainedIncludes.emplace_back(A->getValue()); for (const auto *A : Args.filtered(OPT_remap_file)) { std::pair Split = StringRef(A->getValue()).split(';'); if (Split.second.empty()) { Diags.Report(diag::err_drv_invalid_remap_file) << A->getAsString(Args); continue; } Opts.addRemappedFile(Split.first, Split.second); } // Always avoid lexing editor placeholders when we're just running the // preprocessor as we never want to emit the // "editor placeholder in source file" error in PP only mode. if (isStrictlyPreprocessorAction(Action)) Opts.LexEditorPlaceholders = false; return Diags.getNumErrors() == NumErrorsBefore; } static void GeneratePreprocessorOutputArgs( const PreprocessorOutputOptions &Opts, SmallVectorImpl &Args, CompilerInvocation::StringAllocator SA, frontend::ActionKind Action) { const PreprocessorOutputOptions &PreprocessorOutputOpts = Opts; #define PREPROCESSOR_OUTPUT_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ GENERATE_OPTION_WITH_MARSHALLING( \ Args, SA, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef PREPROCESSOR_OUTPUT_OPTION_WITH_MARSHALLING bool Generate_dM = isStrictlyPreprocessorAction(Action) && !Opts.ShowCPP; if (Generate_dM) GenerateArg(Args, OPT_dM, SA); if (!Generate_dM && Opts.ShowMacros) GenerateArg(Args, OPT_dD, SA); } static bool ParsePreprocessorOutputArgs(PreprocessorOutputOptions &Opts, ArgList &Args, DiagnosticsEngine &Diags, frontend::ActionKind Action) { unsigned NumErrorsBefore = Diags.getNumErrors(); PreprocessorOutputOptions &PreprocessorOutputOpts = Opts; #define PREPROCESSOR_OUTPUT_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ PARSE_OPTION_WITH_MARSHALLING( \ Args, Diags, ID, FLAGS, PARAM, SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, MERGER, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef PREPROCESSOR_OUTPUT_OPTION_WITH_MARSHALLING Opts.ShowCPP = isStrictlyPreprocessorAction(Action) && !Args.hasArg(OPT_dM); Opts.ShowMacros = Args.hasArg(OPT_dM) || Args.hasArg(OPT_dD); return Diags.getNumErrors() == NumErrorsBefore; } static void GenerateTargetArgs(const TargetOptions &Opts, SmallVectorImpl &Args, CompilerInvocation::StringAllocator SA) { const TargetOptions *TargetOpts = &Opts; #define TARGET_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ GENERATE_OPTION_WITH_MARSHALLING( \ Args, SA, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef TARGET_OPTION_WITH_MARSHALLING if (!Opts.SDKVersion.empty()) GenerateArg(Args, 
OPT_target_sdk_version_EQ, Opts.SDKVersion.getAsString(), SA); } static bool ParseTargetArgs(TargetOptions &Opts, ArgList &Args, DiagnosticsEngine &Diags) { unsigned NumErrorsBefore = Diags.getNumErrors(); TargetOptions *TargetOpts = &Opts; #define TARGET_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ PARSE_OPTION_WITH_MARSHALLING( \ Args, Diags, ID, FLAGS, PARAM, SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, MERGER, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef TARGET_OPTION_WITH_MARSHALLING if (Arg *A = Args.getLastArg(options::OPT_target_sdk_version_EQ)) { llvm::VersionTuple Version; if (Version.tryParse(A->getValue())) Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); else Opts.SDKVersion = Version; } return Diags.getNumErrors() == NumErrorsBefore; } bool CompilerInvocation::CreateFromArgsImpl( CompilerInvocation &Res, ArrayRef CommandLineArgs, DiagnosticsEngine &Diags, const char *Argv0) { unsigned NumErrorsBefore = Diags.getNumErrors(); // Parse the arguments. const OptTable &Opts = getDriverOptTable(); const unsigned IncludedFlagsBitmask = options::CC1Option; unsigned MissingArgIndex, MissingArgCount; InputArgList Args = Opts.ParseArgs(CommandLineArgs, MissingArgIndex, MissingArgCount, IncludedFlagsBitmask); LangOptions &LangOpts = *Res.getLangOpts(); // Check for missing argument error. if (MissingArgCount) Diags.Report(diag::err_drv_missing_argument) << Args.getArgString(MissingArgIndex) << MissingArgCount; // Issue errors on unknown arguments. for (const auto *A : Args.filtered(OPT_UNKNOWN)) { auto ArgString = A->getAsString(Args); std::string Nearest; if (Opts.findNearest(ArgString, Nearest, IncludedFlagsBitmask) > 1) Diags.Report(diag::err_drv_unknown_argument) << ArgString; else Diags.Report(diag::err_drv_unknown_argument_with_suggestion) << ArgString << Nearest; } ParseFileSystemArgs(Res.getFileSystemOpts(), Args, Diags); ParseMigratorArgs(Res.getMigratorOpts(), Args, Diags); ParseAnalyzerArgs(*Res.getAnalyzerOpts(), Args, Diags); ParseDiagnosticArgs(Res.getDiagnosticOpts(), Args, &Diags, /*DefaultDiagColor=*/false); ParseFrontendArgs(Res.getFrontendOpts(), Args, Diags, LangOpts.IsHeaderFile); // FIXME: We shouldn't have to pass the DashX option around here InputKind DashX = Res.getFrontendOpts().DashX; ParseTargetArgs(Res.getTargetOpts(), Args, Diags); llvm::Triple T(Res.getTargetOpts().Triple); ParseHeaderSearchArgs(Res.getHeaderSearchOpts(), Args, Diags, Res.getFileSystemOpts().WorkingDir); ParseLangArgs(LangOpts, Args, DashX, T, Res.getPreprocessorOpts().Includes, Diags); if (Res.getFrontendOpts().ProgramAction == frontend::RewriteObjC) LangOpts.ObjCExceptions = 1; if (LangOpts.CUDA) { // During CUDA device-side compilation, the aux triple is the // triple used for host compilation. if (LangOpts.CUDAIsDevice) Res.getTargetOpts().HostTriple = Res.getFrontendOpts().AuxTriple; } // Set the triple of the host for OpenMP device compile. 
if (LangOpts.OpenMPIsDevice) Res.getTargetOpts().HostTriple = Res.getFrontendOpts().AuxTriple; ParseCodeGenArgs(Res.getCodeGenOpts(), Args, DashX, Diags, T, Res.getFrontendOpts().OutputFile, LangOpts); // FIXME: Override value name discarding when asan or msan is used because the // backend passes depend on the name of the alloca in order to print out // names. Res.getCodeGenOpts().DiscardValueNames &= !LangOpts.Sanitize.has(SanitizerKind::Address) && !LangOpts.Sanitize.has(SanitizerKind::KernelAddress) && !LangOpts.Sanitize.has(SanitizerKind::Memory) && !LangOpts.Sanitize.has(SanitizerKind::KernelMemory); ParsePreprocessorArgs(Res.getPreprocessorOpts(), Args, Diags, Res.getFrontendOpts().ProgramAction, Res.getFrontendOpts()); ParsePreprocessorOutputArgs(Res.getPreprocessorOutputOpts(), Args, Diags, Res.getFrontendOpts().ProgramAction); ParseDependencyOutputArgs(Res.getDependencyOutputOpts(), Args, Diags, Res.getFrontendOpts().ProgramAction, Res.getPreprocessorOutputOpts().ShowLineMarkers); if (!Res.getDependencyOutputOpts().OutputFile.empty() && Res.getDependencyOutputOpts().Targets.empty()) Diags.Report(diag::err_fe_dependency_file_requires_MT); // If sanitizer is enabled, disable OPT_ffine_grained_bitfield_accesses. if (Res.getCodeGenOpts().FineGrainedBitfieldAccesses && !Res.getLangOpts()->Sanitize.empty()) { Res.getCodeGenOpts().FineGrainedBitfieldAccesses = false; Diags.Report(diag::warn_drv_fine_grained_bitfield_accesses_ignored); } // Store the command-line for using in the CodeView backend. Res.getCodeGenOpts().Argv0 = Argv0; append_range(Res.getCodeGenOpts().CommandLineArgs, CommandLineArgs); FixupInvocation(Res, Diags, Args, DashX); return Diags.getNumErrors() == NumErrorsBefore; } bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Invocation, ArrayRef CommandLineArgs, DiagnosticsEngine &Diags, const char *Argv0) { CompilerInvocation DummyInvocation; return RoundTrip( [](CompilerInvocation &Invocation, ArrayRef CommandLineArgs, DiagnosticsEngine &Diags, const char *Argv0) { return CreateFromArgsImpl(Invocation, CommandLineArgs, Diags, Argv0); }, [](CompilerInvocation &Invocation, SmallVectorImpl &Args, StringAllocator SA) { Invocation.generateCC1CommandLine(Args, SA); }, Invocation, DummyInvocation, CommandLineArgs, Diags, Argv0); } std::string CompilerInvocation::getModuleHash() const { // FIXME: Consider using SHA1 instead of MD5. llvm::HashBuilder HBuilder; // Note: For QoI reasons, the things we use as a hash here should all be // dumped via the -module-info flag. // Start the signature with the compiler version. HBuilder.add(getClangFullRepositoryVersion()); // Also include the serialization version, in case LLVM_APPEND_VC_REV is off // and getClangFullRepositoryVersion() doesn't include git revision. HBuilder.add(serialization::VERSION_MAJOR, serialization::VERSION_MINOR); // Extend the signature with the language options #define LANGOPT(Name, Bits, Default, Description) HBuilder.add(LangOpts->Name); #define ENUM_LANGOPT(Name, Type, Bits, Default, Description) \ HBuilder.add(static_cast(LangOpts->get##Name())); #define BENIGN_LANGOPT(Name, Bits, Default, Description) #define BENIGN_ENUM_LANGOPT(Name, Type, Bits, Default, Description) #include "clang/Basic/LangOptions.def" HBuilder.addRange(LangOpts->ModuleFeatures); HBuilder.add(LangOpts->ObjCRuntime); HBuilder.addRange(LangOpts->CommentOpts.BlockCommandNames); // Extend the signature with the target options. 
HBuilder.add(TargetOpts->Triple, TargetOpts->CPU, TargetOpts->TuneCPU, TargetOpts->ABI); HBuilder.addRange(TargetOpts->FeaturesAsWritten); // Extend the signature with preprocessor options. const PreprocessorOptions &ppOpts = getPreprocessorOpts(); HBuilder.add(ppOpts.UsePredefines, ppOpts.DetailedRecord); const HeaderSearchOptions &hsOpts = getHeaderSearchOpts(); for (const auto &Macro : getPreprocessorOpts().Macros) { // If we're supposed to ignore this macro for the purposes of modules, // don't put it into the hash. if (!hsOpts.ModulesIgnoreMacros.empty()) { // Check whether we're ignoring this macro. StringRef MacroDef = Macro.first; if (hsOpts.ModulesIgnoreMacros.count( llvm::CachedHashString(MacroDef.split('=').first))) continue; } HBuilder.add(Macro); } // Extend the signature with the sysroot and other header search options. HBuilder.add(hsOpts.Sysroot, hsOpts.ModuleFormat, hsOpts.UseDebugInfo, hsOpts.UseBuiltinIncludes, hsOpts.UseStandardSystemIncludes, hsOpts.UseStandardCXXIncludes, hsOpts.UseLibcxx, hsOpts.ModulesValidateDiagnosticOptions); HBuilder.add(hsOpts.ResourceDir); if (hsOpts.ModulesStrictContextHash) { HBuilder.addRange(hsOpts.SystemHeaderPrefixes); HBuilder.addRange(hsOpts.UserEntries); const DiagnosticOptions &diagOpts = getDiagnosticOpts(); #define DIAGOPT(Name, Bits, Default) HBuilder.add(diagOpts.Name); #define ENUM_DIAGOPT(Name, Type, Bits, Default) \ HBuilder.add(diagOpts.get##Name()); #include "clang/Basic/DiagnosticOptions.def" #undef DIAGOPT #undef ENUM_DIAGOPT } // Extend the signature with the user build path. HBuilder.add(hsOpts.ModuleUserBuildPath); // Extend the signature with the module file extensions. for (const auto &ext : getFrontendOpts().ModuleFileExtensions) ext->hashExtension(HBuilder); // When compiling with -gmodules, also hash -fdebug-prefix-map as it // affects the debug info in the PCM. if (getCodeGenOpts().DebugTypeExtRefs) HBuilder.addRange(getCodeGenOpts().DebugPrefixMap); // Extend the signature with the enabled sanitizers, if at least one is // enabled. Sanitizers which cannot affect AST generation aren't hashed. 
  SanitizerSet SanHash = LangOpts->Sanitize;
  SanHash.clear(getPPTransparentSanitizers());
  if (!SanHash.empty())
    HBuilder.add(SanHash.Mask);

  llvm::MD5::MD5Result Result;
  HBuilder.getHasher().final(Result);
  uint64_t Hash = Result.high() ^ Result.low();
  return toString(llvm::APInt(64, Hash), 36, /*Signed=*/false);
}

void CompilerInvocation::generateCC1CommandLine(
    SmallVectorImpl<const char *> &Args, StringAllocator SA) const {
  llvm::Triple T(TargetOpts->Triple);

  GenerateFileSystemArgs(FileSystemOpts, Args, SA);
  GenerateMigratorArgs(MigratorOpts, Args, SA);
  GenerateAnalyzerArgs(*AnalyzerOpts, Args, SA);
  GenerateDiagnosticArgs(*DiagnosticOpts, Args, SA, false);
  GenerateFrontendArgs(FrontendOpts, Args, SA, LangOpts->IsHeaderFile);
  GenerateTargetArgs(*TargetOpts, Args, SA);
  GenerateHeaderSearchArgs(*HeaderSearchOpts, Args, SA);
  GenerateLangArgs(*LangOpts, Args, SA, T, FrontendOpts.DashX);
  GenerateCodeGenArgs(CodeGenOpts, Args, SA, T, FrontendOpts.OutputFile,
                      &*LangOpts);
  GeneratePreprocessorArgs(*PreprocessorOpts, Args, SA, *LangOpts, FrontendOpts,
                           CodeGenOpts);
  GeneratePreprocessorOutputArgs(PreprocessorOutputOpts, Args, SA,
                                 FrontendOpts.ProgramAction);
  GenerateDependencyOutputArgs(DependencyOutputOpts, Args, SA);
}

IntrusiveRefCntPtr<llvm::vfs::FileSystem>
clang::createVFSFromCompilerInvocation(const CompilerInvocation &CI,
                                       DiagnosticsEngine &Diags) {
  return createVFSFromCompilerInvocation(CI, Diags,
                                         llvm::vfs::getRealFileSystem());
}

IntrusiveRefCntPtr<llvm::vfs::FileSystem>
clang::createVFSFromCompilerInvocation(
    const CompilerInvocation &CI, DiagnosticsEngine &Diags,
    IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS) {
  if (CI.getHeaderSearchOpts().VFSOverlayFiles.empty())
    return BaseFS;

  IntrusiveRefCntPtr<llvm::vfs::FileSystem> Result = BaseFS;
  // earlier vfs files are on the bottom
  for (const auto &File : CI.getHeaderSearchOpts().VFSOverlayFiles) {
    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Buffer =
        Result->getBufferForFile(File);
    if (!Buffer) {
      Diags.Report(diag::err_missing_vfs_overlay_file) << File;
      continue;
    }

    IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS = llvm::vfs::getVFSFromYAML(
        std::move(Buffer.get()), /*DiagHandler*/ nullptr, File,
        /*DiagContext*/ nullptr, Result);
    if (!FS) {
      Diags.Report(diag::err_invalid_vfs_overlay) << File;
      continue;
    }

    Result = FS;
  }
  return Result;
}
diff --git a/contrib/llvm-project/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/contrib/llvm-project/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 1da0dfec3f23..467372c71496 100644
--- a/contrib/llvm-project/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/contrib/llvm-project/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -1,6366 +1,6368 @@
//===--- SemaTemplateInstantiateDecl.cpp - C++ Template Decl Instantiation ===/
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//===----------------------------------------------------------------------===/
//
// This file implements C++ template instantiation for declarations.
// //===----------------------------------------------------------------------===/ #include "TreeTransform.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTMutationListener.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/DeclVisitor.h" #include "clang/AST/DependentDiagnostic.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/PrettyDeclStackTrace.h" #include "clang/AST/TypeLoc.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/Sema/Initialization.h" #include "clang/Sema/Lookup.h" #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/SemaInternal.h" #include "clang/Sema/Template.h" #include "clang/Sema/TemplateInstCallback.h" #include "llvm/Support/TimeProfiler.h" using namespace clang; static bool isDeclWithinFunction(const Decl *D) { const DeclContext *DC = D->getDeclContext(); if (DC->isFunctionOrMethod()) return true; if (DC->isRecord()) return cast(DC)->isLocalClass(); return false; } template static bool SubstQualifier(Sema &SemaRef, const DeclT *OldDecl, DeclT *NewDecl, const MultiLevelTemplateArgumentList &TemplateArgs) { if (!OldDecl->getQualifierLoc()) return false; assert((NewDecl->getFriendObjectKind() || !OldDecl->getLexicalDeclContext()->isDependentContext()) && "non-friend with qualified name defined in dependent context"); Sema::ContextRAII SavedContext( SemaRef, const_cast(NewDecl->getFriendObjectKind() ? NewDecl->getLexicalDeclContext() : OldDecl->getLexicalDeclContext())); NestedNameSpecifierLoc NewQualifierLoc = SemaRef.SubstNestedNameSpecifierLoc(OldDecl->getQualifierLoc(), TemplateArgs); if (!NewQualifierLoc) return true; NewDecl->setQualifierInfo(NewQualifierLoc); return false; } bool TemplateDeclInstantiator::SubstQualifier(const DeclaratorDecl *OldDecl, DeclaratorDecl *NewDecl) { return ::SubstQualifier(SemaRef, OldDecl, NewDecl, TemplateArgs); } bool TemplateDeclInstantiator::SubstQualifier(const TagDecl *OldDecl, TagDecl *NewDecl) { return ::SubstQualifier(SemaRef, OldDecl, NewDecl, TemplateArgs); } // Include attribute instantiation code. #include "clang/Sema/AttrTemplateInstantiate.inc" static void instantiateDependentAlignedAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const AlignedAttr *Aligned, Decl *New, bool IsPackExpansion) { if (Aligned->isAlignmentExpr()) { // The alignment expression is a constant expression. EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); ExprResult Result = S.SubstExpr(Aligned->getAlignmentExpr(), TemplateArgs); if (!Result.isInvalid()) S.AddAlignedAttr(New, *Aligned, Result.getAs(), IsPackExpansion); } else { TypeSourceInfo *Result = S.SubstType(Aligned->getAlignmentType(), TemplateArgs, Aligned->getLocation(), DeclarationName()); if (Result) S.AddAlignedAttr(New, *Aligned, Result, IsPackExpansion); } } static void instantiateDependentAlignedAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const AlignedAttr *Aligned, Decl *New) { if (!Aligned->isPackExpansion()) { instantiateDependentAlignedAttr(S, TemplateArgs, Aligned, New, false); return; } SmallVector Unexpanded; if (Aligned->isAlignmentExpr()) S.collectUnexpandedParameterPacks(Aligned->getAlignmentExpr(), Unexpanded); else S.collectUnexpandedParameterPacks(Aligned->getAlignmentType()->getTypeLoc(), Unexpanded); assert(!Unexpanded.empty() && "Pack expansion without parameter packs?"); // Determine whether we can expand this attribute pack yet. 
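  // Illustrative sketch, not part of the upstream patch: the pack-expansion
  // handling below is what instantiates dependent alignment packs such as
  //
  //   template <typename... Ts> struct AlignedStorage {
  //     alignas(Ts...) unsigned char Buffer[128];
  //   };
  //
  // where one AlignedAttr per expanded pack element must be rebuilt with a
  // concrete substitution index.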
bool Expand = true, RetainExpansion = false; Optional NumExpansions; // FIXME: Use the actual location of the ellipsis. SourceLocation EllipsisLoc = Aligned->getLocation(); if (S.CheckParameterPacksForExpansion(EllipsisLoc, Aligned->getRange(), Unexpanded, TemplateArgs, Expand, RetainExpansion, NumExpansions)) return; if (!Expand) { Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(S, -1); instantiateDependentAlignedAttr(S, TemplateArgs, Aligned, New, true); } else { for (unsigned I = 0; I != *NumExpansions; ++I) { Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(S, I); instantiateDependentAlignedAttr(S, TemplateArgs, Aligned, New, false); } } } static void instantiateDependentAssumeAlignedAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const AssumeAlignedAttr *Aligned, Decl *New) { // The alignment expression is a constant expression. EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); Expr *E, *OE = nullptr; ExprResult Result = S.SubstExpr(Aligned->getAlignment(), TemplateArgs); if (Result.isInvalid()) return; E = Result.getAs(); if (Aligned->getOffset()) { Result = S.SubstExpr(Aligned->getOffset(), TemplateArgs); if (Result.isInvalid()) return; OE = Result.getAs(); } S.AddAssumeAlignedAttr(New, *Aligned, E, OE); } static void instantiateDependentAlignValueAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const AlignValueAttr *Aligned, Decl *New) { // The alignment expression is a constant expression. EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); ExprResult Result = S.SubstExpr(Aligned->getAlignment(), TemplateArgs); if (!Result.isInvalid()) S.AddAlignValueAttr(New, *Aligned, Result.getAs()); } static void instantiateDependentAllocAlignAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const AllocAlignAttr *Align, Decl *New) { Expr *Param = IntegerLiteral::Create( S.getASTContext(), llvm::APInt(64, Align->getParamIndex().getSourceIndex()), S.getASTContext().UnsignedLongLongTy, Align->getLocation()); S.AddAllocAlignAttr(New, *Align, Param); } static void instantiateDependentAnnotationAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const AnnotateAttr *Attr, Decl *New) { EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); SmallVector Args; Args.reserve(Attr->args_size()); for (auto *E : Attr->args()) { ExprResult Result = S.SubstExpr(E, TemplateArgs); if (!Result.isUsable()) return; Args.push_back(Result.get()); } S.AddAnnotationAttr(New, *Attr, Attr->getAnnotation(), Args); } static Expr *instantiateDependentFunctionAttrCondition( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const Attr *A, Expr *OldCond, const Decl *Tmpl, FunctionDecl *New) { Expr *Cond = nullptr; { Sema::ContextRAII SwitchContext(S, New); EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); ExprResult Result = S.SubstExpr(OldCond, TemplateArgs); if (Result.isInvalid()) return nullptr; Cond = Result.getAs(); } if (!Cond->isTypeDependent()) { ExprResult Converted = S.PerformContextuallyConvertToBool(Cond); if (Converted.isInvalid()) return nullptr; Cond = Converted.get(); } SmallVector Diags; if (OldCond->isValueDependent() && !Cond->isValueDependent() && !Expr::isPotentialConstantExprUnevaluated(Cond, New, Diags)) { S.Diag(A->getLocation(), diag::err_attr_cond_never_constant_expr) << A; for (const auto &P : Diags) 
S.Diag(P.first, P.second); return nullptr; } return Cond; } static void instantiateDependentEnableIfAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const EnableIfAttr *EIA, const Decl *Tmpl, FunctionDecl *New) { Expr *Cond = instantiateDependentFunctionAttrCondition( S, TemplateArgs, EIA, EIA->getCond(), Tmpl, New); if (Cond) New->addAttr(new (S.getASTContext()) EnableIfAttr(S.getASTContext(), *EIA, Cond, EIA->getMessage())); } static void instantiateDependentDiagnoseIfAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const DiagnoseIfAttr *DIA, const Decl *Tmpl, FunctionDecl *New) { Expr *Cond = instantiateDependentFunctionAttrCondition( S, TemplateArgs, DIA, DIA->getCond(), Tmpl, New); if (Cond) New->addAttr(new (S.getASTContext()) DiagnoseIfAttr( S.getASTContext(), *DIA, Cond, DIA->getMessage(), DIA->getDiagnosticType(), DIA->getArgDependent(), New)); } // Constructs and adds to New a new instance of CUDALaunchBoundsAttr using // template A as the base and arguments from TemplateArgs. static void instantiateDependentCUDALaunchBoundsAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const CUDALaunchBoundsAttr &Attr, Decl *New) { // The alignment expression is a constant expression. EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); ExprResult Result = S.SubstExpr(Attr.getMaxThreads(), TemplateArgs); if (Result.isInvalid()) return; Expr *MaxThreads = Result.getAs(); Expr *MinBlocks = nullptr; if (Attr.getMinBlocks()) { Result = S.SubstExpr(Attr.getMinBlocks(), TemplateArgs); if (Result.isInvalid()) return; MinBlocks = Result.getAs(); } S.AddLaunchBoundsAttr(New, Attr, MaxThreads, MinBlocks); } static void instantiateDependentModeAttr(Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const ModeAttr &Attr, Decl *New) { S.AddModeAttr(New, Attr, Attr.getMode(), /*InInstantiation=*/true); } /// Instantiation of 'declare simd' attribute and its arguments. static void instantiateOMPDeclareSimdDeclAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const OMPDeclareSimdDeclAttr &Attr, Decl *New) { // Allow 'this' in clauses with varlists. if (auto *FTD = dyn_cast(New)) New = FTD->getTemplatedDecl(); auto *FD = cast(New); auto *ThisContext = dyn_cast_or_null(FD->getDeclContext()); SmallVector Uniforms, Aligneds, Alignments, Linears, Steps; SmallVector LinModifiers; auto SubstExpr = [&](Expr *E) -> ExprResult { if (auto *DRE = dyn_cast(E->IgnoreParenImpCasts())) if (auto *PVD = dyn_cast(DRE->getDecl())) { Sema::ContextRAII SavedContext(S, FD); LocalInstantiationScope Local(S); if (FD->getNumParams() > PVD->getFunctionScopeIndex()) Local.InstantiatedLocal( PVD, FD->getParamDecl(PVD->getFunctionScopeIndex())); return S.SubstExpr(E, TemplateArgs); } Sema::CXXThisScopeRAII ThisScope(S, ThisContext, Qualifiers(), FD->isCXXInstanceMember()); return S.SubstExpr(E, TemplateArgs); }; // Substitute a single OpenMP clause, which is a potentially-evaluated // full-expression. 
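  // Illustrative sketch, not part of the upstream patch: this path re-evaluates
  // clause expressions for declarations along the lines of
  //
  //   #pragma omp declare simd simdlen(N) aligned(p : alignof(T))
  //   template <int N, typename T> void axpy(T *p, T a);
  //
  // where the simdlen/aligned arguments depend on template parameters and must
  // be substituted again for every specialization.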
auto Subst = [&](Expr *E) -> ExprResult { EnterExpressionEvaluationContext Evaluated( S, Sema::ExpressionEvaluationContext::PotentiallyEvaluated); ExprResult Res = SubstExpr(E); if (Res.isInvalid()) return Res; return S.ActOnFinishFullExpr(Res.get(), false); }; ExprResult Simdlen; if (auto *E = Attr.getSimdlen()) Simdlen = Subst(E); if (Attr.uniforms_size() > 0) { for(auto *E : Attr.uniforms()) { ExprResult Inst = Subst(E); if (Inst.isInvalid()) continue; Uniforms.push_back(Inst.get()); } } auto AI = Attr.alignments_begin(); for (auto *E : Attr.aligneds()) { ExprResult Inst = Subst(E); if (Inst.isInvalid()) continue; Aligneds.push_back(Inst.get()); Inst = ExprEmpty(); if (*AI) Inst = S.SubstExpr(*AI, TemplateArgs); Alignments.push_back(Inst.get()); ++AI; } auto SI = Attr.steps_begin(); for (auto *E : Attr.linears()) { ExprResult Inst = Subst(E); if (Inst.isInvalid()) continue; Linears.push_back(Inst.get()); Inst = ExprEmpty(); if (*SI) Inst = S.SubstExpr(*SI, TemplateArgs); Steps.push_back(Inst.get()); ++SI; } LinModifiers.append(Attr.modifiers_begin(), Attr.modifiers_end()); (void)S.ActOnOpenMPDeclareSimdDirective( S.ConvertDeclToDeclGroup(New), Attr.getBranchState(), Simdlen.get(), Uniforms, Aligneds, Alignments, Linears, LinModifiers, Steps, Attr.getRange()); } /// Instantiation of 'declare variant' attribute and its arguments. static void instantiateOMPDeclareVariantAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const OMPDeclareVariantAttr &Attr, Decl *New) { // Allow 'this' in clauses with varlists. if (auto *FTD = dyn_cast(New)) New = FTD->getTemplatedDecl(); auto *FD = cast(New); auto *ThisContext = dyn_cast_or_null(FD->getDeclContext()); auto &&SubstExpr = [FD, ThisContext, &S, &TemplateArgs](Expr *E) { if (auto *DRE = dyn_cast(E->IgnoreParenImpCasts())) if (auto *PVD = dyn_cast(DRE->getDecl())) { Sema::ContextRAII SavedContext(S, FD); LocalInstantiationScope Local(S); if (FD->getNumParams() > PVD->getFunctionScopeIndex()) Local.InstantiatedLocal( PVD, FD->getParamDecl(PVD->getFunctionScopeIndex())); return S.SubstExpr(E, TemplateArgs); } Sema::CXXThisScopeRAII ThisScope(S, ThisContext, Qualifiers(), FD->isCXXInstanceMember()); return S.SubstExpr(E, TemplateArgs); }; // Substitute a single OpenMP clause, which is a potentially-evaluated // full-expression. auto &&Subst = [&SubstExpr, &S](Expr *E) { EnterExpressionEvaluationContext Evaluated( S, Sema::ExpressionEvaluationContext::PotentiallyEvaluated); ExprResult Res = SubstExpr(E); if (Res.isInvalid()) return Res; return S.ActOnFinishFullExpr(Res.get(), false); }; ExprResult VariantFuncRef; if (Expr *E = Attr.getVariantFuncRef()) { // Do not mark function as is used to prevent its emission if this is the // only place where it is used. EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); VariantFuncRef = Subst(E); } // Copy the template version of the OMPTraitInfo and run substitute on all // score and condition expressiosn. OMPTraitInfo &TI = S.getASTContext().getNewOMPTraitInfo(); TI = *Attr.getTraitInfos(); // Try to substitute template parameters in score and condition expressions. 
auto SubstScoreOrConditionExpr = [&S, Subst](Expr *&E, bool) { if (E) { EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); ExprResult ER = Subst(E); if (ER.isUsable()) E = ER.get(); else return true; } return false; }; if (TI.anyScoreOrCondition(SubstScoreOrConditionExpr)) return; Expr *E = VariantFuncRef.get(); // Check function/variant ref for `omp declare variant` but not for `omp // begin declare variant` (which use implicit attributes). Optional> DeclVarData = S.checkOpenMPDeclareVariantFunction(S.ConvertDeclToDeclGroup(New), E, TI, Attr.appendArgs_size(), Attr.getRange()); if (!DeclVarData) return; E = DeclVarData.getValue().second; FD = DeclVarData.getValue().first; if (auto *VariantDRE = dyn_cast(E->IgnoreParenImpCasts())) { if (auto *VariantFD = dyn_cast(VariantDRE->getDecl())) { if (auto *VariantFTD = VariantFD->getDescribedFunctionTemplate()) { if (!VariantFTD->isThisDeclarationADefinition()) return; Sema::TentativeAnalysisScope Trap(S); const TemplateArgumentList *TAL = TemplateArgumentList::CreateCopy( S.Context, TemplateArgs.getInnermost()); auto *SubstFD = S.InstantiateFunctionDeclaration(VariantFTD, TAL, New->getLocation()); if (!SubstFD) return; QualType NewType = S.Context.mergeFunctionTypes( SubstFD->getType(), FD->getType(), /* OfBlockPointer */ false, /* Unqualified */ false, /* AllowCXX */ true); if (NewType.isNull()) return; S.InstantiateFunctionDefinition( New->getLocation(), SubstFD, /* Recursive */ true, /* DefinitionRequired */ false, /* AtEndOfTU */ false); SubstFD->setInstantiationIsPending(!SubstFD->isDefined()); E = DeclRefExpr::Create(S.Context, NestedNameSpecifierLoc(), SourceLocation(), SubstFD, /* RefersToEnclosingVariableOrCapture */ false, /* NameLoc */ SubstFD->getLocation(), SubstFD->getType(), ExprValueKind::VK_PRValue); } } } SmallVector NothingExprs; SmallVector NeedDevicePtrExprs; SmallVector AppendArgs; for (Expr *E : Attr.adjustArgsNothing()) { ExprResult ER = Subst(E); if (ER.isInvalid()) continue; NothingExprs.push_back(ER.get()); } for (Expr *E : Attr.adjustArgsNeedDevicePtr()) { ExprResult ER = Subst(E); if (ER.isInvalid()) continue; NeedDevicePtrExprs.push_back(ER.get()); } for (auto A : Attr.appendArgs()) AppendArgs.push_back(A); S.ActOnOpenMPDeclareVariantDirective( FD, E, TI, NothingExprs, NeedDevicePtrExprs, AppendArgs, SourceLocation(), SourceLocation(), Attr.getRange()); } static void instantiateDependentAMDGPUFlatWorkGroupSizeAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const AMDGPUFlatWorkGroupSizeAttr &Attr, Decl *New) { // Both min and max expression are constant expressions. 
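  // Illustrative sketch, not part of the upstream patch: the substitution below
  // handles dependent bounds such as
  //
  //   template <unsigned Min, unsigned Max>
  //   __attribute__((amdgpu_flat_work_group_size(Min, Max))) void kernel();
  //
  // where both expressions must be re-evaluated as constants for each
  // instantiation.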
EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); ExprResult Result = S.SubstExpr(Attr.getMin(), TemplateArgs); if (Result.isInvalid()) return; Expr *MinExpr = Result.getAs(); Result = S.SubstExpr(Attr.getMax(), TemplateArgs); if (Result.isInvalid()) return; Expr *MaxExpr = Result.getAs(); S.addAMDGPUFlatWorkGroupSizeAttr(New, Attr, MinExpr, MaxExpr); } static ExplicitSpecifier instantiateExplicitSpecifier(Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, ExplicitSpecifier ES, FunctionDecl *New) { if (!ES.getExpr()) return ES; Expr *OldCond = ES.getExpr(); Expr *Cond = nullptr; { EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); ExprResult SubstResult = S.SubstExpr(OldCond, TemplateArgs); if (SubstResult.isInvalid()) { return ExplicitSpecifier::Invalid(); } Cond = SubstResult.get(); } ExplicitSpecifier Result(Cond, ES.getKind()); if (!Cond->isTypeDependent()) S.tryResolveExplicitSpecifier(Result); return Result; } static void instantiateDependentAMDGPUWavesPerEUAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const AMDGPUWavesPerEUAttr &Attr, Decl *New) { // Both min and max expression are constant expressions. EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); ExprResult Result = S.SubstExpr(Attr.getMin(), TemplateArgs); if (Result.isInvalid()) return; Expr *MinExpr = Result.getAs(); Expr *MaxExpr = nullptr; if (auto Max = Attr.getMax()) { Result = S.SubstExpr(Max, TemplateArgs); if (Result.isInvalid()) return; MaxExpr = Result.getAs(); } S.addAMDGPUWavesPerEUAttr(New, Attr, MinExpr, MaxExpr); } // This doesn't take any template parameters, but we have a custom action that // needs to happen when the kernel itself is instantiated. We need to run the // ItaniumMangler to mark the names required to name this kernel. static void instantiateDependentSYCLKernelAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const SYCLKernelAttr &Attr, Decl *New) { New->addAttr(Attr.clone(S.getASTContext())); } /// Determine whether the attribute A might be relevant to the declaration D. /// If not, we can skip instantiating it. The attribute may or may not have /// been instantiated yet. static bool isRelevantAttr(Sema &S, const Decl *D, const Attr *A) { // 'preferred_name' is only relevant to the matching specialization of the // template. if (const auto *PNA = dyn_cast(A)) { QualType T = PNA->getTypedefType(); const auto *RD = cast(D); if (!T->isDependentType() && !RD->isDependentContext() && !declaresSameEntity(T->getAsCXXRecordDecl(), RD)) return false; for (const auto *ExistingPNA : D->specific_attrs()) if (S.Context.hasSameType(ExistingPNA->getTypedefType(), PNA->getTypedefType())) return false; return true; } return true; } void Sema::InstantiateAttrsForDecl( const MultiLevelTemplateArgumentList &TemplateArgs, const Decl *Tmpl, Decl *New, LateInstantiatedAttrVec *LateAttrs, LocalInstantiationScope *OuterMostScope) { if (NamedDecl *ND = dyn_cast(New)) { // FIXME: This function is called multiple times for the same template // specialization. We should only instantiate attributes that were added // since the previous instantiation. for (const auto *TmplAttr : Tmpl->attrs()) { if (!isRelevantAttr(*this, New, TmplAttr)) continue; // FIXME: If any of the special case versions from InstantiateAttrs become // applicable to template declaration, we'll need to add them here. 
CXXThisScopeRAII ThisScope( *this, dyn_cast_or_null(ND->getDeclContext()), Qualifiers(), ND->isCXXInstanceMember()); Attr *NewAttr = sema::instantiateTemplateAttributeForDecl( TmplAttr, Context, *this, TemplateArgs); if (NewAttr && isRelevantAttr(*this, New, NewAttr)) New->addAttr(NewAttr); } } } static Sema::RetainOwnershipKind attrToRetainOwnershipKind(const Attr *A) { switch (A->getKind()) { case clang::attr::CFConsumed: return Sema::RetainOwnershipKind::CF; case clang::attr::OSConsumed: return Sema::RetainOwnershipKind::OS; case clang::attr::NSConsumed: return Sema::RetainOwnershipKind::NS; default: llvm_unreachable("Wrong argument supplied"); } } void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs, const Decl *Tmpl, Decl *New, LateInstantiatedAttrVec *LateAttrs, LocalInstantiationScope *OuterMostScope) { for (const auto *TmplAttr : Tmpl->attrs()) { if (!isRelevantAttr(*this, New, TmplAttr)) continue; // FIXME: This should be generalized to more than just the AlignedAttr. const AlignedAttr *Aligned = dyn_cast(TmplAttr); if (Aligned && Aligned->isAlignmentDependent()) { instantiateDependentAlignedAttr(*this, TemplateArgs, Aligned, New); continue; } if (const auto *AssumeAligned = dyn_cast(TmplAttr)) { instantiateDependentAssumeAlignedAttr(*this, TemplateArgs, AssumeAligned, New); continue; } if (const auto *AlignValue = dyn_cast(TmplAttr)) { instantiateDependentAlignValueAttr(*this, TemplateArgs, AlignValue, New); continue; } if (const auto *AllocAlign = dyn_cast(TmplAttr)) { instantiateDependentAllocAlignAttr(*this, TemplateArgs, AllocAlign, New); continue; } if (const auto *Annotate = dyn_cast(TmplAttr)) { instantiateDependentAnnotationAttr(*this, TemplateArgs, Annotate, New); continue; } if (const auto *EnableIf = dyn_cast(TmplAttr)) { instantiateDependentEnableIfAttr(*this, TemplateArgs, EnableIf, Tmpl, cast(New)); continue; } if (const auto *DiagnoseIf = dyn_cast(TmplAttr)) { instantiateDependentDiagnoseIfAttr(*this, TemplateArgs, DiagnoseIf, Tmpl, cast(New)); continue; } if (const auto *CUDALaunchBounds = dyn_cast(TmplAttr)) { instantiateDependentCUDALaunchBoundsAttr(*this, TemplateArgs, *CUDALaunchBounds, New); continue; } if (const auto *Mode = dyn_cast(TmplAttr)) { instantiateDependentModeAttr(*this, TemplateArgs, *Mode, New); continue; } if (const auto *OMPAttr = dyn_cast(TmplAttr)) { instantiateOMPDeclareSimdDeclAttr(*this, TemplateArgs, *OMPAttr, New); continue; } if (const auto *OMPAttr = dyn_cast(TmplAttr)) { instantiateOMPDeclareVariantAttr(*this, TemplateArgs, *OMPAttr, New); continue; } if (const auto *AMDGPUFlatWorkGroupSize = dyn_cast(TmplAttr)) { instantiateDependentAMDGPUFlatWorkGroupSizeAttr( *this, TemplateArgs, *AMDGPUFlatWorkGroupSize, New); } if (const auto *AMDGPUFlatWorkGroupSize = dyn_cast(TmplAttr)) { instantiateDependentAMDGPUWavesPerEUAttr(*this, TemplateArgs, *AMDGPUFlatWorkGroupSize, New); } // Existing DLL attribute on the instantiation takes precedence. 
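    // Illustrative sketch, not part of the upstream patch: this covers cases
    // along the lines of
    //
    //   template <class T> struct __declspec(dllimport) S { void f(); };
    //   template struct __declspec(dllexport) S<int>;
    //
    // where the explicit instantiation already carries its own DLL attribute,
    // so the one on the template pattern must not be copied over it.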
if (TmplAttr->getKind() == attr::DLLExport || TmplAttr->getKind() == attr::DLLImport) { if (New->hasAttr() || New->hasAttr()) { continue; } } if (const auto *ABIAttr = dyn_cast(TmplAttr)) { AddParameterABIAttr(New, *ABIAttr, ABIAttr->getABI()); continue; } if (isa(TmplAttr) || isa(TmplAttr) || isa(TmplAttr)) { AddXConsumedAttr(New, *TmplAttr, attrToRetainOwnershipKind(TmplAttr), /*template instantiation=*/true); continue; } if (auto *A = dyn_cast(TmplAttr)) { if (!New->hasAttr()) New->addAttr(A->clone(Context)); continue; } if (auto *A = dyn_cast(TmplAttr)) { if (!New->hasAttr()) New->addAttr(A->clone(Context)); continue; } if (auto *A = dyn_cast(TmplAttr)) { instantiateDependentSYCLKernelAttr(*this, TemplateArgs, *A, New); continue; } assert(!TmplAttr->isPackExpansion()); if (TmplAttr->isLateParsed() && LateAttrs) { // Late parsed attributes must be instantiated and attached after the // enclosing class has been instantiated. See Sema::InstantiateClass. LocalInstantiationScope *Saved = nullptr; if (CurrentInstantiationScope) Saved = CurrentInstantiationScope->cloneScopes(OuterMostScope); LateAttrs->push_back(LateInstantiatedAttribute(TmplAttr, Saved, New)); } else { // Allow 'this' within late-parsed attributes. auto *ND = cast(New); auto *ThisContext = dyn_cast_or_null(ND->getDeclContext()); CXXThisScopeRAII ThisScope(*this, ThisContext, Qualifiers(), ND->isCXXInstanceMember()); Attr *NewAttr = sema::instantiateTemplateAttribute(TmplAttr, Context, *this, TemplateArgs); if (NewAttr && isRelevantAttr(*this, New, TmplAttr)) New->addAttr(NewAttr); } } } /// In the MS ABI, we need to instantiate default arguments of dllexported /// default constructors along with the constructor definition. This allows IR /// gen to emit a constructor closure which calls the default constructor with /// its default arguments. void Sema::InstantiateDefaultCtorDefaultArgs(CXXConstructorDecl *Ctor) { assert(Context.getTargetInfo().getCXXABI().isMicrosoft() && Ctor->isDefaultConstructor()); unsigned NumParams = Ctor->getNumParams(); if (NumParams == 0) return; DLLExportAttr *Attr = Ctor->getAttr(); if (!Attr) return; for (unsigned I = 0; I != NumParams; ++I) { (void)CheckCXXDefaultArgExpr(Attr->getLocation(), Ctor, Ctor->getParamDecl(I)); DiscardCleanupsInEvaluationContext(); } } /// Get the previous declaration of a declaration for the purposes of template /// instantiation. If this finds a previous declaration, then the previous /// declaration of the instantiation of D should be an instantiation of the /// result of this function. template static DeclT *getPreviousDeclForInstantiation(DeclT *D) { DeclT *Result = D->getPreviousDecl(); // If the declaration is within a class, and the previous declaration was // merged from a different definition of that class, then we don't have a // previous declaration for the purpose of template instantiation. 
if (Result && isa(D->getDeclContext()) && D->getLexicalDeclContext() != Result->getLexicalDeclContext()) return nullptr; return Result; } Decl * TemplateDeclInstantiator::VisitTranslationUnitDecl(TranslationUnitDecl *D) { llvm_unreachable("Translation units cannot be instantiated"); } Decl * TemplateDeclInstantiator::VisitPragmaCommentDecl(PragmaCommentDecl *D) { llvm_unreachable("pragma comment cannot be instantiated"); } Decl *TemplateDeclInstantiator::VisitPragmaDetectMismatchDecl( PragmaDetectMismatchDecl *D) { llvm_unreachable("pragma comment cannot be instantiated"); } Decl * TemplateDeclInstantiator::VisitExternCContextDecl(ExternCContextDecl *D) { llvm_unreachable("extern \"C\" context cannot be instantiated"); } Decl *TemplateDeclInstantiator::VisitMSGuidDecl(MSGuidDecl *D) { llvm_unreachable("GUID declaration cannot be instantiated"); } Decl *TemplateDeclInstantiator::VisitTemplateParamObjectDecl( TemplateParamObjectDecl *D) { llvm_unreachable("template parameter objects cannot be instantiated"); } Decl * TemplateDeclInstantiator::VisitLabelDecl(LabelDecl *D) { LabelDecl *Inst = LabelDecl::Create(SemaRef.Context, Owner, D->getLocation(), D->getIdentifier()); Owner->addDecl(Inst); return Inst; } Decl * TemplateDeclInstantiator::VisitNamespaceDecl(NamespaceDecl *D) { llvm_unreachable("Namespaces cannot be instantiated"); } Decl * TemplateDeclInstantiator::VisitNamespaceAliasDecl(NamespaceAliasDecl *D) { NamespaceAliasDecl *Inst = NamespaceAliasDecl::Create(SemaRef.Context, Owner, D->getNamespaceLoc(), D->getAliasLoc(), D->getIdentifier(), D->getQualifierLoc(), D->getTargetNameLoc(), D->getNamespace()); Owner->addDecl(Inst); return Inst; } Decl *TemplateDeclInstantiator::InstantiateTypedefNameDecl(TypedefNameDecl *D, bool IsTypeAlias) { bool Invalid = false; TypeSourceInfo *DI = D->getTypeSourceInfo(); if (DI->getType()->isInstantiationDependentType() || DI->getType()->isVariablyModifiedType()) { DI = SemaRef.SubstType(DI, TemplateArgs, D->getLocation(), D->getDeclName()); if (!DI) { Invalid = true; DI = SemaRef.Context.getTrivialTypeSourceInfo(SemaRef.Context.IntTy); } } else { SemaRef.MarkDeclarationsReferencedInType(D->getLocation(), DI->getType()); } // HACK: 2012-10-23 g++ has a bug where it gets the value kind of ?: wrong. // libstdc++ relies upon this bug in its implementation of common_type. If we // happen to be processing that implementation, fake up the g++ ?: // semantics. See LWG issue 2141 for more information on the bug. The bugs // are fixed in g++ and libstdc++ 4.9.0 (2014-04-22). const DecltypeType *DT = DI->getType()->getAs(); CXXRecordDecl *RD = dyn_cast(D->getDeclContext()); if (DT && RD && isa(DT->getUnderlyingExpr()) && DT->isReferenceType() && RD->getEnclosingNamespaceContext() == SemaRef.getStdNamespace() && RD->getIdentifier() && RD->getIdentifier()->isStr("common_type") && D->getIdentifier() && D->getIdentifier()->isStr("type") && SemaRef.getSourceManager().isInSystemHeader(D->getBeginLoc())) // Fold it to the (non-reference) type which g++ would have produced. 
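  // Illustrative sketch, not part of the upstream patch: the libstdc++ pattern
  // this works around is roughly
  //
  //   template <typename T, typename U> struct common_type<T, U> {
  //     typedef decltype(true ? std::declval<T>() : std::declval<U>()) type;
  //   };
  //
  // where the conditional operator can yield a reference type; stripping the
  // reference below reproduces the non-reference type g++ computed for LWG 2141.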
DI = SemaRef.Context.getTrivialTypeSourceInfo( DI->getType().getNonReferenceType()); // Create the new typedef TypedefNameDecl *Typedef; if (IsTypeAlias) Typedef = TypeAliasDecl::Create(SemaRef.Context, Owner, D->getBeginLoc(), D->getLocation(), D->getIdentifier(), DI); else Typedef = TypedefDecl::Create(SemaRef.Context, Owner, D->getBeginLoc(), D->getLocation(), D->getIdentifier(), DI); if (Invalid) Typedef->setInvalidDecl(); // If the old typedef was the name for linkage purposes of an anonymous // tag decl, re-establish that relationship for the new typedef. if (const TagType *oldTagType = D->getUnderlyingType()->getAs()) { TagDecl *oldTag = oldTagType->getDecl(); if (oldTag->getTypedefNameForAnonDecl() == D && !Invalid) { TagDecl *newTag = DI->getType()->castAs()->getDecl(); assert(!newTag->hasNameForLinkage()); newTag->setTypedefNameForAnonDecl(Typedef); } } if (TypedefNameDecl *Prev = getPreviousDeclForInstantiation(D)) { NamedDecl *InstPrev = SemaRef.FindInstantiatedDecl(D->getLocation(), Prev, TemplateArgs); if (!InstPrev) return nullptr; TypedefNameDecl *InstPrevTypedef = cast(InstPrev); // If the typedef types are not identical, reject them. SemaRef.isIncompatibleTypedef(InstPrevTypedef, Typedef); Typedef->setPreviousDecl(InstPrevTypedef); } SemaRef.InstantiateAttrs(TemplateArgs, D, Typedef); if (D->getUnderlyingType()->getAs()) SemaRef.inferGslPointerAttribute(Typedef); Typedef->setAccess(D->getAccess()); return Typedef; } Decl *TemplateDeclInstantiator::VisitTypedefDecl(TypedefDecl *D) { Decl *Typedef = InstantiateTypedefNameDecl(D, /*IsTypeAlias=*/false); if (Typedef) Owner->addDecl(Typedef); return Typedef; } Decl *TemplateDeclInstantiator::VisitTypeAliasDecl(TypeAliasDecl *D) { Decl *Typedef = InstantiateTypedefNameDecl(D, /*IsTypeAlias=*/true); if (Typedef) Owner->addDecl(Typedef); return Typedef; } Decl * TemplateDeclInstantiator::VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D) { // Create a local instantiation scope for this type alias template, which // will contain the instantiations of the template parameters. LocalInstantiationScope Scope(SemaRef); TemplateParameterList *TempParams = D->getTemplateParameters(); TemplateParameterList *InstParams = SubstTemplateParams(TempParams); if (!InstParams) return nullptr; TypeAliasDecl *Pattern = D->getTemplatedDecl(); TypeAliasTemplateDecl *PrevAliasTemplate = nullptr; if (getPreviousDeclForInstantiation(Pattern)) { DeclContext::lookup_result Found = Owner->lookup(Pattern->getDeclName()); if (!Found.empty()) { PrevAliasTemplate = dyn_cast(Found.front()); } } TypeAliasDecl *AliasInst = cast_or_null( InstantiateTypedefNameDecl(Pattern, /*IsTypeAlias=*/true)); if (!AliasInst) return nullptr; TypeAliasTemplateDecl *Inst = TypeAliasTemplateDecl::Create(SemaRef.Context, Owner, D->getLocation(), D->getDeclName(), InstParams, AliasInst); AliasInst->setDescribedAliasTemplate(Inst); if (PrevAliasTemplate) Inst->setPreviousDecl(PrevAliasTemplate); Inst->setAccess(D->getAccess()); if (!PrevAliasTemplate) Inst->setInstantiatedFromMemberTemplate(D); Owner->addDecl(Inst); return Inst; } Decl *TemplateDeclInstantiator::VisitBindingDecl(BindingDecl *D) { auto *NewBD = BindingDecl::Create(SemaRef.Context, Owner, D->getLocation(), D->getIdentifier()); NewBD->setReferenced(D->isReferenced()); SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, NewBD); return NewBD; } Decl *TemplateDeclInstantiator::VisitDecompositionDecl(DecompositionDecl *D) { // Transform the bindings first. 
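  // For instance, for a dependent
  //   auto [a, b] = t;
  // in the pattern, fresh BindingDecls for 'a' and 'b' are created first and
  // then handed to VisitVarDecl so that the instantiated DecompositionDecl
  // owns the new bindings.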
SmallVector NewBindings; for (auto *OldBD : D->bindings()) NewBindings.push_back(cast(VisitBindingDecl(OldBD))); ArrayRef NewBindingArray = NewBindings; auto *NewDD = cast_or_null( VisitVarDecl(D, /*InstantiatingVarTemplate=*/false, &NewBindingArray)); if (!NewDD || NewDD->isInvalidDecl()) for (auto *NewBD : NewBindings) NewBD->setInvalidDecl(); return NewDD; } Decl *TemplateDeclInstantiator::VisitVarDecl(VarDecl *D) { return VisitVarDecl(D, /*InstantiatingVarTemplate=*/false); } Decl *TemplateDeclInstantiator::VisitVarDecl(VarDecl *D, bool InstantiatingVarTemplate, ArrayRef *Bindings) { // Do substitution on the type of the declaration TypeSourceInfo *DI = SemaRef.SubstType( D->getTypeSourceInfo(), TemplateArgs, D->getTypeSpecStartLoc(), D->getDeclName(), /*AllowDeducedTST*/true); if (!DI) return nullptr; if (DI->getType()->isFunctionType()) { SemaRef.Diag(D->getLocation(), diag::err_variable_instantiates_to_function) << D->isStaticDataMember() << DI->getType(); return nullptr; } DeclContext *DC = Owner; if (D->isLocalExternDecl()) SemaRef.adjustContextForLocalExternDecl(DC); // Build the instantiated declaration. VarDecl *Var; if (Bindings) Var = DecompositionDecl::Create(SemaRef.Context, DC, D->getInnerLocStart(), D->getLocation(), DI->getType(), DI, D->getStorageClass(), *Bindings); else Var = VarDecl::Create(SemaRef.Context, DC, D->getInnerLocStart(), D->getLocation(), D->getIdentifier(), DI->getType(), DI, D->getStorageClass()); // In ARC, infer 'retaining' for variables of retainable type. if (SemaRef.getLangOpts().ObjCAutoRefCount && SemaRef.inferObjCARCLifetime(Var)) Var->setInvalidDecl(); if (SemaRef.getLangOpts().OpenCL) SemaRef.deduceOpenCLAddressSpace(Var); // Substitute the nested name specifier, if any. if (SubstQualifier(D, Var)) return nullptr; SemaRef.BuildVariableInstantiation(Var, D, TemplateArgs, LateAttrs, Owner, StartingScope, InstantiatingVarTemplate); if (D->isNRVOVariable() && !Var->isInvalidDecl()) { QualType RT; if (auto *F = dyn_cast(DC)) RT = F->getReturnType(); else if (isa(DC)) RT = cast(SemaRef.getCurBlock()->FunctionType) ->getReturnType(); else llvm_unreachable("Unknown context type"); // This is the last chance we have of checking copy elision eligibility // for functions in dependent contexts. The sema actions for building // the return statement during template instantiation will have no effect // regarding copy elision, since NRVO propagation runs on the scope exit // actions, and these are not run on instantiation. // This might run through some VarDecls which were returned from non-taken // 'if constexpr' branches, and these will end up being constructed on the // return slot even if they will never be returned, as a sort of accidental // 'optimization'. Notably, functions with 'auto' return types won't have it // deduced by this point. Coupled with the limitation described // previously, this makes it very hard to support copy elision for these. 
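  // A sketch of the situation described above:
  //   template <class T> T make() {
  //     T result;        // NRVO candidate; its eligibility can only be
  //     return result;   // decided here, at instantiation time
  //   }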
Sema::NamedReturnInfo Info = SemaRef.getNamedReturnInfo(Var); bool NRVO = SemaRef.getCopyElisionCandidate(Info, RT) != nullptr; Var->setNRVOVariable(NRVO); } Var->setImplicit(D->isImplicit()); if (Var->isStaticLocal()) SemaRef.CheckStaticLocalForDllExport(Var); return Var; } Decl *TemplateDeclInstantiator::VisitAccessSpecDecl(AccessSpecDecl *D) { AccessSpecDecl* AD = AccessSpecDecl::Create(SemaRef.Context, D->getAccess(), Owner, D->getAccessSpecifierLoc(), D->getColonLoc()); Owner->addHiddenDecl(AD); return AD; } Decl *TemplateDeclInstantiator::VisitFieldDecl(FieldDecl *D) { bool Invalid = false; TypeSourceInfo *DI = D->getTypeSourceInfo(); if (DI->getType()->isInstantiationDependentType() || DI->getType()->isVariablyModifiedType()) { DI = SemaRef.SubstType(DI, TemplateArgs, D->getLocation(), D->getDeclName()); if (!DI) { DI = D->getTypeSourceInfo(); Invalid = true; } else if (DI->getType()->isFunctionType()) { // C++ [temp.arg.type]p3: // If a declaration acquires a function type through a type // dependent on a template-parameter and this causes a // declaration that does not use the syntactic form of a // function declarator to have function type, the program is // ill-formed. SemaRef.Diag(D->getLocation(), diag::err_field_instantiates_to_function) << DI->getType(); Invalid = true; } } else { SemaRef.MarkDeclarationsReferencedInType(D->getLocation(), DI->getType()); } Expr *BitWidth = D->getBitWidth(); if (Invalid) BitWidth = nullptr; else if (BitWidth) { // The bit-width expression is a constant expression. EnterExpressionEvaluationContext Unevaluated( SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated); ExprResult InstantiatedBitWidth = SemaRef.SubstExpr(BitWidth, TemplateArgs); if (InstantiatedBitWidth.isInvalid()) { Invalid = true; BitWidth = nullptr; } else BitWidth = InstantiatedBitWidth.getAs(); } FieldDecl *Field = SemaRef.CheckFieldDecl(D->getDeclName(), DI->getType(), DI, cast(Owner), D->getLocation(), D->isMutable(), BitWidth, D->getInClassInitStyle(), D->getInnerLocStart(), D->getAccess(), nullptr); if (!Field) { cast(Owner)->setInvalidDecl(); return nullptr; } SemaRef.InstantiateAttrs(TemplateArgs, D, Field, LateAttrs, StartingScope); if (Field->hasAttrs()) SemaRef.CheckAlignasUnderalignment(Field); if (Invalid) Field->setInvalidDecl(); if (!Field->getDeclName()) { // Keep track of where this decl came from. 
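  // (Unnamed fields arise, for example, from unnamed bit-fields such as
  // 'int : 3;' or from the implicit field created for an anonymous union
  // member.)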
SemaRef.Context.setInstantiatedFromUnnamedFieldDecl(Field, D); } if (CXXRecordDecl *Parent= dyn_cast(Field->getDeclContext())) { if (Parent->isAnonymousStructOrUnion() && Parent->getRedeclContext()->isFunctionOrMethod()) SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, Field); } Field->setImplicit(D->isImplicit()); Field->setAccess(D->getAccess()); Owner->addDecl(Field); return Field; } Decl *TemplateDeclInstantiator::VisitMSPropertyDecl(MSPropertyDecl *D) { bool Invalid = false; TypeSourceInfo *DI = D->getTypeSourceInfo(); if (DI->getType()->isVariablyModifiedType()) { SemaRef.Diag(D->getLocation(), diag::err_property_is_variably_modified) << D; Invalid = true; } else if (DI->getType()->isInstantiationDependentType()) { DI = SemaRef.SubstType(DI, TemplateArgs, D->getLocation(), D->getDeclName()); if (!DI) { DI = D->getTypeSourceInfo(); Invalid = true; } else if (DI->getType()->isFunctionType()) { // C++ [temp.arg.type]p3: // If a declaration acquires a function type through a type // dependent on a template-parameter and this causes a // declaration that does not use the syntactic form of a // function declarator to have function type, the program is // ill-formed. SemaRef.Diag(D->getLocation(), diag::err_field_instantiates_to_function) << DI->getType(); Invalid = true; } } else { SemaRef.MarkDeclarationsReferencedInType(D->getLocation(), DI->getType()); } MSPropertyDecl *Property = MSPropertyDecl::Create( SemaRef.Context, Owner, D->getLocation(), D->getDeclName(), DI->getType(), DI, D->getBeginLoc(), D->getGetterId(), D->getSetterId()); SemaRef.InstantiateAttrs(TemplateArgs, D, Property, LateAttrs, StartingScope); if (Invalid) Property->setInvalidDecl(); Property->setAccess(D->getAccess()); Owner->addDecl(Property); return Property; } Decl *TemplateDeclInstantiator::VisitIndirectFieldDecl(IndirectFieldDecl *D) { NamedDecl **NamedChain = new (SemaRef.Context)NamedDecl*[D->getChainingSize()]; int i = 0; for (auto *PI : D->chain()) { NamedDecl *Next = SemaRef.FindInstantiatedDecl(D->getLocation(), PI, TemplateArgs); if (!Next) return nullptr; NamedChain[i++] = Next; } QualType T = cast(NamedChain[i-1])->getType(); IndirectFieldDecl *IndirectField = IndirectFieldDecl::Create( SemaRef.Context, Owner, D->getLocation(), D->getIdentifier(), T, {NamedChain, D->getChainingSize()}); for (const auto *Attr : D->attrs()) IndirectField->addAttr(Attr->clone(SemaRef.Context)); IndirectField->setImplicit(D->isImplicit()); IndirectField->setAccess(D->getAccess()); Owner->addDecl(IndirectField); return IndirectField; } Decl *TemplateDeclInstantiator::VisitFriendDecl(FriendDecl *D) { // Handle friend type expressions by simply substituting template // parameters into the pattern type and checking the result. if (TypeSourceInfo *Ty = D->getFriendType()) { TypeSourceInfo *InstTy; // If this is an unsupported friend, don't bother substituting template // arguments into it. The actual type referred to won't be used by any // parts of Clang, and may not be valid for instantiating. Just use the // same info for the instantiated friend. 
if (D->isUnsupportedFriend()) { InstTy = Ty; } else { InstTy = SemaRef.SubstType(Ty, TemplateArgs, D->getLocation(), DeclarationName()); } if (!InstTy) return nullptr; FriendDecl *FD = SemaRef.CheckFriendTypeDecl(D->getBeginLoc(), D->getFriendLoc(), InstTy); if (!FD) return nullptr; FD->setAccess(AS_public); FD->setUnsupportedFriend(D->isUnsupportedFriend()); Owner->addDecl(FD); return FD; } NamedDecl *ND = D->getFriendDecl(); assert(ND && "friend decl must be a decl or a type!"); // All of the Visit implementations for the various potential friend // declarations have to be carefully written to work for friend // objects, with the most important detail being that the target // decl should almost certainly not be placed in Owner. Decl *NewND = Visit(ND); if (!NewND) return nullptr; FriendDecl *FD = FriendDecl::Create(SemaRef.Context, Owner, D->getLocation(), cast(NewND), D->getFriendLoc()); FD->setAccess(AS_public); FD->setUnsupportedFriend(D->isUnsupportedFriend()); Owner->addDecl(FD); return FD; } Decl *TemplateDeclInstantiator::VisitStaticAssertDecl(StaticAssertDecl *D) { Expr *AssertExpr = D->getAssertExpr(); // The expression in a static assertion is a constant expression. EnterExpressionEvaluationContext Unevaluated( SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated); ExprResult InstantiatedAssertExpr = SemaRef.SubstExpr(AssertExpr, TemplateArgs); if (InstantiatedAssertExpr.isInvalid()) return nullptr; return SemaRef.BuildStaticAssertDeclaration(D->getLocation(), InstantiatedAssertExpr.get(), D->getMessage(), D->getRParenLoc(), D->isFailed()); } Decl *TemplateDeclInstantiator::VisitEnumDecl(EnumDecl *D) { EnumDecl *PrevDecl = nullptr; if (EnumDecl *PatternPrev = getPreviousDeclForInstantiation(D)) { NamedDecl *Prev = SemaRef.FindInstantiatedDecl(D->getLocation(), PatternPrev, TemplateArgs); if (!Prev) return nullptr; PrevDecl = cast(Prev); } EnumDecl *Enum = EnumDecl::Create(SemaRef.Context, Owner, D->getBeginLoc(), D->getLocation(), D->getIdentifier(), PrevDecl, D->isScoped(), D->isScopedUsingClassTag(), D->isFixed()); if (D->isFixed()) { if (TypeSourceInfo *TI = D->getIntegerTypeSourceInfo()) { // If we have type source information for the underlying type, it means it // has been explicitly set by the user. Perform substitution on it before // moving on. SourceLocation UnderlyingLoc = TI->getTypeLoc().getBeginLoc(); TypeSourceInfo *NewTI = SemaRef.SubstType(TI, TemplateArgs, UnderlyingLoc, DeclarationName()); if (!NewTI || SemaRef.CheckEnumUnderlyingType(NewTI)) Enum->setIntegerType(SemaRef.Context.IntTy); else Enum->setIntegerTypeSourceInfo(NewTI); } else { assert(!D->getIntegerType()->isDependentType() && "Dependent type without type source info"); Enum->setIntegerType(D->getIntegerType()); } } SemaRef.InstantiateAttrs(TemplateArgs, D, Enum); Enum->setInstantiationOfMemberEnum(D, TSK_ImplicitInstantiation); Enum->setAccess(D->getAccess()); // Forward the mangling number from the template to the instantiated decl. SemaRef.Context.setManglingNumber(Enum, SemaRef.Context.getManglingNumber(D)); // See if the old tag was defined along with a declarator. // If it did, mark the new tag as being associated with that declarator. if (DeclaratorDecl *DD = SemaRef.Context.getDeclaratorForUnnamedTagDecl(D)) SemaRef.Context.addDeclaratorForUnnamedTagDecl(Enum, DD); // See if the old tag was defined along with a typedef. // If it did, mark the new tag as being associated with that typedef. 
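  // (For example, an unnamed enum declared in the pattern as
  //   typedef enum { A, B } E;
  // was defined along with the typedef 'E'; that association is re-created
  // for the instantiated enum below.)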
if (TypedefNameDecl *TND = SemaRef.Context.getTypedefNameForUnnamedTagDecl(D)) SemaRef.Context.addTypedefNameForUnnamedTagDecl(Enum, TND); if (SubstQualifier(D, Enum)) return nullptr; Owner->addDecl(Enum); EnumDecl *Def = D->getDefinition(); if (Def && Def != D) { // If this is an out-of-line definition of an enum member template, check // that the underlying types match in the instantiation of both // declarations. if (TypeSourceInfo *TI = Def->getIntegerTypeSourceInfo()) { SourceLocation UnderlyingLoc = TI->getTypeLoc().getBeginLoc(); QualType DefnUnderlying = SemaRef.SubstType(TI->getType(), TemplateArgs, UnderlyingLoc, DeclarationName()); SemaRef.CheckEnumRedeclaration(Def->getLocation(), Def->isScoped(), DefnUnderlying, /*IsFixed=*/true, Enum); } } // C++11 [temp.inst]p1: The implicit instantiation of a class template // specialization causes the implicit instantiation of the declarations, but // not the definitions of scoped member enumerations. // // DR1484 clarifies that enumeration definitions inside of a template // declaration aren't considered entities that can be separately instantiated // from the rest of the entity they are declared inside of. if (isDeclWithinFunction(D) ? D == Def : Def && !Enum->isScoped()) { SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, Enum); InstantiateEnumDefinition(Enum, Def); } return Enum; } void TemplateDeclInstantiator::InstantiateEnumDefinition( EnumDecl *Enum, EnumDecl *Pattern) { Enum->startDefinition(); // Update the location to refer to the definition. Enum->setLocation(Pattern->getLocation()); SmallVector Enumerators; EnumConstantDecl *LastEnumConst = nullptr; for (auto *EC : Pattern->enumerators()) { // The specified value for the enumerator. ExprResult Value((Expr *)nullptr); if (Expr *UninstValue = EC->getInitExpr()) { // The enumerator's value expression is a constant expression. EnterExpressionEvaluationContext Unevaluated( SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated); Value = SemaRef.SubstExpr(UninstValue, TemplateArgs); } // Drop the initial value and continue. bool isInvalid = false; if (Value.isInvalid()) { Value = nullptr; isInvalid = true; } EnumConstantDecl *EnumConst = SemaRef.CheckEnumConstant(Enum, LastEnumConst, EC->getLocation(), EC->getIdentifier(), Value.get()); if (isInvalid) { if (EnumConst) EnumConst->setInvalidDecl(); Enum->setInvalidDecl(); } if (EnumConst) { SemaRef.InstantiateAttrs(TemplateArgs, EC, EnumConst); EnumConst->setAccess(Enum->getAccess()); Enum->addDecl(EnumConst); Enumerators.push_back(EnumConst); LastEnumConst = EnumConst; if (Pattern->getDeclContext()->isFunctionOrMethod() && !Enum->isScoped()) { // If the enumeration is within a function or method, record the enum // constant as a local. SemaRef.CurrentInstantiationScope->InstantiatedLocal(EC, EnumConst); } } } SemaRef.ActOnEnumBody(Enum->getLocation(), Enum->getBraceRange(), Enum, Enumerators, nullptr, ParsedAttributesView()); } Decl *TemplateDeclInstantiator::VisitEnumConstantDecl(EnumConstantDecl *D) { llvm_unreachable("EnumConstantDecls can only occur within EnumDecls."); } Decl * TemplateDeclInstantiator::VisitBuiltinTemplateDecl(BuiltinTemplateDecl *D) { llvm_unreachable("BuiltinTemplateDecls cannot be instantiated."); } Decl *TemplateDeclInstantiator::VisitClassTemplateDecl(ClassTemplateDecl *D) { bool isFriend = (D->getFriendObjectKind() != Decl::FOK_None); // Create a local instantiation scope for this class template, which // will contain the instantiations of the template parameters. 
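  // For example, given
  //   template <class T> struct Outer {
  //     template <class U> struct Inner;
  //   };
  // instantiating Outer<int> reaches this visitor for 'Inner' and produces
  // the member class template Outer<int>::Inner.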
LocalInstantiationScope Scope(SemaRef); TemplateParameterList *TempParams = D->getTemplateParameters(); TemplateParameterList *InstParams = SubstTemplateParams(TempParams); if (!InstParams) return nullptr; CXXRecordDecl *Pattern = D->getTemplatedDecl(); // Instantiate the qualifier. We have to do this first in case // we're a friend declaration, because if we are then we need to put // the new declaration in the appropriate context. NestedNameSpecifierLoc QualifierLoc = Pattern->getQualifierLoc(); if (QualifierLoc) { QualifierLoc = SemaRef.SubstNestedNameSpecifierLoc(QualifierLoc, TemplateArgs); if (!QualifierLoc) return nullptr; } CXXRecordDecl *PrevDecl = nullptr; ClassTemplateDecl *PrevClassTemplate = nullptr; if (!isFriend && getPreviousDeclForInstantiation(Pattern)) { DeclContext::lookup_result Found = Owner->lookup(Pattern->getDeclName()); if (!Found.empty()) { PrevClassTemplate = dyn_cast(Found.front()); if (PrevClassTemplate) PrevDecl = PrevClassTemplate->getTemplatedDecl(); } } // If this isn't a friend, then it's a member template, in which // case we just want to build the instantiation in the // specialization. If it is a friend, we want to build it in // the appropriate context. DeclContext *DC = Owner; if (isFriend) { if (QualifierLoc) { CXXScopeSpec SS; SS.Adopt(QualifierLoc); DC = SemaRef.computeDeclContext(SS); if (!DC) return nullptr; } else { DC = SemaRef.FindInstantiatedContext(Pattern->getLocation(), Pattern->getDeclContext(), TemplateArgs); } // Look for a previous declaration of the template in the owning // context. LookupResult R(SemaRef, Pattern->getDeclName(), Pattern->getLocation(), Sema::LookupOrdinaryName, SemaRef.forRedeclarationInCurContext()); SemaRef.LookupQualifiedName(R, DC); if (R.isSingleResult()) { PrevClassTemplate = R.getAsSingle(); if (PrevClassTemplate) PrevDecl = PrevClassTemplate->getTemplatedDecl(); } if (!PrevClassTemplate && QualifierLoc) { SemaRef.Diag(Pattern->getLocation(), diag::err_not_tag_in_scope) << D->getTemplatedDecl()->getTagKind() << Pattern->getDeclName() << DC << QualifierLoc.getSourceRange(); return nullptr; } if (PrevClassTemplate) { TemplateParameterList *PrevParams = PrevClassTemplate->getMostRecentDecl()->getTemplateParameters(); // Make sure the parameter lists match. if (!SemaRef.TemplateParameterListsAreEqual(InstParams, PrevParams, true, Sema::TPL_TemplateMatch)) return nullptr; // Do some additional validation, then merge default arguments // from the existing declarations. if (SemaRef.CheckTemplateParameterList(InstParams, PrevParams, Sema::TPC_ClassTemplate)) return nullptr; } } CXXRecordDecl *RecordInst = CXXRecordDecl::Create( SemaRef.Context, Pattern->getTagKind(), DC, Pattern->getBeginLoc(), Pattern->getLocation(), Pattern->getIdentifier(), PrevDecl, /*DelayTypeCreation=*/true); if (QualifierLoc) RecordInst->setQualifierInfo(QualifierLoc); SemaRef.InstantiateAttrsForDecl(TemplateArgs, Pattern, RecordInst, LateAttrs, StartingScope); ClassTemplateDecl *Inst = ClassTemplateDecl::Create(SemaRef.Context, DC, D->getLocation(), D->getIdentifier(), InstParams, RecordInst); assert(!(isFriend && Owner->isDependentContext())); Inst->setPreviousDecl(PrevClassTemplate); RecordInst->setDescribedClassTemplate(Inst); if (isFriend) { if (PrevClassTemplate) Inst->setAccess(PrevClassTemplate->getAccess()); else Inst->setAccess(D->getAccess()); Inst->setObjectOfFriendDecl(); // TODO: do we want to track the instantiation progeny of this // friend target decl? 
} else { Inst->setAccess(D->getAccess()); if (!PrevClassTemplate) Inst->setInstantiatedFromMemberTemplate(D); } // Trigger creation of the type for the instantiation. SemaRef.Context.getInjectedClassNameType(RecordInst, Inst->getInjectedClassNameSpecialization()); // Finish handling of friends. if (isFriend) { DC->makeDeclVisibleInContext(Inst); Inst->setLexicalDeclContext(Owner); RecordInst->setLexicalDeclContext(Owner); return Inst; } if (D->isOutOfLine()) { Inst->setLexicalDeclContext(D->getLexicalDeclContext()); RecordInst->setLexicalDeclContext(D->getLexicalDeclContext()); } Owner->addDecl(Inst); if (!PrevClassTemplate) { // Queue up any out-of-line partial specializations of this member // class template; the client will force their instantiation once // the enclosing class has been instantiated. SmallVector PartialSpecs; D->getPartialSpecializations(PartialSpecs); for (unsigned I = 0, N = PartialSpecs.size(); I != N; ++I) if (PartialSpecs[I]->getFirstDecl()->isOutOfLine()) OutOfLinePartialSpecs.push_back(std::make_pair(Inst, PartialSpecs[I])); } return Inst; } Decl * TemplateDeclInstantiator::VisitClassTemplatePartialSpecializationDecl( ClassTemplatePartialSpecializationDecl *D) { ClassTemplateDecl *ClassTemplate = D->getSpecializedTemplate(); // Lookup the already-instantiated declaration in the instantiation // of the class template and return that. DeclContext::lookup_result Found = Owner->lookup(ClassTemplate->getDeclName()); if (Found.empty()) return nullptr; ClassTemplateDecl *InstClassTemplate = dyn_cast(Found.front()); if (!InstClassTemplate) return nullptr; if (ClassTemplatePartialSpecializationDecl *Result = InstClassTemplate->findPartialSpecInstantiatedFromMember(D)) return Result; return InstantiateClassTemplatePartialSpecialization(InstClassTemplate, D); } Decl *TemplateDeclInstantiator::VisitVarTemplateDecl(VarTemplateDecl *D) { assert(D->getTemplatedDecl()->isStaticDataMember() && "Only static data member templates are allowed."); // Create a local instantiation scope for this variable template, which // will contain the instantiations of the template parameters. LocalInstantiationScope Scope(SemaRef); TemplateParameterList *TempParams = D->getTemplateParameters(); TemplateParameterList *InstParams = SubstTemplateParams(TempParams); if (!InstParams) return nullptr; VarDecl *Pattern = D->getTemplatedDecl(); VarTemplateDecl *PrevVarTemplate = nullptr; if (getPreviousDeclForInstantiation(Pattern)) { DeclContext::lookup_result Found = Owner->lookup(Pattern->getDeclName()); if (!Found.empty()) PrevVarTemplate = dyn_cast(Found.front()); } VarDecl *VarInst = cast_or_null(VisitVarDecl(Pattern, /*InstantiatingVarTemplate=*/true)); if (!VarInst) return nullptr; DeclContext *DC = Owner; VarTemplateDecl *Inst = VarTemplateDecl::Create( SemaRef.Context, DC, D->getLocation(), D->getIdentifier(), InstParams, VarInst); VarInst->setDescribedVarTemplate(Inst); Inst->setPreviousDecl(PrevVarTemplate); Inst->setAccess(D->getAccess()); if (!PrevVarTemplate) Inst->setInstantiatedFromMemberTemplate(D); if (D->isOutOfLine()) { Inst->setLexicalDeclContext(D->getLexicalDeclContext()); VarInst->setLexicalDeclContext(D->getLexicalDeclContext()); } Owner->addDecl(Inst); if (!PrevVarTemplate) { // Queue up any out-of-line partial specializations of this member // variable template; the client will force their instantiation once // the enclosing class has been instantiated. 
    SmallVector<VarTemplatePartialSpecializationDecl *, 4> PartialSpecs;
    D->getPartialSpecializations(PartialSpecs);
    for (unsigned I = 0, N = PartialSpecs.size(); I != N; ++I)
      if (PartialSpecs[I]->getFirstDecl()->isOutOfLine())
        OutOfLineVarPartialSpecs.push_back(
            std::make_pair(Inst, PartialSpecs[I]));
  }

  return Inst;
}

Decl *TemplateDeclInstantiator::VisitVarTemplatePartialSpecializationDecl(
    VarTemplatePartialSpecializationDecl *D) {
  assert(D->isStaticDataMember() &&
         "Only static data member templates are allowed.");

  VarTemplateDecl *VarTemplate = D->getSpecializedTemplate();

  // Lookup the already-instantiated declaration and return that.
  DeclContext::lookup_result Found = Owner->lookup(VarTemplate->getDeclName());
  assert(!Found.empty() && "Instantiation found nothing?");

  VarTemplateDecl *InstVarTemplate = dyn_cast<VarTemplateDecl>(Found.front());
  assert(InstVarTemplate && "Instantiation did not find a variable template?");

  if (VarTemplatePartialSpecializationDecl *Result =
          InstVarTemplate->findPartialSpecInstantiatedFromMember(D))
    return Result;

  return InstantiateVarTemplatePartialSpecialization(InstVarTemplate, D);
}

Decl *
TemplateDeclInstantiator::VisitFunctionTemplateDecl(FunctionTemplateDecl *D) {
  // Create a local instantiation scope for this function template, which
  // will contain the instantiations of the template parameters and then get
  // merged with the local instantiation scope for the function template
  // itself.
  LocalInstantiationScope Scope(SemaRef);

  TemplateParameterList *TempParams = D->getTemplateParameters();
  TemplateParameterList *InstParams = SubstTemplateParams(TempParams);
  if (!InstParams)
    return nullptr;

  FunctionDecl *Instantiated = nullptr;
  if (CXXMethodDecl *DMethod = dyn_cast<CXXMethodDecl>(D->getTemplatedDecl()))
    Instantiated =
        cast_or_null<FunctionDecl>(VisitCXXMethodDecl(DMethod, InstParams));
  else
    Instantiated = cast_or_null<FunctionDecl>(
        VisitFunctionDecl(D->getTemplatedDecl(), InstParams));

  if (!Instantiated)
    return nullptr;

  // Link the instantiated function template declaration to the function
  // template from which it was instantiated.
  FunctionTemplateDecl *InstTemplate =
      Instantiated->getDescribedFunctionTemplate();
  InstTemplate->setAccess(D->getAccess());
  assert(InstTemplate &&
         "VisitFunctionDecl/CXXMethodDecl didn't create a template!");

  bool isFriend = (InstTemplate->getFriendObjectKind() != Decl::FOK_None);

  // Link the instantiation back to the pattern *unless* this is a
  // non-definition friend declaration.
  if (!InstTemplate->getInstantiatedFromMemberTemplate() &&
      !(isFriend && !D->getTemplatedDecl()->isThisDeclarationADefinition()))
    InstTemplate->setInstantiatedFromMemberTemplate(D);

  // Make declarations visible in the appropriate context.
if (!isFriend) { Owner->addDecl(InstTemplate); } else if (InstTemplate->getDeclContext()->isRecord() && !getPreviousDeclForInstantiation(D)) { SemaRef.CheckFriendAccess(InstTemplate); } return InstTemplate; } Decl *TemplateDeclInstantiator::VisitCXXRecordDecl(CXXRecordDecl *D) { CXXRecordDecl *PrevDecl = nullptr; if (CXXRecordDecl *PatternPrev = getPreviousDeclForInstantiation(D)) { NamedDecl *Prev = SemaRef.FindInstantiatedDecl(D->getLocation(), PatternPrev, TemplateArgs); if (!Prev) return nullptr; PrevDecl = cast(Prev); } CXXRecordDecl *Record = nullptr; bool IsInjectedClassName = D->isInjectedClassName(); if (D->isLambda()) Record = CXXRecordDecl::CreateLambda( SemaRef.Context, Owner, D->getLambdaTypeInfo(), D->getLocation(), D->isDependentLambda(), D->isGenericLambda(), D->getLambdaCaptureDefault()); else Record = CXXRecordDecl::Create(SemaRef.Context, D->getTagKind(), Owner, D->getBeginLoc(), D->getLocation(), D->getIdentifier(), PrevDecl, /*DelayTypeCreation=*/IsInjectedClassName); // Link the type of the injected-class-name to that of the outer class. if (IsInjectedClassName) (void)SemaRef.Context.getTypeDeclType(Record, cast(Owner)); // Substitute the nested name specifier, if any. if (SubstQualifier(D, Record)) return nullptr; SemaRef.InstantiateAttrsForDecl(TemplateArgs, D, Record, LateAttrs, StartingScope); Record->setImplicit(D->isImplicit()); // FIXME: Check against AS_none is an ugly hack to work around the issue that // the tag decls introduced by friend class declarations don't have an access // specifier. Remove once this area of the code gets sorted out. if (D->getAccess() != AS_none) Record->setAccess(D->getAccess()); if (!IsInjectedClassName) Record->setInstantiationOfMemberClass(D, TSK_ImplicitInstantiation); // If the original function was part of a friend declaration, // inherit its namespace state. if (D->getFriendObjectKind()) Record->setObjectOfFriendDecl(); // Make sure that anonymous structs and unions are recorded. if (D->isAnonymousStructOrUnion()) Record->setAnonymousStructOrUnion(true); if (D->isLocalClass()) SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, Record); // Forward the mangling number from the template to the instantiated decl. SemaRef.Context.setManglingNumber(Record, SemaRef.Context.getManglingNumber(D)); // See if the old tag was defined along with a declarator. // If it did, mark the new tag as being associated with that declarator. if (DeclaratorDecl *DD = SemaRef.Context.getDeclaratorForUnnamedTagDecl(D)) SemaRef.Context.addDeclaratorForUnnamedTagDecl(Record, DD); // See if the old tag was defined along with a typedef. // If it did, mark the new tag as being associated with that typedef. if (TypedefNameDecl *TND = SemaRef.Context.getTypedefNameForUnnamedTagDecl(D)) SemaRef.Context.addTypedefNameForUnnamedTagDecl(Record, TND); Owner->addDecl(Record); // DR1484 clarifies that the members of a local class are instantiated as part // of the instantiation of their enclosing entity. if (D->isCompleteDefinition() && D->isLocalClass()) { Sema::LocalEagerInstantiationScope LocalInstantiations(SemaRef); SemaRef.InstantiateClass(D->getLocation(), Record, D, TemplateArgs, TSK_ImplicitInstantiation, /*Complain=*/true); // For nested local classes, we will instantiate the members when we // reach the end of the outermost (non-nested) local class. 
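  // For instance:
  //   template <class T> void f() {
  //     struct Local { struct Nested { T t; }; };
  //   }
  // Instantiating f<int> instantiates 'Local' eagerly here; the members of
  // the nested local class 'Nested' are instantiated below, once the
  // outermost local class has been handled.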
if (!D->isCXXClassMember()) SemaRef.InstantiateClassMembers(D->getLocation(), Record, TemplateArgs, TSK_ImplicitInstantiation); // This class may have local implicit instantiations that need to be // performed within this scope. LocalInstantiations.perform(); } SemaRef.DiagnoseUnusedNestedTypedefs(Record); if (IsInjectedClassName) assert(Record->isInjectedClassName() && "Broken injected-class-name"); return Record; } /// Adjust the given function type for an instantiation of the /// given declaration, to cope with modifications to the function's type that /// aren't reflected in the type-source information. /// /// \param D The declaration we're instantiating. /// \param TInfo The already-instantiated type. static QualType adjustFunctionTypeForInstantiation(ASTContext &Context, FunctionDecl *D, TypeSourceInfo *TInfo) { const FunctionProtoType *OrigFunc = D->getType()->castAs(); const FunctionProtoType *NewFunc = TInfo->getType()->castAs(); if (OrigFunc->getExtInfo() == NewFunc->getExtInfo()) return TInfo->getType(); FunctionProtoType::ExtProtoInfo NewEPI = NewFunc->getExtProtoInfo(); NewEPI.ExtInfo = OrigFunc->getExtInfo(); return Context.getFunctionType(NewFunc->getReturnType(), NewFunc->getParamTypes(), NewEPI); } /// Normal class members are of more specific types and therefore /// don't make it here. This function serves three purposes: /// 1) instantiating function templates /// 2) substituting friend declarations /// 3) substituting deduction guide declarations for nested class templates Decl *TemplateDeclInstantiator::VisitFunctionDecl( FunctionDecl *D, TemplateParameterList *TemplateParams, RewriteKind FunctionRewriteKind) { // Check whether there is already a function template specialization for // this declaration. FunctionTemplateDecl *FunctionTemplate = D->getDescribedFunctionTemplate(); if (FunctionTemplate && !TemplateParams) { ArrayRef Innermost = TemplateArgs.getInnermost(); void *InsertPos = nullptr; FunctionDecl *SpecFunc = FunctionTemplate->findSpecialization(Innermost, InsertPos); // If we already have a function template specialization, return it. if (SpecFunc) return SpecFunc; } bool isFriend; if (FunctionTemplate) isFriend = (FunctionTemplate->getFriendObjectKind() != Decl::FOK_None); else isFriend = (D->getFriendObjectKind() != Decl::FOK_None); bool MergeWithParentScope = (TemplateParams != nullptr) || Owner->isFunctionOrMethod() || !(isa(Owner) && cast(Owner)->isDefinedOutsideFunctionOrMethod()); LocalInstantiationScope Scope(SemaRef, MergeWithParentScope); ExplicitSpecifier InstantiatedExplicitSpecifier; if (auto *DGuide = dyn_cast(D)) { InstantiatedExplicitSpecifier = instantiateExplicitSpecifier( SemaRef, TemplateArgs, DGuide->getExplicitSpecifier(), DGuide); if (InstantiatedExplicitSpecifier.isInvalid()) return nullptr; } SmallVector Params; TypeSourceInfo *TInfo = SubstFunctionType(D, Params); if (!TInfo) return nullptr; QualType T = adjustFunctionTypeForInstantiation(SemaRef.Context, D, TInfo); if (TemplateParams && TemplateParams->size()) { auto *LastParam = dyn_cast(TemplateParams->asArray().back()); if (LastParam && LastParam->isImplicit() && LastParam->hasTypeConstraint()) { // In abbreviated templates, the type-constraints of invented template // type parameters are instantiated with the function type, invalidating // the TemplateParameterList which relied on the template type parameter // not having a type constraint. Recreate the TemplateParameterList with // the updated parameter list. 
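  // For example, the abbreviated function template
  //   void g(SomeConcept auto x);   // 'SomeConcept' standing in for any concept
  // declares an invented, constrained template type parameter for 'x'; that
  // type-constraint is what requires rebuilding the parameter list here.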
TemplateParams = TemplateParameterList::Create( SemaRef.Context, TemplateParams->getTemplateLoc(), TemplateParams->getLAngleLoc(), TemplateParams->asArray(), TemplateParams->getRAngleLoc(), TemplateParams->getRequiresClause()); } } NestedNameSpecifierLoc QualifierLoc = D->getQualifierLoc(); if (QualifierLoc) { QualifierLoc = SemaRef.SubstNestedNameSpecifierLoc(QualifierLoc, TemplateArgs); if (!QualifierLoc) return nullptr; } // FIXME: Concepts: Do not substitute into constraint expressions Expr *TrailingRequiresClause = D->getTrailingRequiresClause(); if (TrailingRequiresClause) { EnterExpressionEvaluationContext ConstantEvaluated( SemaRef, Sema::ExpressionEvaluationContext::Unevaluated); ExprResult SubstRC = SemaRef.SubstExpr(TrailingRequiresClause, TemplateArgs); if (SubstRC.isInvalid()) return nullptr; TrailingRequiresClause = SubstRC.get(); if (!SemaRef.CheckConstraintExpression(TrailingRequiresClause)) return nullptr; } // If we're instantiating a local function declaration, put the result // in the enclosing namespace; otherwise we need to find the instantiated // context. DeclContext *DC; if (D->isLocalExternDecl()) { DC = Owner; SemaRef.adjustContextForLocalExternDecl(DC); } else if (isFriend && QualifierLoc) { CXXScopeSpec SS; SS.Adopt(QualifierLoc); DC = SemaRef.computeDeclContext(SS); if (!DC) return nullptr; } else { DC = SemaRef.FindInstantiatedContext(D->getLocation(), D->getDeclContext(), TemplateArgs); } DeclarationNameInfo NameInfo = SemaRef.SubstDeclarationNameInfo(D->getNameInfo(), TemplateArgs); if (FunctionRewriteKind != RewriteKind::None) adjustForRewrite(FunctionRewriteKind, D, T, TInfo, NameInfo); FunctionDecl *Function; if (auto *DGuide = dyn_cast(D)) { Function = CXXDeductionGuideDecl::Create( SemaRef.Context, DC, D->getInnerLocStart(), InstantiatedExplicitSpecifier, NameInfo, T, TInfo, D->getSourceRange().getEnd()); if (DGuide->isCopyDeductionCandidate()) cast(Function)->setIsCopyDeductionCandidate(); Function->setAccess(D->getAccess()); } else { Function = FunctionDecl::Create( SemaRef.Context, DC, D->getInnerLocStart(), NameInfo, T, TInfo, D->getCanonicalDecl()->getStorageClass(), D->UsesFPIntrin(), D->isInlineSpecified(), D->hasWrittenPrototype(), D->getConstexprKind(), TrailingRequiresClause); Function->setRangeEnd(D->getSourceRange().getEnd()); } if (D->isInlined()) Function->setImplicitlyInline(); if (QualifierLoc) Function->setQualifierInfo(QualifierLoc); if (D->isLocalExternDecl()) Function->setLocalExternDecl(); DeclContext *LexicalDC = Owner; if (!isFriend && D->isOutOfLine() && !D->isLocalExternDecl()) { assert(D->getDeclContext()->isFileContext()); LexicalDC = D->getDeclContext(); } Function->setLexicalDeclContext(LexicalDC); // Attach the parameters for (unsigned P = 0; P < Params.size(); ++P) if (Params[P]) Params[P]->setOwningFunction(Function); Function->setParams(Params); if (TrailingRequiresClause) Function->setTrailingRequiresClause(TrailingRequiresClause); if (TemplateParams) { // Our resulting instantiation is actually a function template, since we // are substituting only the outer template parameters. For example, given // // template // struct X { // template friend void f(T, U); // }; // // X x; // // We are instantiating the friend function template "f" within X, // which means substituting int for T, but leaving "f" as a friend function // template. // Build the function template itself. 
FunctionTemplate = FunctionTemplateDecl::Create(SemaRef.Context, DC, Function->getLocation(), Function->getDeclName(), TemplateParams, Function); Function->setDescribedFunctionTemplate(FunctionTemplate); FunctionTemplate->setLexicalDeclContext(LexicalDC); if (isFriend && D->isThisDeclarationADefinition()) { FunctionTemplate->setInstantiatedFromMemberTemplate( D->getDescribedFunctionTemplate()); } } else if (FunctionTemplate) { // Record this function template specialization. ArrayRef Innermost = TemplateArgs.getInnermost(); Function->setFunctionTemplateSpecialization(FunctionTemplate, TemplateArgumentList::CreateCopy(SemaRef.Context, Innermost), /*InsertPos=*/nullptr); } else if (isFriend && D->isThisDeclarationADefinition()) { // Do not connect the friend to the template unless it's actually a // definition. We don't want non-template functions to be marked as being // template instantiations. Function->setInstantiationOfMemberFunction(D, TSK_ImplicitInstantiation); } if (isFriend) { Function->setObjectOfFriendDecl(); if (FunctionTemplateDecl *FT = Function->getDescribedFunctionTemplate()) FT->setObjectOfFriendDecl(); } if (InitFunctionInstantiation(Function, D)) Function->setInvalidDecl(); bool IsExplicitSpecialization = false; LookupResult Previous( SemaRef, Function->getDeclName(), SourceLocation(), D->isLocalExternDecl() ? Sema::LookupRedeclarationWithLinkage : Sema::LookupOrdinaryName, D->isLocalExternDecl() ? Sema::ForExternalRedeclaration : SemaRef.forRedeclarationInCurContext()); if (DependentFunctionTemplateSpecializationInfo *Info = D->getDependentSpecializationInfo()) { assert(isFriend && "non-friend has dependent specialization info?"); // Instantiate the explicit template arguments. TemplateArgumentListInfo ExplicitArgs(Info->getLAngleLoc(), Info->getRAngleLoc()); if (SemaRef.SubstTemplateArguments(Info->arguments(), TemplateArgs, ExplicitArgs)) return nullptr; // Map the candidate templates to their instantiations. for (unsigned I = 0, E = Info->getNumTemplates(); I != E; ++I) { Decl *Temp = SemaRef.FindInstantiatedDecl(D->getLocation(), Info->getTemplate(I), TemplateArgs); if (!Temp) return nullptr; Previous.addDecl(cast(Temp)); } if (SemaRef.CheckFunctionTemplateSpecialization(Function, &ExplicitArgs, Previous)) Function->setInvalidDecl(); IsExplicitSpecialization = true; } else if (const ASTTemplateArgumentListInfo *Info = D->getTemplateSpecializationArgsAsWritten()) { // The name of this function was written as a template-id. SemaRef.LookupQualifiedName(Previous, DC); // Instantiate the explicit template arguments. TemplateArgumentListInfo ExplicitArgs(Info->getLAngleLoc(), Info->getRAngleLoc()); if (SemaRef.SubstTemplateArguments(Info->arguments(), TemplateArgs, ExplicitArgs)) return nullptr; if (SemaRef.CheckFunctionTemplateSpecialization(Function, &ExplicitArgs, Previous)) Function->setInvalidDecl(); IsExplicitSpecialization = true; } else if (TemplateParams || !FunctionTemplate) { // Look only into the namespace where the friend would be declared to // find a previous declaration. This is the innermost enclosing namespace, // as described in ActOnFriendFunctionDecl. SemaRef.LookupQualifiedName(Previous, DC->getRedeclContext()); // In C++, the previous declaration we find might be a tag type // (class or enum). In this case, the new declaration will hide the // tag type. Note that this does does not apply if we're declaring a // typedef (C++ [dcl.typedef]p4). 
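  // For instance, if the lookup above found only a declaration such as
  // 'struct f;', the function 'f' being instantiated hides that tag rather
  // than conflicting with it, so the stale result is discarded below.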
if (Previous.isSingleTagDecl()) Previous.clear(); // Filter out previous declarations that don't match the scope. The only // effect this has is to remove declarations found in inline namespaces // for friend declarations with unqualified names. SemaRef.FilterLookupForScope(Previous, DC, /*Scope*/ nullptr, /*ConsiderLinkage*/ true, QualifierLoc.hasQualifier()); } SemaRef.CheckFunctionDeclaration(/*Scope*/ nullptr, Function, Previous, IsExplicitSpecialization); // Check the template parameter list against the previous declaration. The // goal here is to pick up default arguments added since the friend was // declared; we know the template parameter lists match, since otherwise // we would not have picked this template as the previous declaration. if (isFriend && TemplateParams && FunctionTemplate->getPreviousDecl()) { SemaRef.CheckTemplateParameterList( TemplateParams, FunctionTemplate->getPreviousDecl()->getTemplateParameters(), Function->isThisDeclarationADefinition() ? Sema::TPC_FriendFunctionTemplateDefinition : Sema::TPC_FriendFunctionTemplate); } // If we're introducing a friend definition after the first use, trigger // instantiation. // FIXME: If this is a friend function template definition, we should check // to see if any specializations have been used. if (isFriend && D->isThisDeclarationADefinition() && Function->isUsed(false)) { if (MemberSpecializationInfo *MSInfo = Function->getMemberSpecializationInfo()) { if (MSInfo->getPointOfInstantiation().isInvalid()) { SourceLocation Loc = D->getLocation(); // FIXME MSInfo->setPointOfInstantiation(Loc); SemaRef.PendingLocalImplicitInstantiations.push_back( std::make_pair(Function, Loc)); } } } if (D->isExplicitlyDefaulted()) { if (SubstDefaultedFunction(Function, D)) return nullptr; } if (D->isDeleted()) SemaRef.SetDeclDeleted(Function, D->getLocation()); NamedDecl *PrincipalDecl = (TemplateParams ? cast(FunctionTemplate) : Function); // If this declaration lives in a different context from its lexical context, // add it to the corresponding lookup table. if (isFriend || (Function->isLocalExternDecl() && !Function->getPreviousDecl())) DC->makeDeclVisibleInContext(PrincipalDecl); if (Function->isOverloadedOperator() && !DC->isRecord() && PrincipalDecl->isInIdentifierNamespace(Decl::IDNS_Ordinary)) PrincipalDecl->setNonMemberOperator(); return Function; } Decl *TemplateDeclInstantiator::VisitCXXMethodDecl( CXXMethodDecl *D, TemplateParameterList *TemplateParams, Optional ClassScopeSpecializationArgs, RewriteKind FunctionRewriteKind) { FunctionTemplateDecl *FunctionTemplate = D->getDescribedFunctionTemplate(); if (FunctionTemplate && !TemplateParams) { // We are creating a function template specialization from a function // template. Check whether there is already a function template // specialization for this particular set of template arguments. ArrayRef Innermost = TemplateArgs.getInnermost(); void *InsertPos = nullptr; FunctionDecl *SpecFunc = FunctionTemplate->findSpecialization(Innermost, InsertPos); // If we already have a function template specialization, return it. if (SpecFunc) return SpecFunc; } bool isFriend; if (FunctionTemplate) isFriend = (FunctionTemplate->getFriendObjectKind() != Decl::FOK_None); else isFriend = (D->getFriendObjectKind() != Decl::FOK_None); bool MergeWithParentScope = (TemplateParams != nullptr) || !(isa(Owner) && cast(Owner)->isDefinedOutsideFunctionOrMethod()); LocalInstantiationScope Scope(SemaRef, MergeWithParentScope); // Instantiate enclosing template arguments for friends. 
  SmallVector<TemplateParameterList *, 4> TempParamLists;
  unsigned NumTempParamLists = 0;
  if (isFriend && (NumTempParamLists = D->getNumTemplateParameterLists())) {
    TempParamLists.resize(NumTempParamLists);
    for (unsigned I = 0; I != NumTempParamLists; ++I) {
      TemplateParameterList *TempParams = D->getTemplateParameterList(I);
      TemplateParameterList *InstParams = SubstTemplateParams(TempParams);
      if (!InstParams)
        return nullptr;
      TempParamLists[I] = InstParams;
    }
  }

  ExplicitSpecifier InstantiatedExplicitSpecifier =
      instantiateExplicitSpecifier(SemaRef, TemplateArgs,
                                   ExplicitSpecifier::getFromDecl(D), D);
  if (InstantiatedExplicitSpecifier.isInvalid())
    return nullptr;

  // Implicit destructors/constructors created for local classes in
  // DeclareImplicit* (see SemaDeclCXX.cpp) might not have an associated TSI.
  // Unfortunately there isn't enough context in those functions to
  // conditionally populate the TSI without breaking non-template related use
  // cases. Populate TSIs prior to calling SubstFunctionType to make sure we
  // get a proper transformation.
  if (cast<CXXRecordDecl>(D->getParent())->isLambda() &&
      !D->getTypeSourceInfo() &&
      isa<CXXConstructorDecl, CXXDestructorDecl>(D)) {
    TypeSourceInfo *TSI =
        SemaRef.Context.getTrivialTypeSourceInfo(D->getType());
    D->setTypeSourceInfo(TSI);
  }

  SmallVector<ParmVarDecl *, 4> Params;
  TypeSourceInfo *TInfo = SubstFunctionType(D, Params);
  if (!TInfo)
    return nullptr;
  QualType T = adjustFunctionTypeForInstantiation(SemaRef.Context, D, TInfo);

  if (TemplateParams && TemplateParams->size()) {
    auto *LastParam =
        dyn_cast<TemplateTypeParmDecl>(TemplateParams->asArray().back());
    if (LastParam && LastParam->isImplicit() &&
        LastParam->hasTypeConstraint()) {
      // In abbreviated templates, the type-constraints of invented template
      // type parameters are instantiated with the function type, invalidating
      // the TemplateParameterList which relied on the template type parameter
      // not having a type constraint. Recreate the TemplateParameterList with
      // the updated parameter list.
TemplateParams = TemplateParameterList::Create( SemaRef.Context, TemplateParams->getTemplateLoc(), TemplateParams->getLAngleLoc(), TemplateParams->asArray(), TemplateParams->getRAngleLoc(), TemplateParams->getRequiresClause()); } } NestedNameSpecifierLoc QualifierLoc = D->getQualifierLoc(); if (QualifierLoc) { QualifierLoc = SemaRef.SubstNestedNameSpecifierLoc(QualifierLoc, TemplateArgs); if (!QualifierLoc) return nullptr; } // FIXME: Concepts: Do not substitute into constraint expressions Expr *TrailingRequiresClause = D->getTrailingRequiresClause(); if (TrailingRequiresClause) { EnterExpressionEvaluationContext ConstantEvaluated( SemaRef, Sema::ExpressionEvaluationContext::Unevaluated); auto *ThisContext = dyn_cast_or_null(Owner); Sema::CXXThisScopeRAII ThisScope(SemaRef, ThisContext, D->getMethodQualifiers(), ThisContext); ExprResult SubstRC = SemaRef.SubstExpr(TrailingRequiresClause, TemplateArgs); if (SubstRC.isInvalid()) return nullptr; TrailingRequiresClause = SubstRC.get(); if (!SemaRef.CheckConstraintExpression(TrailingRequiresClause)) return nullptr; } DeclContext *DC = Owner; if (isFriend) { if (QualifierLoc) { CXXScopeSpec SS; SS.Adopt(QualifierLoc); DC = SemaRef.computeDeclContext(SS); if (DC && SemaRef.RequireCompleteDeclContext(SS, DC)) return nullptr; } else { DC = SemaRef.FindInstantiatedContext(D->getLocation(), D->getDeclContext(), TemplateArgs); } if (!DC) return nullptr; } DeclarationNameInfo NameInfo = SemaRef.SubstDeclarationNameInfo(D->getNameInfo(), TemplateArgs); if (FunctionRewriteKind != RewriteKind::None) adjustForRewrite(FunctionRewriteKind, D, T, TInfo, NameInfo); // Build the instantiated method declaration. CXXRecordDecl *Record = cast(DC); CXXMethodDecl *Method = nullptr; SourceLocation StartLoc = D->getInnerLocStart(); if (CXXConstructorDecl *Constructor = dyn_cast(D)) { Method = CXXConstructorDecl::Create( SemaRef.Context, Record, StartLoc, NameInfo, T, TInfo, InstantiatedExplicitSpecifier, Constructor->UsesFPIntrin(), Constructor->isInlineSpecified(), false, Constructor->getConstexprKind(), InheritedConstructor(), TrailingRequiresClause); Method->setRangeEnd(Constructor->getEndLoc()); } else if (CXXDestructorDecl *Destructor = dyn_cast(D)) { Method = CXXDestructorDecl::Create( SemaRef.Context, Record, StartLoc, NameInfo, T, TInfo, Destructor->UsesFPIntrin(), Destructor->isInlineSpecified(), false, Destructor->getConstexprKind(), TrailingRequiresClause); Method->setRangeEnd(Destructor->getEndLoc()); Method->setDeclName(SemaRef.Context.DeclarationNames.getCXXDestructorName( SemaRef.Context.getCanonicalType( SemaRef.Context.getTypeDeclType(Record)))); } else if (CXXConversionDecl *Conversion = dyn_cast(D)) { Method = CXXConversionDecl::Create( SemaRef.Context, Record, StartLoc, NameInfo, T, TInfo, Conversion->UsesFPIntrin(), Conversion->isInlineSpecified(), InstantiatedExplicitSpecifier, Conversion->getConstexprKind(), Conversion->getEndLoc(), TrailingRequiresClause); } else { StorageClass SC = D->isStatic() ? SC_Static : SC_None; Method = CXXMethodDecl::Create( SemaRef.Context, Record, StartLoc, NameInfo, T, TInfo, SC, D->UsesFPIntrin(), D->isInlineSpecified(), D->getConstexprKind(), D->getEndLoc(), TrailingRequiresClause); } if (D->isInlined()) Method->setImplicitlyInline(); if (QualifierLoc) Method->setQualifierInfo(QualifierLoc); if (TemplateParams) { // Our resulting instantiation is actually a function template, since we // are substituting only the outer template parameters. 
For example, given // // template // struct X { // template void f(T, U); // }; // // X x; // // We are instantiating the member template "f" within X, which means // substituting int for T, but leaving "f" as a member function template. // Build the function template itself. FunctionTemplate = FunctionTemplateDecl::Create(SemaRef.Context, Record, Method->getLocation(), Method->getDeclName(), TemplateParams, Method); if (isFriend) { FunctionTemplate->setLexicalDeclContext(Owner); FunctionTemplate->setObjectOfFriendDecl(); } else if (D->isOutOfLine()) FunctionTemplate->setLexicalDeclContext(D->getLexicalDeclContext()); Method->setDescribedFunctionTemplate(FunctionTemplate); } else if (FunctionTemplate) { // Record this function template specialization. ArrayRef Innermost = TemplateArgs.getInnermost(); Method->setFunctionTemplateSpecialization(FunctionTemplate, TemplateArgumentList::CreateCopy(SemaRef.Context, Innermost), /*InsertPos=*/nullptr); } else if (!isFriend) { // Record that this is an instantiation of a member function. Method->setInstantiationOfMemberFunction(D, TSK_ImplicitInstantiation); } // If we are instantiating a member function defined // out-of-line, the instantiation will have the same lexical // context (which will be a namespace scope) as the template. if (isFriend) { if (NumTempParamLists) Method->setTemplateParameterListsInfo( SemaRef.Context, llvm::makeArrayRef(TempParamLists.data(), NumTempParamLists)); Method->setLexicalDeclContext(Owner); Method->setObjectOfFriendDecl(); } else if (D->isOutOfLine()) Method->setLexicalDeclContext(D->getLexicalDeclContext()); // Attach the parameters for (unsigned P = 0; P < Params.size(); ++P) Params[P]->setOwningFunction(Method); Method->setParams(Params); if (InitMethodInstantiation(Method, D)) Method->setInvalidDecl(); LookupResult Previous(SemaRef, NameInfo, Sema::LookupOrdinaryName, Sema::ForExternalRedeclaration); bool IsExplicitSpecialization = false; // If the name of this function was written as a template-id, instantiate // the explicit template arguments. if (DependentFunctionTemplateSpecializationInfo *Info = D->getDependentSpecializationInfo()) { assert(isFriend && "non-friend has dependent specialization info?"); // Instantiate the explicit template arguments. TemplateArgumentListInfo ExplicitArgs(Info->getLAngleLoc(), Info->getRAngleLoc()); if (SemaRef.SubstTemplateArguments(Info->arguments(), TemplateArgs, ExplicitArgs)) return nullptr; // Map the candidate templates to their instantiations. for (unsigned I = 0, E = Info->getNumTemplates(); I != E; ++I) { Decl *Temp = SemaRef.FindInstantiatedDecl(D->getLocation(), Info->getTemplate(I), TemplateArgs); if (!Temp) return nullptr; Previous.addDecl(cast(Temp)); } if (SemaRef.CheckFunctionTemplateSpecialization(Method, &ExplicitArgs, Previous)) Method->setInvalidDecl(); IsExplicitSpecialization = true; } else if (const ASTTemplateArgumentListInfo *Info = ClassScopeSpecializationArgs.getValueOr( D->getTemplateSpecializationArgsAsWritten())) { SemaRef.LookupQualifiedName(Previous, DC); TemplateArgumentListInfo ExplicitArgs(Info->getLAngleLoc(), Info->getRAngleLoc()); if (SemaRef.SubstTemplateArguments(Info->arguments(), TemplateArgs, ExplicitArgs)) return nullptr; if (SemaRef.CheckFunctionTemplateSpecialization(Method, &ExplicitArgs, Previous)) Method->setInvalidDecl(); IsExplicitSpecialization = true; } else if (ClassScopeSpecializationArgs) { // Class-scope explicit specialization written without explicit template // arguments. 
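  // For example (a form Clang accepts as a Microsoft compatibility extension):
  //   template <class T> struct S {
  //     template <class U> void f(U);
  //     template <> void f(int);   // class-scope specialization; template
  //   };                           // arguments are deduced, not written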
    SemaRef.LookupQualifiedName(Previous, DC);

    if (SemaRef.CheckFunctionTemplateSpecialization(Method, nullptr, Previous))
      Method->setInvalidDecl();

    IsExplicitSpecialization = true;
  } else if (!FunctionTemplate || TemplateParams || isFriend) {
    SemaRef.LookupQualifiedName(Previous, Record);

    // In C++, the previous declaration we find might be a tag type
    // (class or enum). In this case, the new declaration will hide the
    // tag type. Note that this does not apply if we're declaring a
    // typedef (C++ [dcl.typedef]p4).
    if (Previous.isSingleTagDecl())
      Previous.clear();
  }

  SemaRef.CheckFunctionDeclaration(nullptr, Method, Previous,
                                   IsExplicitSpecialization);

  if (D->isPure())
    SemaRef.CheckPureMethod(Method, SourceRange());

  // Propagate access. For a non-friend declaration, the access is
  // whatever we're propagating from. For a friend, it should be the
  // previous declaration we just found.
  if (isFriend && Method->getPreviousDecl())
    Method->setAccess(Method->getPreviousDecl()->getAccess());
  else
    Method->setAccess(D->getAccess());
  if (FunctionTemplate)
    FunctionTemplate->setAccess(Method->getAccess());

  SemaRef.CheckOverrideControl(Method);

  // If a function is defined as defaulted or deleted, mark it as such now.
  if (D->isExplicitlyDefaulted()) {
    if (SubstDefaultedFunction(Method, D))
      return nullptr;
  }
  if (D->isDeletedAsWritten())
    SemaRef.SetDeclDeleted(Method, Method->getLocation());

  // If this is an explicit specialization, mark the implicitly-instantiated
  // template specialization as being an explicit specialization too.
  // FIXME: Is this necessary?
  if (IsExplicitSpecialization && !isFriend)
    SemaRef.CompleteMemberSpecialization(Method, Previous);

  // If there's a function template, let our caller handle it.
  if (FunctionTemplate) {
    // do nothing

  // Don't hide a (potentially) valid declaration with an invalid one.
  } else if (Method->isInvalidDecl() && !Previous.empty()) {
    // do nothing

  // Otherwise, check access to friends and make them visible.
  } else if (isFriend) {
    // We only need to re-check access for methods which we didn't
    // manage to match during parsing.
    if (!D->getPreviousDecl())
      SemaRef.CheckFriendAccess(Method);

    Record->makeDeclVisibleInContext(Method);

  // Otherwise, add the declaration. We don't need to do this for
  // class-scope specializations because we'll have matched them with
  // the appropriate template.
} else { Owner->addDecl(Method); } // PR17480: Honor the used attribute to instantiate member function // definitions if (Method->hasAttr()) { if (const auto *A = dyn_cast(Owner)) { SourceLocation Loc; if (const MemberSpecializationInfo *MSInfo = A->getMemberSpecializationInfo()) Loc = MSInfo->getPointOfInstantiation(); else if (const auto *Spec = dyn_cast(A)) Loc = Spec->getPointOfInstantiation(); SemaRef.MarkFunctionReferenced(Loc, Method); } } return Method; } Decl *TemplateDeclInstantiator::VisitCXXConstructorDecl(CXXConstructorDecl *D) { return VisitCXXMethodDecl(D); } Decl *TemplateDeclInstantiator::VisitCXXDestructorDecl(CXXDestructorDecl *D) { return VisitCXXMethodDecl(D); } Decl *TemplateDeclInstantiator::VisitCXXConversionDecl(CXXConversionDecl *D) { return VisitCXXMethodDecl(D); } Decl *TemplateDeclInstantiator::VisitParmVarDecl(ParmVarDecl *D) { return SemaRef.SubstParmVarDecl(D, TemplateArgs, /*indexAdjustment*/ 0, None, /*ExpectParameterPack=*/ false); } Decl *TemplateDeclInstantiator::VisitTemplateTypeParmDecl( TemplateTypeParmDecl *D) { assert(D->getTypeForDecl()->isTemplateTypeParmType()); Optional NumExpanded; if (const TypeConstraint *TC = D->getTypeConstraint()) { if (D->isPackExpansion() && !D->isExpandedParameterPack()) { assert(TC->getTemplateArgsAsWritten() && "type parameter can only be an expansion when explicit arguments " "are specified"); // The template type parameter pack's type is a pack expansion of types. // Determine whether we need to expand this parameter pack into separate // types. SmallVector Unexpanded; for (auto &ArgLoc : TC->getTemplateArgsAsWritten()->arguments()) SemaRef.collectUnexpandedParameterPacks(ArgLoc, Unexpanded); // Determine whether the set of unexpanded parameter packs can and should // be expanded. bool Expand = true; bool RetainExpansion = false; if (SemaRef.CheckParameterPacksForExpansion( cast(TC->getImmediatelyDeclaredConstraint()) ->getEllipsisLoc(), SourceRange(TC->getConceptNameLoc(), TC->hasExplicitTemplateArgs() ? TC->getTemplateArgsAsWritten()->getRAngleLoc() : TC->getConceptNameInfo().getEndLoc()), Unexpanded, TemplateArgs, Expand, RetainExpansion, NumExpanded)) return nullptr; } } TemplateTypeParmDecl *Inst = TemplateTypeParmDecl::Create( SemaRef.Context, Owner, D->getBeginLoc(), D->getLocation(), D->getDepth() - TemplateArgs.getNumSubstitutedLevels(), D->getIndex(), D->getIdentifier(), D->wasDeclaredWithTypename(), D->isParameterPack(), D->hasTypeConstraint(), NumExpanded); Inst->setAccess(AS_public); Inst->setImplicit(D->isImplicit()); if (auto *TC = D->getTypeConstraint()) { if (!D->isImplicit()) { // Invented template parameter type constraints will be instantiated with // the corresponding auto-typed parameter as it might reference other // parameters. // TODO: Concepts: do not instantiate the constraint (delayed constraint // substitution) if (SemaRef.SubstTypeConstraint(Inst, TC, TemplateArgs)) return nullptr; } } if (D->hasDefaultArgument() && !D->defaultArgumentWasInherited()) { TypeSourceInfo *InstantiatedDefaultArg = SemaRef.SubstType(D->getDefaultArgumentInfo(), TemplateArgs, D->getDefaultArgumentLoc(), D->getDeclName()); if (InstantiatedDefaultArg) Inst->setDefaultArgument(InstantiatedDefaultArg); } // Introduce this template parameter's instantiation into the instantiation // scope. 
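  // (The default-argument substitution just above covers member templates
  // such as
  //   template <class T> struct S { template <class U = T*> void f(); };
  // where instantiating S<int> must rewrite the default 'T*' to 'int*'.)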
SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, Inst); return Inst; } Decl *TemplateDeclInstantiator::VisitNonTypeTemplateParmDecl( NonTypeTemplateParmDecl *D) { // Substitute into the type of the non-type template parameter. TypeLoc TL = D->getTypeSourceInfo()->getTypeLoc(); SmallVector ExpandedParameterPackTypesAsWritten; SmallVector ExpandedParameterPackTypes; bool IsExpandedParameterPack = false; TypeSourceInfo *DI; QualType T; bool Invalid = false; if (D->isExpandedParameterPack()) { // The non-type template parameter pack is an already-expanded pack // expansion of types. Substitute into each of the expanded types. ExpandedParameterPackTypes.reserve(D->getNumExpansionTypes()); ExpandedParameterPackTypesAsWritten.reserve(D->getNumExpansionTypes()); for (unsigned I = 0, N = D->getNumExpansionTypes(); I != N; ++I) { TypeSourceInfo *NewDI = SemaRef.SubstType(D->getExpansionTypeSourceInfo(I), TemplateArgs, D->getLocation(), D->getDeclName()); if (!NewDI) return nullptr; QualType NewT = SemaRef.CheckNonTypeTemplateParameterType(NewDI, D->getLocation()); if (NewT.isNull()) return nullptr; ExpandedParameterPackTypesAsWritten.push_back(NewDI); ExpandedParameterPackTypes.push_back(NewT); } IsExpandedParameterPack = true; DI = D->getTypeSourceInfo(); T = DI->getType(); } else if (D->isPackExpansion()) { // The non-type template parameter pack's type is a pack expansion of types. // Determine whether we need to expand this parameter pack into separate // types. PackExpansionTypeLoc Expansion = TL.castAs(); TypeLoc Pattern = Expansion.getPatternLoc(); SmallVector Unexpanded; SemaRef.collectUnexpandedParameterPacks(Pattern, Unexpanded); // Determine whether the set of unexpanded parameter packs can and should // be expanded. bool Expand = true; bool RetainExpansion = false; Optional OrigNumExpansions = Expansion.getTypePtr()->getNumExpansions(); Optional NumExpansions = OrigNumExpansions; if (SemaRef.CheckParameterPacksForExpansion(Expansion.getEllipsisLoc(), Pattern.getSourceRange(), Unexpanded, TemplateArgs, Expand, RetainExpansion, NumExpansions)) return nullptr; if (Expand) { for (unsigned I = 0; I != *NumExpansions; ++I) { Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, I); TypeSourceInfo *NewDI = SemaRef.SubstType(Pattern, TemplateArgs, D->getLocation(), D->getDeclName()); if (!NewDI) return nullptr; QualType NewT = SemaRef.CheckNonTypeTemplateParameterType(NewDI, D->getLocation()); if (NewT.isNull()) return nullptr; ExpandedParameterPackTypesAsWritten.push_back(NewDI); ExpandedParameterPackTypes.push_back(NewT); } // Note that we have an expanded parameter pack. The "type" of this // expanded parameter pack is the original expansion type, but callers // will end up using the expanded parameter pack types for type-checking. IsExpandedParameterPack = true; DI = D->getTypeSourceInfo(); T = DI->getType(); } else { // We cannot fully expand the pack expansion now, so substitute into the // pattern and create a new pack expansion type. Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, -1); TypeSourceInfo *NewPattern = SemaRef.SubstType(Pattern, TemplateArgs, D->getLocation(), D->getDeclName()); if (!NewPattern) return nullptr; SemaRef.CheckNonTypeTemplateParameterType(NewPattern, D->getLocation()); DI = SemaRef.CheckPackExpansion(NewPattern, Expansion.getEllipsisLoc(), NumExpansions); if (!DI) return nullptr; T = DI->getType(); } } else { // Simple case: substitution into a parameter that is not a parameter pack. 
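  // A minimal illustrative example of the non-pack case handled below: a
  // member template's non-type parameter whose type names an enclosing
  // template parameter, e.g.
  //
  //   template <typename T> struct A {
  //     template <T Value> struct B {};
  //   };
  //
  // Instantiating A<int> substitutes T=int, giving a parameter of type 'int'.
  // If the substituted type is not a valid non-type parameter type (e.g.
  // T=float before C++20), the parameter is marked invalid and given type
  // 'int' purely for error recovery, as the code below does.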
DI = SemaRef.SubstType(D->getTypeSourceInfo(), TemplateArgs, D->getLocation(), D->getDeclName()); if (!DI) return nullptr; // Check that this type is acceptable for a non-type template parameter. T = SemaRef.CheckNonTypeTemplateParameterType(DI, D->getLocation()); if (T.isNull()) { T = SemaRef.Context.IntTy; Invalid = true; } } NonTypeTemplateParmDecl *Param; if (IsExpandedParameterPack) Param = NonTypeTemplateParmDecl::Create( SemaRef.Context, Owner, D->getInnerLocStart(), D->getLocation(), D->getDepth() - TemplateArgs.getNumSubstitutedLevels(), D->getPosition(), D->getIdentifier(), T, DI, ExpandedParameterPackTypes, ExpandedParameterPackTypesAsWritten); else Param = NonTypeTemplateParmDecl::Create( SemaRef.Context, Owner, D->getInnerLocStart(), D->getLocation(), D->getDepth() - TemplateArgs.getNumSubstitutedLevels(), D->getPosition(), D->getIdentifier(), T, D->isParameterPack(), DI); if (AutoTypeLoc AutoLoc = DI->getTypeLoc().getContainedAutoTypeLoc()) if (AutoLoc.isConstrained()) if (SemaRef.AttachTypeConstraint( AutoLoc, Param, IsExpandedParameterPack ? DI->getTypeLoc().getAs() .getEllipsisLoc() : SourceLocation())) Invalid = true; Param->setAccess(AS_public); Param->setImplicit(D->isImplicit()); if (Invalid) Param->setInvalidDecl(); if (D->hasDefaultArgument() && !D->defaultArgumentWasInherited()) { EnterExpressionEvaluationContext ConstantEvaluated( SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated); ExprResult Value = SemaRef.SubstExpr(D->getDefaultArgument(), TemplateArgs); if (!Value.isInvalid()) Param->setDefaultArgument(Value.get()); } // Introduce this template parameter's instantiation into the instantiation // scope. SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, Param); return Param; } static void collectUnexpandedParameterPacks( Sema &S, TemplateParameterList *Params, SmallVectorImpl &Unexpanded) { for (const auto &P : *Params) { if (P->isTemplateParameterPack()) continue; if (NonTypeTemplateParmDecl *NTTP = dyn_cast(P)) S.collectUnexpandedParameterPacks(NTTP->getTypeSourceInfo()->getTypeLoc(), Unexpanded); if (TemplateTemplateParmDecl *TTP = dyn_cast(P)) collectUnexpandedParameterPacks(S, TTP->getTemplateParameters(), Unexpanded); } } Decl * TemplateDeclInstantiator::VisitTemplateTemplateParmDecl( TemplateTemplateParmDecl *D) { // Instantiate the template parameter list of the template template parameter. TemplateParameterList *TempParams = D->getTemplateParameters(); TemplateParameterList *InstParams; SmallVector ExpandedParams; bool IsExpandedParameterPack = false; if (D->isExpandedParameterPack()) { // The template template parameter pack is an already-expanded pack // expansion of template parameters. Substitute into each of the expanded // parameters. ExpandedParams.reserve(D->getNumExpansionTemplateParameters()); for (unsigned I = 0, N = D->getNumExpansionTemplateParameters(); I != N; ++I) { LocalInstantiationScope Scope(SemaRef); TemplateParameterList *Expansion = SubstTemplateParams(D->getExpansionTemplateParameters(I)); if (!Expansion) return nullptr; ExpandedParams.push_back(Expansion); } IsExpandedParameterPack = true; InstParams = TempParams; } else if (D->isPackExpansion()) { // The template template parameter pack expands to a pack of template // template parameters. Determine whether we need to expand this parameter // pack into separate parameters. 
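  // A minimal illustrative sketch of the construct handled here: a template
  // template parameter pack whose own parameter list mentions an outer pack,
  // e.g.
  //
  //   template <typename... Ts> struct Outer {
  //     template <template <Ts> class... TT> struct Inner;
  //   };
  //
  // For Outer<int, long>, TT expands into two template template parameters,
  // one whose parameter list is <int> and one whose parameter list is <long>.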
SmallVector Unexpanded; collectUnexpandedParameterPacks(SemaRef, D->getTemplateParameters(), Unexpanded); // Determine whether the set of unexpanded parameter packs can and should // be expanded. bool Expand = true; bool RetainExpansion = false; Optional NumExpansions; if (SemaRef.CheckParameterPacksForExpansion(D->getLocation(), TempParams->getSourceRange(), Unexpanded, TemplateArgs, Expand, RetainExpansion, NumExpansions)) return nullptr; if (Expand) { for (unsigned I = 0; I != *NumExpansions; ++I) { Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, I); LocalInstantiationScope Scope(SemaRef); TemplateParameterList *Expansion = SubstTemplateParams(TempParams); if (!Expansion) return nullptr; ExpandedParams.push_back(Expansion); } // Note that we have an expanded parameter pack. The "type" of this // expanded parameter pack is the original expansion type, but callers // will end up using the expanded parameter pack types for type-checking. IsExpandedParameterPack = true; InstParams = TempParams; } else { // We cannot fully expand the pack expansion now, so just substitute // into the pattern. Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, -1); LocalInstantiationScope Scope(SemaRef); InstParams = SubstTemplateParams(TempParams); if (!InstParams) return nullptr; } } else { // Perform the actual substitution of template parameters within a new, // local instantiation scope. LocalInstantiationScope Scope(SemaRef); InstParams = SubstTemplateParams(TempParams); if (!InstParams) return nullptr; } // Build the template template parameter. TemplateTemplateParmDecl *Param; if (IsExpandedParameterPack) Param = TemplateTemplateParmDecl::Create( SemaRef.Context, Owner, D->getLocation(), D->getDepth() - TemplateArgs.getNumSubstitutedLevels(), D->getPosition(), D->getIdentifier(), InstParams, ExpandedParams); else Param = TemplateTemplateParmDecl::Create( SemaRef.Context, Owner, D->getLocation(), D->getDepth() - TemplateArgs.getNumSubstitutedLevels(), D->getPosition(), D->isParameterPack(), D->getIdentifier(), InstParams); if (D->hasDefaultArgument() && !D->defaultArgumentWasInherited()) { NestedNameSpecifierLoc QualifierLoc = D->getDefaultArgument().getTemplateQualifierLoc(); QualifierLoc = SemaRef.SubstNestedNameSpecifierLoc(QualifierLoc, TemplateArgs); TemplateName TName = SemaRef.SubstTemplateName( QualifierLoc, D->getDefaultArgument().getArgument().getAsTemplate(), D->getDefaultArgument().getTemplateNameLoc(), TemplateArgs); if (!TName.isNull()) Param->setDefaultArgument( SemaRef.Context, TemplateArgumentLoc(SemaRef.Context, TemplateArgument(TName), D->getDefaultArgument().getTemplateQualifierLoc(), D->getDefaultArgument().getTemplateNameLoc())); } Param->setAccess(AS_public); Param->setImplicit(D->isImplicit()); // Introduce this template parameter's instantiation into the instantiation // scope. SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, Param); return Param; } Decl *TemplateDeclInstantiator::VisitUsingDirectiveDecl(UsingDirectiveDecl *D) { // Using directives are never dependent (and never contain any types or // expressions), so they require no explicit instantiation work. UsingDirectiveDecl *Inst = UsingDirectiveDecl::Create(SemaRef.Context, Owner, D->getLocation(), D->getNamespaceKeyLocation(), D->getQualifierLoc(), D->getIdentLocation(), D->getNominatedNamespace(), D->getCommonAncestor()); // Add the using directive to its declaration context // only if this is not a function or method. 
if (!Owner->isFunctionOrMethod()) Owner->addDecl(Inst); return Inst; } Decl *TemplateDeclInstantiator::VisitBaseUsingDecls(BaseUsingDecl *D, BaseUsingDecl *Inst, LookupResult *Lookup) { bool isFunctionScope = Owner->isFunctionOrMethod(); for (auto *Shadow : D->shadows()) { // FIXME: UsingShadowDecl doesn't preserve its immediate target, so // reconstruct it in the case where it matters. Hm, can we extract it from // the DeclSpec when parsing and save it in the UsingDecl itself? NamedDecl *OldTarget = Shadow->getTargetDecl(); if (auto *CUSD = dyn_cast(Shadow)) if (auto *BaseShadow = CUSD->getNominatedBaseClassShadowDecl()) OldTarget = BaseShadow; NamedDecl *InstTarget = nullptr; if (auto *EmptyD = dyn_cast(Shadow->getTargetDecl())) { InstTarget = UnresolvedUsingIfExistsDecl::Create( SemaRef.Context, Owner, EmptyD->getLocation(), EmptyD->getDeclName()); } else { InstTarget = cast_or_null(SemaRef.FindInstantiatedDecl( Shadow->getLocation(), OldTarget, TemplateArgs)); } if (!InstTarget) return nullptr; UsingShadowDecl *PrevDecl = nullptr; if (Lookup && SemaRef.CheckUsingShadowDecl(Inst, InstTarget, *Lookup, PrevDecl)) continue; if (UsingShadowDecl *OldPrev = getPreviousDeclForInstantiation(Shadow)) PrevDecl = cast_or_null(SemaRef.FindInstantiatedDecl( Shadow->getLocation(), OldPrev, TemplateArgs)); UsingShadowDecl *InstShadow = SemaRef.BuildUsingShadowDecl( /*Scope*/ nullptr, Inst, InstTarget, PrevDecl); SemaRef.Context.setInstantiatedFromUsingShadowDecl(InstShadow, Shadow); if (isFunctionScope) SemaRef.CurrentInstantiationScope->InstantiatedLocal(Shadow, InstShadow); } return Inst; } Decl *TemplateDeclInstantiator::VisitUsingDecl(UsingDecl *D) { // The nested name specifier may be dependent, for example // template struct t { // struct s1 { T f1(); }; // struct s2 : s1 { using s1::f1; }; // }; // template struct t; // Here, in using s1::f1, s1 refers to t::s1; // we need to substitute for t::s1. NestedNameSpecifierLoc QualifierLoc = SemaRef.SubstNestedNameSpecifierLoc(D->getQualifierLoc(), TemplateArgs); if (!QualifierLoc) return nullptr; // For an inheriting constructor declaration, the name of the using // declaration is the name of a constructor in this class, not in the // base class. DeclarationNameInfo NameInfo = D->getNameInfo(); if (NameInfo.getName().getNameKind() == DeclarationName::CXXConstructorName) if (auto *RD = dyn_cast(SemaRef.CurContext)) NameInfo.setName(SemaRef.Context.DeclarationNames.getCXXConstructorName( SemaRef.Context.getCanonicalType(SemaRef.Context.getRecordType(RD)))); // We only need to do redeclaration lookups if we're in a class scope (in // fact, it's not really even possible in non-class scopes). bool CheckRedeclaration = Owner->isRecord(); LookupResult Prev(SemaRef, NameInfo, Sema::LookupUsingDeclName, Sema::ForVisibleRedeclaration); UsingDecl *NewUD = UsingDecl::Create(SemaRef.Context, Owner, D->getUsingLoc(), QualifierLoc, NameInfo, D->hasTypename()); CXXScopeSpec SS; SS.Adopt(QualifierLoc); if (CheckRedeclaration) { Prev.setHideTags(false); SemaRef.LookupQualifiedName(Prev, Owner); // Check for invalid redeclarations. 
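  // A minimal illustrative example of a dependent using-declaration that
  // reaches this point:
  //
  //   template <class T> struct t {
  //     struct s1 { T f1(); };
  //     struct s2 : s1 { using s1::f1; };
  //   };
  //
  // Instantiating t<int>::s2 must redo the lookup of f1 in the instantiated
  // s1, including the redeclaration checks performed below.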
if (SemaRef.CheckUsingDeclRedeclaration(D->getUsingLoc(), D->hasTypename(), SS, D->getLocation(), Prev)) NewUD->setInvalidDecl(); } if (!NewUD->isInvalidDecl() && SemaRef.CheckUsingDeclQualifier(D->getUsingLoc(), D->hasTypename(), SS, NameInfo, D->getLocation(), nullptr, D)) NewUD->setInvalidDecl(); SemaRef.Context.setInstantiatedFromUsingDecl(NewUD, D); NewUD->setAccess(D->getAccess()); Owner->addDecl(NewUD); // Don't process the shadow decls for an invalid decl. if (NewUD->isInvalidDecl()) return NewUD; // If the using scope was dependent, or we had dependent bases, we need to // recheck the inheritance if (NameInfo.getName().getNameKind() == DeclarationName::CXXConstructorName) SemaRef.CheckInheritingConstructorUsingDecl(NewUD); return VisitBaseUsingDecls(D, NewUD, CheckRedeclaration ? &Prev : nullptr); } Decl *TemplateDeclInstantiator::VisitUsingEnumDecl(UsingEnumDecl *D) { // Cannot be a dependent type, but still could be an instantiation EnumDecl *EnumD = cast_or_null(SemaRef.FindInstantiatedDecl( D->getLocation(), D->getEnumDecl(), TemplateArgs)); if (SemaRef.RequireCompleteEnumDecl(EnumD, EnumD->getLocation())) return nullptr; UsingEnumDecl *NewUD = UsingEnumDecl::Create(SemaRef.Context, Owner, D->getUsingLoc(), D->getEnumLoc(), D->getLocation(), EnumD); SemaRef.Context.setInstantiatedFromUsingEnumDecl(NewUD, D); NewUD->setAccess(D->getAccess()); Owner->addDecl(NewUD); // Don't process the shadow decls for an invalid decl. if (NewUD->isInvalidDecl()) return NewUD; // We don't have to recheck for duplication of the UsingEnumDecl itself, as it // cannot be dependent, and will therefore have been checked during template // definition. return VisitBaseUsingDecls(D, NewUD, nullptr); } Decl *TemplateDeclInstantiator::VisitUsingShadowDecl(UsingShadowDecl *D) { // Ignore these; we handle them in bulk when processing the UsingDecl. return nullptr; } Decl *TemplateDeclInstantiator::VisitConstructorUsingShadowDecl( ConstructorUsingShadowDecl *D) { // Ignore these; we handle them in bulk when processing the UsingDecl. return nullptr; } template Decl *TemplateDeclInstantiator::instantiateUnresolvedUsingDecl( T *D, bool InstantiatingPackElement) { // If this is a pack expansion, expand it now. if (D->isPackExpansion() && !InstantiatingPackElement) { SmallVector Unexpanded; SemaRef.collectUnexpandedParameterPacks(D->getQualifierLoc(), Unexpanded); SemaRef.collectUnexpandedParameterPacks(D->getNameInfo(), Unexpanded); // Determine whether the set of unexpanded parameter packs can and should // be expanded. bool Expand = true; bool RetainExpansion = false; Optional NumExpansions; if (SemaRef.CheckParameterPacksForExpansion( D->getEllipsisLoc(), D->getSourceRange(), Unexpanded, TemplateArgs, Expand, RetainExpansion, NumExpansions)) return nullptr; // This declaration cannot appear within a function template signature, // so we can't have a partial argument list for a parameter pack. assert(!RetainExpansion && "should never need to retain an expansion for UsingPackDecl"); if (!Expand) { // We cannot fully expand the pack expansion now, so substitute into the // pattern and create a new pack expansion. Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, -1); return instantiateUnresolvedUsingDecl(D, true); } // Within a function, we don't have any normal way to check for conflicts // between shadow declarations from different using declarations in the // same pack expansion, but this is always ill-formed because all expansions // must produce (conflicting) enumerators. 
// // Sadly we can't just reject this in the template definition because it // could be valid if the pack is empty or has exactly one expansion. if (D->getDeclContext()->isFunctionOrMethod() && *NumExpansions > 1) { SemaRef.Diag(D->getEllipsisLoc(), diag::err_using_decl_redeclaration_expansion); return nullptr; } // Instantiate the slices of this pack and build a UsingPackDecl. SmallVector Expansions; for (unsigned I = 0; I != *NumExpansions; ++I) { Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, I); Decl *Slice = instantiateUnresolvedUsingDecl(D, true); if (!Slice) return nullptr; // Note that we can still get unresolved using declarations here, if we // had arguments for all packs but the pattern also contained other // template arguments (this only happens during partial substitution, eg // into the body of a generic lambda in a function template). Expansions.push_back(cast(Slice)); } auto *NewD = SemaRef.BuildUsingPackDecl(D, Expansions); if (isDeclWithinFunction(D)) SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, NewD); return NewD; } UnresolvedUsingTypenameDecl *TD = dyn_cast(D); SourceLocation TypenameLoc = TD ? TD->getTypenameLoc() : SourceLocation(); NestedNameSpecifierLoc QualifierLoc = SemaRef.SubstNestedNameSpecifierLoc(D->getQualifierLoc(), TemplateArgs); if (!QualifierLoc) return nullptr; CXXScopeSpec SS; SS.Adopt(QualifierLoc); DeclarationNameInfo NameInfo = SemaRef.SubstDeclarationNameInfo(D->getNameInfo(), TemplateArgs); // Produce a pack expansion only if we're not instantiating a particular // slice of a pack expansion. bool InstantiatingSlice = D->getEllipsisLoc().isValid() && SemaRef.ArgumentPackSubstitutionIndex != -1; SourceLocation EllipsisLoc = InstantiatingSlice ? SourceLocation() : D->getEllipsisLoc(); bool IsUsingIfExists = D->template hasAttr(); NamedDecl *UD = SemaRef.BuildUsingDeclaration( /*Scope*/ nullptr, D->getAccess(), D->getUsingLoc(), /*HasTypename*/ TD, TypenameLoc, SS, NameInfo, EllipsisLoc, ParsedAttributesView(), /*IsInstantiation*/ true, IsUsingIfExists); if (UD) { SemaRef.InstantiateAttrs(TemplateArgs, D, UD); SemaRef.Context.setInstantiatedFromUsingDecl(UD, D); } return UD; } Decl *TemplateDeclInstantiator::VisitUnresolvedUsingTypenameDecl( UnresolvedUsingTypenameDecl *D) { return instantiateUnresolvedUsingDecl(D); } Decl *TemplateDeclInstantiator::VisitUnresolvedUsingValueDecl( UnresolvedUsingValueDecl *D) { return instantiateUnresolvedUsingDecl(D); } Decl *TemplateDeclInstantiator::VisitUnresolvedUsingIfExistsDecl( UnresolvedUsingIfExistsDecl *D) { llvm_unreachable("referring to unresolved decl out of UsingShadowDecl"); } Decl *TemplateDeclInstantiator::VisitUsingPackDecl(UsingPackDecl *D) { SmallVector Expansions; for (auto *UD : D->expansions()) { if (NamedDecl *NewUD = SemaRef.FindInstantiatedDecl(D->getLocation(), UD, TemplateArgs)) Expansions.push_back(NewUD); else return nullptr; } auto *NewD = SemaRef.BuildUsingPackDecl(D, Expansions); if (isDeclWithinFunction(D)) SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, NewD); return NewD; } Decl *TemplateDeclInstantiator::VisitClassScopeFunctionSpecializationDecl( ClassScopeFunctionSpecializationDecl *Decl) { CXXMethodDecl *OldFD = Decl->getSpecialization(); return cast_or_null( VisitCXXMethodDecl(OldFD, nullptr, Decl->getTemplateArgsAsWritten())); } Decl *TemplateDeclInstantiator::VisitOMPThreadPrivateDecl( OMPThreadPrivateDecl *D) { SmallVector Vars; for (auto *I : D->varlists()) { Expr *Var = SemaRef.SubstExpr(I, TemplateArgs).get(); assert(isa(Var) && 
"threadprivate arg is not a DeclRefExpr"); Vars.push_back(Var); } OMPThreadPrivateDecl *TD = SemaRef.CheckOMPThreadPrivateDecl(D->getLocation(), Vars); TD->setAccess(AS_public); Owner->addDecl(TD); return TD; } Decl *TemplateDeclInstantiator::VisitOMPAllocateDecl(OMPAllocateDecl *D) { SmallVector Vars; for (auto *I : D->varlists()) { Expr *Var = SemaRef.SubstExpr(I, TemplateArgs).get(); assert(isa(Var) && "allocate arg is not a DeclRefExpr"); Vars.push_back(Var); } SmallVector Clauses; // Copy map clauses from the original mapper. for (OMPClause *C : D->clauselists()) { OMPClause *IC = nullptr; if (auto *AC = dyn_cast(C)) { ExprResult NewE = SemaRef.SubstExpr(AC->getAllocator(), TemplateArgs); if (!NewE.isUsable()) continue; IC = SemaRef.ActOnOpenMPAllocatorClause( NewE.get(), AC->getBeginLoc(), AC->getLParenLoc(), AC->getEndLoc()); } else if (auto *AC = dyn_cast(C)) { ExprResult NewE = SemaRef.SubstExpr(AC->getAlignment(), TemplateArgs); if (!NewE.isUsable()) continue; IC = SemaRef.ActOnOpenMPAlignClause(NewE.get(), AC->getBeginLoc(), AC->getLParenLoc(), AC->getEndLoc()); // If align clause value ends up being invalid, this can end up null. if (!IC) continue; } Clauses.push_back(IC); } Sema::DeclGroupPtrTy Res = SemaRef.ActOnOpenMPAllocateDirective( D->getLocation(), Vars, Clauses, Owner); if (Res.get().isNull()) return nullptr; return Res.get().getSingleDecl(); } Decl *TemplateDeclInstantiator::VisitOMPRequiresDecl(OMPRequiresDecl *D) { llvm_unreachable( "Requires directive cannot be instantiated within a dependent context"); } Decl *TemplateDeclInstantiator::VisitOMPDeclareReductionDecl( OMPDeclareReductionDecl *D) { // Instantiate type and check if it is allowed. const bool RequiresInstantiation = D->getType()->isDependentType() || D->getType()->isInstantiationDependentType() || D->getType()->containsUnexpandedParameterPack(); QualType SubstReductionType; if (RequiresInstantiation) { SubstReductionType = SemaRef.ActOnOpenMPDeclareReductionType( D->getLocation(), ParsedType::make(SemaRef.SubstType( D->getType(), TemplateArgs, D->getLocation(), DeclarationName()))); } else { SubstReductionType = D->getType(); } if (SubstReductionType.isNull()) return nullptr; Expr *Combiner = D->getCombiner(); Expr *Init = D->getInitializer(); bool IsCorrect = true; // Create instantiated copy. std::pair ReductionTypes[] = { std::make_pair(SubstReductionType, D->getLocation())}; auto *PrevDeclInScope = D->getPrevDeclInScope(); if (PrevDeclInScope && !PrevDeclInScope->isInvalidDecl()) { PrevDeclInScope = cast( SemaRef.CurrentInstantiationScope->findInstantiationOf(PrevDeclInScope) ->get()); } auto DRD = SemaRef.ActOnOpenMPDeclareReductionDirectiveStart( /*S=*/nullptr, Owner, D->getDeclName(), ReductionTypes, D->getAccess(), PrevDeclInScope); auto *NewDRD = cast(DRD.get().getSingleDecl()); SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, NewDRD); Expr *SubstCombiner = nullptr; Expr *SubstInitializer = nullptr; // Combiners instantiation sequence. 
if (Combiner) { SemaRef.ActOnOpenMPDeclareReductionCombinerStart( /*S=*/nullptr, NewDRD); SemaRef.CurrentInstantiationScope->InstantiatedLocal( cast(D->getCombinerIn())->getDecl(), cast(NewDRD->getCombinerIn())->getDecl()); SemaRef.CurrentInstantiationScope->InstantiatedLocal( cast(D->getCombinerOut())->getDecl(), cast(NewDRD->getCombinerOut())->getDecl()); auto *ThisContext = dyn_cast_or_null(Owner); Sema::CXXThisScopeRAII ThisScope(SemaRef, ThisContext, Qualifiers(), ThisContext); SubstCombiner = SemaRef.SubstExpr(Combiner, TemplateArgs).get(); SemaRef.ActOnOpenMPDeclareReductionCombinerEnd(NewDRD, SubstCombiner); } // Initializers instantiation sequence. if (Init) { VarDecl *OmpPrivParm = SemaRef.ActOnOpenMPDeclareReductionInitializerStart( /*S=*/nullptr, NewDRD); SemaRef.CurrentInstantiationScope->InstantiatedLocal( cast(D->getInitOrig())->getDecl(), cast(NewDRD->getInitOrig())->getDecl()); SemaRef.CurrentInstantiationScope->InstantiatedLocal( cast(D->getInitPriv())->getDecl(), cast(NewDRD->getInitPriv())->getDecl()); if (D->getInitializerKind() == OMPDeclareReductionDecl::CallInit) { SubstInitializer = SemaRef.SubstExpr(Init, TemplateArgs).get(); } else { auto *OldPrivParm = cast(cast(D->getInitPriv())->getDecl()); IsCorrect = IsCorrect && OldPrivParm->hasInit(); if (IsCorrect) SemaRef.InstantiateVariableInitializer(OmpPrivParm, OldPrivParm, TemplateArgs); } SemaRef.ActOnOpenMPDeclareReductionInitializerEnd(NewDRD, SubstInitializer, OmpPrivParm); } IsCorrect = IsCorrect && SubstCombiner && (!Init || (D->getInitializerKind() == OMPDeclareReductionDecl::CallInit && SubstInitializer) || (D->getInitializerKind() != OMPDeclareReductionDecl::CallInit && !SubstInitializer)); (void)SemaRef.ActOnOpenMPDeclareReductionDirectiveEnd( /*S=*/nullptr, DRD, IsCorrect && !D->isInvalidDecl()); return NewDRD; } Decl * TemplateDeclInstantiator::VisitOMPDeclareMapperDecl(OMPDeclareMapperDecl *D) { // Instantiate type and check if it is allowed. const bool RequiresInstantiation = D->getType()->isDependentType() || D->getType()->isInstantiationDependentType() || D->getType()->containsUnexpandedParameterPack(); QualType SubstMapperTy; DeclarationName VN = D->getVarName(); if (RequiresInstantiation) { SubstMapperTy = SemaRef.ActOnOpenMPDeclareMapperType( D->getLocation(), ParsedType::make(SemaRef.SubstType(D->getType(), TemplateArgs, D->getLocation(), VN))); } else { SubstMapperTy = D->getType(); } if (SubstMapperTy.isNull()) return nullptr; // Create an instantiated copy of mapper. auto *PrevDeclInScope = D->getPrevDeclInScope(); if (PrevDeclInScope && !PrevDeclInScope->isInvalidDecl()) { PrevDeclInScope = cast( SemaRef.CurrentInstantiationScope->findInstantiationOf(PrevDeclInScope) ->get()); } bool IsCorrect = true; SmallVector Clauses; // Instantiate the mapper variable. DeclarationNameInfo DirName; SemaRef.StartOpenMPDSABlock(llvm::omp::OMPD_declare_mapper, DirName, /*S=*/nullptr, (*D->clauselist_begin())->getBeginLoc()); ExprResult MapperVarRef = SemaRef.ActOnOpenMPDeclareMapperDirectiveVarDecl( /*S=*/nullptr, SubstMapperTy, D->getLocation(), VN); SemaRef.CurrentInstantiationScope->InstantiatedLocal( cast(D->getMapperVarRef())->getDecl(), cast(MapperVarRef.get())->getDecl()); auto *ThisContext = dyn_cast_or_null(Owner); Sema::CXXThisScopeRAII ThisScope(SemaRef, ThisContext, Qualifiers(), ThisContext); // Instantiate map clauses. 
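  // A minimal illustrative sketch of a declare-mapper directive in a class
  // template (the exact directive spelling here is an assumption, not taken
  // from this file):
  //
  //   template <typename T> struct Vec {
  //     T *data;
  //     int len;
  //   #pragma omp declare mapper(id : Vec<T> v) map(v.data[0 : v.len])
  //   };
  //
  // Each map clause is rebuilt below with the substituted variable list and
  // mapper name.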
for (OMPClause *C : D->clauselists()) { auto *OldC = cast(C); SmallVector NewVars; for (Expr *OE : OldC->varlists()) { Expr *NE = SemaRef.SubstExpr(OE, TemplateArgs).get(); if (!NE) { IsCorrect = false; break; } NewVars.push_back(NE); } if (!IsCorrect) break; NestedNameSpecifierLoc NewQualifierLoc = SemaRef.SubstNestedNameSpecifierLoc(OldC->getMapperQualifierLoc(), TemplateArgs); CXXScopeSpec SS; SS.Adopt(NewQualifierLoc); DeclarationNameInfo NewNameInfo = SemaRef.SubstDeclarationNameInfo(OldC->getMapperIdInfo(), TemplateArgs); OMPVarListLocTy Locs(OldC->getBeginLoc(), OldC->getLParenLoc(), OldC->getEndLoc()); OMPClause *NewC = SemaRef.ActOnOpenMPMapClause( OldC->getMapTypeModifiers(), OldC->getMapTypeModifiersLoc(), SS, NewNameInfo, OldC->getMapType(), OldC->isImplicitMapType(), OldC->getMapLoc(), OldC->getColonLoc(), NewVars, Locs); Clauses.push_back(NewC); } SemaRef.EndOpenMPDSABlock(nullptr); if (!IsCorrect) return nullptr; Sema::DeclGroupPtrTy DG = SemaRef.ActOnOpenMPDeclareMapperDirective( /*S=*/nullptr, Owner, D->getDeclName(), SubstMapperTy, D->getLocation(), VN, D->getAccess(), MapperVarRef.get(), Clauses, PrevDeclInScope); Decl *NewDMD = DG.get().getSingleDecl(); SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, NewDMD); return NewDMD; } Decl *TemplateDeclInstantiator::VisitOMPCapturedExprDecl( OMPCapturedExprDecl * /*D*/) { llvm_unreachable("Should not be met in templates"); } Decl *TemplateDeclInstantiator::VisitFunctionDecl(FunctionDecl *D) { return VisitFunctionDecl(D, nullptr); } Decl * TemplateDeclInstantiator::VisitCXXDeductionGuideDecl(CXXDeductionGuideDecl *D) { Decl *Inst = VisitFunctionDecl(D, nullptr); if (Inst && !D->getDescribedFunctionTemplate()) Owner->addDecl(Inst); return Inst; } Decl *TemplateDeclInstantiator::VisitCXXMethodDecl(CXXMethodDecl *D) { return VisitCXXMethodDecl(D, nullptr); } Decl *TemplateDeclInstantiator::VisitRecordDecl(RecordDecl *D) { llvm_unreachable("There are only CXXRecordDecls in C++"); } Decl * TemplateDeclInstantiator::VisitClassTemplateSpecializationDecl( ClassTemplateSpecializationDecl *D) { // As a MS extension, we permit class-scope explicit specialization // of member class templates. ClassTemplateDecl *ClassTemplate = D->getSpecializedTemplate(); assert(ClassTemplate->getDeclContext()->isRecord() && D->getTemplateSpecializationKind() == TSK_ExplicitSpecialization && "can only instantiate an explicit specialization " "for a member class template"); // Lookup the already-instantiated declaration in the instantiation // of the class template. ClassTemplateDecl *InstClassTemplate = cast_or_null(SemaRef.FindInstantiatedDecl( D->getLocation(), ClassTemplate, TemplateArgs)); if (!InstClassTemplate) return nullptr; // Substitute into the template arguments of the class template explicit // specialization. TemplateSpecializationTypeLoc Loc = D->getTypeAsWritten()->getTypeLoc(). castAs(); TemplateArgumentListInfo InstTemplateArgs(Loc.getLAngleLoc(), Loc.getRAngleLoc()); SmallVector ArgLocs; for (unsigned I = 0; I != Loc.getNumArgs(); ++I) ArgLocs.push_back(Loc.getArgLoc(I)); if (SemaRef.SubstTemplateArguments(ArgLocs, TemplateArgs, InstTemplateArgs)) return nullptr; // Check that the template argument list is well-formed for this // class template. 
SmallVector Converted; if (SemaRef.CheckTemplateArgumentList(InstClassTemplate, D->getLocation(), InstTemplateArgs, false, Converted, /*UpdateArgsWithConversions=*/true)) return nullptr; // Figure out where to insert this class template explicit specialization // in the member template's set of class template explicit specializations. void *InsertPos = nullptr; ClassTemplateSpecializationDecl *PrevDecl = InstClassTemplate->findSpecialization(Converted, InsertPos); // Check whether we've already seen a conflicting instantiation of this // declaration (for instance, if there was a prior implicit instantiation). bool Ignored; if (PrevDecl && SemaRef.CheckSpecializationInstantiationRedecl(D->getLocation(), D->getSpecializationKind(), PrevDecl, PrevDecl->getSpecializationKind(), PrevDecl->getPointOfInstantiation(), Ignored)) return nullptr; // If PrevDecl was a definition and D is also a definition, diagnose. // This happens in cases like: // // template // struct Outer { // template struct Inner; // template<> struct Inner {}; // template<> struct Inner {}; // }; // // Outer outer; // error: the explicit specializations of Inner // // have the same signature. if (PrevDecl && PrevDecl->getDefinition() && D->isThisDeclarationADefinition()) { SemaRef.Diag(D->getLocation(), diag::err_redefinition) << PrevDecl; SemaRef.Diag(PrevDecl->getDefinition()->getLocation(), diag::note_previous_definition); return nullptr; } // Create the class template partial specialization declaration. ClassTemplateSpecializationDecl *InstD = ClassTemplateSpecializationDecl::Create( SemaRef.Context, D->getTagKind(), Owner, D->getBeginLoc(), D->getLocation(), InstClassTemplate, Converted, PrevDecl); // Add this partial specialization to the set of class template partial // specializations. if (!PrevDecl) InstClassTemplate->AddSpecialization(InstD, InsertPos); // Substitute the nested name specifier, if any. if (SubstQualifier(D, InstD)) return nullptr; // Build the canonical type that describes the converted template // arguments of the class template explicit specialization. QualType CanonType = SemaRef.Context.getTemplateSpecializationType( TemplateName(InstClassTemplate), Converted, SemaRef.Context.getRecordType(InstD)); // Build the fully-sugared type for this class template // specialization as the user wrote in the specialization // itself. This means that we'll pretty-print the type retrieved // from the specialization's declaration the way that the user // actually wrote the specialization, rather than formatting the // name based on the "canonical" representation used to store the // template arguments in the specialization. TypeSourceInfo *WrittenTy = SemaRef.Context.getTemplateSpecializationTypeInfo( TemplateName(InstClassTemplate), D->getLocation(), InstTemplateArgs, CanonType); InstD->setAccess(D->getAccess()); InstD->setInstantiationOfMemberClass(D, TSK_ImplicitInstantiation); InstD->setSpecializationKind(D->getSpecializationKind()); InstD->setTypeAsWritten(WrittenTy); InstD->setExternLoc(D->getExternLoc()); InstD->setTemplateKeywordLoc(D->getTemplateKeywordLoc()); Owner->addDecl(InstD); // Instantiate the members of the class-scope explicit specialization eagerly. // We don't have support for lazy instantiation of an explicit specialization // yet, and MSVC eagerly instantiates in this case. // FIXME: This is wrong in standard C++. 
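  // A minimal illustrative example of the Microsoft extension this visitor
  // handles:
  //
  //   template <typename T> struct Outer {
  //     template <typename U> struct Inner {};
  //     template <> struct Inner<int> {}; // class-scope explicit specialization
  //   };
  //
  // Instantiating Outer<T> for a particular T also instantiates this
  // specialization eagerly, as the code just below does.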
if (D->isThisDeclarationADefinition() && SemaRef.InstantiateClass(D->getLocation(), InstD, D, TemplateArgs, TSK_ImplicitInstantiation, /*Complain=*/true)) return nullptr; return InstD; } Decl *TemplateDeclInstantiator::VisitVarTemplateSpecializationDecl( VarTemplateSpecializationDecl *D) { TemplateArgumentListInfo VarTemplateArgsInfo; VarTemplateDecl *VarTemplate = D->getSpecializedTemplate(); assert(VarTemplate && "A template specialization without specialized template?"); VarTemplateDecl *InstVarTemplate = cast_or_null(SemaRef.FindInstantiatedDecl( D->getLocation(), VarTemplate, TemplateArgs)); if (!InstVarTemplate) return nullptr; // Substitute the current template arguments. const TemplateArgumentListInfo &TemplateArgsInfo = D->getTemplateArgsInfo(); VarTemplateArgsInfo.setLAngleLoc(TemplateArgsInfo.getLAngleLoc()); VarTemplateArgsInfo.setRAngleLoc(TemplateArgsInfo.getRAngleLoc()); if (SemaRef.SubstTemplateArguments(TemplateArgsInfo.arguments(), TemplateArgs, VarTemplateArgsInfo)) return nullptr; // Check that the template argument list is well-formed for this template. SmallVector Converted; if (SemaRef.CheckTemplateArgumentList(InstVarTemplate, D->getLocation(), VarTemplateArgsInfo, false, Converted, /*UpdateArgsWithConversions=*/true)) return nullptr; // Check whether we've already seen a declaration of this specialization. void *InsertPos = nullptr; VarTemplateSpecializationDecl *PrevDecl = InstVarTemplate->findSpecialization(Converted, InsertPos); // Check whether we've already seen a conflicting instantiation of this // declaration (for instance, if there was a prior implicit instantiation). bool Ignored; if (PrevDecl && SemaRef.CheckSpecializationInstantiationRedecl( D->getLocation(), D->getSpecializationKind(), PrevDecl, PrevDecl->getSpecializationKind(), PrevDecl->getPointOfInstantiation(), Ignored)) return nullptr; return VisitVarTemplateSpecializationDecl( InstVarTemplate, D, VarTemplateArgsInfo, Converted, PrevDecl); } Decl *TemplateDeclInstantiator::VisitVarTemplateSpecializationDecl( VarTemplateDecl *VarTemplate, VarDecl *D, const TemplateArgumentListInfo &TemplateArgsInfo, ArrayRef Converted, VarTemplateSpecializationDecl *PrevDecl) { // Do substitution on the type of the declaration TypeSourceInfo *DI = SemaRef.SubstType(D->getTypeSourceInfo(), TemplateArgs, D->getTypeSpecStartLoc(), D->getDeclName()); if (!DI) return nullptr; if (DI->getType()->isFunctionType()) { SemaRef.Diag(D->getLocation(), diag::err_variable_instantiates_to_function) << D->isStaticDataMember() << DI->getType(); return nullptr; } // Build the instantiated declaration VarTemplateSpecializationDecl *Var = VarTemplateSpecializationDecl::Create( SemaRef.Context, Owner, D->getInnerLocStart(), D->getLocation(), VarTemplate, DI->getType(), DI, D->getStorageClass(), Converted); Var->setTemplateArgsInfo(TemplateArgsInfo); if (!PrevDecl) { void *InsertPos = nullptr; VarTemplate->findSpecialization(Converted, InsertPos); VarTemplate->AddSpecialization(Var, InsertPos); } if (SemaRef.getLangOpts().OpenCL) SemaRef.deduceOpenCLAddressSpace(Var); // Substitute the nested name specifier, if any. 
if (SubstQualifier(D, Var)) return nullptr; SemaRef.BuildVariableInstantiation(Var, D, TemplateArgs, LateAttrs, Owner, StartingScope, false, PrevDecl); return Var; } Decl *TemplateDeclInstantiator::VisitObjCAtDefsFieldDecl(ObjCAtDefsFieldDecl *D) { llvm_unreachable("@defs is not supported in Objective-C++"); } Decl *TemplateDeclInstantiator::VisitFriendTemplateDecl(FriendTemplateDecl *D) { // FIXME: We need to be able to instantiate FriendTemplateDecls. unsigned DiagID = SemaRef.getDiagnostics().getCustomDiagID( DiagnosticsEngine::Error, "cannot instantiate %0 yet"); SemaRef.Diag(D->getLocation(), DiagID) << D->getDeclKindName(); return nullptr; } Decl *TemplateDeclInstantiator::VisitConceptDecl(ConceptDecl *D) { llvm_unreachable("Concept definitions cannot reside inside a template"); } Decl * TemplateDeclInstantiator::VisitRequiresExprBodyDecl(RequiresExprBodyDecl *D) { return RequiresExprBodyDecl::Create(SemaRef.Context, D->getDeclContext(), D->getBeginLoc()); } Decl *TemplateDeclInstantiator::VisitDecl(Decl *D) { llvm_unreachable("Unexpected decl"); } Decl *Sema::SubstDecl(Decl *D, DeclContext *Owner, const MultiLevelTemplateArgumentList &TemplateArgs) { TemplateDeclInstantiator Instantiator(*this, Owner, TemplateArgs); if (D->isInvalidDecl()) return nullptr; Decl *SubstD; runWithSufficientStackSpace(D->getLocation(), [&] { SubstD = Instantiator.Visit(D); }); return SubstD; } void TemplateDeclInstantiator::adjustForRewrite(RewriteKind RK, FunctionDecl *Orig, QualType &T, TypeSourceInfo *&TInfo, DeclarationNameInfo &NameInfo) { assert(RK == RewriteKind::RewriteSpaceshipAsEqualEqual); // C++2a [class.compare.default]p3: // the return type is replaced with bool auto *FPT = T->castAs(); T = SemaRef.Context.getFunctionType( SemaRef.Context.BoolTy, FPT->getParamTypes(), FPT->getExtProtoInfo()); // Update the return type in the source info too. The most straightforward // way is to create new TypeSourceInfo for the new type. Use the location of // the '= default' as the location of the new type. // // FIXME: Set the correct return type when we initially transform the type, // rather than delaying it to now. TypeSourceInfo *NewTInfo = SemaRef.Context.getTrivialTypeSourceInfo(T, Orig->getEndLoc()); auto OldLoc = TInfo->getTypeLoc().getAsAdjusted(); assert(OldLoc && "type of function is not a function type?"); auto NewLoc = NewTInfo->getTypeLoc().castAs(); for (unsigned I = 0, N = OldLoc.getNumParams(); I != N; ++I) NewLoc.setParam(I, OldLoc.getParam(I)); TInfo = NewTInfo; // and the declarator-id is replaced with operator== NameInfo.setName( SemaRef.Context.DeclarationNames.getCXXOperatorName(OO_EqualEqual)); } FunctionDecl *Sema::SubstSpaceshipAsEqualEqual(CXXRecordDecl *RD, FunctionDecl *Spaceship) { if (Spaceship->isInvalidDecl()) return nullptr; // C++2a [class.compare.default]p3: // an == operator function is declared implicitly [...] 
with the same // access and function-definition and in the same class scope as the // three-way comparison operator function MultiLevelTemplateArgumentList NoTemplateArgs; NoTemplateArgs.setKind(TemplateSubstitutionKind::Rewrite); NoTemplateArgs.addOuterRetainedLevels(RD->getTemplateDepth()); TemplateDeclInstantiator Instantiator(*this, RD, NoTemplateArgs); Decl *R; if (auto *MD = dyn_cast(Spaceship)) { R = Instantiator.VisitCXXMethodDecl( MD, nullptr, None, TemplateDeclInstantiator::RewriteKind::RewriteSpaceshipAsEqualEqual); } else { assert(Spaceship->getFriendObjectKind() && "defaulted spaceship is neither a member nor a friend"); R = Instantiator.VisitFunctionDecl( Spaceship, nullptr, TemplateDeclInstantiator::RewriteKind::RewriteSpaceshipAsEqualEqual); if (!R) return nullptr; FriendDecl *FD = FriendDecl::Create(Context, RD, Spaceship->getLocation(), cast(R), Spaceship->getBeginLoc()); FD->setAccess(AS_public); RD->addDecl(FD); } return cast_or_null(R); } /// Instantiates a nested template parameter list in the current /// instantiation context. /// /// \param L The parameter list to instantiate /// /// \returns NULL if there was an error TemplateParameterList * TemplateDeclInstantiator::SubstTemplateParams(TemplateParameterList *L) { // Get errors for all the parameters before bailing out. bool Invalid = false; unsigned N = L->size(); typedef SmallVector ParamVector; ParamVector Params; Params.reserve(N); for (auto &P : *L) { NamedDecl *D = cast_or_null(Visit(P)); Params.push_back(D); Invalid = Invalid || !D || D->isInvalidDecl(); } // Clean up if we had an error. if (Invalid) return nullptr; // FIXME: Concepts: Substitution into requires clause should only happen when // checking satisfaction. Expr *InstRequiresClause = nullptr; if (Expr *E = L->getRequiresClause()) { EnterExpressionEvaluationContext ConstantEvaluated( SemaRef, Sema::ExpressionEvaluationContext::Unevaluated); ExprResult Res = SemaRef.SubstExpr(E, TemplateArgs); if (Res.isInvalid() || !Res.isUsable()) { return nullptr; } InstRequiresClause = Res.get(); } TemplateParameterList *InstL = TemplateParameterList::Create(SemaRef.Context, L->getTemplateLoc(), L->getLAngleLoc(), Params, L->getRAngleLoc(), InstRequiresClause); return InstL; } TemplateParameterList * Sema::SubstTemplateParams(TemplateParameterList *Params, DeclContext *Owner, const MultiLevelTemplateArgumentList &TemplateArgs) { TemplateDeclInstantiator Instantiator(*this, Owner, TemplateArgs); return Instantiator.SubstTemplateParams(Params); } /// Instantiate the declaration of a class template partial /// specialization. /// /// \param ClassTemplate the (instantiated) class template that is partially // specialized by the instantiation of \p PartialSpec. /// /// \param PartialSpec the (uninstantiated) class template partial /// specialization that we are instantiating. /// /// \returns The instantiated partial specialization, if successful; otherwise, /// NULL to indicate an error. ClassTemplatePartialSpecializationDecl * TemplateDeclInstantiator::InstantiateClassTemplatePartialSpecialization( ClassTemplateDecl *ClassTemplate, ClassTemplatePartialSpecializationDecl *PartialSpec) { // Create a local instantiation scope for this class template partial // specialization, which will contain the instantiations of the template // parameters. LocalInstantiationScope Scope(SemaRef); // Substitute into the template parameters of the class template partial // specialization. 
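  // A minimal illustrative example of the construct being instantiated:
  //
  //   template <typename T> struct Outer {
  //     template <typename U> struct Inner;
  //     template <typename U> struct Inner<U *>; // partial specialization
  //   };
  //
  // Instantiating Outer<int> must recreate Inner<U *>, substituting into its
  // own parameter list <typename U> first, which is what happens below.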
TemplateParameterList *TempParams = PartialSpec->getTemplateParameters(); TemplateParameterList *InstParams = SubstTemplateParams(TempParams); if (!InstParams) return nullptr; // Substitute into the template arguments of the class template partial // specialization. const ASTTemplateArgumentListInfo *TemplArgInfo = PartialSpec->getTemplateArgsAsWritten(); TemplateArgumentListInfo InstTemplateArgs(TemplArgInfo->LAngleLoc, TemplArgInfo->RAngleLoc); if (SemaRef.SubstTemplateArguments(TemplArgInfo->arguments(), TemplateArgs, InstTemplateArgs)) return nullptr; // Check that the template argument list is well-formed for this // class template. SmallVector Converted; if (SemaRef.CheckTemplateArgumentList(ClassTemplate, PartialSpec->getLocation(), InstTemplateArgs, false, Converted)) return nullptr; // Check these arguments are valid for a template partial specialization. if (SemaRef.CheckTemplatePartialSpecializationArgs( PartialSpec->getLocation(), ClassTemplate, InstTemplateArgs.size(), Converted)) return nullptr; // Figure out where to insert this class template partial specialization // in the member template's set of class template partial specializations. void *InsertPos = nullptr; ClassTemplateSpecializationDecl *PrevDecl = ClassTemplate->findPartialSpecialization(Converted, InstParams, InsertPos); // Build the canonical type that describes the converted template // arguments of the class template partial specialization. QualType CanonType = SemaRef.Context.getTemplateSpecializationType(TemplateName(ClassTemplate), Converted); // Build the fully-sugared type for this class template // specialization as the user wrote in the specialization // itself. This means that we'll pretty-print the type retrieved // from the specialization's declaration the way that the user // actually wrote the specialization, rather than formatting the // name based on the "canonical" representation used to store the // template arguments in the specialization. TypeSourceInfo *WrittenTy = SemaRef.Context.getTemplateSpecializationTypeInfo( TemplateName(ClassTemplate), PartialSpec->getLocation(), InstTemplateArgs, CanonType); if (PrevDecl) { // We've already seen a partial specialization with the same template // parameters and template arguments. This can happen, for example, when // substituting the outer template arguments ends up causing two // class template partial specializations of a member class template // to have identical forms, e.g., // // template // struct Outer { // template struct Inner; // template struct Inner; // template struct Inner; // }; // // Outer outer; // error: the partial specializations of Inner // // have the same signature. SemaRef.Diag(PartialSpec->getLocation(), diag::err_partial_spec_redeclared) << WrittenTy->getType(); SemaRef.Diag(PrevDecl->getLocation(), diag::note_prev_partial_spec_here) << SemaRef.Context.getTypeDeclType(PrevDecl); return nullptr; } // Create the class template partial specialization declaration. ClassTemplatePartialSpecializationDecl *InstPartialSpec = ClassTemplatePartialSpecializationDecl::Create( SemaRef.Context, PartialSpec->getTagKind(), Owner, PartialSpec->getBeginLoc(), PartialSpec->getLocation(), InstParams, ClassTemplate, Converted, InstTemplateArgs, CanonType, nullptr); // Substitute the nested name specifier, if any. if (SubstQualifier(PartialSpec, InstPartialSpec)) return nullptr; InstPartialSpec->setInstantiatedFromMember(PartialSpec); InstPartialSpec->setTypeAsWritten(WrittenTy); // Check the completed partial specialization. 
SemaRef.CheckTemplatePartialSpecialization(InstPartialSpec); // Add this partial specialization to the set of class template partial // specializations. ClassTemplate->AddPartialSpecialization(InstPartialSpec, /*InsertPos=*/nullptr); return InstPartialSpec; } /// Instantiate the declaration of a variable template partial /// specialization. /// /// \param VarTemplate the (instantiated) variable template that is partially /// specialized by the instantiation of \p PartialSpec. /// /// \param PartialSpec the (uninstantiated) variable template partial /// specialization that we are instantiating. /// /// \returns The instantiated partial specialization, if successful; otherwise, /// NULL to indicate an error. VarTemplatePartialSpecializationDecl * TemplateDeclInstantiator::InstantiateVarTemplatePartialSpecialization( VarTemplateDecl *VarTemplate, VarTemplatePartialSpecializationDecl *PartialSpec) { // Create a local instantiation scope for this variable template partial // specialization, which will contain the instantiations of the template // parameters. LocalInstantiationScope Scope(SemaRef); // Substitute into the template parameters of the variable template partial // specialization. TemplateParameterList *TempParams = PartialSpec->getTemplateParameters(); TemplateParameterList *InstParams = SubstTemplateParams(TempParams); if (!InstParams) return nullptr; // Substitute into the template arguments of the variable template partial // specialization. const ASTTemplateArgumentListInfo *TemplArgInfo = PartialSpec->getTemplateArgsAsWritten(); TemplateArgumentListInfo InstTemplateArgs(TemplArgInfo->LAngleLoc, TemplArgInfo->RAngleLoc); if (SemaRef.SubstTemplateArguments(TemplArgInfo->arguments(), TemplateArgs, InstTemplateArgs)) return nullptr; // Check that the template argument list is well-formed for this // class template. SmallVector Converted; if (SemaRef.CheckTemplateArgumentList(VarTemplate, PartialSpec->getLocation(), InstTemplateArgs, false, Converted)) return nullptr; // Check these arguments are valid for a template partial specialization. if (SemaRef.CheckTemplatePartialSpecializationArgs( PartialSpec->getLocation(), VarTemplate, InstTemplateArgs.size(), Converted)) return nullptr; // Figure out where to insert this variable template partial specialization // in the member template's set of variable template partial specializations. void *InsertPos = nullptr; VarTemplateSpecializationDecl *PrevDecl = VarTemplate->findPartialSpecialization(Converted, InstParams, InsertPos); // Build the canonical type that describes the converted template // arguments of the variable template partial specialization. QualType CanonType = SemaRef.Context.getTemplateSpecializationType( TemplateName(VarTemplate), Converted); // Build the fully-sugared type for this variable template // specialization as the user wrote in the specialization // itself. This means that we'll pretty-print the type retrieved // from the specialization's declaration the way that the user // actually wrote the specialization, rather than formatting the // name based on the "canonical" representation used to store the // template arguments in the specialization. TypeSourceInfo *WrittenTy = SemaRef.Context.getTemplateSpecializationTypeInfo( TemplateName(VarTemplate), PartialSpec->getLocation(), InstTemplateArgs, CanonType); if (PrevDecl) { // We've already seen a partial specialization with the same template // parameters and template arguments. 
This can happen, for example, when // substituting the outer template arguments ends up causing two // variable template partial specializations of a member variable template // to have identical forms, e.g., // // template // struct Outer { // template pair p; // template pair p; // template pair p; // }; // // Outer outer; // error: the partial specializations of Inner // // have the same signature. SemaRef.Diag(PartialSpec->getLocation(), diag::err_var_partial_spec_redeclared) << WrittenTy->getType(); SemaRef.Diag(PrevDecl->getLocation(), diag::note_var_prev_partial_spec_here); return nullptr; } // Do substitution on the type of the declaration TypeSourceInfo *DI = SemaRef.SubstType( PartialSpec->getTypeSourceInfo(), TemplateArgs, PartialSpec->getTypeSpecStartLoc(), PartialSpec->getDeclName()); if (!DI) return nullptr; if (DI->getType()->isFunctionType()) { SemaRef.Diag(PartialSpec->getLocation(), diag::err_variable_instantiates_to_function) << PartialSpec->isStaticDataMember() << DI->getType(); return nullptr; } // Create the variable template partial specialization declaration. VarTemplatePartialSpecializationDecl *InstPartialSpec = VarTemplatePartialSpecializationDecl::Create( SemaRef.Context, Owner, PartialSpec->getInnerLocStart(), PartialSpec->getLocation(), InstParams, VarTemplate, DI->getType(), DI, PartialSpec->getStorageClass(), Converted, InstTemplateArgs); // Substitute the nested name specifier, if any. if (SubstQualifier(PartialSpec, InstPartialSpec)) return nullptr; InstPartialSpec->setInstantiatedFromMember(PartialSpec); InstPartialSpec->setTypeAsWritten(WrittenTy); // Check the completed partial specialization. SemaRef.CheckTemplatePartialSpecialization(InstPartialSpec); // Add this partial specialization to the set of variable template partial // specializations. The instantiation of the initializer is not necessary. VarTemplate->AddPartialSpecialization(InstPartialSpec, /*InsertPos=*/nullptr); SemaRef.BuildVariableInstantiation(InstPartialSpec, PartialSpec, TemplateArgs, LateAttrs, Owner, StartingScope); return InstPartialSpec; } TypeSourceInfo* TemplateDeclInstantiator::SubstFunctionType(FunctionDecl *D, SmallVectorImpl &Params) { TypeSourceInfo *OldTInfo = D->getTypeSourceInfo(); assert(OldTInfo && "substituting function without type source info"); assert(Params.empty() && "parameter vector is non-empty at start"); CXXRecordDecl *ThisContext = nullptr; Qualifiers ThisTypeQuals; if (CXXMethodDecl *Method = dyn_cast(D)) { ThisContext = cast(Owner); ThisTypeQuals = Method->getMethodQualifiers(); } TypeSourceInfo *NewTInfo = SemaRef.SubstFunctionDeclType(OldTInfo, TemplateArgs, D->getTypeSpecStartLoc(), D->getDeclName(), ThisContext, ThisTypeQuals); if (!NewTInfo) return nullptr; TypeLoc OldTL = OldTInfo->getTypeLoc().IgnoreParens(); if (FunctionProtoTypeLoc OldProtoLoc = OldTL.getAs()) { if (NewTInfo != OldTInfo) { // Get parameters from the new type info. 
TypeLoc NewTL = NewTInfo->getTypeLoc().IgnoreParens(); FunctionProtoTypeLoc NewProtoLoc = NewTL.castAs(); unsigned NewIdx = 0; for (unsigned OldIdx = 0, NumOldParams = OldProtoLoc.getNumParams(); OldIdx != NumOldParams; ++OldIdx) { ParmVarDecl *OldParam = OldProtoLoc.getParam(OldIdx); if (!OldParam) return nullptr; LocalInstantiationScope *Scope = SemaRef.CurrentInstantiationScope; Optional NumArgumentsInExpansion; if (OldParam->isParameterPack()) NumArgumentsInExpansion = SemaRef.getNumArgumentsInExpansion(OldParam->getType(), TemplateArgs); if (!NumArgumentsInExpansion) { // Simple case: normal parameter, or a parameter pack that's // instantiated to a (still-dependent) parameter pack. ParmVarDecl *NewParam = NewProtoLoc.getParam(NewIdx++); Params.push_back(NewParam); Scope->InstantiatedLocal(OldParam, NewParam); } else { // Parameter pack expansion: make the instantiation an argument pack. Scope->MakeInstantiatedLocalArgPack(OldParam); for (unsigned I = 0; I != *NumArgumentsInExpansion; ++I) { ParmVarDecl *NewParam = NewProtoLoc.getParam(NewIdx++); Params.push_back(NewParam); Scope->InstantiatedLocalPackArg(OldParam, NewParam); } } } } else { // The function type itself was not dependent and therefore no // substitution occurred. However, we still need to instantiate // the function parameters themselves. const FunctionProtoType *OldProto = cast(OldProtoLoc.getType()); for (unsigned i = 0, i_end = OldProtoLoc.getNumParams(); i != i_end; ++i) { ParmVarDecl *OldParam = OldProtoLoc.getParam(i); if (!OldParam) { Params.push_back(SemaRef.BuildParmVarDeclForTypedef( D, D->getLocation(), OldProto->getParamType(i))); continue; } ParmVarDecl *Parm = cast_or_null(VisitParmVarDecl(OldParam)); if (!Parm) return nullptr; Params.push_back(Parm); } } } else { // If the type of this function, after ignoring parentheses, is not // *directly* a function type, then we're instantiating a function that // was declared via a typedef or with attributes, e.g., // // typedef int functype(int, int); // functype func; // int __cdecl meth(int, int); // // In this case, we'll just go instantiate the ParmVarDecls that we // synthesized in the method declaration. SmallVector ParamTypes; Sema::ExtParameterInfoBuilder ExtParamInfos; if (SemaRef.SubstParmTypes(D->getLocation(), D->parameters(), nullptr, TemplateArgs, ParamTypes, &Params, ExtParamInfos)) return nullptr; } return NewTInfo; } /// Introduce the instantiated function parameters into the local /// instantiation scope, and set the parameter names to those used /// in the template. static bool addInstantiatedParametersToScope(Sema &S, FunctionDecl *Function, const FunctionDecl *PatternDecl, LocalInstantiationScope &Scope, const MultiLevelTemplateArgumentList &TemplateArgs) { unsigned FParamIdx = 0; for (unsigned I = 0, N = PatternDecl->getNumParams(); I != N; ++I) { const ParmVarDecl *PatternParam = PatternDecl->getParamDecl(I); if (!PatternParam->isParameterPack()) { // Simple case: not a parameter pack. assert(FParamIdx < Function->getNumParams()); ParmVarDecl *FunctionParam = Function->getParamDecl(FParamIdx); FunctionParam->setDeclName(PatternParam->getDeclName()); // If the parameter's type is not dependent, update it to match the type // in the pattern. They can differ in top-level cv-qualifiers, and we want // the pattern's type here. If the type is dependent, they can't differ, // per core issue 1668. Substitute into the type from the pattern, in case // it's instantiation-dependent. // FIXME: Updating the type to work around this is at best fragile. 
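  // A minimal illustrative sketch (assumed scenario) of how the pattern and
  // the instantiated parameter can differ in top-level cv-qualifiers:
  //
  //   template <typename T> void f(int n);         // declaration
  //   template <typename T> void f(const int n) {} // definition (pattern)
  //
  // The specialization may have been built from the first declaration, so its
  // parameter has type 'int' while the pattern spells 'const int'; the code
  // below re-substitutes the pattern's type so the body sees the pattern's
  // spelling.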
if (!PatternDecl->getType()->isDependentType()) { QualType T = S.SubstType(PatternParam->getType(), TemplateArgs, FunctionParam->getLocation(), FunctionParam->getDeclName()); if (T.isNull()) return true; FunctionParam->setType(T); } Scope.InstantiatedLocal(PatternParam, FunctionParam); ++FParamIdx; continue; } // Expand the parameter pack. Scope.MakeInstantiatedLocalArgPack(PatternParam); Optional NumArgumentsInExpansion = S.getNumArgumentsInExpansion(PatternParam->getType(), TemplateArgs); if (NumArgumentsInExpansion) { QualType PatternType = PatternParam->getType()->castAs()->getPattern(); for (unsigned Arg = 0; Arg < *NumArgumentsInExpansion; ++Arg) { ParmVarDecl *FunctionParam = Function->getParamDecl(FParamIdx); FunctionParam->setDeclName(PatternParam->getDeclName()); if (!PatternDecl->getType()->isDependentType()) { Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(S, Arg); QualType T = S.SubstType(PatternType, TemplateArgs, FunctionParam->getLocation(), FunctionParam->getDeclName()); if (T.isNull()) return true; FunctionParam->setType(T); } Scope.InstantiatedLocalPackArg(PatternParam, FunctionParam); ++FParamIdx; } } } return false; } bool Sema::InstantiateDefaultArgument(SourceLocation CallLoc, FunctionDecl *FD, ParmVarDecl *Param) { assert(Param->hasUninstantiatedDefaultArg()); Expr *UninstExpr = Param->getUninstantiatedDefaultArg(); EnterExpressionEvaluationContext EvalContext( *this, ExpressionEvaluationContext::PotentiallyEvaluated, Param); // Instantiate the expression. // // FIXME: Pass in a correct Pattern argument, otherwise // getTemplateInstantiationArgs uses the lexical context of FD, e.g. // // template // struct A { // static int FooImpl(); // // template // // bug: default argument A::FooImpl() is evaluated with 2-level // // template argument list [[T], [Tp]], should be [[Tp]]. // friend A Foo(int a); // }; // // template // A Foo(int a = A::FooImpl()); MultiLevelTemplateArgumentList TemplateArgs = getTemplateInstantiationArgs(FD, nullptr, /*RelativeToPrimary=*/true); InstantiatingTemplate Inst(*this, CallLoc, Param, TemplateArgs.getInnermost()); if (Inst.isInvalid()) return true; if (Inst.isAlreadyInstantiating()) { Diag(Param->getBeginLoc(), diag::err_recursive_default_argument) << FD; Param->setInvalidDecl(); return true; } ExprResult Result; { // C++ [dcl.fct.default]p5: // The names in the [default argument] expression are bound, and // the semantic constraints are checked, at the point where the // default argument expression appears. ContextRAII SavedContext(*this, FD); LocalInstantiationScope Local(*this); FunctionDecl *Pattern = FD->getTemplateInstantiationPattern( /*ForDefinition*/ false); if (addInstantiatedParametersToScope(*this, FD, Pattern, Local, TemplateArgs)) return true; runWithSufficientStackSpace(CallLoc, [&] { Result = SubstInitializer(UninstExpr, TemplateArgs, /*DirectInit*/false); }); } if (Result.isInvalid()) return true; // Check the expression as an initializer for the parameter. 
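  // A minimal illustrative example (assumed): for
  //
  //   template <typename T> void f(T x, T y = T());
  //
  // a call such as f(42) instantiates the default argument 'T()' as 'int()'
  // at the point of the call, and the result is then checked as an
  // initializer for 'y' below.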
InitializedEntity Entity = InitializedEntity::InitializeParameter(Context, Param); InitializationKind Kind = InitializationKind::CreateCopy( Param->getLocation(), /*FIXME:EqualLoc*/ UninstExpr->getBeginLoc()); Expr *ResultE = Result.getAs(); InitializationSequence InitSeq(*this, Entity, Kind, ResultE); Result = InitSeq.Perform(*this, Entity, Kind, ResultE); if (Result.isInvalid()) return true; Result = ActOnFinishFullExpr(Result.getAs(), Param->getOuterLocStart(), /*DiscardedValue*/ false); if (Result.isInvalid()) return true; // Remember the instantiated default argument. Param->setDefaultArg(Result.getAs()); if (ASTMutationListener *L = getASTMutationListener()) L->DefaultArgumentInstantiated(Param); return false; } void Sema::InstantiateExceptionSpec(SourceLocation PointOfInstantiation, FunctionDecl *Decl) { const FunctionProtoType *Proto = Decl->getType()->castAs(); if (Proto->getExceptionSpecType() != EST_Uninstantiated) return; InstantiatingTemplate Inst(*this, PointOfInstantiation, Decl, InstantiatingTemplate::ExceptionSpecification()); if (Inst.isInvalid()) { // We hit the instantiation depth limit. Clear the exception specification // so that our callers don't have to cope with EST_Uninstantiated. UpdateExceptionSpec(Decl, EST_None); return; } if (Inst.isAlreadyInstantiating()) { // This exception specification indirectly depends on itself. Reject. // FIXME: Corresponding rule in the standard? Diag(PointOfInstantiation, diag::err_exception_spec_cycle) << Decl; UpdateExceptionSpec(Decl, EST_None); return; } // Enter the scope of this instantiation. We don't use // PushDeclContext because we don't have a scope. Sema::ContextRAII savedContext(*this, Decl); LocalInstantiationScope Scope(*this); MultiLevelTemplateArgumentList TemplateArgs = getTemplateInstantiationArgs(Decl, nullptr, /*RelativeToPrimary*/true); // FIXME: We can't use getTemplateInstantiationPattern(false) in general // here, because for a non-defining friend declaration in a class template, // we don't store enough information to map back to the friend declaration in // the template. FunctionDecl *Template = Proto->getExceptionSpecTemplate(); if (addInstantiatedParametersToScope(*this, Decl, Template, Scope, TemplateArgs)) { UpdateExceptionSpec(Decl, EST_None); return; } SubstExceptionSpec(Decl, Template->getType()->castAs(), TemplateArgs); } bool Sema::CheckInstantiatedFunctionTemplateConstraints( SourceLocation PointOfInstantiation, FunctionDecl *Decl, ArrayRef TemplateArgs, ConstraintSatisfaction &Satisfaction) { // In most cases we're not going to have constraints, so check for that first. FunctionTemplateDecl *Template = Decl->getPrimaryTemplate(); // Note - code synthesis context for the constraints check is created // inside CheckConstraintsSatisfaction. SmallVector TemplateAC; Template->getAssociatedConstraints(TemplateAC); if (TemplateAC.empty()) { Satisfaction.IsSatisfied = true; return false; } // Enter the scope of this instantiation. We don't use // PushDeclContext because we don't have a scope. Sema::ContextRAII savedContext(*this, Decl); LocalInstantiationScope Scope(*this); // If this is not an explicit specialization - we need to get the instantiated // version of the template arguments and add them to scope for the // substitution. 
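// NOTE (added illustrative commentary; not part of the upstream source): a
// sketch of the kind of C++20 declaration whose associated constraints are
// checked here once the template arguments are known:
//
//   template <typename T>
//   concept Small = sizeof(T) <= sizeof(void *);
//
//   template <typename T>
//     requires Small<T>
//   void store(T value);          // the constraint is re-checked against the
//                                 // arguments of each specialization that is
//                                 // instantiated, e.g. store<int>
//
// The function parameters are pushed into the local instantiation scope first
// so that constraints which name them (e.g. in a trailing requires-clause)
// substitute correctly.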
if (Decl->isTemplateInstantiation()) { InstantiatingTemplate Inst(*this, Decl->getPointOfInstantiation(), InstantiatingTemplate::ConstraintsCheck{}, Decl->getPrimaryTemplate(), TemplateArgs, SourceRange()); if (Inst.isInvalid()) return true; MultiLevelTemplateArgumentList MLTAL( *Decl->getTemplateSpecializationArgs()); if (addInstantiatedParametersToScope( *this, Decl, Decl->getPrimaryTemplate()->getTemplatedDecl(), Scope, MLTAL)) return true; } Qualifiers ThisQuals; CXXRecordDecl *Record = nullptr; if (auto *Method = dyn_cast(Decl)) { ThisQuals = Method->getMethodQualifiers(); Record = Method->getParent(); } CXXThisScopeRAII ThisScope(*this, Record, ThisQuals, Record != nullptr); return CheckConstraintSatisfaction(Template, TemplateAC, TemplateArgs, PointOfInstantiation, Satisfaction); } /// Initializes the common fields of an instantiation function /// declaration (New) from the corresponding fields of its template (Tmpl). /// /// \returns true if there was an error bool TemplateDeclInstantiator::InitFunctionInstantiation(FunctionDecl *New, FunctionDecl *Tmpl) { New->setImplicit(Tmpl->isImplicit()); // Forward the mangling number from the template to the instantiated decl. SemaRef.Context.setManglingNumber(New, SemaRef.Context.getManglingNumber(Tmpl)); // If we are performing substituting explicitly-specified template arguments // or deduced template arguments into a function template and we reach this // point, we are now past the point where SFINAE applies and have committed // to keeping the new function template specialization. We therefore // convert the active template instantiation for the function template // into a template instantiation for this specific function template // specialization, which is not a SFINAE context, so that we diagnose any // further errors in the declaration itself. // // FIXME: This is a hack. typedef Sema::CodeSynthesisContext ActiveInstType; ActiveInstType &ActiveInst = SemaRef.CodeSynthesisContexts.back(); if (ActiveInst.Kind == ActiveInstType::ExplicitTemplateArgumentSubstitution || ActiveInst.Kind == ActiveInstType::DeducedTemplateArgumentSubstitution) { if (FunctionTemplateDecl *FunTmpl = dyn_cast(ActiveInst.Entity)) { assert(FunTmpl->getTemplatedDecl() == Tmpl && "Deduction from the wrong function template?"); (void) FunTmpl; SemaRef.InstantiatingSpecializations.erase( {ActiveInst.Entity->getCanonicalDecl(), ActiveInst.Kind}); atTemplateEnd(SemaRef.TemplateInstCallbacks, SemaRef, ActiveInst); ActiveInst.Kind = ActiveInstType::TemplateInstantiation; ActiveInst.Entity = New; atTemplateBegin(SemaRef.TemplateInstCallbacks, SemaRef, ActiveInst); } } const FunctionProtoType *Proto = Tmpl->getType()->getAs(); assert(Proto && "Function template without prototype?"); if (Proto->hasExceptionSpec() || Proto->getNoReturnAttr()) { FunctionProtoType::ExtProtoInfo EPI = Proto->getExtProtoInfo(); // DR1330: In C++11, defer instantiation of a non-trivial // exception specification. // DR1484: Local classes and their members are instantiated along with the // containing function. 
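// NOTE (added illustrative commentary; not part of the upstream source): in
// C++11 and later a dependent exception specification is not substituted
// together with the declaration; it is marked EST_Uninstantiated below and
// resolved only when it is actually needed. A minimal example:
//
//   template <typename T>
//   void h(T t) noexcept(noexcept(t.run()));   // not evaluated yet
//
//   struct Task { void run() noexcept; };
//
//   void use(Task t) {
//     h(t);       // the exception specification of h<Task> is instantiated
//   }             // around here, via Sema::InstantiateExceptionSpec, rather
//                 // than when h<Task> was first declared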
    if (SemaRef.getLangOpts().CPlusPlus11 &&
        EPI.ExceptionSpec.Type != EST_None &&
        EPI.ExceptionSpec.Type != EST_DynamicNone &&
        EPI.ExceptionSpec.Type != EST_BasicNoexcept &&
        !Tmpl->isInLocalScopeForInstantiation()) {
      FunctionDecl *ExceptionSpecTemplate = Tmpl;
      if (EPI.ExceptionSpec.Type == EST_Uninstantiated)
        ExceptionSpecTemplate = EPI.ExceptionSpec.SourceTemplate;
      ExceptionSpecificationType NewEST = EST_Uninstantiated;
      if (EPI.ExceptionSpec.Type == EST_Unevaluated)
        NewEST = EST_Unevaluated;

      // Mark the function as having an uninstantiated exception specification.
      const FunctionProtoType *NewProto
        = New->getType()->getAs<FunctionProtoType>();
      assert(NewProto && "Template instantiation without function prototype?");
      EPI = NewProto->getExtProtoInfo();
      EPI.ExceptionSpec.Type = NewEST;
      EPI.ExceptionSpec.SourceDecl = New;
      EPI.ExceptionSpec.SourceTemplate = ExceptionSpecTemplate;
      New->setType(SemaRef.Context.getFunctionType(
          NewProto->getReturnType(), NewProto->getParamTypes(), EPI));
    } else {
      Sema::ContextRAII SwitchContext(SemaRef, New);
      SemaRef.SubstExceptionSpec(New, Proto, TemplateArgs);
    }
  }

  // Get the definition. Leaves the variable unchanged if undefined.
  const FunctionDecl *Definition = Tmpl;
  Tmpl->isDefined(Definition);

  SemaRef.InstantiateAttrs(TemplateArgs, Definition, New,
                           LateAttrs, StartingScope);

  return false;
}

/// Initializes common fields of an instantiated method
/// declaration (New) from the corresponding fields of its template
/// (Tmpl).
///
/// \returns true if there was an error
bool
TemplateDeclInstantiator::InitMethodInstantiation(CXXMethodDecl *New,
                                                  CXXMethodDecl *Tmpl) {
  if (InitFunctionInstantiation(New, Tmpl))
    return true;

  if (isa<CXXDestructorDecl>(New) && SemaRef.getLangOpts().CPlusPlus11)
    SemaRef.AdjustDestructorExceptionSpec(cast<CXXDestructorDecl>(New));

  New->setAccess(Tmpl->getAccess());
  if (Tmpl->isVirtualAsWritten())
    New->setVirtualAsWritten(true);

  // FIXME: New needs a pointer to Tmpl
  return false;
}

bool TemplateDeclInstantiator::SubstDefaultedFunction(FunctionDecl *New,
                                                      FunctionDecl *Tmpl) {
  // Transfer across any unqualified lookups.
  if (auto *DFI = Tmpl->getDefaultedFunctionInfo()) {
    SmallVector<DeclAccessPair, 32> Lookups;
    Lookups.reserve(DFI->getUnqualifiedLookups().size());
    bool AnyChanged = false;
    for (DeclAccessPair DA : DFI->getUnqualifiedLookups()) {
      NamedDecl *D = SemaRef.FindInstantiatedDecl(New->getLocation(),
                                                  DA.getDecl(), TemplateArgs);
      if (!D)
        return true;
      AnyChanged |= (D != DA.getDecl());
      Lookups.push_back(DeclAccessPair::make(D, DA.getAccess()));
    }

    // It's unlikely that substitution will change any declarations. Don't
    // store an unnecessary copy in that case.
    New->setDefaultedFunctionInfo(
        AnyChanged ? FunctionDecl::DefaultedFunctionInfo::Create(
                         SemaRef.Context, Lookups)
                   : DFI);
  }

  SemaRef.SetDeclDefaulted(New, Tmpl->getLocation());
  return false;
}

/// Instantiate (or find existing instantiation of) a function template with a
/// given set of template arguments.
///
/// Usually this should not be used, and template argument deduction should be
/// used in its place.
FunctionDecl * Sema::InstantiateFunctionDeclaration(FunctionTemplateDecl *FTD, const TemplateArgumentList *Args, SourceLocation Loc) { FunctionDecl *FD = FTD->getTemplatedDecl(); sema::TemplateDeductionInfo Info(Loc); InstantiatingTemplate Inst( *this, Loc, FTD, Args->asArray(), CodeSynthesisContext::ExplicitTemplateArgumentSubstitution, Info); if (Inst.isInvalid()) return nullptr; ContextRAII SavedContext(*this, FD); MultiLevelTemplateArgumentList MArgs(*Args); return cast_or_null(SubstDecl(FD, FD->getParent(), MArgs)); } /// Instantiate the definition of the given function from its /// template. /// /// \param PointOfInstantiation the point at which the instantiation was /// required. Note that this is not precisely a "point of instantiation" /// for the function, but it's close. /// /// \param Function the already-instantiated declaration of a /// function template specialization or member function of a class template /// specialization. /// /// \param Recursive if true, recursively instantiates any functions that /// are required by this instantiation. /// /// \param DefinitionRequired if true, then we are performing an explicit /// instantiation where the body of the function is required. Complain if /// there is no such body. void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation, FunctionDecl *Function, bool Recursive, bool DefinitionRequired, bool AtEndOfTU) { if (Function->isInvalidDecl() || isa(Function)) return; // Never instantiate an explicit specialization except if it is a class scope // explicit specialization. TemplateSpecializationKind TSK = Function->getTemplateSpecializationKindForInstantiation(); if (TSK == TSK_ExplicitSpecialization) return; // Don't instantiate a definition if we already have one. const FunctionDecl *ExistingDefn = nullptr; if (Function->isDefined(ExistingDefn, /*CheckForPendingFriendDefinition=*/true)) { if (ExistingDefn->isThisDeclarationADefinition()) return; // If we're asked to instantiate a function whose body comes from an // instantiated friend declaration, attach the instantiated body to the // corresponding declaration of the function. assert(ExistingDefn->isThisDeclarationInstantiatedFromAFriendDefinition()); Function = const_cast(ExistingDefn); } // Find the function body that we'll be substituting. const FunctionDecl *PatternDecl = Function->getTemplateInstantiationPattern(); assert(PatternDecl && "instantiating a non-template"); const FunctionDecl *PatternDef = PatternDecl->getDefinition(); Stmt *Pattern = nullptr; if (PatternDef) { Pattern = PatternDef->getBody(PatternDef); PatternDecl = PatternDef; if (PatternDef->willHaveBody()) PatternDef = nullptr; } // FIXME: We need to track the instantiation stack in order to know which // definitions should be visible within this instantiation. if (DiagnoseUninstantiableTemplate(PointOfInstantiation, Function, Function->getInstantiatedFromMemberFunction(), PatternDecl, PatternDef, TSK, /*Complain*/DefinitionRequired)) { if (DefinitionRequired) Function->setInvalidDecl(); else if (TSK == TSK_ExplicitInstantiationDefinition) { // Try again at the end of the translation unit (at which point a // definition will be required). 
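// NOTE (added illustrative commentary; not part of the upstream source): the
// diagnostics issued just below cover the common mistake of declaring a
// function template without making its definition visible:
//
//   template <typename T> void api(T);   // declared, never defined in this TU
//
//   void client() {
//     api(42);    // an implicit instantiation of api<int> is required, but
//   }             // there is no definition to instantiate; at the end of the
//                 // translation unit this can produce
//                 // warn_func_template_missing ("...but no definition is
//                 // available")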
assert(!Recursive); Function->setInstantiationIsPending(true); PendingInstantiations.push_back( std::make_pair(Function, PointOfInstantiation)); } else if (TSK == TSK_ImplicitInstantiation) { if (AtEndOfTU && !getDiagnostics().hasErrorOccurred() && !getSourceManager().isInSystemHeader(PatternDecl->getBeginLoc())) { Diag(PointOfInstantiation, diag::warn_func_template_missing) << Function; Diag(PatternDecl->getLocation(), diag::note_forward_template_decl); if (getLangOpts().CPlusPlus11) Diag(PointOfInstantiation, diag::note_inst_declaration_hint) << Function; } } return; } // Postpone late parsed template instantiations. if (PatternDecl->isLateTemplateParsed() && !LateTemplateParser) { Function->setInstantiationIsPending(true); LateParsedInstantiations.push_back( std::make_pair(Function, PointOfInstantiation)); return; } llvm::TimeTraceScope TimeScope("InstantiateFunction", [&]() { std::string Name; llvm::raw_string_ostream OS(Name); Function->getNameForDiagnostic(OS, getPrintingPolicy(), /*Qualified=*/true); return Name; }); // If we're performing recursive template instantiation, create our own // queue of pending implicit instantiations that we will instantiate later, // while we're still within our own instantiation context. // This has to happen before LateTemplateParser below is called, so that // it marks vtables used in late parsed templates as used. GlobalEagerInstantiationScope GlobalInstantiations(*this, /*Enabled=*/Recursive); LocalEagerInstantiationScope LocalInstantiations(*this); // Call the LateTemplateParser callback if there is a need to late parse // a templated function definition. if (!Pattern && PatternDecl->isLateTemplateParsed() && LateTemplateParser) { // FIXME: Optimize to allow individual templates to be deserialized. if (PatternDecl->isFromASTFile()) ExternalSource->ReadLateParsedTemplates(LateParsedTemplateMap); auto LPTIter = LateParsedTemplateMap.find(PatternDecl); assert(LPTIter != LateParsedTemplateMap.end() && "missing LateParsedTemplate"); LateTemplateParser(OpaqueParser, *LPTIter->second); Pattern = PatternDecl->getBody(PatternDecl); } // Note, we should never try to instantiate a deleted function template. assert((Pattern || PatternDecl->isDefaulted() || PatternDecl->hasSkippedBody()) && "unexpected kind of function template definition"); // C++1y [temp.explicit]p10: // Except for inline functions, declarations with types deduced from their // initializer or return value, and class template specializations, other // explicit instantiation declarations have the effect of suppressing the // implicit instantiation of the entity to which they refer. if (TSK == TSK_ExplicitInstantiationDeclaration && !PatternDecl->isInlined() && !PatternDecl->getReturnType()->getContainedAutoType()) return; if (PatternDecl->isInlined()) { // Function, and all later redeclarations of it (from imported modules, // for instance), are now implicitly inline. for (auto *D = Function->getMostRecentDecl(); /**/; D = D->getPreviousDecl()) { D->setImplicitlyInline(); if (D == Function) break; } } InstantiatingTemplate Inst(*this, PointOfInstantiation, Function); if (Inst.isInvalid() || Inst.isAlreadyInstantiating()) return; PrettyDeclStackTraceEntry CrashInfo(Context, Function, SourceLocation(), "instantiating function definition"); // The instantiation is visible here, even if it was first declared in an // unimported module. Function->setVisibleDespiteOwningModule(); // Copy the inner loc start from the pattern. 
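// NOTE (added illustrative commentary; not part of the upstream source): the
// [temp.explicit]p10 check above is what lets 'extern template' suppress
// duplicate instantiations while still forcing inline templates to be
// instantiated locally:
//
//   template <typename T> T twice(T v) { return v + v; }
//   extern template int twice<int>(int);    // explicit instantiation
//                                           // declaration: the definition of
//                                           // twice<int> is not instantiated
//                                           // in this TU...
//
//   template <typename T> inline T thrice(T v) { return v + v + v; }
//   extern template int thrice<int>(int);   // ...but an inline template is
//                                           // still instantiated here, since
//                                           // the suppression does not apply
//                                           // to inline functions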
Function->setInnerLocStart(PatternDecl->getInnerLocStart()); EnterExpressionEvaluationContext EvalContext( *this, Sema::ExpressionEvaluationContext::PotentiallyEvaluated); // Introduce a new scope where local variable instantiations will be // recorded, unless we're actually a member function within a local // class, in which case we need to merge our results with the parent // scope (of the enclosing function). The exception is instantiating // a function template specialization, since the template to be // instantiated already has references to locals properly substituted. bool MergeWithParentScope = false; if (CXXRecordDecl *Rec = dyn_cast(Function->getDeclContext())) MergeWithParentScope = Rec->isLocalClass() && !Function->isFunctionTemplateSpecialization(); LocalInstantiationScope Scope(*this, MergeWithParentScope); auto RebuildTypeSourceInfoForDefaultSpecialMembers = [&]() { // Special members might get their TypeSourceInfo set up w.r.t the // PatternDecl context, in which case parameters could still be pointing // back to the original class, make sure arguments are bound to the // instantiated record instead. assert(PatternDecl->isDefaulted() && "Special member needs to be defaulted"); auto PatternSM = getDefaultedFunctionKind(PatternDecl).asSpecialMember(); if (!(PatternSM == Sema::CXXCopyConstructor || PatternSM == Sema::CXXCopyAssignment || PatternSM == Sema::CXXMoveConstructor || PatternSM == Sema::CXXMoveAssignment)) return; auto *NewRec = dyn_cast(Function->getDeclContext()); const auto *PatternRec = dyn_cast(PatternDecl->getDeclContext()); if (!NewRec || !PatternRec) return; if (!PatternRec->isLambda()) return; struct SpecialMemberTypeInfoRebuilder : TreeTransform { using Base = TreeTransform; const CXXRecordDecl *OldDecl; CXXRecordDecl *NewDecl; SpecialMemberTypeInfoRebuilder(Sema &SemaRef, const CXXRecordDecl *O, CXXRecordDecl *N) : TreeTransform(SemaRef), OldDecl(O), NewDecl(N) {} bool TransformExceptionSpec(SourceLocation Loc, FunctionProtoType::ExceptionSpecInfo &ESI, SmallVectorImpl &Exceptions, bool &Changed) { return false; } QualType TransformRecordType(TypeLocBuilder &TLB, RecordTypeLoc TL) { const RecordType *T = TL.getTypePtr(); RecordDecl *Record = cast_or_null( getDerived().TransformDecl(TL.getNameLoc(), T->getDecl())); if (Record != OldDecl) return Base::TransformRecordType(TLB, TL); QualType Result = getDerived().RebuildRecordType(NewDecl); if (Result.isNull()) return QualType(); RecordTypeLoc NewTL = TLB.push(Result); NewTL.setNameLoc(TL.getNameLoc()); return Result; } } IR{*this, PatternRec, NewRec}; TypeSourceInfo *NewSI = IR.TransformType(Function->getTypeSourceInfo()); Function->setType(NewSI->getType()); Function->setTypeSourceInfo(NewSI); ParmVarDecl *Parm = Function->getParamDecl(0); TypeSourceInfo *NewParmSI = IR.TransformType(Parm->getTypeSourceInfo()); Parm->setType(NewParmSI->getType()); Parm->setTypeSourceInfo(NewParmSI); }; if (PatternDecl->isDefaulted()) { RebuildTypeSourceInfoForDefaultSpecialMembers(); SetDeclDefaulted(Function, PatternDecl->getLocation()); } else { MultiLevelTemplateArgumentList TemplateArgs = getTemplateInstantiationArgs(Function, nullptr, false, PatternDecl); // Substitute into the qualifier; we can get a substitution failure here // through evil use of alias templates. // FIXME: Is CurContext correct for this? Should we go to the (instantiation // of the) lexical context of the pattern? 
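// NOTE (added illustrative commentary; not part of the upstream source):
// MergeWithParentScope (set up above) matters for members of local classes,
// whose declarations can depend on locals of the enclosing function template:
//
//   template <typename T>
//   void outer(T seed) {
//     struct Local {
//       auto get() -> decltype(seed) { return {}; }  // type depends on a
//     };                                             // local of outer<T>
//     Local{}.get();
//   }
//   template void outer<int>(int);   // instantiating Local::get needs its
//                                    // scope merged with outer<int>'s locals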
SubstQualifier(*this, PatternDecl, Function, TemplateArgs); ActOnStartOfFunctionDef(nullptr, Function); // Enter the scope of this instantiation. We don't use // PushDeclContext because we don't have a scope. Sema::ContextRAII savedContext(*this, Function); if (addInstantiatedParametersToScope(*this, Function, PatternDecl, Scope, TemplateArgs)) return; StmtResult Body; if (PatternDecl->hasSkippedBody()) { ActOnSkippedFunctionBody(Function); Body = nullptr; } else { if (CXXConstructorDecl *Ctor = dyn_cast(Function)) { // If this is a constructor, instantiate the member initializers. InstantiateMemInitializers(Ctor, cast(PatternDecl), TemplateArgs); // If this is an MS ABI dllexport default constructor, instantiate any // default arguments. if (Context.getTargetInfo().getCXXABI().isMicrosoft() && Ctor->isDefaultConstructor()) { InstantiateDefaultCtorDefaultArgs(Ctor); } } // Instantiate the function body. Body = SubstStmt(Pattern, TemplateArgs); if (Body.isInvalid()) Function->setInvalidDecl(); } // FIXME: finishing the function body while in an expression evaluation // context seems wrong. Investigate more. ActOnFinishFunctionBody(Function, Body.get(), /*IsInstantiation=*/true); PerformDependentDiagnostics(PatternDecl, TemplateArgs); if (auto *Listener = getASTMutationListener()) Listener->FunctionDefinitionInstantiated(Function); savedContext.pop(); } DeclGroupRef DG(Function); Consumer.HandleTopLevelDecl(DG); // This class may have local implicit instantiations that need to be // instantiation within this scope. LocalInstantiations.perform(); Scope.Exit(); GlobalInstantiations.perform(); } VarTemplateSpecializationDecl *Sema::BuildVarTemplateInstantiation( VarTemplateDecl *VarTemplate, VarDecl *FromVar, const TemplateArgumentList &TemplateArgList, const TemplateArgumentListInfo &TemplateArgsInfo, SmallVectorImpl &Converted, SourceLocation PointOfInstantiation, LateInstantiatedAttrVec *LateAttrs, LocalInstantiationScope *StartingScope) { if (FromVar->isInvalidDecl()) return nullptr; InstantiatingTemplate Inst(*this, PointOfInstantiation, FromVar); if (Inst.isInvalid()) return nullptr; MultiLevelTemplateArgumentList TemplateArgLists; TemplateArgLists.addOuterTemplateArguments(&TemplateArgList); // Instantiate the first declaration of the variable template: for a partial // specialization of a static data member template, the first declaration may // or may not be the declaration in the class; if it's in the class, we want // to instantiate a member in the class (a declaration), and if it's outside, // we want to instantiate a definition. // // If we're instantiating an explicitly-specialized member template or member // partial specialization, don't do this. The member specialization completely // replaces the original declaration in this case. bool IsMemberSpec = false; if (VarTemplatePartialSpecializationDecl *PartialSpec = dyn_cast(FromVar)) IsMemberSpec = PartialSpec->isMemberSpecialization(); else if (VarTemplateDecl *FromTemplate = FromVar->getDescribedVarTemplate()) IsMemberSpec = FromTemplate->isMemberSpecialization(); if (!IsMemberSpec) FromVar = FromVar->getFirstDecl(); MultiLevelTemplateArgumentList MultiLevelList(TemplateArgList); TemplateDeclInstantiator Instantiator(*this, FromVar->getDeclContext(), MultiLevelList); // TODO: Set LateAttrs and StartingScope ... 
return cast_or_null( Instantiator.VisitVarTemplateSpecializationDecl( VarTemplate, FromVar, TemplateArgsInfo, Converted)); } /// Instantiates a variable template specialization by completing it /// with appropriate type information and initializer. VarTemplateSpecializationDecl *Sema::CompleteVarTemplateSpecializationDecl( VarTemplateSpecializationDecl *VarSpec, VarDecl *PatternDecl, const MultiLevelTemplateArgumentList &TemplateArgs) { assert(PatternDecl->isThisDeclarationADefinition() && "don't have a definition to instantiate from"); // Do substitution on the type of the declaration TypeSourceInfo *DI = SubstType(PatternDecl->getTypeSourceInfo(), TemplateArgs, PatternDecl->getTypeSpecStartLoc(), PatternDecl->getDeclName()); if (!DI) return nullptr; // Update the type of this variable template specialization. VarSpec->setType(DI->getType()); // Convert the declaration into a definition now. VarSpec->setCompleteDefinition(); // Instantiate the initializer. InstantiateVariableInitializer(VarSpec, PatternDecl, TemplateArgs); if (getLangOpts().OpenCL) deduceOpenCLAddressSpace(VarSpec); return VarSpec; } /// BuildVariableInstantiation - Used after a new variable has been created. /// Sets basic variable data and decides whether to postpone the /// variable instantiation. void Sema::BuildVariableInstantiation( VarDecl *NewVar, VarDecl *OldVar, const MultiLevelTemplateArgumentList &TemplateArgs, LateInstantiatedAttrVec *LateAttrs, DeclContext *Owner, LocalInstantiationScope *StartingScope, bool InstantiatingVarTemplate, VarTemplateSpecializationDecl *PrevDeclForVarTemplateSpecialization) { // Instantiating a partial specialization to produce a partial // specialization. bool InstantiatingVarTemplatePartialSpec = isa(OldVar) && isa(NewVar); // Instantiating from a variable template (or partial specialization) to // produce a variable template specialization. bool InstantiatingSpecFromTemplate = isa(NewVar) && (OldVar->getDescribedVarTemplate() || isa(OldVar)); // If we are instantiating a local extern declaration, the // instantiation belongs lexically to the containing function. // If we are instantiating a static data member defined // out-of-line, the instantiation will have the same lexical // context (which will be a namespace scope) as the template. if (OldVar->isLocalExternDecl()) { NewVar->setLocalExternDecl(); NewVar->setLexicalDeclContext(Owner); } else if (OldVar->isOutOfLine()) NewVar->setLexicalDeclContext(OldVar->getLexicalDeclContext()); NewVar->setTSCSpec(OldVar->getTSCSpec()); NewVar->setInitStyle(OldVar->getInitStyle()); NewVar->setCXXForRangeDecl(OldVar->isCXXForRangeDecl()); NewVar->setObjCForDecl(OldVar->isObjCForDecl()); NewVar->setConstexpr(OldVar->isConstexpr()); NewVar->setInitCapture(OldVar->isInitCapture()); NewVar->setPreviousDeclInSameBlockScope( OldVar->isPreviousDeclInSameBlockScope()); NewVar->setAccess(OldVar->getAccess()); if (!OldVar->isStaticDataMember()) { if (OldVar->isUsed(false)) NewVar->setIsUsed(); NewVar->setReferenced(OldVar->isReferenced()); } InstantiateAttrs(TemplateArgs, OldVar, NewVar, LateAttrs, StartingScope); LookupResult Previous( *this, NewVar->getDeclName(), NewVar->getLocation(), NewVar->isLocalExternDecl() ? Sema::LookupRedeclarationWithLinkage : Sema::LookupOrdinaryName, NewVar->isLocalExternDecl() ? 
Sema::ForExternalRedeclaration : forRedeclarationInCurContext()); if (NewVar->isLocalExternDecl() && OldVar->getPreviousDecl() && (!OldVar->getPreviousDecl()->getDeclContext()->isDependentContext() || OldVar->getPreviousDecl()->getDeclContext()==OldVar->getDeclContext())) { // We have a previous declaration. Use that one, so we merge with the // right type. if (NamedDecl *NewPrev = FindInstantiatedDecl( NewVar->getLocation(), OldVar->getPreviousDecl(), TemplateArgs)) Previous.addDecl(NewPrev); } else if (!isa(NewVar) && OldVar->hasLinkage()) { LookupQualifiedName(Previous, NewVar->getDeclContext(), false); } else if (PrevDeclForVarTemplateSpecialization) { Previous.addDecl(PrevDeclForVarTemplateSpecialization); } CheckVariableDeclaration(NewVar, Previous); if (!InstantiatingVarTemplate) { NewVar->getLexicalDeclContext()->addHiddenDecl(NewVar); if (!NewVar->isLocalExternDecl() || !NewVar->getPreviousDecl()) NewVar->getDeclContext()->makeDeclVisibleInContext(NewVar); } if (!OldVar->isOutOfLine()) { if (NewVar->getDeclContext()->isFunctionOrMethod()) CurrentInstantiationScope->InstantiatedLocal(OldVar, NewVar); } // Link instantiations of static data members back to the template from // which they were instantiated. // // Don't do this when instantiating a template (we link the template itself // back in that case) nor when instantiating a static data member template // (that's not a member specialization). if (NewVar->isStaticDataMember() && !InstantiatingVarTemplate && !InstantiatingSpecFromTemplate) NewVar->setInstantiationOfStaticDataMember(OldVar, TSK_ImplicitInstantiation); // If the pattern is an (in-class) explicit specialization, then the result // is also an explicit specialization. if (VarTemplateSpecializationDecl *OldVTSD = dyn_cast(OldVar)) { if (OldVTSD->getSpecializationKind() == TSK_ExplicitSpecialization && !isa(OldVTSD)) cast(NewVar)->setSpecializationKind( TSK_ExplicitSpecialization); } // Forward the mangling number from the template to the instantiated decl. Context.setManglingNumber(NewVar, Context.getManglingNumber(OldVar)); Context.setStaticLocalNumber(NewVar, Context.getStaticLocalNumber(OldVar)); // Figure out whether to eagerly instantiate the initializer. if (InstantiatingVarTemplate || InstantiatingVarTemplatePartialSpec) { // We're producing a template. Don't instantiate the initializer yet. } else if (NewVar->getType()->isUndeducedType()) { // We need the type to complete the declaration of the variable. InstantiateVariableInitializer(NewVar, OldVar, TemplateArgs); } else if (InstantiatingSpecFromTemplate || (OldVar->isInline() && OldVar->isThisDeclarationADefinition() && !NewVar->isThisDeclarationADefinition())) { // Delay instantiation of the initializer for variable template // specializations or inline static data members until a definition of the // variable is needed. } else { InstantiateVariableInitializer(NewVar, OldVar, TemplateArgs); } // Diagnose unused local variables with dependent types, where the diagnostic // will have been deferred. if (!NewVar->isInvalidDecl() && NewVar->getDeclContext()->isFunctionOrMethod() && OldVar->getType()->isDependentType()) DiagnoseUnusedDecl(NewVar); } /// Instantiate the initializer of a variable. 
void Sema::InstantiateVariableInitializer( VarDecl *Var, VarDecl *OldVar, const MultiLevelTemplateArgumentList &TemplateArgs) { if (ASTMutationListener *L = getASTContext().getASTMutationListener()) L->VariableDefinitionInstantiated(Var); // We propagate the 'inline' flag with the initializer, because it // would otherwise imply that the variable is a definition for a // non-static data member. if (OldVar->isInlineSpecified()) Var->setInlineSpecified(); else if (OldVar->isInline()) Var->setImplicitlyInline(); if (OldVar->getInit()) { EnterExpressionEvaluationContext Evaluated( *this, Sema::ExpressionEvaluationContext::PotentiallyEvaluated, Var); // Instantiate the initializer. ExprResult Init; { ContextRAII SwitchContext(*this, Var->getDeclContext()); Init = SubstInitializer(OldVar->getInit(), TemplateArgs, OldVar->getInitStyle() == VarDecl::CallInit); } if (!Init.isInvalid()) { Expr *InitExpr = Init.get(); if (Var->hasAttr() && (!InitExpr || !InitExpr->isConstantInitializer(getASTContext(), false))) { // Do not dynamically initialize dllimport variables. } else if (InitExpr) { bool DirectInit = OldVar->isDirectInit(); AddInitializerToDecl(Var, InitExpr, DirectInit); } else ActOnUninitializedDecl(Var); } else { // FIXME: Not too happy about invalidating the declaration // because of a bogus initializer. Var->setInvalidDecl(); } } else { // `inline` variables are a definition and declaration all in one; we won't // pick up an initializer from anywhere else. if (Var->isStaticDataMember() && !Var->isInline()) { if (!Var->isOutOfLine()) return; // If the declaration inside the class had an initializer, don't add // another one to the out-of-line definition. if (OldVar->getFirstDecl()->hasInit()) return; } // We'll add an initializer to a for-range declaration later. if (Var->isCXXForRangeDecl() || Var->isObjCForDecl()) return; ActOnUninitializedDecl(Var); } if (getLangOpts().CUDA) checkAllowedCUDAInitializer(Var); } /// Instantiate the definition of the given variable from its /// template. /// /// \param PointOfInstantiation the point at which the instantiation was /// required. Note that this is not precisely a "point of instantiation" /// for the variable, but it's close. /// /// \param Var the already-instantiated declaration of a templated variable. /// /// \param Recursive if true, recursively instantiates any functions that /// are required by this instantiation. /// /// \param DefinitionRequired if true, then we are performing an explicit /// instantiation where a definition of the variable is required. Complain /// if there is no such definition. void Sema::InstantiateVariableDefinition(SourceLocation PointOfInstantiation, VarDecl *Var, bool Recursive, bool DefinitionRequired, bool AtEndOfTU) { if (Var->isInvalidDecl()) return; // Never instantiate an explicitly-specialized entity. TemplateSpecializationKind TSK = Var->getTemplateSpecializationKindForInstantiation(); if (TSK == TSK_ExplicitSpecialization) return; // Find the pattern and the arguments to substitute into it. VarDecl *PatternDecl = Var->getTemplateInstantiationPattern(); assert(PatternDecl && "no pattern for templated variable"); MultiLevelTemplateArgumentList TemplateArgs = getTemplateInstantiationArgs(Var); VarTemplateSpecializationDecl *VarSpec = dyn_cast(Var); if (VarSpec) { // If this is a static data member template, there might be an // uninstantiated initializer on the declaration. If so, instantiate // it now. // // FIXME: This largely duplicates what we would do below. 
The difference // is that along this path we may instantiate an initializer from an // in-class declaration of the template and instantiate the definition // from a separate out-of-class definition. if (PatternDecl->isStaticDataMember() && (PatternDecl = PatternDecl->getFirstDecl())->hasInit() && !Var->hasInit()) { // FIXME: Factor out the duplicated instantiation context setup/tear down // code here. InstantiatingTemplate Inst(*this, PointOfInstantiation, Var); if (Inst.isInvalid() || Inst.isAlreadyInstantiating()) return; PrettyDeclStackTraceEntry CrashInfo(Context, Var, SourceLocation(), "instantiating variable initializer"); // The instantiation is visible here, even if it was first declared in an // unimported module. Var->setVisibleDespiteOwningModule(); // If we're performing recursive template instantiation, create our own // queue of pending implicit instantiations that we will instantiate // later, while we're still within our own instantiation context. GlobalEagerInstantiationScope GlobalInstantiations(*this, /*Enabled=*/Recursive); LocalInstantiationScope Local(*this); LocalEagerInstantiationScope LocalInstantiations(*this); // Enter the scope of this instantiation. We don't use // PushDeclContext because we don't have a scope. ContextRAII PreviousContext(*this, Var->getDeclContext()); InstantiateVariableInitializer(Var, PatternDecl, TemplateArgs); PreviousContext.pop(); // This variable may have local implicit instantiations that need to be // instantiated within this scope. LocalInstantiations.perform(); Local.Exit(); GlobalInstantiations.perform(); } } else { assert(Var->isStaticDataMember() && PatternDecl->isStaticDataMember() && "not a static data member?"); } VarDecl *Def = PatternDecl->getDefinition(getASTContext()); // If we don't have a definition of the variable template, we won't perform // any instantiation. Rather, we rely on the user to instantiate this // definition (or provide a specialization for it) in another translation // unit. if (!Def && !DefinitionRequired) { if (TSK == TSK_ExplicitInstantiationDefinition) { PendingInstantiations.push_back( std::make_pair(Var, PointOfInstantiation)); } else if (TSK == TSK_ImplicitInstantiation) { // Warn about missing definition at the end of translation unit. if (AtEndOfTU && !getDiagnostics().hasErrorOccurred() && !getSourceManager().isInSystemHeader(PatternDecl->getBeginLoc())) { Diag(PointOfInstantiation, diag::warn_var_template_missing) << Var; Diag(PatternDecl->getLocation(), diag::note_forward_template_decl); if (getLangOpts().CPlusPlus11) Diag(PointOfInstantiation, diag::note_inst_declaration_hint) << Var; } return; } } // FIXME: We need to track the instantiation stack in order to know which // definitions should be visible within this instantiation. // FIXME: Produce diagnostics when Var->getInstantiatedFromStaticDataMember(). if (DiagnoseUninstantiableTemplate(PointOfInstantiation, Var, /*InstantiatedFromMember*/false, PatternDecl, Def, TSK, /*Complain*/DefinitionRequired)) return; // C++11 [temp.explicit]p10: // Except for inline functions, const variables of literal types, variables // of reference types, [...] explicit instantiation declarations // have the effect of suppressing the implicit instantiation of the entity // to which they refer. // // FIXME: That's not exactly the same as "might be usable in constant // expressions", which only allows constexpr variables and const integral // types, not arbitrary const literal types. 
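// NOTE (added illustrative commentary; not part of the upstream source): a
// sketch of what the check just below is for. An explicit instantiation
// declaration normally suppresses instantiation of a variable's definition,
// but not when the variable might be usable in constant expressions:
//
//   template <typename T> constexpr T unit = T{1};
//   extern template const int unit<int>;   // explicit instantiation
//                                          // declaration
//   static_assert(unit<int> == 1, "");     // still OK: the initializer of
//                                          // unit<int> is instantiated anyway
//                                          // so it can be used in constant
//                                          // expressions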
if (TSK == TSK_ExplicitInstantiationDeclaration && !Var->mightBeUsableInConstantExpressions(getASTContext())) return; // Make sure to pass the instantiated variable to the consumer at the end. struct PassToConsumerRAII { ASTConsumer &Consumer; VarDecl *Var; PassToConsumerRAII(ASTConsumer &Consumer, VarDecl *Var) : Consumer(Consumer), Var(Var) { } ~PassToConsumerRAII() { Consumer.HandleCXXStaticMemberVarInstantiation(Var); } } PassToConsumerRAII(Consumer, Var); // If we already have a definition, we're done. if (VarDecl *Def = Var->getDefinition()) { // We may be explicitly instantiating something we've already implicitly // instantiated. Def->setTemplateSpecializationKind(Var->getTemplateSpecializationKind(), PointOfInstantiation); return; } InstantiatingTemplate Inst(*this, PointOfInstantiation, Var); if (Inst.isInvalid() || Inst.isAlreadyInstantiating()) return; PrettyDeclStackTraceEntry CrashInfo(Context, Var, SourceLocation(), "instantiating variable definition"); // If we're performing recursive template instantiation, create our own // queue of pending implicit instantiations that we will instantiate later, // while we're still within our own instantiation context. GlobalEagerInstantiationScope GlobalInstantiations(*this, /*Enabled=*/Recursive); // Enter the scope of this instantiation. We don't use // PushDeclContext because we don't have a scope. ContextRAII PreviousContext(*this, Var->getDeclContext()); LocalInstantiationScope Local(*this); LocalEagerInstantiationScope LocalInstantiations(*this); VarDecl *OldVar = Var; if (Def->isStaticDataMember() && !Def->isOutOfLine()) { // We're instantiating an inline static data member whose definition was // provided inside the class. InstantiateVariableInitializer(Var, Def, TemplateArgs); } else if (!VarSpec) { Var = cast_or_null(SubstDecl(Def, Var->getDeclContext(), TemplateArgs)); } else if (Var->isStaticDataMember() && Var->getLexicalDeclContext()->isRecord()) { // We need to instantiate the definition of a static data member template, // and all we have is the in-class declaration of it. Instantiate a separate // declaration of the definition. TemplateDeclInstantiator Instantiator(*this, Var->getDeclContext(), TemplateArgs); Var = cast_or_null(Instantiator.VisitVarTemplateSpecializationDecl( VarSpec->getSpecializedTemplate(), Def, VarSpec->getTemplateArgsInfo(), VarSpec->getTemplateArgs().asArray(), VarSpec)); if (Var) { llvm::PointerUnion PatternPtr = VarSpec->getSpecializedTemplateOrPartial(); if (VarTemplatePartialSpecializationDecl *Partial = PatternPtr.dyn_cast()) cast(Var)->setInstantiationOf( Partial, &VarSpec->getTemplateInstantiationArgs()); // Attach the initializer. InstantiateVariableInitializer(Var, Def, TemplateArgs); } } else // Complete the existing variable's definition with an appropriately // substituted type and initializer. Var = CompleteVarTemplateSpecializationDecl(VarSpec, Def, TemplateArgs); PreviousContext.pop(); if (Var) { PassToConsumerRAII.Var = Var; Var->setTemplateSpecializationKind(OldVar->getTemplateSpecializationKind(), OldVar->getPointOfInstantiation()); } // This variable may have local implicit instantiations that need to be // instantiated within this scope. LocalInstantiations.perform(); Local.Exit(); GlobalInstantiations.perform(); } void Sema::InstantiateMemInitializers(CXXConstructorDecl *New, const CXXConstructorDecl *Tmpl, const MultiLevelTemplateArgumentList &TemplateArgs) { SmallVector NewInits; bool AnyErrors = Tmpl->isInvalidDecl(); // Instantiate all the initializers. 
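// NOTE (added illustrative commentary; not part of the upstream source): the
// pack-expansion branch below handles constructors whose written
// mem-initializer is itself a pack expansion:
//
//   template <typename... Bases>
//   struct Mixin : Bases... {
//     Mixin() : Bases()... {}      // one written initializer; expands to one
//   };                             // base initializer per element of Bases
//
//   struct A {}; struct B {};
//   Mixin<A, B> m;                 // Mixin<A, B>::Mixin() is instantiated
//                                  // with two base initializers, A() and B()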
for (const auto *Init : Tmpl->inits()) { // Only instantiate written initializers, let Sema re-construct implicit // ones. if (!Init->isWritten()) continue; SourceLocation EllipsisLoc; if (Init->isPackExpansion()) { // This is a pack expansion. We should expand it now. TypeLoc BaseTL = Init->getTypeSourceInfo()->getTypeLoc(); SmallVector Unexpanded; collectUnexpandedParameterPacks(BaseTL, Unexpanded); collectUnexpandedParameterPacks(Init->getInit(), Unexpanded); bool ShouldExpand = false; bool RetainExpansion = false; Optional NumExpansions; if (CheckParameterPacksForExpansion(Init->getEllipsisLoc(), BaseTL.getSourceRange(), Unexpanded, TemplateArgs, ShouldExpand, RetainExpansion, NumExpansions)) { AnyErrors = true; New->setInvalidDecl(); continue; } assert(ShouldExpand && "Partial instantiation of base initializer?"); // Loop over all of the arguments in the argument pack(s), for (unsigned I = 0; I != *NumExpansions; ++I) { Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(*this, I); // Instantiate the initializer. ExprResult TempInit = SubstInitializer(Init->getInit(), TemplateArgs, /*CXXDirectInit=*/true); if (TempInit.isInvalid()) { AnyErrors = true; break; } // Instantiate the base type. TypeSourceInfo *BaseTInfo = SubstType(Init->getTypeSourceInfo(), TemplateArgs, Init->getSourceLocation(), New->getDeclName()); if (!BaseTInfo) { AnyErrors = true; break; } // Build the initializer. MemInitResult NewInit = BuildBaseInitializer(BaseTInfo->getType(), BaseTInfo, TempInit.get(), New->getParent(), SourceLocation()); if (NewInit.isInvalid()) { AnyErrors = true; break; } NewInits.push_back(NewInit.get()); } continue; } // Instantiate the initializer. ExprResult TempInit = SubstInitializer(Init->getInit(), TemplateArgs, /*CXXDirectInit=*/true); if (TempInit.isInvalid()) { AnyErrors = true; continue; } MemInitResult NewInit; if (Init->isDelegatingInitializer() || Init->isBaseInitializer()) { TypeSourceInfo *TInfo = SubstType(Init->getTypeSourceInfo(), TemplateArgs, Init->getSourceLocation(), New->getDeclName()); if (!TInfo) { AnyErrors = true; New->setInvalidDecl(); continue; } if (Init->isBaseInitializer()) NewInit = BuildBaseInitializer(TInfo->getType(), TInfo, TempInit.get(), New->getParent(), EllipsisLoc); else NewInit = BuildDelegatingInitializer(TInfo, TempInit.get(), cast(CurContext->getParent())); } else if (Init->isMemberInitializer()) { FieldDecl *Member = cast_or_null(FindInstantiatedDecl( Init->getMemberLocation(), Init->getMember(), TemplateArgs)); if (!Member) { AnyErrors = true; New->setInvalidDecl(); continue; } NewInit = BuildMemberInitializer(Member, TempInit.get(), Init->getSourceLocation()); } else if (Init->isIndirectMemberInitializer()) { IndirectFieldDecl *IndirectMember = cast_or_null(FindInstantiatedDecl( Init->getMemberLocation(), Init->getIndirectMember(), TemplateArgs)); if (!IndirectMember) { AnyErrors = true; New->setInvalidDecl(); continue; } NewInit = BuildMemberInitializer(IndirectMember, TempInit.get(), Init->getSourceLocation()); } if (NewInit.isInvalid()) { AnyErrors = true; New->setInvalidDecl(); } else { NewInits.push_back(NewInit.get()); } } // Assign all the initializers to the new constructor. ActOnMemInitializers(New, /*FIXME: ColonLoc */ SourceLocation(), NewInits, AnyErrors); } // TODO: this could be templated if the various decl types used the // same method name. 
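// NOTE (added illustrative commentary; not part of the upstream source): the
// isInstantiationOf overloads below walk the "instantiated from member"
// chain, which can be more than one link long for members of nested
// templates:
//
//   template <typename T>
//   struct Outer {
//     template <typename U>
//     struct Inner {};             // the pattern
//   };
//
//   Outer<int>::Inner<float> x;    // Outer<int>::Inner is instantiated from
//                                  // Outer<T>::Inner, and Inner<float> from
//                                  // that member template in turn, so
//                                  // matching pattern against instance may
//                                  // have to follow several links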
static bool isInstantiationOf(ClassTemplateDecl *Pattern, ClassTemplateDecl *Instance) { Pattern = Pattern->getCanonicalDecl(); do { Instance = Instance->getCanonicalDecl(); if (Pattern == Instance) return true; Instance = Instance->getInstantiatedFromMemberTemplate(); } while (Instance); return false; } static bool isInstantiationOf(FunctionTemplateDecl *Pattern, FunctionTemplateDecl *Instance) { Pattern = Pattern->getCanonicalDecl(); do { Instance = Instance->getCanonicalDecl(); if (Pattern == Instance) return true; Instance = Instance->getInstantiatedFromMemberTemplate(); } while (Instance); return false; } static bool isInstantiationOf(ClassTemplatePartialSpecializationDecl *Pattern, ClassTemplatePartialSpecializationDecl *Instance) { Pattern = cast(Pattern->getCanonicalDecl()); do { Instance = cast( Instance->getCanonicalDecl()); if (Pattern == Instance) return true; Instance = Instance->getInstantiatedFromMember(); } while (Instance); return false; } static bool isInstantiationOf(CXXRecordDecl *Pattern, CXXRecordDecl *Instance) { Pattern = Pattern->getCanonicalDecl(); do { Instance = Instance->getCanonicalDecl(); if (Pattern == Instance) return true; Instance = Instance->getInstantiatedFromMemberClass(); } while (Instance); return false; } static bool isInstantiationOf(FunctionDecl *Pattern, FunctionDecl *Instance) { Pattern = Pattern->getCanonicalDecl(); do { Instance = Instance->getCanonicalDecl(); if (Pattern == Instance) return true; Instance = Instance->getInstantiatedFromMemberFunction(); } while (Instance); return false; } static bool isInstantiationOf(EnumDecl *Pattern, EnumDecl *Instance) { Pattern = Pattern->getCanonicalDecl(); do { Instance = Instance->getCanonicalDecl(); if (Pattern == Instance) return true; Instance = Instance->getInstantiatedFromMemberEnum(); } while (Instance); return false; } static bool isInstantiationOf(UsingShadowDecl *Pattern, UsingShadowDecl *Instance, ASTContext &C) { return declaresSameEntity(C.getInstantiatedFromUsingShadowDecl(Instance), Pattern); } static bool isInstantiationOf(UsingDecl *Pattern, UsingDecl *Instance, ASTContext &C) { return declaresSameEntity(C.getInstantiatedFromUsingDecl(Instance), Pattern); } template static bool isInstantiationOfUnresolvedUsingDecl(T *Pattern, Decl *Other, ASTContext &Ctx) { // An unresolved using declaration can instantiate to an unresolved using // declaration, or to a using declaration or a using declaration pack. // // Multiple declarations can claim to be instantiated from an unresolved // using declaration if it's a pack expansion. We want the UsingPackDecl // in that case, not the individual UsingDecls within the pack. 
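// NOTE (added illustrative commentary; not part of the upstream source): the
// pack-expansion case described above arises from code such as the C++17
// overload-set idiom:
//
//   template <typename... Bases>
//   struct Overloaded : Bases... {
//     using Bases::operator()...;  // one unresolved using declaration that is
//   };                             // a pack expansion; instantiating it
//                                  // yields a UsingPackDecl wrapping one
//                                  // UsingDecl per base class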
bool OtherIsPackExpansion; NamedDecl *OtherFrom; if (auto *OtherUUD = dyn_cast(Other)) { OtherIsPackExpansion = OtherUUD->isPackExpansion(); OtherFrom = Ctx.getInstantiatedFromUsingDecl(OtherUUD); } else if (auto *OtherUPD = dyn_cast(Other)) { OtherIsPackExpansion = true; OtherFrom = OtherUPD->getInstantiatedFromUsingDecl(); } else if (auto *OtherUD = dyn_cast(Other)) { OtherIsPackExpansion = false; OtherFrom = Ctx.getInstantiatedFromUsingDecl(OtherUD); } else { return false; } return Pattern->isPackExpansion() == OtherIsPackExpansion && declaresSameEntity(OtherFrom, Pattern); } static bool isInstantiationOfStaticDataMember(VarDecl *Pattern, VarDecl *Instance) { assert(Instance->isStaticDataMember()); Pattern = Pattern->getCanonicalDecl(); do { Instance = Instance->getCanonicalDecl(); if (Pattern == Instance) return true; Instance = Instance->getInstantiatedFromStaticDataMember(); } while (Instance); return false; } // Other is the prospective instantiation // D is the prospective pattern static bool isInstantiationOf(ASTContext &Ctx, NamedDecl *D, Decl *Other) { if (auto *UUD = dyn_cast(D)) return isInstantiationOfUnresolvedUsingDecl(UUD, Other, Ctx); if (auto *UUD = dyn_cast(D)) return isInstantiationOfUnresolvedUsingDecl(UUD, Other, Ctx); if (D->getKind() != Other->getKind()) return false; if (auto *Record = dyn_cast(Other)) return isInstantiationOf(cast(D), Record); if (auto *Function = dyn_cast(Other)) return isInstantiationOf(cast(D), Function); if (auto *Enum = dyn_cast(Other)) return isInstantiationOf(cast(D), Enum); if (auto *Var = dyn_cast(Other)) if (Var->isStaticDataMember()) return isInstantiationOfStaticDataMember(cast(D), Var); if (auto *Temp = dyn_cast(Other)) return isInstantiationOf(cast(D), Temp); if (auto *Temp = dyn_cast(Other)) return isInstantiationOf(cast(D), Temp); if (auto *PartialSpec = dyn_cast(Other)) return isInstantiationOf(cast(D), PartialSpec); if (auto *Field = dyn_cast(Other)) { if (!Field->getDeclName()) { // This is an unnamed field. return declaresSameEntity(Ctx.getInstantiatedFromUnnamedFieldDecl(Field), cast(D)); } } if (auto *Using = dyn_cast(Other)) return isInstantiationOf(cast(D), Using, Ctx); if (auto *Shadow = dyn_cast(Other)) return isInstantiationOf(cast(D), Shadow, Ctx); return D->getDeclName() && D->getDeclName() == cast(Other)->getDeclName(); } template static NamedDecl *findInstantiationOf(ASTContext &Ctx, NamedDecl *D, ForwardIterator first, ForwardIterator last) { for (; first != last; ++first) if (isInstantiationOf(Ctx, D, *first)) return cast(*first); return nullptr; } /// Finds the instantiation of the given declaration context /// within the current instantiation. /// /// \returns NULL if there was an error DeclContext *Sema::FindInstantiatedContext(SourceLocation Loc, DeclContext* DC, const MultiLevelTemplateArgumentList &TemplateArgs) { if (NamedDecl *D = dyn_cast(DC)) { Decl* ID = FindInstantiatedDecl(Loc, D, TemplateArgs, true); return cast_or_null(ID); } else return DC; } /// Determine whether the given context is dependent on template parameters at /// level \p Level or below. /// /// Sometimes we only substitute an inner set of template arguments and leave /// the outer templates alone. In such cases, contexts dependent only on the /// outer levels are not effectively dependent. 
static bool isDependentContextAtLevel(DeclContext *DC, unsigned Level) { if (!DC->isDependentContext()) return false; if (!Level) return true; return cast(DC)->getTemplateDepth() > Level; } /// Find the instantiation of the given declaration within the /// current instantiation. /// /// This routine is intended to be used when \p D is a declaration /// referenced from within a template, that needs to mapped into the /// corresponding declaration within an instantiation. For example, /// given: /// /// \code /// template /// struct X { /// enum Kind { /// KnownValue = sizeof(T) /// }; /// /// bool getKind() const { return KnownValue; } /// }; /// /// template struct X; /// \endcode /// /// In the instantiation of X::getKind(), we need to map the \p /// EnumConstantDecl for \p KnownValue (which refers to /// X::::KnownValue) to its instantiation (X::::KnownValue). /// \p FindInstantiatedDecl performs this mapping from within the instantiation /// of X. NamedDecl *Sema::FindInstantiatedDecl(SourceLocation Loc, NamedDecl *D, const MultiLevelTemplateArgumentList &TemplateArgs, bool FindingInstantiatedContext) { DeclContext *ParentDC = D->getDeclContext(); // Determine whether our parent context depends on any of the template // arguments we're currently substituting. bool ParentDependsOnArgs = isDependentContextAtLevel( ParentDC, TemplateArgs.getNumRetainedOuterLevels()); // FIXME: Parameters of pointer to functions (y below) that are themselves // parameters (p below) can have their ParentDC set to the translation-unit // - thus we can not consistently check if the ParentDC of such a parameter // is Dependent or/and a FunctionOrMethod. // For e.g. this code, during Template argument deduction tries to // find an instantiated decl for (T y) when the ParentDC for y is // the translation unit. // e.g. template void Foo(auto (*p)(T y) -> decltype(y())) {} // float baz(float(*)()) { return 0.0; } // Foo(baz); // The better fix here is perhaps to ensure that a ParmVarDecl, by the time // it gets here, always has a FunctionOrMethod as its ParentDC?? // For now: // - as long as we have a ParmVarDecl whose parent is non-dependent and // whose type is not instantiation dependent, do nothing to the decl // - otherwise find its instantiated decl. if (isa(D) && !ParentDependsOnArgs && !cast(D)->getType()->isInstantiationDependentType()) return D; if (isa(D) || isa(D) || isa(D) || isa(D) || (ParentDependsOnArgs && (ParentDC->isFunctionOrMethod() || isa(ParentDC) || isa(ParentDC))) || - (isa(D) && cast(D)->isLambda())) { + (isa(D) && cast(D)->isLambda() && + cast(D)->getTemplateDepth() > + TemplateArgs.getNumRetainedOuterLevels())) { // D is a local of some kind. Look into the map of local // declarations to their instantiations. if (CurrentInstantiationScope) { if (auto Found = CurrentInstantiationScope->findInstantiationOf(D)) { if (Decl *FD = Found->dyn_cast()) return cast(FD); int PackIdx = ArgumentPackSubstitutionIndex; assert(PackIdx != -1 && "found declaration pack but not pack expanding"); typedef LocalInstantiationScope::DeclArgumentPack DeclArgumentPack; return cast((*Found->get())[PackIdx]); } } // If we're performing a partial substitution during template argument // deduction, we may not have values for template parameters yet. They // just map to themselves. 
if (isa(D) || isa(D) || isa(D)) return D; if (D->isInvalidDecl()) return nullptr; // Normally this function only searches for already instantiated declaration // however we have to make an exclusion for local types used before // definition as in the code: // // template void f1() { // void g1(struct x1); // struct x1 {}; // } // // In this case instantiation of the type of 'g1' requires definition of // 'x1', which is defined later. Error recovery may produce an enum used // before definition. In these cases we need to instantiate relevant // declarations here. bool NeedInstantiate = false; if (CXXRecordDecl *RD = dyn_cast(D)) NeedInstantiate = RD->isLocalClass(); else if (isa(D) && isa(D->getDeclContext())) NeedInstantiate = true; else NeedInstantiate = isa(D); if (NeedInstantiate) { Decl *Inst = SubstDecl(D, CurContext, TemplateArgs); CurrentInstantiationScope->InstantiatedLocal(D, Inst); return cast(Inst); } // If we didn't find the decl, then we must have a label decl that hasn't // been found yet. Lazily instantiate it and return it now. assert(isa(D)); Decl *Inst = SubstDecl(D, CurContext, TemplateArgs); assert(Inst && "Failed to instantiate label??"); CurrentInstantiationScope->InstantiatedLocal(D, Inst); return cast(Inst); } if (CXXRecordDecl *Record = dyn_cast(D)) { if (!Record->isDependentContext()) return D; // Determine whether this record is the "templated" declaration describing // a class template or class template partial specialization. ClassTemplateDecl *ClassTemplate = Record->getDescribedClassTemplate(); if (ClassTemplate) ClassTemplate = ClassTemplate->getCanonicalDecl(); else if (ClassTemplatePartialSpecializationDecl *PartialSpec = dyn_cast(Record)) ClassTemplate = PartialSpec->getSpecializedTemplate()->getCanonicalDecl(); // Walk the current context to find either the record or an instantiation of // it. DeclContext *DC = CurContext; while (!DC->isFileContext()) { // If we're performing substitution while we're inside the template // definition, we'll find our own context. We're done. if (DC->Equals(Record)) return Record; if (CXXRecordDecl *InstRecord = dyn_cast(DC)) { // Check whether we're in the process of instantiating a class template // specialization of the template we're mapping. if (ClassTemplateSpecializationDecl *InstSpec = dyn_cast(InstRecord)){ ClassTemplateDecl *SpecTemplate = InstSpec->getSpecializedTemplate(); if (ClassTemplate && isInstantiationOf(ClassTemplate, SpecTemplate)) return InstRecord; } // Check whether we're in the process of instantiating a member class. if (isInstantiationOf(Record, InstRecord)) return InstRecord; } // Move to the outer template scope. if (FunctionDecl *FD = dyn_cast(DC)) { if (FD->getFriendObjectKind() && FD->getDeclContext()->isFileContext()){ DC = FD->getLexicalDeclContext(); continue; } // An implicit deduction guide acts as if it's within the class template // specialization described by its name and first N template params. auto *Guide = dyn_cast(FD); if (Guide && Guide->isImplicit()) { TemplateDecl *TD = Guide->getDeducedTemplate(); // Convert the arguments to an "as-written" list. 
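// NOTE (added illustrative commentary; not part of the upstream source):
// implicit deduction guides reach this code during class template argument
// deduction; declarations referenced from the guide are mapped as if the
// guide sat inside the class template specialization it names:
//
//   template <typename T>
//   struct Box {
//     Box(T value);                // implies an implicit deduction guide,
//   };                             //   template <typename T> Box(T) -> Box<T>;
//
//   Box b(42);                     // while deducing Box<int>, lookups for the
//                                  // guide behave as if made within Box<int>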
TemplateArgumentListInfo Args(Loc, Loc); for (TemplateArgument Arg : TemplateArgs.getInnermost().take_front( TD->getTemplateParameters()->size())) { ArrayRef Unpacked(Arg); if (Arg.getKind() == TemplateArgument::Pack) Unpacked = Arg.pack_elements(); for (TemplateArgument UnpackedArg : Unpacked) Args.addArgument( getTrivialTemplateArgumentLoc(UnpackedArg, QualType(), Loc)); } QualType T = CheckTemplateIdType(TemplateName(TD), Loc, Args); if (T.isNull()) return nullptr; auto *SubstRecord = T->getAsCXXRecordDecl(); assert(SubstRecord && "class template id not a class type?"); // Check that this template-id names the primary template and not a // partial or explicit specialization. (In the latter cases, it's // meaningless to attempt to find an instantiation of D within the // specialization.) // FIXME: The standard doesn't say what should happen here. if (FindingInstantiatedContext && usesPartialOrExplicitSpecialization( Loc, cast(SubstRecord))) { Diag(Loc, diag::err_specialization_not_primary_template) << T << (SubstRecord->getTemplateSpecializationKind() == TSK_ExplicitSpecialization); return nullptr; } DC = SubstRecord; continue; } } DC = DC->getParent(); } // Fall through to deal with other dependent record types (e.g., // anonymous unions in class templates). } if (!ParentDependsOnArgs) return D; ParentDC = FindInstantiatedContext(Loc, ParentDC, TemplateArgs); if (!ParentDC) return nullptr; if (ParentDC != D->getDeclContext()) { // We performed some kind of instantiation in the parent context, // so now we need to look into the instantiated parent context to // find the instantiation of the declaration D. // If our context used to be dependent, we may need to instantiate // it before performing lookup into that context. bool IsBeingInstantiated = false; if (CXXRecordDecl *Spec = dyn_cast(ParentDC)) { if (!Spec->isDependentContext()) { QualType T = Context.getTypeDeclType(Spec); const RecordType *Tag = T->getAs(); assert(Tag && "type of non-dependent record is not a RecordType"); if (Tag->isBeingDefined()) IsBeingInstantiated = true; if (!Tag->isBeingDefined() && RequireCompleteType(Loc, T, diag::err_incomplete_type)) return nullptr; ParentDC = Tag->getDecl(); } } NamedDecl *Result = nullptr; // FIXME: If the name is a dependent name, this lookup won't necessarily // find it. Does that ever matter? if (auto Name = D->getDeclName()) { DeclarationNameInfo NameInfo(Name, D->getLocation()); DeclarationNameInfo NewNameInfo = SubstDeclarationNameInfo(NameInfo, TemplateArgs); Name = NewNameInfo.getName(); if (!Name) return nullptr; DeclContext::lookup_result Found = ParentDC->lookup(Name); Result = findInstantiationOf(Context, D, Found.begin(), Found.end()); } else { // Since we don't have a name for the entity we're looking for, // our only option is to walk through all of the declarations to // find that name. This will occur in a few cases: // // - anonymous struct/union within a template // - unnamed class/struct/union/enum within a template // // FIXME: Find a better way to find these instantiations! Result = findInstantiationOf(Context, D, ParentDC->decls_begin(), ParentDC->decls_end()); } if (!Result) { if (isa(D)) { // UsingShadowDecls can instantiate to nothing because of using hiding. } else if (hasUncompilableErrorOccurred()) { // We've already complained about some ill-formed code, so most likely // this declaration failed to instantiate. There's no point in // complaining further, since this is normal in invalid code. // FIXME: Use more fine-grained 'invalid' tracking for this. 
} else if (IsBeingInstantiated) { // The class in which this member exists is currently being // instantiated, and we haven't gotten around to instantiating this // member yet. This can happen when the code uses forward declarations // of member classes, and introduces ordering dependencies via // template instantiation. Diag(Loc, diag::err_member_not_yet_instantiated) << D->getDeclName() << Context.getTypeDeclType(cast(ParentDC)); Diag(D->getLocation(), diag::note_non_instantiated_member_here); } else if (EnumConstantDecl *ED = dyn_cast(D)) { // This enumeration constant was found when the template was defined, // but can't be found in the instantiation. This can happen if an // unscoped enumeration member is explicitly specialized. EnumDecl *Enum = cast(ED->getLexicalDeclContext()); EnumDecl *Spec = cast(FindInstantiatedDecl(Loc, Enum, TemplateArgs)); assert(Spec->getTemplateSpecializationKind() == TSK_ExplicitSpecialization); Diag(Loc, diag::err_enumerator_does_not_exist) << D->getDeclName() << Context.getTypeDeclType(cast(Spec->getDeclContext())); Diag(Spec->getLocation(), diag::note_enum_specialized_here) << Context.getTypeDeclType(Spec); } else { // We should have found something, but didn't. llvm_unreachable("Unable to find instantiation of declaration!"); } } D = Result; } return D; } /// Performs template instantiation for all implicit template /// instantiations we have seen until this point. void Sema::PerformPendingInstantiations(bool LocalOnly) { std::deque delayedPCHInstantiations; while (!PendingLocalImplicitInstantiations.empty() || (!LocalOnly && !PendingInstantiations.empty())) { PendingImplicitInstantiation Inst; if (PendingLocalImplicitInstantiations.empty()) { Inst = PendingInstantiations.front(); PendingInstantiations.pop_front(); } else { Inst = PendingLocalImplicitInstantiations.front(); PendingLocalImplicitInstantiations.pop_front(); } // Instantiate function definitions if (FunctionDecl *Function = dyn_cast(Inst.first)) { bool DefinitionRequired = Function->getTemplateSpecializationKind() == TSK_ExplicitInstantiationDefinition; if (Function->isMultiVersion()) { getASTContext().forEachMultiversionedFunctionVersion( Function, [this, Inst, DefinitionRequired](FunctionDecl *CurFD) { InstantiateFunctionDefinition(/*FIXME:*/ Inst.second, CurFD, true, DefinitionRequired, true); if (CurFD->isDefined()) CurFD->setInstantiationIsPending(false); }); } else { InstantiateFunctionDefinition(/*FIXME:*/ Inst.second, Function, true, DefinitionRequired, true); if (Function->isDefined()) Function->setInstantiationIsPending(false); } // Definition of a PCH-ed template declaration may be available only in the TU. if (!LocalOnly && LangOpts.PCHInstantiateTemplates && TUKind == TU_Prefix && Function->instantiationIsPending()) delayedPCHInstantiations.push_back(Inst); continue; } // Instantiate variable definitions VarDecl *Var = cast(Inst.first); assert((Var->isStaticDataMember() || isa(Var)) && "Not a static data member, nor a variable template" " specialization?"); // Don't try to instantiate declarations if the most recent redeclaration // is invalid. if (Var->getMostRecentDecl()->isInvalidDecl()) continue; // Check if the most recent declaration has changed the specialization kind // and removed the need for implicit instantiation. 
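// A rough illustration of the specialization kinds distinguished by the
// switch below (hypothetical code, not from this file):
//
//   template <class T> T var = T();    // the variable template
//   template <> int var<char> = 1;     // TSK_ExplicitSpecialization: skip
//   extern template int var<short>;    // TSK_ExplicitInstantiationDeclaration: skip
//   template int var<long>;            // TSK_ExplicitInstantiationDefinition:
//                                      //   only the explicit instantiation
//                                      //   itself is instantiated here
//   int use = var<int>;                // TSK_ImplicitInstantiation: instantiate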
switch (Var->getMostRecentDecl() ->getTemplateSpecializationKindForInstantiation()) { case TSK_Undeclared: llvm_unreachable("Cannot instantitiate an undeclared specialization."); case TSK_ExplicitInstantiationDeclaration: case TSK_ExplicitSpecialization: continue; // No longer need to instantiate this type. case TSK_ExplicitInstantiationDefinition: // We only need an instantiation if the pending instantiation *is* the // explicit instantiation. if (Var != Var->getMostRecentDecl()) continue; break; case TSK_ImplicitInstantiation: break; } PrettyDeclStackTraceEntry CrashInfo(Context, Var, SourceLocation(), "instantiating variable definition"); bool DefinitionRequired = Var->getTemplateSpecializationKind() == TSK_ExplicitInstantiationDefinition; // Instantiate static data member definitions or variable template // specializations. InstantiateVariableDefinition(/*FIXME:*/ Inst.second, Var, true, DefinitionRequired, true); } if (!LocalOnly && LangOpts.PCHInstantiateTemplates) PendingInstantiations.swap(delayedPCHInstantiations); } void Sema::PerformDependentDiagnostics(const DeclContext *Pattern, const MultiLevelTemplateArgumentList &TemplateArgs) { for (auto DD : Pattern->ddiags()) { switch (DD->getKind()) { case DependentDiagnostic::Access: HandleDependentAccessCheck(*DD, TemplateArgs); break; } } } diff --git a/contrib/llvm-project/libcxx/include/span b/contrib/llvm-project/libcxx/include/span index fd95ecca17f7..b8dbc7e01fd6 100644 --- a/contrib/llvm-project/libcxx/include/span +++ b/contrib/llvm-project/libcxx/include/span @@ -1,592 +1,636 @@ // -*- C++ -*- //===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===---------------------------------------------------------------------===// #ifndef _LIBCPP_SPAN #define _LIBCPP_SPAN /* span synopsis namespace std { // constants inline constexpr size_t dynamic_extent = numeric_limits::max(); // [views.span], class template span template class span; template inline constexpr bool ranges::enable_view> = true; template inline constexpr bool ranges::enable_borrowed_range> = true; // [span.objectrep], views of object representation template span as_bytes(span s) noexcept; template span< byte, ((Extent == dynamic_extent) ? 
dynamic_extent : (sizeof(ElementType) * Extent))> as_writable_bytes(span s) noexcept; template class span { public: // constants and types using element_type = ElementType; using value_type = remove_cv_t; using size_type = size_t; using difference_type = ptrdiff_t; using pointer = element_type*; using const_pointer = const element_type*; using reference = element_type&; using const_reference = const element_type&; using iterator = implementation-defined; using reverse_iterator = std::reverse_iterator; static constexpr size_type extent = Extent; // [span.cons], span constructors, copy, assignment, and destructor constexpr span() noexcept; template constexpr explicit(Extent != dynamic_extent) span(It first, size_type count); template constexpr explicit(Extent != dynamic_extent) span(It first, End last); template constexpr span(type_identity_t (&arr)[N]) noexcept; template constexpr span(array& arr) noexcept; template constexpr span(const array& arr) noexcept; template constexpr explicit(Extent != dynamic_extent) span(R&& r); constexpr span(const span& other) noexcept = default; template constexpr explicit(Extent != dynamic_extent) span(const span& s) noexcept; ~span() noexcept = default; constexpr span& operator=(const span& other) noexcept = default; // [span.sub], span subviews template constexpr span first() const; template constexpr span last() const; template constexpr span subspan() const; constexpr span first(size_type count) const; constexpr span last(size_type count) const; constexpr span subspan(size_type offset, size_type count = dynamic_extent) const; // [span.obs], span observers constexpr size_type size() const noexcept; constexpr size_type size_bytes() const noexcept; [[nodiscard]] constexpr bool empty() const noexcept; // [span.elem], span element access constexpr reference operator[](size_type idx) const; constexpr reference front() const; constexpr reference back() const; constexpr pointer data() const noexcept; // [span.iterators], span iterator support constexpr iterator begin() const noexcept; constexpr iterator end() const noexcept; constexpr reverse_iterator rbegin() const noexcept; constexpr reverse_iterator rend() const noexcept; private: pointer data_; // exposition only size_type size_; // exposition only }; template span(It, EndOrSize) -> span>>; template span(T (&)[N]) -> span; template span(array&) -> span; template span(const array&) -> span; template span(R&&) -> span>>; } // namespace std */ #include <__config> #include <__debug> #include <__iterator/concepts.h> #include <__iterator/wrap_iter.h> #include <__ranges/concepts.h> #include <__ranges/data.h> #include <__ranges/enable_borrowed_range.h> #include <__ranges/enable_view.h> #include <__ranges/size.h> #include // for array #include // for byte #include // for iterators #include #include // for remove_cv, etc #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header #endif _LIBCPP_PUSH_MACROS #include <__undef_macros> _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER > 17 inline constexpr size_t dynamic_extent = numeric_limits::max(); template class span; template struct __is_std_array : false_type {}; template struct __is_std_array> : true_type {}; template struct __is_std_span : false_type {}; template struct __is_std_span> : true_type {}; -#if !defined(_LIBCPP_HAS_NO_CONCEPTS) && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) +#if defined(_LIBCPP_HAS_NO_CONCEPTS) || defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) +// This is a temporary workaround until we ship -- we've unfortunately been 
+// shipping before its API was finalized, and we used to provide a constructor +// from container types that had the requirements below. To avoid breaking code that +// has started relying on the range-based constructor until we ship all of , +// we emulate the constructor requirements like this. +template +struct __span_compatible_range : false_type { }; + +template +struct __span_compatible_range<_Range, _ElementType, void_t< + enable_if_t>::value>, + enable_if_t>::value>, + enable_if_t>>, + decltype(data(declval<_Range>())), + decltype(size(declval<_Range>())), + enable_if_t()))>(*)[], _ElementType(*)[]>> +>> : true_type { }; +#else template concept __span_compatible_range = ranges::contiguous_range<_Range> && ranges::sized_range<_Range> && (ranges::borrowed_range<_Range> || is_const_v<_ElementType>) && !__is_std_span>::value && !__is_std_array>::value && !is_array_v> && is_convertible_v>(*)[], _ElementType(*)[]>; #endif template class _LIBCPP_TEMPLATE_VIS span { public: // constants and types using element_type = _Tp; using value_type = remove_cv_t<_Tp>; using size_type = size_t; using difference_type = ptrdiff_t; using pointer = _Tp *; using const_pointer = const _Tp *; using reference = _Tp &; using const_reference = const _Tp &; #if (_LIBCPP_DEBUG_LEVEL == 2) || defined(_LIBCPP_ABI_SPAN_POINTER_ITERATORS) using iterator = pointer; #else using iterator = __wrap_iter; #endif using reverse_iterator = _VSTD::reverse_iterator; static constexpr size_type extent = _Extent; // [span.cons], span constructors, copy, assignment, and destructor template = nullptr> _LIBCPP_INLINE_VISIBILITY constexpr span() noexcept : __data{nullptr} {} constexpr span (const span&) noexcept = default; constexpr span& operator=(const span&) noexcept = default; #if !defined(_LIBCPP_HAS_NO_CONCEPTS) template && is_convertible_v>(*)[], element_type (*)[]>, nullptr_t> = nullptr> _LIBCPP_INLINE_VISIBILITY constexpr explicit span(_It __first, size_type __count) : __data{_VSTD::to_address(__first)} { (void)__count; _LIBCPP_ASSERT(_Extent == __count, "size mismatch in span's constructor (iterator, len)"); } template < class _It, class _End, enable_if_t > (*)[], element_type (*)[]> && contiguous_iterator<_It> && sized_sentinel_for<_End, _It> && !is_convertible_v<_End, size_t>, nullptr_t> = nullptr> _LIBCPP_INLINE_VISIBILITY constexpr explicit span(_It __first, _End __last) : __data{_VSTD::to_address(__first)} { (void)__last; _LIBCPP_ASSERT((__last - __first >= 0), "invalid range in span's constructor (iterator, sentinel)"); _LIBCPP_ASSERT(__last - __first == _Extent, "invalid range in span's constructor (iterator, sentinel): last - first != extent"); } #endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_INLINE_VISIBILITY constexpr span(type_identity_t (&__arr)[_Extent]) noexcept : __data{__arr} {} template , nullptr_t> = nullptr> _LIBCPP_INLINE_VISIBILITY constexpr span(array<_OtherElementType, _Extent>& __arr) noexcept : __data{__arr.data()} {} template , nullptr_t> = nullptr> _LIBCPP_INLINE_VISIBILITY constexpr span(const array<_OtherElementType, _Extent>& __arr) noexcept : __data{__arr.data()} {} -#if !defined(_LIBCPP_HAS_NO_CONCEPTS) && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) +#if defined(_LIBCPP_HAS_NO_CONCEPTS) || defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + template ::value + >> + _LIBCPP_INLINE_VISIBILITY + constexpr explicit span(_Container& __c) : __data{std::data(__c)} { + _LIBCPP_ASSERT(std::size(__c) == _Extent, "size mismatch in span's constructor (range)"); + } + template ::value + >> + 
_LIBCPP_INLINE_VISIBILITY + constexpr explicit span(const _Container& __c) : __data{std::data(__c)} { + _LIBCPP_ASSERT(std::size(__c) == _Extent, "size mismatch in span's constructor (range)"); + } +#else template <__span_compatible_range _Range> _LIBCPP_INLINE_VISIBILITY constexpr explicit span(_Range&& __r) : __data{ranges::data(__r)} { _LIBCPP_ASSERT(ranges::size(__r) == _Extent, "size mismatch in span's constructor (range)"); } #endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) template _LIBCPP_INLINE_VISIBILITY constexpr span(const span<_OtherElementType, _Extent>& __other, enable_if_t< is_convertible_v<_OtherElementType(*)[], element_type (*)[]>, nullptr_t> = nullptr) : __data{__other.data()} {} template _LIBCPP_INLINE_VISIBILITY constexpr explicit span(const span<_OtherElementType, dynamic_extent>& __other, enable_if_t< is_convertible_v<_OtherElementType(*)[], element_type (*)[]>, nullptr_t> = nullptr) noexcept : __data{__other.data()} { _LIBCPP_ASSERT(_Extent == __other.size(), "size mismatch in span's constructor (other span)"); } // ~span() noexcept = default; template _LIBCPP_INLINE_VISIBILITY constexpr span first() const noexcept { static_assert(_Count <= _Extent, "Count out of range in span::first()"); return span{data(), _Count}; } template _LIBCPP_INLINE_VISIBILITY constexpr span last() const noexcept { static_assert(_Count <= _Extent, "Count out of range in span::last()"); return span{data() + size() - _Count, _Count}; } _LIBCPP_INLINE_VISIBILITY constexpr span first(size_type __count) const noexcept { _LIBCPP_ASSERT(__count <= size(), "Count out of range in span::first(count)"); return {data(), __count}; } _LIBCPP_INLINE_VISIBILITY constexpr span last(size_type __count) const noexcept { _LIBCPP_ASSERT(__count <= size(), "Count out of range in span::last(count)"); return {data() + size() - __count, __count}; } template _LIBCPP_INLINE_VISIBILITY constexpr auto subspan() const noexcept -> span { static_assert(_Offset <= _Extent, "Offset out of range in span::subspan()"); static_assert(_Count == dynamic_extent || _Count <= _Extent - _Offset, "Offset + count out of range in span::subspan()"); using _ReturnType = span; return _ReturnType{data() + _Offset, _Count == dynamic_extent ? 
size() - _Offset : _Count}; } _LIBCPP_INLINE_VISIBILITY constexpr span subspan(size_type __offset, size_type __count = dynamic_extent) const noexcept { _LIBCPP_ASSERT(__offset <= size(), "Offset out of range in span::subspan(offset, count)"); _LIBCPP_ASSERT(__count <= size() || __count == dynamic_extent, "Count out of range in span::subspan(offset, count)"); if (__count == dynamic_extent) return {data() + __offset, size() - __offset}; _LIBCPP_ASSERT(__count <= size() - __offset, "Offset + count out of range in span::subspan(offset, count)"); return {data() + __offset, __count}; } _LIBCPP_INLINE_VISIBILITY constexpr size_type size() const noexcept { return _Extent; } _LIBCPP_INLINE_VISIBILITY constexpr size_type size_bytes() const noexcept { return _Extent * sizeof(element_type); } [[nodiscard]] _LIBCPP_INLINE_VISIBILITY constexpr bool empty() const noexcept { return _Extent == 0; } _LIBCPP_INLINE_VISIBILITY constexpr reference operator[](size_type __idx) const noexcept { _LIBCPP_ASSERT(__idx < size(), "span[] index out of bounds"); return __data[__idx]; } _LIBCPP_INLINE_VISIBILITY constexpr reference front() const noexcept { _LIBCPP_ASSERT(!empty(), "span::front() on empty span"); return __data[0]; } _LIBCPP_INLINE_VISIBILITY constexpr reference back() const noexcept { _LIBCPP_ASSERT(!empty(), "span::back() on empty span"); return __data[size()-1]; } _LIBCPP_INLINE_VISIBILITY constexpr pointer data() const noexcept { return __data; } // [span.iter], span iterator support _LIBCPP_INLINE_VISIBILITY constexpr iterator begin() const noexcept { return iterator(data()); } _LIBCPP_INLINE_VISIBILITY constexpr iterator end() const noexcept { return iterator(data() + size()); } _LIBCPP_INLINE_VISIBILITY constexpr reverse_iterator rbegin() const noexcept { return reverse_iterator(end()); } _LIBCPP_INLINE_VISIBILITY constexpr reverse_iterator rend() const noexcept { return reverse_iterator(begin()); } _LIBCPP_INLINE_VISIBILITY span __as_bytes() const noexcept { return span{reinterpret_cast(data()), size_bytes()}; } _LIBCPP_INLINE_VISIBILITY span __as_writable_bytes() const noexcept { return span{reinterpret_cast(data()), size_bytes()}; } private: pointer __data; }; template class _LIBCPP_TEMPLATE_VIS span<_Tp, dynamic_extent> { private: public: // constants and types using element_type = _Tp; using value_type = remove_cv_t<_Tp>; using size_type = size_t; using difference_type = ptrdiff_t; using pointer = _Tp *; using const_pointer = const _Tp *; using reference = _Tp &; using const_reference = const _Tp &; #if (_LIBCPP_DEBUG_LEVEL == 2) || defined(_LIBCPP_ABI_SPAN_POINTER_ITERATORS) using iterator = pointer; #else using iterator = __wrap_iter; #endif using reverse_iterator = _VSTD::reverse_iterator; static constexpr size_type extent = dynamic_extent; // [span.cons], span constructors, copy, assignment, and destructor _LIBCPP_INLINE_VISIBILITY constexpr span() noexcept : __data{nullptr}, __size{0} {} constexpr span (const span&) noexcept = default; constexpr span& operator=(const span&) noexcept = default; #if !defined(_LIBCPP_HAS_NO_CONCEPTS) template && is_convertible_v > (*)[], element_type (*)[]>, nullptr_t> = nullptr> _LIBCPP_INLINE_VISIBILITY constexpr span(_It __first, size_type __count) : __data{_VSTD::to_address(__first)}, __size{__count} {} template < class _It, class _End, enable_if_t > (*)[], element_type (*)[]> && contiguous_iterator<_It> && sized_sentinel_for<_End, _It> && !is_convertible_v<_End, size_t>, nullptr_t> = nullptr> _LIBCPP_INLINE_VISIBILITY constexpr span(_It __first, _End 
__last) : __data(_VSTD::to_address(__first)), __size(__last - __first) {} #endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) template _LIBCPP_INLINE_VISIBILITY constexpr span(type_identity_t (&__arr)[_Sz]) noexcept : __data{__arr}, __size{_Sz} {} template , nullptr_t> = nullptr> _LIBCPP_INLINE_VISIBILITY constexpr span(array<_OtherElementType, _Sz>& __arr) noexcept : __data{__arr.data()}, __size{_Sz} {} template , nullptr_t> = nullptr> _LIBCPP_INLINE_VISIBILITY constexpr span(const array<_OtherElementType, _Sz>& __arr) noexcept : __data{__arr.data()}, __size{_Sz} {} -#if !defined(_LIBCPP_HAS_NO_CONCEPTS) && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) +#if defined(_LIBCPP_HAS_NO_CONCEPTS) || defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + template ::value + >> + _LIBCPP_INLINE_VISIBILITY + constexpr span(_Container& __c) : __data(std::data(__c)), __size{std::size(__c)} {} + template ::value + >> + _LIBCPP_INLINE_VISIBILITY + constexpr span(const _Container& __c) : __data(std::data(__c)), __size{std::size(__c)} {} +#else template <__span_compatible_range _Range> _LIBCPP_INLINE_VISIBILITY constexpr span(_Range&& __r) : __data(ranges::data(__r)), __size{ranges::size(__r)} {} #endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) template _LIBCPP_INLINE_VISIBILITY constexpr span(const span<_OtherElementType, _OtherExtent>& __other, enable_if_t< is_convertible_v<_OtherElementType(*)[], element_type (*)[]>, nullptr_t> = nullptr) noexcept : __data{__other.data()}, __size{__other.size()} {} // ~span() noexcept = default; template _LIBCPP_INLINE_VISIBILITY constexpr span first() const noexcept { _LIBCPP_ASSERT(_Count <= size(), "Count out of range in span::first()"); return span{data(), _Count}; } template _LIBCPP_INLINE_VISIBILITY constexpr span last() const noexcept { _LIBCPP_ASSERT(_Count <= size(), "Count out of range in span::last()"); return span{data() + size() - _Count, _Count}; } _LIBCPP_INLINE_VISIBILITY constexpr span first(size_type __count) const noexcept { _LIBCPP_ASSERT(__count <= size(), "Count out of range in span::first(count)"); return {data(), __count}; } _LIBCPP_INLINE_VISIBILITY constexpr span last (size_type __count) const noexcept { _LIBCPP_ASSERT(__count <= size(), "Count out of range in span::last(count)"); return {data() + size() - __count, __count}; } template _LIBCPP_INLINE_VISIBILITY constexpr span subspan() const noexcept { _LIBCPP_ASSERT(_Offset <= size(), "Offset out of range in span::subspan()"); _LIBCPP_ASSERT(_Count == dynamic_extent || _Count <= size() - _Offset, "Offset + count out of range in span::subspan()"); return span{data() + _Offset, _Count == dynamic_extent ? 
size() - _Offset : _Count}; } constexpr span _LIBCPP_INLINE_VISIBILITY subspan(size_type __offset, size_type __count = dynamic_extent) const noexcept { _LIBCPP_ASSERT(__offset <= size(), "Offset out of range in span::subspan(offset, count)"); _LIBCPP_ASSERT(__count <= size() || __count == dynamic_extent, "count out of range in span::subspan(offset, count)"); if (__count == dynamic_extent) return {data() + __offset, size() - __offset}; _LIBCPP_ASSERT(__count <= size() - __offset, "Offset + count out of range in span::subspan(offset, count)"); return {data() + __offset, __count}; } _LIBCPP_INLINE_VISIBILITY constexpr size_type size() const noexcept { return __size; } _LIBCPP_INLINE_VISIBILITY constexpr size_type size_bytes() const noexcept { return __size * sizeof(element_type); } [[nodiscard]] _LIBCPP_INLINE_VISIBILITY constexpr bool empty() const noexcept { return __size == 0; } _LIBCPP_INLINE_VISIBILITY constexpr reference operator[](size_type __idx) const noexcept { _LIBCPP_ASSERT(__idx < size(), "span[] index out of bounds"); return __data[__idx]; } _LIBCPP_INLINE_VISIBILITY constexpr reference front() const noexcept { _LIBCPP_ASSERT(!empty(), "span[].front() on empty span"); return __data[0]; } _LIBCPP_INLINE_VISIBILITY constexpr reference back() const noexcept { _LIBCPP_ASSERT(!empty(), "span[].back() on empty span"); return __data[size()-1]; } _LIBCPP_INLINE_VISIBILITY constexpr pointer data() const noexcept { return __data; } // [span.iter], span iterator support _LIBCPP_INLINE_VISIBILITY constexpr iterator begin() const noexcept { return iterator(data()); } _LIBCPP_INLINE_VISIBILITY constexpr iterator end() const noexcept { return iterator(data() + size()); } _LIBCPP_INLINE_VISIBILITY constexpr reverse_iterator rbegin() const noexcept { return reverse_iterator(end()); } _LIBCPP_INLINE_VISIBILITY constexpr reverse_iterator rend() const noexcept { return reverse_iterator(begin()); } _LIBCPP_INLINE_VISIBILITY span __as_bytes() const noexcept { return {reinterpret_cast(data()), size_bytes()}; } _LIBCPP_INLINE_VISIBILITY span __as_writable_bytes() const noexcept { return {reinterpret_cast(data()), size_bytes()}; } private: pointer __data; size_type __size; }; #if !defined(_LIBCPP_HAS_NO_CONCEPTS) template inline constexpr bool ranges::enable_borrowed_range > = true; template inline constexpr bool ranges::enable_view> = true; #endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) // as_bytes & as_writable_bytes template _LIBCPP_INLINE_VISIBILITY auto as_bytes(span<_Tp, _Extent> __s) noexcept -> decltype(__s.__as_bytes()) { return __s.__as_bytes(); } template _LIBCPP_INLINE_VISIBILITY auto as_writable_bytes(span<_Tp, _Extent> __s) noexcept -> enable_if_t, decltype(__s.__as_writable_bytes())> { return __s.__as_writable_bytes(); } #if !defined(_LIBCPP_HAS_NO_CONCEPTS) template span(_It, _EndOrSize) -> span>>; #endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) template span(_Tp (&)[_Sz]) -> span<_Tp, _Sz>; template span(array<_Tp, _Sz>&) -> span<_Tp, _Sz>; template span(const array<_Tp, _Sz>&) -> span; #if !defined(_LIBCPP_HAS_NO_CONCEPTS) && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) template span(_Range&&) -> span>>; #endif #endif // _LIBCPP_STD_VER > 17 _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS #endif // _LIBCPP_SPAN diff --git a/contrib/llvm-project/lld/COFF/Writer.cpp b/contrib/llvm-project/lld/COFF/Writer.cpp index 12db942f1db5..1ed2327ea630 100644 --- a/contrib/llvm-project/lld/COFF/Writer.cpp +++ b/contrib/llvm-project/lld/COFF/Writer.cpp @@ -1,2098 +1,2104 @@ //===- Writer.cpp 
---------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "Writer.h" #include "COFFLinkerContext.h" #include "CallGraphSort.h" #include "Config.h" #include "DLL.h" #include "InputFiles.h" #include "LLDMapFile.h" #include "MapFile.h" #include "PDB.h" #include "SymbolTable.h" #include "Symbols.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "lld/Common/Timer.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" #include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" #include "llvm/Support/RandomNumberGenerator.h" #include "llvm/Support/xxhash.h" #include #include #include #include #include using namespace llvm; using namespace llvm::COFF; using namespace llvm::object; using namespace llvm::support; using namespace llvm::support::endian; using namespace lld; using namespace lld::coff; /* To re-generate DOSProgram: $ cat > /tmp/DOSProgram.asm org 0 ; Copy cs to ds. push cs pop ds ; Point ds:dx at the $-terminated string. mov dx, str ; Int 21/AH=09h: Write string to standard output. mov ah, 0x9 int 0x21 ; Int 21/AH=4Ch: Exit with return code (in AL). mov ax, 0x4C01 int 0x21 str: db 'This program cannot be run in DOS mode.$' align 8, db 0 $ nasm -fbin /tmp/DOSProgram.asm -o /tmp/DOSProgram.bin $ xxd -i /tmp/DOSProgram.bin */ static unsigned char dosProgram[] = { 0x0e, 0x1f, 0xba, 0x0e, 0x00, 0xb4, 0x09, 0xcd, 0x21, 0xb8, 0x01, 0x4c, 0xcd, 0x21, 0x54, 0x68, 0x69, 0x73, 0x20, 0x70, 0x72, 0x6f, 0x67, 0x72, 0x61, 0x6d, 0x20, 0x63, 0x61, 0x6e, 0x6e, 0x6f, 0x74, 0x20, 0x62, 0x65, 0x20, 0x72, 0x75, 0x6e, 0x20, 0x69, 0x6e, 0x20, 0x44, 0x4f, 0x53, 0x20, 0x6d, 0x6f, 0x64, 0x65, 0x2e, 0x24, 0x00, 0x00 }; static_assert(sizeof(dosProgram) % 8 == 0, "DOSProgram size must be multiple of 8"); static const int dosStubSize = sizeof(dos_header) + sizeof(dosProgram); static_assert(dosStubSize % 8 == 0, "DOSStub size must be multiple of 8"); static const int numberOfDataDirectory = 16; namespace { class DebugDirectoryChunk : public NonSectionChunk { public: DebugDirectoryChunk(COFFLinkerContext &c, const std::vector> &r, bool writeRepro) : records(r), writeRepro(writeRepro), ctx(c) {} size_t getSize() const override { return (records.size() + int(writeRepro)) * sizeof(debug_directory); } void writeTo(uint8_t *b) const override { auto *d = reinterpret_cast(b); for (const std::pair& record : records) { Chunk *c = record.second; OutputSection *os = ctx.getOutputSection(c); uint64_t offs = os->getFileOff() + (c->getRVA() - os->getRVA()); fillEntry(d, record.first, c->getSize(), c->getRVA(), offs); ++d; } if (writeRepro) { // FIXME: The COFF spec allows either a 0-sized entry to just say // "the timestamp field is really a hash", or a 4-byte size field // followed by that many bytes containing a longer hash (with the // lowest 4 bytes usually being the timestamp in little-endian order). // Consider storing the full 8 bytes computed by xxHash64 here. 
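// That is, the two shapes described above are roughly:
//   an empty entry:        { Type = IMAGE_DEBUG_TYPE_REPRO, SizeOfData = 0 }
//   an entry with payload: { Type = IMAGE_DEBUG_TYPE_REPRO, SizeOfData = N },
//                          pointing at a 4-byte length followed by the longer
//                          hash bytes.
// The call below emits the empty form.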
fillEntry(d, COFF::IMAGE_DEBUG_TYPE_REPRO, 0, 0, 0); } } void setTimeDateStamp(uint32_t timeDateStamp) { for (support::ulittle32_t *tds : timeDateStamps) *tds = timeDateStamp; } private: void fillEntry(debug_directory *d, COFF::DebugType debugType, size_t size, uint64_t rva, uint64_t offs) const { d->Characteristics = 0; d->TimeDateStamp = 0; d->MajorVersion = 0; d->MinorVersion = 0; d->Type = debugType; d->SizeOfData = size; d->AddressOfRawData = rva; d->PointerToRawData = offs; timeDateStamps.push_back(&d->TimeDateStamp); } mutable std::vector timeDateStamps; const std::vector> &records; bool writeRepro; COFFLinkerContext &ctx; }; class CVDebugRecordChunk : public NonSectionChunk { public: size_t getSize() const override { return sizeof(codeview::DebugInfo) + config->pdbAltPath.size() + 1; } void writeTo(uint8_t *b) const override { // Save off the DebugInfo entry to backfill the file signature (build id) // in Writer::writeBuildId buildId = reinterpret_cast(b); // variable sized field (PDB Path) char *p = reinterpret_cast(b + sizeof(*buildId)); if (!config->pdbAltPath.empty()) memcpy(p, config->pdbAltPath.data(), config->pdbAltPath.size()); p[config->pdbAltPath.size()] = '\0'; } mutable codeview::DebugInfo *buildId = nullptr; }; class ExtendedDllCharacteristicsChunk : public NonSectionChunk { public: ExtendedDllCharacteristicsChunk(uint32_t c) : characteristics(c) {} size_t getSize() const override { return 4; } void writeTo(uint8_t *buf) const override { write32le(buf, characteristics); } uint32_t characteristics = 0; }; // PartialSection represents a group of chunks that contribute to an // OutputSection. Collating a collection of PartialSections of same name and // characteristics constitutes the OutputSection. class PartialSectionKey { public: StringRef name; unsigned characteristics; bool operator<(const PartialSectionKey &other) const { int c = name.compare(other.name); if (c == 1) return false; if (c == 0) return characteristics < other.characteristics; return true; } }; // The writer writes a SymbolTable result to a file. 
class Writer { public: Writer(COFFLinkerContext &c) : buffer(errorHandler().outputBuffer), ctx(c) {} void run(); private: void createSections(); void createMiscChunks(); void createImportTables(); void appendImportThunks(); void locateImportTables(); void createExportTable(); void mergeSections(); void removeUnusedSections(); void assignAddresses(); void finalizeAddresses(); void removeEmptySections(); void assignOutputSectionIndices(); void createSymbolAndStringTable(); void openFile(StringRef outputPath); template void writeHeader(); void createSEHTable(); void createRuntimePseudoRelocs(); void insertCtorDtorSymbols(); void createGuardCFTables(); void markSymbolsForRVATable(ObjFile *file, ArrayRef symIdxChunks, SymbolRVASet &tableSymbols); void getSymbolsFromSections(ObjFile *file, ArrayRef symIdxChunks, std::vector &symbols); void maybeAddRVATable(SymbolRVASet tableSymbols, StringRef tableSym, StringRef countSym, bool hasFlag=false); void setSectionPermissions(); void writeSections(); void writeBuildId(); void sortSections(); void sortExceptionTable(); void sortCRTSectionChunks(std::vector &chunks); void addSyntheticIdata(); void fixPartialSectionChars(StringRef name, uint32_t chars); bool fixGnuImportChunks(); void fixTlsAlignment(); PartialSection *createPartialSection(StringRef name, uint32_t outChars); PartialSection *findPartialSection(StringRef name, uint32_t outChars); llvm::Optional createSymbol(Defined *d); size_t addEntryToStringTable(StringRef str); OutputSection *findSection(StringRef name); void addBaserels(); void addBaserelBlocks(std::vector &v); uint32_t getSizeOfInitializedData(); std::unique_ptr &buffer; std::map partialSections; std::vector strtab; std::vector outputSymtab; IdataContents idata; Chunk *importTableStart = nullptr; uint64_t importTableSize = 0; Chunk *edataStart = nullptr; Chunk *edataEnd = nullptr; Chunk *iatStart = nullptr; uint64_t iatSize = 0; DelayLoadContents delayIdata; EdataContents edata; bool setNoSEHCharacteristic = false; uint32_t tlsAlignment = 0; DebugDirectoryChunk *debugDirectory = nullptr; std::vector> debugRecords; CVDebugRecordChunk *buildId = nullptr; ArrayRef sectionTable; uint64_t fileSize; uint32_t pointerToSymbolTable = 0; uint64_t sizeOfImage; uint64_t sizeOfHeaders; OutputSection *textSec; OutputSection *rdataSec; OutputSection *buildidSec; OutputSection *dataSec; OutputSection *pdataSec; OutputSection *idataSec; OutputSection *edataSec; OutputSection *didatSec; OutputSection *rsrcSec; OutputSection *relocSec; OutputSection *ctorsSec; OutputSection *dtorsSec; // The first and last .pdata sections in the output file. // // We need to keep track of the location of .pdata in whichever section it // gets merged into so that we can sort its contents and emit a correct data // directory entry for the exception table. This is also the case for some // other sections (such as .edata) but because the contents of those sections // are entirely linker-generated we can keep track of their locations using // the chunks that the linker creates. All .pdata chunks come from input // files, so we need to keep track of them separately. 
Chunk *firstPdata = nullptr; Chunk *lastPdata; COFFLinkerContext &ctx; }; } // anonymous namespace void lld::coff::writeResult(COFFLinkerContext &ctx) { Writer(ctx).run(); } void OutputSection::addChunk(Chunk *c) { chunks.push_back(c); } void OutputSection::insertChunkAtStart(Chunk *c) { chunks.insert(chunks.begin(), c); } void OutputSection::setPermissions(uint32_t c) { header.Characteristics &= ~permMask; header.Characteristics |= c; } void OutputSection::merge(OutputSection *other) { chunks.insert(chunks.end(), other->chunks.begin(), other->chunks.end()); other->chunks.clear(); contribSections.insert(contribSections.end(), other->contribSections.begin(), other->contribSections.end()); other->contribSections.clear(); } // Write the section header to a given buffer. void OutputSection::writeHeaderTo(uint8_t *buf) { auto *hdr = reinterpret_cast(buf); *hdr = header; if (stringTableOff) { // If name is too long, write offset into the string table as a name. sprintf(hdr->Name, "/%d", stringTableOff); } else { assert(!config->debug || name.size() <= COFF::NameSize || (hdr->Characteristics & IMAGE_SCN_MEM_DISCARDABLE) == 0); strncpy(hdr->Name, name.data(), std::min(name.size(), (size_t)COFF::NameSize)); } } void OutputSection::addContributingPartialSection(PartialSection *sec) { contribSections.push_back(sec); } // Check whether the target address S is in range from a relocation // of type relType at address P. static bool isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin) { if (config->machine == ARMNT) { int64_t diff = AbsoluteDifference(s, p + 4) + margin; switch (relType) { case IMAGE_REL_ARM_BRANCH20T: return isInt<21>(diff); case IMAGE_REL_ARM_BRANCH24T: case IMAGE_REL_ARM_BLX23T: return isInt<25>(diff); default: return true; } } else if (config->machine == ARM64) { int64_t diff = AbsoluteDifference(s, p) + margin; switch (relType) { case IMAGE_REL_ARM64_BRANCH26: return isInt<28>(diff); case IMAGE_REL_ARM64_BRANCH19: return isInt<21>(diff); case IMAGE_REL_ARM64_BRANCH14: return isInt<16>(diff); default: return true; } } else { llvm_unreachable("Unexpected architecture"); } } // Return the last thunk for the given target if it is in range, // or create a new one. static std::pair getThunk(DenseMap &lastThunks, Defined *target, uint64_t p, uint16_t type, int margin) { Defined *&lastThunk = lastThunks[target->getRVA()]; if (lastThunk && isInRange(type, lastThunk->getRVA(), p, margin)) return {lastThunk, false}; Chunk *c; switch (config->machine) { case ARMNT: c = make(target); break; case ARM64: c = make(target); break; default: llvm_unreachable("Unexpected architecture"); } Defined *d = make("", c); lastThunk = d; return {d, true}; } // This checks all relocations, and for any relocation which isn't in range // it adds a thunk after the section chunk that contains the relocation. // If the latest thunk for the specific target is in range, that is used // instead of creating a new thunk. All range checks are done with the // specified margin, to make sure that relocations that originally are in // range, but only barely, also get thunks - in case other added thunks makes // the target go out of range. // // After adding thunks, we verify that all relocations are in range (with // no extra margin requirements). If this failed, we restart (throwing away // the previously created thunks) and retry with a wider margin. 
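// For a sense of the ranges involved (illustrative numbers derived from the
// checks in isInRange above): an AArch64 IMAGE_REL_ARM64_BRANCH26 branch
// encodes a signed 28-bit byte offset, i.e. roughly +/-128 MiB, and the
// initial margin used by finalizeAddresses below is 100 KiB. Once
// AbsoluteDifference(S, P) + margin no longer fits isInt<28>(), the branch is
// redirected to a thunk placed just after the referencing section chunk.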
static bool createThunks(OutputSection *os, int margin) { bool addressesChanged = false; DenseMap lastThunks; DenseMap, uint32_t> thunkSymtabIndices; size_t thunksSize = 0; // Recheck Chunks.size() each iteration, since we can insert more // elements into it. for (size_t i = 0; i != os->chunks.size(); ++i) { SectionChunk *sc = dyn_cast_or_null(os->chunks[i]); if (!sc) continue; size_t thunkInsertionSpot = i + 1; // Try to get a good enough estimate of where new thunks will be placed. // Offset this by the size of the new thunks added so far, to make the // estimate slightly better. size_t thunkInsertionRVA = sc->getRVA() + sc->getSize() + thunksSize; ObjFile *file = sc->file; std::vector> relocReplacements; ArrayRef originalRelocs = file->getCOFFObj()->getRelocations(sc->header); for (size_t j = 0, e = originalRelocs.size(); j < e; ++j) { const coff_relocation &rel = originalRelocs[j]; Symbol *relocTarget = file->getSymbol(rel.SymbolTableIndex); // The estimate of the source address P should be pretty accurate, // but we don't know whether the target Symbol address should be // offset by thunksSize or not (or by some of thunksSize but not all of // it), giving us some uncertainty once we have added one thunk. uint64_t p = sc->getRVA() + rel.VirtualAddress + thunksSize; Defined *sym = dyn_cast_or_null(relocTarget); if (!sym) continue; uint64_t s = sym->getRVA(); if (isInRange(rel.Type, s, p, margin)) continue; // If the target isn't in range, hook it up to an existing or new // thunk. Defined *thunk; bool wasNew; std::tie(thunk, wasNew) = getThunk(lastThunks, sym, p, rel.Type, margin); if (wasNew) { Chunk *thunkChunk = thunk->getChunk(); thunkChunk->setRVA( thunkInsertionRVA); // Estimate of where it will be located. os->chunks.insert(os->chunks.begin() + thunkInsertionSpot, thunkChunk); thunkInsertionSpot++; thunksSize += thunkChunk->getSize(); thunkInsertionRVA += thunkChunk->getSize(); addressesChanged = true; } // To redirect the relocation, add a symbol to the parent object file's // symbol table, and replace the relocation symbol table index with the // new index. auto insertion = thunkSymtabIndices.insert({{file, thunk}, ~0U}); uint32_t &thunkSymbolIndex = insertion.first->second; if (insertion.second) thunkSymbolIndex = file->addRangeThunkSymbol(thunk); relocReplacements.push_back({j, thunkSymbolIndex}); } // Get a writable copy of this section's relocations so they can be // modified. If the relocations point into the object file, allocate new // memory. Otherwise, this must be previously allocated memory that can be // modified in place. ArrayRef curRelocs = sc->getRelocs(); MutableArrayRef newRelocs; if (originalRelocs.data() == curRelocs.data()) { newRelocs = makeMutableArrayRef( bAlloc().Allocate(originalRelocs.size()), originalRelocs.size()); } else { newRelocs = makeMutableArrayRef( const_cast(curRelocs.data()), curRelocs.size()); } // Copy each relocation, but replace the symbol table indices which need // thunks. auto nextReplacement = relocReplacements.begin(); auto endReplacement = relocReplacements.end(); for (size_t i = 0, e = originalRelocs.size(); i != e; ++i) { newRelocs[i] = originalRelocs[i]; if (nextReplacement != endReplacement && nextReplacement->first == i) { newRelocs[i].SymbolTableIndex = nextReplacement->second; ++nextReplacement; } } sc->setRelocs(newRelocs); } return addressesChanged; } // Verify that all relocations are in range, with no extra margin requirements. 
static bool verifyRanges(const std::vector chunks) { for (Chunk *c : chunks) { SectionChunk *sc = dyn_cast_or_null(c); if (!sc) continue; ArrayRef relocs = sc->getRelocs(); for (size_t j = 0, e = relocs.size(); j < e; ++j) { const coff_relocation &rel = relocs[j]; Symbol *relocTarget = sc->file->getSymbol(rel.SymbolTableIndex); Defined *sym = dyn_cast_or_null(relocTarget); if (!sym) continue; uint64_t p = sc->getRVA() + rel.VirtualAddress; uint64_t s = sym->getRVA(); if (!isInRange(rel.Type, s, p, 0)) return false; } } return true; } // Assign addresses and add thunks if necessary. void Writer::finalizeAddresses() { assignAddresses(); if (config->machine != ARMNT && config->machine != ARM64) return; size_t origNumChunks = 0; for (OutputSection *sec : ctx.outputSections) { sec->origChunks = sec->chunks; origNumChunks += sec->chunks.size(); } int pass = 0; int margin = 1024 * 100; while (true) { // First check whether we need thunks at all, or if the previous pass of // adding them turned out ok. bool rangesOk = true; size_t numChunks = 0; for (OutputSection *sec : ctx.outputSections) { if (!verifyRanges(sec->chunks)) { rangesOk = false; break; } numChunks += sec->chunks.size(); } if (rangesOk) { if (pass > 0) log("Added " + Twine(numChunks - origNumChunks) + " thunks with " + "margin " + Twine(margin) + " in " + Twine(pass) + " passes"); return; } if (pass >= 10) fatal("adding thunks hasn't converged after " + Twine(pass) + " passes"); if (pass > 0) { // If the previous pass didn't work out, reset everything back to the // original conditions before retrying with a wider margin. This should // ideally never happen under real circumstances. for (OutputSection *sec : ctx.outputSections) sec->chunks = sec->origChunks; margin *= 2; } // Try adding thunks everywhere where it is needed, with a margin // to avoid things going out of range due to the added thunks. bool addressesChanged = false; for (OutputSection *sec : ctx.outputSections) addressesChanged |= createThunks(sec, margin); // If the verification above thought we needed thunks, we should have // added some. assert(addressesChanged); (void)addressesChanged; // Recalculate the layout for the whole image (and verify the ranges at // the start of the next round). assignAddresses(); pass++; } } // The main function of the writer. void Writer::run() { ScopedTimer t1(ctx.codeLayoutTimer); createImportTables(); createSections(); appendImportThunks(); // Import thunks must be added before the Control Flow Guard tables are added. 
createMiscChunks(); createExportTable(); mergeSections(); removeUnusedSections(); finalizeAddresses(); removeEmptySections(); assignOutputSectionIndices(); setSectionPermissions(); createSymbolAndStringTable(); if (fileSize > UINT32_MAX) fatal("image size (" + Twine(fileSize) + ") " + "exceeds maximum allowable size (" + Twine(UINT32_MAX) + ")"); openFile(config->outputFile); if (config->is64()) { writeHeader(); } else { writeHeader(); } writeSections(); sortExceptionTable(); // Fix up the alignment in the TLS Directory's characteristic field, // if a specific alignment value is needed if (tlsAlignment) fixTlsAlignment(); t1.stop(); if (!config->pdbPath.empty() && config->debug) { assert(buildId); createPDB(ctx, sectionTable, buildId->buildId); } writeBuildId(); writeLLDMapFile(ctx); writeMapFile(ctx); if (errorCount()) return; ScopedTimer t2(ctx.outputCommitTimer); if (auto e = buffer->commit()) fatal("failed to write the output file: " + toString(std::move(e))); } static StringRef getOutputSectionName(StringRef name) { StringRef s = name.split('$').first; // Treat a later period as a separator for MinGW, for sections like // ".ctors.01234". return s.substr(0, s.find('.', 1)); } // For /order. static void sortBySectionOrder(std::vector &chunks) { auto getPriority = [](const Chunk *c) { if (auto *sec = dyn_cast(c)) if (sec->sym) return config->order.lookup(sec->sym->getName()); return 0; }; llvm::stable_sort(chunks, [=](const Chunk *a, const Chunk *b) { return getPriority(a) < getPriority(b); }); } // Change the characteristics of existing PartialSections that belong to the // section Name to Chars. void Writer::fixPartialSectionChars(StringRef name, uint32_t chars) { for (auto it : partialSections) { PartialSection *pSec = it.second; StringRef curName = pSec->name; if (!curName.consume_front(name) || (!curName.empty() && !curName.startswith("$"))) continue; if (pSec->characteristics == chars) continue; PartialSection *destSec = createPartialSection(pSec->name, chars); destSec->chunks.insert(destSec->chunks.end(), pSec->chunks.begin(), pSec->chunks.end()); pSec->chunks.clear(); } } // Sort concrete section chunks from GNU import libraries. // // GNU binutils doesn't use short import files, but instead produces import // libraries that consist of object files, with section chunks for the .idata$* // sections. These are linked just as regular static libraries. Each import // library consists of one header object, one object file for every imported // symbol, and one trailer object. In order for the .idata tables/lists to // be formed correctly, the section chunks within each .idata$* section need // to be grouped by library, and sorted alphabetically within each library // (which makes sure the header comes first and the trailer last). bool Writer::fixGnuImportChunks() { uint32_t rdata = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ; // Make sure all .idata$* section chunks are mapped as RDATA in order to // be sorted into the same sections as our own synthesized .idata chunks. fixPartialSectionChars(".idata", rdata); bool hasIdata = false; // Sort all .idata$* chunks, grouping chunks from the same library, // with alphabetical ordering of the object fils within a library. 
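// For example (hypothetical archive and member names): chunks coming from
// "libfoo.dll.a" members "d000000.o" and "d000001.o" are compared via the
// keys "libfoo.dll.a/d000000.o" < "libfoo.dll.a/d000001.o", so all of
// libfoo's chunks stay adjacent and keep a stable alphabetical order within
// that library.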
for (auto it : partialSections) { PartialSection *pSec = it.second; if (!pSec->name.startswith(".idata")) continue; if (!pSec->chunks.empty()) hasIdata = true; llvm::stable_sort(pSec->chunks, [&](Chunk *s, Chunk *t) { SectionChunk *sc1 = dyn_cast_or_null(s); SectionChunk *sc2 = dyn_cast_or_null(t); if (!sc1 || !sc2) { // if SC1, order them ascending. If SC2 or both null, // S is not less than T. return sc1 != nullptr; } // Make a string with "libraryname/objectfile" for sorting, achieving // both grouping by library and sorting of objects within a library, // at once. std::string key1 = (sc1->file->parentName + "/" + sc1->file->getName()).str(); std::string key2 = (sc2->file->parentName + "/" + sc2->file->getName()).str(); return key1 < key2; }); } return hasIdata; } // Add generated idata chunks, for imported symbols and DLLs, and a // terminator in .idata$2. void Writer::addSyntheticIdata() { uint32_t rdata = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ; idata.create(); // Add the .idata content in the right section groups, to allow // chunks from other linked in object files to be grouped together. // See Microsoft PE/COFF spec 5.4 for details. auto add = [&](StringRef n, std::vector &v) { PartialSection *pSec = createPartialSection(n, rdata); pSec->chunks.insert(pSec->chunks.end(), v.begin(), v.end()); }; // The loader assumes a specific order of data. // Add each type in the correct order. add(".idata$2", idata.dirs); add(".idata$4", idata.lookups); add(".idata$5", idata.addresses); if (!idata.hints.empty()) add(".idata$6", idata.hints); add(".idata$7", idata.dllNames); } // Locate the first Chunk and size of the import directory list and the // IAT. void Writer::locateImportTables() { uint32_t rdata = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ; if (PartialSection *importDirs = findPartialSection(".idata$2", rdata)) { if (!importDirs->chunks.empty()) importTableStart = importDirs->chunks.front(); for (Chunk *c : importDirs->chunks) importTableSize += c->getSize(); } if (PartialSection *importAddresses = findPartialSection(".idata$5", rdata)) { if (!importAddresses->chunks.empty()) iatStart = importAddresses->chunks.front(); for (Chunk *c : importAddresses->chunks) iatSize += c->getSize(); } } // Return whether a SectionChunk's suffix (the dollar and any trailing // suffix) should be removed and sorted into the main suffixless // PartialSection. static bool shouldStripSectionSuffix(SectionChunk *sc, StringRef name) { // On MinGW, comdat groups are formed by putting the comdat group name // after the '$' in the section name. For .eh_frame$, that must // still be sorted before the .eh_frame trailer from crtend.o, thus just // strip the section name trailer. For other sections, such as // .tls$$ (where non-comdat .tls symbols are otherwise stored in // ".tls$"), they must be strictly sorted after .tls. And for the // hypothetical case of comdat .CRT$XCU, we definitely need to keep the // suffix for sorting. Thus, to play it safe, only strip the suffix for // the standard sections. 
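// Concretely (hypothetical section names, for illustration): on MinGW a
// comdat chunk named ".text$my_group" is filed under plain ".text", and
// ".eh_frame$my_group" under ".eh_frame" so it still precedes the crtend.o
// trailer, while ".CRT$XCU" and ".tls$$my_group" keep their suffixes because
// the suffix carries ordering information.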
if (!config->mingw) return false; if (!sc || !sc->isCOMDAT()) return false; return name.startswith(".text$") || name.startswith(".data$") || name.startswith(".rdata$") || name.startswith(".pdata$") || name.startswith(".xdata$") || name.startswith(".eh_frame$"); } void Writer::sortSections() { if (!config->callGraphProfile.empty()) { DenseMap order = computeCallGraphProfileOrder(ctx); for (auto it : order) { if (DefinedRegular *sym = it.first->sym) config->order[sym->getName()] = it.second; } } if (!config->order.empty()) for (auto it : partialSections) sortBySectionOrder(it.second->chunks); } // Create output section objects and add them to OutputSections. void Writer::createSections() { // First, create the builtin sections. const uint32_t data = IMAGE_SCN_CNT_INITIALIZED_DATA; const uint32_t bss = IMAGE_SCN_CNT_UNINITIALIZED_DATA; const uint32_t code = IMAGE_SCN_CNT_CODE; const uint32_t discardable = IMAGE_SCN_MEM_DISCARDABLE; const uint32_t r = IMAGE_SCN_MEM_READ; const uint32_t w = IMAGE_SCN_MEM_WRITE; const uint32_t x = IMAGE_SCN_MEM_EXECUTE; SmallDenseMap, OutputSection *> sections; auto createSection = [&](StringRef name, uint32_t outChars) { OutputSection *&sec = sections[{name, outChars}]; if (!sec) { sec = make(name, outChars); ctx.outputSections.push_back(sec); } return sec; }; // Try to match the section order used by link.exe. textSec = createSection(".text", code | r | x); createSection(".bss", bss | r | w); rdataSec = createSection(".rdata", data | r); buildidSec = createSection(".buildid", data | r); dataSec = createSection(".data", data | r | w); pdataSec = createSection(".pdata", data | r); idataSec = createSection(".idata", data | r); edataSec = createSection(".edata", data | r); didatSec = createSection(".didat", data | r); rsrcSec = createSection(".rsrc", data | r); relocSec = createSection(".reloc", data | discardable | r); ctorsSec = createSection(".ctors", data | r | w); dtorsSec = createSection(".dtors", data | r | w); // Then bin chunks by name and output characteristics. for (Chunk *c : ctx.symtab.getChunks()) { auto *sc = dyn_cast(c); if (sc && !sc->live) { if (config->verbose) sc->printDiscardedMessage(); continue; } StringRef name = c->getSectionName(); if (shouldStripSectionSuffix(sc, name)) name = name.split('$').first; if (name.startswith(".tls")) tlsAlignment = std::max(tlsAlignment, c->getAlignment()); PartialSection *pSec = createPartialSection(name, c->getOutputCharacteristics()); pSec->chunks.push_back(c); } fixPartialSectionChars(".rsrc", data | r); fixPartialSectionChars(".edata", data | r); // Even in non MinGW cases, we might need to link against GNU import // libraries. bool hasIdata = fixGnuImportChunks(); if (!idata.empty()) hasIdata = true; if (hasIdata) addSyntheticIdata(); sortSections(); if (hasIdata) locateImportTables(); // Then create an OutputSection for each section. // '$' and all following characters in input section names are // discarded when determining output section. So, .text$foo // contributes to .text, for example. See PE/COFF spec 3.2. for (auto it : partialSections) { PartialSection *pSec = it.second; StringRef name = getOutputSectionName(pSec->name); uint32_t outChars = pSec->characteristics; if (name == ".CRT") { // In link.exe, there is a special case for the I386 target where .CRT // sections are treated as if they have output characteristics DATA | R if // their characteristics are DATA | R | W. This implements the same // special case for all architectures. 
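// For instance, a hypothetical ".CRT$XCU" input chunk carrying DATA | R | W
// still lands in a read-only ".CRT" output section here, and it does so on
// every target, not just I386.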
outChars = data | r; log("Processing section " + pSec->name + " -> " + name); sortCRTSectionChunks(pSec->chunks); } OutputSection *sec = createSection(name, outChars); for (Chunk *c : pSec->chunks) sec->addChunk(c); sec->addContributingPartialSection(pSec); } // Finally, move some output sections to the end. auto sectionOrder = [&](const OutputSection *s) { // Move DISCARDABLE (or non-memory-mapped) sections to the end of file // because the loader cannot handle holes. Stripping can remove other // discardable ones than .reloc, which is first of them (created early). - if (s->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) + if (s->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) { + // Move discardable sections named .debug_ to the end, after other + // discardable sections. Stripping only removes the sections named + // .debug_* - thus try to avoid leaving holes after stripping. + if (s->name.startswith(".debug_")) + return 3; return 2; + } // .rsrc should come at the end of the non-discardable sections because its // size may change by the Win32 UpdateResources() function, causing // subsequent sections to move (see https://crbug.com/827082). if (s == rsrcSec) return 1; return 0; }; llvm::stable_sort(ctx.outputSections, [&](const OutputSection *s, const OutputSection *t) { return sectionOrder(s) < sectionOrder(t); }); } void Writer::createMiscChunks() { for (MergeChunk *p : ctx.mergeChunkInstances) { if (p) { p->finalizeContents(); rdataSec->addChunk(p); } } // Create thunks for locally-dllimported symbols. if (!ctx.symtab.localImportChunks.empty()) { for (Chunk *c : ctx.symtab.localImportChunks) rdataSec->addChunk(c); } // Create Debug Information Chunks OutputSection *debugInfoSec = config->mingw ? buildidSec : rdataSec; if (config->debug || config->repro || config->cetCompat) { debugDirectory = make(ctx, debugRecords, config->repro); debugDirectory->setAlignment(4); debugInfoSec->addChunk(debugDirectory); } if (config->debug) { // Make a CVDebugRecordChunk even when /DEBUG:CV is not specified. We // output a PDB no matter what, and this chunk provides the only means of // allowing a debugger to match a PDB and an executable. So we need it even // if we're ultimately not going to write CodeView data to the PDB. buildId = make(); debugRecords.push_back({COFF::IMAGE_DEBUG_TYPE_CODEVIEW, buildId}); } if (config->cetCompat) { debugRecords.push_back({COFF::IMAGE_DEBUG_TYPE_EX_DLLCHARACTERISTICS, make( IMAGE_DLL_CHARACTERISTICS_EX_CET_COMPAT)}); } // Align and add each chunk referenced by the debug data directory. for (std::pair r : debugRecords) { r.second->setAlignment(4); debugInfoSec->addChunk(r.second); } // Create SEH table. x86-only. if (config->safeSEH) createSEHTable(); // Create /guard:cf tables if requested. if (config->guardCF != GuardCFLevel::Off) createGuardCFTables(); if (config->autoImport) createRuntimePseudoRelocs(); if (config->mingw) insertCtorDtorSymbols(); } // Create .idata section for the DLL-imported symbol table. // The format of this section is inherently Windows-specific. // IdataContents class abstracted away the details for us, // so we just let it create chunks and add them to the section. void Writer::createImportTables() { // Initialize DLLOrder so that import entries are ordered in // the same order as in the command line. (That affects DLL // initialization order, and this ordering is MSVC-compatible.) 
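// For illustration (hypothetical command line): with "user32.lib" given
// before "kernel32.lib", the first live import from USER32 records
// dllOrder["user32.dll"] = 0 and the first from KERNEL32 records
// dllOrder["kernel32.dll"] = 1, so user32's import directory entries are
// emitted ahead of kernel32's, matching the command-line order.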
for (ImportFile *file : ctx.importFileInstances) { if (!file->live) continue; std::string dll = StringRef(file->dllName).lower(); if (config->dllOrder.count(dll) == 0) config->dllOrder[dll] = config->dllOrder.size(); if (file->impSym && !isa(file->impSym)) fatal(toString(*file->impSym) + " was replaced"); DefinedImportData *impSym = cast_or_null(file->impSym); if (config->delayLoads.count(StringRef(file->dllName).lower())) { if (!file->thunkSym) fatal("cannot delay-load " + toString(file) + " due to import of data: " + toString(*impSym)); delayIdata.add(impSym); } else { idata.add(impSym); } } } void Writer::appendImportThunks() { if (ctx.importFileInstances.empty()) return; for (ImportFile *file : ctx.importFileInstances) { if (!file->live) continue; if (!file->thunkSym) continue; if (!isa(file->thunkSym)) fatal(toString(*file->thunkSym) + " was replaced"); DefinedImportThunk *thunk = cast(file->thunkSym); if (file->thunkLive) textSec->addChunk(thunk->getChunk()); } if (!delayIdata.empty()) { Defined *helper = cast(config->delayLoadHelper); delayIdata.create(ctx, helper); for (Chunk *c : delayIdata.getChunks()) didatSec->addChunk(c); for (Chunk *c : delayIdata.getDataChunks()) dataSec->addChunk(c); for (Chunk *c : delayIdata.getCodeChunks()) textSec->addChunk(c); } } void Writer::createExportTable() { if (!edataSec->chunks.empty()) { // Allow using a custom built export table from input object files, instead // of having the linker synthesize the tables. if (config->hadExplicitExports) warn("literal .edata sections override exports"); } else if (!config->exports.empty()) { for (Chunk *c : edata.chunks) edataSec->addChunk(c); } if (!edataSec->chunks.empty()) { edataStart = edataSec->chunks.front(); edataEnd = edataSec->chunks.back(); } // Warn on exported deleting destructor. for (auto e : config->exports) if (e.sym && e.sym->getName().startswith("??_G")) warn("export of deleting dtor: " + toString(*e.sym)); } void Writer::removeUnusedSections() { // Remove sections that we can be sure won't get content, to avoid // allocating space for their section headers. auto isUnused = [this](OutputSection *s) { if (s == relocSec) return false; // This section is populated later. // MergeChunks have zero size at this point, as their size is finalized // later. Only remove sections that have no Chunks at all. return s->chunks.empty(); }; llvm::erase_if(ctx.outputSections, isUnused); } // The Windows loader doesn't seem to like empty sections, // so we remove them if any. void Writer::removeEmptySections() { auto isEmpty = [](OutputSection *s) { return s->getVirtualSize() == 0; }; llvm::erase_if(ctx.outputSections, isEmpty); } void Writer::assignOutputSectionIndices() { // Assign final output section indices, and assign each chunk to its output // section. uint32_t idx = 1; for (OutputSection *os : ctx.outputSections) { os->sectionIndex = idx; for (Chunk *c : os->chunks) c->setOutputSectionIdx(idx); ++idx; } // Merge chunks are containers of chunks, so assign those an output section // too. 
for (MergeChunk *mc : ctx.mergeChunkInstances) if (mc) for (SectionChunk *sc : mc->sections) if (sc && sc->live) sc->setOutputSectionIdx(mc->getOutputSectionIdx()); } size_t Writer::addEntryToStringTable(StringRef str) { assert(str.size() > COFF::NameSize); size_t offsetOfEntry = strtab.size() + 4; // +4 for the size field strtab.insert(strtab.end(), str.begin(), str.end()); strtab.push_back('\0'); return offsetOfEntry; } Optional Writer::createSymbol(Defined *def) { coff_symbol16 sym; switch (def->kind()) { case Symbol::DefinedAbsoluteKind: sym.Value = def->getRVA(); sym.SectionNumber = IMAGE_SYM_ABSOLUTE; break; case Symbol::DefinedSyntheticKind: // Relative symbols are unrepresentable in a COFF symbol table. return None; default: { // Don't write symbols that won't be written to the output to the symbol // table. Chunk *c = def->getChunk(); if (!c) return None; OutputSection *os = ctx.getOutputSection(c); if (!os) return None; sym.Value = def->getRVA() - os->getRVA(); sym.SectionNumber = os->sectionIndex; break; } } // Symbols that are runtime pseudo relocations don't point to the actual // symbol data itself (as they are imported), but points to the IAT entry // instead. Avoid emitting them to the symbol table, as they can confuse // debuggers. if (def->isRuntimePseudoReloc) return None; StringRef name = def->getName(); if (name.size() > COFF::NameSize) { sym.Name.Offset.Zeroes = 0; sym.Name.Offset.Offset = addEntryToStringTable(name); } else { memset(sym.Name.ShortName, 0, COFF::NameSize); memcpy(sym.Name.ShortName, name.data(), name.size()); } if (auto *d = dyn_cast(def)) { COFFSymbolRef ref = d->getCOFFSymbol(); sym.Type = ref.getType(); sym.StorageClass = ref.getStorageClass(); } else { sym.Type = IMAGE_SYM_TYPE_NULL; sym.StorageClass = IMAGE_SYM_CLASS_EXTERNAL; } sym.NumberOfAuxSymbols = 0; return sym; } void Writer::createSymbolAndStringTable() { // PE/COFF images are limited to 8 byte section names. Longer names can be // supported by writing a non-standard string table, but this string table is // not mapped at runtime and the long names will therefore be inaccessible. // link.exe always truncates section names to 8 bytes, whereas binutils always // preserves long section names via the string table. LLD adopts a hybrid // solution where discardable sections have long names preserved and // non-discardable sections have their names truncated, to ensure that any // section which is mapped at runtime also has its name mapped at runtime. for (OutputSection *sec : ctx.outputSections) { if (sec->name.size() <= COFF::NameSize) continue; if ((sec->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) == 0) continue; if (config->warnLongSectionNames) { warn("section name " + sec->name + " is longer than 8 characters and will use a non-standard string " "table"); } sec->setStringTableOff(addEntryToStringTable(sec->name)); } if (config->debugDwarf || config->debugSymtab) { for (ObjFile *file : ctx.objFileInstances) { for (Symbol *b : file->getSymbols()) { auto *d = dyn_cast_or_null(b); if (!d || d->writtenToSymtab) continue; d->writtenToSymtab = true; if (auto *dc = dyn_cast_or_null(d)) { COFFSymbolRef symRef = dc->getCOFFSymbol(); if (symRef.isSectionDefinition() || symRef.getStorageClass() == COFF::IMAGE_SYM_CLASS_LABEL) continue; } if (Optional sym = createSymbol(d)) outputSymtab.push_back(*sym); } } } if (outputSymtab.empty() && strtab.empty()) return; // We position the symbol table to be adjacent to the end of the last section. 
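// A minimal standalone sketch of the COFF symbol-name encoding used in
// Writer::createSymbol() and addEntryToStringTable(): names of at most 8 bytes are
// stored inline (zero padded, not necessarily NUL terminated), longer names go into
// the string table and are referenced by an offset that counts the table's leading
// 4-byte size field. The types below are illustrative stand-ins, not the llvm::object
// definitions (the real field pair is a union overlaying the 8 short-name bytes).
#include <cstdint>
#include <cstring>
#include <string>
#include <vector>

struct ShortOrLongName {
  char shortName[8]; // used when the name fits
  uint32_t zeroes;   // 0 marks "long name, see offset"
  uint32_t offset;   // offset into the string table (relative to its size field)
};

static size_t addToStringTable(std::vector<char> &strtab, const std::string &s) {
  size_t off = strtab.size() + 4; // +4: offsets include the leading size field
  strtab.insert(strtab.end(), s.begin(), s.end());
  strtab.push_back('\0');
  return off;
}

static ShortOrLongName encodeName(const std::string &name, std::vector<char> &strtab) {
  ShortOrLongName n{};
  if (name.size() <= 8) {
    std::memcpy(n.shortName, name.data(), name.size()); // rest stays zero padded
  } else {
    n.zeroes = 0;
    n.offset = static_cast<uint32_t>(addToStringTable(strtab, name));
  }
  return n;
}

int main() {
  std::vector<char> strtab;
  encodeName(".text", strtab); // fits inline, string table untouched
  ShortOrLongName l = encodeName("__guard_fids_table", strtab); // goes to the table
  return l.offset == 4 ? 0 : 1; // first long name lands right after the size field
}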
uint64_t fileOff = fileSize; pointerToSymbolTable = fileOff; fileOff += outputSymtab.size() * sizeof(coff_symbol16); fileOff += 4 + strtab.size(); fileSize = alignTo(fileOff, config->fileAlign); } void Writer::mergeSections() { if (!pdataSec->chunks.empty()) { firstPdata = pdataSec->chunks.front(); lastPdata = pdataSec->chunks.back(); } for (auto &p : config->merge) { StringRef toName = p.second; if (p.first == toName) continue; StringSet<> names; while (true) { if (!names.insert(toName).second) fatal("/merge: cycle found for section '" + p.first + "'"); auto i = config->merge.find(toName); if (i == config->merge.end()) break; toName = i->second; } OutputSection *from = findSection(p.first); OutputSection *to = findSection(toName); if (!from) continue; if (!to) { from->name = toName; continue; } to->merge(from); } } // Visits all sections to assign incremental, non-overlapping RVAs and // file offsets. void Writer::assignAddresses() { sizeOfHeaders = dosStubSize + sizeof(PEMagic) + sizeof(coff_file_header) + sizeof(data_directory) * numberOfDataDirectory + sizeof(coff_section) * ctx.outputSections.size(); sizeOfHeaders += config->is64() ? sizeof(pe32plus_header) : sizeof(pe32_header); sizeOfHeaders = alignTo(sizeOfHeaders, config->fileAlign); fileSize = sizeOfHeaders; // The first page is kept unmapped. uint64_t rva = alignTo(sizeOfHeaders, config->align); for (OutputSection *sec : ctx.outputSections) { if (sec == relocSec) addBaserels(); uint64_t rawSize = 0, virtualSize = 0; sec->header.VirtualAddress = rva; // If /FUNCTIONPADMIN is used, functions are padded in order to create a // hotpatchable image. const bool isCodeSection = (sec->header.Characteristics & IMAGE_SCN_CNT_CODE) && (sec->header.Characteristics & IMAGE_SCN_MEM_READ) && (sec->header.Characteristics & IMAGE_SCN_MEM_EXECUTE); uint32_t padding = isCodeSection ? config->functionPadMin : 0; for (Chunk *c : sec->chunks) { if (padding && c->isHotPatchable()) virtualSize += padding; virtualSize = alignTo(virtualSize, c->getAlignment()); c->setRVA(rva + virtualSize); virtualSize += c->getSize(); if (c->hasData) rawSize = alignTo(virtualSize, config->fileAlign); } if (virtualSize > UINT32_MAX) error("section larger than 4 GiB: " + sec->name); sec->header.VirtualSize = virtualSize; sec->header.SizeOfRawData = rawSize; if (rawSize != 0) sec->header.PointerToRawData = fileSize; rva += alignTo(virtualSize, config->align); fileSize += alignTo(rawSize, config->fileAlign); } sizeOfImage = alignTo(rva, config->align); // Assign addresses to sections in MergeChunks. for (MergeChunk *mc : ctx.mergeChunkInstances) if (mc) mc->assignSubsectionRVAs(); } template void Writer::writeHeader() { // Write DOS header. For backwards compatibility, the first part of a PE/COFF // executable consists of an MS-DOS MZ executable. If the executable is run // under DOS, that program gets run (usually to just print an error message). // When run under Windows, the loader looks at AddressOfNewExeHeader and uses // the PE header instead. uint8_t *buf = buffer->getBufferStart(); auto *dos = reinterpret_cast(buf); buf += sizeof(dos_header); dos->Magic[0] = 'M'; dos->Magic[1] = 'Z'; dos->UsedBytesInTheLastPage = dosStubSize % 512; dos->FileSizeInPages = divideCeil(dosStubSize, 512); dos->HeaderSizeInParagraphs = sizeof(dos_header) / 16; dos->AddressOfRelocationTable = sizeof(dos_header); dos->AddressOfNewExeHeader = dosStubSize; // Write DOS program. 
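// A minimal standalone sketch of the layout arithmetic in Writer::assignAddresses():
// each chunk is aligned within its section, the virtual size advances by the chunk
// size, and the raw (file) size only tracks chunks that actually carry data. The
// chunk values and alignments here are illustrative; LLD takes them from the config.
// alignTo() assumes power-of-two alignments, as the real values are.
#include <cstdint>
#include <cstdio>
#include <vector>

static uint64_t alignTo(uint64_t v, uint64_t a) { return (v + a - 1) & ~(a - 1); }

struct ChunkInfo {
  uint64_t size;
  uint64_t align;
  bool hasData;
};

int main() {
  const uint64_t fileAlign = 512, sectionAlign = 4096;
  std::vector<ChunkInfo> chunks = {{0x30, 16, true}, {0x5, 4, true}, {0x100, 32, false}};
  uint64_t rva = alignTo(0x400 /* headers */, sectionAlign); // first page stays unmapped
  uint64_t virtualSize = 0, rawSize = 0;
  for (const ChunkInfo &c : chunks) {
    virtualSize = alignTo(virtualSize, c.align);
    uint64_t chunkRVA = rva + virtualSize;
    virtualSize += c.size;
    if (c.hasData)
      rawSize = alignTo(virtualSize, fileAlign); // raw size covers the last data chunk
    std::printf("chunk at RVA 0x%llx\n", (unsigned long long)chunkRVA);
  }
  std::printf("VirtualSize=0x%llx SizeOfRawData=0x%llx\n",
              (unsigned long long)virtualSize, (unsigned long long)rawSize);
}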
memcpy(buf, dosProgram, sizeof(dosProgram)); buf += sizeof(dosProgram); // Write PE magic memcpy(buf, PEMagic, sizeof(PEMagic)); buf += sizeof(PEMagic); // Write COFF header auto *coff = reinterpret_cast(buf); buf += sizeof(*coff); coff->Machine = config->machine; coff->NumberOfSections = ctx.outputSections.size(); coff->Characteristics = IMAGE_FILE_EXECUTABLE_IMAGE; if (config->largeAddressAware) coff->Characteristics |= IMAGE_FILE_LARGE_ADDRESS_AWARE; if (!config->is64()) coff->Characteristics |= IMAGE_FILE_32BIT_MACHINE; if (config->dll) coff->Characteristics |= IMAGE_FILE_DLL; if (config->driverUponly) coff->Characteristics |= IMAGE_FILE_UP_SYSTEM_ONLY; if (!config->relocatable) coff->Characteristics |= IMAGE_FILE_RELOCS_STRIPPED; if (config->swaprunCD) coff->Characteristics |= IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP; if (config->swaprunNet) coff->Characteristics |= IMAGE_FILE_NET_RUN_FROM_SWAP; coff->SizeOfOptionalHeader = sizeof(PEHeaderTy) + sizeof(data_directory) * numberOfDataDirectory; // Write PE header auto *pe = reinterpret_cast(buf); buf += sizeof(*pe); pe->Magic = config->is64() ? PE32Header::PE32_PLUS : PE32Header::PE32; // If {Major,Minor}LinkerVersion is left at 0.0, then for some // reason signing the resulting PE file with Authenticode produces a // signature that fails to validate on Windows 7 (but is OK on 10). // Set it to 14.0, which is what VS2015 outputs, and which avoids // that problem. pe->MajorLinkerVersion = 14; pe->MinorLinkerVersion = 0; pe->ImageBase = config->imageBase; pe->SectionAlignment = config->align; pe->FileAlignment = config->fileAlign; pe->MajorImageVersion = config->majorImageVersion; pe->MinorImageVersion = config->minorImageVersion; pe->MajorOperatingSystemVersion = config->majorOSVersion; pe->MinorOperatingSystemVersion = config->minorOSVersion; pe->MajorSubsystemVersion = config->majorSubsystemVersion; pe->MinorSubsystemVersion = config->minorSubsystemVersion; pe->Subsystem = config->subsystem; pe->SizeOfImage = sizeOfImage; pe->SizeOfHeaders = sizeOfHeaders; if (!config->noEntry) { Defined *entry = cast(config->entry); pe->AddressOfEntryPoint = entry->getRVA(); // Pointer to thumb code must have the LSB set, so adjust it. 
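// A minimal standalone sketch of the MZ header arithmetic in Writer::writeHeader():
// the DOS stub size is expressed in 512-byte pages and 16-byte paragraphs, and
// AddressOfNewExeHeader points just past the stub at the PE signature. The stub and
// header sizes used here are illustrative, not LLD's actual constants.
#include <cstdint>
#include <cstdio>

static uint32_t divideCeil(uint32_t n, uint32_t d) { return (n + d - 1) / d; }

int main() {
  const uint32_t dosStubSize = 0xC0;  // illustrative; LLD uses its own small stub
  const uint32_t dosHeaderSize = 64;  // size of the MZ header itself
  uint32_t usedBytesInLastPage = dosStubSize % 512;
  uint32_t fileSizeInPages = divideCeil(dosStubSize, 512);
  uint32_t headerSizeInParagraphs = dosHeaderSize / 16;
  uint32_t addressOfNewExeHeader = dosStubSize; // 'PE\0\0' follows the stub
  std::printf("%u %u %u 0x%x\n", usedBytesInLastPage, fileSizeInPages,
              headerSizeInParagraphs, addressOfNewExeHeader);
}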
if (config->machine == ARMNT) pe->AddressOfEntryPoint |= 1; } pe->SizeOfStackReserve = config->stackReserve; pe->SizeOfStackCommit = config->stackCommit; pe->SizeOfHeapReserve = config->heapReserve; pe->SizeOfHeapCommit = config->heapCommit; if (config->appContainer) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_APPCONTAINER; if (config->driverWdm) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_WDM_DRIVER; if (config->dynamicBase) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE; if (config->highEntropyVA) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA; if (!config->allowBind) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_BIND; if (config->nxCompat) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NX_COMPAT; if (!config->allowIsolation) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION; if (config->guardCF != GuardCFLevel::Off) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_GUARD_CF; if (config->integrityCheck) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY; if (setNoSEHCharacteristic || config->noSEH) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_SEH; if (config->terminalServerAware) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE; pe->NumberOfRvaAndSize = numberOfDataDirectory; if (textSec->getVirtualSize()) { pe->BaseOfCode = textSec->getRVA(); pe->SizeOfCode = textSec->getRawSize(); } pe->SizeOfInitializedData = getSizeOfInitializedData(); // Write data directory auto *dir = reinterpret_cast(buf); buf += sizeof(*dir) * numberOfDataDirectory; if (edataStart) { dir[EXPORT_TABLE].RelativeVirtualAddress = edataStart->getRVA(); dir[EXPORT_TABLE].Size = edataEnd->getRVA() + edataEnd->getSize() - edataStart->getRVA(); } if (importTableStart) { dir[IMPORT_TABLE].RelativeVirtualAddress = importTableStart->getRVA(); dir[IMPORT_TABLE].Size = importTableSize; } if (iatStart) { dir[IAT].RelativeVirtualAddress = iatStart->getRVA(); dir[IAT].Size = iatSize; } if (rsrcSec->getVirtualSize()) { dir[RESOURCE_TABLE].RelativeVirtualAddress = rsrcSec->getRVA(); dir[RESOURCE_TABLE].Size = rsrcSec->getVirtualSize(); } if (firstPdata) { dir[EXCEPTION_TABLE].RelativeVirtualAddress = firstPdata->getRVA(); dir[EXCEPTION_TABLE].Size = lastPdata->getRVA() + lastPdata->getSize() - firstPdata->getRVA(); } if (relocSec->getVirtualSize()) { dir[BASE_RELOCATION_TABLE].RelativeVirtualAddress = relocSec->getRVA(); dir[BASE_RELOCATION_TABLE].Size = relocSec->getVirtualSize(); } if (Symbol *sym = ctx.symtab.findUnderscore("_tls_used")) { if (Defined *b = dyn_cast(sym)) { dir[TLS_TABLE].RelativeVirtualAddress = b->getRVA(); dir[TLS_TABLE].Size = config->is64() ? 
sizeof(object::coff_tls_directory64) : sizeof(object::coff_tls_directory32); } } if (debugDirectory) { dir[DEBUG_DIRECTORY].RelativeVirtualAddress = debugDirectory->getRVA(); dir[DEBUG_DIRECTORY].Size = debugDirectory->getSize(); } if (Symbol *sym = ctx.symtab.findUnderscore("_load_config_used")) { if (auto *b = dyn_cast(sym)) { SectionChunk *sc = b->getChunk(); assert(b->getRVA() >= sc->getRVA()); uint64_t offsetInChunk = b->getRVA() - sc->getRVA(); if (!sc->hasData || offsetInChunk + 4 > sc->getSize()) fatal("_load_config_used is malformed"); ArrayRef secContents = sc->getContents(); uint32_t loadConfigSize = *reinterpret_cast(&secContents[offsetInChunk]); if (offsetInChunk + loadConfigSize > sc->getSize()) fatal("_load_config_used is too large"); dir[LOAD_CONFIG_TABLE].RelativeVirtualAddress = b->getRVA(); dir[LOAD_CONFIG_TABLE].Size = loadConfigSize; } } if (!delayIdata.empty()) { dir[DELAY_IMPORT_DESCRIPTOR].RelativeVirtualAddress = delayIdata.getDirRVA(); dir[DELAY_IMPORT_DESCRIPTOR].Size = delayIdata.getDirSize(); } // Write section table for (OutputSection *sec : ctx.outputSections) { sec->writeHeaderTo(buf); buf += sizeof(coff_section); } sectionTable = ArrayRef( buf - ctx.outputSections.size() * sizeof(coff_section), buf); if (outputSymtab.empty() && strtab.empty()) return; coff->PointerToSymbolTable = pointerToSymbolTable; uint32_t numberOfSymbols = outputSymtab.size(); coff->NumberOfSymbols = numberOfSymbols; auto *symbolTable = reinterpret_cast( buffer->getBufferStart() + coff->PointerToSymbolTable); for (size_t i = 0; i != numberOfSymbols; ++i) symbolTable[i] = outputSymtab[i]; // Create the string table, it follows immediately after the symbol table. // The first 4 bytes is length including itself. buf = reinterpret_cast(&symbolTable[numberOfSymbols]); write32le(buf, strtab.size() + 4); if (!strtab.empty()) memcpy(buf + 4, strtab.data(), strtab.size()); } void Writer::openFile(StringRef path) { buffer = CHECK( FileOutputBuffer::create(path, fileSize, FileOutputBuffer::F_executable), "failed to open " + path); } void Writer::createSEHTable() { SymbolRVASet handlers; for (ObjFile *file : ctx.objFileInstances) { if (!file->hasSafeSEH()) error("/safeseh: " + file->getName() + " is not compatible with SEH"); markSymbolsForRVATable(file, file->getSXDataChunks(), handlers); } // Set the "no SEH" characteristic if there really were no handlers, or if // there is no load config object to point to the table of handlers. setNoSEHCharacteristic = handlers.empty() || !ctx.symtab.findUnderscore("_load_config_used"); maybeAddRVATable(std::move(handlers), "__safe_se_handler_table", "__safe_se_handler_count"); } // Add a symbol to an RVA set. Two symbols may have the same RVA, but an RVA set // cannot contain duplicates. Therefore, the set is uniqued by Chunk and the // symbol's offset into that Chunk. static void addSymbolToRVASet(SymbolRVASet &rvaSet, Defined *s) { Chunk *c = s->getChunk(); if (auto *sc = dyn_cast(c)) c = sc->repl; // Look through ICF replacement. uint32_t off = s->getRVA() - (c ? c->getRVA() : 0); rvaSet.insert({c, off}); } // Given a symbol, add it to the GFIDs table if it is a live, defined, function // symbol in an executable section. static void maybeAddAddressTakenFunction(SymbolRVASet &addressTakenSyms, Symbol *s) { if (!s) return; switch (s->kind()) { case Symbol::DefinedLocalImportKind: case Symbol::DefinedImportDataKind: // Defines an __imp_ pointer, so it is data, so it is ignored. 
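// A minimal standalone sketch of the COFF string table emitted right after the symbol
// table in writeHeader(): the first 4 bytes hold the little-endian total length
// including the length field itself, followed by the NUL-terminated long names. The
// sample names are illustrative.
#include <cstdint>
#include <cstring>
#include <string>
#include <vector>

static void write32le(uint8_t *p, uint32_t v) {
  p[0] = uint8_t(v); p[1] = uint8_t(v >> 8); p[2] = uint8_t(v >> 16); p[3] = uint8_t(v >> 24);
}

int main() {
  std::vector<char> strtab;
  std::vector<std::string> names = {"__guard_fids_table", ".debug_info"};
  for (const std::string &s : names) {
    strtab.insert(strtab.end(), s.begin(), s.end());
    strtab.push_back('\0');
  }
  std::vector<uint8_t> out(4 + strtab.size());
  write32le(out.data(), uint32_t(strtab.size() + 4)); // length includes these 4 bytes
  std::memcpy(out.data() + 4, strtab.data(), strtab.size());
  return out.size() == strtab.size() + 4 ? 0 : 1;
}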
break; case Symbol::DefinedCommonKind: // Common is always data, so it is ignored. break; case Symbol::DefinedAbsoluteKind: case Symbol::DefinedSyntheticKind: // Absolute is never code, synthetic generally isn't and usually isn't // determinable. break; case Symbol::LazyArchiveKind: case Symbol::LazyObjectKind: case Symbol::LazyDLLSymbolKind: case Symbol::UndefinedKind: // Undefined symbols resolve to zero, so they don't have an RVA. Lazy // symbols shouldn't have relocations. break; case Symbol::DefinedImportThunkKind: // Thunks are always code, include them. addSymbolToRVASet(addressTakenSyms, cast(s)); break; case Symbol::DefinedRegularKind: { // This is a regular, defined, symbol from a COFF file. Mark the symbol as // address taken if the symbol type is function and it's in an executable // section. auto *d = cast(s); if (d->getCOFFSymbol().getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION) { SectionChunk *sc = dyn_cast(d->getChunk()); if (sc && sc->live && sc->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) addSymbolToRVASet(addressTakenSyms, d); } break; } } } // Visit all relocations from all section contributions of this object file and // mark the relocation target as address-taken. static void markSymbolsWithRelocations(ObjFile *file, SymbolRVASet &usedSymbols) { for (Chunk *c : file->getChunks()) { // We only care about live section chunks. Common chunks and other chunks // don't generally contain relocations. SectionChunk *sc = dyn_cast(c); if (!sc || !sc->live) continue; for (const coff_relocation &reloc : sc->getRelocs()) { if (config->machine == I386 && reloc.Type == COFF::IMAGE_REL_I386_REL32) // Ignore relative relocations on x86. On x86_64 they can't be ignored // since they're also used to compute absolute addresses. continue; Symbol *ref = sc->file->getSymbol(reloc.SymbolTableIndex); maybeAddAddressTakenFunction(usedSymbols, ref); } } } // Create the guard function id table. This is a table of RVAs of all // address-taken functions. It is sorted and uniqued, just like the safe SEH // table. void Writer::createGuardCFTables() { SymbolRVASet addressTakenSyms; SymbolRVASet giatsRVASet; std::vector giatsSymbols; SymbolRVASet longJmpTargets; SymbolRVASet ehContTargets; for (ObjFile *file : ctx.objFileInstances) { // If the object was compiled with /guard:cf, the address taken symbols // are in .gfids$y sections, the longjmp targets are in .gljmp$y sections, // and ehcont targets are in .gehcont$y sections. If the object was not // compiled with /guard:cf, we assume there were no setjmp and ehcont // targets, and that all code symbols with relocations are possibly // address-taken. if (file->hasGuardCF()) { markSymbolsForRVATable(file, file->getGuardFidChunks(), addressTakenSyms); markSymbolsForRVATable(file, file->getGuardIATChunks(), giatsRVASet); getSymbolsFromSections(file, file->getGuardIATChunks(), giatsSymbols); markSymbolsForRVATable(file, file->getGuardLJmpChunks(), longJmpTargets); markSymbolsForRVATable(file, file->getGuardEHContChunks(), ehContTargets); } else { markSymbolsWithRelocations(file, addressTakenSyms); } } // Mark the image entry as address-taken. if (config->entry) maybeAddAddressTakenFunction(addressTakenSyms, config->entry); // Mark exported symbols in executable sections as address-taken. for (Export &e : config->exports) maybeAddAddressTakenFunction(addressTakenSyms, e.sym); // For each entry in the .giats table, check if it has a corresponding load // thunk (e.g. 
because the DLL that defines it will be delay-loaded) and, if // so, add the load thunk to the address taken (.gfids) table. for (Symbol *s : giatsSymbols) { if (auto *di = dyn_cast(s)) { if (di->loadThunkSym) addSymbolToRVASet(addressTakenSyms, di->loadThunkSym); } } // Ensure sections referenced in the gfid table are 16-byte aligned. for (const ChunkAndOffset &c : addressTakenSyms) if (c.inputChunk->getAlignment() < 16) c.inputChunk->setAlignment(16); maybeAddRVATable(std::move(addressTakenSyms), "__guard_fids_table", "__guard_fids_count"); // Add the Guard Address Taken IAT Entry Table (.giats). maybeAddRVATable(std::move(giatsRVASet), "__guard_iat_table", "__guard_iat_count"); // Add the longjmp target table unless the user told us not to. if (config->guardCF & GuardCFLevel::LongJmp) maybeAddRVATable(std::move(longJmpTargets), "__guard_longjmp_table", "__guard_longjmp_count"); // Add the ehcont target table unless the user told us not to. if (config->guardCF & GuardCFLevel::EHCont) maybeAddRVATable(std::move(ehContTargets), "__guard_eh_cont_table", "__guard_eh_cont_count", true); // Set __guard_flags, which will be used in the load config to indicate that // /guard:cf was enabled. uint32_t guardFlags = uint32_t(coff_guard_flags::CFInstrumented) | uint32_t(coff_guard_flags::HasFidTable); if (config->guardCF & GuardCFLevel::LongJmp) guardFlags |= uint32_t(coff_guard_flags::HasLongJmpTable); if (config->guardCF & GuardCFLevel::EHCont) guardFlags |= uint32_t(coff_guard_flags::HasEHContTable); Symbol *flagSym = ctx.symtab.findUnderscore("__guard_flags"); cast(flagSym)->setVA(guardFlags); } // Take a list of input sections containing symbol table indices and add those // symbols to a vector. The challenge is that symbol RVAs are not known and // depend on the table size, so we can't directly build a set of integers. void Writer::getSymbolsFromSections(ObjFile *file, ArrayRef symIdxChunks, std::vector &symbols) { for (SectionChunk *c : symIdxChunks) { // Skip sections discarded by linker GC. This comes up when a .gfids section // is associated with something like a vtable and the vtable is discarded. // In this case, the associated gfids section is discarded, and we don't // mark the virtual member functions as address-taken by the vtable. if (!c->live) continue; // Validate that the contents look like symbol table indices. ArrayRef data = c->getContents(); if (data.size() % 4 != 0) { warn("ignoring " + c->getSectionName() + " symbol table index section in object " + toString(file)); continue; } // Read each symbol table index and check if that symbol was included in the // final link. If so, add it to the vector of symbols. ArrayRef symIndices( reinterpret_cast(data.data()), data.size() / 4); ArrayRef objSymbols = file->getSymbols(); for (uint32_t symIndex : symIndices) { if (symIndex >= objSymbols.size()) { warn("ignoring invalid symbol table index in section " + c->getSectionName() + " in object " + toString(file)); continue; } if (Symbol *s = objSymbols[symIndex]) { if (s->isLive()) symbols.push_back(cast(s)); } } } } // Take a list of input sections containing symbol table indices and add those // symbols to an RVA table. 
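// A minimal standalone sketch of the __guard_flags composition at the end of
// createGuardCFTables(): the instrumented/fid-table bits are always set once
// /guard:cf is on, and the longjmp / EH-continuation bits are added only when those
// tables are emitted. The enum values and bit positions below are illustrative
// stand-ins, not the real GuardCFLevel or coff_guard_flags constants.
#include <cstdint>
#include <cstdio>

enum GuardLevelBits : uint32_t { GuardCF = 1, GuardLongJmp = 2, GuardEHCont = 4 };
enum GuardFlagBits : uint32_t {
  CFInstrumented = 1u << 0, // illustrative bit positions
  HasFidTable = 1u << 1,
  HasLongJmpTable = 1u << 2,
  HasEHContTable = 1u << 3,
};

static uint32_t computeGuardFlags(uint32_t level) {
  uint32_t flags = CFInstrumented | HasFidTable;
  if (level & GuardLongJmp)
    flags |= HasLongJmpTable;
  if (level & GuardEHCont)
    flags |= HasEHContTable;
  return flags;
}

int main() {
  uint32_t all = GuardCF | GuardLongJmp | GuardEHCont;
  std::printf("0x%x\n", computeGuardFlags(all)); // 0xf with these illustrative bits
}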
void Writer::markSymbolsForRVATable(ObjFile *file, ArrayRef symIdxChunks, SymbolRVASet &tableSymbols) { std::vector syms; getSymbolsFromSections(file, symIdxChunks, syms); for (Symbol *s : syms) addSymbolToRVASet(tableSymbols, cast(s)); } // Replace the absolute table symbol with a synthetic symbol pointing to // tableChunk so that we can emit base relocations for it and resolve section // relative relocations. void Writer::maybeAddRVATable(SymbolRVASet tableSymbols, StringRef tableSym, StringRef countSym, bool hasFlag) { if (tableSymbols.empty()) return; NonSectionChunk *tableChunk; if (hasFlag) tableChunk = make(std::move(tableSymbols)); else tableChunk = make(std::move(tableSymbols)); rdataSec->addChunk(tableChunk); Symbol *t = ctx.symtab.findUnderscore(tableSym); Symbol *c = ctx.symtab.findUnderscore(countSym); replaceSymbol(t, t->getName(), tableChunk); cast(c)->setVA(tableChunk->getSize() / (hasFlag ? 5 : 4)); } // MinGW specific. Gather all relocations that are imported from a DLL even // though the code didn't expect it to, produce the table that the runtime // uses for fixing them up, and provide the synthetic symbols that the // runtime uses for finding the table. void Writer::createRuntimePseudoRelocs() { std::vector rels; for (Chunk *c : ctx.symtab.getChunks()) { auto *sc = dyn_cast(c); if (!sc || !sc->live) continue; sc->getRuntimePseudoRelocs(rels); } if (!config->pseudoRelocs) { // Not writing any pseudo relocs; if some were needed, error out and // indicate what required them. for (const RuntimePseudoReloc &rpr : rels) error("automatic dllimport of " + rpr.sym->getName() + " in " + toString(rpr.target->file) + " requires pseudo relocations"); return; } if (!rels.empty()) log("Writing " + Twine(rels.size()) + " runtime pseudo relocations"); PseudoRelocTableChunk *table = make(rels); rdataSec->addChunk(table); EmptyChunk *endOfList = make(); rdataSec->addChunk(endOfList); Symbol *headSym = ctx.symtab.findUnderscore("__RUNTIME_PSEUDO_RELOC_LIST__"); Symbol *endSym = ctx.symtab.findUnderscore("__RUNTIME_PSEUDO_RELOC_LIST_END__"); replaceSymbol(headSym, headSym->getName(), table); replaceSymbol(endSym, endSym->getName(), endOfList); } // MinGW specific. // The MinGW .ctors and .dtors lists have sentinels at each end; // a (uintptr_t)-1 at the start and a (uintptr_t)0 at the end. // There's a symbol pointing to the start sentinel pointer, __CTOR_LIST__ // and __DTOR_LIST__ respectively. void Writer::insertCtorDtorSymbols() { AbsolutePointerChunk *ctorListHead = make(-1); AbsolutePointerChunk *ctorListEnd = make(0); AbsolutePointerChunk *dtorListHead = make(-1); AbsolutePointerChunk *dtorListEnd = make(0); ctorsSec->insertChunkAtStart(ctorListHead); ctorsSec->addChunk(ctorListEnd); dtorsSec->insertChunkAtStart(dtorListHead); dtorsSec->addChunk(dtorListEnd); Symbol *ctorListSym = ctx.symtab.findUnderscore("__CTOR_LIST__"); Symbol *dtorListSym = ctx.symtab.findUnderscore("__DTOR_LIST__"); replaceSymbol(ctorListSym, ctorListSym->getName(), ctorListHead); replaceSymbol(dtorListSym, dtorListSym->getName(), dtorListHead); } // Handles /section options to allow users to overwrite // section attributes. void Writer::setSectionPermissions() { for (auto &p : config->section) { StringRef name = p.first; uint32_t perm = p.second; for (OutputSection *sec : ctx.outputSections) if (sec->name == name) sec->setPermissions(perm); } } // Write section contents to a mmap'ed file. 
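// A minimal standalone sketch of the count symbol set in maybeAddRVATable(): each
// table entry is a 4-byte RVA, plus one extra flag byte per entry when the table
// carries flags (hasFlag is passed as true for the EH-continuation table), so the
// count is size/4 or size/5 respectively.
#include <cassert>
#include <cstddef>

static size_t rvaTableCount(size_t tableSizeInBytes, bool hasFlag) {
  return tableSizeInBytes / (hasFlag ? 5 : 4);
}

int main() {
  assert(rvaTableCount(40, /*hasFlag=*/false) == 10); // ten plain RVAs
  assert(rvaTableCount(40, /*hasFlag=*/true) == 8);   // eight RVA+flag entries
  return 0;
}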
void Writer::writeSections() { // Record the number of sections to apply section index relocations // against absolute symbols. See applySecIdx in Chunks.cpp.. DefinedAbsolute::numOutputSections = ctx.outputSections.size(); uint8_t *buf = buffer->getBufferStart(); for (OutputSection *sec : ctx.outputSections) { uint8_t *secBuf = buf + sec->getFileOff(); // Fill gaps between functions in .text with INT3 instructions // instead of leaving as NUL bytes (which can be interpreted as // ADD instructions). if (sec->header.Characteristics & IMAGE_SCN_CNT_CODE) memset(secBuf, 0xCC, sec->getRawSize()); parallelForEach(sec->chunks, [&](Chunk *c) { c->writeTo(secBuf + c->getRVA() - sec->getRVA()); }); } } void Writer::writeBuildId() { // There are two important parts to the build ID. // 1) If building with debug info, the COFF debug directory contains a // timestamp as well as a Guid and Age of the PDB. // 2) In all cases, the PE COFF file header also contains a timestamp. // For reproducibility, instead of a timestamp we want to use a hash of the // PE contents. if (config->debug) { assert(buildId && "BuildId is not set!"); // BuildId->BuildId was filled in when the PDB was written. } // At this point the only fields in the COFF file which remain unset are the // "timestamp" in the COFF file header, and the ones in the coff debug // directory. Now we can hash the file and write that hash to the various // timestamp fields in the file. StringRef outputFileData( reinterpret_cast(buffer->getBufferStart()), buffer->getBufferSize()); uint32_t timestamp = config->timestamp; uint64_t hash = 0; bool generateSyntheticBuildId = config->mingw && config->debug && config->pdbPath.empty(); if (config->repro || generateSyntheticBuildId) hash = xxHash64(outputFileData); if (config->repro) timestamp = static_cast(hash); if (generateSyntheticBuildId) { // For MinGW builds without a PDB file, we still generate a build id // to allow associating a crash dump to the executable. buildId->buildId->PDB70.CVSignature = OMF::Signature::PDB70; buildId->buildId->PDB70.Age = 1; memcpy(buildId->buildId->PDB70.Signature, &hash, 8); // xxhash only gives us 8 bytes, so put some fixed data in the other half. memcpy(&buildId->buildId->PDB70.Signature[8], "LLD PDB.", 8); } if (debugDirectory) debugDirectory->setTimeDateStamp(timestamp); uint8_t *buf = buffer->getBufferStart(); buf += dosStubSize + sizeof(PEMagic); object::coff_file_header *coffHeader = reinterpret_cast(buf); coffHeader->TimeDateStamp = timestamp; } // Sort .pdata section contents according to PE/COFF spec 5.5. void Writer::sortExceptionTable() { if (!firstPdata) return; // We assume .pdata contains function table entries only. 
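// A minimal standalone sketch of the synthetic build-id logic in writeBuildId(): a
// 64-bit hash of the whole output buffer supplies both the reproducible timestamp
// (its low 32 bits) and the first half of the 16-byte PDB70 signature, with fixed
// text filling the second half. std::hash stands in for xxHash64 here purely so the
// sketch is self-contained; the contents string is illustrative.
#include <cstdint>
#include <cstring>
#include <functional>
#include <string>

int main() {
  std::string outputFileData = "...image bytes...";         // illustrative contents
  uint64_t hash = std::hash<std::string>{}(outputFileData); // LLD uses xxHash64
  uint32_t timestamp = static_cast<uint32_t>(hash);         // low 32 bits
  uint8_t signature[16];
  std::memcpy(signature, &hash, 8);              // first 8 bytes: the hash
  std::memcpy(signature + 8, "LLD PDB.", 8);     // fixed filler for the rest
  return timestamp == static_cast<uint32_t>(hash) ? 0 : 1;
}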
auto bufAddr = [&](Chunk *c) { OutputSection *os = ctx.getOutputSection(c); return buffer->getBufferStart() + os->getFileOff() + c->getRVA() - os->getRVA(); }; uint8_t *begin = bufAddr(firstPdata); uint8_t *end = bufAddr(lastPdata) + lastPdata->getSize(); if (config->machine == AMD64) { struct Entry { ulittle32_t begin, end, unwind; }; if ((end - begin) % sizeof(Entry) != 0) { fatal("unexpected .pdata size: " + Twine(end - begin) + " is not a multiple of " + Twine(sizeof(Entry))); } parallelSort( MutableArrayRef((Entry *)begin, (Entry *)end), [](const Entry &a, const Entry &b) { return a.begin < b.begin; }); return; } if (config->machine == ARMNT || config->machine == ARM64) { struct Entry { ulittle32_t begin, unwind; }; if ((end - begin) % sizeof(Entry) != 0) { fatal("unexpected .pdata size: " + Twine(end - begin) + " is not a multiple of " + Twine(sizeof(Entry))); } parallelSort( MutableArrayRef((Entry *)begin, (Entry *)end), [](const Entry &a, const Entry &b) { return a.begin < b.begin; }); return; } lld::errs() << "warning: don't know how to handle .pdata.\n"; } // The CRT section contains, among other things, the array of function // pointers that initialize every global variable that is not trivially // constructed. The CRT calls them one after the other prior to invoking // main(). // // As per C++ spec, 3.6.2/2.3, // "Variables with ordered initialization defined within a single // translation unit shall be initialized in the order of their definitions // in the translation unit" // // It is therefore critical to sort the chunks containing the function // pointers in the order that they are listed in the object file (top to // bottom), otherwise global objects might not be initialized in the // correct order. void Writer::sortCRTSectionChunks(std::vector &chunks) { auto sectionChunkOrder = [](const Chunk *a, const Chunk *b) { auto sa = dyn_cast(a); auto sb = dyn_cast(b); assert(sa && sb && "Non-section chunks in CRT section!"); StringRef sAObj = sa->file->mb.getBufferIdentifier(); StringRef sBObj = sb->file->mb.getBufferIdentifier(); return sAObj == sBObj && sa->getSectionNumber() < sb->getSectionNumber(); }; llvm::stable_sort(chunks, sectionChunkOrder); if (config->verbose) { for (auto &c : chunks) { auto sc = dyn_cast(c); log(" " + sc->file->mb.getBufferIdentifier().str() + ", SectionID: " + Twine(sc->getSectionNumber())); } } } OutputSection *Writer::findSection(StringRef name) { for (OutputSection *sec : ctx.outputSections) if (sec->name == name) return sec; return nullptr; } uint32_t Writer::getSizeOfInitializedData() { uint32_t res = 0; for (OutputSection *s : ctx.outputSections) if (s->header.Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA) res += s->getRawSize(); return res; } // Add base relocations to .reloc section. void Writer::addBaserels() { if (!config->relocatable) return; relocSec->chunks.clear(); std::vector v; for (OutputSection *sec : ctx.outputSections) { if (sec->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) continue; // Collect all locations for base relocations. for (Chunk *c : sec->chunks) c->getBaserels(&v); // Add the addresses to .reloc section. if (!v.empty()) addBaserelBlocks(v); v.clear(); } } // Add addresses to .reloc section. Note that addresses are grouped by page. 
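// A minimal standalone sketch of sortExceptionTable() for x86-64: .pdata is treated
// as an array of {BeginAddress, EndAddress, UnwindInfo} records and sorted by
// BeginAddress, as the PE/COFF spec requires. Plain uint32_t and std::sort stand in
// for the little-endian ulittle32_t and parallelSort used in LLD.
#include <algorithm>
#include <cstdint>
#include <vector>

struct PdataEntry {
  uint32_t begin, end, unwind; // RVAs
};

int main() {
  std::vector<PdataEntry> pdata = {
      {0x2040, 0x2060, 0x5000}, {0x1000, 0x1020, 0x5010}, {0x1800, 0x1850, 0x5020}};
  std::sort(pdata.begin(), pdata.end(),
            [](const PdataEntry &a, const PdataEntry &b) { return a.begin < b.begin; });
  return pdata.front().begin == 0x1000 ? 0 : 1;
}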
void Writer::addBaserelBlocks(std::vector &v) { const uint32_t mask = ~uint32_t(pageSize - 1); uint32_t page = v[0].rva & mask; size_t i = 0, j = 1; for (size_t e = v.size(); j < e; ++j) { uint32_t p = v[j].rva & mask; if (p == page) continue; relocSec->addChunk(make(page, &v[i], &v[0] + j)); i = j; page = p; } if (i == j) return; relocSec->addChunk(make(page, &v[i], &v[0] + j)); } PartialSection *Writer::createPartialSection(StringRef name, uint32_t outChars) { PartialSection *&pSec = partialSections[{name, outChars}]; if (pSec) return pSec; pSec = make(name, outChars); return pSec; } PartialSection *Writer::findPartialSection(StringRef name, uint32_t outChars) { auto it = partialSections.find({name, outChars}); if (it != partialSections.end()) return it->second; return nullptr; } void Writer::fixTlsAlignment() { Defined *tlsSym = dyn_cast_or_null(ctx.symtab.findUnderscore("_tls_used")); if (!tlsSym) return; OutputSection *sec = ctx.getOutputSection(tlsSym->getChunk()); assert(sec && tlsSym->getRVA() >= sec->getRVA() && "no output section for _tls_used"); uint8_t *secBuf = buffer->getBufferStart() + sec->getFileOff(); uint64_t tlsOffset = tlsSym->getRVA() - sec->getRVA(); uint64_t directorySize = config->is64() ? sizeof(object::coff_tls_directory64) : sizeof(object::coff_tls_directory32); if (tlsOffset + directorySize > sec->getRawSize()) fatal("_tls_used sym is malformed"); if (config->is64()) { object::coff_tls_directory64 *tlsDir = reinterpret_cast(&secBuf[tlsOffset]); tlsDir->setAlignment(tlsAlignment); } else { object::coff_tls_directory32 *tlsDir = reinterpret_cast(&secBuf[tlsOffset]); tlsDir->setAlignment(tlsAlignment); } } diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/InlineCost.h b/contrib/llvm-project/llvm/include/llvm/Analysis/InlineCost.h index f86ee5a14874..d3fa3b879125 100644 --- a/contrib/llvm-project/llvm/include/llvm/Analysis/InlineCost.h +++ b/contrib/llvm-project/llvm/include/llvm/Analysis/InlineCost.h @@ -1,325 +1,330 @@ //===- InlineCost.h - Cost analysis for inliner -----------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements heuristics for inlining decisions. // //===----------------------------------------------------------------------===// #ifndef LLVM_ANALYSIS_INLINECOST_H #define LLVM_ANALYSIS_INLINECOST_H #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/InlineModelFeatureMaps.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include #include namespace llvm { class BlockFrequencyInfo; class CallBase; class DataLayout; class Function; class ProfileSummaryInfo; class TargetTransformInfo; class TargetLibraryInfo; namespace InlineConstants { // Various thresholds used by inline cost analysis. /// Use when optsize (-Os) is specified. const int OptSizeThreshold = 50; /// Use when minsize (-Oz) is specified. const int OptMinSizeThreshold = 5; /// Use when -O3 is specified. const int OptAggressiveThreshold = 250; // Various magic constants used to adjust heuristics. 
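// Referring back to Writer::addBaserelBlocks() above: a minimal standalone sketch of
// its page grouping. Base relocation RVAs arrive sorted, and one block is emitted per
// 4 KiB page, covering the run of entries that share that page. A (page, count) pair
// models the block here instead of a BaserelChunk; the RVAs are illustrative.
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  const uint32_t pageSize = 4096;
  const uint32_t mask = ~uint32_t(pageSize - 1);
  std::vector<uint32_t> rvas = {0x1004, 0x1010, 0x10FC, 0x2008, 0x2010}; // sorted
  std::vector<std::pair<uint32_t, size_t>> blocks;
  uint32_t page = rvas[0] & mask;
  size_t i = 0, j = 1;
  for (size_t e = rvas.size(); j < e; ++j) {
    uint32_t p = rvas[j] & mask;
    if (p == page)
      continue;
    blocks.push_back({page, j - i}); // flush the finished page
    i = j;
    page = p;
  }
  if (i != j)
    blocks.push_back({page, j - i}); // final non-empty run
  for (auto &b : blocks)
    std::printf("page 0x%x: %zu entries\n", b.first, b.second); // 0x1000: 3, 0x2000: 2
}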
const int InstrCost = 5; const int IndirectCallThreshold = 100; const int LoopPenalty = 25; const int LastCallToStaticBonus = 15000; const int ColdccPenalty = 2000; /// Do not inline functions which allocate this many bytes on the stack /// when the caller is recursive. const unsigned TotalAllocaSizeRecursiveCaller = 1024; /// Do not inline dynamic allocas that have been constant propagated to be /// static allocas above this amount in bytes. const uint64_t MaxSimplifiedDynamicAllocaToInline = 65536; + +const char FunctionInlineCostMultiplierAttributeName[] = + "function-inline-cost-multiplier"; } // namespace InlineConstants // The cost-benefit pair computed by cost-benefit analysis. class CostBenefitPair { public: CostBenefitPair(APInt Cost, APInt Benefit) : Cost(Cost), Benefit(Benefit) {} const APInt &getCost() const { return Cost; } const APInt &getBenefit() const { return Benefit; } private: APInt Cost; APInt Benefit; }; /// Represents the cost of inlining a function. /// /// This supports special values for functions which should "always" or /// "never" be inlined. Otherwise, the cost represents a unitless amount; /// smaller values increase the likelihood of the function being inlined. /// /// Objects of this type also provide the adjusted threshold for inlining /// based on the information available for a particular callsite. They can be /// directly tested to determine if inlining should occur given the cost and /// threshold for this cost metric. class InlineCost { enum SentinelValues { AlwaysInlineCost = INT_MIN, NeverInlineCost = INT_MAX }; /// The estimated cost of inlining this callsite. int Cost = 0; /// The adjusted threshold against which this cost was computed. int Threshold = 0; /// Must be set for Always and Never instances. const char *Reason = nullptr; /// The cost-benefit pair computed by cost-benefit analysis. Optional CostBenefit = None; // Trivial constructor, interesting logic in the factory functions below. InlineCost(int Cost, int Threshold, const char *Reason = nullptr, Optional CostBenefit = None) : Cost(Cost), Threshold(Threshold), Reason(Reason), CostBenefit(CostBenefit) { assert((isVariable() || Reason) && "Reason must be provided for Never or Always"); } public: static InlineCost get(int Cost, int Threshold) { assert(Cost > AlwaysInlineCost && "Cost crosses sentinel value"); assert(Cost < NeverInlineCost && "Cost crosses sentinel value"); return InlineCost(Cost, Threshold); } static InlineCost getAlways(const char *Reason, Optional CostBenefit = None) { return InlineCost(AlwaysInlineCost, 0, Reason, CostBenefit); } static InlineCost getNever(const char *Reason, Optional CostBenefit = None) { return InlineCost(NeverInlineCost, 0, Reason, CostBenefit); } /// Test whether the inline cost is low enough for inlining. explicit operator bool() const { return Cost < Threshold; } bool isAlways() const { return Cost == AlwaysInlineCost; } bool isNever() const { return Cost == NeverInlineCost; } bool isVariable() const { return !isAlways() && !isNever(); } /// Get the inline cost estimate. /// It is an error to call this on an "always" or "never" InlineCost. 
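// A minimal standalone model of the InlineCost semantics declared above: INT_MIN and
// INT_MAX act as the "always" / "never" sentinels, conversion to bool means "cost is
// below the threshold", and the cost delta reports the remaining margin. This is a
// simplified stand-in with illustrative numbers, not the llvm::InlineCost class.
#include <cassert>
#include <climits>

struct CostModel {
  int cost, threshold;
  bool isAlways() const { return cost == INT_MIN; }
  bool isNever() const { return cost == INT_MAX; }
  explicit operator bool() const { return cost < threshold; }
  int costDelta() const { return threshold - cost; }
};

int main() {
  CostModel c{120, 225};        // a variable cost below its threshold
  assert(static_cast<bool>(c)); // would be inlined
  assert(c.costDelta() == 105); // positive margin left
  CostModel never{INT_MAX, 0};
  assert(never.isNever() && !static_cast<bool>(never));
  return 0;
}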
int getCost() const { assert(isVariable() && "Invalid access of InlineCost"); return Cost; } /// Get the threshold against which the cost was computed int getThreshold() const { assert(isVariable() && "Invalid access of InlineCost"); return Threshold; } /// Get the cost-benefit pair which was computed by cost-benefit analysis Optional getCostBenefit() const { return CostBenefit; } /// Get the reason of Always or Never. const char *getReason() const { assert((Reason || isVariable()) && "InlineCost reason must be set for Always or Never"); return Reason; } /// Get the cost delta from the threshold for inlining. /// Only valid if the cost is of the variable kind. Returns a negative /// value if the cost is too high to inline. int getCostDelta() const { return Threshold - getCost(); } }; /// InlineResult is basically true or false. For false results the message /// describes a reason. class InlineResult { const char *Message = nullptr; InlineResult(const char *Message = nullptr) : Message(Message) {} public: static InlineResult success() { return {}; } static InlineResult failure(const char *Reason) { return InlineResult(Reason); } bool isSuccess() const { return Message == nullptr; } const char *getFailureReason() const { assert(!isSuccess() && "getFailureReason should only be called in failure cases"); return Message; } }; /// Thresholds to tune inline cost analysis. The inline cost analysis decides /// the condition to apply a threshold and applies it. Otherwise, /// DefaultThreshold is used. If a threshold is Optional, it is applied only /// when it has a valid value. Typically, users of inline cost analysis /// obtain an InlineParams object through one of the \c getInlineParams methods /// and pass it to \c getInlineCost. Some specialized versions of inliner /// (such as the pre-inliner) might have custom logic to compute \c InlineParams /// object. struct InlineParams { /// The default threshold to start with for a callee. int DefaultThreshold = -1; /// Threshold to use for callees with inline hint. Optional HintThreshold; /// Threshold to use for cold callees. Optional ColdThreshold; /// Threshold to use when the caller is optimized for size. Optional OptSizeThreshold; /// Threshold to use when the caller is optimized for minsize. Optional OptMinSizeThreshold; /// Threshold to use when the callsite is considered hot. Optional HotCallSiteThreshold; /// Threshold to use when the callsite is considered hot relative to function /// entry. Optional LocallyHotCallSiteThreshold; /// Threshold to use when the callsite is considered cold. Optional ColdCallSiteThreshold; /// Compute inline cost even when the cost has exceeded the threshold. Optional ComputeFullInlineCost; /// Indicate whether we should allow inline deferral. Optional EnableDeferral; /// Indicate whether we allow inlining for recursive call. Optional AllowRecursiveCall = false; }; +Optional getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind); + /// Generate the parameters to tune the inline cost analysis based only on the /// commandline options. InlineParams getInlineParams(); /// Generate the parameters to tune the inline cost analysis based on command /// line options. If -inline-threshold option is not explicitly passed, /// \p Threshold is used as the default threshold. InlineParams getInlineParams(int Threshold); /// Generate the parameters to tune the inline cost analysis based on command /// line options. 
If -inline-threshold option is not explicitly passed, /// the default threshold is computed from \p OptLevel and \p SizeOptLevel. /// An \p OptLevel value above 3 is considered an aggressive optimization mode. /// \p SizeOptLevel of 1 corresponds to the -Os flag and 2 corresponds to /// the -Oz flag. InlineParams getInlineParams(unsigned OptLevel, unsigned SizeOptLevel); /// Return the cost associated with a callsite, including parameter passing /// and the call/return instruction. int getCallsiteCost(CallBase &Call, const DataLayout &DL); /// Get an InlineCost object representing the cost of inlining this /// callsite. /// /// Note that a default threshold is passed into this function. This threshold /// could be modified based on callsite's properties and only costs below this /// new threshold are computed with any accuracy. The new threshold can be /// used to bound the computation necessary to determine whether the cost is /// sufficiently low to warrant inlining. /// /// Also note that calling this function *dynamically* computes the cost of /// inlining the callsite. It is an expensive, heavyweight call. InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref GetAssumptionCache, function_ref GetTLI, function_ref GetBFI = nullptr, ProfileSummaryInfo *PSI = nullptr, OptimizationRemarkEmitter *ORE = nullptr); /// Get an InlineCost with the callee explicitly specified. /// This allows you to calculate the cost of inlining a function via a /// pointer. This behaves exactly as the version with no explicit callee /// parameter in all other respects. // InlineCost getInlineCost(CallBase &Call, Function *Callee, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref GetAssumptionCache, function_ref GetTLI, function_ref GetBFI = nullptr, ProfileSummaryInfo *PSI = nullptr, OptimizationRemarkEmitter *ORE = nullptr); /// Returns InlineResult::success() if the call site should be always inlined /// because of user directives, and the inlining is viable. Returns /// InlineResult::failure() if the inlining may never happen because of user /// directives or incompatibilities detectable without needing callee traversal. /// Otherwise returns None, meaning that inlining should be decided based on /// other criteria (e.g. cost modeling). Optional getAttributeBasedInliningDecision( CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI, function_ref GetTLI); /// Get the cost estimate ignoring thresholds. This is similar to getInlineCost /// when passed InlineParams::ComputeFullInlineCost, or a non-null ORE. It /// uses default InlineParams otherwise. /// Contrary to getInlineCost, which makes a threshold-based final evaluation of /// should/shouldn't inline, captured in InlineResult, getInliningCostEstimate /// returns: /// - None, if the inlining cannot happen (is illegal) /// - an integer, representing the cost. Optional getInliningCostEstimate( CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref GetAssumptionCache, function_ref GetBFI = nullptr, ProfileSummaryInfo *PSI = nullptr, OptimizationRemarkEmitter *ORE = nullptr); /// Get the expanded cost features. The features are returned unconditionally, /// even if inlining is impossible. 
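// A minimal standalone sketch of how the documented defaults combine in the
// getInlineParams(OptLevel, SizeOptLevel) overload above: the size-optimization
// level (1 for -Os, 2 for -Oz) selects the smaller thresholds, an aggressive
// OptLevel selects the larger one, and a generic default applies otherwise. Only the
// named constants are taken from the header; the exact cutoffs and the generic
// default used here are assumptions, and the real selection lives inside LLVM's
// inline cost implementation rather than in this sketch.
#include <cstdio>

namespace InlineConstantsSketch {
const int OptSizeThreshold = 50;        // -Os (documented above)
const int OptMinSizeThreshold = 5;      // -Oz (documented above)
const int OptAggressiveThreshold = 250; // aggressive, e.g. -O3 (documented above)
const int DefaultThreshold = 225;       // illustrative generic default
} // namespace InlineConstantsSketch

static int pickThreshold(unsigned OptLevel, unsigned SizeOptLevel) {
  using namespace InlineConstantsSketch;
  if (SizeOptLevel == 2)
    return OptMinSizeThreshold;
  if (SizeOptLevel == 1)
    return OptSizeThreshold;
  if (OptLevel > 2)
    return OptAggressiveThreshold;
  return DefaultThreshold;
}

int main() {
  std::printf("-Oz: %d, -Os: %d, -O3: %d\n", pickThreshold(2, 2), pickThreshold(2, 1),
              pickThreshold(3, 0));
}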
Optional getInliningCostFeatures( CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref GetAssumptionCache, function_ref GetBFI = nullptr, ProfileSummaryInfo *PSI = nullptr, OptimizationRemarkEmitter *ORE = nullptr); /// Minimal filter to detect invalid constructs for inlining. InlineResult isInlineViable(Function &Callee); // This pass is used to annotate instructions during the inline process for // debugging and analysis. The main purpose of the pass is to see and test // inliner's decisions when creating new optimizations to InlineCost. struct InlineCostAnnotationPrinterPass : PassInfoMixin { raw_ostream &OS; public: explicit InlineCostAnnotationPrinterPass(raw_ostream &OS) : OS(OS) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); }; } // namespace llvm #endif diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar.h index d6228700aa9a..4d6874f784ef 100644 --- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar.h +++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar.h @@ -1,567 +1,568 @@ //===-- Scalar.h - Scalar Transformations -----------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This header file defines prototypes for accessor functions that expose passes // in the Scalar transformations library. // //===----------------------------------------------------------------------===// #ifndef LLVM_TRANSFORMS_SCALAR_H #define LLVM_TRANSFORMS_SCALAR_H #include "llvm/Transforms/Utils/SimplifyCFGOptions.h" #include namespace llvm { class Function; class FunctionPass; class ModulePass; class Pass; //===----------------------------------------------------------------------===// // // AlignmentFromAssumptions - Use assume intrinsics to set load/store // alignments. // FunctionPass *createAlignmentFromAssumptionsPass(); //===----------------------------------------------------------------------===// // // AnnotationRemarks - Emit remarks for !annotation metadata. // FunctionPass *createAnnotationRemarksLegacyPass(); //===----------------------------------------------------------------------===// // // SCCP - Sparse conditional constant propagation. // FunctionPass *createSCCPPass(); //===----------------------------------------------------------------------===// // // RedundantDbgInstElimination - This pass removes redundant dbg intrinsics // without modifying the CFG of the function. It is a FunctionPass. // Pass *createRedundantDbgInstEliminationPass(); //===----------------------------------------------------------------------===// // // DeadCodeElimination - This pass is more powerful than DeadInstElimination, // because it is worklist driven that can potentially revisit instructions when // their other instructions become dead, to eliminate chains of dead // computations. // FunctionPass *createDeadCodeEliminationPass(); //===----------------------------------------------------------------------===// // // DeadStoreElimination - This pass deletes stores that are post-dominated by // must-aliased stores and are not loaded used between the stores. 
// FunctionPass *createDeadStoreEliminationPass(); //===----------------------------------------------------------------------===// // // CallSiteSplitting - This pass split call-site based on its known argument // values. FunctionPass *createCallSiteSplittingPass(); //===----------------------------------------------------------------------===// // // AggressiveDCE - This pass uses the SSA based Aggressive DCE algorithm. This // algorithm assumes instructions are dead until proven otherwise, which makes // it more successful are removing non-obviously dead instructions. // FunctionPass *createAggressiveDCEPass(); //===----------------------------------------------------------------------===// // // GuardWidening - An optimization over the @llvm.experimental.guard intrinsic // that (optimistically) combines multiple guards into one to have fewer checks // at runtime. // FunctionPass *createGuardWideningPass(); //===----------------------------------------------------------------------===// // // LoopGuardWidening - Analogous to the GuardWidening pass, but restricted to a // single loop at a time for use within a LoopPassManager. Desired effect is // to widen guards into preheader or a single guard within loop if that's not // possible. // Pass *createLoopGuardWideningPass(); //===----------------------------------------------------------------------===// // // BitTrackingDCE - This pass uses a bit-tracking DCE algorithm in order to // remove computations of dead bits. // FunctionPass *createBitTrackingDCEPass(); //===----------------------------------------------------------------------===// // // SROA - Replace aggregates or pieces of aggregates with scalar SSA values. // FunctionPass *createSROAPass(); //===----------------------------------------------------------------------===// // // InductiveRangeCheckElimination - Transform loops to elide range checks on // linear functions of the induction variable. // Pass *createInductiveRangeCheckEliminationPass(); //===----------------------------------------------------------------------===// // // InductionVariableSimplify - Transform induction variables in a program to all // use a single canonical induction variable per loop. // Pass *createIndVarSimplifyPass(); //===----------------------------------------------------------------------===// // // LICM - This pass is a loop invariant code motion and memory promotion pass. // Pass *createLICMPass(); Pass *createLICMPass(unsigned LicmMssaOptCap, - unsigned LicmMssaNoAccForPromotionCap); + unsigned LicmMssaNoAccForPromotionCap, + bool AllowSpeculation); //===----------------------------------------------------------------------===// // // LoopSink - This pass sinks invariants from preheader to loop body where // frequency is lower than loop preheader. // Pass *createLoopSinkPass(); //===----------------------------------------------------------------------===// // // LoopPredication - This pass does loop predication on guards. // Pass *createLoopPredicationPass(); //===----------------------------------------------------------------------===// // // LoopInterchange - This pass interchanges loops to provide a more // cache-friendly memory access patterns. // Pass *createLoopInterchangePass(); //===----------------------------------------------------------------------===// // // LoopFlatten - This pass flattens nested loops into a single loop. 
// FunctionPass *createLoopFlattenPass(); //===----------------------------------------------------------------------===// // // LoopStrengthReduce - This pass is strength reduces GEP instructions that use // a loop's canonical induction variable as one of their indices. // Pass *createLoopStrengthReducePass(); //===----------------------------------------------------------------------===// // // LoopUnswitch - This pass is a simple loop unswitching pass. // Pass *createLoopUnswitchPass(bool OptimizeForSize = false, bool hasBranchDivergence = false); //===----------------------------------------------------------------------===// // // LoopInstSimplify - This pass simplifies instructions in a loop's body. // Pass *createLoopInstSimplifyPass(); //===----------------------------------------------------------------------===// // // LoopUnroll - This pass is a simple loop unrolling pass. // Pass *createLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false, bool ForgetAllSCEV = false, int Threshold = -1, int Count = -1, int AllowPartial = -1, int Runtime = -1, int UpperBound = -1, int AllowPeeling = -1); // Create an unrolling pass for full unrolling that uses exact trip count only // and also does peeling. Pass *createSimpleLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false, bool ForgetAllSCEV = false); //===----------------------------------------------------------------------===// // // LoopUnrollAndJam - This pass is a simple loop unroll and jam pass. // Pass *createLoopUnrollAndJamPass(int OptLevel = 2); //===----------------------------------------------------------------------===// // // LoopReroll - This pass is a simple loop rerolling pass. // Pass *createLoopRerollPass(); //===----------------------------------------------------------------------===// // // LoopRotate - This pass is a simple loop rotating pass. // Pass *createLoopRotatePass(int MaxHeaderSize = -1, bool PrepareForLTO = false); //===----------------------------------------------------------------------===// // // LoopIdiom - This pass recognizes and replaces idioms in loops. // Pass *createLoopIdiomPass(); //===----------------------------------------------------------------------===// // // LoopVersioningLICM - This pass is a loop versioning pass for LICM. // Pass *createLoopVersioningLICMPass(); //===----------------------------------------------------------------------===// // // DemoteRegisterToMemoryPass - This pass is used to demote registers to memory // references. In basically undoes the PromoteMemoryToRegister pass to make cfg // hacking easier. // FunctionPass *createDemoteRegisterToMemoryPass(); extern char &DemoteRegisterToMemoryID; //===----------------------------------------------------------------------===// // // Reassociate - This pass reassociates commutative expressions in an order that // is designed to promote better constant propagation, GCSE, LICM, PRE... // // For example: 4 + (x + 5) -> x + (4 + 5) // FunctionPass *createReassociatePass(); //===----------------------------------------------------------------------===// // // JumpThreading - Thread control through mult-pred/multi-succ blocks where some // preds always go to some succ. If FreezeSelectCond is true, unfold the // condition of a select that unfolds to branch. Thresholds other than minus one // override the internal BB duplication default threshold. 
// FunctionPass *createJumpThreadingPass(bool FreezeSelectCond = false, int Threshold = -1); //===----------------------------------------------------------------------===// // // DFAJumpThreading - When a switch statement inside a loop is used to // implement a deterministic finite automata we can jump thread the switch // statement reducing number of conditional jumps. // FunctionPass *createDFAJumpThreadingPass(); //===----------------------------------------------------------------------===// // // CFGSimplification - Merge basic blocks, eliminate unreachable blocks, // simplify terminator instructions, convert switches to lookup tables, etc. // FunctionPass *createCFGSimplificationPass( SimplifyCFGOptions Options = SimplifyCFGOptions(), std::function Ftor = nullptr); //===----------------------------------------------------------------------===// // // FlattenCFG - flatten CFG, reduce number of conditional branches by using // parallel-and and parallel-or mode, etc... // FunctionPass *createFlattenCFGPass(); //===----------------------------------------------------------------------===// // // CFG Structurization - Remove irreducible control flow // /// /// When \p SkipUniformRegions is true the structizer will not structurize /// regions that only contain uniform branches. Pass *createStructurizeCFGPass(bool SkipUniformRegions = false); //===----------------------------------------------------------------------===// // // TailCallElimination - This pass eliminates call instructions to the current // function which occur immediately before return instructions. // FunctionPass *createTailCallEliminationPass(); //===----------------------------------------------------------------------===// // // EarlyCSE - This pass performs a simple and fast CSE pass over the dominator // tree. // FunctionPass *createEarlyCSEPass(bool UseMemorySSA = false); //===----------------------------------------------------------------------===// // // GVNHoist - This pass performs a simple and fast GVN pass over the dominator // tree to hoist common expressions from sibling branches. // FunctionPass *createGVNHoistPass(); //===----------------------------------------------------------------------===// // // GVNSink - This pass uses an "inverted" value numbering to decide the // similarity of expressions and sinks similar expressions into successors. // FunctionPass *createGVNSinkPass(); //===----------------------------------------------------------------------===// // // MergedLoadStoreMotion - This pass merges loads and stores in diamonds. Loads // are hoisted into the header, while stores sink into the footer. // FunctionPass *createMergedLoadStoreMotionPass(bool SplitFooterBB = false); //===----------------------------------------------------------------------===// // // GVN - This pass performs global value numbering and redundant load // elimination cotemporaneously. // FunctionPass *createNewGVNPass(); //===----------------------------------------------------------------------===// // // DivRemPairs - Hoist/decompose integer division and remainder instructions. // FunctionPass *createDivRemPairsPass(); //===----------------------------------------------------------------------===// // // MemCpyOpt - This pass performs optimizations related to eliminating memcpy // calls and/or combining multiple stores into memset's. 
// FunctionPass *createMemCpyOptPass(); //===----------------------------------------------------------------------===// // // LoopDeletion - This pass performs DCE of non-infinite loops that it // can prove are dead. // Pass *createLoopDeletionPass(); //===----------------------------------------------------------------------===// // // ConstantHoisting - This pass prepares a function for expensive constants. // FunctionPass *createConstantHoistingPass(); //===----------------------------------------------------------------------===// // // ConstraintElimination - This pass eliminates conditions based on found // constraints. // FunctionPass *createConstraintEliminationPass(); //===----------------------------------------------------------------------===// // // Sink - Code Sinking // FunctionPass *createSinkingPass(); //===----------------------------------------------------------------------===// // // LowerAtomic - Lower atomic intrinsics to non-atomic form // Pass *createLowerAtomicPass(); //===----------------------------------------------------------------------===// // // LowerGuardIntrinsic - Lower guard intrinsics to normal control flow. // Pass *createLowerGuardIntrinsicPass(); //===----------------------------------------------------------------------===// // // LowerMatrixIntrinsics - Lower matrix intrinsics to vector operations. // Pass *createLowerMatrixIntrinsicsPass(); //===----------------------------------------------------------------------===// // // LowerMatrixIntrinsicsMinimal - Lower matrix intrinsics to vector operations // (lightweight, does not require extra analysis) // Pass *createLowerMatrixIntrinsicsMinimalPass(); //===----------------------------------------------------------------------===// // // LowerWidenableCondition - Lower widenable condition to i1 true. // Pass *createLowerWidenableConditionPass(); //===----------------------------------------------------------------------===// // // MergeICmps - Merge integer comparison chains into a memcmp // Pass *createMergeICmpsLegacyPass(); //===----------------------------------------------------------------------===// // // ValuePropagation - Propagate CFG-derived value information // Pass *createCorrelatedValuePropagationPass(); //===----------------------------------------------------------------------===// // // InferAddressSpaces - Modify users of addrspacecast instructions with values // in the source address space if using the destination address space is slower // on the target. If AddressSpace is left to its default value, it will be // obtained from the TargetTransformInfo. // FunctionPass *createInferAddressSpacesPass(unsigned AddressSpace = ~0u); extern char &InferAddressSpacesID; //===----------------------------------------------------------------------===// // // LowerExpectIntrinsics - Removes llvm.expect intrinsics and creates // "block_weights" metadata. FunctionPass *createLowerExpectIntrinsicPass(); //===----------------------------------------------------------------------===// // // LowerConstantIntrinsicss - Expand any remaining llvm.objectsize and // llvm.is.constant intrinsic calls, even for the unknown cases. // FunctionPass *createLowerConstantIntrinsicsPass(); //===----------------------------------------------------------------------===// // // PartiallyInlineLibCalls - Tries to inline the fast path of library // calls such as sqrt. 
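//===----------------------------------------------------------------------===//
// Illustrative source-level sketch of the kind of code MergeICmps targets
// (the struct and function are assumptions for the example): a chain of
// adjacent integer field comparisons that can be merged into a single memcmp
// of the underlying bytes.
struct ExampleKey { int A; int B; int C; };
static bool sameKey(const ExampleKey &L, const ExampleKey &R) {
  // Candidate for a single memcmp(&L, &R, sizeof(ExampleKey)) == 0.
  return L.A == R.A && L.B == R.B && L.C == R.C;
}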
// FunctionPass *createPartiallyInlineLibCallsPass(); //===----------------------------------------------------------------------===// // // SeparateConstOffsetFromGEP - Split GEPs for better CSE // FunctionPass *createSeparateConstOffsetFromGEPPass(bool LowerGEP = false); //===----------------------------------------------------------------------===// // // SpeculativeExecution - Aggressively hoist instructions to enable // speculative execution on targets where branches are expensive. // FunctionPass *createSpeculativeExecutionPass(); // Same as createSpeculativeExecutionPass, but does nothing unless // TargetTransformInfo::hasBranchDivergence() is true. FunctionPass *createSpeculativeExecutionIfHasBranchDivergencePass(); //===----------------------------------------------------------------------===// // // StraightLineStrengthReduce - This pass strength-reduces some certain // instruction patterns in straight-line code. // FunctionPass *createStraightLineStrengthReducePass(); //===----------------------------------------------------------------------===// // // PlaceSafepoints - Rewrite any IR calls to gc.statepoints and insert any // safepoint polls (method entry, backedge) that might be required. This pass // does not generate explicit relocation sequences - that's handled by // RewriteStatepointsForGC which can be run at an arbitrary point in the pass // order following this pass. // FunctionPass *createPlaceSafepointsPass(); //===----------------------------------------------------------------------===// // // RewriteStatepointsForGC - Rewrite any gc.statepoints which do not yet have // explicit relocations to include explicit relocations. // ModulePass *createRewriteStatepointsForGCLegacyPass(); //===----------------------------------------------------------------------===// // // Float2Int - Demote floats to ints where possible. // FunctionPass *createFloat2IntPass(); //===----------------------------------------------------------------------===// // // NaryReassociate - Simplify n-ary operations by reassociation. // FunctionPass *createNaryReassociatePass(); //===----------------------------------------------------------------------===// // // LoopDistribute - Distribute loops. // FunctionPass *createLoopDistributePass(); //===----------------------------------------------------------------------===// // // LoopFuse - Fuse loops. // FunctionPass *createLoopFusePass(); //===----------------------------------------------------------------------===// // // LoopLoadElimination - Perform loop-aware load elimination. // FunctionPass *createLoopLoadEliminationPass(); //===----------------------------------------------------------------------===// // // LoopVersioning - Perform loop multi-versioning. // FunctionPass *createLoopVersioningPass(); //===----------------------------------------------------------------------===// // // LoopDataPrefetch - Perform data prefetching in loops. // FunctionPass *createLoopDataPrefetchPass(); ///===---------------------------------------------------------------------===// ModulePass *createNameAnonGlobalPass(); ModulePass *createCanonicalizeAliasesPass(); //===----------------------------------------------------------------------===// // // LibCallsShrinkWrap - Shrink-wraps a call to function if the result is not // used. 
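//===----------------------------------------------------------------------===//
// Illustrative source-level sketch of straight-line strength reduction (names
// are assumptions): the second address computation differs from the first by a
// constant multiple of S, so it can be rewritten as an add from the first
// address instead of a second multiply.
static int loadAdjacentPair(const int *A, long B, long S) {
  int X = A[B * S];        // P = A + B*S
  int Y = A[(B + 1) * S];  // Q = P + S; the second multiply can be removed
  return X + Y;
}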
// FunctionPass *createLibCallsShrinkWrapPass(); //===----------------------------------------------------------------------===// // // LoopSimplifyCFG - This pass performs basic CFG simplification on loops, // primarily to help other loop passes. // Pass *createLoopSimplifyCFGPass(); //===----------------------------------------------------------------------===// // // WarnMissedTransformations - This pass emits warnings for leftover forced // transformations. // Pass *createWarnMissedTransformationsPass(); //===----------------------------------------------------------------------===// // // This pass does instruction simplification on each // instruction in a function. // FunctionPass *createInstSimplifyLegacyPass(); //===----------------------------------------------------------------------===// // // createScalarizeMaskedMemIntrinPass - Replace masked load, store, gather // and scatter intrinsics with scalar code when target doesn't support them. // FunctionPass *createScalarizeMaskedMemIntrinLegacyPass(); } // End llvm namespace #endif diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LICM.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LICM.h index 751f75c0ccb2..503c8792d309 100644 --- a/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LICM.h +++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Scalar/LICM.h @@ -1,78 +1,86 @@ //===- LICM.h - Loop Invariant Code Motion Pass -------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This pass performs loop invariant code motion, attempting to remove as much // code from the body of a loop as possible. It does this by either hoisting // code into the preheader block, or by sinking code to the exit blocks if it is // safe. This pass also promotes must-aliased memory locations in the loop to // live in registers, thus hoisting and sinking "invariant" loads and stores. // // This pass uses alias analysis for two purposes: // // 1. Moving loop invariant loads and calls out of loops. If we can determine // that a load or call inside of a loop never aliases anything stored to, // we can hoist it or sink it like any other instruction. // 2. Scalar Promotion of Memory - If there is a store instruction inside of // the loop, we try to move the store to happen AFTER the loop instead of // inside of the loop. This can only happen if a few conditions are true: // A. The pointer stored through is loop invariant // B. There are no stores or loads in the loop which _may_ alias the // pointer. There are no calls in the loop which mod/ref the pointer. // If these conditions are true, we can promote the loads and stores in the // loop of the pointer to use a temporary alloca'd variable. We then use // the SSAUpdater to construct the appropriate SSA form for the value. // //===----------------------------------------------------------------------===// #ifndef LLVM_TRANSFORMS_SCALAR_LICM_H #define LLVM_TRANSFORMS_SCALAR_LICM_H #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" namespace llvm { extern cl::opt SetLicmMssaOptCap; extern cl::opt SetLicmMssaNoAccForPromotionCap; /// Performs Loop Invariant Code Motion Pass. 
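//===----------------------------------------------------------------------===//
// Illustrative source-level sketch of the scalar promotion described above
// (the function is an assumption): because Sum is restrict-qualified, alias
// analysis can prove that nothing else in the loop aliases it, so the
// load/store of *Sum is promoted to a register and a single store is emitted
// after the loop.
static void accumulate(int *__restrict Sum, const int *__restrict In, int N) {
  for (int I = 0; I < N; ++I)
    *Sum += In[I]; // becomes: T += In[I] in the loop; *Sum = T after the loop
}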
 class LICMPass : public PassInfoMixin<LICMPass> {
   unsigned LicmMssaOptCap;
   unsigned LicmMssaNoAccForPromotionCap;
+  bool LicmAllowSpeculation;
 
 public:
   LICMPass()
       : LicmMssaOptCap(SetLicmMssaOptCap),
-        LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap) {}
-  LICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap)
+        LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap),
+        LicmAllowSpeculation(true) {}
+  LICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap,
+           bool LicmAllowSpeculation)
       : LicmMssaOptCap(LicmMssaOptCap),
-        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
+        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
+        LicmAllowSpeculation(LicmAllowSpeculation) {}
   PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
                         LoopStandardAnalysisResults &AR, LPMUpdater &U);
 };
 
 /// Performs LoopNest Invariant Code Motion Pass.
 class LNICMPass : public PassInfoMixin<LNICMPass> {
   unsigned LicmMssaOptCap;
   unsigned LicmMssaNoAccForPromotionCap;
+  bool LicmAllowSpeculation;
 
 public:
   LNICMPass()
       : LicmMssaOptCap(SetLicmMssaOptCap),
-        LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap) {}
-  LNICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap)
+        LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap),
+        LicmAllowSpeculation(true) {}
+  LNICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap,
+            bool LicmAllowSpeculation)
       : LicmMssaOptCap(LicmMssaOptCap),
-        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
+        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
+        LicmAllowSpeculation(LicmAllowSpeculation) {}
   PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM,
                         LoopStandardAnalysisResults &AR, LPMUpdater &U);
 };
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_SCALAR_LICM_H
diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 3a712d78df67..134f8bcfd888 100644
--- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -1,534 +1,539 @@
 //===- llvm/Transforms/Utils/LoopUtils.h - Loop utilities -------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This file defines some loop transformation utilities.
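//===----------------------------------------------------------------------===//
// Illustrative sketch (pipeline contents are assumptions): constructing the
// extended LICMPass with an explicit allow-speculation argument and running it
// under the new pass manager through the MemorySSA-based loop adaptor.
static FunctionPassManager buildExampleLoopPipeline() {
  FunctionPassManager FPM;
  FPM.addPass(createFunctionToLoopPassAdaptor(
      LICMPass(SetLicmMssaOptCap, SetLicmMssaNoAccForPromotionCap,
               /*LicmAllowSpeculation=*/true),
      /*UseMemorySSA=*/true));
  return FPM;
}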
// //===----------------------------------------------------------------------===// #ifndef LLVM_TRANSFORMS_UTILS_LOOPUTILS_H #define LLVM_TRANSFORMS_UTILS_LOOPUTILS_H #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/IVDescriptors.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Transforms/Utils/ValueMapper.h" namespace llvm { template class DomTreeNodeBase; using DomTreeNode = DomTreeNodeBase; class AAResults; class AliasSet; class AliasSetTracker; class BasicBlock; class BlockFrequencyInfo; class ICFLoopSafetyInfo; class IRBuilderBase; class Loop; class LoopInfo; class MemoryAccess; class MemorySSA; class MemorySSAUpdater; class OptimizationRemarkEmitter; class PredIteratorCache; class ScalarEvolution; class SCEV; class SCEVExpander; class TargetLibraryInfo; class LPPassManager; class Instruction; struct RuntimeCheckingPtrGroup; typedef std::pair RuntimePointerCheck; template class Optional; template class SmallSetVector; template class SmallVector; template class SmallVectorImpl; template class SmallPriorityWorklist; BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA); /// Ensure that all exit blocks of the loop are dedicated exits. /// /// For any loop exit block with non-loop predecessors, we split the loop /// predecessors to use a dedicated loop exit block. We update the dominator /// tree and loop info if provided, and will preserve LCSSA if requested. bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA); /// Ensures LCSSA form for every instruction from the Worklist in the scope of /// innermost containing loop. /// /// For the given instruction which have uses outside of the loop, an LCSSA PHI /// node is inserted and the uses outside the loop are rewritten to use this /// node. /// /// LoopInfo and DominatorTree are required and, since the routine makes no /// changes to CFG, preserved. /// /// Returns true if any modifications are made. /// /// This function may introduce unused PHI nodes. If \p PHIsToRemove is not /// nullptr, those are added to it (before removing, the caller has to check if /// they still do not have any uses). Otherwise the PHIs are directly removed. bool formLCSSAForInstructions( SmallVectorImpl &Worklist, const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE, IRBuilderBase &Builder, SmallVectorImpl *PHIsToRemove = nullptr); /// Put loop into LCSSA form. /// /// Looks at all instructions in the loop which have uses outside of the /// current loop. For each, an LCSSA PHI node is inserted and the uses outside /// the loop are rewritten to use this node. Sub-loops must be in LCSSA form /// already. /// /// LoopInfo and DominatorTree are required and preserved. /// /// If ScalarEvolution is passed in, it will be preserved. /// /// Returns true if any modifications are made to the loop. bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE); /// Put a loop nest into LCSSA form. /// /// This recursively forms LCSSA for a loop nest. /// /// LoopInfo and DominatorTree are required and preserved. /// /// If ScalarEvolution is passed in, it will be preserved. /// /// Returns true if any modifications are made to the loop. bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE); /// Flags controlling how much is checked when sinking or hoisting /// instructions. 
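//===----------------------------------------------------------------------===//
// Illustrative sketch (the analyses are assumed to come from the caller):
// re-establishing LCSSA for a loop nest after a transformation introduced uses
// of loop-defined values outside the loop. Conceptually, each such value gets
// a single-entry PHI in the exit block and the outside uses are rewritten to
// go through that PHI.
static bool restoreLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo &LI,
                         ScalarEvolution *SE) {
  return formLCSSARecursively(L, DT, &LI, SE);
}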
The number of memory access in the loop (and whether there /// are too many) is determined in the constructors when using MemorySSA. class SinkAndHoistLICMFlags { public: // Explicitly set limits. SinkAndHoistLICMFlags(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink, Loop *L = nullptr, MemorySSA *MSSA = nullptr); // Use default limits. SinkAndHoistLICMFlags(bool IsSink, Loop *L = nullptr, MemorySSA *MSSA = nullptr); void setIsSink(bool B) { IsSink = B; } bool getIsSink() { return IsSink; } bool tooManyMemoryAccesses() { return NoOfMemAccTooLarge; } bool tooManyClobberingCalls() { return LicmMssaOptCounter >= LicmMssaOptCap; } void incrementClobberingCalls() { ++LicmMssaOptCounter; } protected: bool NoOfMemAccTooLarge = false; unsigned LicmMssaOptCounter = 0; unsigned LicmMssaOptCap; unsigned LicmMssaNoAccForPromotionCap; bool IsSink; }; /// Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in /// reverse depth first order w.r.t the DominatorTree. This allows us to visit /// uses before definitions, allowing us to sink a loop body in one pass without /// iteration. Takes DomTreeNode, AAResults, LoopInfo, DominatorTree, /// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all /// instructions of the loop and loop safety information as /// arguments. Diagnostics is emitted via \p ORE. It returns changed status. /// \p CurLoop is a loop to do sinking on. \p OutermostLoop is used only when /// this function is called by \p sinkRegionForLoopNest. bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, BlockFrequencyInfo *, TargetLibraryInfo *, TargetTransformInfo *, Loop *CurLoop, MemorySSAUpdater *, ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, Loop *OutermostLoop = nullptr); /// Call sinkRegion on loops contained within the specified loop /// in order from innermost to outermost. bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, BlockFrequencyInfo *, TargetLibraryInfo *, TargetTransformInfo *, Loop *, MemorySSAUpdater *, ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *); /// Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in depth /// first order w.r.t the DominatorTree. This allows us to visit definitions /// before uses, allowing us to hoist a loop body in one pass without iteration. /// Takes DomTreeNode, AAResults, LoopInfo, DominatorTree, /// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all /// instructions of the loop and loop safety information as arguments. /// Diagnostics is emitted via \p ORE. It returns changed status. +/// \p AllowSpeculation is whether values should be hoisted even if they are not +/// guaranteed to execute in the loop, but are safe to speculatively execute. bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, BlockFrequencyInfo *, TargetLibraryInfo *, Loop *, MemorySSAUpdater *, ScalarEvolution *, ICFLoopSafetyInfo *, - SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool); + SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool, + bool AllowSpeculation); /// This function deletes dead loops. The caller of this function needs to /// guarantee that the loop is infact dead. 
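//===----------------------------------------------------------------------===//
// Illustrative sketch (loop and MemorySSA are assumed to be supplied by the
// caller): default-capped flags for a hoisting traversal, queried the way the
// LICM implementation does before issuing MemorySSA clobber walks.
static bool mayUseMemorySSAWalks(Loop *L, MemorySSA &MSSA) {
  SinkAndHoistLICMFlags Flags(/*IsSink=*/false, L, &MSSA);
  if (Flags.tooManyMemoryAccesses())
    return false;
  Flags.incrementClobberingCalls(); // account for one clobber query
  return !Flags.tooManyClobberingCalls();
}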
/// The function requires a bunch or prerequisites to be present: /// - The loop needs to be in LCSSA form /// - The loop needs to have a Preheader /// - A unique dedicated exit block must exist /// /// This also updates the relevant analysis information in \p DT, \p SE, \p LI /// and \p MSSA if pointers to those are provided. /// It also updates the loop PM if an updater struct is provided. void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI, MemorySSA *MSSA = nullptr); /// Remove the backedge of the specified loop. Handles loop nests and general /// loop structures subject to the precondition that the loop has no parent /// loop and has a single latch block. Preserves all listed analyses. void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, MemorySSA *MSSA); /// Try to promote memory values to scalars by sinking stores out of /// the loop and moving loads to before the loop. We do this by looping over /// the stores in the loop, looking for stores to Must pointers which are /// loop invariant. It takes a set of must-alias values, Loop exit blocks /// vector, loop exit blocks insertion point vector, PredIteratorCache, /// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions /// of the loop and loop safety information as arguments. /// Diagnostics is emitted via \p ORE. It returns changed status. +/// \p AllowSpeculation is whether values should be hoisted even if they are not +/// guaranteed to execute in the loop, but are safe to speculatively execute. bool promoteLoopAccessesToScalars( const SmallSetVector &, SmallVectorImpl &, SmallVectorImpl &, SmallVectorImpl &, PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *, Loop *, MemorySSAUpdater *, ICFLoopSafetyInfo *, - OptimizationRemarkEmitter *); + OptimizationRemarkEmitter *, bool AllowSpeculation); /// Does a BFS from a given node to all of its children inside a given loop. /// The returned vector of nodes includes the starting point. SmallVector collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop); /// Returns the instructions that use values defined in the loop. SmallVector findDefsUsedOutsideOfLoop(Loop *L); /// Find a combination of metadata ("llvm.loop.vectorize.width" and /// "llvm.loop.vectorize.scalable.enable") for a loop and use it to construct a /// ElementCount. If the metadata "llvm.loop.vectorize.width" cannot be found /// then None is returned. Optional getOptionalElementCountLoopAttribute(const Loop *TheLoop); /// Create a new loop identifier for a loop created from a loop transformation. /// /// @param OrigLoopID The loop ID of the loop before the transformation. /// @param FollowupAttrs List of attribute names that contain attributes to be /// added to the new loop ID. /// @param InheritOptionsAttrsPrefix Selects which attributes should be inherited /// from the original loop. The following values /// are considered: /// nullptr : Inherit all attributes from @p OrigLoopID. /// "" : Do not inherit any attribute from @p OrigLoopID; only use /// those specified by a followup attribute. /// "": Inherit all attributes except those which start with /// ; commonly used to remove metadata for the /// applied transformation. /// @param AlwaysNew If true, do not try to reuse OrigLoopID and never return /// None. /// /// @return The loop ID for the after-transformation loop. 
The following values /// can be returned: /// None : No followup attribute was found; it is up to the /// transformation to choose attributes that make sense. /// @p OrigLoopID: The original identifier can be reused. /// nullptr : The new loop has no attributes. /// MDNode* : A new unique loop identifier. Optional makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef FollowupAttrs, const char *InheritOptionsAttrsPrefix = "", bool AlwaysNew = false); /// Look for the loop attribute that disables all transformation heuristic. bool hasDisableAllTransformsHint(const Loop *L); /// Look for the loop attribute that disables the LICM transformation heuristics. bool hasDisableLICMTransformsHint(const Loop *L); /// The mode sets how eager a transformation should be applied. enum TransformationMode { /// The pass can use heuristics to determine whether a transformation should /// be applied. TM_Unspecified, /// The transformation should be applied without considering a cost model. TM_Enable, /// The transformation should not be applied. TM_Disable, /// Force is a flag and should not be used alone. TM_Force = 0x04, /// The transformation was directed by the user, e.g. by a #pragma in /// the source code. If the transformation could not be applied, a /// warning should be emitted. TM_ForcedByUser = TM_Enable | TM_Force, /// The transformation must not be applied. For instance, `#pragma clang loop /// unroll(disable)` explicitly forbids any unrolling to take place. Unlike /// general loop metadata, it must not be dropped. Most passes should not /// behave differently under TM_Disable and TM_SuppressedByUser. TM_SuppressedByUser = TM_Disable | TM_Force }; /// @{ /// Get the mode for LLVM's supported loop transformations. TransformationMode hasUnrollTransformation(const Loop *L); TransformationMode hasUnrollAndJamTransformation(const Loop *L); TransformationMode hasVectorizeTransformation(const Loop *L); TransformationMode hasDistributeTransformation(const Loop *L); TransformationMode hasLICMVersioningTransformation(const Loop *L); /// @} /// Set input string into loop metadata by keeping other values intact. /// If the string is already in loop metadata update value if it is /// different. void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V = 0); /// Returns a loop's estimated trip count based on branch weight metadata. /// In addition if \p EstimatedLoopInvocationWeight is not null it is /// initialized with weight of loop's latch leading to the exit. /// Returns 0 when the count is estimated to be 0, or None when a meaningful /// estimate can not be made. Optional getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight = nullptr); /// Set a loop's branch weight metadata to reflect that loop has \p /// EstimatedTripCount iterations and \p EstimatedLoopInvocationWeight exits /// through latch. Returns true if metadata is successfully updated, false /// otherwise. Note that loop must have a latch block which controls loop exit /// in order to succeed. bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, unsigned EstimatedLoopInvocationWeight); /// Check inner loop (L) backedge count is known to be invariant on all /// iterations of its outer loop. If the loop has no parent, this is trivially /// true. bool hasIterationCountInvariantInParent(Loop *L, ScalarEvolution &SE); /// Helper to consistently add the set of standard passes to a loop pass's \c /// AnalysisUsage. 
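//===----------------------------------------------------------------------===//
// Illustrative sketch (the loop pointer is assumed, the threshold is chosen
// only for the example): honoring a user-forced unroll pragma via the
// transformation-mode query above and otherwise consulting the profile-based
// trip-count estimate.
static bool exampleShouldUnroll(Loop *L) {
  if (hasUnrollTransformation(L) == TM_ForcedByUser)
    return true; // forced by #pragma; skip the cost model
  if (Optional<unsigned> TC = getLoopEstimatedTripCount(L))
    return *TC >= 4;
  return false;
}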
/// /// All loop passes should call this as part of implementing their \c /// getAnalysisUsage. void getLoopAnalysisUsage(AnalysisUsage &AU); /// Returns true if is legal to hoist or sink this instruction disregarding the /// possible introduction of faults. Reasoning about potential faulting /// instructions is the responsibility of the caller since it is challenging to /// do efficiently from within this routine. /// \p TargetExecutesOncePerLoop is true only when it is guaranteed that the /// target executes at most once per execution of the loop body. This is used /// to assess the legality of duplicating atomic loads. Generally, this is /// true when moving out of loop and not true when moving into loops. /// If \p ORE is set use it to emit optimization remarks. bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop, SinkAndHoistLICMFlags *LICMFlags = nullptr, OptimizationRemarkEmitter *ORE = nullptr); /// Returns the comparison predicate used when expanding a min/max reduction. CmpInst::Predicate getMinMaxReductionPredicate(RecurKind RK); /// See RecurrenceDescriptor::isSelectCmpPattern for a description of the /// pattern we are trying to match. In this pattern we are only ever selecting /// between two values: 1) an initial PHI start value, and 2) a loop invariant /// value. This function uses \p LoopExitInst to determine 2), which we then use /// to select between \p Left and \p Right. Any lane value in \p Left that /// matches 2) will be merged into \p Right. Value *createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal, RecurKind RK, Value *Left, Value *Right); /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind. /// The Builder's fast-math-flags must be set to propagate the expected values. Value *createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right); /// Generates an ordered vector reduction using extracts to reduce the value. Value *getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src, unsigned Op, RecurKind MinMaxKind = RecurKind::None); /// Generates a vector reduction using shufflevectors to reduce the value. /// Fast-math-flags are propagated using the IRBuilder's setting. Value *getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, RecurKind MinMaxKind = RecurKind::None); /// Create a target reduction of the given vector. The reduction operation /// is described by the \p Opcode parameter. min/max reductions require /// additional information supplied in \p RdxKind. /// The target is queried to determine if intrinsics or shuffle sequences are /// required to implement the reduction. /// Fast-math-flags are propagated using the IRBuilder's setting. Value *createSimpleTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI, Value *Src, RecurKind RdxKind); /// Create a target reduction of the given vector \p Src for a reduction of the /// kind RecurKind::SelectICmp or RecurKind::SelectFCmp. The reduction operation /// is described by \p Desc. Value *createSelectCmpTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI, Value *Src, const RecurrenceDescriptor &Desc, PHINode *OrigPhi); /// Create a generic target reduction using a recurrence descriptor \p Desc /// The target is queried to determine if intrinsics or shuffle sequences are /// required to implement the reduction. /// Fast-math-flags are propagated using the RecurrenceDescriptor. 
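//===----------------------------------------------------------------------===//
// Illustrative sketch (builder, TTI and the vector value are assumed): emitting
// an integer add-reduction of a vector with the helper declared above; the
// target decides between a reduction intrinsic and a shuffle sequence.
static Value *emitAddReduction(IRBuilderBase &B, const TargetTransformInfo &TTI,
                               Value *VecSrc) {
  return createSimpleTargetReduction(B, &TTI, VecSrc, RecurKind::Add);
}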
Value *createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI, const RecurrenceDescriptor &Desc, Value *Src, PHINode *OrigPhi = nullptr); /// Create an ordered reduction intrinsic using the given recurrence /// descriptor \p Desc. Value *createOrderedReduction(IRBuilderBase &B, const RecurrenceDescriptor &Desc, Value *Src, Value *Start); /// Get the intersection (logical and) of all of the potential IR flags /// of each scalar operation (VL) that will be converted into a vector (I). /// If OpValue is non-null, we only consider operations similar to OpValue /// when intersecting. /// Flag set: NSW, NUW, exact, and all of fast-math. void propagateIRFlags(Value *I, ArrayRef VL, Value *OpValue = nullptr); /// Returns true if we can prove that \p S is defined and always negative in /// loop \p L. bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE); /// Returns true if we can prove that \p S is defined and always non-negative in /// loop \p L. bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE); /// Returns true if \p S is defined and never is equal to signed/unsigned max. bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool Signed); /// Returns true if \p S is defined and never is equal to signed/unsigned min. bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE, bool Signed); enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl }; /// If the final value of any expressions that are recurrent in the loop can /// be computed, substitute the exit values from the loop into any instructions /// outside of the loop that use the final values of the current expressions. /// Return the number of loop exit values that have been replaced, and the /// corresponding phi node will be added to DeadInsts. int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI, ScalarEvolution *SE, const TargetTransformInfo *TTI, SCEVExpander &Rewriter, DominatorTree *DT, ReplaceExitVal ReplaceExitValue, SmallVector &DeadInsts); /// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for /// \p OrigLoop and the following distribution of \p OrigLoop iteration among \p /// UnrolledLoop and \p RemainderLoop. \p UnrolledLoop receives weights that /// reflect TC/UF iterations, and \p RemainderLoop receives weights that reflect /// the remaining TC%UF iterations. /// /// Note that \p OrigLoop may be equal to either \p UnrolledLoop or \p /// RemainderLoop in which case weights for \p OrigLoop are updated accordingly. /// Note also behavior is undefined if \p UnrolledLoop and \p RemainderLoop are /// equal. \p UF must be greater than zero. /// If \p OrigLoop has no profile info associated nothing happens. /// /// This utility may be useful for such optimizations as unroller and /// vectorizer as it's typical transformation for them. void setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop, Loop *RemainderLoop, uint64_t UF); /// Utility that implements appending of loops onto a worklist given a range. /// We want to process loops in postorder, but the worklist is a LIFO data /// structure, so we append to it in *reverse* postorder. /// For trees, a preorder traversal is a viable reverse postorder, so we /// actually append using a preorder walk algorithm. template void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist &); /// Utility that implements appending of loops onto a worklist given a range. 
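//===----------------------------------------------------------------------===//
// Worked example for the weight redistribution described above (the numbers
// are assumptions): with an estimated trip count of 10 and UF = 4, the
// unrolled loop receives weights reflecting 10/4 = 2 iterations per invocation
// and the remainder loop weights reflecting 10%4 = 2 iterations.
static void exampleFixupUnrollWeights(Loop *OrigLoop, Loop *UnrolledLoop,
                                      Loop *RemainderLoop) {
  setProfileInfoAfterUnrolling(OrigLoop, UnrolledLoop, RemainderLoop, /*UF=*/4);
}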
/// It has the same behavior as appendLoopsToWorklist, but assumes the range of /// loops has already been reversed, so it processes loops in the given order. template void appendReversedLoopsToWorklist(RangeT &&, SmallPriorityWorklist &); /// Utility that implements appending of loops onto a worklist given LoopInfo. /// Calls the templated utility taking a Range of loops, handing it the Loops /// in LoopInfo, iterated in reverse. This is because the loops are stored in /// RPO w.r.t. the control flow graph in LoopInfo. For the purpose of unrolling, /// loop deletion, and LICM, we largely want to work forward across the CFG so /// that we visit defs before uses and can propagate simplifications from one /// loop nest into the next. Calls appendReversedLoopsToWorklist with the /// already reversed loops in LI. /// FIXME: Consider changing the order in LoopInfo. void appendLoopsToWorklist(LoopInfo &, SmallPriorityWorklist &); /// Recursively clone the specified loop and all of its children, /// mapping the blocks with the specified map. Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM); /// Add code that checks at runtime if the accessed arrays in \p PointerChecks /// overlap. Returns the final comparator value or NULL if no check is needed. Value * addRuntimeChecks(Instruction *Loc, Loop *TheLoop, const SmallVectorImpl &PointerChecks, SCEVExpander &Expander); /// Struct to hold information about a partially invariant condition. struct IVConditionInfo { /// Instructions that need to be duplicated and checked for the unswitching /// condition. SmallVector InstToDuplicate; /// Constant to indicate for which value the condition is invariant. Constant *KnownValue = nullptr; /// True if the partially invariant path is no-op (=does not have any /// side-effects and no loop value is used outside the loop). bool PathIsNoop = true; /// If the partially invariant path reaches a single exit block, ExitForPath /// is set to that block. Otherwise it is nullptr. BasicBlock *ExitForPath = nullptr; }; /// Check if the loop header has a conditional branch that is not /// loop-invariant, because it involves load instructions. If all paths from /// either the true or false successor to the header or loop exists do not /// modify the memory feeding the condition, perform 'partial unswitching'. That /// is, duplicate the instructions feeding the condition in the pre-header. Then /// unswitch on the duplicated condition. The condition is now known in the /// unswitched version for the 'invariant' path through the original loop. /// /// If the branch condition of the header is partially invariant, return a pair /// containing the instructions to duplicate and a boolean Constant to update /// the condition in the loops created for the true or false successors. Optional hasPartialIVCondition(Loop &L, unsigned MSSAThreshold, MemorySSA &MSSA, AAResults &AA); } // end namespace llvm #endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H diff --git a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h index fb3a7490346f..7af879638a4d 100644 --- a/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h +++ b/contrib/llvm-project/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h @@ -1,77 +1,82 @@ //===- SimplifyCFGOptions.h - Control structure for SimplifyCFG -*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
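//===----------------------------------------------------------------------===//
// Illustrative source-level sketch of the partially invariant condition
// described above (types and names are assumptions): the branch depends on a
// load from Mode, and no path through the body writes memory that may alias
// Mode, so the load feeding the condition can be duplicated in the preheader
// and the loop unswitched on that duplicated, now-invariant condition.
static void scale(int *__restrict Out, const int *__restrict In,
                  const int *__restrict Mode, int N) {
  for (int I = 0; I < N; ++I) {
    if (*Mode > 0)
      Out[I] = In[I] * 2;
    else
      Out[I] = In[I];
  }
}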
// See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // A set of parameters used to control the transforms in the SimplifyCFG pass. // Options may change depending on the position in the optimization pipeline. // For example, canonical form that includes switches and branches may later be // replaced by lookup tables and selects. // //===----------------------------------------------------------------------===// #ifndef LLVM_TRANSFORMS_UTILS_SIMPLIFYCFGOPTIONS_H #define LLVM_TRANSFORMS_UTILS_SIMPLIFYCFGOPTIONS_H namespace llvm { class AssumptionCache; struct SimplifyCFGOptions { int BonusInstThreshold = 1; bool ForwardSwitchCondToPhi = false; + bool ConvertSwitchRangeToICmp = false; bool ConvertSwitchToLookupTable = false; bool NeedCanonicalLoop = true; bool HoistCommonInsts = false; bool SinkCommonInsts = false; bool SimplifyCondBranch = true; bool FoldTwoEntryPHINode = true; AssumptionCache *AC = nullptr; // Support 'builder' pattern to set members by name at construction time. SimplifyCFGOptions &bonusInstThreshold(int I) { BonusInstThreshold = I; return *this; } SimplifyCFGOptions &forwardSwitchCondToPhi(bool B) { ForwardSwitchCondToPhi = B; return *this; } + SimplifyCFGOptions &convertSwitchRangeToICmp(bool B) { + ConvertSwitchRangeToICmp = B; + return *this; + } SimplifyCFGOptions &convertSwitchToLookupTable(bool B) { ConvertSwitchToLookupTable = B; return *this; } SimplifyCFGOptions &needCanonicalLoops(bool B) { NeedCanonicalLoop = B; return *this; } SimplifyCFGOptions &hoistCommonInsts(bool B) { HoistCommonInsts = B; return *this; } SimplifyCFGOptions &sinkCommonInsts(bool B) { SinkCommonInsts = B; return *this; } SimplifyCFGOptions &setAssumptionCache(AssumptionCache *Cache) { AC = Cache; return *this; } SimplifyCFGOptions &setSimplifyCondBranch(bool B) { SimplifyCondBranch = B; return *this; } SimplifyCFGOptions &setFoldTwoEntryPHINode(bool B) { FoldTwoEntryPHINode = B; return *this; } }; } // namespace llvm #endif // LLVM_TRANSFORMS_UTILS_SIMPLIFYCFGOPTIONS_H diff --git a/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp index d5411d916c77..cd5314e7a17a 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/InlineCost.cpp @@ -1,3126 +1,3135 @@ //===- InlineCost.cpp - Cost analysis for inliner -------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements inline cost analysis. 
// //===----------------------------------------------------------------------===// #include "llvm/Analysis/InlineCost.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; #define DEBUG_TYPE "inline-cost" STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed"); static cl::opt DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore, cl::desc("Default amount of inlining to perform")); static cl::opt PrintInstructionComments( "print-instruction-comments", cl::Hidden, cl::init(false), cl::desc("Prints comments for instruction based on inline cost analysis")); static cl::opt InlineThreshold( "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore, cl::desc("Control the amount of inlining to perform (default = 225)")); static cl::opt HintThreshold( "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore, cl::desc("Threshold for inlining functions with inline hint")); static cl::opt ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore, cl::desc("Threshold for inlining cold callsites")); static cl::opt InlineEnableCostBenefitAnalysis( "inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false), cl::desc("Enable the cost-benefit analysis for the inliner")); static cl::opt InlineSavingsMultiplier( "inline-savings-multiplier", cl::Hidden, cl::init(8), cl::ZeroOrMore, cl::desc("Multiplier to multiply cycle savings by during inlining")); static cl::opt InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100), cl::ZeroOrMore, cl::desc("The maximum size of a callee that get's " "inlined without sufficient cycle savings")); // We introduce this threshold to help performance of instrumentation based // PGO before we actually hook up inliner with analysis passes such as BPI and // BFI. 
static cl::opt ColdThreshold( "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore, cl::desc("Threshold for inlining functions with cold attribute")); static cl::opt HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000), cl::ZeroOrMore, cl::desc("Threshold for hot callsites ")); static cl::opt LocallyHotCallSiteThreshold( "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore, cl::desc("Threshold for locally hot callsites ")); static cl::opt ColdCallSiteRelFreq( "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore, cl::desc("Maximum block frequency, expressed as a percentage of caller's " "entry frequency, for a callsite to be cold in the absence of " "profile information.")); static cl::opt HotCallSiteRelFreq( "hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore, cl::desc("Minimum block frequency, expressed as a multiple of caller's " "entry frequency, for a callsite to be hot in the absence of " "profile information.")); static cl::opt CallPenalty( "inline-call-penalty", cl::Hidden, cl::init(25), cl::desc("Call penalty that is applied per callsite when inlining")); static cl::opt OptComputeFullInlineCost( "inline-cost-full", cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::desc("Compute the full inline cost of a call site even when the cost " "exceeds the threshold.")); static cl::opt InlineCallerSupersetNoBuiltin( "inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true), cl::ZeroOrMore, cl::desc("Allow inlining when caller has a superset of callee's nobuiltin " "attributes.")); static cl::opt DisableGEPConstOperand( "disable-gep-const-evaluation", cl::Hidden, cl::init(false), cl::desc("Disables evaluation of GetElementPtr with constant operands")); namespace { -class InlineCostCallAnalyzer; - /// This function behaves more like CallBase::hasFnAttr: when it looks for the /// requested attribute, it check both the call instruction and the called /// function (if it's available and operand bundles don't prohibit that). Attribute getFnAttr(CallBase &CB, StringRef AttrKind) { Attribute CallAttr = CB.getFnAttr(AttrKind); if (CallAttr.isValid()) return CallAttr; // Operand bundles override attributes on the called function, but don't // override attributes directly present on the call instruction. 
if (!CB.isFnAttrDisallowedByOpBundle(AttrKind)) if (const Function *F = CB.getCalledFunction()) return F->getFnAttribute(AttrKind); return {}; } +} // namespace +namespace llvm { Optional getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) { Attribute Attr = getFnAttr(CB, AttrKind); int AttrValue; if (Attr.getValueAsString().getAsInteger(10, AttrValue)) return None; return AttrValue; } +} // namespace llvm + +namespace { +class InlineCostCallAnalyzer; // This struct is used to store information about inline cost of a // particular instruction struct InstructionCostDetail { int CostBefore = 0; int CostAfter = 0; int ThresholdBefore = 0; int ThresholdAfter = 0; int getThresholdDelta() const { return ThresholdAfter - ThresholdBefore; } int getCostDelta() const { return CostAfter - CostBefore; } bool hasThresholdChanged() const { return ThresholdAfter != ThresholdBefore; } }; class InlineCostAnnotationWriter : public AssemblyAnnotationWriter { private: InlineCostCallAnalyzer *const ICCA; public: InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {} virtual void emitInstructionAnnot(const Instruction *I, formatted_raw_ostream &OS) override; }; /// Carry out call site analysis, in order to evaluate inlinability. /// NOTE: the type is currently used as implementation detail of functions such /// as llvm::getInlineCost. Note the function_ref constructor parameters - the /// expectation is that they come from the outer scope, from the wrapper /// functions. If we want to support constructing CallAnalyzer objects where /// lambdas are provided inline at construction, or where the object needs to /// otherwise survive past the scope of the provided functions, we need to /// revisit the argument types. class CallAnalyzer : public InstVisitor { typedef InstVisitor Base; friend class InstVisitor; protected: virtual ~CallAnalyzer() {} /// The TargetTransformInfo available for this compilation. const TargetTransformInfo &TTI; /// Getter for the cache of @llvm.assume intrinsics. function_ref GetAssumptionCache; /// Getter for BlockFrequencyInfo function_ref GetBFI; /// Profile summary information. ProfileSummaryInfo *PSI; /// The called function. Function &F; // Cache the DataLayout since we use it a lot. const DataLayout &DL; /// The OptimizationRemarkEmitter available for this compilation. OptimizationRemarkEmitter *ORE; /// The candidate callsite being analyzed. Please do not use this to do /// analysis in the caller function; we want the inline cost query to be /// easily cacheable. Instead, use the cover function paramHasAttr. CallBase &CandidateCall; /// Extension points for handling callsite features. // Called before a basic block was analyzed. virtual void onBlockStart(const BasicBlock *BB) {} /// Called after a basic block was analyzed. virtual void onBlockAnalyzed(const BasicBlock *BB) {} /// Called before an instruction was analyzed virtual void onInstructionAnalysisStart(const Instruction *I) {} /// Called after an instruction was analyzed virtual void onInstructionAnalysisFinish(const Instruction *I) {} /// Called at the end of the analysis of the callsite. Return the outcome of /// the analysis, i.e. 'InlineResult(true)' if the inlining may happen, or /// the reason it can't. virtual InlineResult finalizeAnalysis() { return InlineResult::success(); } /// Called when we're about to start processing a basic block, and every time /// we are done processing an instruction. Return true if there is no point in /// continuing the analysis (e.g. 
we've determined already the call site is /// too expensive to inline) virtual bool shouldStop() { return false; } /// Called before the analysis of the callee body starts (with callsite /// contexts propagated). It checks callsite-specific information. Return a /// reason analysis can't continue if that's the case, or 'true' if it may /// continue. virtual InlineResult onAnalysisStart() { return InlineResult::success(); } /// Called if the analysis engine decides SROA cannot be done for the given /// alloca. virtual void onDisableSROA(AllocaInst *Arg) {} /// Called the analysis engine determines load elimination won't happen. virtual void onDisableLoadElimination() {} /// Called when we visit a CallBase, before the analysis starts. Return false /// to stop further processing of the instruction. virtual bool onCallBaseVisitStart(CallBase &Call) { return true; } /// Called to account for a call. virtual void onCallPenalty() {} /// Called to account for the expectation the inlining would result in a load /// elimination. virtual void onLoadEliminationOpportunity() {} /// Called to account for the cost of argument setup for the Call in the /// callee's body (not the callsite currently under analysis). virtual void onCallArgumentSetup(const CallBase &Call) {} /// Called to account for a load relative intrinsic. virtual void onLoadRelativeIntrinsic() {} /// Called to account for a lowered call. virtual void onLoweredCall(Function *F, CallBase &Call, bool IsIndirectCall) { } /// Account for a jump table of given size. Return false to stop further /// processing the switch instruction virtual bool onJumpTable(unsigned JumpTableSize) { return true; } /// Account for a case cluster of given size. Return false to stop further /// processing of the instruction. virtual bool onCaseCluster(unsigned NumCaseCluster) { return true; } /// Called at the end of processing a switch instruction, with the given /// number of case clusters. virtual void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster) {} /// Called to account for any other instruction not specifically accounted /// for. virtual void onMissedSimplification() {} /// Start accounting potential benefits due to SROA for the given alloca. virtual void onInitializeSROAArg(AllocaInst *Arg) {} /// Account SROA savings for the AllocaInst value. virtual void onAggregateSROAUse(AllocaInst *V) {} bool handleSROA(Value *V, bool DoNotDisable) { // Check for SROA candidates in comparisons. if (auto *SROAArg = getSROAArgForValueOrNull(V)) { if (DoNotDisable) { onAggregateSROAUse(SROAArg); return true; } disableSROAForArg(SROAArg); } return false; } bool IsCallerRecursive = false; bool IsRecursiveCall = false; bool ExposesReturnsTwice = false; bool HasDynamicAlloca = false; bool ContainsNoDuplicateCall = false; bool HasReturn = false; bool HasIndirectBr = false; bool HasUninlineableIntrinsic = false; bool InitsVargArgs = false; /// Number of bytes allocated statically by the callee. uint64_t AllocatedSize = 0; unsigned NumInstructions = 0; unsigned NumVectorInstructions = 0; /// While we walk the potentially-inlined instructions, we build up and /// maintain a mapping of simplified values specific to this callsite. The /// idea is to propagate any special information we have about arguments to /// this call through the inlinable section of the function, and account for /// likely simplifications post-inlining. 
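// Illustrative sketch (the call site is assumed to have been annotated by an
// earlier pass): reading the optional string attributes that this file uses to
// override or bias the computed inline cost of a call.
static void dumpInlineCostOverrides(CallBase &CB) {
  if (Optional<int> FixedCost = getStringFnAttrAsInt(CB, "call-inline-cost"))
    errs() << "call-inline-cost override: " << *FixedCost << "\n";
  if (Optional<int> Bonus = getStringFnAttrAsInt(CB, "call-threshold-bonus"))
    errs() << "call-threshold-bonus: " << *Bonus << "\n";
}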
The most important aspect we track /// is CFG altering simplifications -- when we prove a basic block dead, that /// can cause dramatic shifts in the cost of inlining a function. DenseMap SimplifiedValues; /// Keep track of the values which map back (through function arguments) to /// allocas on the caller stack which could be simplified through SROA. DenseMap SROAArgValues; /// Keep track of Allocas for which we believe we may get SROA optimization. DenseSet EnabledSROAAllocas; /// Keep track of values which map to a pointer base and constant offset. DenseMap> ConstantOffsetPtrs; /// Keep track of dead blocks due to the constant arguments. SetVector DeadBlocks; /// The mapping of the blocks to their known unique successors due to the /// constant arguments. DenseMap KnownSuccessors; /// Model the elimination of repeated loads that is expected to happen /// whenever we simplify away the stores that would otherwise cause them to be /// loads. bool EnableLoadElimination = true; /// Whether we allow inlining for recursive call. bool AllowRecursiveCall = false; SmallPtrSet LoadAddrSet; AllocaInst *getSROAArgForValueOrNull(Value *V) const { auto It = SROAArgValues.find(V); if (It == SROAArgValues.end() || EnabledSROAAllocas.count(It->second) == 0) return nullptr; return It->second; } // Custom simplification helper routines. bool isAllocaDerivedArg(Value *V); void disableSROAForArg(AllocaInst *SROAArg); void disableSROA(Value *V); void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB); void disableLoadElimination(); bool isGEPFree(GetElementPtrInst &GEP); bool canFoldInboundsGEP(GetElementPtrInst &I); bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); bool simplifyCallSite(Function *F, CallBase &Call); template bool simplifyInstruction(Instruction &I, Callable Evaluate); bool simplifyIntrinsicCallIsConstant(CallBase &CB); ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); /// Return true if the given argument to the function being considered for /// inlining has the given attribute set either at the call site or the /// function declaration. Primarily used to inspect call site specific /// attributes since these can be more precise than the ones on the callee /// itself. bool paramHasAttr(Argument *A, Attribute::AttrKind Attr); /// Return true if the given value is known non null within the callee if /// inlined through this particular callsite. bool isKnownNonNullInCallee(Value *V); /// Return true if size growth is allowed when inlining the callee at \p Call. bool allowSizeGrowth(CallBase &Call); // Custom analysis routines. InlineResult analyzeBlock(BasicBlock *BB, SmallPtrSetImpl &EphValues); // Disable several entry points to the visitor so we don't accidentally use // them by declaring but not defining them here. void visit(Module *); void visit(Module &); void visit(Function *); void visit(Function &); void visit(BasicBlock *); void visit(BasicBlock &); // Provide base case for our instruction visit. bool visitInstruction(Instruction &I); // Our visit overrides. 
bool visitAlloca(AllocaInst &I); bool visitPHI(PHINode &I); bool visitGetElementPtr(GetElementPtrInst &I); bool visitBitCast(BitCastInst &I); bool visitPtrToInt(PtrToIntInst &I); bool visitIntToPtr(IntToPtrInst &I); bool visitCastInst(CastInst &I); bool visitCmpInst(CmpInst &I); bool visitSub(BinaryOperator &I); bool visitBinaryOperator(BinaryOperator &I); bool visitFNeg(UnaryOperator &I); bool visitLoad(LoadInst &I); bool visitStore(StoreInst &I); bool visitExtractValue(ExtractValueInst &I); bool visitInsertValue(InsertValueInst &I); bool visitCallBase(CallBase &Call); bool visitReturnInst(ReturnInst &RI); bool visitBranchInst(BranchInst &BI); bool visitSelectInst(SelectInst &SI); bool visitSwitchInst(SwitchInst &SI); bool visitIndirectBrInst(IndirectBrInst &IBI); bool visitResumeInst(ResumeInst &RI); bool visitCleanupReturnInst(CleanupReturnInst &RI); bool visitCatchReturnInst(CatchReturnInst &RI); bool visitUnreachableInst(UnreachableInst &I); public: CallAnalyzer(Function &Callee, CallBase &Call, const TargetTransformInfo &TTI, function_ref GetAssumptionCache, function_ref GetBFI = nullptr, ProfileSummaryInfo *PSI = nullptr, OptimizationRemarkEmitter *ORE = nullptr) : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI), PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE), CandidateCall(Call) {} InlineResult analyze(); Optional getSimplifiedValue(Instruction *I) { if (SimplifiedValues.find(I) != SimplifiedValues.end()) return SimplifiedValues[I]; return None; } // Keep a bunch of stats about the cost savings found so we can print them // out when debugging. unsigned NumConstantArgs = 0; unsigned NumConstantOffsetPtrArgs = 0; unsigned NumAllocaArgs = 0; unsigned NumConstantPtrCmps = 0; unsigned NumConstantPtrDiffs = 0; unsigned NumInstructionsSimplified = 0; void dump(); }; // Considering forming a binary search, we should find the number of nodes // which is same as the number of comparisons when lowered. For a given // number of clusters, n, we can define a recursive function, f(n), to find // the number of nodes in the tree. The recursion is : // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3, // and f(n) = n, when n <= 3. // This will lead a binary tree where the leaf should be either f(2) or f(3) // when n > 3. So, the number of comparisons from leaves should be n, while // the number of non-leaf should be : // 2^(log2(n) - 1) - 1 // = 2^log2(n) * 2^-1 - 1 // = n / 2 - 1. // Considering comparisons from leaf and non-leaf nodes, we can estimate the // number of comparisons in a simple closed form : // n + n / 2 - 1 = n * 3 / 2 - 1 int64_t getExpectedNumberOfCompare(int NumCaseCluster) { return 3 * static_cast(NumCaseCluster) / 2 - 1; } /// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note /// the FIXME in onLoweredCall, when instantiating an InlineCostCallAnalyzer class InlineCostCallAnalyzer final : public CallAnalyzer { const int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1; const bool ComputeFullInlineCost; int LoadEliminationCost = 0; /// Bonus to be applied when percentage of vector instructions in callee is /// high (see more details in updateThreshold). int VectorBonus = 0; /// Bonus to be applied when the callee has only one reachable basic block. int SingleBBBonus = 0; /// Tunable parameters that control the analysis. const InlineParams &Params; // This DenseMap stores the delta change in cost and threshold after // accounting for the given instruction. 
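// Worked example for the closed form above (the cluster count is chosen only
// for the example): for NumCaseCluster = 8 the lowered tree is expected to
// contain 8 leaf compares plus 8/2 - 1 = 3 non-leaf compares, and the helper
// accordingly returns 8 * 3 / 2 - 1 = 11.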
The map is filled only with the // flag PrintInstructionComments on. DenseMap InstructionCostDetailMap; /// Upper bound for the inlining cost. Bonuses are being applied to account /// for speculative "expected profit" of the inlining decision. int Threshold = 0; /// Attempt to evaluate indirect calls to boost its inline cost. const bool BoostIndirectCalls; /// Ignore the threshold when finalizing analysis. const bool IgnoreThreshold; // True if the cost-benefit-analysis-based inliner is enabled. const bool CostBenefitAnalysisEnabled; /// Inlining cost measured in abstract units, accounts for all the /// instructions expected to be executed for a given function invocation. /// Instructions that are statically proven to be dead based on call-site /// arguments are not counted here. int Cost = 0; // The cumulative cost at the beginning of the basic block being analyzed. At // the end of analyzing each basic block, "Cost - CostAtBBStart" represents // the size of that basic block. int CostAtBBStart = 0; // The static size of live but cold basic blocks. This is "static" in the // sense that it's not weighted by profile counts at all. int ColdSize = 0; // Whether inlining is decided by cost-threshold analysis. bool DecidedByCostThreshold = false; // Whether inlining is decided by cost-benefit analysis. bool DecidedByCostBenefit = false; // The cost-benefit pair computed by cost-benefit analysis. Optional CostBenefit = None; bool SingleBB = true; unsigned SROACostSavings = 0; unsigned SROACostSavingsLost = 0; /// The mapping of caller Alloca values to their accumulated cost savings. If /// we have to disable SROA for one of the allocas, this tells us how much /// cost must be added. DenseMap SROAArgCosts; /// Return true if \p Call is a cold callsite. bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI); /// Update Threshold based on callsite properties such as callee /// attributes and callee hotness for PGO builds. The Callee is explicitly /// passed to support analyzing indirect calls whose target is inferred by /// analysis. void updateThreshold(CallBase &Call, Function &Callee); /// Return a higher threshold if \p Call is a hot callsite. Optional getHotCallSiteThreshold(CallBase &Call, BlockFrequencyInfo *CallerBFI); /// Handle a capped 'int' increment for Cost. void addCost(int64_t Inc, int64_t UpperBound = INT_MAX) { assert(UpperBound > 0 && UpperBound <= INT_MAX && "invalid upper bound"); Cost = std::min(UpperBound, Cost + Inc); } void onDisableSROA(AllocaInst *Arg) override { auto CostIt = SROAArgCosts.find(Arg); if (CostIt == SROAArgCosts.end()) return; addCost(CostIt->second); SROACostSavings -= CostIt->second; SROACostSavingsLost += CostIt->second; SROAArgCosts.erase(CostIt); } void onDisableLoadElimination() override { addCost(LoadEliminationCost); LoadEliminationCost = 0; } bool onCallBaseVisitStart(CallBase &Call) override { if (Optional AttrCallThresholdBonus = getStringFnAttrAsInt(Call, "call-threshold-bonus")) Threshold += *AttrCallThresholdBonus; if (Optional AttrCallCost = getStringFnAttrAsInt(Call, "call-inline-cost")) { addCost(*AttrCallCost); // Prevent further processing of the call since we want to override its // inline cost, not just add to it. return false; } return true; } void onCallPenalty() override { addCost(CallPenalty); } void onCallArgumentSetup(const CallBase &Call) override { // Pay the price of the argument setup. We account for the average 1 // instruction per call argument setup here. 
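// For example, a call passing three arguments is charged 3 * InstrCost for
// argument setup by the line below (15 with the usual InstrCost of 5), on top
// of any call penalty accounted for elsewhere.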
addCost(Call.arg_size() * InlineConstants::InstrCost); } void onLoadRelativeIntrinsic() override { // This is normally lowered to 4 LLVM instructions. addCost(3 * InlineConstants::InstrCost); } void onLoweredCall(Function *F, CallBase &Call, bool IsIndirectCall) override { // We account for the average 1 instruction per call argument setup here. addCost(Call.arg_size() * InlineConstants::InstrCost); // If we have a constant that we are calling as a function, we can peer // through it and see the function target. This happens not infrequently // during devirtualization and so we want to give it a hefty bonus for // inlining, but cap that bonus in the event that inlining wouldn't pan out. // Pretend to inline the function, with a custom threshold. if (IsIndirectCall && BoostIndirectCalls) { auto IndirectCallParams = Params; IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold; /// FIXME: if InlineCostCallAnalyzer is derived from, this may need /// to instantiate the derived class. InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI, GetAssumptionCache, GetBFI, PSI, ORE, false); if (CA.analyze().isSuccess()) { // We were able to inline the indirect call! Subtract the cost from the // threshold to get the bonus we want to apply, but don't go below zero. Cost -= std::max(0, CA.getThreshold() - CA.getCost()); } } else // Otherwise simply add the cost for merely making the call. addCost(CallPenalty); } void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster) override { // If suitable for a jump table, consider the cost for the table size and // branch to destination. // The maximum valid cost is increased in this function. if (JumpTableSize) { int64_t JTCost = static_cast(JumpTableSize) * InlineConstants::InstrCost + 4 * InlineConstants::InstrCost; addCost(JTCost, static_cast(CostUpperBound)); return; } if (NumCaseCluster <= 3) { // Suppose a comparison includes one compare and one conditional branch. addCost(NumCaseCluster * 2 * InlineConstants::InstrCost); return; } int64_t ExpectedNumberOfCompare = getExpectedNumberOfCompare(NumCaseCluster); int64_t SwitchCost = ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost; addCost(SwitchCost, static_cast(CostUpperBound)); } void onMissedSimplification() override { addCost(InlineConstants::InstrCost); } void onInitializeSROAArg(AllocaInst *Arg) override { assert(Arg != nullptr && "Should not initialize SROA costs for null value."); SROAArgCosts[Arg] = 0; } void onAggregateSROAUse(AllocaInst *SROAArg) override { auto CostIt = SROAArgCosts.find(SROAArg); assert(CostIt != SROAArgCosts.end() && "expected this argument to have a cost"); CostIt->second += InlineConstants::InstrCost; SROACostSavings += InlineConstants::InstrCost; } void onBlockStart(const BasicBlock *BB) override { CostAtBBStart = Cost; } void onBlockAnalyzed(const BasicBlock *BB) override { if (CostBenefitAnalysisEnabled) { // Keep track of the static size of live but cold basic blocks. For now, // we define a cold basic block to be one that's never executed. assert(GetBFI && "GetBFI must be available"); BlockFrequencyInfo *BFI = &(GetBFI(F)); assert(BFI && "BFI must be available"); auto ProfileCount = BFI->getBlockProfileCount(BB); assert(ProfileCount.hasValue()); if (ProfileCount.getValue() == 0) ColdSize += Cost - CostAtBBStart; } auto *TI = BB->getTerminator(); // If we had any successors at this point, then post-inlining is likely to // have them as well.
Note that we assume any basic blocks which existed // due to branches or switches which folded above will also fold after // inlining. if (SingleBB && TI->getNumSuccessors() > 1) { // Take off the bonus we applied to the threshold. Threshold -= SingleBBBonus; SingleBB = false; } } void onInstructionAnalysisStart(const Instruction *I) override { // This function is called to store the initial cost of inlining before // the given instruction was assessed. if (!PrintInstructionComments) return; InstructionCostDetailMap[I].CostBefore = Cost; InstructionCostDetailMap[I].ThresholdBefore = Threshold; } void onInstructionAnalysisFinish(const Instruction *I) override { // This function is called to find new values of cost and threshold after // the instruction has been assessed. if (!PrintInstructionComments) return; InstructionCostDetailMap[I].CostAfter = Cost; InstructionCostDetailMap[I].ThresholdAfter = Threshold; } bool isCostBenefitAnalysisEnabled() { if (!PSI || !PSI->hasProfileSummary()) return false; if (!GetBFI) return false; if (InlineEnableCostBenefitAnalysis.getNumOccurrences()) { // Honor the explicit request from the user. if (!InlineEnableCostBenefitAnalysis) return false; } else { // Otherwise, require instrumentation profile. if (!PSI->hasInstrumentationProfile()) return false; } auto *Caller = CandidateCall.getParent()->getParent(); if (!Caller->getEntryCount()) return false; BlockFrequencyInfo *CallerBFI = &(GetBFI(*Caller)); if (!CallerBFI) return false; // For now, limit to hot call site. if (!PSI->isHotCallSite(CandidateCall, CallerBFI)) return false; // Make sure we have a nonzero entry count. auto EntryCount = F.getEntryCount(); if (!EntryCount || !EntryCount->getCount()) return false; BlockFrequencyInfo *CalleeBFI = &(GetBFI(F)); if (!CalleeBFI) return false; return true; } // Determine whether we should inline the given call site, taking into account // both the size cost and the cycle savings. Return None if we don't have // suficient profiling information to determine. Optional costBenefitAnalysis() { if (!CostBenefitAnalysisEnabled) return None; // buildInlinerPipeline in the pass builder sets HotCallSiteThreshold to 0 // for the prelink phase of the AutoFDO + ThinLTO build. Honor the logic by // falling back to the cost-based metric. // TODO: Improve this hacky condition. if (Threshold == 0) return None; assert(GetBFI); BlockFrequencyInfo *CalleeBFI = &(GetBFI(F)); assert(CalleeBFI); // The cycle savings expressed as the sum of InlineConstants::InstrCost // multiplied by the estimated dynamic count of each instruction we can // avoid. Savings come from the call site cost, such as argument setup and // the call instruction, as well as the instructions that are folded. // // We use 128-bit APInt here to avoid potential overflow. This variable // should stay well below 10^^24 (or 2^^80) in practice. This "worst" case // assumes that we can avoid or fold a billion instructions, each with a // profile count of 10^^15 -- roughly the number of cycles for a 24-hour // period on a 4GHz machine. APInt CycleSavings(128, 0); for (auto &BB : F) { APInt CurrentSavings(128, 0); for (auto &I : BB) { if (BranchInst *BI = dyn_cast(&I)) { // Count a conditional branch as savings if it becomes unconditional. if (BI->isConditional() && isa_and_nonnull( SimplifiedValues.lookup(BI->getCondition()))) { CurrentSavings += InlineConstants::InstrCost; } } else if (Value *V = dyn_cast(&I)) { // Count an instruction as savings if we can fold it. 
if (SimplifiedValues.count(V)) { CurrentSavings += InlineConstants::InstrCost; } } } auto ProfileCount = CalleeBFI->getBlockProfileCount(&BB); assert(ProfileCount.hasValue()); CurrentSavings *= ProfileCount.getValue(); CycleSavings += CurrentSavings; } // Compute the cycle savings per call. auto EntryProfileCount = F.getEntryCount(); assert(EntryProfileCount.hasValue() && EntryProfileCount->getCount()); auto EntryCount = EntryProfileCount->getCount(); CycleSavings += EntryCount / 2; CycleSavings = CycleSavings.udiv(EntryCount); // Compute the total savings for the call site. auto *CallerBB = CandidateCall.getParent(); BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent()))); CycleSavings += getCallsiteCost(this->CandidateCall, DL); CycleSavings *= CallerBFI->getBlockProfileCount(CallerBB).getValue(); // Remove the cost of the cold basic blocks. int Size = Cost - ColdSize; // Allow tiny callees to be inlined regardless of whether they meet the // savings threshold. Size = Size > InlineSizeAllowance ? Size - InlineSizeAllowance : 1; CostBenefit.emplace(APInt(128, Size), CycleSavings); // Return true if the savings justify the cost of inlining. Specifically, // we evaluate the following inequality: // // CycleSavings PSI->getOrCompHotCountThreshold() // -------------- >= ----------------------------------- // Size InlineSavingsMultiplier // // Note that the left hand side is specific to a call site. The right hand // side is a constant for the entire executable. APInt LHS = CycleSavings; LHS *= InlineSavingsMultiplier; APInt RHS(128, PSI->getOrCompHotCountThreshold()); RHS *= Size; return LHS.uge(RHS); } InlineResult finalizeAnalysis() override { // Loops generally act a lot like calls in that they act like barriers to // movement, require a certain amount of setup, etc. So when optimising for // size, we penalise any call sites that perform loops. We do this after all // other costs here, so will likely only be dealing with relatively small // functions (and hence DT and LI will hopefully be cheap). auto *Caller = CandidateCall.getFunction(); if (Caller->hasMinSize()) { DominatorTree DT(F); LoopInfo LI(DT); int NumLoops = 0; for (Loop *L : LI) { // Ignore loops that will not be executed if (DeadBlocks.count(L->getHeader())) continue; NumLoops++; } addCost(NumLoops * InlineConstants::LoopPenalty); } // We applied the maximum possible vector bonus at the beginning. Now, // subtract the excess bonus, if any, from the Threshold before // comparing against Cost. if (NumVectorInstructions <= NumInstructions / 10) Threshold -= VectorBonus; else if (NumVectorInstructions <= NumInstructions / 2) Threshold -= VectorBonus / 2; if (Optional AttrCost = getStringFnAttrAsInt(CandidateCall, "function-inline-cost")) Cost = *AttrCost; + if (Optional AttrCostMult = getStringFnAttrAsInt( + CandidateCall, + InlineConstants::FunctionInlineCostMultiplierAttributeName)) + Cost *= *AttrCostMult; + if (Optional AttrThreshold = getStringFnAttrAsInt(CandidateCall, "function-inline-threshold")) Threshold = *AttrThreshold; if (auto Result = costBenefitAnalysis()) { DecidedByCostBenefit = true; if (Result.getValue()) return InlineResult::success(); else return InlineResult::failure("Cost over threshold."); } if (IgnoreThreshold) return InlineResult::success(); DecidedByCostThreshold = true; return Cost < std::max(1, Threshold) ? 
InlineResult::success() : InlineResult::failure("Cost over threshold."); } bool shouldStop() override { if (IgnoreThreshold || ComputeFullInlineCost) return false; // Bail out the moment we cross the threshold. This means we'll under-count // the cost, but only when undercounting doesn't matter. if (Cost < Threshold) return false; DecidedByCostThreshold = true; return true; } void onLoadEliminationOpportunity() override { LoadEliminationCost += InlineConstants::InstrCost; } InlineResult onAnalysisStart() override { // Perform some tweaks to the cost and threshold based on the direct // callsite information. // We want to more aggressively inline vector-dense kernels, so up the // threshold, and we'll lower it if the % of vector instructions gets too // low. Note that these bonuses are some what arbitrary and evolved over // time by accident as much as because they are principled bonuses. // // FIXME: It would be nice to remove all such bonuses. At least it would be // nice to base the bonus values on something more scientific. assert(NumInstructions == 0); assert(NumVectorInstructions == 0); // Update the threshold based on callsite properties updateThreshold(CandidateCall, F); // While Threshold depends on commandline options that can take negative // values, we want to enforce the invariant that the computed threshold and // bonuses are non-negative. assert(Threshold >= 0); assert(SingleBBBonus >= 0); assert(VectorBonus >= 0); // Speculatively apply all possible bonuses to Threshold. If cost exceeds // this Threshold any time, and cost cannot decrease, we can stop processing // the rest of the function body. Threshold += (SingleBBBonus + VectorBonus); // Give out bonuses for the callsite, as the instructions setting them up // will be gone after inlining. addCost(-getCallsiteCost(this->CandidateCall, DL)); // If this function uses the coldcc calling convention, prefer not to inline // it. if (F.getCallingConv() == CallingConv::Cold) Cost += InlineConstants::ColdccPenalty; // Check if we're done. This can happen due to bonuses and penalties. if (Cost >= Threshold && !ComputeFullInlineCost) return InlineResult::failure("high cost"); return InlineResult::success(); } public: InlineCostCallAnalyzer( Function &Callee, CallBase &Call, const InlineParams &Params, const TargetTransformInfo &TTI, function_ref GetAssumptionCache, function_ref GetBFI = nullptr, ProfileSummaryInfo *PSI = nullptr, OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true, bool IgnoreThreshold = false) : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI, ORE), ComputeFullInlineCost(OptComputeFullInlineCost || Params.ComputeFullInlineCost || ORE || isCostBenefitAnalysisEnabled()), Params(Params), Threshold(Params.DefaultThreshold), BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold), CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()), Writer(this) { AllowRecursiveCall = Params.AllowRecursiveCall.getValue(); } /// Annotation Writer for instruction details InlineCostAnnotationWriter Writer; void dump(); // Prints the same analysis as dump(), but its definition is not dependent // on the build. 
void print(raw_ostream &OS); Optional getCostDetails(const Instruction *I) { if (InstructionCostDetailMap.find(I) != InstructionCostDetailMap.end()) return InstructionCostDetailMap[I]; return None; } virtual ~InlineCostCallAnalyzer() {} int getThreshold() const { return Threshold; } int getCost() const { return Cost; } Optional getCostBenefitPair() { return CostBenefit; } bool wasDecidedByCostBenefit() const { return DecidedByCostBenefit; } bool wasDecidedByCostThreshold() const { return DecidedByCostThreshold; } }; class InlineCostFeaturesAnalyzer final : public CallAnalyzer { private: InlineCostFeatures Cost = {}; // FIXME: These constants are taken from the heuristic-based cost visitor. // These should be removed entirely in a later revision to avoid reliance on // heuristics in the ML inliner. static constexpr int JTCostMultiplier = 4; static constexpr int CaseClusterCostMultiplier = 2; static constexpr int SwitchCostMultiplier = 2; // FIXME: These are taken from the heuristic-based cost visitor: we should // eventually abstract these to the CallAnalyzer to avoid duplication. unsigned SROACostSavingOpportunities = 0; int VectorBonus = 0; int SingleBBBonus = 0; int Threshold = 5; DenseMap SROACosts; void increment(InlineCostFeatureIndex Feature, int64_t Delta = 1) { Cost[static_cast(Feature)] += Delta; } void set(InlineCostFeatureIndex Feature, int64_t Value) { Cost[static_cast(Feature)] = Value; } void onDisableSROA(AllocaInst *Arg) override { auto CostIt = SROACosts.find(Arg); if (CostIt == SROACosts.end()) return; increment(InlineCostFeatureIndex::SROALosses, CostIt->second); SROACostSavingOpportunities -= CostIt->second; SROACosts.erase(CostIt); } void onDisableLoadElimination() override { set(InlineCostFeatureIndex::LoadElimination, 1); } void onCallPenalty() override { increment(InlineCostFeatureIndex::CallPenalty, CallPenalty); } void onCallArgumentSetup(const CallBase &Call) override { increment(InlineCostFeatureIndex::CallArgumentSetup, Call.arg_size() * InlineConstants::InstrCost); } void onLoadRelativeIntrinsic() override { increment(InlineCostFeatureIndex::LoadRelativeIntrinsic, 3 * InlineConstants::InstrCost); } void onLoweredCall(Function *F, CallBase &Call, bool IsIndirectCall) override { increment(InlineCostFeatureIndex::LoweredCallArgSetup, Call.arg_size() * InlineConstants::InstrCost); if (IsIndirectCall) { InlineParams IndirectCallParams = {/* DefaultThreshold*/ 0, /*HintThreshold*/ {}, /*ColdThreshold*/ {}, /*OptSizeThreshold*/ {}, /*OptMinSizeThreshold*/ {}, /*HotCallSiteThreshold*/ {}, /*LocallyHotCallSiteThreshold*/ {}, /*ColdCallSiteThreshold*/ {}, /*ComputeFullInlineCost*/ true, /*EnableDeferral*/ true}; IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold; InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI, GetAssumptionCache, GetBFI, PSI, ORE, false, true); if (CA.analyze().isSuccess()) { increment(InlineCostFeatureIndex::NestedInlineCostEstimate, CA.getCost()); increment(InlineCostFeatureIndex::NestedInlines, 1); } } else { onCallPenalty(); } } void onFinalizeSwitch(unsigned JumpTableSize, unsigned NumCaseCluster) override { if (JumpTableSize) { int64_t JTCost = static_cast(JumpTableSize) * InlineConstants::InstrCost + JTCostMultiplier * InlineConstants::InstrCost; increment(InlineCostFeatureIndex::JumpTablePenalty, JTCost); return; } if (NumCaseCluster <= 3) { increment(InlineCostFeatureIndex::CaseClusterPenalty, NumCaseCluster * CaseClusterCostMultiplier * InlineConstants::InstrCost); return; } int64_t 
ExpectedNumberOfCompare = getExpectedNumberOfCompare(NumCaseCluster); int64_t SwitchCost = ExpectedNumberOfCompare * SwitchCostMultiplier * InlineConstants::InstrCost; increment(InlineCostFeatureIndex::SwitchPenalty, SwitchCost); } void onMissedSimplification() override { increment(InlineCostFeatureIndex::UnsimplifiedCommonInstructions, InlineConstants::InstrCost); } void onInitializeSROAArg(AllocaInst *Arg) override { SROACosts[Arg] = 0; } void onAggregateSROAUse(AllocaInst *Arg) override { SROACosts.find(Arg)->second += InlineConstants::InstrCost; SROACostSavingOpportunities += InlineConstants::InstrCost; } void onBlockAnalyzed(const BasicBlock *BB) override { if (BB->getTerminator()->getNumSuccessors() > 1) set(InlineCostFeatureIndex::IsMultipleBlocks, 1); Threshold -= SingleBBBonus; } InlineResult finalizeAnalysis() override { auto *Caller = CandidateCall.getFunction(); if (Caller->hasMinSize()) { DominatorTree DT(F); LoopInfo LI(DT); for (Loop *L : LI) { // Ignore loops that will not be executed if (DeadBlocks.count(L->getHeader())) continue; increment(InlineCostFeatureIndex::NumLoops, InlineConstants::LoopPenalty); } } set(InlineCostFeatureIndex::DeadBlocks, DeadBlocks.size()); set(InlineCostFeatureIndex::SimplifiedInstructions, NumInstructionsSimplified); set(InlineCostFeatureIndex::ConstantArgs, NumConstantArgs); set(InlineCostFeatureIndex::ConstantOffsetPtrArgs, NumConstantOffsetPtrArgs); set(InlineCostFeatureIndex::SROASavings, SROACostSavingOpportunities); if (NumVectorInstructions <= NumInstructions / 10) Threshold -= VectorBonus; else if (NumVectorInstructions <= NumInstructions / 2) Threshold -= VectorBonus / 2; set(InlineCostFeatureIndex::Threshold, Threshold); return InlineResult::success(); } bool shouldStop() override { return false; } void onLoadEliminationOpportunity() override { increment(InlineCostFeatureIndex::LoadElimination, 1); } InlineResult onAnalysisStart() override { increment(InlineCostFeatureIndex::CallSiteCost, -1 * getCallsiteCost(this->CandidateCall, DL)); set(InlineCostFeatureIndex::ColdCcPenalty, (F.getCallingConv() == CallingConv::Cold)); // FIXME: we shouldn't repeat this logic in both the Features and Cost // analyzer - instead, we should abstract it to a common method in the // CallAnalyzer int SingleBBBonusPercent = 50; int VectorBonusPercent = TTI.getInlinerVectorBonusPercent(); Threshold += TTI.adjustInliningThreshold(&CandidateCall); Threshold *= TTI.getInliningThresholdMultiplier(); SingleBBBonus = Threshold * SingleBBBonusPercent / 100; VectorBonus = Threshold * VectorBonusPercent / 100; Threshold += (SingleBBBonus + VectorBonus); return InlineResult::success(); } public: InlineCostFeaturesAnalyzer( const TargetTransformInfo &TTI, function_ref &GetAssumptionCache, function_ref GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE, Function &Callee, CallBase &Call) : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI) {} const InlineCostFeatures &features() const { return Cost; } }; } // namespace /// Test whether the given value is an Alloca-derived function argument. bool CallAnalyzer::isAllocaDerivedArg(Value *V) { return SROAArgValues.count(V); } void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) { onDisableSROA(SROAArg); EnabledSROAAllocas.erase(SROAArg); disableLoadElimination(); } void InlineCostAnnotationWriter::emitInstructionAnnot( const Instruction *I, formatted_raw_ostream &OS) { // The cost of inlining of the given instruction is printed always. 
// The threshold delta is printed only when it is non-zero. It happens // when we decided to give a bonus at a particular instruction. Optional Record = ICCA->getCostDetails(I); if (!Record) OS << "; No analysis for the instruction"; else { OS << "; cost before = " << Record->CostBefore << ", cost after = " << Record->CostAfter << ", threshold before = " << Record->ThresholdBefore << ", threshold after = " << Record->ThresholdAfter << ", "; OS << "cost delta = " << Record->getCostDelta(); if (Record->hasThresholdChanged()) OS << ", threshold delta = " << Record->getThresholdDelta(); } auto C = ICCA->getSimplifiedValue(const_cast(I)); if (C) { OS << ", simplified to "; C.getValue()->print(OS, true); } OS << "\n"; } /// If 'V' maps to a SROA candidate, disable SROA for it. void CallAnalyzer::disableSROA(Value *V) { if (auto *SROAArg = getSROAArgForValueOrNull(V)) { disableSROAForArg(SROAArg); } } void CallAnalyzer::disableLoadElimination() { if (EnableLoadElimination) { onDisableLoadElimination(); EnableLoadElimination = false; } } /// Accumulate a constant GEP offset into an APInt if possible. /// /// Returns false if unable to compute the offset for any reason. Respects any /// simplified values known during the analysis of this callsite. bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType()); assert(IntPtrWidth == Offset.getBitWidth()); for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); GTI != GTE; ++GTI) { ConstantInt *OpC = dyn_cast(GTI.getOperand()); if (!OpC) if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand())) OpC = dyn_cast(SimpleOp); if (!OpC) return false; if (OpC->isZero()) continue; // Handle a struct index, which adds its field offset to the pointer. if (StructType *STy = GTI.getStructTypeOrNull()) { unsigned ElementIdx = OpC->getZExtValue(); const StructLayout *SL = DL.getStructLayout(STy); Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx)); continue; } APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType())); Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize; } return true; } /// Use TTI to check whether a GEP is free. /// /// Respects any simplified values known during the analysis of this callsite. bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) { SmallVector Operands; Operands.push_back(GEP.getOperand(0)); for (const Use &Op : GEP.indices()) if (Constant *SimpleOp = SimplifiedValues.lookup(Op)) Operands.push_back(SimpleOp); else Operands.push_back(Op); return TTI.getUserCost(&GEP, Operands, TargetTransformInfo::TCK_SizeAndLatency) == TargetTransformInfo::TCC_Free; } bool CallAnalyzer::visitAlloca(AllocaInst &I) { disableSROA(I.getOperand(0)); // Check whether inlining will turn a dynamic alloca into a static // alloca and handle that case. if (I.isArrayAllocation()) { Constant *Size = SimplifiedValues.lookup(I.getArraySize()); if (auto *AllocSize = dyn_cast_or_null(Size)) { // Sometimes a dynamic alloca could be converted into a static alloca // after this constant prop, and become a huge static alloca on an // unconditional CFG path. Avoid inlining if this is going to happen above // a threshold. // FIXME: If the threshold is removed or lowered too much, we could end up // being too pessimistic and prevent inlining non-problematic code. This // could result in unintended perf regressions. A better overall strategy // is needed to track stack usage during inlining. 
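// For example (illustrative only): a callee like
//   void f(unsigned n) { char buf[n]; ... }
// contains a dynamic alloca; if a call site passes a constant such as
// f(1 << 20), the alloca would become a 1 MiB static allocation after
// inlining, so we give up below once the accumulated AllocatedSize exceeds
// MaxSimplifiedDynamicAllocaToInline.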
Type *Ty = I.getAllocatedType(); AllocatedSize = SaturatingMultiplyAdd( AllocSize->getLimitedValue(), DL.getTypeAllocSize(Ty).getKnownMinSize(), AllocatedSize); if (AllocatedSize > InlineConstants::MaxSimplifiedDynamicAllocaToInline) HasDynamicAlloca = true; return false; } } // Accumulate the allocated size. if (I.isStaticAlloca()) { Type *Ty = I.getAllocatedType(); AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty).getKnownMinSize(), AllocatedSize); } // FIXME: This is overly conservative. Dynamic allocas are inefficient for // a variety of reasons, and so we would like to not inline them into // functions which don't currently have a dynamic alloca. This simply // disables inlining altogether in the presence of a dynamic alloca. if (!I.isStaticAlloca()) HasDynamicAlloca = true; return false; } bool CallAnalyzer::visitPHI(PHINode &I) { // FIXME: We need to propagate SROA *disabling* through phi nodes, even // though we don't want to propagate its bonuses. The idea is to disable // SROA if it *might* be used in an inappropriate manner. // Phi nodes are always zero-cost. // FIXME: Pointer sizes may differ between different address spaces, so do we // need to use the correct address space in the call to getPointerSizeInBits here? // Or could we skip the getPointerSizeInBits call completely? As far as I can // see the ZeroOffset is used as a dummy value, so we can probably use any // bit width for the ZeroOffset? APInt ZeroOffset = APInt::getZero(DL.getPointerSizeInBits(0)); bool CheckSROA = I.getType()->isPointerTy(); // Track the constant or pointer with constant offset we've seen so far. Constant *FirstC = nullptr; std::pair FirstBaseAndOffset = {nullptr, ZeroOffset}; Value *FirstV = nullptr; for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) { BasicBlock *Pred = I.getIncomingBlock(i); // If the incoming block is dead, skip the incoming block. if (DeadBlocks.count(Pred)) continue; // If the parent block of phi is not the known successor of the incoming // block, skip the incoming block. BasicBlock *KnownSuccessor = KnownSuccessors[Pred]; if (KnownSuccessor && KnownSuccessor != I.getParent()) continue; Value *V = I.getIncomingValue(i); // If the incoming value is this phi itself, skip the incoming value. if (&I == V) continue; Constant *C = dyn_cast(V); if (!C) C = SimplifiedValues.lookup(V); std::pair BaseAndOffset = {nullptr, ZeroOffset}; if (!C && CheckSROA) BaseAndOffset = ConstantOffsetPtrs.lookup(V); if (!C && !BaseAndOffset.first) // The incoming value is neither a constant nor a pointer with constant // offset, exit early. return true; if (FirstC) { if (FirstC == C) // If we've seen a constant incoming value before and it is the same // constant we see this time, continue checking the next incoming value. continue; // Otherwise early exit because we either see a different constant or saw // a constant before but we have a pointer with constant offset this time. return true; } if (FirstV) { // The same logic as above, but check pointer with constant offset here. if (FirstBaseAndOffset == BaseAndOffset) continue; return true; } if (C) { // This is the first time we've seen a constant, record it. FirstC = C; continue; } // The remaining case is that this is the first time we've seen a pointer with // constant offset, record it. FirstV = V; FirstBaseAndOffset = BaseAndOffset; } // Check if we can map phi to a constant. if (FirstC) { SimplifiedValues[&I] = FirstC; return true; } // Check if we can map phi to a pointer with constant offset.
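// For example, a phi whose live incoming values are all the same SROA
// candidate pointer plus the same constant offset (say '%argmem + 8' on every
// incoming edge) inherits that (base, offset) pair below and keeps the alloca
// eligible for SROA; mixed or differing incoming values simply give up above
// without propagating anything.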
if (FirstBaseAndOffset.first) { ConstantOffsetPtrs[&I] = FirstBaseAndOffset; if (auto *SROAArg = getSROAArgForValueOrNull(FirstV)) SROAArgValues[&I] = SROAArg; } return true; } /// Check we can fold GEPs of constant-offset call site argument pointers. /// This requires target data and inbounds GEPs. /// /// \return true if the specified GEP can be folded. bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) { // Check if we have a base + offset for the pointer. std::pair BaseAndOffset = ConstantOffsetPtrs.lookup(I.getPointerOperand()); if (!BaseAndOffset.first) return false; // Check if the offset of this GEP is constant, and if so accumulate it // into Offset. if (!accumulateGEPOffset(cast(I), BaseAndOffset.second)) return false; // Add the result as a new mapping to Base + Offset. ConstantOffsetPtrs[&I] = BaseAndOffset; return true; } bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { auto *SROAArg = getSROAArgForValueOrNull(I.getPointerOperand()); // Lambda to check whether a GEP's indices are all constant. auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) { for (const Use &Op : GEP.indices()) if (!isa(Op) && !SimplifiedValues.lookup(Op)) return false; return true; }; if (!DisableGEPConstOperand) if (simplifyInstruction(I, [&](SmallVectorImpl &COps) { SmallVector Indices; for (unsigned int Index = 1; Index < COps.size(); ++Index) Indices.push_back(COps[Index]); return ConstantExpr::getGetElementPtr( I.getSourceElementType(), COps[0], Indices, I.isInBounds()); })) return true; if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) { if (SROAArg) SROAArgValues[&I] = SROAArg; // Constant GEPs are modeled as free. return true; } // Variable GEPs will require math and will disable SROA. if (SROAArg) disableSROAForArg(SROAArg); return isGEPFree(I); } /// Simplify \p I if its operands are constants and update SimplifiedValues. /// \p Evaluate is a callable specific to instruction type that evaluates the /// instruction when all the operands are constants. template bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) { SmallVector COps; for (Value *Op : I.operands()) { Constant *COp = dyn_cast(Op); if (!COp) COp = SimplifiedValues.lookup(Op); if (!COp) return false; COps.push_back(COp); } auto *C = Evaluate(COps); if (!C) return false; SimplifiedValues[&I] = C; return true; } /// Try to simplify a call to llvm.is.constant. /// /// Duplicate the argument checking from CallAnalyzer::simplifyCallSite since /// we expect calls of this specific intrinsic to be infrequent. /// /// FIXME: Given that we know CB's parent (F) caller /// (CandidateCall->getParent()->getParent()), we might be able to determine /// whether inlining F into F's caller would change how the call to /// llvm.is.constant would evaluate. bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) { Value *Arg = CB.getArgOperand(0); auto *C = dyn_cast(Arg); if (!C) C = dyn_cast_or_null(SimplifiedValues.lookup(Arg)); Type *RT = CB.getFunctionType()->getReturnType(); SimplifiedValues[&CB] = ConstantInt::get(RT, C ? 1 : 0); return true; } bool CallAnalyzer::visitBitCast(BitCastInst &I) { // Propagate constants through bitcasts. if (simplifyInstruction(I, [&](SmallVectorImpl &COps) { return ConstantExpr::getBitCast(COps[0], I.getType()); })) return true; // Track base/offsets through casts std::pair BaseAndOffset = ConstantOffsetPtrs.lookup(I.getOperand(0)); // Casts don't change the offset, just wrap it up. 
if (BaseAndOffset.first) ConstantOffsetPtrs[&I] = BaseAndOffset; // Also look for SROA candidates here. if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0))) SROAArgValues[&I] = SROAArg; // Bitcasts are always zero cost. return true; } bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { // Propagate constants through ptrtoint. if (simplifyInstruction(I, [&](SmallVectorImpl &COps) { return ConstantExpr::getPtrToInt(COps[0], I.getType()); })) return true; // Track base/offset pairs when converted to a plain integer provided the // integer is large enough to represent the pointer. unsigned IntegerSize = I.getType()->getScalarSizeInBits(); unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace(); if (IntegerSize == DL.getPointerSizeInBits(AS)) { std::pair BaseAndOffset = ConstantOffsetPtrs.lookup(I.getOperand(0)); if (BaseAndOffset.first) ConstantOffsetPtrs[&I] = BaseAndOffset; } // This is really weird. Technically, ptrtoint will disable SROA. However, // unless that ptrtoint is *used* somewhere in the live basic blocks after // inlining, it will be nuked, and SROA should proceed. All of the uses which // would block SROA would also block SROA if applied directly to a pointer, // and so we can just add the integer in here. The only places where SROA is // preserved either cannot fire on an integer, or won't in-and-of themselves // disable SROA (ext) w/o some later use that we would see and disable. if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0))) SROAArgValues[&I] = SROAArg; return TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == TargetTransformInfo::TCC_Free; } bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { // Propagate constants through ptrtoint. if (simplifyInstruction(I, [&](SmallVectorImpl &COps) { return ConstantExpr::getIntToPtr(COps[0], I.getType()); })) return true; // Track base/offset pairs when round-tripped through a pointer without // modifications provided the integer is not too large. Value *Op = I.getOperand(0); unsigned IntegerSize = Op->getType()->getScalarSizeInBits(); if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) { std::pair BaseAndOffset = ConstantOffsetPtrs.lookup(Op); if (BaseAndOffset.first) ConstantOffsetPtrs[&I] = BaseAndOffset; } // "Propagate" SROA here in the same manner as we do for ptrtoint above. if (auto *SROAArg = getSROAArgForValueOrNull(Op)) SROAArgValues[&I] = SROAArg; return TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == TargetTransformInfo::TCC_Free; } bool CallAnalyzer::visitCastInst(CastInst &I) { // Propagate constants through casts. if (simplifyInstruction(I, [&](SmallVectorImpl &COps) { return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType()); })) return true; // Disable SROA in the face of arbitrary casts we don't explicitly list // elsewhere. disableSROA(I.getOperand(0)); // If this is a floating-point cast, and the target says this operation // is expensive, this may eventually become a library call. Treat the cost // as such. 
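// For example, on a target without hardware FP support a 'uitofp i32 %x to
// double' is typically lowered to a runtime library call (roughly
// __floatunsidf), which is why an expensive FP cast is charged like a call
// below rather than like a single cheap instruction.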
switch (I.getOpcode()) { case Instruction::FPTrunc: case Instruction::FPExt: case Instruction::UIToFP: case Instruction::SIToFP: case Instruction::FPToUI: case Instruction::FPToSI: if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive) onCallPenalty(); break; default: break; } return TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == TargetTransformInfo::TCC_Free; } bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) { return CandidateCall.paramHasAttr(A->getArgNo(), Attr); } bool CallAnalyzer::isKnownNonNullInCallee(Value *V) { // Does the *call site* have the NonNull attribute set on an argument? We // use the attribute on the call site to memoize any analysis done in the // caller. This will also trip if the callee function has a non-null // parameter attribute, but that's a less interesting case because hopefully // the callee would already have been simplified based on that. if (Argument *A = dyn_cast(V)) if (paramHasAttr(A, Attribute::NonNull)) return true; // Is this an alloca in the caller? This is distinct from the attribute case // above because attributes aren't updated within the inliner itself and we // always want to catch the alloca derived case. if (isAllocaDerivedArg(V)) // We can actually predict the result of comparisons between an // alloca-derived value and null. Note that this fires regardless of // SROA firing. return true; return false; } bool CallAnalyzer::allowSizeGrowth(CallBase &Call) { // If the normal destination of the invoke or the parent block of the call // site is unreachable-terminated, there is little point in inlining this // unless there is literally zero cost. // FIXME: Note that it is possible that an unreachable-terminated block has a // hot entry. For example, in below scenario inlining hot_call_X() may be // beneficial : // main() { // hot_call_1(); // ... // hot_call_N() // exit(0); // } // For now, we are not handling this corner case here as it is rare in real // code. In future, we should elaborate this based on BPI and BFI in more // general threshold adjusting heuristics in updateThreshold(). if (InvokeInst *II = dyn_cast(&Call)) { if (isa(II->getNormalDest()->getTerminator())) return false; } else if (isa(Call.getParent()->getTerminator())) return false; return true; } bool InlineCostCallAnalyzer::isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI) { // If global profile summary is available, then callsite's coldness is // determined based on that. if (PSI && PSI->hasProfileSummary()) return PSI->isColdCallSite(Call, CallerBFI); // Otherwise we need BFI to be available. if (!CallerBFI) return false; // Determine if the callsite is cold relative to caller's entry. We could // potentially cache the computation of scaled entry frequency, but the added // complexity is not worth it unless this scaling shows up high in the // profiles. const BranchProbability ColdProb(ColdCallSiteRelFreq, 100); auto CallSiteBB = Call.getParent(); auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB); auto CallerEntryFreq = CallerBFI->getBlockFreq(&(Call.getCaller()->getEntryBlock())); return CallSiteFreq < CallerEntryFreq * ColdProb; } Optional InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &Call, BlockFrequencyInfo *CallerBFI) { // If global profile summary is available, then callsite's hotness is // determined based on that. 
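// For example, with a profile summary a callsite that PSI marks hot gets
// Params.HotCallSiteThreshold immediately; without one, a call in a hot inner
// loop whose block frequency is many times the caller's entry frequency can
// still qualify via the locally-hot path below.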
if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(Call, CallerBFI)) return Params.HotCallSiteThreshold; // Otherwise we need BFI to be available and to have a locally hot callsite // threshold. if (!CallerBFI || !Params.LocallyHotCallSiteThreshold) return None; // Determine if the callsite is hot relative to caller's entry. We could // potentially cache the computation of scaled entry frequency, but the added // complexity is not worth it unless this scaling shows up high in the // profiles. auto CallSiteBB = Call.getParent(); auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency(); auto CallerEntryFreq = CallerBFI->getEntryFreq(); if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq) return Params.LocallyHotCallSiteThreshold; // Otherwise treat it normally. return None; } void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) { // If no size growth is allowed for this inlining, set Threshold to 0. if (!allowSizeGrowth(Call)) { Threshold = 0; return; } Function *Caller = Call.getCaller(); // return min(A, B) if B is valid. auto MinIfValid = [](int A, Optional B) { return B ? std::min(A, B.getValue()) : A; }; // return max(A, B) if B is valid. auto MaxIfValid = [](int A, Optional B) { return B ? std::max(A, B.getValue()) : A; }; // Various bonus percentages. These are multiplied by Threshold to get the // bonus values. // SingleBBBonus: This bonus is applied if the callee has a single reachable // basic block at the given callsite context. This is speculatively applied // and withdrawn if more than one basic block is seen. // // LstCallToStaticBonus: This large bonus is applied to ensure the inlining // of the last call to a static function as inlining such functions is // guaranteed to reduce code size. // // These bonus percentages may be set to 0 based on properties of the caller // and the callsite. int SingleBBBonusPercent = 50; int VectorBonusPercent = TTI.getInlinerVectorBonusPercent(); int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus; // Lambda to set all the above bonus and bonus percentages to 0. auto DisallowAllBonuses = [&]() { SingleBBBonusPercent = 0; VectorBonusPercent = 0; LastCallToStaticBonus = 0; }; // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available // and reduce the threshold if the caller has the necessary attribute. if (Caller->hasMinSize()) { Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold); // For minsize, we want to disable the single BB bonus and the vector // bonuses, but not the last-call-to-static bonus. Inlining the last call to // a static function will, at the minimum, eliminate the parameter setup and // call/return instructions. SingleBBBonusPercent = 0; VectorBonusPercent = 0; } else if (Caller->hasOptSize()) Threshold = MinIfValid(Threshold, Params.OptSizeThreshold); // Adjust the threshold based on inlinehint attribute and profile based // hotness information if the caller does not have MinSize attribute. if (!Caller->hasMinSize()) { if (Callee.hasFnAttribute(Attribute::InlineHint)) Threshold = MaxIfValid(Threshold, Params.HintThreshold); // FIXME: After switching to the new passmanager, simplify the logic below // by checking only the callsite hotness/coldness as we will reliably // have local profile information. // // Callsite hotness and coldness can be determined if sample profile is // used (which adds hotness metadata to calls) or if caller's // BlockFrequencyInfo is available. BlockFrequencyInfo *CallerBFI = GetBFI ? 
&(GetBFI(*Caller)) : nullptr; auto HotCallSiteThreshold = getHotCallSiteThreshold(Call, CallerBFI); if (!Caller->hasOptSize() && HotCallSiteThreshold) { LLVM_DEBUG(dbgs() << "Hot callsite.\n"); // FIXME: This should update the threshold only if it exceeds the // current threshold, but AutoFDO + ThinLTO currently relies on this // behavior to prevent inlining of hot callsites during ThinLTO // compile phase. Threshold = HotCallSiteThreshold.getValue(); } else if (isColdCallSite(Call, CallerBFI)) { LLVM_DEBUG(dbgs() << "Cold callsite.\n"); // Do not apply bonuses for a cold callsite including the // LastCallToStatic bonus. While this bonus might result in code size // reduction, it can cause the size of a non-cold caller to increase // preventing it from being inlined. DisallowAllBonuses(); Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold); } else if (PSI) { // Use callee's global profile information only if we have no way of // determining this via callsite information. if (PSI->isFunctionEntryHot(&Callee)) { LLVM_DEBUG(dbgs() << "Hot callee.\n"); // If callsite hotness can not be determined, we may still know // that the callee is hot and treat it as a weaker hint for threshold // increase. Threshold = MaxIfValid(Threshold, Params.HintThreshold); } else if (PSI->isFunctionEntryCold(&Callee)) { LLVM_DEBUG(dbgs() << "Cold callee.\n"); // Do not apply bonuses for a cold callee including the // LastCallToStatic bonus. While this bonus might result in code size // reduction, it can cause the size of a non-cold caller to increase // preventing it from being inlined. DisallowAllBonuses(); Threshold = MinIfValid(Threshold, Params.ColdThreshold); } } } Threshold += TTI.adjustInliningThreshold(&Call); // Finally, take the target-specific inlining threshold multiplier into // account. Threshold *= TTI.getInliningThresholdMultiplier(); SingleBBBonus = Threshold * SingleBBBonusPercent / 100; VectorBonus = Threshold * VectorBonusPercent / 100; bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneLiveUse() && &F == Call.getCalledFunction(); // If there is only one call of the function, and it has internal linkage, // the cost of inlining it drops dramatically. It may seem odd to update // Cost in updateThreshold, but the bonus depends on the logic in this method. if (OnlyOneCallAndLocalLinkage) Cost -= LastCallToStaticBonus; } bool CallAnalyzer::visitCmpInst(CmpInst &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); // First try to handle simplified comparisons. if (simplifyInstruction(I, [&](SmallVectorImpl &COps) { return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]); })) return true; if (I.getOpcode() == Instruction::FCmp) return false; // Otherwise look for a comparison between constant offset pointers with // a common base. Value *LHSBase, *RHSBase; APInt LHSOffset, RHSOffset; std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); if (LHSBase) { std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); if (RHSBase && LHSBase == RHSBase) { // We have common bases, fold the icmp to a constant based on the // offsets. 
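// For example, if 'p' is known to be %base + 4 and 'q' is %base + 8 (both
// recorded in ConstantOffsetPtrs), 'icmp eq p, q' folds to false here purely
// from the byte offsets, without materializing either pointer.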
Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) { SimplifiedValues[&I] = C; ++NumConstantPtrCmps; return true; } } } // If the comparison is an equality comparison with null, we can simplify it // if we know the value (argument) can't be null if (I.isEquality() && isa(I.getOperand(1)) && isKnownNonNullInCallee(I.getOperand(0))) { bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE; SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType()) : ConstantInt::getFalse(I.getType()); return true; } return handleSROA(I.getOperand(0), isa(I.getOperand(1))); } bool CallAnalyzer::visitSub(BinaryOperator &I) { // Try to handle a special case: we can fold computing the difference of two // constant-related pointers. Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); Value *LHSBase, *RHSBase; APInt LHSOffset, RHSOffset; std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); if (LHSBase) { std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); if (RHSBase && LHSBase == RHSBase) { // We have common bases, fold the subtract to a constant based on the // offsets. Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) { SimplifiedValues[&I] = C; ++NumConstantPtrDiffs; return true; } } } // Otherwise, fall back to the generic logic for simplifying and handling // instructions. return Base::visitSub(I); } bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); Constant *CLHS = dyn_cast(LHS); if (!CLHS) CLHS = SimplifiedValues.lookup(LHS); Constant *CRHS = dyn_cast(RHS); if (!CRHS) CRHS = SimplifiedValues.lookup(RHS); Value *SimpleV = nullptr; if (auto FI = dyn_cast(&I)) SimpleV = SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL); else SimpleV = SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL); if (Constant *C = dyn_cast_or_null(SimpleV)) SimplifiedValues[&I] = C; if (SimpleV) return true; // Disable any SROA on arguments to arbitrary, unsimplified binary operators. disableSROA(LHS); disableSROA(RHS); // If the instruction is floating point, and the target says this operation // is expensive, this may eventually become a library call. Treat the cost // as such. Unless it's fneg which can be implemented with an xor. using namespace llvm::PatternMatch; if (I.getType()->isFloatingPointTy() && TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive && !match(&I, m_FNeg(m_Value()))) onCallPenalty(); return false; } bool CallAnalyzer::visitFNeg(UnaryOperator &I) { Value *Op = I.getOperand(0); Constant *COp = dyn_cast(Op); if (!COp) COp = SimplifiedValues.lookup(Op); Value *SimpleV = SimplifyFNegInst( COp ? COp : Op, cast(I).getFastMathFlags(), DL); if (Constant *C = dyn_cast_or_null(SimpleV)) SimplifiedValues[&I] = C; if (SimpleV) return true; // Disable any SROA on arguments to arbitrary, unsimplified fneg. disableSROA(Op); return false; } bool CallAnalyzer::visitLoad(LoadInst &I) { if (handleSROA(I.getPointerOperand(), I.isSimple())) return true; // If the data is already loaded from this address and hasn't been clobbered // by any stores or calls, this load is likely to be redundant and can be // eliminated. 
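// For example, given two simple loads of the same address with no intervening
// store or clobbering call, the first is costed normally while the second is
// credited as an elimination opportunity below; the credit is taken back by
// disableLoadElimination() if a clobber shows up later.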
if (EnableLoadElimination && !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) { onLoadEliminationOpportunity(); return true; } return false; } bool CallAnalyzer::visitStore(StoreInst &I) { if (handleSROA(I.getPointerOperand(), I.isSimple())) return true; // The store can potentially clobber loads and prevent repeated loads from // being eliminated. // FIXME: // 1. We can probably keep an initial set of eliminatable loads substracted // from the cost even when we finally see a store. We just need to disable // *further* accumulation of elimination savings. // 2. We should probably at some point thread MemorySSA for the callee into // this and then use that to actually compute *really* precise savings. disableLoadElimination(); return false; } bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) { // Constant folding for extract value is trivial. if (simplifyInstruction(I, [&](SmallVectorImpl &COps) { return ConstantExpr::getExtractValue(COps[0], I.getIndices()); })) return true; // SROA can't look through these, but they may be free. return Base::visitExtractValue(I); } bool CallAnalyzer::visitInsertValue(InsertValueInst &I) { // Constant folding for insert value is trivial. if (simplifyInstruction(I, [&](SmallVectorImpl &COps) { return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0], /*InsertedValueOperand*/ COps[1], I.getIndices()); })) return true; // SROA can't look through these, but they may be free. return Base::visitInsertValue(I); } /// Try to simplify a call site. /// /// Takes a concrete function and callsite and tries to actually simplify it by /// analyzing the arguments and call itself with instsimplify. Returns true if /// it has simplified the callsite to some other entity (a constant), making it /// free. bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) { // FIXME: Using the instsimplify logic directly for this is inefficient // because we have to continually rebuild the argument list even when no // simplifications can be performed. Until that is fixed with remapping // inside of instsimplify, directly constant fold calls here. if (!canConstantFoldCallTo(&Call, F)) return false; // Try to re-map the arguments to constants. SmallVector ConstantArgs; ConstantArgs.reserve(Call.arg_size()); for (Value *I : Call.args()) { Constant *C = dyn_cast(I); if (!C) C = dyn_cast_or_null(SimplifiedValues.lookup(I)); if (!C) return false; // This argument doesn't map to a constant. ConstantArgs.push_back(C); } if (Constant *C = ConstantFoldCall(&Call, F, ConstantArgs)) { SimplifiedValues[&Call] = C; return true; } return false; } bool CallAnalyzer::visitCallBase(CallBase &Call) { if (!onCallBaseVisitStart(Call)) return true; if (Call.hasFnAttr(Attribute::ReturnsTwice) && !F.hasFnAttribute(Attribute::ReturnsTwice)) { // This aborts the entire analysis. ExposesReturnsTwice = true; return false; } if (isa(Call) && cast(Call).cannotDuplicate()) ContainsNoDuplicateCall = true; Value *Callee = Call.getCalledOperand(); Function *F = dyn_cast_or_null(Callee); bool IsIndirectCall = !F; if (IsIndirectCall) { // Check if this happens to be an indirect function call to a known function // in this inline context. If not, we've done all we can. F = dyn_cast_or_null(SimplifiedValues.lookup(Callee)); if (!F) { onCallArgumentSetup(Call); if (!Call.onlyReadsMemory()) disableLoadElimination(); return Base::visitCallBase(Call); } } assert(F && "Expected a call to a known function"); // When we have a concrete function, first try to simplify it directly. 
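// For example, a call whose arguments are all constants after substitution,
// such as 'llvm.fabs.f64(-3.0)', is folded by ConstantFoldCall below into the
// constant 3.0, so the call site contributes nothing to the inline cost.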
if (simplifyCallSite(F, Call)) return true; // Next check if it is an intrinsic we know about. // FIXME: Lift this into part of the InstVisitor. if (IntrinsicInst *II = dyn_cast(&Call)) { switch (II->getIntrinsicID()) { default: if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II)) disableLoadElimination(); return Base::visitCallBase(Call); case Intrinsic::load_relative: onLoadRelativeIntrinsic(); return false; case Intrinsic::memset: case Intrinsic::memcpy: case Intrinsic::memmove: disableLoadElimination(); // SROA can usually chew through these intrinsics, but they aren't free. return false; case Intrinsic::icall_branch_funnel: case Intrinsic::localescape: HasUninlineableIntrinsic = true; return false; case Intrinsic::vastart: InitsVargArgs = true; return false; case Intrinsic::launder_invariant_group: case Intrinsic::strip_invariant_group: if (auto *SROAArg = getSROAArgForValueOrNull(II->getOperand(0))) SROAArgValues[II] = SROAArg; return true; case Intrinsic::is_constant: return simplifyIntrinsicCallIsConstant(Call); } } if (F == Call.getFunction()) { // This flag will fully abort the analysis, so don't bother with anything // else. IsRecursiveCall = true; if (!AllowRecursiveCall) return false; } if (TTI.isLoweredToCall(F)) { onLoweredCall(F, Call, IsIndirectCall); } if (!(Call.onlyReadsMemory() || (IsIndirectCall && F->onlyReadsMemory()))) disableLoadElimination(); return Base::visitCallBase(Call); } bool CallAnalyzer::visitReturnInst(ReturnInst &RI) { // At least one return instruction will be free after inlining. bool Free = !HasReturn; HasReturn = true; return Free; } bool CallAnalyzer::visitBranchInst(BranchInst &BI) { // We model unconditional branches as essentially free -- they really // shouldn't exist at all, but handling them makes the behavior of the // inliner more regular and predictable. Interestingly, conditional branches // which will fold away are also free. return BI.isUnconditional() || isa(BI.getCondition()) || isa_and_nonnull( SimplifiedValues.lookup(BI.getCondition())); } bool CallAnalyzer::visitSelectInst(SelectInst &SI) { bool CheckSROA = SI.getType()->isPointerTy(); Value *TrueVal = SI.getTrueValue(); Value *FalseVal = SI.getFalseValue(); Constant *TrueC = dyn_cast(TrueVal); if (!TrueC) TrueC = SimplifiedValues.lookup(TrueVal); Constant *FalseC = dyn_cast(FalseVal); if (!FalseC) FalseC = SimplifiedValues.lookup(FalseVal); Constant *CondC = dyn_cast_or_null(SimplifiedValues.lookup(SI.getCondition())); if (!CondC) { // Select C, X, X => X if (TrueC == FalseC && TrueC) { SimplifiedValues[&SI] = TrueC; return true; } if (!CheckSROA) return Base::visitSelectInst(SI); std::pair TrueBaseAndOffset = ConstantOffsetPtrs.lookup(TrueVal); std::pair FalseBaseAndOffset = ConstantOffsetPtrs.lookup(FalseVal); if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) { ConstantOffsetPtrs[&SI] = TrueBaseAndOffset; if (auto *SROAArg = getSROAArgForValueOrNull(TrueVal)) SROAArgValues[&SI] = SROAArg; return true; } return Base::visitSelectInst(SI); } // Select condition is a constant. Value *SelectedV = CondC->isAllOnesValue() ? TrueVal : (CondC->isNullValue()) ? FalseVal : nullptr; if (!SelectedV) { // Condition is a vector constant that is not all 1s or all 0s. If all // operands are constants, ConstantExpr::getSelect() can handle the cases // such as select vectors. 
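// For example, 'select <2 x i1> <i1 true, i1 false>, <2 x i32> <i32 1, i32 1>,
// <2 x i32> <i32 2, i32 2>' folds element-wise to '<2 x i32> <i32 1, i32 2>'
// this way.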
if (TrueC && FalseC) { if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) { SimplifiedValues[&SI] = C; return true; } } return Base::visitSelectInst(SI); } // Condition is either all 1s or all 0s. SI can be simplified. if (Constant *SelectedC = dyn_cast(SelectedV)) { SimplifiedValues[&SI] = SelectedC; return true; } if (!CheckSROA) return true; std::pair BaseAndOffset = ConstantOffsetPtrs.lookup(SelectedV); if (BaseAndOffset.first) { ConstantOffsetPtrs[&SI] = BaseAndOffset; if (auto *SROAArg = getSROAArgForValueOrNull(SelectedV)) SROAArgValues[&SI] = SROAArg; } return true; } bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { // We model unconditional switches as free, see the comments on handling // branches. if (isa(SI.getCondition())) return true; if (Value *V = SimplifiedValues.lookup(SI.getCondition())) if (isa(V)) return true; // Assume the most general case where the switch is lowered into // either a jump table, bit test, or a balanced binary tree consisting of // case clusters without merging adjacent clusters with the same // destination. We do not consider the switches that are lowered with a mix // of jump table/bit test/binary search tree. The cost of the switch is // proportional to the size of the tree or the size of jump table range. // // NB: We convert large switches which are just used to initialize large phi // nodes to lookup tables instead in simplifycfg, so this shouldn't prevent // inlining those. It will prevent inlining in cases where the optimization // does not (yet) fire. unsigned JumpTableSize = 0; BlockFrequencyInfo *BFI = GetBFI ? &(GetBFI(F)) : nullptr; unsigned NumCaseCluster = TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize, PSI, BFI); onFinalizeSwitch(JumpTableSize, NumCaseCluster); return false; } bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) { // We never want to inline functions that contain an indirectbr. This is // incorrect because all the blockaddress's (in static global initializers // for example) would be referring to the original function, and this // indirect jump would jump from the inlined copy of the function into the // original function which is extremely undefined behavior. // FIXME: This logic isn't really right; we can safely inline functions with // indirectbr's as long as no other function or global references the // blockaddress of a block within the current function. HasIndirectBr = true; return false; } bool CallAnalyzer::visitResumeInst(ResumeInst &RI) { // FIXME: It's not clear that a single instruction is an accurate model for // the inline cost of a resume instruction. return false; } bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) { // FIXME: It's not clear that a single instruction is an accurate model for // the inline cost of a cleanupret instruction. return false; } bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) { // FIXME: It's not clear that a single instruction is an accurate model for // the inline cost of a catchret instruction. return false; } bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) { // FIXME: It might be reasonably to discount the cost of instructions leading // to unreachable as they have the lowest possible impact on both runtime and // code size. return true; // No actual code is needed for unreachable. } bool CallAnalyzer::visitInstruction(Instruction &I) { // Some instructions are free. All of the free intrinsics can also be // handled by SROA, etc. 
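// A rough, hypothetical cost model matching the visitSwitchInst comment above:
// a switch lowered to a jump table costs roughly in proportion to the table
// range, while a balanced binary search over N case clusters costs about
// 2 * log2(N) compare-and-branch steps. The constants are illustrative only,
// not the values the analyzer actually charges.
#include <cmath>
#include <cstdint>

int64_t estimateSwitchCost(uint64_t JumpTableSize, uint64_t NumCaseCluster,
                           int64_t InstrCost) {
  if (JumpTableSize != 0)
    return static_cast<int64_t>(JumpTableSize) * InstrCost; // table-driven dispatch
  if (NumCaseCluster <= 1)
    return 0; // degenerate switch folds to a branch
  // Balanced tree: roughly two instructions (compare + branch) per level.
  uint64_t Levels =
      static_cast<uint64_t>(std::ceil(std::log2((double)NumCaseCluster)));
  return static_cast<int64_t>(2 * Levels) * InstrCost;
}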
if (TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == TargetTransformInfo::TCC_Free) return true; // We found something we don't understand or can't handle. Mark any SROA-able // values in the operand list as no longer viable. for (const Use &Op : I.operands()) disableSROA(Op); return false; } /// Analyze a basic block for its contribution to the inline cost. /// /// This method walks the analyzer over every instruction in the given basic /// block and accounts for their cost during inlining at this callsite. It /// aborts early if the threshold has been exceeded or an impossible to inline /// construct has been detected. It returns false if inlining is no longer /// viable, and true if inlining remains viable. InlineResult CallAnalyzer::analyzeBlock(BasicBlock *BB, SmallPtrSetImpl &EphValues) { for (Instruction &I : *BB) { // FIXME: Currently, the number of instructions in a function regardless of // our ability to simplify them during inline to constants or dead code, // are actually used by the vector bonus heuristic. As long as that's true, // we have to special case debug intrinsics here to prevent differences in // inlining due to debug symbols. Eventually, the number of unsimplified // instructions shouldn't factor into the cost computation, but until then, // hack around it here. // Similarly, skip pseudo-probes. if (I.isDebugOrPseudoInst()) continue; // Skip ephemeral values. if (EphValues.count(&I)) continue; ++NumInstructions; if (isa(I) || I.getType()->isVectorTy()) ++NumVectorInstructions; // If the instruction simplified to a constant, there is no cost to this // instruction. Visit the instructions using our InstVisitor to account for // all of the per-instruction logic. The visit tree returns true if we // consumed the instruction in any way, and false if the instruction's base // cost should count against inlining. onInstructionAnalysisStart(&I); if (Base::visit(&I)) ++NumInstructionsSimplified; else onMissedSimplification(); onInstructionAnalysisFinish(&I); using namespace ore; // If the visit this instruction detected an uninlinable pattern, abort. InlineResult IR = InlineResult::success(); if (IsRecursiveCall && !AllowRecursiveCall) IR = InlineResult::failure("recursive"); else if (ExposesReturnsTwice) IR = InlineResult::failure("exposes returns twice"); else if (HasDynamicAlloca) IR = InlineResult::failure("dynamic alloca"); else if (HasIndirectBr) IR = InlineResult::failure("indirect branch"); else if (HasUninlineableIntrinsic) IR = InlineResult::failure("uninlinable intrinsic"); else if (InitsVargArgs) IR = InlineResult::failure("varargs"); if (!IR.isSuccess()) { if (ORE) ORE->emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", &CandidateCall) << NV("Callee", &F) << " has uninlinable pattern (" << NV("InlineResult", IR.getFailureReason()) << ") and cost is not fully computed"; }); return IR; } // If the caller is a recursive function then we don't want to inline // functions which allocate a lot of stack space because it would increase // the caller stack usage dramatically. if (IsCallerRecursive && AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) { auto IR = InlineResult::failure("recursive and allocates too much stack space"); if (ORE) ORE->emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", &CandidateCall) << NV("Callee", &F) << " is " << NV("InlineResult", IR.getFailureReason()) << ". 
Cost is not fully computed"; }); return IR; } if (shouldStop()) return InlineResult::failure( "Call site analysis is not favorable to inlining."); } return InlineResult::success(); } /// Compute the base pointer and cumulative constant offsets for V. /// /// This strips all constant offsets off of V, leaving it the base pointer, and /// accumulates the total constant offset applied in the returned constant. It /// returns 0 if V is not a pointer, and returns the constant '0' if there are /// no constant offsets applied. ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { if (!V->getType()->isPointerTy()) return nullptr; unsigned AS = V->getType()->getPointerAddressSpace(); unsigned IntPtrWidth = DL.getIndexSizeInBits(AS); APInt Offset = APInt::getZero(IntPtrWidth); // Even though we don't look through PHI nodes, we could be called on an // instruction in an unreachable block, which may be on a cycle. SmallPtrSet Visited; Visited.insert(V); do { if (GEPOperator *GEP = dyn_cast(V)) { if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset)) return nullptr; V = GEP->getPointerOperand(); } else if (Operator::getOpcode(V) == Instruction::BitCast) { V = cast(V)->getOperand(0); } else if (GlobalAlias *GA = dyn_cast(V)) { if (GA->isInterposable()) break; V = GA->getAliasee(); } else { break; } assert(V->getType()->isPointerTy() && "Unexpected operand type!"); } while (Visited.insert(V).second); Type *IdxPtrTy = DL.getIndexType(V->getType()); return cast(ConstantInt::get(IdxPtrTy, Offset)); } /// Find dead blocks due to deleted CFG edges during inlining. /// /// If we know the successor of the current block, \p CurrBB, has to be \p /// NextBB, the other successors of \p CurrBB are dead if these successors have /// no live incoming CFG edges. If one block is found to be dead, we can /// continue growing the dead block list by checking the successors of the dead /// blocks to see if all their incoming edges are dead or not. void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) { auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) { // A CFG edge is dead if the predecessor is dead or the predecessor has a // known successor which is not the one under exam. return (DeadBlocks.count(Pred) || (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ)); }; auto IsNewlyDead = [&](BasicBlock *BB) { // If all the edges to a block are dead, the block is also dead. return (!DeadBlocks.count(BB) && llvm::all_of(predecessors(BB), [&](BasicBlock *P) { return IsEdgeDead(P, BB); })); }; for (BasicBlock *Succ : successors(CurrBB)) { if (Succ == NextBB || !IsNewlyDead(Succ)) continue; SmallVector NewDead; NewDead.push_back(Succ); while (!NewDead.empty()) { BasicBlock *Dead = NewDead.pop_back_val(); if (DeadBlocks.insert(Dead)) // Continue growing the dead block lists. for (BasicBlock *S : successors(Dead)) if (IsNewlyDead(S)) NewDead.push_back(S); } } } /// Analyze a call site for potential inlining. /// /// Returns true if inlining this call is viable, and false if it is not /// viable. It computes the cost and adjusts the threshold based on numerous /// factors and heuristics. If this method returns false but the computed cost /// is below the computed threshold, then inlining was forcibly disabled by /// some artifact of the routine. 
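// A small stand-alone model of the set-up performed at the start of the
// analysis below: call-site arguments that are already constants are recorded
// against the callee's formal parameters, so later simplification can treat
// those formals as known values. Plain C++ stand-ins are used for the IR
// types; every name here is hypothetical.
#include <optional>
#include <unordered_map>
#include <vector>

using ParamIndex = unsigned;
using ConstantVal = long long;

std::unordered_map<ParamIndex, ConstantVal>
mapConstantArguments(const std::vector<std::optional<ConstantVal>> &ActualArgs) {
  std::unordered_map<ParamIndex, ConstantVal> SimplifiedFormals;
  for (ParamIndex I = 0; I != ActualArgs.size(); ++I)
    if (ActualArgs[I]) // only arguments whose value is known at the call site
      SimplifiedFormals[I] = *ActualArgs[I];
  return SimplifiedFormals;
}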
InlineResult CallAnalyzer::analyze() { ++NumCallsAnalyzed; auto Result = onAnalysisStart(); if (!Result.isSuccess()) return Result; if (F.empty()) return InlineResult::success(); Function *Caller = CandidateCall.getFunction(); // Check if the caller function is recursive itself. for (User *U : Caller->users()) { CallBase *Call = dyn_cast(U); if (Call && Call->getFunction() == Caller) { IsCallerRecursive = true; break; } } // Populate our simplified values by mapping from function arguments to call // arguments with known important simplifications. auto CAI = CandidateCall.arg_begin(); for (Argument &FAI : F.args()) { assert(CAI != CandidateCall.arg_end()); if (Constant *C = dyn_cast(CAI)) SimplifiedValues[&FAI] = C; Value *PtrArg = *CAI; if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) { ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg, C->getValue()); // We can SROA any pointer arguments derived from alloca instructions. if (auto *SROAArg = dyn_cast(PtrArg)) { SROAArgValues[&FAI] = SROAArg; onInitializeSROAArg(SROAArg); EnabledSROAAllocas.insert(SROAArg); } } ++CAI; } NumConstantArgs = SimplifiedValues.size(); NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size(); NumAllocaArgs = SROAArgValues.size(); // FIXME: If a caller has multiple calls to a callee, we end up recomputing // the ephemeral values multiple times (and they're completely determined by // the callee, so this is purely duplicate work). SmallPtrSet EphValues; CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues); // The worklist of live basic blocks in the callee *after* inlining. We avoid // adding basic blocks of the callee which can be proven to be dead for this // particular call site in order to get more accurate cost estimates. This // requires a somewhat heavyweight iteration pattern: we need to walk the // basic blocks in a breadth-first order as we insert live successors. To // accomplish this, prioritizing for small iterations because we exit after // crossing our threshold, we use a small-size optimized SetVector. typedef SetVector, SmallPtrSet> BBSetVector; BBSetVector BBWorklist; BBWorklist.insert(&F.getEntryBlock()); // Note that we *must not* cache the size, this loop grows the worklist. for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { if (shouldStop()) break; BasicBlock *BB = BBWorklist[Idx]; if (BB->empty()) continue; onBlockStart(BB); // Disallow inlining a blockaddress with uses other than strictly callbr. // A blockaddress only has defined behavior for an indirect branch in the // same function, and we do not currently support inlining indirect // branches. But, the inliner may not see an indirect branch that ends up // being dead code at a particular call site. If the blockaddress escapes // the function, e.g., via a global variable, inlining may lead to an // invalid cross-function reference. // FIXME: pr/39560: continue relaxing this overt restriction. if (BB->hasAddressTaken()) for (User *U : BlockAddress::get(&*BB)->users()) if (!isa(*U)) return InlineResult::failure("blockaddress used outside of callbr"); // Analyze the cost of this block. If we blow through the threshold, this // returns false, and we can bail on out. InlineResult IR = analyzeBlock(BB, EphValues); if (!IR.isSuccess()) return IR; Instruction *TI = BB->getTerminator(); // Add in the live successors by first checking whether we have terminator // that may be simplified based on the values simplified by this call. 
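// A tiny stand-alone model of the successor handling that follows: when the
// terminator's condition has already been simplified to a constant, only the
// taken successor is added to the worklist; otherwise every successor is.
// The types are hypothetical stand-ins, not the IR classes.
#include <optional>
#include <vector>

struct MiniTerminator {
  std::optional<bool> FoldedCond; // set when the condition simplified to a constant
  std::vector<int> Succs;         // successor block ids; [0] = if-true, [1] = if-false
};

void addLiveSuccessors(const MiniTerminator &TI, std::vector<int> &Worklist) {
  if (TI.FoldedCond && TI.Succs.size() == 2) {
    Worklist.push_back(*TI.FoldedCond ? TI.Succs[0] : TI.Succs[1]);
    return; // the other edge is dead for this call site
  }
  for (int S : TI.Succs) // unknown condition: count every successor as live
    Worklist.push_back(S);
}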
if (BranchInst *BI = dyn_cast(TI)) { if (BI->isConditional()) { Value *Cond = BI->getCondition(); if (ConstantInt *SimpleCond = dyn_cast_or_null(SimplifiedValues.lookup(Cond))) { BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0); BBWorklist.insert(NextBB); KnownSuccessors[BB] = NextBB; findDeadBlocks(BB, NextBB); continue; } } } else if (SwitchInst *SI = dyn_cast(TI)) { Value *Cond = SI->getCondition(); if (ConstantInt *SimpleCond = dyn_cast_or_null(SimplifiedValues.lookup(Cond))) { BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor(); BBWorklist.insert(NextBB); KnownSuccessors[BB] = NextBB; findDeadBlocks(BB, NextBB); continue; } } // If we're unable to select a particular successor, just count all of // them. for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize; ++TIdx) BBWorklist.insert(TI->getSuccessor(TIdx)); onBlockAnalyzed(BB); } bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneLiveUse() && &F == CandidateCall.getCalledFunction(); // If this is a noduplicate call, we can still inline as long as // inlining this would cause the removal of the caller (so the instruction // is not actually duplicated, just moved). if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall) return InlineResult::failure("noduplicate"); return finalizeAnalysis(); } void InlineCostCallAnalyzer::print(raw_ostream &OS) { #define DEBUG_PRINT_STAT(x) OS << " " #x ": " << x << "\n" if (PrintInstructionComments) F.print(OS, &Writer); DEBUG_PRINT_STAT(NumConstantArgs); DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs); DEBUG_PRINT_STAT(NumAllocaArgs); DEBUG_PRINT_STAT(NumConstantPtrCmps); DEBUG_PRINT_STAT(NumConstantPtrDiffs); DEBUG_PRINT_STAT(NumInstructionsSimplified); DEBUG_PRINT_STAT(NumInstructions); DEBUG_PRINT_STAT(SROACostSavings); DEBUG_PRINT_STAT(SROACostSavingsLost); DEBUG_PRINT_STAT(LoadEliminationCost); DEBUG_PRINT_STAT(ContainsNoDuplicateCall); DEBUG_PRINT_STAT(Cost); DEBUG_PRINT_STAT(Threshold); #undef DEBUG_PRINT_STAT } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Dump stats about this call's analysis. LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(dbgs()); } #endif /// Test that there are no attribute conflicts between Caller and Callee /// that prevent inlining. static bool functionsHaveCompatibleAttributes( Function *Caller, Function *Callee, TargetTransformInfo &TTI, function_ref &GetTLI) { // Note that CalleeTLI must be a copy not a reference. The legacy pass manager // caches the most recently created TLI in the TargetLibraryInfoWrapperPass // object, and always returns the same object (which is overwritten on each // GetTLI call). Therefore we copy the first result. auto CalleeTLI = GetTLI(*Callee); return TTI.areInlineCompatible(Caller, Callee) && GetTLI(*Caller).areInlineCompatible(CalleeTLI, InlineCallerSupersetNoBuiltin) && AttributeFuncs::areInlineCompatible(*Caller, *Callee); } int llvm::getCallsiteCost(CallBase &Call, const DataLayout &DL) { int Cost = 0; for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) { if (Call.isByValArgument(I)) { // We approximate the number of loads and stores needed by dividing the // size of the byval type by the target's pointer size. PointerType *PTy = cast(Call.getArgOperand(I)->getType()); unsigned TypeSize = DL.getTypeSizeInBits(Call.getParamByValType(I)); unsigned AS = PTy->getAddressSpace(); unsigned PointerSize = DL.getPointerSizeInBits(AS); // Ceiling division. 
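// Worked example of the byval estimate that follows: the number of
// pointer-sized words is computed with a ceiling division and capped at 8,
// and each word is charged one load plus one store. The constants are
// stand-ins for the values obtained from DataLayout / InlineConstants.
#include <algorithm>
#include <cstdint>

int64_t byvalCopyCost(uint64_t TypeSizeBits, uint64_t PointerSizeBits,
                      int64_t InstrCost) {
  uint64_t NumStores = (TypeSizeBits + PointerSizeBits - 1) / PointerSizeBits;
  NumStores = std::min<uint64_t>(NumStores, 8); // larger copies become a memcpy call
  return 2 * static_cast<int64_t>(NumStores) * InstrCost; // one load + one store per word
}
// e.g. a 24-byte struct with 64-bit pointers: ceil(192 / 64) = 3 words,
// so the estimate is 2 * 3 * InstrCost.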
unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; // If it generates more than 8 stores it is likely to be expanded as an // inline memcpy so we take that as an upper bound. Otherwise we assume // one load and one store per word copied. // FIXME: The maxStoresPerMemcpy setting from the target should be used // here instead of a magic number of 8, but it's not available via // DataLayout. NumStores = std::min(NumStores, 8U); Cost += 2 * NumStores * InlineConstants::InstrCost; } else { // For non-byval arguments subtract off one instruction per call // argument. Cost += InlineConstants::InstrCost; } } // The call instruction also disappears after inlining. Cost += InlineConstants::InstrCost + CallPenalty; return Cost; } InlineCost llvm::getInlineCost( CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref GetAssumptionCache, function_ref GetTLI, function_ref GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI, GetAssumptionCache, GetTLI, GetBFI, PSI, ORE); } Optional llvm::getInliningCostEstimate( CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref GetAssumptionCache, function_ref GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { const InlineParams Params = {/* DefaultThreshold*/ 0, /*HintThreshold*/ {}, /*ColdThreshold*/ {}, /*OptSizeThreshold*/ {}, /*OptMinSizeThreshold*/ {}, /*HotCallSiteThreshold*/ {}, /*LocallyHotCallSiteThreshold*/ {}, /*ColdCallSiteThreshold*/ {}, /*ComputeFullInlineCost*/ true, /*EnableDeferral*/ true}; InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, true, /*IgnoreThreshold*/ true); auto R = CA.analyze(); if (!R.isSuccess()) return None; return CA.getCost(); } Optional llvm::getInliningCostFeatures( CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref GetAssumptionCache, function_ref GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Call.getCalledFunction(), Call); auto R = CFA.analyze(); if (!R.isSuccess()) return None; return CFA.features(); } Optional llvm::getAttributeBasedInliningDecision( CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI, function_ref GetTLI) { // Cannot inline indirect calls. if (!Callee) return InlineResult::failure("indirect call"); // When callee coroutine function is inlined into caller coroutine function // before coro-split pass, // coro-early pass can not handle this quiet well. // So we won't inline the coroutine function if it have not been unsplited if (Callee->isPresplitCoroutine()) return InlineResult::failure("unsplited coroutine call"); // Never inline calls with byval arguments that does not have the alloca // address space. Since byval arguments can be replaced with a copy to an // alloca, the inlined code would need to be adjusted to handle that the // argument is in the alloca address space (so it is a little bit complicated // to solve). unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace(); for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) if (Call.isByValArgument(I)) { PointerType *PTy = cast(Call.getArgOperand(I)->getType()); if (PTy->getAddressSpace() != AllocaAS) return InlineResult::failure("byval arguments without alloca" " address space"); } // Calls to functions with always-inline attributes should be inlined // whenever possible. 
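// A condensed, hypothetical model of the attribute-based gate that follows:
// the checks run in order and the first one that fires decides the outcome,
// before any cost modelling happens. Strings stand in for InlineResult, and
// the flag set below is a simplification of the real checks.
#include <optional>
#include <string>

struct CallFlags {
  bool AlwaysInline, CalleeViable, CompatibleAttrs, CallerOptNone,
      CalleeInterposable, CalleeNoInline, CallSiteNoInline;
};

// nullopt means "no early decision; fall through to the cost analysis".
std::optional<std::string> attributeGate(const CallFlags &F) {
  if (F.AlwaysInline)
    return F.CalleeViable ? "always" : "never: not viable";
  if (!F.CompatibleAttrs)
    return "never: conflicting attributes";
  if (F.CallerOptNone)
    return "never: optnone caller";
  if (F.CalleeInterposable)
    return "never: interposable";
  if (F.CalleeNoInline || F.CallSiteNoInline)
    return "never: noinline";
  return std::nullopt;
}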
if (Call.hasFnAttr(Attribute::AlwaysInline)) { auto IsViable = isInlineViable(*Callee); if (IsViable.isSuccess()) return InlineResult::success(); return InlineResult::failure(IsViable.getFailureReason()); } // Never inline functions with conflicting attributes (unless callee has // always-inline attribute). Function *Caller = Call.getCaller(); if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI, GetTLI)) return InlineResult::failure("conflicting attributes"); // Don't inline this call if the caller has the optnone attribute. if (Caller->hasOptNone()) return InlineResult::failure("optnone attribute"); // Don't inline a function that treats null pointer as valid into a caller // that does not have this attribute. if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined()) return InlineResult::failure("nullptr definitions incompatible"); // Don't inline functions which can be interposed at link-time. if (Callee->isInterposable()) return InlineResult::failure("interposable"); // Don't inline functions marked noinline. if (Callee->hasFnAttribute(Attribute::NoInline)) return InlineResult::failure("noinline function attribute"); // Don't inline call sites marked noinline. if (Call.isNoInline()) return InlineResult::failure("noinline call site attribute"); return None; } InlineCost llvm::getInlineCost( CallBase &Call, Function *Callee, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref GetAssumptionCache, function_ref GetTLI, function_ref GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { auto UserDecision = llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI); if (UserDecision.hasValue()) { if (UserDecision->isSuccess()) return llvm::InlineCost::getAlways("always inline attribute"); return llvm::InlineCost::getNever(UserDecision->getFailureReason()); } LLVM_DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "... (caller:" << Call.getCaller()->getName() << ")\n"); InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE); InlineResult ShouldInline = CA.analyze(); LLVM_DEBUG(CA.dump()); // Always make cost benefit based decision explicit. // We use always/never here since threshold is not meaningful, // as it's not what drives cost-benefit analysis. if (CA.wasDecidedByCostBenefit()) { if (ShouldInline.isSuccess()) return InlineCost::getAlways("benefit over cost", CA.getCostBenefitPair()); else return InlineCost::getNever("cost over benefit", CA.getCostBenefitPair()); } if (CA.wasDecidedByCostThreshold()) return InlineCost::get(CA.getCost(), CA.getThreshold()); // No details on how the decision was made, simply return always or never. return ShouldInline.isSuccess() ? InlineCost::getAlways("empty function") : InlineCost::getNever(ShouldInline.getFailureReason()); } InlineResult llvm::isInlineViable(Function &F) { bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice); for (BasicBlock &BB : F) { // Disallow inlining of functions which contain indirect branches. if (isa(BB.getTerminator())) return InlineResult::failure("contains indirect branches"); // Disallow inlining of blockaddresses which are used by non-callbr // instructions. if (BB.hasAddressTaken()) for (User *U : BlockAddress::get(&BB)->users()) if (!isa(*U)) return InlineResult::failure("blockaddress used outside of callbr"); for (auto &II : BB) { CallBase *Call = dyn_cast(&II); if (!Call) continue; // Disallow recursive calls. 
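// A skeletal version of the viability scan around this point: walk every call
// in the candidate and return the first reason that rules inlining out, or
// nothing if the function stays viable. The record type is a hypothetical
// stand-in for the per-call checks done below.
#include <optional>
#include <string>
#include <vector>

struct MiniCall {
  bool IsDirectRecursion;   // callee is the function being scanned
  bool ExposesReturnsTwice; // setjmp-like callee in a non-returns_twice caller
  bool IsBlockedIntrinsic;  // e.g. localescape / va_start style intrinsics
};

std::optional<std::string>
firstViabilityFailure(const std::vector<MiniCall> &Calls) {
  for (const MiniCall &C : Calls) {
    if (C.IsDirectRecursion)
      return "recursive call";
    if (C.ExposesReturnsTwice)
      return "exposes returns-twice";
    if (C.IsBlockedIntrinsic)
      return "uninlinable intrinsic";
  }
  return std::nullopt; // viable
}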
Function *Callee = Call->getCalledFunction(); if (&F == Callee) return InlineResult::failure("recursive call"); // Disallow calls which expose returns-twice to a function not previously // attributed as such. if (!ReturnsTwice && isa(Call) && cast(Call)->canReturnTwice()) return InlineResult::failure("exposes returns-twice attribute"); if (Callee) switch (Callee->getIntrinsicID()) { default: break; case llvm::Intrinsic::icall_branch_funnel: // Disallow inlining of @llvm.icall.branch.funnel because current // backend can't separate call targets from call arguments. return InlineResult::failure( "disallowed inlining of @llvm.icall.branch.funnel"); case llvm::Intrinsic::localescape: // Disallow inlining functions that call @llvm.localescape. Doing this // correctly would require major changes to the inliner. return InlineResult::failure( "disallowed inlining of @llvm.localescape"); case llvm::Intrinsic::vastart: // Disallow inlining of functions that initialize VarArgs with // va_start. return InlineResult::failure( "contains VarArgs initialized with va_start"); } } } return InlineResult::success(); } // APIs to create InlineParams based on command line flags and/or other // parameters. InlineParams llvm::getInlineParams(int Threshold) { InlineParams Params; // This field is the threshold to use for a callee by default. This is // derived from one or more of: // * optimization or size-optimization levels, // * a value passed to createFunctionInliningPass function, or // * the -inline-threshold flag. // If the -inline-threshold flag is explicitly specified, that is used // irrespective of anything else. if (InlineThreshold.getNumOccurrences() > 0) Params.DefaultThreshold = InlineThreshold; else Params.DefaultThreshold = Threshold; // Set the HintThreshold knob from the -inlinehint-threshold. Params.HintThreshold = HintThreshold; // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold. Params.HotCallSiteThreshold = HotCallSiteThreshold; // If the -locally-hot-callsite-threshold is explicitly specified, use it to // populate LocallyHotCallSiteThreshold. Later, we populate // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if // we know that optimization level is O3 (in the getInlineParams variant that // takes the opt and size levels). // FIXME: Remove this check (and make the assignment unconditional) after // addressing size regression issues at O2. if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0) Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold; // Set the ColdCallSiteThreshold knob from the // -inline-cold-callsite-threshold. Params.ColdCallSiteThreshold = ColdCallSiteThreshold; // Set the OptMinSizeThreshold and OptSizeThreshold params only if the // -inlinehint-threshold commandline option is not explicitly given. If that // option is present, then its value applies even for callees with size and // minsize attributes. // If the -inline-threshold is not specified, set the ColdThreshold from the // -inlinecold-threshold even if it is not explicitly passed. 
If // -inline-threshold is specified, then -inlinecold-threshold needs to be // explicitly specified to set the ColdThreshold knob if (InlineThreshold.getNumOccurrences() == 0) { Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold; Params.OptSizeThreshold = InlineConstants::OptSizeThreshold; Params.ColdThreshold = ColdThreshold; } else if (ColdThreshold.getNumOccurrences() > 0) { Params.ColdThreshold = ColdThreshold; } return Params; } InlineParams llvm::getInlineParams() { return getInlineParams(DefaultThreshold); } // Compute the default threshold for inlining based on the opt level and the // size opt level. static int computeThresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel) { if (OptLevel > 2) return InlineConstants::OptAggressiveThreshold; if (SizeOptLevel == 1) // -Os return InlineConstants::OptSizeThreshold; if (SizeOptLevel == 2) // -Oz return InlineConstants::OptMinSizeThreshold; return DefaultThreshold; } InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) { auto Params = getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel)); // At O3, use the value of -locally-hot-callsite-threshold option to populate // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only // when it is specified explicitly. if (OptLevel > 2) Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold; return Params; } PreservedAnalyses InlineCostAnnotationPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) { PrintInstructionComments = true; std::function GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return FAM.getResult(F); }; Module *M = F.getParent(); ProfileSummaryInfo PSI(*M); DataLayout DL(M); TargetTransformInfo TTI(DL); // FIXME: Redesign the usage of InlineParams to expand the scope of this pass. // In the current implementation, the type of InlineParams doesn't matter as // the pass serves only for verification of inliner's decisions. // We can add a flag which determines InlineParams for this run. Right now, // the default InlineParams are used. const InlineParams Params = llvm::getInlineParams(); for (BasicBlock &BB : F) { for (Instruction &I : BB) { if (CallInst *CI = dyn_cast(&I)) { Function *CalledFunction = CI->getCalledFunction(); if (!CalledFunction || CalledFunction->isDeclaration()) continue; OptimizationRemarkEmitter ORE(CalledFunction); InlineCostCallAnalyzer ICCA(*CalledFunction, *CI, Params, TTI, GetAssumptionCache, nullptr, &PSI, &ORE); ICCA.analyze(); OS << " Analyzing call of " << CalledFunction->getName() << "... (caller:" << CI->getCaller()->getName() << ")\n"; ICCA.print(OS); OS << "\n"; } } } return PreservedAnalyses::all(); } diff --git a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp index 0dbbc218e946..bc03776bde19 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/MachineSink.cpp @@ -1,1833 +1,1845 @@ //===- MachineSink.cpp - Sinking for machine instructions -----------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This pass moves instructions into successor blocks when possible, so that // they aren't executed on paths where their results aren't needed. 
// // This pass is not intended to be a replacement or a complete alternative // for an LLVM-IR-level sinking pass. It is only designed to sink simple // constructs that are not exposed before lowering and instruction selection. // //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CFG.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/LLVMContext.h" #include "llvm/InitializePasses.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include #include #include #include #include #include using namespace llvm; #define DEBUG_TYPE "machine-sink" static cl::opt SplitEdges("machine-sink-split", cl::desc("Split critical edges during machine sinking"), cl::init(true), cl::Hidden); static cl::opt UseBlockFreqInfo("machine-sink-bfi", cl::desc("Use block frequency info to find successors to sink"), cl::init(true), cl::Hidden); static cl::opt SplitEdgeProbabilityThreshold( "machine-sink-split-probability-threshold", cl::desc( "Percentage threshold for splitting single-instruction critical edge. 
" "If the branch threshold is higher than this threshold, we allow " "speculative execution of up to 1 instruction to avoid branching to " "splitted critical edge"), cl::init(40), cl::Hidden); static cl::opt SinkLoadInstsPerBlockThreshold( "machine-sink-load-instrs-threshold", cl::desc("Do not try to find alias store for a load if there is a in-path " "block whose instruction number is higher than this threshold."), cl::init(2000), cl::Hidden); static cl::opt SinkLoadBlocksThreshold( "machine-sink-load-blocks-threshold", cl::desc("Do not try to find alias store for a load if the block number in " "the straight line is higher than this threshold."), cl::init(20), cl::Hidden); static cl::opt SinkInstsIntoLoop("sink-insts-to-avoid-spills", cl::desc("Sink instructions into loops to avoid " "register spills"), cl::init(false), cl::Hidden); static cl::opt SinkIntoLoopLimit( "machine-sink-loop-limit", cl::desc("The maximum number of instructions considered for loop sinking."), cl::init(50), cl::Hidden); STATISTIC(NumSunk, "Number of machine instructions sunk"); STATISTIC(NumLoopSunk, "Number of machine instructions sunk into a loop"); STATISTIC(NumSplit, "Number of critical edges split"); STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumPostRACopySink, "Number of copies sunk after RA"); namespace { class MachineSinking : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MachineRegisterInfo *MRI; // Machine register information MachineDominatorTree *DT; // Machine dominator tree MachinePostDominatorTree *PDT; // Machine post dominator tree MachineLoopInfo *LI; MachineBlockFrequencyInfo *MBFI; const MachineBranchProbabilityInfo *MBPI; AliasAnalysis *AA; RegisterClassInfo RegClassInfo; // Remember which edges have been considered for breaking. SmallSet, 8> CEBCandidates; // Remember which edges we are about to split. // This is different from CEBCandidates since those edges // will be split. SetVector> ToSplit; DenseSet RegsToClearKillFlags; using AllSuccsCache = std::map>; /// DBG_VALUE pointer and flag. The flag is true if this DBG_VALUE is /// post-dominated by another DBG_VALUE of the same variable location. /// This is necessary to detect sequences such as: /// %0 = someinst /// DBG_VALUE %0, !123, !DIExpression() /// %1 = anotherinst /// DBG_VALUE %1, !123, !DIExpression() /// Where if %0 were to sink, the DBG_VAUE should not sink with it, as that /// would re-order assignments. using SeenDbgUser = PointerIntPair; /// Record of DBG_VALUE uses of vregs in a block, so that we can identify /// debug instructions to sink. SmallDenseMap> SeenDbgUsers; /// Record of debug variables that have had their locations set in the /// current block. DenseSet SeenDbgVars; std::map, bool> HasStoreCache; std::map, std::vector> StoreInstrCache; /// Cached BB's register pressure. 
std::map> CachedRegisterPressure; public: static char ID; // Pass identification MachineSinking() : MachineFunctionPass(ID) { initializeMachineSinkingPass(*PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override { MachineFunctionPass::getAnalysisUsage(AU); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addPreserved(); if (UseBlockFreqInfo) AU.addRequired(); } void releaseMemory() override { CEBCandidates.clear(); } private: bool ProcessBlock(MachineBasicBlock &MBB); void ProcessDbgInst(MachineInstr &MI); bool isWorthBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From, MachineBasicBlock *To); bool hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To, MachineInstr &MI); /// Postpone the splitting of the given critical /// edge (\p From, \p To). /// /// We do not split the edges on the fly. Indeed, this invalidates /// the dominance information and thus triggers a lot of updates /// of that information underneath. /// Instead, we postpone all the splits after each iteration of /// the main loop. That way, the information is at least valid /// for the lifetime of an iteration. /// /// \return True if the edge is marked as toSplit, false otherwise. /// False can be returned if, for instance, this is not profitable. bool PostponeSplitCriticalEdge(MachineInstr &MI, MachineBasicBlock *From, MachineBasicBlock *To, bool BreakPHIEdge); bool SinkInstruction(MachineInstr &MI, bool &SawStore, AllSuccsCache &AllSuccessors); /// If we sink a COPY inst, some debug users of it's destination may no /// longer be dominated by the COPY, and will eventually be dropped. /// This is easily rectified by forwarding the non-dominated debug uses /// to the copy source. 
void SalvageUnsunkDebugUsersOfCopy(MachineInstr &, MachineBasicBlock *TargetBlock); bool AllUsesDominatedByBlock(Register Reg, MachineBasicBlock *MBB, MachineBasicBlock *DefMBB, bool &BreakPHIEdge, bool &LocalUse) const; MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, bool &BreakPHIEdge, AllSuccsCache &AllSuccessors); void FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB, SmallVectorImpl &Candidates); bool SinkIntoLoop(MachineLoop *L, MachineInstr &I); bool isProfitableToSinkTo(Register Reg, MachineInstr &MI, MachineBasicBlock *MBB, MachineBasicBlock *SuccToSinkTo, AllSuccsCache &AllSuccessors); bool PerformTrivialForwardCoalescing(MachineInstr &MI, MachineBasicBlock *MBB); SmallVector & GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB, AllSuccsCache &AllSuccessors) const; std::vector &getBBRegisterPressure(MachineBasicBlock &MBB); }; } // end anonymous namespace char MachineSinking::ID = 0; char &llvm::MachineSinkingID = MachineSinking::ID; INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE, "Machine code sinking", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE, "Machine code sinking", false, false) bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI, MachineBasicBlock *MBB) { if (!MI.isCopy()) return false; Register SrcReg = MI.getOperand(1).getReg(); Register DstReg = MI.getOperand(0).getReg(); if (!Register::isVirtualRegister(SrcReg) || !Register::isVirtualRegister(DstReg) || !MRI->hasOneNonDBGUse(SrcReg)) return false; const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg); const TargetRegisterClass *DRC = MRI->getRegClass(DstReg); if (SRC != DRC) return false; MachineInstr *DefMI = MRI->getVRegDef(SrcReg); if (DefMI->isCopyLike()) return false; LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI); LLVM_DEBUG(dbgs() << "*** to: " << MI); MRI->replaceRegWith(DstReg, SrcReg); MI.eraseFromParent(); // Conservatively, clear any kill flags, since it's possible that they are no // longer correct. MRI->clearKillFlags(SrcReg); ++NumCoalesces; return true; } /// AllUsesDominatedByBlock - Return true if all uses of the specified register /// occur in blocks dominated by the specified block. If any use is in the /// definition block, then return false since it is never legal to move def /// after uses. bool MachineSinking::AllUsesDominatedByBlock(Register Reg, MachineBasicBlock *MBB, MachineBasicBlock *DefMBB, bool &BreakPHIEdge, bool &LocalUse) const { assert(Register::isVirtualRegister(Reg) && "Only makes sense for vregs"); // Ignore debug uses because debug info doesn't affect the code. if (MRI->use_nodbg_empty(Reg)) return true; // BreakPHIEdge is true if all the uses are in the successor MBB being sunken // into and they are all PHI nodes. In this case, machine-sink must break // the critical edge first. e.g. // // %bb.1: // Predecessors according to CFG: %bb.0 // ... // %def = DEC64_32r %x, implicit-def dead %eflags // ... 
// JE_4 <%bb.37>, implicit %eflags // Successors according to CFG: %bb.37 %bb.2 // // %bb.2: // %p = PHI %y, %bb.0, %def, %bb.1 if (all_of(MRI->use_nodbg_operands(Reg), [&](MachineOperand &MO) { MachineInstr *UseInst = MO.getParent(); unsigned OpNo = UseInst->getOperandNo(&MO); MachineBasicBlock *UseBlock = UseInst->getParent(); return UseBlock == MBB && UseInst->isPHI() && UseInst->getOperand(OpNo + 1).getMBB() == DefMBB; })) { BreakPHIEdge = true; return true; } for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { // Determine the block of the use. MachineInstr *UseInst = MO.getParent(); unsigned OpNo = &MO - &UseInst->getOperand(0); MachineBasicBlock *UseBlock = UseInst->getParent(); if (UseInst->isPHI()) { // PHI nodes use the operand in the predecessor block, not the block with // the PHI. UseBlock = UseInst->getOperand(OpNo+1).getMBB(); } else if (UseBlock == DefMBB) { LocalUse = true; return false; } // Check that it dominates. if (!DT->dominates(MBB, UseBlock)) return false; } return true; } /// Return true if this machine instruction loads from global offset table or /// constant pool. static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) { assert(MI.mayLoad() && "Expected MI that loads!"); // If we lost memory operands, conservatively assume that the instruction // reads from everything.. if (MI.memoperands_empty()) return true; for (MachineMemOperand *MemOp : MI.memoperands()) if (const PseudoSourceValue *PSV = MemOp->getPseudoValue()) if (PSV->isGOT() || PSV->isConstantPool()) return true; return false; } void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB, SmallVectorImpl &Candidates) { for (auto &MI : *BB) { LLVM_DEBUG(dbgs() << "LoopSink: Analysing candidate: " << MI); if (!TII->shouldSink(MI)) { LLVM_DEBUG(dbgs() << "LoopSink: Instruction not a candidate for this " "target\n"); continue; } if (!L->isLoopInvariant(MI)) { LLVM_DEBUG(dbgs() << "LoopSink: Instruction is not loop invariant\n"); continue; } bool DontMoveAcrossStore = true; if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) { LLVM_DEBUG(dbgs() << "LoopSink: Instruction not safe to move.\n"); continue; } if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) { LLVM_DEBUG(dbgs() << "LoopSink: Dont sink GOT or constant pool loads\n"); continue; } if (MI.isConvergent()) continue; const MachineOperand &MO = MI.getOperand(0); if (!MO.isReg() || !MO.getReg() || !MO.isDef()) continue; if (!MRI->hasOneDef(MO.getReg())) continue; LLVM_DEBUG(dbgs() << "LoopSink: Instruction added as candidate.\n"); Candidates.push_back(&MI); } } bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; LLVM_DEBUG(dbgs() << "******** Machine Sinking ********\n"); TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); DT = &getAnalysis(); PDT = &getAnalysis(); LI = &getAnalysis(); MBFI = UseBlockFreqInfo ? &getAnalysis() : nullptr; MBPI = &getAnalysis(); AA = &getAnalysis().getAAResults(); RegClassInfo.runOnMachineFunction(MF); + // MachineSink currently uses MachineLoopInfo, which only recognizes natural + // loops. As such, we could sink instructions into irreducible cycles, which + // would be non-profitable. + // WARNING: The current implementation of hasStoreBetween() is incorrect for + // sinking into irreducible cycles (PR53990), this bailout is currently + // necessary for correctness, not just profitability. 
+ ReversePostOrderTraversal RPOT(&*MF.begin()); + if (containsIrreducibleCFG(RPOT, *LI)) + return false; + bool EverMadeChange = false; while (true) { bool MadeChange = false; // Process all basic blocks. CEBCandidates.clear(); ToSplit.clear(); for (auto &MBB: MF) MadeChange |= ProcessBlock(MBB); // If we have anything we marked as toSplit, split it now. for (auto &Pair : ToSplit) { auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this); if (NewSucc != nullptr) { LLVM_DEBUG(dbgs() << " *** Splitting critical edge: " << printMBBReference(*Pair.first) << " -- " << printMBBReference(*NewSucc) << " -- " << printMBBReference(*Pair.second) << '\n'); if (MBFI) MBFI->onEdgeSplit(*Pair.first, *NewSucc, *MBPI); MadeChange = true; ++NumSplit; } else LLVM_DEBUG(dbgs() << " *** Not legal to break critical edge\n"); } // If this iteration over the code changed anything, keep iterating. if (!MadeChange) break; EverMadeChange = true; } if (SinkInstsIntoLoop) { SmallVector Loops(LI->begin(), LI->end()); for (auto *L : Loops) { MachineBasicBlock *Preheader = LI->findLoopPreheader(L); if (!Preheader) { LLVM_DEBUG(dbgs() << "LoopSink: Can't find preheader\n"); continue; } SmallVector Candidates; FindLoopSinkCandidates(L, Preheader, Candidates); // Walk the candidates in reverse order so that we start with the use // of a def-use chain, if there is any. // TODO: Sort the candidates using a cost-model. unsigned i = 0; for (MachineInstr *I : llvm::reverse(Candidates)) { if (i++ == SinkIntoLoopLimit) { LLVM_DEBUG(dbgs() << "LoopSink: Limit reached of instructions to " "be analysed."); break; } if (!SinkIntoLoop(L, *I)) break; EverMadeChange = true; ++NumLoopSunk; } } } HasStoreCache.clear(); StoreInstrCache.clear(); // Now clear any kill flags for recorded registers. for (auto I : RegsToClearKillFlags) MRI->clearKillFlags(I); RegsToClearKillFlags.clear(); return EverMadeChange; } bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { // Can't sink anything out of a block that has less than two successors. if (MBB.succ_size() <= 1 || MBB.empty()) return false; // Don't bother sinking code out of unreachable blocks. In addition to being // unprofitable, it can also lead to infinite looping, because in an // unreachable loop there may be nowhere to stop. if (!DT->isReachableFromEntry(&MBB)) return false; bool MadeChange = false; // Cache all successors, sorted by frequency info and loop depth. AllSuccsCache AllSuccessors; // Walk the basic block bottom-up. Remember if we saw a store. MachineBasicBlock::iterator I = MBB.end(); --I; bool ProcessedBegin, SawStore = false; do { MachineInstr &MI = *I; // The instruction to sink. // Predecrement I (if it's not begin) so that it isn't invalidated by // sinking. ProcessedBegin = I == MBB.begin(); if (!ProcessedBegin) --I; if (MI.isDebugOrPseudoInstr()) { if (MI.isDebugValue()) ProcessDbgInst(MI); continue; } bool Joined = PerformTrivialForwardCoalescing(MI, &MBB); if (Joined) { MadeChange = true; continue; } if (SinkInstruction(MI, SawStore, AllSuccessors)) { ++NumSunk; MadeChange = true; } // If we just processed the first instruction in the block, we're done. } while (!ProcessedBegin); SeenDbgUsers.clear(); SeenDbgVars.clear(); // recalculate the bb register pressure after sinking one BB. CachedRegisterPressure.clear(); return MadeChange; } void MachineSinking::ProcessDbgInst(MachineInstr &MI) { // When we see DBG_VALUEs for registers, record any vreg it reads, so that // we know what to sink if the vreg def sinks. 
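// A minimal model of the bookkeeping done in this function: for each debug
// value we remember whether its variable has already been seen in this block,
// so that sinking the defining instruction later cannot silently reorder
// variable assignments. The types are stand-ins, not the MIR classes.
#include <set>
#include <unordered_map>
#include <vector>

struct DbgUserRecord {
  int DebugInstId;
  bool VariableSeenBefore; // another location for this variable precedes it
};

void recordDebugUse(int DebugInstId, int Variable,
                    const std::vector<int> &UsedVRegs, std::set<int> &SeenVars,
                    std::unordered_map<int, std::vector<DbgUserRecord>>
                        &SeenDbgUsersByVReg) {
  bool SeenBefore = SeenVars.count(Variable) != 0;
  for (int VReg : UsedVRegs)
    SeenDbgUsersByVReg[VReg].push_back({DebugInstId, SeenBefore});
  SeenVars.insert(Variable);
}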
assert(MI.isDebugValue() && "Expected DBG_VALUE for processing"); DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(), MI.getDebugLoc()->getInlinedAt()); bool SeenBefore = SeenDbgVars.contains(Var); for (MachineOperand &MO : MI.debug_operands()) { if (MO.isReg() && MO.getReg().isVirtual()) SeenDbgUsers[MO.getReg()].push_back(SeenDbgUser(&MI, SeenBefore)); } // Record the variable for any DBG_VALUE, to avoid re-ordering any of them. SeenDbgVars.insert(Var); } bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From, MachineBasicBlock *To) { // FIXME: Need much better heuristics. // If the pass has already considered breaking this edge (during this pass // through the function), then let's go ahead and break it. This means // sinking multiple "cheap" instructions into the same block. if (!CEBCandidates.insert(std::make_pair(From, To)).second) return true; if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI)) return true; if (From->isSuccessor(To) && MBPI->getEdgeProbability(From, To) <= BranchProbability(SplitEdgeProbabilityThreshold, 100)) return true; // MI is cheap, we probably don't want to break the critical edge for it. // However, if this would allow some definitions of its source operands // to be sunk then it's probably worth it. for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isUse()) continue; Register Reg = MO.getReg(); if (Reg == 0) continue; // We don't move live definitions of physical registers, // so sinking their uses won't enable any opportunities. if (Register::isPhysicalRegister(Reg)) continue; // If this instruction is the only user of a virtual register, // check if breaking the edge will enable sinking // both this instruction and the defining instruction. if (MRI->hasOneNonDBGUse(Reg)) { // If the definition resides in same MBB, // claim it's likely we can sink these together. // If definition resides elsewhere, we aren't // blocking it from being sunk so don't break the edge. MachineInstr *DefMI = MRI->getVRegDef(Reg); if (DefMI->getParent() == MI.getParent()) return true; } } return false; } bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI, MachineBasicBlock *FromBB, MachineBasicBlock *ToBB, bool BreakPHIEdge) { if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB)) return false; // Avoid breaking back edge. From == To means backedge for single BB loop. if (!SplitEdges || FromBB == ToBB) return false; // Check for backedges of more "complex" loops. if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) && LI->isLoopHeader(ToBB)) return false; // It's not always legal to break critical edges and sink the computation // to the edge. // // %bb.1: // v1024 // Beq %bb.3 // // %bb.2: // ... no uses of v1024 // // %bb.3: // ... // = v1024 // // If %bb.1 -> %bb.3 edge is broken and computation of v1024 is inserted: // // %bb.1: // ... // Bne %bb.2 // %bb.4: // v1024 = // B %bb.3 // %bb.2: // ... no uses of v1024 // // %bb.3: // ... // = v1024 // // This is incorrect since v1024 is not computed along the %bb.1->%bb.2->%bb.3 // flow. We need to ensure the new basic block where the computation is // sunk to dominates all the uses. // It's only legal to break critical edge and sink the computation to the // new block if all the predecessors of "To", except for "From", are // not dominated by "From". Given SSA property, this means these // predecessors are dominated by "To". // // There is no need to do this check if all the uses are PHI nodes. 
// PHI sources are only defined on the specific predecessor edges.
  if (!BreakPHIEdge) {
    for (MachineBasicBlock *Pred : ToBB->predecessors())
      if (Pred != FromBB && !DT->dominates(ToBB, Pred))
        return false;
  }

  ToSplit.insert(std::make_pair(FromBB, ToBB));

  return true;
}

std::vector<unsigned> &
MachineSinking::getBBRegisterPressure(MachineBasicBlock &MBB) {
  // Currently, to save compile time, MBB's register pressure will not change
  // in one ProcessBlock iteration because of CachedRegisterPressure, but MBB's
  // register pressure does change after sinking any instructions into it.
  // FIXME: need an accurate and cheap register pressure estimation model here.
  auto RP = CachedRegisterPressure.find(&MBB);
  if (RP != CachedRegisterPressure.end())
    return RP->second;

  RegionPressure Pressure;
  RegPressureTracker RPTracker(Pressure);

  // Initialize the register pressure tracker.
  RPTracker.init(MBB.getParent(), &RegClassInfo, nullptr, &MBB, MBB.end(),
                 /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);

  for (MachineBasicBlock::iterator MII = MBB.instr_end(),
                                   MIE = MBB.instr_begin();
       MII != MIE; --MII) {
    MachineInstr &MI = *std::prev(MII);
    if (MI.isDebugInstr() || MI.isPseudoProbe())
      continue;
    RegisterOperands RegOpers;
    RegOpers.collect(MI, *TRI, *MRI, false, false);
    RPTracker.recedeSkipDebugValues();
    assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
    RPTracker.recede(RegOpers);
  }

  RPTracker.closeRegion();
  auto It = CachedRegisterPressure.insert(
      std::make_pair(&MBB, RPTracker.getPressure().MaxSetPressure));
  return It.first->second;
}

/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                                          MachineBasicBlock *MBB,
                                          MachineBasicBlock *SuccToSinkTo,
                                          AllSuccsCache &AllSuccessors) {
  assert(SuccToSinkTo && "Invalid SinkTo Candidate BB");

  if (MBB == SuccToSinkTo)
    return false;

  // It is profitable if SuccToSinkTo does not post dominate current block.
  if (!PDT->dominates(SuccToSinkTo, MBB))
    return true;

  // It is profitable to sink an instruction from a deeper loop to a shallower
  // loop, even if the latter post-dominates the former (PR21115).
  if (LI->getLoopDepth(MBB) > LI->getLoopDepth(SuccToSinkTo))
    return true;

  // Check if only use in post dominated block is PHI instruction.
  bool NonPHIUse = false;
  for (MachineInstr &UseInst : MRI->use_nodbg_instructions(Reg)) {
    MachineBasicBlock *UseBlock = UseInst.getParent();
    if (UseBlock == SuccToSinkTo && !UseInst.isPHI())
      NonPHIUse = true;
  }
  if (!NonPHIUse)
    return true;

  // If SuccToSinkTo post dominates then also it may be profitable if MI
  // can be further profitably sunk into another block in the next round.
  bool BreakPHIEdge = false;
  // FIXME - If finding successor is compile time expensive then cache results.
  if (MachineBasicBlock *MBB2 =
          FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors))
    return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors);

  MachineLoop *ML = LI->getLoopFor(MBB);

  // If the instruction is not inside a loop, it is not profitable to sink MI to
  // a post dominate block SuccToSinkTo.
  if (!ML)
    return false;

  auto isRegisterPressureSetExceedLimit = [&](const TargetRegisterClass *RC) {
    unsigned Weight = TRI->getRegClassWeight(RC).RegWeight;
    const int *PS = TRI->getRegClassPressureSets(RC);
    // Get register pressure for block SuccToSinkTo.
    std::vector<unsigned> BBRegisterPressure =
        getBBRegisterPressure(*SuccToSinkTo);
    for (; *PS != -1; PS++)
      // Check if any register pressure set exceeds its limit in block
      // SuccToSinkTo after sinking.
      if (Weight + BBRegisterPressure[*PS] >=
          TRI->getRegPressureSetLimit(*MBB->getParent(), *PS))
        return true;
    return false;
  };

  // If this instruction is inside a loop and sinking it can shorten the live
  // ranges of more registers, it is still profitable.
  for (const MachineOperand &MO : MI.operands()) {
    // Ignore non-register operands.
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    if (Register::isPhysicalRegister(Reg)) {
      if (MO.isUse() &&
          (MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO)))
        continue;

      // Don't handle non-constant and non-ignorable physical registers.
      return false;
    }

    // Users for the defs are all dominated by SuccToSinkTo.
    if (MO.isDef()) {
      // This def register's live range is shortened after sinking.
      bool LocalUse = false;
      if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge,
                                   LocalUse))
        return false;
    } else {
      MachineInstr *DefMI = MRI->getVRegDef(Reg);
      // If DefMI is defined outside of the loop, there should be no live range
      // impact for this operand. "Defined outside of the loop" means either:
      // 1: the definition is outside of the loop, or
      // 2: the definition is in this loop, but it is a PHI in the loop header.
      if (LI->getLoopFor(DefMI->getParent()) != ML ||
          (DefMI->isPHI() && LI->isLoopHeader(DefMI->getParent())))
        continue;
      // The DefMI is defined inside the loop.
      // If sinking this operand makes some register pressure set exceed its
      // limit, it is not profitable.
      if (isRegisterPressureSetExceedLimit(MRI->getRegClass(Reg))) {
        LLVM_DEBUG(dbgs() << "register pressure exceed limit, not profitable.");
        return false;
      }
    }
  }

  // If MI is in the loop and all its operands are live across the whole loop,
  // or if no operand sinking makes a register pressure set exceed its limit,
  // it is profitable to sink MI.
  return true;
}

/// Get the sorted sequence of successors for this MachineBasicBlock, possibly
/// computing it if it was not already cached.
SmallVector<MachineBasicBlock *, 4> &
MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
                                       AllSuccsCache &AllSuccessors) const {
  // Do we have the sorted successors in cache?
  auto Succs = AllSuccessors.find(MBB);
  if (Succs != AllSuccessors.end())
    return Succs->second;

  SmallVector<MachineBasicBlock *, 4> AllSuccs(MBB->successors());

  // Handle cases where sinking can happen but where the sink point isn't a
  // successor. For example:
  //
  //   x = computation
  //   if () {} else {}
  //   use x
  //
  for (MachineDomTreeNode *DTChild : DT->getNode(MBB)->children()) {
    // DomTree children of MBB that have MBB as immediate dominator are added.
    if (DTChild->getIDom()->getBlock() == MI.getParent() &&
        // Skip MBBs already added to the AllSuccs vector above.
        !MBB->isSuccessor(DTChild->getBlock()))
      AllSuccs.push_back(DTChild->getBlock());
  }

  // Sort Successors according to their loop depth or block frequency info.
  llvm::stable_sort(
      AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
        uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
        uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
        bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0;
        return HasBlockFreq ? LHSFreq < RHSFreq
                            : LI->getLoopDepth(L) < LI->getLoopDepth(R);
      });

  auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs));

  return it.first->second;
}

/// FindSuccToSinkTo - Find a successor to sink this instruction to.
MachineBasicBlock * MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, bool &BreakPHIEdge, AllSuccsCache &AllSuccessors) { assert (MBB && "Invalid MachineBasicBlock!"); // Loop over all the operands of the specified instruction. If there is // anything we can't handle, bail out. // SuccToSinkTo - This is the successor to sink this instruction to, once we // decide. MachineBasicBlock *SuccToSinkTo = nullptr; for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; // Ignore non-register operands. Register Reg = MO.getReg(); if (Reg == 0) continue; if (Register::isPhysicalRegister(Reg)) { if (MO.isUse()) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. if (!MRI->isConstantPhysReg(Reg) && !TII->isIgnorableUse(MO)) return nullptr; } else if (!MO.isDead()) { // A def that isn't dead. We can't move it. return nullptr; } } else { // Virtual register uses are always safe to sink. if (MO.isUse()) continue; // If it's not safe to move defs of the register class, then abort. if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg))) return nullptr; // Virtual register defs can only be sunk if all their uses are in blocks // dominated by one of the successors. if (SuccToSinkTo) { // If a previous operand picked a block to sink to, then this operand // must be sinkable to the same block. bool LocalUse = false; if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge, LocalUse)) return nullptr; continue; } // Otherwise, we should look at all the successors and decide which one // we should sink to. If we have reliable block frequency information // (frequency != 0) available, give successors with smaller frequencies // higher priority, otherwise prioritize smaller loop depths. for (MachineBasicBlock *SuccBlock : GetAllSortedSuccessors(MI, MBB, AllSuccessors)) { bool LocalUse = false; if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB, BreakPHIEdge, LocalUse)) { SuccToSinkTo = SuccBlock; break; } if (LocalUse) // Def is used locally, it's never safe to move this def. return nullptr; } // If we couldn't find a block to sink to, ignore this instruction. if (!SuccToSinkTo) return nullptr; if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo, AllSuccessors)) return nullptr; } } // It is not possible to sink an instruction into its own block. This can // happen with loops. if (MBB == SuccToSinkTo) return nullptr; // It's not safe to sink instructions to EH landing pad. Control flow into // landing pad is implicitly defined. if (SuccToSinkTo && SuccToSinkTo->isEHPad()) return nullptr; // It ought to be okay to sink instructions into an INLINEASM_BR target, but // only if we make sure that MI occurs _before_ an INLINEASM_BR instruction in // the source block (which this code does not yet do). So for now, forbid // doing so. if (SuccToSinkTo && SuccToSinkTo->isInlineAsmBrIndirectTarget()) return nullptr; return SuccToSinkTo; } /// Return true if MI is likely to be usable as a memory operation by the /// implicit null check optimization. /// /// This is a "best effort" heuristic, and should not be relied upon for /// correctness. This returning true does not guarantee that the implicit null /// check optimization is legal over MI, and this returning false does not /// guarantee MI cannot possibly be used to do a null check. 
static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { using MachineBranchPredicate = TargetInstrInfo::MachineBranchPredicate; auto *MBB = MI.getParent(); if (MBB->pred_size() != 1) return false; auto *PredMBB = *MBB->pred_begin(); auto *PredBB = PredMBB->getBasicBlock(); // Frontends that don't use implicit null checks have no reason to emit // branches with make.implicit metadata, and this function should always // return false for them. if (!PredBB || !PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit)) return false; const MachineOperand *BaseOp; int64_t Offset; bool OffsetIsScalable; if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI)) return false; if (!BaseOp->isReg()) return false; if (!(MI.mayLoad() && !MI.isPredicable())) return false; MachineBranchPredicate MBP; if (TII->analyzeBranchPredicate(*PredMBB, MBP, false)) return false; return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 && (MBP.Predicate == MachineBranchPredicate::PRED_NE || MBP.Predicate == MachineBranchPredicate::PRED_EQ) && MBP.LHS.getReg() == BaseOp->getReg(); } /// If the sunk instruction is a copy, try to forward the copy instead of /// leaving an 'undef' DBG_VALUE in the original location. Don't do this if /// there's any subregister weirdness involved. Returns true if copy /// propagation occurred. static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI, Register Reg) { const MachineRegisterInfo &MRI = SinkInst.getMF()->getRegInfo(); const TargetInstrInfo &TII = *SinkInst.getMF()->getSubtarget().getInstrInfo(); // Copy DBG_VALUE operand and set the original to undef. We then check to // see whether this is something that can be copy-forwarded. If it isn't, // continue around the loop. const MachineOperand *SrcMO = nullptr, *DstMO = nullptr; auto CopyOperands = TII.isCopyInstr(SinkInst); if (!CopyOperands) return false; SrcMO = CopyOperands->Source; DstMO = CopyOperands->Destination; // Check validity of forwarding this copy. bool PostRA = MRI.getNumVirtRegs() == 0; // Trying to forward between physical and virtual registers is too hard. if (Reg.isVirtual() != SrcMO->getReg().isVirtual()) return false; // Only try virtual register copy-forwarding before regalloc, and physical // register copy-forwarding after regalloc. bool arePhysRegs = !Reg.isVirtual(); if (arePhysRegs != PostRA) return false; // Pre-regalloc, only forward if all subregisters agree (or there are no // subregs at all). More analysis might recover some forwardable copies. if (!PostRA) for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg)) if (DbgMO.getSubReg() != SrcMO->getSubReg() || DbgMO.getSubReg() != DstMO->getSubReg()) return false; // Post-regalloc, we may be sinking a DBG_VALUE of a sub or super-register // of this copy. Only forward the copy if the DBG_VALUE operand exactly // matches the copy destination. if (PostRA && Reg != DstMO->getReg()) return false; for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg)) { DbgMO.setReg(SrcMO->getReg()); DbgMO.setSubReg(SrcMO->getSubReg()); } return true; } using MIRegs = std::pair>; /// Sink an instruction and its associated debug instructions. 
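// --- Illustrative sketch (not part of the pass): attemptDebugCopyProp above
// rewrites a DBG_VALUE that refers to the destination of a sunk COPY so that it
// refers to the copy's source, instead of dropping the variable location. The
// toy types below (ToyDbgOperand, ToyCopy, forwardCopyIntoDbgValue) are
// hypothetical stand-ins for MachineOperand/MachineInstr and only mirror the
// subregister-agreement rule used before register allocation.
#include <cstdio>
#include <vector>

struct ToyDbgOperand { unsigned Reg; unsigned SubReg; };
struct ToyCopy { unsigned DstReg, DstSub, SrcReg, SrcSub; };

// Forward the copy into every debug operand for Reg; refuse (so the caller
// would mark the location undef) if any subregister does not line up.
static bool forwardCopyIntoDbgValue(std::vector<ToyDbgOperand> &DbgOps,
                                    unsigned Reg, const ToyCopy &Cp) {
  if (Reg != Cp.DstReg)
    return false;
  for (const ToyDbgOperand &Op : DbgOps)
    if (Op.Reg == Reg && (Op.SubReg != Cp.SrcSub || Op.SubReg != Cp.DstSub))
      return false;
  for (ToyDbgOperand &Op : DbgOps)
    if (Op.Reg == Reg) {
      Op.Reg = Cp.SrcReg;
      Op.SubReg = Cp.SrcSub;
    }
  return true;
}

int main() {
  std::vector<ToyDbgOperand> Ops{{5, 0}};  // DBG_VALUE %5
  ToyCopy Cp{5, 0, 7, 0};                  // %5 = COPY %7
  bool Forwarded = forwardCopyIntoDbgValue(Ops, 5, Cp);
  std::printf("forwarded=%d now refers to %%%u\n", Forwarded, Ops[0].Reg);
}
// --- end of illustrative sketch.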
static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo, MachineBasicBlock::iterator InsertPos, SmallVectorImpl<MIRegs> &DbgValuesToSink) { // If we cannot find a location to use (merge with), then we erase the debug // location to prevent debug-info driven tools from potentially reporting // wrong location information. if (!SuccToSinkTo.empty() && InsertPos != SuccToSinkTo.end()) MI.setDebugLoc(DILocation::getMergedLocation(MI.getDebugLoc(), InsertPos->getDebugLoc())); else MI.setDebugLoc(DebugLoc()); // Move the instruction. MachineBasicBlock *ParentBlock = MI.getParent(); SuccToSinkTo.splice(InsertPos, ParentBlock, MI, ++MachineBasicBlock::iterator(MI)); // Sink a copy of debug users to the insert position. Mark the original // DBG_VALUE location as 'undef', indicating that any earlier variable // location should be terminated as we've optimised away the value at this // point. for (auto DbgValueToSink : DbgValuesToSink) { MachineInstr *DbgMI = DbgValueToSink.first; MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(DbgMI); SuccToSinkTo.insert(InsertPos, NewDbgMI); bool PropagatedAllSunkOps = true; for (unsigned Reg : DbgValueToSink.second) { if (DbgMI->hasDebugOperandForReg(Reg)) { if (!attemptDebugCopyProp(MI, *DbgMI, Reg)) { PropagatedAllSunkOps = false; break; } } } if (!PropagatedAllSunkOps) DbgMI->setDebugValueUndef(); } } /// hasStoreBetween - check if there is a store between straight-line blocks /// From and To. bool MachineSinking::hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To, MachineInstr &MI) { // Make sure From and To are in a straight line, which means From dominates To // and To post-dominates From. if (!DT->dominates(From, To) || !PDT->dominates(To, From)) return true; auto BlockPair = std::make_pair(From, To); // Has this block pair been queried before and does it have a definite cached // result? if (HasStoreCache.find(BlockPair) != HasStoreCache.end()) return HasStoreCache[BlockPair]; if (StoreInstrCache.find(BlockPair) != StoreInstrCache.end()) return llvm::any_of(StoreInstrCache[BlockPair], [&](MachineInstr *I) { return I->mayAlias(AA, MI, false); }); bool SawStore = false; bool HasAliasedStore = false; DenseSet<MachineBasicBlock *> HandledBlocks; DenseSet<MachineBasicBlock *> HandledDomBlocks; // Go through all reachable blocks from From. for (MachineBasicBlock *BB : depth_first(From)) { // We insert the instruction at the start of block To, so no need to worry // about stores inside To. // Stores in block From have already been considered when entering function // SinkInstruction. if (BB == To || BB == From) continue; // We already handled this BB in a previous iteration. if (HandledBlocks.count(BB)) continue; HandledBlocks.insert(BB); // If To post-dominates BB, BB must be on a path from block From. if (PDT->dominates(To, BB)) { if (!HandledDomBlocks.count(BB)) HandledDomBlocks.insert(BB); // If this BB is too big or the number of blocks on the straight line between // From and To is too big, stop searching to save compile time. if (BB->size() > SinkLoadInstsPerBlockThreshold || HandledDomBlocks.size() > SinkLoadBlocksThreshold) { for (auto *DomBB : HandledDomBlocks) { if (DomBB != BB && DT->dominates(DomBB, BB)) HasStoreCache[std::make_pair(DomBB, To)] = true; else if (DomBB != BB && DT->dominates(BB, DomBB)) HasStoreCache[std::make_pair(From, DomBB)] = true; } HasStoreCache[BlockPair] = true; return true; } for (MachineInstr &I : *BB) { // Treat as alias conservatively for a call or an ordered memory // operation.
if (I.isCall() || I.hasOrderedMemoryRef()) { for (auto *DomBB : HandledDomBlocks) { if (DomBB != BB && DT->dominates(DomBB, BB)) HasStoreCache[std::make_pair(DomBB, To)] = true; else if(DomBB != BB && DT->dominates(BB, DomBB)) HasStoreCache[std::make_pair(From, DomBB)] = true; } HasStoreCache[BlockPair] = true; return true; } if (I.mayStore()) { SawStore = true; // We still have chance to sink MI if all stores between are not // aliased to MI. // Cache all store instructions, so that we don't need to go through // all From reachable blocks for next load instruction. if (I.mayAlias(AA, MI, false)) HasAliasedStore = true; StoreInstrCache[BlockPair].push_back(&I); } } } } // If there is no store at all, cache the result. if (!SawStore) HasStoreCache[BlockPair] = false; return HasAliasedStore; } /// Sink instructions into loops if profitable. This especially tries to prevent /// register spills caused by register pressure if there is little to no /// overhead moving instructions into loops. bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) { LLVM_DEBUG(dbgs() << "LoopSink: Finding sink block for: " << I); MachineBasicBlock *Preheader = L->getLoopPreheader(); assert(Preheader && "Loop sink needs a preheader block"); MachineBasicBlock *SinkBlock = nullptr; bool CanSink = true; const MachineOperand &MO = I.getOperand(0); for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { LLVM_DEBUG(dbgs() << "LoopSink: Analysing use: " << MI); if (!L->contains(&MI)) { LLVM_DEBUG(dbgs() << "LoopSink: Use not in loop, can't sink.\n"); CanSink = false; break; } // FIXME: Come up with a proper cost model that estimates whether sinking // the instruction (and thus possibly executing it on every loop // iteration) is more expensive than a register. // For now assumes that copies are cheap and thus almost always worth it. if (!MI.isCopy()) { LLVM_DEBUG(dbgs() << "LoopSink: Use is not a copy\n"); CanSink = false; break; } if (!SinkBlock) { SinkBlock = MI.getParent(); LLVM_DEBUG(dbgs() << "LoopSink: Setting sink block to: " << printMBBReference(*SinkBlock) << "\n"); continue; } SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent()); if (!SinkBlock) { LLVM_DEBUG(dbgs() << "LoopSink: Can't find nearest dominator\n"); CanSink = false; break; } LLVM_DEBUG(dbgs() << "LoopSink: Setting nearest common dom block: " << printMBBReference(*SinkBlock) << "\n"); } if (!CanSink) { LLVM_DEBUG(dbgs() << "LoopSink: Can't sink instruction.\n"); return false; } if (!SinkBlock) { LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, can't find sink block.\n"); return false; } if (SinkBlock == Preheader) { LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, sink block is the preheader\n"); return false; } if (SinkBlock->size() > SinkLoadInstsPerBlockThreshold) { LLVM_DEBUG(dbgs() << "LoopSink: Not Sinking, block too large to analyse.\n"); return false; } LLVM_DEBUG(dbgs() << "LoopSink: Sinking instruction!\n"); SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I); // The instruction is moved from its basic block, so do not retain the // debug information. assert(!I.isDebugInstr() && "Should not sink debug inst"); I.setDebugLoc(DebugLoc()); return true; } /// SinkInstruction - Determine whether it is safe to sink the specified machine /// instruction out of its current block into a successor. bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, AllSuccsCache &AllSuccessors) { // Don't sink instructions that the target prefers not to sink. 
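// --- Illustrative sketch (not part of the pass): hasStoreBetween above walks
// the blocks reachable from From, conservatively treats calls and ordered
// memory operations as stores, and memoizes the verdict per (From, To) pair so
// later queries are cheap. The toy CFG below re-creates just that walk; the
// ToyBlock type and this hasStoreBetween re-implementation are hypothetical and
// much simpler than the real thresholded, alias-aware version.
#include <cstdio>
#include <map>
#include <set>
#include <utility>
#include <vector>

struct ToyBlock {
  int ID;
  bool HasStore;                  // any store/call in this block
  std::vector<ToyBlock *> Succs;
};

static std::map<std::pair<int, int>, bool> HasStoreCache;

static bool hasStoreBetween(ToyBlock *From, ToyBlock *To) {
  auto Key = std::make_pair(From->ID, To->ID);
  auto Cached = HasStoreCache.find(Key);
  if (Cached != HasStoreCache.end())
    return Cached->second;        // answered by an earlier query

  // Depth-first walk of blocks reachable from From, ignoring stores in From
  // and To themselves, mirroring the skip in the real code.
  std::set<ToyBlock *> Visited{From};
  std::vector<ToyBlock *> Worklist{From};
  bool Found = false;
  while (!Worklist.empty() && !Found) {
    ToyBlock *BB = Worklist.back();
    Worklist.pop_back();
    for (ToyBlock *S : BB->Succs) {
      if (S == To || !Visited.insert(S).second)
        continue;
      if (S->HasStore)
        Found = true;
      Worklist.push_back(S);
    }
  }
  return HasStoreCache[Key] = Found;
}

int main() {
  ToyBlock A{0, false, {}}, B{1, true, {}}, C{2, false, {}};
  A.Succs = {&B};
  B.Succs = {&C};
  std::printf("store between A and C: %d\n", hasStoreBetween(&A, &C)); // 1
}
// --- end of illustrative sketch.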
if (!TII->shouldSink(MI)) return false; // Check if it's safe to move the instruction. if (!MI.isSafeToMove(AA, SawStore)) return false; // Convergent operations may not be made control-dependent on additional // values. if (MI.isConvergent()) return false; // Don't break implicit null checks. This is a performance heuristic, and not // required for correctness. if (SinkingPreventsImplicitNullCheck(MI, TII, TRI)) return false; // FIXME: This should include support for sinking instructions within the // block they are currently in to shorten the live ranges. We often get // instructions sunk into the top of a large block, but it would be better to // also sink them down before their first use in the block. This xform has to // be careful not to *increase* register pressure though, e.g. sinking // "x = y + z" down if it kills y and z would increase the live ranges of y // and z and only shrink the live range of x. bool BreakPHIEdge = false; MachineBasicBlock *ParentBlock = MI.getParent(); MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge, AllSuccessors); // If there are no outputs, it must have side-effects. if (!SuccToSinkTo) return false; // If the instruction to move defines a dead physical register which is live // when leaving the basic block, don't move it because it could turn into a // "zombie" define of that preg. E.g., EFLAGS. () for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || MO.isUse()) continue; Register Reg = MO.getReg(); if (Reg == 0 || !Register::isPhysicalRegister(Reg)) continue; if (SuccToSinkTo->isLiveIn(Reg)) return false; } LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccToSinkTo); // If the block has multiple predecessors, this is a critical edge. // Decide if we can sink along it or need to break the edge. if (SuccToSinkTo->pred_size() > 1) { // We cannot sink a load across a critical edge - there may be stores in // other code paths. bool TryBreak = false; bool Store = MI.mayLoad() ? hasStoreBetween(ParentBlock, SuccToSinkTo, MI) : true; if (!MI.isSafeToMove(AA, Store)) { LLVM_DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n"); TryBreak = true; } // We don't want to sink across a critical edge if we don't dominate the // successor. We could be introducing calculations to new code paths. if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) { LLVM_DEBUG(dbgs() << " *** NOTE: Critical edge found\n"); TryBreak = true; } // Don't sink instructions into a loop. if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) { LLVM_DEBUG(dbgs() << " *** NOTE: Loop header found\n"); TryBreak = true; } // Otherwise we are OK with sinking along a critical edge. if (!TryBreak) LLVM_DEBUG(dbgs() << "Sinking along critical edge.\n"); else { // Mark this edge as to be split. // If the edge can actually be split, the next iteration of the main loop // will sink MI in the newly created block. bool Status = PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge); if (!Status) LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to " "break critical edge\n"); // The instruction will not be sunk this time. return false; } } if (BreakPHIEdge) { // BreakPHIEdge is true if all the uses are in the successor MBB being // sunken into and they are all PHI nodes. In this case, machine-sink must // break the critical edge first. 
bool Status = PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge); if (!Status) LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to " "break critical edge\n"); // The instruction will not be sunk this time. return false; } // Determine where to insert into. Skip phi nodes. MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin(); while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI()) ++InsertPos; // Collect debug users of any vreg that this inst defines. SmallVector DbgUsersToSink; for (auto &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual()) continue; if (!SeenDbgUsers.count(MO.getReg())) continue; // Sink any users that don't pass any other DBG_VALUEs for this variable. auto &Users = SeenDbgUsers[MO.getReg()]; for (auto &User : Users) { MachineInstr *DbgMI = User.getPointer(); if (User.getInt()) { // This DBG_VALUE would re-order assignments. If we can't copy-propagate // it, it can't be recovered. Set it undef. if (!attemptDebugCopyProp(MI, *DbgMI, MO.getReg())) DbgMI->setDebugValueUndef(); } else { DbgUsersToSink.push_back( {DbgMI, SmallVector(1, MO.getReg())}); } } } // After sinking, some debug users may not be dominated any more. If possible, // copy-propagate their operands. As it's expensive, don't do this if there's // no debuginfo in the program. if (MI.getMF()->getFunction().getSubprogram() && MI.isCopy()) SalvageUnsunkDebugUsersOfCopy(MI, SuccToSinkTo); performSink(MI, *SuccToSinkTo, InsertPos, DbgUsersToSink); // Conservatively, clear any kill flags, since it's possible that they are no // longer correct. // Note that we have to clear the kill flags for any register this instruction // uses as we may sink over another instruction which currently kills the // used registers. for (MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.isUse()) RegsToClearKillFlags.insert(MO.getReg()); // Remember to clear kill flags. } return true; } void MachineSinking::SalvageUnsunkDebugUsersOfCopy( MachineInstr &MI, MachineBasicBlock *TargetBlock) { assert(MI.isCopy()); assert(MI.getOperand(1).isReg()); // Enumerate all users of vreg operands that are def'd. Skip those that will // be sunk. For the rest, if they are not dominated by the block we will sink // MI into, propagate the copy source to them. SmallVector DbgDefUsers; SmallVector DbgUseRegs; const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); for (auto &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual()) continue; DbgUseRegs.push_back(MO.getReg()); for (auto &User : MRI.use_instructions(MO.getReg())) { if (!User.isDebugValue() || DT->dominates(TargetBlock, User.getParent())) continue; // If is in same block, will either sink or be use-before-def. if (User.getParent() == MI.getParent()) continue; assert(User.hasDebugOperandForReg(MO.getReg()) && "DBG_VALUE user of vreg, but has no operand for it?"); DbgDefUsers.push_back(&User); } } // Point the users of this copy that are no longer dominated, at the source // of the copy. for (auto *User : DbgDefUsers) { for (auto &Reg : DbgUseRegs) { for (auto &DbgOp : User->getDebugOperandsForReg(Reg)) { DbgOp.setReg(MI.getOperand(1).getReg()); DbgOp.setSubReg(MI.getOperand(1).getSubReg()); } } } } //===----------------------------------------------------------------------===// // This pass is not intended to be a replacement or a complete alternative // for the pre-ra machine sink pass. It is only designed to sink COPY // instructions which should be handled after RA. 
// // This pass sinks COPY instructions into a successor block, if the COPY is not // used in the current block and the COPY is live-in to a single successor // (i.e., doesn't require the COPY to be duplicated). This avoids executing the // copy on paths where its result isn't needed. This also exposes // additional opportunities for dead copy elimination and shrink wrapping. // // These copies were either not handled by or are inserted after the MachineSink // pass. As an example of the former case, the MachineSink pass cannot sink // COPY instructions with allocatable source registers; for AArch64 these types // of copy instructions are frequently used to move function parameters (PhyReg) // into virtual registers in the entry block. // // For the machine IR below, this pass will sink %w19 in the entry block into its // successor (%bb.1) because %w19 is only live-in in %bb.1. // %bb.0: // %wzr = SUBSWri %w1, 1 // %w19 = COPY %w0 // Bcc 11, %bb.2 // %bb.1: // Live Ins: %w19 // BL @fun // %w0 = ADDWrr %w0, %w19 // RET %w0 // %bb.2: // %w0 = COPY %wzr // RET %w0 // As we sink %w19 (CSR in AArch64) into %bb.1, the shrink-wrapping pass will be // able to see %bb.0 as a candidate. //===----------------------------------------------------------------------===// namespace { class PostRAMachineSinking : public MachineFunctionPass { public: bool runOnMachineFunction(MachineFunction &MF) override; static char ID; PostRAMachineSinking() : MachineFunctionPass(ID) {} StringRef getPassName() const override { return "PostRA Machine Sink"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } MachineFunctionProperties getRequiredProperties() const override { return MachineFunctionProperties().set( MachineFunctionProperties::Property::NoVRegs); } private: /// Track which register units have been modified and used. LiveRegUnits ModifiedRegUnits, UsedRegUnits; /// Track DBG_VALUEs of (unmodified) register units. Each DBG_VALUE has an /// entry in this map for each unit it touches. The DBG_VALUE's entry /// consists of a pointer to the instruction itself, and a vector of registers /// referred to by the instruction that overlap the key register unit. DenseMap> SeenDbgInstrs; /// Sink Copy instructions unused in the same block close to their uses in /// successors. bool tryToSinkCopy(MachineBasicBlock &BB, MachineFunction &MF, const TargetRegisterInfo *TRI, const TargetInstrInfo *TII); }; } // namespace char PostRAMachineSinking::ID = 0; char &llvm::PostRAMachineSinkingID = PostRAMachineSinking::ID; INITIALIZE_PASS(PostRAMachineSinking, "postra-machine-sink", "PostRA Machine Sink", false, false) static bool aliasWithRegsInLiveIn(MachineBasicBlock &MBB, unsigned Reg, const TargetRegisterInfo *TRI) { LiveRegUnits LiveInRegUnits(*TRI); LiveInRegUnits.addLiveIns(MBB); return !LiveInRegUnits.available(Reg); } static MachineBasicBlock * getSingleLiveInSuccBB(MachineBasicBlock &CurBB, const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs, unsigned Reg, const TargetRegisterInfo *TRI) { // Try to find a single sinkable successor in which Reg is live-in. MachineBasicBlock *BB = nullptr; for (auto *SI : SinkableBBs) { if (aliasWithRegsInLiveIn(*SI, Reg, TRI)) { // If BB is set here, Reg is live-in to at least two sinkable successors, // so quit. if (BB) return nullptr; BB = SI; } } // Reg is not live-in to any sinkable successors. if (!BB) return nullptr; // Check if any register aliased with Reg is live-in in other successors.
for (auto *SI : CurBB.successors()) { if (!SinkableBBs.count(SI) && aliasWithRegsInLiveIn(*SI, Reg, TRI)) return nullptr; } return BB; } static MachineBasicBlock * getSingleLiveInSuccBB(MachineBasicBlock &CurBB, const SmallPtrSetImpl &SinkableBBs, ArrayRef DefedRegsInCopy, const TargetRegisterInfo *TRI) { MachineBasicBlock *SingleBB = nullptr; for (auto DefReg : DefedRegsInCopy) { MachineBasicBlock *BB = getSingleLiveInSuccBB(CurBB, SinkableBBs, DefReg, TRI); if (!BB || (SingleBB && SingleBB != BB)) return nullptr; SingleBB = BB; } return SingleBB; } static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB, SmallVectorImpl &UsedOpsInCopy, LiveRegUnits &UsedRegUnits, const TargetRegisterInfo *TRI) { for (auto U : UsedOpsInCopy) { MachineOperand &MO = MI->getOperand(U); Register SrcReg = MO.getReg(); if (!UsedRegUnits.available(SrcReg)) { MachineBasicBlock::iterator NI = std::next(MI->getIterator()); for (MachineInstr &UI : make_range(NI, CurBB.end())) { if (UI.killsRegister(SrcReg, TRI)) { UI.clearRegisterKills(SrcReg, TRI); MO.setIsKill(true); break; } } } } } static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB, SmallVectorImpl &UsedOpsInCopy, SmallVectorImpl &DefedRegsInCopy) { MachineFunction &MF = *SuccBB->getParent(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); for (unsigned DefReg : DefedRegsInCopy) for (MCSubRegIterator S(DefReg, TRI, true); S.isValid(); ++S) SuccBB->removeLiveIn(*S); for (auto U : UsedOpsInCopy) { Register SrcReg = MI->getOperand(U).getReg(); LaneBitmask Mask; for (MCRegUnitMaskIterator S(SrcReg, TRI); S.isValid(); ++S) { Mask |= (*S).second; } SuccBB->addLiveIn(SrcReg, Mask.any() ? Mask : LaneBitmask::getAll()); } SuccBB->sortUniqueLiveIns(); } static bool hasRegisterDependency(MachineInstr *MI, SmallVectorImpl &UsedOpsInCopy, SmallVectorImpl &DefedRegsInCopy, LiveRegUnits &ModifiedRegUnits, LiveRegUnits &UsedRegUnits) { bool HasRegDependency = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; Register Reg = MO.getReg(); if (!Reg) continue; if (MO.isDef()) { if (!ModifiedRegUnits.available(Reg) || !UsedRegUnits.available(Reg)) { HasRegDependency = true; break; } DefedRegsInCopy.push_back(Reg); // FIXME: instead of isUse(), readsReg() would be a better fix here, // For example, we can ignore modifications in reg with undef. However, // it's not perfectly clear if skipping the internal read is safe in all // other targets. } else if (MO.isUse()) { if (!ModifiedRegUnits.available(Reg)) { HasRegDependency = true; break; } UsedOpsInCopy.push_back(i); } } return HasRegDependency; } static SmallSet getRegUnits(MCRegister Reg, const TargetRegisterInfo *TRI) { SmallSet RegUnits; for (auto RI = MCRegUnitIterator(Reg, TRI); RI.isValid(); ++RI) RegUnits.insert(*RI); return RegUnits; } bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, MachineFunction &MF, const TargetRegisterInfo *TRI, const TargetInstrInfo *TII) { SmallPtrSet SinkableBBs; // FIXME: For now, we sink only to a successor which has a single predecessor // so that we can directly sink COPY instructions to the successor without // adding any new block or branch instruction. 
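// --- Illustrative sketch (not part of the pass): getSingleLiveInSuccBB above
// only picks a sink target when exactly one sinkable successor has the copied
// register live-in and no other successor needs it. The sketch below uses
// integer register ids and plain sets instead of LiveRegUnits/register-unit
// aliasing; ToySucc and singleLiveInSucc are hypothetical stand-ins.
#include <cstdio>
#include <set>
#include <vector>

struct ToySucc {
  bool Sinkable;                // e.g. has a single predecessor
  std::set<unsigned> LiveIns;   // registers live-in to this successor
};

// Return the index of the unique sinkable successor with Reg live-in, or -1
// if there is none, more than one, or a non-sinkable successor also wants Reg.
static int singleLiveInSucc(const std::vector<ToySucc> &Succs, unsigned Reg) {
  int Found = -1;
  for (size_t I = 0; I < Succs.size(); ++I) {
    if (!Succs[I].LiveIns.count(Reg))
      continue;
    if (!Succs[I].Sinkable || Found != -1)
      return -1;                // sinking here would duplicate the COPY
    Found = static_cast<int>(I);
  }
  return Found;
}

int main() {
  std::vector<ToySucc> Succs = {{true, {19}}, {true, {0}}};
  std::printf("sink COPY into successor %d\n", singleLiveInSucc(Succs, 19));
}
// --- end of illustrative sketch.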
for (MachineBasicBlock *SI : CurBB.successors()) if (!SI->livein_empty() && SI->pred_size() == 1) SinkableBBs.insert(SI); if (SinkableBBs.empty()) return false; bool Changed = false; // Track which registers have been modified and used between the end of the // block and the current instruction. ModifiedRegUnits.clear(); UsedRegUnits.clear(); SeenDbgInstrs.clear(); for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(CurBB))) { // Track the operand index for use in Copy. SmallVector UsedOpsInCopy; // Track the register number defed in Copy. SmallVector DefedRegsInCopy; // We must sink this DBG_VALUE if its operand is sunk. To avoid searching // for DBG_VALUEs later, record them when they're encountered. if (MI.isDebugValue()) { SmallDenseMap, 4> MIUnits; bool IsValid = true; for (MachineOperand &MO : MI.debug_operands()) { if (MO.isReg() && Register::isPhysicalRegister(MO.getReg())) { // Bail if we can already tell the sink would be rejected, rather // than needlessly accumulating lots of DBG_VALUEs. if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy, ModifiedRegUnits, UsedRegUnits)) { IsValid = false; break; } // Record debug use of each reg unit. SmallSet RegUnits = getRegUnits(MO.getReg(), TRI); for (MCRegister Reg : RegUnits) MIUnits[Reg].push_back(MO.getReg()); } } if (IsValid) { for (auto RegOps : MIUnits) SeenDbgInstrs[RegOps.first].push_back({&MI, RegOps.second}); } continue; } if (MI.isDebugOrPseudoInstr()) continue; // Do not move any instruction across function call. if (MI.isCall()) return false; if (!MI.isCopy() || !MI.getOperand(0).isRenamable()) { LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); continue; } // Don't sink the COPY if it would violate a register dependency. if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy, ModifiedRegUnits, UsedRegUnits)) { LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); continue; } assert((!UsedOpsInCopy.empty() && !DefedRegsInCopy.empty()) && "Unexpect SrcReg or DefReg"); MachineBasicBlock *SuccBB = getSingleLiveInSuccBB(CurBB, SinkableBBs, DefedRegsInCopy, TRI); // Don't sink if we cannot find a single sinkable successor in which Reg // is live-in. if (!SuccBB) { LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); continue; } assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) && "Unexpected predecessor"); // Collect DBG_VALUEs that must sink with this copy. We've previously // recorded which reg units that DBG_VALUEs read, if this instruction // writes any of those units then the corresponding DBG_VALUEs must sink. MapVector DbgValsToSinkMap; for (auto &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef()) continue; SmallSet Units = getRegUnits(MO.getReg(), TRI); for (MCRegister Reg : Units) { for (auto MIRegs : SeenDbgInstrs.lookup(Reg)) { auto &Regs = DbgValsToSinkMap[MIRegs.first]; for (unsigned Reg : MIRegs.second) Regs.push_back(Reg); } } } SmallVector DbgValsToSink(DbgValsToSinkMap.begin(), DbgValsToSinkMap.end()); // Clear the kill flag if SrcReg is killed between MI and the end of the // block. 
clearKillFlags(&MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI); MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI(); performSink(MI, *SuccBB, InsertPos, DbgValsToSink); updateLiveIn(&MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy); Changed = true; ++NumPostRACopySink; } return Changed; } bool PostRAMachineSinking::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; bool Changed = false; const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); ModifiedRegUnits.init(*TRI); UsedRegUnits.init(*TRI); for (auto &BB : MF) Changed |= tryToSinkCopy(BB, MF, TRI, TII); return Changed; } diff --git a/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp b/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp index 636c1d238932..a016b7085a00 100644 --- a/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp +++ b/contrib/llvm-project/llvm/lib/MC/WasmObjectWriter.cpp @@ -1,1959 +1,1981 @@ //===- lib/MC/WasmObjectWriter.cpp - Wasm File Writer ---------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements Wasm object file writer information. // //===----------------------------------------------------------------------===// #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/BinaryFormat/Wasm.h" #include "llvm/BinaryFormat/WasmTraits.h" #include "llvm/Config/llvm-config.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionWasm.h" #include "llvm/MC/MCSymbolWasm.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/MCWasmObjectWriter.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/StringSaver.h" #include using namespace llvm; #define DEBUG_TYPE "mc" namespace { // When we create the indirect function table we start at 1, so that there is // and empty slot at 0 and therefore calling a null function pointer will trap. static const uint32_t InitialTableOffset = 1; // For patching purposes, we need to remember where each section starts, both // for patching up the section size field, and for patching up references to // locations within the section. struct SectionBookkeeping { // Where the size of the section is written. uint64_t SizeOffset; // Where the section header ends (without custom section name). uint64_t PayloadOffset; // Where the contents of the section starts. uint64_t ContentsOffset; uint32_t Index; }; // A wasm data segment. A wasm binary contains only a single data section // but that can contain many segments, each with their own virtual location // in memory. Each MCSection data created by llvm is modeled as its own // wasm data segment. struct WasmDataSegment { MCSectionWasm *Section; StringRef Name; uint32_t InitFlags; uint64_t Offset; uint32_t Alignment; uint32_t LinkingFlags; SmallVector Data; }; // A wasm function to be written into the function section. 
struct WasmFunction { uint32_t SigIndex; const MCSymbolWasm *Sym; }; // A wasm global to be written into the global section. struct WasmGlobal { wasm::WasmGlobalType Type; uint64_t InitialValue; }; // Information about a single item which is part of a COMDAT. For each data // segment or function which is in the COMDAT, there is a corresponding // WasmComdatEntry. struct WasmComdatEntry { unsigned Kind; uint32_t Index; }; // Information about a single relocation. struct WasmRelocationEntry { uint64_t Offset; // Where is the relocation. const MCSymbolWasm *Symbol; // The symbol to relocate with. int64_t Addend; // A value to add to the symbol. unsigned Type; // The type of the relocation. const MCSectionWasm *FixupSection; // The section the relocation is targeting. WasmRelocationEntry(uint64_t Offset, const MCSymbolWasm *Symbol, int64_t Addend, unsigned Type, const MCSectionWasm *FixupSection) : Offset(Offset), Symbol(Symbol), Addend(Addend), Type(Type), FixupSection(FixupSection) {} bool hasAddend() const { return wasm::relocTypeHasAddend(Type); } void print(raw_ostream &Out) const { Out << wasm::relocTypetoString(Type) << " Off=" << Offset << ", Sym=" << *Symbol << ", Addend=" << Addend << ", FixupSection=" << FixupSection->getName(); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void dump() const { print(dbgs()); } #endif }; static const uint32_t InvalidIndex = -1; struct WasmCustomSection { StringRef Name; MCSectionWasm *Section; uint32_t OutputContentsOffset; uint32_t OutputIndex; WasmCustomSection(StringRef Name, MCSectionWasm *Section) : Name(Name), Section(Section), OutputContentsOffset(0), OutputIndex(InvalidIndex) {} }; #if !defined(NDEBUG) raw_ostream &operator<<(raw_ostream &OS, const WasmRelocationEntry &Rel) { Rel.print(OS); return OS; } #endif -// Write X as an (unsigned) LEB value at offset Offset in Stream, padded +// Write Value as an (unsigned) LEB value at offset Offset in Stream, padded // to allow patching. -template -void writePatchableLEB(raw_pwrite_stream &Stream, uint64_t X, uint64_t Offset) { +template +void writePatchableULEB(raw_pwrite_stream &Stream, T Value, uint64_t Offset) { uint8_t Buffer[W]; - unsigned SizeLen = encodeULEB128(X, Buffer, W); + unsigned SizeLen = encodeULEB128(Value, Buffer, W); assert(SizeLen == W); Stream.pwrite((char *)Buffer, SizeLen, Offset); } -// Write X as an signed LEB value at offset Offset in Stream, padded +// Write Value as an signed LEB value at offset Offset in Stream, padded // to allow patching. -template -void writePatchableSLEB(raw_pwrite_stream &Stream, int64_t X, uint64_t Offset) { +template +void writePatchableSLEB(raw_pwrite_stream &Stream, T Value, uint64_t Offset) { uint8_t Buffer[W]; - unsigned SizeLen = encodeSLEB128(X, Buffer, W); + unsigned SizeLen = encodeSLEB128(Value, Buffer, W); assert(SizeLen == W); Stream.pwrite((char *)Buffer, SizeLen, Offset); } -// Write X as a plain integer value at offset Offset in Stream. 
-static void patchI32(raw_pwrite_stream &Stream, uint32_t X, uint64_t Offset) { +static void writePatchableU32(raw_pwrite_stream &Stream, uint32_t Value, + uint64_t Offset) { + writePatchableULEB(Stream, Value, Offset); +} + +static void writePatchableS32(raw_pwrite_stream &Stream, int32_t Value, + uint64_t Offset) { + writePatchableSLEB(Stream, Value, Offset); +} + +static void writePatchableU64(raw_pwrite_stream &Stream, uint64_t Value, + uint64_t Offset) { + writePatchableSLEB(Stream, Value, Offset); +} + +static void writePatchableS64(raw_pwrite_stream &Stream, int64_t Value, + uint64_t Offset) { + writePatchableSLEB(Stream, Value, Offset); +} + +// Write Value as a plain integer value at offset Offset in Stream. +static void patchI32(raw_pwrite_stream &Stream, uint32_t Value, + uint64_t Offset) { uint8_t Buffer[4]; - support::endian::write32le(Buffer, X); + support::endian::write32le(Buffer, Value); Stream.pwrite((char *)Buffer, sizeof(Buffer), Offset); } -static void patchI64(raw_pwrite_stream &Stream, uint64_t X, uint64_t Offset) { +static void patchI64(raw_pwrite_stream &Stream, uint64_t Value, + uint64_t Offset) { uint8_t Buffer[8]; - support::endian::write64le(Buffer, X); + support::endian::write64le(Buffer, Value); Stream.pwrite((char *)Buffer, sizeof(Buffer), Offset); } bool isDwoSection(const MCSection &Sec) { return Sec.getName().endswith(".dwo"); } class WasmObjectWriter : public MCObjectWriter { support::endian::Writer *W; /// The target specific Wasm writer instance. std::unique_ptr TargetObjectWriter; // Relocations for fixing up references in the code section. std::vector CodeRelocations; // Relocations for fixing up references in the data section. std::vector DataRelocations; // Index values to use for fixing up call_indirect type indices. // Maps function symbols to the index of the type of the function DenseMap TypeIndices; // Maps function symbols to the table element index space. Used // for TABLE_INDEX relocation types (i.e. address taken functions). DenseMap TableIndices; // Maps function/global/table symbols to the // function/global/table/tag/section index space. DenseMap WasmIndices; DenseMap GOTIndices; // Maps data symbols to the Wasm segment and offset/size with the segment. DenseMap DataLocations; // Stores output data (index, relocations, content offset) for custom // section. std::vector CustomSections; std::unique_ptr ProducersSection; std::unique_ptr TargetFeaturesSection; // Relocations for fixing up references in the custom sections. DenseMap> CustomSectionsRelocations; // Map from section to defining function symbol. DenseMap SectionFunctions; DenseMap SignatureIndices; SmallVector Signatures; SmallVector DataSegments; unsigned NumFunctionImports = 0; unsigned NumGlobalImports = 0; unsigned NumTableImports = 0; unsigned NumTagImports = 0; uint32_t SectionCount = 0; enum class DwoMode { AllSections, NonDwoOnly, DwoOnly, }; bool IsSplitDwarf = false; raw_pwrite_stream *OS = nullptr; raw_pwrite_stream *DwoOS = nullptr; // TargetObjectWriter wranppers. 
bool is64Bit() const { return TargetObjectWriter->is64Bit(); } bool isEmscripten() const { return TargetObjectWriter->isEmscripten(); } void startSection(SectionBookkeeping &Section, unsigned SectionId); void startCustomSection(SectionBookkeeping &Section, StringRef Name); void endSection(SectionBookkeeping &Section); public: WasmObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS_) : TargetObjectWriter(std::move(MOTW)), OS(&OS_) {} WasmObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS_, raw_pwrite_stream &DwoOS_) : TargetObjectWriter(std::move(MOTW)), IsSplitDwarf(true), OS(&OS_), DwoOS(&DwoOS_) {} private: void reset() override { CodeRelocations.clear(); DataRelocations.clear(); TypeIndices.clear(); WasmIndices.clear(); GOTIndices.clear(); TableIndices.clear(); DataLocations.clear(); CustomSections.clear(); ProducersSection.reset(); TargetFeaturesSection.reset(); CustomSectionsRelocations.clear(); SignatureIndices.clear(); Signatures.clear(); DataSegments.clear(); SectionFunctions.clear(); NumFunctionImports = 0; NumGlobalImports = 0; NumTableImports = 0; MCObjectWriter::reset(); } void writeHeader(const MCAssembler &Asm); void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) override; void executePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) override; void prepareImports(SmallVectorImpl &Imports, MCAssembler &Asm, const MCAsmLayout &Layout); uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; uint64_t writeOneObject(MCAssembler &Asm, const MCAsmLayout &Layout, DwoMode Mode); void writeString(const StringRef Str) { encodeULEB128(Str.size(), W->OS); W->OS << Str; } void writeStringWithAlignment(const StringRef Str, unsigned Alignment); void writeI32(int32_t val) { char Buffer[4]; support::endian::write32le(Buffer, val); W->OS.write(Buffer, sizeof(Buffer)); } void writeI64(int64_t val) { char Buffer[8]; support::endian::write64le(Buffer, val); W->OS.write(Buffer, sizeof(Buffer)); } void writeValueType(wasm::ValType Ty) { W->OS << static_cast(Ty); } void writeTypeSection(ArrayRef Signatures); void writeImportSection(ArrayRef Imports, uint64_t DataSize, uint32_t NumElements); void writeFunctionSection(ArrayRef Functions); void writeExportSection(ArrayRef Exports); void writeElemSection(const MCSymbolWasm *IndirectFunctionTable, ArrayRef TableElems); void writeDataCountSection(); uint32_t writeCodeSection(const MCAssembler &Asm, const MCAsmLayout &Layout, ArrayRef Functions); uint32_t writeDataSection(const MCAsmLayout &Layout); void writeTagSection(ArrayRef TagTypes); void writeGlobalSection(ArrayRef Globals); void writeTableSection(ArrayRef Tables); void writeRelocSection(uint32_t SectionIndex, StringRef Name, std::vector &Relocations); void writeLinkingMetaDataSection( ArrayRef SymbolInfos, ArrayRef> InitFuncs, const std::map> &Comdats); void writeCustomSection(WasmCustomSection &CustomSection, const MCAssembler &Asm, const MCAsmLayout &Layout); void writeCustomRelocSections(); uint64_t getProvisionalValue(const WasmRelocationEntry &RelEntry, const MCAsmLayout &Layout); void applyRelocations(ArrayRef Relocations, uint64_t ContentsOffset, const MCAsmLayout &Layout); uint32_t getRelocationIndexValue(const WasmRelocationEntry &RelEntry); uint32_t getFunctionType(const MCSymbolWasm &Symbol); uint32_t getTagType(const MCSymbolWasm &Symbol); void registerFunctionType(const MCSymbolWasm &Symbol); void registerTagType(const MCSymbolWasm &Symbol); 
}; } // end anonymous namespace // Write out a section header and a patchable section size field. void WasmObjectWriter::startSection(SectionBookkeeping &Section, unsigned SectionId) { LLVM_DEBUG(dbgs() << "startSection " << SectionId << "\n"); W->OS << char(SectionId); Section.SizeOffset = W->OS.tell(); // The section size. We don't know the size yet, so reserve enough space // for any 32-bit value; we'll patch it later. encodeULEB128(0, W->OS, 5); // The position where the section starts, for measuring its size. Section.ContentsOffset = W->OS.tell(); Section.PayloadOffset = W->OS.tell(); Section.Index = SectionCount++; } // Write a string with extra paddings for trailing alignment // TODO: support alignment at asm and llvm level? void WasmObjectWriter::writeStringWithAlignment(const StringRef Str, unsigned Alignment) { // Calculate the encoded size of str length and add pads based on it and // alignment. raw_null_ostream NullOS; uint64_t StrSizeLength = encodeULEB128(Str.size(), NullOS); uint64_t Offset = W->OS.tell() + StrSizeLength + Str.size(); uint64_t Paddings = offsetToAlignment(Offset, Align(Alignment)); Offset += Paddings; // LEB128 greater than 5 bytes is invalid assert((StrSizeLength + Paddings) <= 5 && "too long string to align"); encodeSLEB128(Str.size(), W->OS, StrSizeLength + Paddings); W->OS << Str; assert(W->OS.tell() == Offset && "invalid padding"); } void WasmObjectWriter::startCustomSection(SectionBookkeeping &Section, StringRef Name) { LLVM_DEBUG(dbgs() << "startCustomSection " << Name << "\n"); startSection(Section, wasm::WASM_SEC_CUSTOM); // The position where the section header ends, for measuring its size. Section.PayloadOffset = W->OS.tell(); // Custom sections in wasm also have a string identifier. if (Name != "__clangast") { writeString(Name); } else { // The on-disk hashtable in clangast needs to be aligned by 4 bytes. writeStringWithAlignment(Name, 4); } // The position where the custom section starts. Section.ContentsOffset = W->OS.tell(); } // Now that the section is complete and we know how big it is, patch up the // section size field at the start of the section. void WasmObjectWriter::endSection(SectionBookkeeping &Section) { uint64_t Size = W->OS.tell(); // /dev/null doesn't support seek/tell and can report offset of 0. // Simply skip this patching in that case. if (!Size) return; Size -= Section.PayloadOffset; if (uint32_t(Size) != Size) report_fatal_error("section size does not fit in a uint32_t"); LLVM_DEBUG(dbgs() << "endSection size=" << Size << "\n"); // Write the final section size to the payload_len field, which follows // the section id byte. - writePatchableLEB<5>(static_cast(W->OS), Size, - Section.SizeOffset); + writePatchableU32(static_cast(W->OS), Size, + Section.SizeOffset); } // Emit the Wasm header. void WasmObjectWriter::writeHeader(const MCAssembler &Asm) { W->OS.write(wasm::WasmMagic, sizeof(wasm::WasmMagic)); W->write(wasm::WasmVersion); } void WasmObjectWriter::executePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) { // Some compilation units require the indirect function table to be present // but don't explicitly reference it. This is the case for call_indirect // without the reference-types feature, and also function bitcasts in all // cases. In those cases the __indirect_function_table has the // WASM_SYMBOL_NO_STRIP attribute. Here we make sure this symbol makes it to // the assembler, if needed. 
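// --- Illustrative sketch (not part of the writer): the section-size patching
// above works because the size field is emitted as a ULEB128 padded to a fixed
// five bytes, so the real value can be written over the placeholder later
// without shifting any bytes. The round trip below uses a simplified local
// re-implementation of padded ULEB128 encoding (encodeULEB128Padded is a
// hypothetical helper, not LLVM's encodeULEB128).
#include <cstdint>
#include <cstdio>
#include <vector>

// Encode Value as ULEB128 in exactly PadTo bytes: the continuation bit stays
// set on every byte except the last, which is how the padding is expressed.
static void encodeULEB128Padded(uint64_t Value, unsigned PadTo,
                                std::vector<uint8_t> &Out, size_t At) {
  for (unsigned I = 0; I < PadTo; ++I) {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (I + 1 != PadTo)
      Byte |= 0x80;
    Out[At + I] = Byte;
  }
}

int main() {
  std::vector<uint8_t> Section;
  Section.push_back(0x0a);                     // hypothetical section id byte
  size_t SizeOffset = Section.size();
  Section.resize(Section.size() + 5, 0);       // reserve the padded size field
  size_t PayloadStart = Section.size();
  Section.insert(Section.end(), {1, 2, 3, 4}); // pretend section payload
  // The payload size is known only now; patch it into the reserved field.
  encodeULEB128Padded(Section.size() - PayloadStart, 5, Section, SizeOffset);
  for (uint8_t B : Section)
    std::printf("%02x ", B);                   // 0a 84 80 80 80 00 01 02 03 04
  std::printf("\n");
}
// --- end of illustrative sketch.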
if (auto *Sym = Asm.getContext().lookupSymbol("__indirect_function_table")) { const auto *WasmSym = static_cast(Sym); if (WasmSym->isNoStrip()) Asm.registerSymbol(*Sym); } // Build a map of sections to the function that defines them, for use // in recordRelocation. for (const MCSymbol &S : Asm.symbols()) { const auto &WS = static_cast(S); if (WS.isDefined() && WS.isFunction() && !WS.isVariable()) { const auto &Sec = static_cast(S.getSection()); auto Pair = SectionFunctions.insert(std::make_pair(&Sec, &S)); if (!Pair.second) report_fatal_error("section already has a defining function: " + Sec.getName()); } } } void WasmObjectWriter::recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) { // The WebAssembly backend should never generate FKF_IsPCRel fixups assert(!(Asm.getBackend().getFixupKindInfo(Fixup.getKind()).Flags & MCFixupKindInfo::FKF_IsPCRel)); const auto &FixupSection = cast(*Fragment->getParent()); uint64_t C = Target.getConstant(); uint64_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); MCContext &Ctx = Asm.getContext(); bool IsLocRel = false; if (const MCSymbolRefExpr *RefB = Target.getSymB()) { const auto &SymB = cast(RefB->getSymbol()); if (FixupSection.getKind().isText()) { Ctx.reportError(Fixup.getLoc(), Twine("symbol '") + SymB.getName() + "' unsupported subtraction expression used in " "relocation in code section."); return; } if (SymB.isUndefined()) { Ctx.reportError(Fixup.getLoc(), Twine("symbol '") + SymB.getName() + "' can not be undefined in a subtraction expression"); return; } const MCSection &SecB = SymB.getSection(); if (&SecB != &FixupSection) { Ctx.reportError(Fixup.getLoc(), Twine("symbol '") + SymB.getName() + "' can not be placed in a different section"); return; } IsLocRel = true; C += FixupOffset - Layout.getSymbolOffset(SymB); } // We either rejected the fixup or folded B into C at this point. const MCSymbolRefExpr *RefA = Target.getSymA(); const auto *SymA = cast(&RefA->getSymbol()); // The .init_array isn't translated as data, so don't do relocations in it. if (FixupSection.getName().startswith(".init_array")) { SymA->setUsedInInitArray(); return; } if (SymA->isVariable()) { const MCExpr *Expr = SymA->getVariableValue(); if (const auto *Inner = dyn_cast(Expr)) if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF) llvm_unreachable("weakref used in reloc not yet implemented"); } // Put any constant offset in an addend. Offsets can be negative, and // LLVM expects wrapping, in contrast to wasm's immediates which can't // be negative and don't wrap. FixedValue = 0; unsigned Type = TargetObjectWriter->getRelocType(Target, Fixup, FixupSection, IsLocRel); // Absolute offset within a section or a function. // Currently only supported for for metadata sections. // See: test/MC/WebAssembly/blockaddress.ll if ((Type == wasm::R_WASM_FUNCTION_OFFSET_I32 || Type == wasm::R_WASM_FUNCTION_OFFSET_I64 || Type == wasm::R_WASM_SECTION_OFFSET_I32) && SymA->isDefined()) { // SymA can be a temp data symbol that represents a function (in which case // it needs to be replaced by the section symbol), [XXX and it apparently // later gets changed again to a func symbol?] or it can be a real // function symbol, in which case it can be left as-is. 
if (!FixupSection.getKind().isMetadata()) report_fatal_error("relocations for function or section offsets are " "only supported in metadata sections"); const MCSymbol *SectionSymbol = nullptr; const MCSection &SecA = SymA->getSection(); if (SecA.getKind().isText()) { auto SecSymIt = SectionFunctions.find(&SecA); if (SecSymIt == SectionFunctions.end()) report_fatal_error("section doesn\'t have defining symbol"); SectionSymbol = SecSymIt->second; } else { SectionSymbol = SecA.getBeginSymbol(); } if (!SectionSymbol) report_fatal_error("section symbol is required for relocation"); C += Layout.getSymbolOffset(*SymA); SymA = cast(SectionSymbol); } if (Type == wasm::R_WASM_TABLE_INDEX_REL_SLEB || Type == wasm::R_WASM_TABLE_INDEX_REL_SLEB64 || Type == wasm::R_WASM_TABLE_INDEX_SLEB || Type == wasm::R_WASM_TABLE_INDEX_SLEB64 || Type == wasm::R_WASM_TABLE_INDEX_I32 || Type == wasm::R_WASM_TABLE_INDEX_I64) { // TABLE_INDEX relocs implicitly use the default indirect function table. // We require the function table to have already been defined. auto TableName = "__indirect_function_table"; MCSymbolWasm *Sym = cast_or_null(Ctx.lookupSymbol(TableName)); if (!Sym) { report_fatal_error("missing indirect function table symbol"); } else { if (!Sym->isFunctionTable()) report_fatal_error("__indirect_function_table symbol has wrong type"); // Ensure that __indirect_function_table reaches the output. Sym->setNoStrip(); Asm.registerSymbol(*Sym); } } // Relocation other than R_WASM_TYPE_INDEX_LEB are required to be // against a named symbol. if (Type != wasm::R_WASM_TYPE_INDEX_LEB) { if (SymA->getName().empty()) report_fatal_error("relocations against un-named temporaries are not yet " "supported by wasm"); SymA->setUsedInReloc(); } switch (RefA->getKind()) { case MCSymbolRefExpr::VK_GOT: case MCSymbolRefExpr::VK_WASM_GOT_TLS: SymA->setUsedInGOT(); break; default: break; } WasmRelocationEntry Rec(FixupOffset, SymA, C, Type, &FixupSection); LLVM_DEBUG(dbgs() << "WasmReloc: " << Rec << "\n"); if (FixupSection.isWasmData()) { DataRelocations.push_back(Rec); } else if (FixupSection.getKind().isText()) { CodeRelocations.push_back(Rec); } else if (FixupSection.getKind().isMetadata()) { CustomSectionsRelocations[&FixupSection].push_back(Rec); } else { llvm_unreachable("unexpected section type"); } } // Compute a value to write into the code at the location covered // by RelEntry. This value isn't used by the static linker; it just serves // to make the object format more readable and more likely to be directly // useable. 
uint64_t WasmObjectWriter::getProvisionalValue(const WasmRelocationEntry &RelEntry, const MCAsmLayout &Layout) { if ((RelEntry.Type == wasm::R_WASM_GLOBAL_INDEX_LEB || RelEntry.Type == wasm::R_WASM_GLOBAL_INDEX_I32) && !RelEntry.Symbol->isGlobal()) { assert(GOTIndices.count(RelEntry.Symbol) > 0 && "symbol not found in GOT index space"); return GOTIndices[RelEntry.Symbol]; } switch (RelEntry.Type) { case wasm::R_WASM_TABLE_INDEX_REL_SLEB: case wasm::R_WASM_TABLE_INDEX_REL_SLEB64: case wasm::R_WASM_TABLE_INDEX_SLEB: case wasm::R_WASM_TABLE_INDEX_SLEB64: case wasm::R_WASM_TABLE_INDEX_I32: case wasm::R_WASM_TABLE_INDEX_I64: { // Provisional value is table address of the resolved symbol itself const MCSymbolWasm *Base = cast(Layout.getBaseSymbol(*RelEntry.Symbol)); assert(Base->isFunction()); if (RelEntry.Type == wasm::R_WASM_TABLE_INDEX_REL_SLEB || RelEntry.Type == wasm::R_WASM_TABLE_INDEX_REL_SLEB64) return TableIndices[Base] - InitialTableOffset; else return TableIndices[Base]; } case wasm::R_WASM_TYPE_INDEX_LEB: // Provisional value is same as the index return getRelocationIndexValue(RelEntry); case wasm::R_WASM_FUNCTION_INDEX_LEB: case wasm::R_WASM_GLOBAL_INDEX_LEB: case wasm::R_WASM_GLOBAL_INDEX_I32: case wasm::R_WASM_TAG_INDEX_LEB: case wasm::R_WASM_TABLE_NUMBER_LEB: // Provisional value is function/global/tag Wasm index assert(WasmIndices.count(RelEntry.Symbol) > 0 && "symbol not found in wasm index space"); return WasmIndices[RelEntry.Symbol]; case wasm::R_WASM_FUNCTION_OFFSET_I32: case wasm::R_WASM_FUNCTION_OFFSET_I64: case wasm::R_WASM_SECTION_OFFSET_I32: { if (!RelEntry.Symbol->isDefined()) return 0; const auto &Section = static_cast(RelEntry.Symbol->getSection()); return Section.getSectionOffset() + RelEntry.Addend; } case wasm::R_WASM_MEMORY_ADDR_LEB: case wasm::R_WASM_MEMORY_ADDR_LEB64: case wasm::R_WASM_MEMORY_ADDR_SLEB: case wasm::R_WASM_MEMORY_ADDR_SLEB64: case wasm::R_WASM_MEMORY_ADDR_REL_SLEB: case wasm::R_WASM_MEMORY_ADDR_REL_SLEB64: case wasm::R_WASM_MEMORY_ADDR_I32: case wasm::R_WASM_MEMORY_ADDR_I64: case wasm::R_WASM_MEMORY_ADDR_TLS_SLEB: case wasm::R_WASM_MEMORY_ADDR_TLS_SLEB64: case wasm::R_WASM_MEMORY_ADDR_LOCREL_I32: { // Provisional value is address of the global plus the offset // For undefined symbols, use zero if (!RelEntry.Symbol->isDefined()) return 0; const wasm::WasmDataReference &SymRef = DataLocations[RelEntry.Symbol]; const WasmDataSegment &Segment = DataSegments[SymRef.Segment]; // Ignore overflow. LLVM allows address arithmetic to silently wrap. return Segment.Offset + SymRef.Offset + RelEntry.Addend; } default: llvm_unreachable("invalid relocation type"); } } static void addData(SmallVectorImpl &DataBytes, MCSectionWasm &DataSection) { LLVM_DEBUG(errs() << "addData: " << DataSection.getName() << "\n"); DataBytes.resize(alignTo(DataBytes.size(), DataSection.getAlignment())); for (const MCFragment &Frag : DataSection) { if (Frag.hasInstructions()) report_fatal_error("only data supported in data sections"); if (auto *Align = dyn_cast(&Frag)) { if (Align->getValueSize() != 1) report_fatal_error("only byte values supported for alignment"); // If nops are requested, use zeros, as this is the data section. uint8_t Value = Align->hasEmitNops() ? 
0 : Align->getValue(); uint64_t Size = std::min(alignTo(DataBytes.size(), Align->getAlignment()), DataBytes.size() + Align->getMaxBytesToEmit()); DataBytes.resize(Size, Value); } else if (auto *Fill = dyn_cast(&Frag)) { int64_t NumValues; if (!Fill->getNumValues().evaluateAsAbsolute(NumValues)) llvm_unreachable("The fill should be an assembler constant"); DataBytes.insert(DataBytes.end(), Fill->getValueSize() * NumValues, Fill->getValue()); } else if (auto *LEB = dyn_cast(&Frag)) { const SmallVectorImpl &Contents = LEB->getContents(); llvm::append_range(DataBytes, Contents); } else { const auto &DataFrag = cast(Frag); const SmallVectorImpl &Contents = DataFrag.getContents(); llvm::append_range(DataBytes, Contents); } } LLVM_DEBUG(dbgs() << "addData -> " << DataBytes.size() << "\n"); } uint32_t WasmObjectWriter::getRelocationIndexValue(const WasmRelocationEntry &RelEntry) { if (RelEntry.Type == wasm::R_WASM_TYPE_INDEX_LEB) { if (!TypeIndices.count(RelEntry.Symbol)) report_fatal_error("symbol not found in type index space: " + RelEntry.Symbol->getName()); return TypeIndices[RelEntry.Symbol]; } return RelEntry.Symbol->getIndex(); } // Apply the portions of the relocation records that we can handle ourselves // directly. void WasmObjectWriter::applyRelocations( ArrayRef Relocations, uint64_t ContentsOffset, const MCAsmLayout &Layout) { auto &Stream = static_cast(W->OS); for (const WasmRelocationEntry &RelEntry : Relocations) { uint64_t Offset = ContentsOffset + RelEntry.FixupSection->getSectionOffset() + RelEntry.Offset; LLVM_DEBUG(dbgs() << "applyRelocation: " << RelEntry << "\n"); - auto Value = getProvisionalValue(RelEntry, Layout); + uint64_t Value = getProvisionalValue(RelEntry, Layout); switch (RelEntry.Type) { case wasm::R_WASM_FUNCTION_INDEX_LEB: case wasm::R_WASM_TYPE_INDEX_LEB: case wasm::R_WASM_GLOBAL_INDEX_LEB: case wasm::R_WASM_MEMORY_ADDR_LEB: case wasm::R_WASM_TAG_INDEX_LEB: case wasm::R_WASM_TABLE_NUMBER_LEB: - writePatchableLEB<5>(Stream, Value, Offset); + writePatchableU32(Stream, Value, Offset); break; case wasm::R_WASM_MEMORY_ADDR_LEB64: - writePatchableLEB<10>(Stream, Value, Offset); + writePatchableU64(Stream, Value, Offset); break; case wasm::R_WASM_TABLE_INDEX_I32: case wasm::R_WASM_MEMORY_ADDR_I32: case wasm::R_WASM_FUNCTION_OFFSET_I32: case wasm::R_WASM_SECTION_OFFSET_I32: case wasm::R_WASM_GLOBAL_INDEX_I32: case wasm::R_WASM_MEMORY_ADDR_LOCREL_I32: patchI32(Stream, Value, Offset); break; case wasm::R_WASM_TABLE_INDEX_I64: case wasm::R_WASM_MEMORY_ADDR_I64: case wasm::R_WASM_FUNCTION_OFFSET_I64: patchI64(Stream, Value, Offset); break; case wasm::R_WASM_TABLE_INDEX_SLEB: case wasm::R_WASM_TABLE_INDEX_REL_SLEB: case wasm::R_WASM_MEMORY_ADDR_SLEB: case wasm::R_WASM_MEMORY_ADDR_REL_SLEB: case wasm::R_WASM_MEMORY_ADDR_TLS_SLEB: - writePatchableSLEB<5>(Stream, Value, Offset); + writePatchableS32(Stream, Value, Offset); break; case wasm::R_WASM_TABLE_INDEX_SLEB64: case wasm::R_WASM_TABLE_INDEX_REL_SLEB64: case wasm::R_WASM_MEMORY_ADDR_SLEB64: case wasm::R_WASM_MEMORY_ADDR_REL_SLEB64: case wasm::R_WASM_MEMORY_ADDR_TLS_SLEB64: - writePatchableSLEB<10>(Stream, Value, Offset); + writePatchableS64(Stream, Value, Offset); break; default: llvm_unreachable("invalid relocation type"); } } } void WasmObjectWriter::writeTypeSection( ArrayRef Signatures) { if (Signatures.empty()) return; SectionBookkeeping Section; startSection(Section, wasm::WASM_SEC_TYPE); encodeULEB128(Signatures.size(), W->OS); for (const wasm::WasmSignature &Sig : Signatures) { W->OS << char(wasm::WASM_TYPE_FUNC); 
encodeULEB128(Sig.Params.size(), W->OS); for (wasm::ValType Ty : Sig.Params) writeValueType(Ty); encodeULEB128(Sig.Returns.size(), W->OS); for (wasm::ValType Ty : Sig.Returns) writeValueType(Ty); } endSection(Section); } void WasmObjectWriter::writeImportSection(ArrayRef Imports, uint64_t DataSize, uint32_t NumElements) { if (Imports.empty()) return; uint64_t NumPages = (DataSize + wasm::WasmPageSize - 1) / wasm::WasmPageSize; SectionBookkeeping Section; startSection(Section, wasm::WASM_SEC_IMPORT); encodeULEB128(Imports.size(), W->OS); for (const wasm::WasmImport &Import : Imports) { writeString(Import.Module); writeString(Import.Field); W->OS << char(Import.Kind); switch (Import.Kind) { case wasm::WASM_EXTERNAL_FUNCTION: encodeULEB128(Import.SigIndex, W->OS); break; case wasm::WASM_EXTERNAL_GLOBAL: W->OS << char(Import.Global.Type); W->OS << char(Import.Global.Mutable ? 1 : 0); break; case wasm::WASM_EXTERNAL_MEMORY: encodeULEB128(Import.Memory.Flags, W->OS); encodeULEB128(NumPages, W->OS); // initial break; case wasm::WASM_EXTERNAL_TABLE: W->OS << char(Import.Table.ElemType); encodeULEB128(0, W->OS); // flags encodeULEB128(NumElements, W->OS); // initial break; case wasm::WASM_EXTERNAL_TAG: W->OS << char(0); // Reserved 'attribute' field encodeULEB128(Import.SigIndex, W->OS); break; default: llvm_unreachable("unsupported import kind"); } } endSection(Section); } void WasmObjectWriter::writeFunctionSection(ArrayRef Functions) { if (Functions.empty()) return; SectionBookkeeping Section; startSection(Section, wasm::WASM_SEC_FUNCTION); encodeULEB128(Functions.size(), W->OS); for (const WasmFunction &Func : Functions) encodeULEB128(Func.SigIndex, W->OS); endSection(Section); } void WasmObjectWriter::writeTagSection(ArrayRef TagTypes) { if (TagTypes.empty()) return; SectionBookkeeping Section; startSection(Section, wasm::WASM_SEC_TAG); encodeULEB128(TagTypes.size(), W->OS); for (uint32_t Index : TagTypes) { W->OS << char(0); // Reserved 'attribute' field encodeULEB128(Index, W->OS); } endSection(Section); } void WasmObjectWriter::writeGlobalSection(ArrayRef Globals) { if (Globals.empty()) return; SectionBookkeeping Section; startSection(Section, wasm::WASM_SEC_GLOBAL); encodeULEB128(Globals.size(), W->OS); for (const wasm::WasmGlobal &Global : Globals) { encodeULEB128(Global.Type.Type, W->OS); W->OS << char(Global.Type.Mutable); W->OS << char(Global.InitExpr.Opcode); switch (Global.Type.Type) { case wasm::WASM_TYPE_I32: encodeSLEB128(0, W->OS); break; case wasm::WASM_TYPE_I64: encodeSLEB128(0, W->OS); break; case wasm::WASM_TYPE_F32: writeI32(0); break; case wasm::WASM_TYPE_F64: writeI64(0); break; case wasm::WASM_TYPE_EXTERNREF: writeValueType(wasm::ValType::EXTERNREF); break; default: llvm_unreachable("unexpected type"); } W->OS << char(wasm::WASM_OPCODE_END); } endSection(Section); } void WasmObjectWriter::writeTableSection(ArrayRef Tables) { if (Tables.empty()) return; SectionBookkeeping Section; startSection(Section, wasm::WASM_SEC_TABLE); encodeULEB128(Tables.size(), W->OS); for (const wasm::WasmTable &Table : Tables) { encodeULEB128(Table.Type.ElemType, W->OS); encodeULEB128(Table.Type.Limits.Flags, W->OS); encodeULEB128(Table.Type.Limits.Minimum, W->OS); if (Table.Type.Limits.Flags & wasm::WASM_LIMITS_FLAG_HAS_MAX) encodeULEB128(Table.Type.Limits.Maximum, W->OS); } endSection(Section); } void WasmObjectWriter::writeExportSection(ArrayRef Exports) { if (Exports.empty()) return; SectionBookkeeping Section; startSection(Section, wasm::WASM_SEC_EXPORT); encodeULEB128(Exports.size(), 
W->OS); for (const wasm::WasmExport &Export : Exports) { writeString(Export.Name); W->OS << char(Export.Kind); encodeULEB128(Export.Index, W->OS); } endSection(Section); } void WasmObjectWriter::writeElemSection( const MCSymbolWasm *IndirectFunctionTable, ArrayRef TableElems) { if (TableElems.empty()) return; assert(IndirectFunctionTable); SectionBookkeeping Section; startSection(Section, wasm::WASM_SEC_ELEM); encodeULEB128(1, W->OS); // number of "segments" assert(WasmIndices.count(IndirectFunctionTable)); uint32_t TableNumber = WasmIndices.find(IndirectFunctionTable)->second; uint32_t Flags = 0; if (TableNumber) Flags |= wasm::WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER; encodeULEB128(Flags, W->OS); if (Flags & wasm::WASM_ELEM_SEGMENT_HAS_TABLE_NUMBER) encodeULEB128(TableNumber, W->OS); // the table number // init expr for starting offset W->OS << char(wasm::WASM_OPCODE_I32_CONST); encodeSLEB128(InitialTableOffset, W->OS); W->OS << char(wasm::WASM_OPCODE_END); if (Flags & wasm::WASM_ELEM_SEGMENT_MASK_HAS_ELEM_KIND) { // We only write active function table initializers, for which the elem kind // is specified to be written as 0x00 and interpreted to mean "funcref". const uint8_t ElemKind = 0; W->OS << ElemKind; } encodeULEB128(TableElems.size(), W->OS); for (uint32_t Elem : TableElems) encodeULEB128(Elem, W->OS); endSection(Section); } void WasmObjectWriter::writeDataCountSection() { if (DataSegments.empty()) return; SectionBookkeeping Section; startSection(Section, wasm::WASM_SEC_DATACOUNT); encodeULEB128(DataSegments.size(), W->OS); endSection(Section); } uint32_t WasmObjectWriter::writeCodeSection(const MCAssembler &Asm, const MCAsmLayout &Layout, ArrayRef Functions) { if (Functions.empty()) return 0; SectionBookkeeping Section; startSection(Section, wasm::WASM_SEC_CODE); encodeULEB128(Functions.size(), W->OS); for (const WasmFunction &Func : Functions) { auto &FuncSection = static_cast(Func.Sym->getSection()); int64_t Size = 0; if (!Func.Sym->getSize()->evaluateAsAbsolute(Size, Layout)) report_fatal_error(".size expression must be evaluatable"); encodeULEB128(Size, W->OS); FuncSection.setSectionOffset(W->OS.tell() - Section.ContentsOffset); Asm.writeSectionData(W->OS, &FuncSection, Layout); } // Apply fixups. applyRelocations(CodeRelocations, Section.ContentsOffset, Layout); endSection(Section); return Section.Index; } uint32_t WasmObjectWriter::writeDataSection(const MCAsmLayout &Layout) { if (DataSegments.empty()) return 0; SectionBookkeeping Section; startSection(Section, wasm::WASM_SEC_DATA); encodeULEB128(DataSegments.size(), W->OS); // count for (const WasmDataSegment &Segment : DataSegments) { encodeULEB128(Segment.InitFlags, W->OS); // flags if (Segment.InitFlags & wasm::WASM_DATA_SEGMENT_HAS_MEMINDEX) encodeULEB128(0, W->OS); // memory index if ((Segment.InitFlags & wasm::WASM_DATA_SEGMENT_IS_PASSIVE) == 0) { W->OS << char(is64Bit() ? wasm::WASM_OPCODE_I64_CONST : wasm::WASM_OPCODE_I32_CONST); encodeSLEB128(Segment.Offset, W->OS); // offset W->OS << char(wasm::WASM_OPCODE_END); } encodeULEB128(Segment.Data.size(), W->OS); // size Segment.Section->setSectionOffset(W->OS.tell() - Section.ContentsOffset); W->OS << Segment.Data; // data } // Apply fixups. applyRelocations(DataRelocations, Section.ContentsOffset, Layout); endSection(Section); return Section.Index; } void WasmObjectWriter::writeRelocSection( uint32_t SectionIndex, StringRef Name, std::vector &Relocs) { // See: https://github.com/WebAssembly/tool-conventions/blob/main/Linking.md // for descriptions of the reloc sections. 
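// Illustrative, hypothetical sketch (not from the LLVM sources): the reloc
// records emitted below tell the linker where to re-apply relocations; within
// this writer the provisional values were already patched in place by
// applyRelocations() above, which works because relocatable LEB128 fields are
// emitted zero-padded to a fixed width (5 bytes for 32-bit, 10 for 64-bit
// values). A minimal helper in the spirit of writePatchableU32, assuming
// llvm/Support/LEB128.h and a raw_pwrite_stream:
auto PatchPaddedULEB32 = [](llvm::raw_pwrite_stream &Stream, uint32_t Value,
                            uint64_t Offset) {
  uint8_t Buffer[5];
  unsigned Len = llvm::encodeULEB128(Value, Buffer, /*PadTo=*/5); // always 5 bytes
  Stream.pwrite(reinterpret_cast<char *>(Buffer), Len, Offset);   // overwrite in place
};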
if (Relocs.empty()) return; // First, ensure the relocations are sorted in offset order. In general they // should already be sorted since `recordRelocation` is called in offset // order, but for the code section we combine many MC sections into single // wasm section, and this order is determined by the order of Asm.Symbols() // not the sections order. llvm::stable_sort( Relocs, [](const WasmRelocationEntry &A, const WasmRelocationEntry &B) { return (A.Offset + A.FixupSection->getSectionOffset()) < (B.Offset + B.FixupSection->getSectionOffset()); }); SectionBookkeeping Section; startCustomSection(Section, std::string("reloc.") + Name.str()); encodeULEB128(SectionIndex, W->OS); encodeULEB128(Relocs.size(), W->OS); for (const WasmRelocationEntry &RelEntry : Relocs) { uint64_t Offset = RelEntry.Offset + RelEntry.FixupSection->getSectionOffset(); uint32_t Index = getRelocationIndexValue(RelEntry); W->OS << char(RelEntry.Type); encodeULEB128(Offset, W->OS); encodeULEB128(Index, W->OS); if (RelEntry.hasAddend()) encodeSLEB128(RelEntry.Addend, W->OS); } endSection(Section); } void WasmObjectWriter::writeCustomRelocSections() { for (const auto &Sec : CustomSections) { auto &Relocations = CustomSectionsRelocations[Sec.Section]; writeRelocSection(Sec.OutputIndex, Sec.Name, Relocations); } } void WasmObjectWriter::writeLinkingMetaDataSection( ArrayRef SymbolInfos, ArrayRef> InitFuncs, const std::map> &Comdats) { SectionBookkeeping Section; startCustomSection(Section, "linking"); encodeULEB128(wasm::WasmMetadataVersion, W->OS); SectionBookkeeping SubSection; if (SymbolInfos.size() != 0) { startSection(SubSection, wasm::WASM_SYMBOL_TABLE); encodeULEB128(SymbolInfos.size(), W->OS); for (const wasm::WasmSymbolInfo &Sym : SymbolInfos) { encodeULEB128(Sym.Kind, W->OS); encodeULEB128(Sym.Flags, W->OS); switch (Sym.Kind) { case wasm::WASM_SYMBOL_TYPE_FUNCTION: case wasm::WASM_SYMBOL_TYPE_GLOBAL: case wasm::WASM_SYMBOL_TYPE_TAG: case wasm::WASM_SYMBOL_TYPE_TABLE: encodeULEB128(Sym.ElementIndex, W->OS); if ((Sym.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0 || (Sym.Flags & wasm::WASM_SYMBOL_EXPLICIT_NAME) != 0) writeString(Sym.Name); break; case wasm::WASM_SYMBOL_TYPE_DATA: writeString(Sym.Name); if ((Sym.Flags & wasm::WASM_SYMBOL_UNDEFINED) == 0) { encodeULEB128(Sym.DataRef.Segment, W->OS); encodeULEB128(Sym.DataRef.Offset, W->OS); encodeULEB128(Sym.DataRef.Size, W->OS); } break; case wasm::WASM_SYMBOL_TYPE_SECTION: { const uint32_t SectionIndex = CustomSections[Sym.ElementIndex].OutputIndex; encodeULEB128(SectionIndex, W->OS); break; } default: llvm_unreachable("unexpected kind"); } } endSection(SubSection); } if (DataSegments.size()) { startSection(SubSection, wasm::WASM_SEGMENT_INFO); encodeULEB128(DataSegments.size(), W->OS); for (const WasmDataSegment &Segment : DataSegments) { writeString(Segment.Name); encodeULEB128(Segment.Alignment, W->OS); encodeULEB128(Segment.LinkingFlags, W->OS); } endSection(SubSection); } if (!InitFuncs.empty()) { startSection(SubSection, wasm::WASM_INIT_FUNCS); encodeULEB128(InitFuncs.size(), W->OS); for (auto &StartFunc : InitFuncs) { encodeULEB128(StartFunc.first, W->OS); // priority encodeULEB128(StartFunc.second, W->OS); // function index } endSection(SubSection); } if (Comdats.size()) { startSection(SubSection, wasm::WASM_COMDAT_INFO); encodeULEB128(Comdats.size(), W->OS); for (const auto &C : Comdats) { writeString(C.first); encodeULEB128(0, W->OS); // flags for future use encodeULEB128(C.second.size(), W->OS); for (const WasmComdatEntry &Entry : C.second) { 
encodeULEB128(Entry.Kind, W->OS); encodeULEB128(Entry.Index, W->OS); } } endSection(SubSection); } endSection(Section); } void WasmObjectWriter::writeCustomSection(WasmCustomSection &CustomSection, const MCAssembler &Asm, const MCAsmLayout &Layout) { SectionBookkeeping Section; auto *Sec = CustomSection.Section; startCustomSection(Section, CustomSection.Name); Sec->setSectionOffset(W->OS.tell() - Section.ContentsOffset); Asm.writeSectionData(W->OS, Sec, Layout); CustomSection.OutputContentsOffset = Section.ContentsOffset; CustomSection.OutputIndex = Section.Index; endSection(Section); // Apply fixups. auto &Relocations = CustomSectionsRelocations[CustomSection.Section]; applyRelocations(Relocations, CustomSection.OutputContentsOffset, Layout); } uint32_t WasmObjectWriter::getFunctionType(const MCSymbolWasm &Symbol) { assert(Symbol.isFunction()); assert(TypeIndices.count(&Symbol)); return TypeIndices[&Symbol]; } uint32_t WasmObjectWriter::getTagType(const MCSymbolWasm &Symbol) { assert(Symbol.isTag()); assert(TypeIndices.count(&Symbol)); return TypeIndices[&Symbol]; } void WasmObjectWriter::registerFunctionType(const MCSymbolWasm &Symbol) { assert(Symbol.isFunction()); wasm::WasmSignature S; if (auto *Sig = Symbol.getSignature()) { S.Returns = Sig->Returns; S.Params = Sig->Params; } auto Pair = SignatureIndices.insert(std::make_pair(S, Signatures.size())); if (Pair.second) Signatures.push_back(S); TypeIndices[&Symbol] = Pair.first->second; LLVM_DEBUG(dbgs() << "registerFunctionType: " << Symbol << " new:" << Pair.second << "\n"); LLVM_DEBUG(dbgs() << " -> type index: " << Pair.first->second << "\n"); } void WasmObjectWriter::registerTagType(const MCSymbolWasm &Symbol) { assert(Symbol.isTag()); // TODO Currently we don't generate imported exceptions, but if we do, we // should have a way of infering types of imported exceptions. wasm::WasmSignature S; if (auto *Sig = Symbol.getSignature()) { S.Returns = Sig->Returns; S.Params = Sig->Params; } auto Pair = SignatureIndices.insert(std::make_pair(S, Signatures.size())); if (Pair.second) Signatures.push_back(S); TypeIndices[&Symbol] = Pair.first->second; LLVM_DEBUG(dbgs() << "registerTagType: " << Symbol << " new:" << Pair.second << "\n"); LLVM_DEBUG(dbgs() << " -> type index: " << Pair.first->second << "\n"); } static bool isInSymtab(const MCSymbolWasm &Sym) { if (Sym.isUsedInReloc() || Sym.isUsedInInitArray()) return true; if (Sym.isComdat() && !Sym.isDefined()) return false; if (Sym.isTemporary()) return false; if (Sym.isSection()) return false; if (Sym.omitFromLinkingSection()) return false; return true; } void WasmObjectWriter::prepareImports( SmallVectorImpl &Imports, MCAssembler &Asm, const MCAsmLayout &Layout) { // For now, always emit the memory import, since loads and stores are not // valid without it. In the future, we could perhaps be more clever and omit // it if there are no loads or stores. wasm::WasmImport MemImport; MemImport.Module = "env"; MemImport.Field = "__linear_memory"; MemImport.Kind = wasm::WASM_EXTERNAL_MEMORY; MemImport.Memory.Flags = is64Bit() ? wasm::WASM_LIMITS_FLAG_IS_64 : wasm::WASM_LIMITS_FLAG_NONE; Imports.push_back(MemImport); // Populate SignatureIndices, and Imports and WasmIndices for undefined // symbols. This must be done before populating WasmIndices for defined // symbols. for (const MCSymbol &S : Asm.symbols()) { const auto &WS = static_cast(S); // Register types for all functions, including those with private linkage // (because wasm always needs a type signature). 
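// Minimal sketch, assuming only <map>, <string> and <vector> (hypothetical,
// not from the LLVM sources): registerFunctionType() and registerTagType()
// above intern signatures with the "insert returns {iterator, inserted}"
// idiom, so each distinct signature lands in the type section once and every
// symbol maps to its index. The same pattern in miniature:
auto InternSignature = [](const std::string &Sig,
                          std::map<std::string, unsigned> &Indices,
                          std::vector<std::string> &Table) -> unsigned {
  auto Pair = Indices.insert({Sig, (unsigned)Table.size()});
  if (Pair.second)           // first occurrence of this signature
    Table.push_back(Sig);    // append it to the type table
  return Pair.first->second; // index of the (possibly pre-existing) entry
};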
if (WS.isFunction()) { const auto *BS = Layout.getBaseSymbol(S); if (!BS) report_fatal_error(Twine(S.getName()) + ": absolute addressing not supported!"); registerFunctionType(*cast(BS)); } if (WS.isTag()) registerTagType(WS); if (WS.isTemporary()) continue; // If the symbol is not defined in this translation unit, import it. if (!WS.isDefined() && !WS.isComdat()) { if (WS.isFunction()) { wasm::WasmImport Import; Import.Module = WS.getImportModule(); Import.Field = WS.getImportName(); Import.Kind = wasm::WASM_EXTERNAL_FUNCTION; Import.SigIndex = getFunctionType(WS); Imports.push_back(Import); assert(WasmIndices.count(&WS) == 0); WasmIndices[&WS] = NumFunctionImports++; } else if (WS.isGlobal()) { if (WS.isWeak()) report_fatal_error("undefined global symbol cannot be weak"); wasm::WasmImport Import; Import.Field = WS.getImportName(); Import.Kind = wasm::WASM_EXTERNAL_GLOBAL; Import.Module = WS.getImportModule(); Import.Global = WS.getGlobalType(); Imports.push_back(Import); assert(WasmIndices.count(&WS) == 0); WasmIndices[&WS] = NumGlobalImports++; } else if (WS.isTag()) { if (WS.isWeak()) report_fatal_error("undefined tag symbol cannot be weak"); wasm::WasmImport Import; Import.Module = WS.getImportModule(); Import.Field = WS.getImportName(); Import.Kind = wasm::WASM_EXTERNAL_TAG; Import.SigIndex = getTagType(WS); Imports.push_back(Import); assert(WasmIndices.count(&WS) == 0); WasmIndices[&WS] = NumTagImports++; } else if (WS.isTable()) { if (WS.isWeak()) report_fatal_error("undefined table symbol cannot be weak"); wasm::WasmImport Import; Import.Module = WS.getImportModule(); Import.Field = WS.getImportName(); Import.Kind = wasm::WASM_EXTERNAL_TABLE; Import.Table = WS.getTableType(); Imports.push_back(Import); assert(WasmIndices.count(&WS) == 0); WasmIndices[&WS] = NumTableImports++; } } } // Add imports for GOT globals for (const MCSymbol &S : Asm.symbols()) { const auto &WS = static_cast(S); if (WS.isUsedInGOT()) { wasm::WasmImport Import; if (WS.isFunction()) Import.Module = "GOT.func"; else Import.Module = "GOT.mem"; Import.Field = WS.getName(); Import.Kind = wasm::WASM_EXTERNAL_GLOBAL; Import.Global = {wasm::WASM_TYPE_I32, true}; Imports.push_back(Import); assert(GOTIndices.count(&WS) == 0); GOTIndices[&WS] = NumGlobalImports++; } } } uint64_t WasmObjectWriter::writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) { support::endian::Writer MainWriter(*OS, support::little); W = &MainWriter; if (IsSplitDwarf) { uint64_t TotalSize = writeOneObject(Asm, Layout, DwoMode::NonDwoOnly); assert(DwoOS); support::endian::Writer DwoWriter(*DwoOS, support::little); W = &DwoWriter; return TotalSize + writeOneObject(Asm, Layout, DwoMode::DwoOnly); } else { return writeOneObject(Asm, Layout, DwoMode::AllSections); } } uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm, const MCAsmLayout &Layout, DwoMode Mode) { uint64_t StartOffset = W->OS.tell(); SectionCount = 0; CustomSections.clear(); LLVM_DEBUG(dbgs() << "WasmObjectWriter::writeObject\n"); // Collect information from the available symbols. SmallVector Functions; SmallVector TableElems; SmallVector Imports; SmallVector Exports; SmallVector TagTypes; SmallVector Globals; SmallVector Tables; SmallVector SymbolInfos; SmallVector, 2> InitFuncs; std::map> Comdats; uint64_t DataSize = 0; if (Mode != DwoMode::DwoOnly) { prepareImports(Imports, Asm, Layout); } // Populate DataSegments and CustomSections, which must be done before // populating DataLocations. 
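// Minimal sketch (hypothetical, not from the LLVM sources): the loop below
// lays data segments out in a single linear image by rounding the running
// size up to each section's alignment before recording the segment offset and
// then advancing past the segment's bytes; assuming llvm::alignTo from
// llvm/Support/MathExtras.h:
uint64_t RunningSize = 0;
for (uint64_t SegBytes : {13u, 100u, 3u}) {            // example segment sizes
  uint64_t SegOffset = llvm::alignTo(RunningSize, 16); // aligned start offset
  RunningSize = SegOffset + SegBytes;                  // advance past contents
  (void)SegOffset;
}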
for (MCSection &Sec : Asm) { auto &Section = static_cast(Sec); StringRef SectionName = Section.getName(); if (Mode == DwoMode::NonDwoOnly && isDwoSection(Sec)) continue; if (Mode == DwoMode::DwoOnly && !isDwoSection(Sec)) continue; LLVM_DEBUG(dbgs() << "Processing Section " << SectionName << " group " << Section.getGroup() << "\n";); // .init_array sections are handled specially elsewhere. if (SectionName.startswith(".init_array")) continue; // Code is handled separately if (Section.getKind().isText()) continue; if (Section.isWasmData()) { uint32_t SegmentIndex = DataSegments.size(); DataSize = alignTo(DataSize, Section.getAlignment()); DataSegments.emplace_back(); WasmDataSegment &Segment = DataSegments.back(); Segment.Name = SectionName; Segment.InitFlags = Section.getPassive() ? (uint32_t)wasm::WASM_DATA_SEGMENT_IS_PASSIVE : 0; Segment.Offset = DataSize; Segment.Section = &Section; addData(Segment.Data, Section); Segment.Alignment = Log2_32(Section.getAlignment()); Segment.LinkingFlags = Section.getSegmentFlags(); DataSize += Segment.Data.size(); Section.setSegmentIndex(SegmentIndex); if (const MCSymbolWasm *C = Section.getGroup()) { Comdats[C->getName()].emplace_back( WasmComdatEntry{wasm::WASM_COMDAT_DATA, SegmentIndex}); } } else { // Create custom sections assert(Sec.getKind().isMetadata()); StringRef Name = SectionName; // For user-defined custom sections, strip the prefix if (Name.startswith(".custom_section.")) Name = Name.substr(strlen(".custom_section.")); MCSymbol *Begin = Sec.getBeginSymbol(); if (Begin) { assert(WasmIndices.count(cast(Begin)) == 0); WasmIndices[cast(Begin)] = CustomSections.size(); } // Separate out the producers and target features sections if (Name == "producers") { ProducersSection = std::make_unique(Name, &Section); continue; } if (Name == "target_features") { TargetFeaturesSection = std::make_unique(Name, &Section); continue; } // Custom sections can also belong to COMDAT groups. In this case the // decriptor's "index" field is the section index (in the final object // file), but that is not known until after layout, so it must be fixed up // later if (const MCSymbolWasm *C = Section.getGroup()) { Comdats[C->getName()].emplace_back( WasmComdatEntry{wasm::WASM_COMDAT_SECTION, static_cast(CustomSections.size())}); } CustomSections.emplace_back(Name, &Section); } } if (Mode != DwoMode::DwoOnly) { // Populate WasmIndices and DataLocations for defined symbols. for (const MCSymbol &S : Asm.symbols()) { // Ignore unnamed temporary symbols, which aren't ever exported, imported, // or used in relocations. if (S.isTemporary() && S.getName().empty()) continue; const auto &WS = static_cast(S); LLVM_DEBUG(dbgs() << "MCSymbol: " << toString(WS.getType().getValueOr(wasm::WASM_SYMBOL_TYPE_DATA)) << " '" << S << "'" << " isDefined=" << S.isDefined() << " isExternal=" << S.isExternal() << " isTemporary=" << S.isTemporary() << " isWeak=" << WS.isWeak() << " isHidden=" << WS.isHidden() << " isVariable=" << WS.isVariable() << "\n"); if (WS.isVariable()) continue; if (WS.isComdat() && !WS.isDefined()) continue; if (WS.isFunction()) { unsigned Index; if (WS.isDefined()) { if (WS.getOffset() != 0) report_fatal_error( "function sections must contain one function each"); if (WS.getSize() == nullptr) report_fatal_error( "function symbols must have a size set with .size"); // A definition. Write out the function body. 
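// Illustrative note (hypothetical encoder, not from the LLVM sources):
// "writing out the body" happens in writeCodeSection() above, where each
// code-section entry is the function's size (taken from the .size expression
// checked just above) as a ULEB128, followed by the raw body bytes. Assuming
// llvm/Support/LEB128.h:
auto EncodeCodeEntry = [](llvm::raw_ostream &OS, llvm::ArrayRef<char> Body) {
  llvm::encodeULEB128(Body.size(), OS); // body size in bytes
  OS.write(Body.data(), Body.size());   // locals + instructions, verbatim
};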
Index = NumFunctionImports + Functions.size(); WasmFunction Func; Func.SigIndex = getFunctionType(WS); Func.Sym = &WS; assert(WasmIndices.count(&WS) == 0); WasmIndices[&WS] = Index; Functions.push_back(Func); auto &Section = static_cast(WS.getSection()); if (const MCSymbolWasm *C = Section.getGroup()) { Comdats[C->getName()].emplace_back( WasmComdatEntry{wasm::WASM_COMDAT_FUNCTION, Index}); } if (WS.hasExportName()) { wasm::WasmExport Export; Export.Name = WS.getExportName(); Export.Kind = wasm::WASM_EXTERNAL_FUNCTION; Export.Index = Index; Exports.push_back(Export); } } else { // An import; the index was assigned above. Index = WasmIndices.find(&WS)->second; } LLVM_DEBUG(dbgs() << " -> function index: " << Index << "\n"); } else if (WS.isData()) { if (!isInSymtab(WS)) continue; if (!WS.isDefined()) { LLVM_DEBUG(dbgs() << " -> segment index: -1" << "\n"); continue; } if (!WS.getSize()) report_fatal_error("data symbols must have a size set with .size: " + WS.getName()); int64_t Size = 0; if (!WS.getSize()->evaluateAsAbsolute(Size, Layout)) report_fatal_error(".size expression must be evaluatable"); auto &DataSection = static_cast(WS.getSection()); if (!DataSection.isWasmData()) report_fatal_error("data symbols must live in a data section: " + WS.getName()); // For each data symbol, export it in the symtab as a reference to the // corresponding Wasm data segment. wasm::WasmDataReference Ref = wasm::WasmDataReference{ DataSection.getSegmentIndex(), Layout.getSymbolOffset(WS), static_cast(Size)}; assert(DataLocations.count(&WS) == 0); DataLocations[&WS] = Ref; LLVM_DEBUG(dbgs() << " -> segment index: " << Ref.Segment << "\n"); } else if (WS.isGlobal()) { // A "true" Wasm global (currently just __stack_pointer) if (WS.isDefined()) { wasm::WasmGlobal Global; Global.Type = WS.getGlobalType(); Global.Index = NumGlobalImports + Globals.size(); switch (Global.Type.Type) { case wasm::WASM_TYPE_I32: Global.InitExpr.Opcode = wasm::WASM_OPCODE_I32_CONST; break; case wasm::WASM_TYPE_I64: Global.InitExpr.Opcode = wasm::WASM_OPCODE_I64_CONST; break; case wasm::WASM_TYPE_F32: Global.InitExpr.Opcode = wasm::WASM_OPCODE_F32_CONST; break; case wasm::WASM_TYPE_F64: Global.InitExpr.Opcode = wasm::WASM_OPCODE_F64_CONST; break; case wasm::WASM_TYPE_EXTERNREF: Global.InitExpr.Opcode = wasm::WASM_OPCODE_REF_NULL; break; default: llvm_unreachable("unexpected type"); } assert(WasmIndices.count(&WS) == 0); WasmIndices[&WS] = Global.Index; Globals.push_back(Global); } else { // An import; the index was assigned above LLVM_DEBUG(dbgs() << " -> global index: " << WasmIndices.find(&WS)->second << "\n"); } } else if (WS.isTable()) { if (WS.isDefined()) { wasm::WasmTable Table; Table.Index = NumTableImports + Tables.size(); Table.Type = WS.getTableType(); assert(WasmIndices.count(&WS) == 0); WasmIndices[&WS] = Table.Index; Tables.push_back(Table); } LLVM_DEBUG(dbgs() << " -> table index: " << WasmIndices.find(&WS)->second << "\n"); } else if (WS.isTag()) { // C++ exception symbol (__cpp_exception) or longjmp symbol // (__c_longjmp) unsigned Index; if (WS.isDefined()) { Index = NumTagImports + TagTypes.size(); uint32_t SigIndex = getTagType(WS); assert(WasmIndices.count(&WS) == 0); WasmIndices[&WS] = Index; TagTypes.push_back(SigIndex); } else { // An import; the index was assigned above. assert(WasmIndices.count(&WS) > 0); } LLVM_DEBUG(dbgs() << " -> tag index: " << WasmIndices.find(&WS)->second << "\n"); } else { assert(WS.isSection()); } } // Populate WasmIndices and DataLocations for aliased symbols. 
We need to // process these in a separate pass because we need to have processed the // target of the alias before the alias itself and the symbols are not // necessarily ordered in this way. for (const MCSymbol &S : Asm.symbols()) { if (!S.isVariable()) continue; assert(S.isDefined()); const auto *BS = Layout.getBaseSymbol(S); if (!BS) report_fatal_error(Twine(S.getName()) + ": absolute addressing not supported!"); const MCSymbolWasm *Base = cast(BS); // Find the target symbol of this weak alias and export that index const auto &WS = static_cast(S); LLVM_DEBUG(dbgs() << WS.getName() << ": weak alias of '" << *Base << "'\n"); if (Base->isFunction()) { assert(WasmIndices.count(Base) > 0); uint32_t WasmIndex = WasmIndices.find(Base)->second; assert(WasmIndices.count(&WS) == 0); WasmIndices[&WS] = WasmIndex; LLVM_DEBUG(dbgs() << " -> index:" << WasmIndex << "\n"); } else if (Base->isData()) { auto &DataSection = static_cast(WS.getSection()); uint64_t Offset = Layout.getSymbolOffset(S); int64_t Size = 0; // For data symbol alias we use the size of the base symbol as the // size of the alias. When an offset from the base is involved this // can result in a offset + size goes past the end of the data section // which out object format doesn't support. So we must clamp it. if (!Base->getSize()->evaluateAsAbsolute(Size, Layout)) report_fatal_error(".size expression must be evaluatable"); const WasmDataSegment &Segment = DataSegments[DataSection.getSegmentIndex()]; Size = std::min(static_cast(Size), Segment.Data.size() - Offset); wasm::WasmDataReference Ref = wasm::WasmDataReference{ DataSection.getSegmentIndex(), static_cast(Layout.getSymbolOffset(S)), static_cast(Size)}; DataLocations[&WS] = Ref; LLVM_DEBUG(dbgs() << " -> index:" << Ref.Segment << "\n"); } else { report_fatal_error("don't yet support global/tag aliases"); } } } // Finally, populate the symbol table itself, in its "natural" order. for (const MCSymbol &S : Asm.symbols()) { const auto &WS = static_cast(S); if (!isInSymtab(WS)) { WS.setIndex(InvalidIndex); continue; } LLVM_DEBUG(dbgs() << "adding to symtab: " << WS << "\n"); uint32_t Flags = 0; if (WS.isWeak()) Flags |= wasm::WASM_SYMBOL_BINDING_WEAK; if (WS.isHidden()) Flags |= wasm::WASM_SYMBOL_VISIBILITY_HIDDEN; if (!WS.isExternal() && WS.isDefined()) Flags |= wasm::WASM_SYMBOL_BINDING_LOCAL; if (WS.isUndefined()) Flags |= wasm::WASM_SYMBOL_UNDEFINED; if (WS.isNoStrip()) { Flags |= wasm::WASM_SYMBOL_NO_STRIP; if (isEmscripten()) { Flags |= wasm::WASM_SYMBOL_EXPORTED; } } if (WS.hasImportName()) Flags |= wasm::WASM_SYMBOL_EXPLICIT_NAME; if (WS.hasExportName()) Flags |= wasm::WASM_SYMBOL_EXPORTED; if (WS.isTLS()) Flags |= wasm::WASM_SYMBOL_TLS; wasm::WasmSymbolInfo Info; Info.Name = WS.getName(); Info.Kind = WS.getType().getValueOr(wasm::WASM_SYMBOL_TYPE_DATA); Info.Flags = Flags; if (!WS.isData()) { assert(WasmIndices.count(&WS) > 0); Info.ElementIndex = WasmIndices.find(&WS)->second; } else if (WS.isDefined()) { assert(DataLocations.count(&WS) > 0); Info.DataRef = DataLocations.find(&WS)->second; } WS.setIndex(SymbolInfos.size()); SymbolInfos.emplace_back(Info); } { auto HandleReloc = [&](const WasmRelocationEntry &Rel) { // Functions referenced by a relocation need to put in the table. This is // purely to make the object file's provisional values readable, and is // ignored by the linker, which re-calculates the relocations itself. 
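// Minimal sketch, assuming only <map> and <vector> (hypothetical, not from the
// LLVM sources): the body below hands each referenced function one slot in the
// indirect function table via the try_emplace "insert only if absent" idiom,
// so repeated references reuse the same slot; InitialTableOffset plays the
// role of FirstSlot here. The same idiom in miniature:
std::map<unsigned, unsigned> SlotOf; // function index -> table slot
std::vector<unsigned> Elems;         // table contents, in slot order
unsigned FirstSlot = 1;              // example starting slot
auto AssignSlot = [&](unsigned FuncIndex) -> unsigned {
  if (SlotOf.try_emplace(FuncIndex, FirstSlot + Elems.size()).second)
    Elems.push_back(FuncIndex);      // newly assigned: record the element
  return SlotOf[FuncIndex];          // stable slot for this function
};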
if (Rel.Type != wasm::R_WASM_TABLE_INDEX_I32 && Rel.Type != wasm::R_WASM_TABLE_INDEX_I64 && Rel.Type != wasm::R_WASM_TABLE_INDEX_SLEB && Rel.Type != wasm::R_WASM_TABLE_INDEX_SLEB64 && Rel.Type != wasm::R_WASM_TABLE_INDEX_REL_SLEB && Rel.Type != wasm::R_WASM_TABLE_INDEX_REL_SLEB64) return; assert(Rel.Symbol->isFunction()); const MCSymbolWasm *Base = cast(Layout.getBaseSymbol(*Rel.Symbol)); uint32_t FunctionIndex = WasmIndices.find(Base)->second; uint32_t TableIndex = TableElems.size() + InitialTableOffset; if (TableIndices.try_emplace(Base, TableIndex).second) { LLVM_DEBUG(dbgs() << " -> adding " << Base->getName() << " to table: " << TableIndex << "\n"); TableElems.push_back(FunctionIndex); registerFunctionType(*Base); } }; for (const WasmRelocationEntry &RelEntry : CodeRelocations) HandleReloc(RelEntry); for (const WasmRelocationEntry &RelEntry : DataRelocations) HandleReloc(RelEntry); } // Translate .init_array section contents into start functions. for (const MCSection &S : Asm) { const auto &WS = static_cast(S); if (WS.getName().startswith(".fini_array")) report_fatal_error(".fini_array sections are unsupported"); if (!WS.getName().startswith(".init_array")) continue; if (WS.getFragmentList().empty()) continue; // init_array is expected to contain a single non-empty data fragment if (WS.getFragmentList().size() != 3) report_fatal_error("only one .init_array section fragment supported"); auto IT = WS.begin(); const MCFragment &EmptyFrag = *IT; if (EmptyFrag.getKind() != MCFragment::FT_Data) report_fatal_error(".init_array section should be aligned"); IT = std::next(IT); const MCFragment &AlignFrag = *IT; if (AlignFrag.getKind() != MCFragment::FT_Align) report_fatal_error(".init_array section should be aligned"); if (cast(AlignFrag).getAlignment() != (is64Bit() ? 8 : 4)) report_fatal_error(".init_array section should be aligned for pointers"); const MCFragment &Frag = *std::next(IT); if (Frag.hasInstructions() || Frag.getKind() != MCFragment::FT_Data) report_fatal_error("only data supported in .init_array section"); uint16_t Priority = UINT16_MAX; unsigned PrefixLength = strlen(".init_array"); if (WS.getName().size() > PrefixLength) { if (WS.getName()[PrefixLength] != '.') report_fatal_error( ".init_array section priority should start with '.'"); if (WS.getName().substr(PrefixLength + 1).getAsInteger(10, Priority)) report_fatal_error("invalid .init_array section priority"); } const auto &DataFrag = cast(Frag); const SmallVectorImpl &Contents = DataFrag.getContents(); for (const uint8_t * P = (const uint8_t *)Contents.data(), *End = (const uint8_t *)Contents.data() + Contents.size(); P != End; ++P) { if (*P != 0) report_fatal_error("non-symbolic data in .init_array section"); } for (const MCFixup &Fixup : DataFrag.getFixups()) { assert(Fixup.getKind() == MCFixup::getKindForSize(is64Bit() ? 8 : 4, false)); const MCExpr *Expr = Fixup.getValue(); auto *SymRef = dyn_cast(Expr); if (!SymRef) report_fatal_error("fixups in .init_array should be symbol references"); const auto &TargetSym = cast(SymRef->getSymbol()); if (TargetSym.getIndex() == InvalidIndex) report_fatal_error("symbols in .init_array should exist in symtab"); if (!TargetSym.isFunction()) report_fatal_error("symbols in .init_array should be for functions"); InitFuncs.push_back( std::make_pair(Priority, TargetSym.getIndex())); } } // Write out the Wasm header. 
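// Minimal sketch (hypothetical, not from the LLVM sources): writeHeader()
// emits the fixed 8-byte wasm preamble, the magic "\0asm" followed by the
// 32-bit version (currently 1) in little-endian order, i.e. the bytes
// 00 61 73 6D 01 00 00 00. A standalone equivalent, assuming
// llvm/Support/EndianStream.h:
auto WriteWasmPreamble = [](llvm::raw_ostream &OS) {
  OS.write("\0asm", 4);                                                 // magic
  llvm::support::endian::write<uint32_t>(OS, 1, llvm::support::little); // version
};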
writeHeader(Asm); uint32_t CodeSectionIndex, DataSectionIndex; if (Mode != DwoMode::DwoOnly) { writeTypeSection(Signatures); writeImportSection(Imports, DataSize, TableElems.size()); writeFunctionSection(Functions); writeTableSection(Tables); // Skip the "memory" section; we import the memory instead. writeTagSection(TagTypes); writeGlobalSection(Globals); writeExportSection(Exports); const MCSymbol *IndirectFunctionTable = Asm.getContext().lookupSymbol("__indirect_function_table"); writeElemSection(cast_or_null(IndirectFunctionTable), TableElems); writeDataCountSection(); CodeSectionIndex = writeCodeSection(Asm, Layout, Functions); DataSectionIndex = writeDataSection(Layout); } // The Sections in the COMDAT list have placeholder indices (their index among // custom sections, rather than among all sections). Fix them up here. for (auto &Group : Comdats) { for (auto &Entry : Group.second) { if (Entry.Kind == wasm::WASM_COMDAT_SECTION) { Entry.Index += SectionCount; } } } for (auto &CustomSection : CustomSections) writeCustomSection(CustomSection, Asm, Layout); if (Mode != DwoMode::DwoOnly) { writeLinkingMetaDataSection(SymbolInfos, InitFuncs, Comdats); writeRelocSection(CodeSectionIndex, "CODE", CodeRelocations); writeRelocSection(DataSectionIndex, "DATA", DataRelocations); } writeCustomRelocSections(); if (ProducersSection) writeCustomSection(*ProducersSection, Asm, Layout); if (TargetFeaturesSection) writeCustomSection(*TargetFeaturesSection, Asm, Layout); // TODO: Translate the .comment section to the output. return W->OS.tell() - StartOffset; } std::unique_ptr llvm::createWasmObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS) { return std::make_unique(std::move(MOTW), OS); } std::unique_ptr llvm::createWasmDwoObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS, raw_pwrite_stream &DwoOS) { return std::make_unique(std::move(MOTW), OS, DwoOS); } diff --git a/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp b/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp index 015ca1eec4df..dedfc81f11bb 100644 --- a/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/Passes/PassBuilder.cpp @@ -1,1832 +1,1834 @@ //===- Parsing and selection of pass pipelines ----------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file /// /// This file provides the implementation of the PassBuilder based on our /// static pass registry as well as related functionality. It also provides /// helpers to aid in analyzing, debugging, and testing passes and pass /// pipelines. 
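///
/// A typical embedder wires the analysis managers together and then either
/// builds a preset pipeline or parses a textual one; for example (illustrative
/// usage only, not code from this file; M is the Module being optimized):
///   PassBuilder PB;
///   LoopAnalysisManager LAM;
///   FunctionAnalysisManager FAM;
///   CGSCCAnalysisManager CGAM;
///   ModuleAnalysisManager MAM;
///   PB.registerModuleAnalyses(MAM);
///   PB.registerCGSCCAnalyses(CGAM);
///   PB.registerFunctionAnalyses(FAM);
///   PB.registerLoopAnalyses(LAM);
///   PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
///   ModulePassManager MPM =
///       PB.buildPerModuleDefaultPipeline(OptimizationLevel::O2);
///   MPM.run(M, MAM);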
/// //===----------------------------------------------------------------------===// #include "llvm/Passes/PassBuilder.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/AliasAnalysisEvaluator.h" #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFGPrinter.h" #include "llvm/Analysis/CFLAndersAliasAnalysis.h" #include "llvm/Analysis/CFLSteensAliasAnalysis.h" #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CostModel.h" #include "llvm/Analysis/CycleAnalysis.h" #include "llvm/Analysis/DDG.h" #include "llvm/Analysis/DDGPrinter.h" #include "llvm/Analysis/Delinearization.h" #include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/DivergenceAnalysis.h" #include "llvm/Analysis/DomPrinter.h" #include "llvm/Analysis/DominanceFrontier.h" #include "llvm/Analysis/FunctionPropertiesAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/IRSimilarityIdentifier.h" #include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" #include "llvm/Analysis/InstCount.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/Lint.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopCacheAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopNestAnalysis.h" #include "llvm/Analysis/MemDerefPrinter.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/ModuleDebugInfoPrinter.h" #include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/ObjCARCAliasAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/PhiValues.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/StackLifetime.h" #include "llvm/Analysis/StackSafetyAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PrintPasses.h" #include "llvm/IR/SafepointIRVerifier.h" #include "llvm/IR/Verifier.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Regex.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" #include "llvm/Transforms/Coroutines/CoroCleanup.h" #include "llvm/Transforms/Coroutines/CoroEarly.h" #include "llvm/Transforms/Coroutines/CoroElide.h" #include "llvm/Transforms/Coroutines/CoroSplit.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/Annotation2Metadata.h" #include "llvm/Transforms/IPO/ArgumentPromotion.h" #include "llvm/Transforms/IPO/Attributor.h" #include "llvm/Transforms/IPO/BlockExtractor.h" #include 
"llvm/Transforms/IPO/CalledValuePropagation.h" #include "llvm/Transforms/IPO/ConstantMerge.h" #include "llvm/Transforms/IPO/CrossDSOCFI.h" #include "llvm/Transforms/IPO/DeadArgumentElimination.h" #include "llvm/Transforms/IPO/ElimAvailExtern.h" #include "llvm/Transforms/IPO/ForceFunctionAttrs.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/IPO/FunctionImport.h" #include "llvm/Transforms/IPO/GlobalDCE.h" #include "llvm/Transforms/IPO/GlobalOpt.h" #include "llvm/Transforms/IPO/GlobalSplit.h" #include "llvm/Transforms/IPO/HotColdSplitting.h" #include "llvm/Transforms/IPO/IROutliner.h" #include "llvm/Transforms/IPO/InferFunctionAttrs.h" #include "llvm/Transforms/IPO/Inliner.h" #include "llvm/Transforms/IPO/Internalize.h" #include "llvm/Transforms/IPO/LoopExtractor.h" #include "llvm/Transforms/IPO/LowerTypeTests.h" #include "llvm/Transforms/IPO/MergeFunctions.h" #include "llvm/Transforms/IPO/ModuleInliner.h" #include "llvm/Transforms/IPO/OpenMPOpt.h" #include "llvm/Transforms/IPO/PartialInlining.h" #include "llvm/Transforms/IPO/SCCP.h" #include "llvm/Transforms/IPO/SampleProfile.h" #include "llvm/Transforms/IPO/SampleProfileProbe.h" #include "llvm/Transforms/IPO/StripDeadPrototypes.h" #include "llvm/Transforms/IPO/StripSymbols.h" #include "llvm/Transforms/IPO/SyntheticCountsPropagation.h" #include "llvm/Transforms/IPO/WholeProgramDevirt.h" #include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Instrumentation/AddressSanitizer.h" #include "llvm/Transforms/Instrumentation/BoundsChecking.h" #include "llvm/Transforms/Instrumentation/CGProfile.h" #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" #include "llvm/Transforms/Instrumentation/DataFlowSanitizer.h" #include "llvm/Transforms/Instrumentation/GCOVProfiler.h" #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h" #include "llvm/Transforms/Instrumentation/InstrOrderFile.h" #include "llvm/Transforms/Instrumentation/InstrProfiling.h" #include "llvm/Transforms/Instrumentation/MemProfiler.h" #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" #include "llvm/Transforms/Instrumentation/PoisonChecking.h" #include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Scalar/ADCE.h" #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" #include "llvm/Transforms/Scalar/AnnotationRemarks.h" #include "llvm/Transforms/Scalar/BDCE.h" #include "llvm/Transforms/Scalar/CallSiteSplitting.h" #include "llvm/Transforms/Scalar/ConstantHoisting.h" #include "llvm/Transforms/Scalar/ConstraintElimination.h" #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" #include "llvm/Transforms/Scalar/DCE.h" #include "llvm/Transforms/Scalar/DFAJumpThreading.h" #include "llvm/Transforms/Scalar/DeadStoreElimination.h" #include "llvm/Transforms/Scalar/DivRemPairs.h" #include "llvm/Transforms/Scalar/EarlyCSE.h" #include "llvm/Transforms/Scalar/FlattenCFG.h" #include "llvm/Transforms/Scalar/Float2Int.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/GuardWidening.h" #include "llvm/Transforms/Scalar/IVUsersPrinter.h" #include "llvm/Transforms/Scalar/IndVarSimplify.h" #include "llvm/Transforms/Scalar/InductiveRangeCheckElimination.h" #include "llvm/Transforms/Scalar/InferAddressSpaces.h" #include 
"llvm/Transforms/Scalar/InstSimplifyPass.h" #include "llvm/Transforms/Scalar/JumpThreading.h" #include "llvm/Transforms/Scalar/LICM.h" #include "llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h" #include "llvm/Transforms/Scalar/LoopBoundSplit.h" #include "llvm/Transforms/Scalar/LoopDataPrefetch.h" #include "llvm/Transforms/Scalar/LoopDeletion.h" #include "llvm/Transforms/Scalar/LoopDistribute.h" #include "llvm/Transforms/Scalar/LoopFlatten.h" #include "llvm/Transforms/Scalar/LoopFuse.h" #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" #include "llvm/Transforms/Scalar/LoopInstSimplify.h" #include "llvm/Transforms/Scalar/LoopInterchange.h" #include "llvm/Transforms/Scalar/LoopLoadElimination.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Scalar/LoopPredication.h" #include "llvm/Transforms/Scalar/LoopReroll.h" #include "llvm/Transforms/Scalar/LoopRotation.h" #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h" #include "llvm/Transforms/Scalar/LoopSink.h" #include "llvm/Transforms/Scalar/LoopStrengthReduce.h" #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h" #include "llvm/Transforms/Scalar/LoopUnrollPass.h" #include "llvm/Transforms/Scalar/LoopVersioningLICM.h" #include "llvm/Transforms/Scalar/LowerAtomic.h" #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h" #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" #include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h" #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h" #include "llvm/Transforms/Scalar/LowerWidenableCondition.h" #include "llvm/Transforms/Scalar/MakeGuardsExplicit.h" #include "llvm/Transforms/Scalar/MemCpyOptimizer.h" #include "llvm/Transforms/Scalar/MergeICmps.h" #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" #include "llvm/Transforms/Scalar/NaryReassociate.h" #include "llvm/Transforms/Scalar/NewGVN.h" #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" #include "llvm/Transforms/Scalar/Reassociate.h" #include "llvm/Transforms/Scalar/Reg2Mem.h" #include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h" #include "llvm/Transforms/Scalar/SCCP.h" #include "llvm/Transforms/Scalar/SROA.h" #include "llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h" #include "llvm/Transforms/Scalar/Scalarizer.h" #include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h" #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Scalar/SimplifyCFG.h" #include "llvm/Transforms/Scalar/Sink.h" #include "llvm/Transforms/Scalar/SpeculativeExecution.h" #include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h" #include "llvm/Transforms/Scalar/StructurizeCFG.h" #include "llvm/Transforms/Scalar/TailRecursionElimination.h" #include "llvm/Transforms/Scalar/WarnMissedTransforms.h" #include "llvm/Transforms/Utils/AddDiscriminators.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/Transforms/Utils/BreakCriticalEdges.h" #include "llvm/Transforms/Utils/CanonicalizeAliases.h" #include "llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h" #include "llvm/Transforms/Utils/Debugify.h" #include "llvm/Transforms/Utils/EntryExitInstrumenter.h" #include "llvm/Transforms/Utils/FixIrreducible.h" #include "llvm/Transforms/Utils/HelloWorld.h" #include "llvm/Transforms/Utils/InjectTLIMappings.h" #include "llvm/Transforms/Utils/InstructionNamer.h" #include "llvm/Transforms/Utils/LCSSA.h" #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopVersioning.h" #include 
"llvm/Transforms/Utils/LowerInvoke.h" #include "llvm/Transforms/Utils/LowerSwitch.h" #include "llvm/Transforms/Utils/Mem2Reg.h" #include "llvm/Transforms/Utils/MetaRenamer.h" #include "llvm/Transforms/Utils/NameAnonGlobals.h" #include "llvm/Transforms/Utils/RelLookupTableConverter.h" #include "llvm/Transforms/Utils/StripGCRelocates.h" #include "llvm/Transforms/Utils/StripNonLineTableDebugInfo.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" #include "llvm/Transforms/Utils/UnifyLoopExits.h" #include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h" #include "llvm/Transforms/Vectorize/LoopVectorize.h" #include "llvm/Transforms/Vectorize/SLPVectorizer.h" #include "llvm/Transforms/Vectorize/VectorCombine.h" using namespace llvm; static const Regex DefaultAliasRegex( "^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$"); namespace llvm { cl::opt PrintPipelinePasses( "print-pipeline-passes", cl::desc("Print a '-passes' compatible string describing the pipeline " "(best-effort only).")); } // namespace llvm namespace { // The following passes/analyses have custom names, otherwise their name will // include `(anonymous namespace)`. These are special since they are only for // testing purposes and don't live in a header file. /// No-op module pass which does nothing. struct NoOpModulePass : PassInfoMixin { PreservedAnalyses run(Module &M, ModuleAnalysisManager &) { return PreservedAnalyses::all(); } static StringRef name() { return "NoOpModulePass"; } }; /// No-op module analysis. class NoOpModuleAnalysis : public AnalysisInfoMixin { friend AnalysisInfoMixin; static AnalysisKey Key; public: struct Result {}; Result run(Module &, ModuleAnalysisManager &) { return Result(); } static StringRef name() { return "NoOpModuleAnalysis"; } }; /// No-op CGSCC pass which does nothing. struct NoOpCGSCCPass : PassInfoMixin { PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &, LazyCallGraph &, CGSCCUpdateResult &UR) { return PreservedAnalyses::all(); } static StringRef name() { return "NoOpCGSCCPass"; } }; /// No-op CGSCC analysis. class NoOpCGSCCAnalysis : public AnalysisInfoMixin { friend AnalysisInfoMixin; static AnalysisKey Key; public: struct Result {}; Result run(LazyCallGraph::SCC &, CGSCCAnalysisManager &, LazyCallGraph &G) { return Result(); } static StringRef name() { return "NoOpCGSCCAnalysis"; } }; /// No-op function pass which does nothing. struct NoOpFunctionPass : PassInfoMixin { PreservedAnalyses run(Function &F, FunctionAnalysisManager &) { return PreservedAnalyses::all(); } static StringRef name() { return "NoOpFunctionPass"; } }; /// No-op function analysis. class NoOpFunctionAnalysis : public AnalysisInfoMixin { friend AnalysisInfoMixin; static AnalysisKey Key; public: struct Result {}; Result run(Function &, FunctionAnalysisManager &) { return Result(); } static StringRef name() { return "NoOpFunctionAnalysis"; } }; /// No-op loop nest pass which does nothing. struct NoOpLoopNestPass : PassInfoMixin { PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &, LoopStandardAnalysisResults &, LPMUpdater &) { return PreservedAnalyses::all(); } static StringRef name() { return "NoOpLoopNestPass"; } }; /// No-op loop pass which does nothing. struct NoOpLoopPass : PassInfoMixin { PreservedAnalyses run(Loop &L, LoopAnalysisManager &, LoopStandardAnalysisResults &, LPMUpdater &) { return PreservedAnalyses::all(); } static StringRef name() { return "NoOpLoopPass"; } }; /// No-op loop analysis. 
class NoOpLoopAnalysis : public AnalysisInfoMixin { friend AnalysisInfoMixin; static AnalysisKey Key; public: struct Result {}; Result run(Loop &, LoopAnalysisManager &, LoopStandardAnalysisResults &) { return Result(); } static StringRef name() { return "NoOpLoopAnalysis"; } }; AnalysisKey NoOpModuleAnalysis::Key; AnalysisKey NoOpCGSCCAnalysis::Key; AnalysisKey NoOpFunctionAnalysis::Key; AnalysisKey NoOpLoopAnalysis::Key; /// Whether or not we should populate a PassInstrumentationCallbacks's class to /// pass name map. /// /// This is for optimization purposes so we don't populate it if we never use /// it. This should be updated if new pass instrumentation wants to use the map. /// We currently only use this for --print-before/after. bool shouldPopulateClassToPassNames() { return PrintPipelinePasses || !printBeforePasses().empty() || !printAfterPasses().empty(); } } // namespace PassBuilder::PassBuilder(TargetMachine *TM, PipelineTuningOptions PTO, Optional PGOOpt, PassInstrumentationCallbacks *PIC) : TM(TM), PTO(PTO), PGOOpt(PGOOpt), PIC(PIC) { if (TM) TM->registerPassBuilderCallbacks(*this); if (PIC && shouldPopulateClassToPassNames()) { #define MODULE_PASS(NAME, CREATE_PASS) \ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ PIC->addClassToPassName(CLASS, NAME); #define MODULE_ANALYSIS(NAME, CREATE_PASS) \ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define FUNCTION_PASS(NAME, CREATE_PASS) \ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ PIC->addClassToPassName(CLASS, NAME); #define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define LOOPNEST_PASS(NAME, CREATE_PASS) \ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define LOOP_PASS(NAME, CREATE_PASS) \ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ PIC->addClassToPassName(CLASS, NAME); #define LOOP_ANALYSIS(NAME, CREATE_PASS) \ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define CGSCC_PASS(NAME, CREATE_PASS) \ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ PIC->addClassToPassName(CLASS, NAME); #define CGSCC_ANALYSIS(NAME, CREATE_PASS) \ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME); #include "PassRegistry.def" } } void PassBuilder::registerModuleAnalyses(ModuleAnalysisManager &MAM) { #define MODULE_ANALYSIS(NAME, CREATE_PASS) \ MAM.registerPass([&] { return CREATE_PASS; }); #include "PassRegistry.def" for (auto &C : ModuleAnalysisRegistrationCallbacks) C(MAM); } void PassBuilder::registerCGSCCAnalyses(CGSCCAnalysisManager &CGAM) { #define CGSCC_ANALYSIS(NAME, CREATE_PASS) \ CGAM.registerPass([&] { return CREATE_PASS; }); #include "PassRegistry.def" for (auto &C : CGSCCAnalysisRegistrationCallbacks) C(CGAM); } void PassBuilder::registerFunctionAnalyses(FunctionAnalysisManager &FAM) { // We almost always want the default alias analysis pipeline. // If a user wants a different one, they can register their own before calling // registerFunctionAnalyses(). 
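// Illustrative sketch (hypothetical client code, not from the LLVM sources):
// because registerPass() keeps the first registration of an analysis, a client
// that wants a non-default AA pipeline registers its own AAManager before
// calling registerFunctionAnalyses(), along these lines:
auto PreRegisterCustomAA = [](llvm::FunctionAnalysisManager &ClientFAM) {
  llvm::AAManager AA;
  AA.registerFunctionAnalysis<llvm::BasicAA>();          // keep basic AA
  AA.registerFunctionAnalysis<llvm::ScopedNoAliasAA>();  // add scoped-noalias AA
  ClientFAM.registerPass([&] { return std::move(AA); }); // wins over the default below
};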
FAM.registerPass([&] { return buildDefaultAAPipeline(); }); #define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ FAM.registerPass([&] { return CREATE_PASS; }); #include "PassRegistry.def" for (auto &C : FunctionAnalysisRegistrationCallbacks) C(FAM); } void PassBuilder::registerLoopAnalyses(LoopAnalysisManager &LAM) { #define LOOP_ANALYSIS(NAME, CREATE_PASS) \ LAM.registerPass([&] { return CREATE_PASS; }); #include "PassRegistry.def" for (auto &C : LoopAnalysisRegistrationCallbacks) C(LAM); } static Optional parseRepeatPassName(StringRef Name) { if (!Name.consume_front("repeat<") || !Name.consume_back(">")) return None; int Count; if (Name.getAsInteger(0, Count) || Count <= 0) return None; return Count; } static Optional parseDevirtPassName(StringRef Name) { if (!Name.consume_front("devirt<") || !Name.consume_back(">")) return None; int Count; if (Name.getAsInteger(0, Count) || Count < 0) return None; return Count; } static bool checkParametrizedPassName(StringRef Name, StringRef PassName) { if (!Name.consume_front(PassName)) return false; // normal pass name w/o parameters == default parameters if (Name.empty()) return true; return Name.startswith("<") && Name.endswith(">"); } namespace { /// This performs customized parsing of pass name with parameters. /// /// We do not need parametrization of passes in textual pipeline very often, /// yet on a rare occasion ability to specify parameters right there can be /// useful. /// /// \p Name - parameterized specification of a pass from a textual pipeline /// is a string in a form of : /// PassName '<' parameter-list '>' /// /// Parameter list is being parsed by the parser callable argument, \p Parser, /// It takes a string-ref of parameters and returns either StringError or a /// parameter list in a form of a custom parameters type, all wrapped into /// Expected<> template class. /// template auto parsePassParameters(ParametersParseCallableT &&Parser, StringRef Name, StringRef PassName) -> decltype(Parser(StringRef{})) { using ParametersT = typename decltype(Parser(StringRef{}))::value_type; StringRef Params = Name; if (!Params.consume_front(PassName)) { assert(false && "unable to strip pass name from parametrized pass specification"); } if (!Params.empty() && (!Params.consume_front("<") || !Params.consume_back(">"))) { assert(false && "invalid format for parametrized pass name"); } Expected Result = Parser(Params); assert((Result || Result.template errorIsA()) && "Pass parameter parser can only return StringErrors."); return Result; } /// Parser of parameters for LoopUnroll pass. 
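/// For example, the following are spellings this parser is meant to accept
/// when the pass is named on the opt command line (the registered pass name
/// lives in PassRegistry.def; shown here for illustration only):
///   -passes='loop-unroll'                                  // all defaults
///   -passes='loop-unroll<O2>'                              // opt level only
///   -passes='loop-unroll<partial;no-runtime;full-unroll-max=16>'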
Expected parseLoopUnrollOptions(StringRef Params) { LoopUnrollOptions UnrollOpts; while (!Params.empty()) { StringRef ParamName; std::tie(ParamName, Params) = Params.split(';'); int OptLevel = StringSwitch(ParamName) .Case("O0", 0) .Case("O1", 1) .Case("O2", 2) .Case("O3", 3) .Default(-1); if (OptLevel >= 0) { UnrollOpts.setOptLevel(OptLevel); continue; } if (ParamName.consume_front("full-unroll-max=")) { int Count; if (ParamName.getAsInteger(0, Count)) return make_error( formatv("invalid LoopUnrollPass parameter '{0}' ", ParamName).str(), inconvertibleErrorCode()); UnrollOpts.setFullUnrollMaxCount(Count); continue; } bool Enable = !ParamName.consume_front("no-"); if (ParamName == "partial") { UnrollOpts.setPartial(Enable); } else if (ParamName == "peeling") { UnrollOpts.setPeeling(Enable); } else if (ParamName == "profile-peeling") { UnrollOpts.setProfileBasedPeeling(Enable); } else if (ParamName == "runtime") { UnrollOpts.setRuntime(Enable); } else if (ParamName == "upperbound") { UnrollOpts.setUpperBound(Enable); } else { return make_error( formatv("invalid LoopUnrollPass parameter '{0}' ", ParamName).str(), inconvertibleErrorCode()); } } return UnrollOpts; } Expected parseSinglePassOption(StringRef Params, StringRef OptionName, StringRef PassName) { bool Result = false; while (!Params.empty()) { StringRef ParamName; std::tie(ParamName, Params) = Params.split(';'); if (ParamName == OptionName) { Result = true; } else { return make_error( formatv("invalid {1} pass parameter '{0}' ", ParamName, PassName) .str(), inconvertibleErrorCode()); } } return Result; } Expected parseInlinerPassOptions(StringRef Params) { return parseSinglePassOption(Params, "only-mandatory", "InlinerPass"); } Expected parseEarlyCSEPassOptions(StringRef Params) { return parseSinglePassOption(Params, "memssa", "EarlyCSE"); } Expected parseEntryExitInstrumenterPassOptions(StringRef Params) { return parseSinglePassOption(Params, "post-inline", "EntryExitInstrumenter"); } Expected parseLoopExtractorPassOptions(StringRef Params) { return parseSinglePassOption(Params, "single", "LoopExtractor"); } Expected parseLowerMatrixIntrinsicsPassOptions(StringRef Params) { return parseSinglePassOption(Params, "minimal", "LowerMatrixIntrinsics"); } Expected parseASanPassOptions(StringRef Params) { AddressSanitizerOptions Result; while (!Params.empty()) { StringRef ParamName; std::tie(ParamName, Params) = Params.split(';'); if (ParamName == "kernel") { Result.CompileKernel = true; } else { return make_error( formatv("invalid AddressSanitizer pass parameter '{0}' ", ParamName) .str(), inconvertibleErrorCode()); } } return Result; } Expected parseHWASanPassOptions(StringRef Params) { HWAddressSanitizerOptions Result; while (!Params.empty()) { StringRef ParamName; std::tie(ParamName, Params) = Params.split(';'); if (ParamName == "recover") { Result.Recover = true; } else if (ParamName == "kernel") { Result.CompileKernel = true; } else { return make_error( formatv("invalid HWAddressSanitizer pass parameter '{0}' ", ParamName) .str(), inconvertibleErrorCode()); } } return Result; } Expected parseMSanPassOptions(StringRef Params) { MemorySanitizerOptions Result; while (!Params.empty()) { StringRef ParamName; std::tie(ParamName, Params) = Params.split(';'); if (ParamName == "recover") { Result.Recover = true; } else if (ParamName == "kernel") { Result.Kernel = true; } else if (ParamName.consume_front("track-origins=")) { if (ParamName.getAsInteger(0, Result.TrackOrigins)) return make_error( formatv("invalid argument to MemorySanitizer pass 
track-origins " "parameter: '{0}' ", ParamName) .str(), inconvertibleErrorCode()); } else if (ParamName == "eager-checks") { Result.EagerChecks = true; } else { return make_error( formatv("invalid MemorySanitizer pass parameter '{0}' ", ParamName) .str(), inconvertibleErrorCode()); } } return Result; } /// Parser of parameters for SimplifyCFG pass. Expected parseSimplifyCFGOptions(StringRef Params) { SimplifyCFGOptions Result; while (!Params.empty()) { StringRef ParamName; std::tie(ParamName, Params) = Params.split(';'); bool Enable = !ParamName.consume_front("no-"); if (ParamName == "forward-switch-cond") { Result.forwardSwitchCondToPhi(Enable); + } else if (ParamName == "switch-range-to-icmp") { + Result.convertSwitchRangeToICmp(Enable); } else if (ParamName == "switch-to-lookup") { Result.convertSwitchToLookupTable(Enable); } else if (ParamName == "keep-loops") { Result.needCanonicalLoops(Enable); } else if (ParamName == "hoist-common-insts") { Result.hoistCommonInsts(Enable); } else if (ParamName == "sink-common-insts") { Result.sinkCommonInsts(Enable); } else if (Enable && ParamName.consume_front("bonus-inst-threshold=")) { APInt BonusInstThreshold; if (ParamName.getAsInteger(0, BonusInstThreshold)) return make_error( formatv("invalid argument to SimplifyCFG pass bonus-threshold " "parameter: '{0}' ", ParamName).str(), inconvertibleErrorCode()); Result.bonusInstThreshold(BonusInstThreshold.getSExtValue()); } else { return make_error( formatv("invalid SimplifyCFG pass parameter '{0}' ", ParamName).str(), inconvertibleErrorCode()); } } return Result; } /// Parser of parameters for LoopVectorize pass. Expected parseLoopVectorizeOptions(StringRef Params) { LoopVectorizeOptions Opts; while (!Params.empty()) { StringRef ParamName; std::tie(ParamName, Params) = Params.split(';'); bool Enable = !ParamName.consume_front("no-"); if (ParamName == "interleave-forced-only") { Opts.setInterleaveOnlyWhenForced(Enable); } else if (ParamName == "vectorize-forced-only") { Opts.setVectorizeOnlyWhenForced(Enable); } else { return make_error( formatv("invalid LoopVectorize parameter '{0}' ", ParamName).str(), inconvertibleErrorCode()); } } return Opts; } Expected> parseLoopUnswitchOptions(StringRef Params) { std::pair Result = {false, true}; while (!Params.empty()) { StringRef ParamName; std::tie(ParamName, Params) = Params.split(';'); bool Enable = !ParamName.consume_front("no-"); if (ParamName == "nontrivial") { Result.first = Enable; } else if (ParamName == "trivial") { Result.second = Enable; } else { return make_error( formatv("invalid LoopUnswitch pass parameter '{0}' ", ParamName) .str(), inconvertibleErrorCode()); } } return Result; } Expected parseMergedLoadStoreMotionOptions(StringRef Params) { bool Result = false; while (!Params.empty()) { StringRef ParamName; std::tie(ParamName, Params) = Params.split(';'); bool Enable = !ParamName.consume_front("no-"); if (ParamName == "split-footer-bb") { Result = Enable; } else { return make_error( formatv("invalid MergedLoadStoreMotion pass parameter '{0}' ", ParamName) .str(), inconvertibleErrorCode()); } } return Result; } Expected parseGVNOptions(StringRef Params) { GVNOptions Result; while (!Params.empty()) { StringRef ParamName; std::tie(ParamName, Params) = Params.split(';'); bool Enable = !ParamName.consume_front("no-"); if (ParamName == "pre") { Result.setPRE(Enable); } else if (ParamName == "load-pre") { Result.setLoadPRE(Enable); } else if (ParamName == "split-backedge-load-pre") { Result.setLoadPRESplitBackedge(Enable); } else if (ParamName == 
"memdep") { Result.setMemDep(Enable); } else { return make_error( formatv("invalid GVN pass parameter '{0}' ", ParamName).str(), inconvertibleErrorCode()); } } return Result; } Expected parseStackLifetimeOptions(StringRef Params) { StackLifetime::LivenessType Result = StackLifetime::LivenessType::May; while (!Params.empty()) { StringRef ParamName; std::tie(ParamName, Params) = Params.split(';'); if (ParamName == "may") { Result = StackLifetime::LivenessType::May; } else if (ParamName == "must") { Result = StackLifetime::LivenessType::Must; } else { return make_error( formatv("invalid StackLifetime parameter '{0}' ", ParamName).str(), inconvertibleErrorCode()); } } return Result; } } // namespace /// Tests whether a pass name starts with a valid prefix for a default pipeline /// alias. static bool startsWithDefaultPipelineAliasPrefix(StringRef Name) { return Name.startswith("default") || Name.startswith("thinlto") || Name.startswith("lto"); } /// Tests whether registered callbacks will accept a given pass name. /// /// When parsing a pipeline text, the type of the outermost pipeline may be /// omitted, in which case the type is automatically determined from the first /// pass name in the text. This may be a name that is handled through one of the /// callbacks. We check this through the oridinary parsing callbacks by setting /// up a dummy PassManager in order to not force the client to also handle this /// type of query. template static bool callbacksAcceptPassName(StringRef Name, CallbacksT &Callbacks) { if (!Callbacks.empty()) { PassManagerT DummyPM; for (auto &CB : Callbacks) if (CB(Name, DummyPM, {})) return true; } return false; } template static bool isModulePassName(StringRef Name, CallbacksT &Callbacks) { // Manually handle aliases for pre-configured pipeline fragments. if (startsWithDefaultPipelineAliasPrefix(Name)) return DefaultAliasRegex.match(Name); // Explicitly handle pass manager names. if (Name == "module") return true; if (Name == "cgscc") return true; if (Name == "function" || Name == "function") return true; // Explicitly handle custom-parsed pass names. if (parseRepeatPassName(Name)) return true; #define MODULE_PASS(NAME, CREATE_PASS) \ if (Name == NAME) \ return true; #define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ if (checkParametrizedPassName(Name, NAME)) \ return true; #define MODULE_ANALYSIS(NAME, CREATE_PASS) \ if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \ return true; #include "PassRegistry.def" return callbacksAcceptPassName(Name, Callbacks); } template static bool isCGSCCPassName(StringRef Name, CallbacksT &Callbacks) { // Explicitly handle pass manager names. if (Name == "cgscc") return true; if (Name == "function" || Name == "function") return true; // Explicitly handle custom-parsed pass names. if (parseRepeatPassName(Name)) return true; if (parseDevirtPassName(Name)) return true; #define CGSCC_PASS(NAME, CREATE_PASS) \ if (Name == NAME) \ return true; #define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ if (checkParametrizedPassName(Name, NAME)) \ return true; #define CGSCC_ANALYSIS(NAME, CREATE_PASS) \ if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \ return true; #include "PassRegistry.def" return callbacksAcceptPassName(Name, Callbacks); } template static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) { // Explicitly handle pass manager names. 
if (Name == "function" || Name == "function") return true; if (Name == "loop" || Name == "loop-mssa") return true; // Explicitly handle custom-parsed pass names. if (parseRepeatPassName(Name)) return true; #define FUNCTION_PASS(NAME, CREATE_PASS) \ if (Name == NAME) \ return true; #define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ if (checkParametrizedPassName(Name, NAME)) \ return true; #define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \ return true; #include "PassRegistry.def" return callbacksAcceptPassName(Name, Callbacks); } template static bool isLoopNestPassName(StringRef Name, CallbacksT &Callbacks, bool &UseMemorySSA) { UseMemorySSA = false; // Explicitly handle custom-parsed pass names. if (parseRepeatPassName(Name)) return true; if (Name == "lnicm") { UseMemorySSA = true; return true; } #define LOOPNEST_PASS(NAME, CREATE_PASS) \ if (Name == NAME) \ return true; #include "PassRegistry.def" return callbacksAcceptPassName(Name, Callbacks); } template static bool isLoopPassName(StringRef Name, CallbacksT &Callbacks, bool &UseMemorySSA) { UseMemorySSA = false; // Explicitly handle custom-parsed pass names. if (parseRepeatPassName(Name)) return true; if (Name == "licm") { UseMemorySSA = true; return true; } #define LOOP_PASS(NAME, CREATE_PASS) \ if (Name == NAME) \ return true; #define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ if (checkParametrizedPassName(Name, NAME)) \ return true; #define LOOP_ANALYSIS(NAME, CREATE_PASS) \ if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \ return true; #include "PassRegistry.def" return callbacksAcceptPassName(Name, Callbacks); } Optional> PassBuilder::parsePipelineText(StringRef Text) { std::vector ResultPipeline; SmallVector *, 4> PipelineStack = { &ResultPipeline}; for (;;) { std::vector &Pipeline = *PipelineStack.back(); size_t Pos = Text.find_first_of(",()"); Pipeline.push_back({Text.substr(0, Pos), {}}); // If we have a single terminating name, we're done. if (Pos == Text.npos) break; char Sep = Text[Pos]; Text = Text.substr(Pos + 1); if (Sep == ',') // Just a name ending in a comma, continue. continue; if (Sep == '(') { // Push the inner pipeline onto the stack to continue processing. PipelineStack.push_back(&Pipeline.back().InnerPipeline); continue; } assert(Sep == ')' && "Bogus separator!"); // When handling the close parenthesis, we greedily consume them to avoid // empty strings in the pipeline. do { // If we try to pop the outer pipeline we have unbalanced parentheses. if (PipelineStack.size() == 1) return None; PipelineStack.pop_back(); } while (Text.consume_front(")")); // Check if we've finished parsing. if (Text.empty()) break; // Otherwise, the end of an inner pipeline always has to be followed by // a comma, and then we can continue. if (!Text.consume_front(",")) return None; } if (PipelineStack.size() > 1) // Unbalanced paretheses. return None; assert(PipelineStack.back() == &ResultPipeline && "Wrong pipeline at the bottom of the stack!"); return {std::move(ResultPipeline)}; } Error PassBuilder::parseModulePass(ModulePassManager &MPM, const PipelineElement &E) { auto &Name = E.Name; auto &InnerPipeline = E.InnerPipeline; // First handle complex passes like the pass managers which carry pipelines. 
if (!InnerPipeline.empty()) { if (Name == "module") { ModulePassManager NestedMPM; if (auto Err = parseModulePassPipeline(NestedMPM, InnerPipeline)) return Err; MPM.addPass(std::move(NestedMPM)); return Error::success(); } if (Name == "cgscc") { CGSCCPassManager CGPM; if (auto Err = parseCGSCCPassPipeline(CGPM, InnerPipeline)) return Err; MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); return Error::success(); } if (Name == "function" || Name == "function") { FunctionPassManager FPM; if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline)) return Err; MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM), Name != "function")); return Error::success(); } if (auto Count = parseRepeatPassName(Name)) { ModulePassManager NestedMPM; if (auto Err = parseModulePassPipeline(NestedMPM, InnerPipeline)) return Err; MPM.addPass(createRepeatedPass(*Count, std::move(NestedMPM))); return Error::success(); } for (auto &C : ModulePipelineParsingCallbacks) if (C(Name, MPM, InnerPipeline)) return Error::success(); // Normal passes can't have pipelines. return make_error( formatv("invalid use of '{0}' pass as module pipeline", Name).str(), inconvertibleErrorCode()); ; } // Manually handle aliases for pre-configured pipeline fragments. if (startsWithDefaultPipelineAliasPrefix(Name)) { SmallVector Matches; if (!DefaultAliasRegex.match(Name, &Matches)) return make_error( formatv("unknown default pipeline alias '{0}'", Name).str(), inconvertibleErrorCode()); assert(Matches.size() == 3 && "Must capture two matched strings!"); OptimizationLevel L = StringSwitch(Matches[2]) .Case("O0", OptimizationLevel::O0) .Case("O1", OptimizationLevel::O1) .Case("O2", OptimizationLevel::O2) .Case("O3", OptimizationLevel::O3) .Case("Os", OptimizationLevel::Os) .Case("Oz", OptimizationLevel::Oz); if (L == OptimizationLevel::O0 && Matches[1] != "thinlto" && Matches[1] != "lto") { MPM.addPass(buildO0DefaultPipeline(L, Matches[1] == "thinlto-pre-link" || Matches[1] == "lto-pre-link")); return Error::success(); } // This is consistent with old pass manager invoked via opt, but // inconsistent with clang. Clang doesn't enable loop vectorization // but does enable slp vectorization at Oz. PTO.LoopVectorization = L.getSpeedupLevel() > 1 && L != OptimizationLevel::Oz; PTO.SLPVectorization = L.getSpeedupLevel() > 1 && L != OptimizationLevel::Oz; if (Matches[1] == "default") { MPM.addPass(buildPerModuleDefaultPipeline(L)); } else if (Matches[1] == "thinlto-pre-link") { MPM.addPass(buildThinLTOPreLinkDefaultPipeline(L)); } else if (Matches[1] == "thinlto") { MPM.addPass(buildThinLTODefaultPipeline(L, nullptr)); } else if (Matches[1] == "lto-pre-link") { MPM.addPass(buildLTOPreLinkDefaultPipeline(L)); } else { assert(Matches[1] == "lto" && "Not one of the matched options!"); MPM.addPass(buildLTODefaultPipeline(L, nullptr)); } return Error::success(); } // Finally expand the basic registered passes from the .inc file. 
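// For reference, the alias handling above accepts one of the pre-built
// pipeline names followed by an optimization level in angle brackets, as
// matched by DefaultAliasRegex, e.g.:
//
//   opt -passes='default<O2>' ...            // buildPerModuleDefaultPipeline
//   opt -passes='thinlto-pre-link<O3>' ...   // buildThinLTOPreLinkDefaultPipeline
//   opt -passes='lto<Os>' ...                // buildLTODefaultPipeline
//
// At O0, 'default', 'thinlto-pre-link' and 'lto-pre-link' are redirected to
// buildO0DefaultPipeline, exactly as implemented above.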
#define MODULE_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ MPM.addPass(CREATE_PASS); \ return Error::success(); \ } #define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ if (checkParametrizedPassName(Name, NAME)) { \ auto Params = parsePassParameters(PARSER, Name, NAME); \ if (!Params) \ return Params.takeError(); \ MPM.addPass(CREATE_PASS(Params.get())); \ return Error::success(); \ } #define MODULE_ANALYSIS(NAME, CREATE_PASS) \ if (Name == "require<" NAME ">") { \ MPM.addPass( \ RequireAnalysisPass< \ std::remove_reference::type, Module>()); \ return Error::success(); \ } \ if (Name == "invalidate<" NAME ">") { \ MPM.addPass(InvalidateAnalysisPass< \ std::remove_reference::type>()); \ return Error::success(); \ } #define CGSCC_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(CREATE_PASS)); \ return Error::success(); \ } #define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ if (checkParametrizedPassName(Name, NAME)) { \ auto Params = parsePassParameters(PARSER, Name, NAME); \ if (!Params) \ return Params.takeError(); \ MPM.addPass( \ createModuleToPostOrderCGSCCPassAdaptor(CREATE_PASS(Params.get()))); \ return Error::success(); \ } #define FUNCTION_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ MPM.addPass(createModuleToFunctionPassAdaptor(CREATE_PASS)); \ return Error::success(); \ } #define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ if (checkParametrizedPassName(Name, NAME)) { \ auto Params = parsePassParameters(PARSER, Name, NAME); \ if (!Params) \ return Params.takeError(); \ MPM.addPass(createModuleToFunctionPassAdaptor(CREATE_PASS(Params.get()))); \ return Error::success(); \ } #define LOOPNEST_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ MPM.addPass(createModuleToFunctionPassAdaptor( \ createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \ return Error::success(); \ } #define LOOP_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ MPM.addPass(createModuleToFunctionPassAdaptor( \ createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \ return Error::success(); \ } #define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ if (checkParametrizedPassName(Name, NAME)) { \ auto Params = parsePassParameters(PARSER, Name, NAME); \ if (!Params) \ return Params.takeError(); \ MPM.addPass( \ createModuleToFunctionPassAdaptor(createFunctionToLoopPassAdaptor( \ CREATE_PASS(Params.get()), false, false))); \ return Error::success(); \ } #include "PassRegistry.def" for (auto &C : ModulePipelineParsingCallbacks) if (C(Name, MPM, InnerPipeline)) return Error::success(); return make_error( formatv("unknown module pass '{0}'", Name).str(), inconvertibleErrorCode()); } Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM, const PipelineElement &E) { auto &Name = E.Name; auto &InnerPipeline = E.InnerPipeline; // First handle complex passes like the pass managers which carry pipelines. if (!InnerPipeline.empty()) { if (Name == "cgscc") { CGSCCPassManager NestedCGPM; if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline)) return Err; // Add the nested pass manager with the appropriate adaptor. CGPM.addPass(std::move(NestedCGPM)); return Error::success(); } if (Name == "function" || Name == "function") { FunctionPassManager FPM; if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline)) return Err; // Add the nested pass manager with the appropriate adaptor. 
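// For reference, the expansions above are what allow a function- or loop-level
// pass to be named directly in a module pipeline: the pass simply gets wrapped
// in the matching adaptor(s). A hand-written sketch of what "instcombine" and
// "indvars" expand to at module scope (adaptor arguments as in the macros
// above):
//
//   ModulePassManager MPM;
//   MPM.addPass(createModuleToFunctionPassAdaptor(InstCombinePass()));
//   MPM.addPass(createModuleToFunctionPassAdaptor(
//       createFunctionToLoopPassAdaptor(IndVarSimplifyPass(),
//                                       /*UseMemorySSA=*/false,
//                                       /*UseBlockFrequencyInfo=*/false)));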
CGPM.addPass( createCGSCCToFunctionPassAdaptor(std::move(FPM), Name != "function")); return Error::success(); } if (auto Count = parseRepeatPassName(Name)) { CGSCCPassManager NestedCGPM; if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline)) return Err; CGPM.addPass(createRepeatedPass(*Count, std::move(NestedCGPM))); return Error::success(); } if (auto MaxRepetitions = parseDevirtPassName(Name)) { CGSCCPassManager NestedCGPM; if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline)) return Err; CGPM.addPass( createDevirtSCCRepeatedPass(std::move(NestedCGPM), *MaxRepetitions)); return Error::success(); } for (auto &C : CGSCCPipelineParsingCallbacks) if (C(Name, CGPM, InnerPipeline)) return Error::success(); // Normal passes can't have pipelines. return make_error( formatv("invalid use of '{0}' pass as cgscc pipeline", Name).str(), inconvertibleErrorCode()); } // Now expand the basic registered passes from the .inc file. #define CGSCC_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ CGPM.addPass(CREATE_PASS); \ return Error::success(); \ } #define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ if (checkParametrizedPassName(Name, NAME)) { \ auto Params = parsePassParameters(PARSER, Name, NAME); \ if (!Params) \ return Params.takeError(); \ CGPM.addPass(CREATE_PASS(Params.get())); \ return Error::success(); \ } #define CGSCC_ANALYSIS(NAME, CREATE_PASS) \ if (Name == "require<" NAME ">") { \ CGPM.addPass(RequireAnalysisPass< \ std::remove_reference::type, \ LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, \ CGSCCUpdateResult &>()); \ return Error::success(); \ } \ if (Name == "invalidate<" NAME ">") { \ CGPM.addPass(InvalidateAnalysisPass< \ std::remove_reference::type>()); \ return Error::success(); \ } #define FUNCTION_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ CGPM.addPass(createCGSCCToFunctionPassAdaptor(CREATE_PASS)); \ return Error::success(); \ } #define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ if (checkParametrizedPassName(Name, NAME)) { \ auto Params = parsePassParameters(PARSER, Name, NAME); \ if (!Params) \ return Params.takeError(); \ CGPM.addPass(createCGSCCToFunctionPassAdaptor(CREATE_PASS(Params.get()))); \ return Error::success(); \ } #define LOOPNEST_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ CGPM.addPass(createCGSCCToFunctionPassAdaptor( \ createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \ return Error::success(); \ } #define LOOP_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ CGPM.addPass(createCGSCCToFunctionPassAdaptor( \ createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \ return Error::success(); \ } #define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ if (checkParametrizedPassName(Name, NAME)) { \ auto Params = parsePassParameters(PARSER, Name, NAME); \ if (!Params) \ return Params.takeError(); \ CGPM.addPass( \ createCGSCCToFunctionPassAdaptor(createFunctionToLoopPassAdaptor( \ CREATE_PASS(Params.get()), false, false))); \ return Error::success(); \ } #include "PassRegistry.def" for (auto &C : CGSCCPipelineParsingCallbacks) if (C(Name, CGPM, InnerPipeline)) return Error::success(); return make_error( formatv("unknown cgscc pass '{0}'", Name).str(), inconvertibleErrorCode()); } Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM, const PipelineElement &E) { auto &Name = E.Name; auto &InnerPipeline = E.InnerPipeline; // First handle complex passes like the pass managers which carry pipelines. 
if (!InnerPipeline.empty()) { if (Name == "function") { FunctionPassManager NestedFPM; if (auto Err = parseFunctionPassPipeline(NestedFPM, InnerPipeline)) return Err; // Add the nested pass manager with the appropriate adaptor. FPM.addPass(std::move(NestedFPM)); return Error::success(); } if (Name == "loop" || Name == "loop-mssa") { LoopPassManager LPM; if (auto Err = parseLoopPassPipeline(LPM, InnerPipeline)) return Err; // Add the nested pass manager with the appropriate adaptor. bool UseMemorySSA = (Name == "loop-mssa"); bool UseBFI = llvm::any_of( InnerPipeline, [](auto Pipeline) { return Pipeline.Name == "licm"; }); bool UseBPI = llvm::any_of(InnerPipeline, [](auto Pipeline) { return Pipeline.Name == "loop-predication"; }); FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), UseMemorySSA, UseBFI, UseBPI)); return Error::success(); } if (auto Count = parseRepeatPassName(Name)) { FunctionPassManager NestedFPM; if (auto Err = parseFunctionPassPipeline(NestedFPM, InnerPipeline)) return Err; FPM.addPass(createRepeatedPass(*Count, std::move(NestedFPM))); return Error::success(); } for (auto &C : FunctionPipelineParsingCallbacks) if (C(Name, FPM, InnerPipeline)) return Error::success(); // Normal passes can't have pipelines. return make_error( formatv("invalid use of '{0}' pass as function pipeline", Name).str(), inconvertibleErrorCode()); } // Now expand the basic registered passes from the .inc file. #define FUNCTION_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ FPM.addPass(CREATE_PASS); \ return Error::success(); \ } #define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ if (checkParametrizedPassName(Name, NAME)) { \ auto Params = parsePassParameters(PARSER, Name, NAME); \ if (!Params) \ return Params.takeError(); \ FPM.addPass(CREATE_PASS(Params.get())); \ return Error::success(); \ } #define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ if (Name == "require<" NAME ">") { \ FPM.addPass( \ RequireAnalysisPass< \ std::remove_reference::type, Function>()); \ return Error::success(); \ } \ if (Name == "invalidate<" NAME ">") { \ FPM.addPass(InvalidateAnalysisPass< \ std::remove_reference::type>()); \ return Error::success(); \ } // FIXME: UseMemorySSA is set to false. Maybe we could do things like: // bool UseMemorySSA = !("canon-freeze" || "loop-predication" || // "guard-widening"); // The risk is that it may become obsolete if we're not careful. #define LOOPNEST_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false, false)); \ return Error::success(); \ } #define LOOP_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false, false)); \ return Error::success(); \ } #define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ if (checkParametrizedPassName(Name, NAME)) { \ auto Params = parsePassParameters(PARSER, Name, NAME); \ if (!Params) \ return Params.takeError(); \ FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS(Params.get()), \ false, false)); \ return Error::success(); \ } #include "PassRegistry.def" for (auto &C : FunctionPipelineParsingCallbacks) if (C(Name, FPM, InnerPipeline)) return Error::success(); return make_error( formatv("unknown function pass '{0}'", Name).str(), inconvertibleErrorCode()); } Error PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E) { StringRef Name = E.Name; auto &InnerPipeline = E.InnerPipeline; // First handle complex passes like the pass managers which carry pipelines. 
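// For reference, the two spellings handled above select whether the loop
// adaptor is built on MemorySSA: "loop-mssa(...)" requires every contained
// pass to preserve MemorySSA, while "loop(...)" does not provide it at all,
// so a MemorySSA-dependent pass such as licm belongs in the loop-mssa form.
// Illustrative pipelines:
//
//   opt -passes='function(loop-mssa(licm,loop-rotate))' ...
//   opt -passes='function(loop(indvars,loop-deletion))' ...
//
// A bare loop pass name given to the top-level parser is wrapped the same way,
// using loop-mssa when isLoopPassName reports that the pass needs MemorySSA.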
if (!InnerPipeline.empty()) { if (Name == "loop") { LoopPassManager NestedLPM; if (auto Err = parseLoopPassPipeline(NestedLPM, InnerPipeline)) return Err; // Add the nested pass manager with the appropriate adaptor. LPM.addPass(std::move(NestedLPM)); return Error::success(); } if (auto Count = parseRepeatPassName(Name)) { LoopPassManager NestedLPM; if (auto Err = parseLoopPassPipeline(NestedLPM, InnerPipeline)) return Err; LPM.addPass(createRepeatedPass(*Count, std::move(NestedLPM))); return Error::success(); } for (auto &C : LoopPipelineParsingCallbacks) if (C(Name, LPM, InnerPipeline)) return Error::success(); // Normal passes can't have pipelines. return make_error( formatv("invalid use of '{0}' pass as loop pipeline", Name).str(), inconvertibleErrorCode()); } // Now expand the basic registered passes from the .inc file. #define LOOPNEST_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ LPM.addPass(CREATE_PASS); \ return Error::success(); \ } #define LOOP_PASS(NAME, CREATE_PASS) \ if (Name == NAME) { \ LPM.addPass(CREATE_PASS); \ return Error::success(); \ } #define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ if (checkParametrizedPassName(Name, NAME)) { \ auto Params = parsePassParameters(PARSER, Name, NAME); \ if (!Params) \ return Params.takeError(); \ LPM.addPass(CREATE_PASS(Params.get())); \ return Error::success(); \ } #define LOOP_ANALYSIS(NAME, CREATE_PASS) \ if (Name == "require<" NAME ">") { \ LPM.addPass(RequireAnalysisPass< \ std::remove_reference::type, Loop, \ LoopAnalysisManager, LoopStandardAnalysisResults &, \ LPMUpdater &>()); \ return Error::success(); \ } \ if (Name == "invalidate<" NAME ">") { \ LPM.addPass(InvalidateAnalysisPass< \ std::remove_reference::type>()); \ return Error::success(); \ } #include "PassRegistry.def" for (auto &C : LoopPipelineParsingCallbacks) if (C(Name, LPM, InnerPipeline)) return Error::success(); return make_error(formatv("unknown loop pass '{0}'", Name).str(), inconvertibleErrorCode()); } bool PassBuilder::parseAAPassName(AAManager &AA, StringRef Name) { #define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \ if (Name == NAME) { \ AA.registerModuleAnalysis< \ std::remove_reference::type>(); \ return true; \ } #define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \ if (Name == NAME) { \ AA.registerFunctionAnalysis< \ std::remove_reference::type>(); \ return true; \ } #include "PassRegistry.def" for (auto &C : AAParsingCallbacks) if (C(Name, AA)) return true; return false; } Error PassBuilder::parseLoopPassPipeline(LoopPassManager &LPM, ArrayRef Pipeline) { for (const auto &Element : Pipeline) { if (auto Err = parseLoopPass(LPM, Element)) return Err; } return Error::success(); } Error PassBuilder::parseFunctionPassPipeline( FunctionPassManager &FPM, ArrayRef Pipeline) { for (const auto &Element : Pipeline) { if (auto Err = parseFunctionPass(FPM, Element)) return Err; } return Error::success(); } Error PassBuilder::parseCGSCCPassPipeline(CGSCCPassManager &CGPM, ArrayRef Pipeline) { for (const auto &Element : Pipeline) { if (auto Err = parseCGSCCPass(CGPM, Element)) return Err; } return Error::success(); } void PassBuilder::crossRegisterProxies(LoopAnalysisManager &LAM, FunctionAnalysisManager &FAM, CGSCCAnalysisManager &CGAM, ModuleAnalysisManager &MAM) { MAM.registerPass([&] { return FunctionAnalysisManagerModuleProxy(FAM); }); MAM.registerPass([&] { return CGSCCAnalysisManagerModuleProxy(CGAM); }); CGAM.registerPass([&] { return ModuleAnalysisManagerCGSCCProxy(MAM); }); FAM.registerPass([&] { return 
CGSCCAnalysisManagerFunctionProxy(CGAM); }); FAM.registerPass([&] { return ModuleAnalysisManagerFunctionProxy(MAM); }); FAM.registerPass([&] { return LoopAnalysisManagerFunctionProxy(LAM); }); LAM.registerPass([&] { return FunctionAnalysisManagerLoopProxy(FAM); }); } Error PassBuilder::parseModulePassPipeline(ModulePassManager &MPM, ArrayRef Pipeline) { for (const auto &Element : Pipeline) { if (auto Err = parseModulePass(MPM, Element)) return Err; } return Error::success(); } // Primary pass pipeline description parsing routine for a \c ModulePassManager // FIXME: Should this routine accept a TargetMachine or require the caller to // pre-populate the analysis managers with target-specific stuff? Error PassBuilder::parsePassPipeline(ModulePassManager &MPM, StringRef PipelineText) { auto Pipeline = parsePipelineText(PipelineText); if (!Pipeline || Pipeline->empty()) return make_error( formatv("invalid pipeline '{0}'", PipelineText).str(), inconvertibleErrorCode()); // If the first name isn't at the module layer, wrap the pipeline up // automatically. StringRef FirstName = Pipeline->front().Name; if (!isModulePassName(FirstName, ModulePipelineParsingCallbacks)) { bool UseMemorySSA; if (isCGSCCPassName(FirstName, CGSCCPipelineParsingCallbacks)) { Pipeline = {{"cgscc", std::move(*Pipeline)}}; } else if (isFunctionPassName(FirstName, FunctionPipelineParsingCallbacks)) { Pipeline = {{"function", std::move(*Pipeline)}}; } else if (isLoopNestPassName(FirstName, LoopPipelineParsingCallbacks, UseMemorySSA)) { Pipeline = {{"function", {{UseMemorySSA ? "loop-mssa" : "loop", std::move(*Pipeline)}}}}; } else if (isLoopPassName(FirstName, LoopPipelineParsingCallbacks, UseMemorySSA)) { Pipeline = {{"function", {{UseMemorySSA ? "loop-mssa" : "loop", std::move(*Pipeline)}}}}; } else { for (auto &C : TopLevelPipelineParsingCallbacks) if (C(MPM, *Pipeline)) return Error::success(); // Unknown pass or pipeline name! auto &InnerPipeline = Pipeline->front().InnerPipeline; return make_error( formatv("unknown {0} name '{1}'", (InnerPipeline.empty() ? 
"pass" : "pipeline"), FirstName) .str(), inconvertibleErrorCode()); } } if (auto Err = parseModulePassPipeline(MPM, *Pipeline)) return Err; return Error::success(); } // Primary pass pipeline description parsing routine for a \c CGSCCPassManager Error PassBuilder::parsePassPipeline(CGSCCPassManager &CGPM, StringRef PipelineText) { auto Pipeline = parsePipelineText(PipelineText); if (!Pipeline || Pipeline->empty()) return make_error( formatv("invalid pipeline '{0}'", PipelineText).str(), inconvertibleErrorCode()); StringRef FirstName = Pipeline->front().Name; if (!isCGSCCPassName(FirstName, CGSCCPipelineParsingCallbacks)) return make_error( formatv("unknown cgscc pass '{0}' in pipeline '{1}'", FirstName, PipelineText) .str(), inconvertibleErrorCode()); if (auto Err = parseCGSCCPassPipeline(CGPM, *Pipeline)) return Err; return Error::success(); } // Primary pass pipeline description parsing routine for a \c // FunctionPassManager Error PassBuilder::parsePassPipeline(FunctionPassManager &FPM, StringRef PipelineText) { auto Pipeline = parsePipelineText(PipelineText); if (!Pipeline || Pipeline->empty()) return make_error( formatv("invalid pipeline '{0}'", PipelineText).str(), inconvertibleErrorCode()); StringRef FirstName = Pipeline->front().Name; if (!isFunctionPassName(FirstName, FunctionPipelineParsingCallbacks)) return make_error( formatv("unknown function pass '{0}' in pipeline '{1}'", FirstName, PipelineText) .str(), inconvertibleErrorCode()); if (auto Err = parseFunctionPassPipeline(FPM, *Pipeline)) return Err; return Error::success(); } // Primary pass pipeline description parsing routine for a \c LoopPassManager Error PassBuilder::parsePassPipeline(LoopPassManager &CGPM, StringRef PipelineText) { auto Pipeline = parsePipelineText(PipelineText); if (!Pipeline || Pipeline->empty()) return make_error( formatv("invalid pipeline '{0}'", PipelineText).str(), inconvertibleErrorCode()); if (auto Err = parseLoopPassPipeline(CGPM, *Pipeline)) return Err; return Error::success(); } Error PassBuilder::parseAAPipeline(AAManager &AA, StringRef PipelineText) { // If the pipeline just consists of the word 'default' just replace the AA // manager with our default one. 
if (PipelineText == "default") { AA = buildDefaultAAPipeline(); return Error::success(); } while (!PipelineText.empty()) { StringRef Name; std::tie(Name, PipelineText) = PipelineText.split(','); if (!parseAAPassName(AA, Name)) return make_error( formatv("unknown alias analysis name '{0}'", Name).str(), inconvertibleErrorCode()); } return Error::success(); } bool PassBuilder::isAAPassName(StringRef PassName) { #define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \ if (PassName == NAME) \ return true; #define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \ if (PassName == NAME) \ return true; #include "PassRegistry.def" return false; } bool PassBuilder::isAnalysisPassName(StringRef PassName) { #define MODULE_ANALYSIS(NAME, CREATE_PASS) \ if (PassName == NAME) \ return true; #define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ if (PassName == NAME) \ return true; #define LOOP_ANALYSIS(NAME, CREATE_PASS) \ if (PassName == NAME) \ return true; #define CGSCC_ANALYSIS(NAME, CREATE_PASS) \ if (PassName == NAME) \ return true; #define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \ if (PassName == NAME) \ return true; #define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \ if (PassName == NAME) \ return true; #include "PassRegistry.def" return false; } static void printPassName(StringRef PassName, raw_ostream &OS) { OS << " " << PassName << "\n"; } static void printPassName(StringRef PassName, StringRef Params, raw_ostream &OS) { OS << " " << PassName << "<" << Params << ">\n"; } void PassBuilder::printPassNames(raw_ostream &OS) { // TODO: print pass descriptions when they are available OS << "Module passes:\n"; #define MODULE_PASS(NAME, CREATE_PASS) printPassName(NAME, OS); #include "PassRegistry.def" OS << "Module passes with params:\n"; #define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ printPassName(NAME, PARAMS, OS); #include "PassRegistry.def" OS << "Module analyses:\n"; #define MODULE_ANALYSIS(NAME, CREATE_PASS) printPassName(NAME, OS); #include "PassRegistry.def" OS << "Module alias analyses:\n"; #define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) printPassName(NAME, OS); #include "PassRegistry.def" OS << "CGSCC passes:\n"; #define CGSCC_PASS(NAME, CREATE_PASS) printPassName(NAME, OS); #include "PassRegistry.def" OS << "CGSCC passes with params:\n"; #define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ printPassName(NAME, PARAMS, OS); #include "PassRegistry.def" OS << "CGSCC analyses:\n"; #define CGSCC_ANALYSIS(NAME, CREATE_PASS) printPassName(NAME, OS); #include "PassRegistry.def" OS << "Function passes:\n"; #define FUNCTION_PASS(NAME, CREATE_PASS) printPassName(NAME, OS); #include "PassRegistry.def" OS << "Function passes with params:\n"; #define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ printPassName(NAME, PARAMS, OS); #include "PassRegistry.def" OS << "Function analyses:\n"; #define FUNCTION_ANALYSIS(NAME, CREATE_PASS) printPassName(NAME, OS); #include "PassRegistry.def" OS << "Function alias analyses:\n"; #define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) printPassName(NAME, OS); #include "PassRegistry.def" OS << "LoopNest passes:\n"; #define LOOPNEST_PASS(NAME, CREATE_PASS) printPassName(NAME, OS); #include "PassRegistry.def" OS << "Loop passes:\n"; #define LOOP_PASS(NAME, CREATE_PASS) printPassName(NAME, OS); #include "PassRegistry.def" OS << "Loop passes with params:\n"; #define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \ printPassName(NAME, PARAMS, OS); #include "PassRegistry.def" OS << "Loop analyses:\n"; 
#define LOOP_ANALYSIS(NAME, CREATE_PASS) printPassName(NAME, OS); #include "PassRegistry.def" } void PassBuilder::registerParseTopLevelPipelineCallback( const std::function)> &C) { TopLevelPipelineParsingCallbacks.push_back(C); } diff --git a/contrib/llvm-project/llvm/lib/Passes/PassBuilderPipelines.cpp b/contrib/llvm-project/llvm/lib/Passes/PassBuilderPipelines.cpp index 93637c890c4f..e838665eb9ce 100644 --- a/contrib/llvm-project/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/contrib/llvm-project/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1,1825 +1,1855 @@ //===- Construction of pass pipelines -------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// \file /// /// This file provides the implementation of the PassBuilder based on our /// static pass registry as well as related functionality. It also provides /// helpers to aid in analyzing, debugging, and testing passes and pass /// pipelines. /// //===----------------------------------------------------------------------===// #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/IR/PassManager.h" #include "llvm/Passes/OptimizationLevel.h" #include "llvm/Passes/PassBuilder.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/PGOOptions.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" #include "llvm/Transforms/Coroutines/CoroCleanup.h" #include "llvm/Transforms/Coroutines/CoroEarly.h" #include "llvm/Transforms/Coroutines/CoroElide.h" #include "llvm/Transforms/Coroutines/CoroSplit.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/Annotation2Metadata.h" #include "llvm/Transforms/IPO/ArgumentPromotion.h" #include "llvm/Transforms/IPO/Attributor.h" #include "llvm/Transforms/IPO/CalledValuePropagation.h" #include "llvm/Transforms/IPO/ConstantMerge.h" #include "llvm/Transforms/IPO/CrossDSOCFI.h" #include "llvm/Transforms/IPO/DeadArgumentElimination.h" #include "llvm/Transforms/IPO/ElimAvailExtern.h" #include "llvm/Transforms/IPO/ForceFunctionAttrs.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/IPO/GlobalDCE.h" #include "llvm/Transforms/IPO/GlobalOpt.h" #include "llvm/Transforms/IPO/GlobalSplit.h" #include "llvm/Transforms/IPO/HotColdSplitting.h" #include "llvm/Transforms/IPO/IROutliner.h" #include "llvm/Transforms/IPO/InferFunctionAttrs.h" #include "llvm/Transforms/IPO/Inliner.h" #include "llvm/Transforms/IPO/LowerTypeTests.h" #include "llvm/Transforms/IPO/MergeFunctions.h" #include "llvm/Transforms/IPO/ModuleInliner.h" #include "llvm/Transforms/IPO/OpenMPOpt.h" #include "llvm/Transforms/IPO/PartialInlining.h" #include "llvm/Transforms/IPO/SCCP.h" #include "llvm/Transforms/IPO/SampleProfile.h" #include "llvm/Transforms/IPO/SampleProfileProbe.h" #include "llvm/Transforms/IPO/SyntheticCountsPropagation.h" #include 
"llvm/Transforms/IPO/WholeProgramDevirt.h" #include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/Instrumentation/CGProfile.h" #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" #include "llvm/Transforms/Instrumentation/InstrOrderFile.h" #include "llvm/Transforms/Instrumentation/InstrProfiling.h" #include "llvm/Transforms/Instrumentation/MemProfiler.h" #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" #include "llvm/Transforms/Scalar/ADCE.h" #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" #include "llvm/Transforms/Scalar/AnnotationRemarks.h" #include "llvm/Transforms/Scalar/BDCE.h" #include "llvm/Transforms/Scalar/CallSiteSplitting.h" #include "llvm/Transforms/Scalar/ConstraintElimination.h" #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" #include "llvm/Transforms/Scalar/DFAJumpThreading.h" #include "llvm/Transforms/Scalar/DeadStoreElimination.h" #include "llvm/Transforms/Scalar/DivRemPairs.h" #include "llvm/Transforms/Scalar/EarlyCSE.h" #include "llvm/Transforms/Scalar/Float2Int.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/IndVarSimplify.h" #include "llvm/Transforms/Scalar/InstSimplifyPass.h" #include "llvm/Transforms/Scalar/JumpThreading.h" #include "llvm/Transforms/Scalar/LICM.h" #include "llvm/Transforms/Scalar/LoopDeletion.h" #include "llvm/Transforms/Scalar/LoopDistribute.h" #include "llvm/Transforms/Scalar/LoopFlatten.h" #include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" #include "llvm/Transforms/Scalar/LoopInstSimplify.h" #include "llvm/Transforms/Scalar/LoopInterchange.h" #include "llvm/Transforms/Scalar/LoopLoadElimination.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Scalar/LoopRotation.h" #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h" #include "llvm/Transforms/Scalar/LoopSink.h" #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h" #include "llvm/Transforms/Scalar/LoopUnrollPass.h" #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h" #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h" #include "llvm/Transforms/Scalar/MemCpyOptimizer.h" #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" #include "llvm/Transforms/Scalar/NewGVN.h" #include "llvm/Transforms/Scalar/Reassociate.h" #include "llvm/Transforms/Scalar/SCCP.h" #include "llvm/Transforms/Scalar/SROA.h" #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Scalar/SimplifyCFG.h" #include "llvm/Transforms/Scalar/SpeculativeExecution.h" #include "llvm/Transforms/Scalar/TailRecursionElimination.h" #include "llvm/Transforms/Scalar/WarnMissedTransforms.h" #include "llvm/Transforms/Utils/AddDiscriminators.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/Transforms/Utils/CanonicalizeAliases.h" #include "llvm/Transforms/Utils/InjectTLIMappings.h" #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h" #include "llvm/Transforms/Utils/Mem2Reg.h" #include "llvm/Transforms/Utils/NameAnonGlobals.h" #include "llvm/Transforms/Utils/RelLookupTableConverter.h" #include "llvm/Transforms/Utils/SimplifyCFGOptions.h" #include "llvm/Transforms/Vectorize/LoopVectorize.h" #include "llvm/Transforms/Vectorize/SLPVectorizer.h" #include "llvm/Transforms/Vectorize/VectorCombine.h" using namespace llvm; static cl::opt UseInlineAdvisor( "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden, cl::desc("Enable ML policy for inliner. 
Currently trained for -Oz only"), cl::values(clEnumValN(InliningAdvisorMode::Default, "default", "Heuristics-based inliner version."), clEnumValN(InliningAdvisorMode::Development, "development", "Use development mode (runtime-loadable model)."), clEnumValN(InliningAdvisorMode::Release, "release", "Use release mode (AOT-compiled model)."))); static cl::opt EnableSyntheticCounts( "enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::desc("Run synthetic function entry count generation " "pass")); /// Flag to enable inline deferral during PGO. static cl::opt EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true), cl::Hidden, cl::desc("Enable inline deferral during PGO")); static cl::opt EnableMemProfiler("enable-mem-prof", cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::desc("Enable memory profiler")); static cl::opt EnableModuleInliner("enable-module-inliner", cl::init(false), cl::Hidden, cl::desc("Enable module inliner")); static cl::opt PerformMandatoryInliningsFirst( "mandatory-inlining-first", cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("Perform mandatory inlinings module-wide, before performing " "inlining.")); static cl::opt EnableO3NonTrivialUnswitching( "enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("Enable non-trivial loop unswitching for -O3")); static cl::opt EnableEagerlyInvalidateAnalyses( "eagerly-invalidate-analyses", cl::init(true), cl::Hidden, cl::desc("Eagerly invalidate more analyses in default pipelines")); static cl::opt EnableNoRerunSimplificationPipeline( "enable-no-rerun-simplification-pipeline", cl::init(false), cl::Hidden, cl::desc( "Prevent running the simplification pipeline on a function more " "than once in the case that SCC mutations cause a function to be " "visited multiple times as long as the function has not been changed")); static cl::opt EnableMergeFunctions( "enable-merge-functions", cl::init(false), cl::Hidden, cl::desc("Enable function merging as part of the optimization pipeline")); PipelineTuningOptions::PipelineTuningOptions() { LoopInterleaving = true; LoopVectorization = true; SLPVectorization = false; LoopUnrolling = true; ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll; LicmMssaOptCap = SetLicmMssaOptCap; LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; CallGraphProfile = true; MergeFunctions = EnableMergeFunctions; EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses; } namespace llvm { extern cl::opt MaxDevirtIterations; extern cl::opt EnableConstraintElimination; extern cl::opt EnableFunctionSpecialization; extern cl::opt EnableGVNHoist; extern cl::opt EnableGVNSink; extern cl::opt EnableHotColdSplit; extern cl::opt EnableIROutliner; extern cl::opt EnableOrderFileInstrumentation; extern cl::opt EnableCHR; extern cl::opt EnableLoopInterchange; extern cl::opt EnableUnrollAndJam; extern cl::opt EnableLoopFlatten; extern cl::opt EnableDFAJumpThreading; extern cl::opt RunNewGVN; extern cl::opt RunPartialInlining; extern cl::opt ExtraVectorizerPasses; extern cl::opt FlattenedProfileUsed; extern cl::opt AttributorRun; extern cl::opt EnableKnowledgeRetention; extern cl::opt EnableMatrix; extern cl::opt DisablePreInliner; extern cl::opt PreInlineThreshold; } // namespace llvm void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM, OptimizationLevel Level) { for (auto &C : PeepholeEPCallbacks) C(FPM, Level); } // Helper to add AnnotationRemarksPass. 
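// For reference, embedders can override these tuning defaults before
// constructing the PassBuilder. A minimal sketch (remaining constructor
// arguments left at their defaults):
//
//   PipelineTuningOptions PTO;
//   PTO.SLPVectorization = true;   // off by default in PipelineTuningOptions
//   PTO.LoopUnrolling = false;
//   PassBuilder PB(/*TM=*/nullptr, PTO);
//   ModulePassManager MPM =
//       PB.buildPerModuleDefaultPipeline(OptimizationLevel::O2);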
static void addAnnotationRemarksPass(ModulePassManager &MPM) { FunctionPassManager FPM; FPM.addPass(AnnotationRemarksPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } // Helper to check if the current compilation phase is preparing for LTO static bool isLTOPreLink(ThinOrFullLTOPhase Phase) { return Phase == ThinOrFullLTOPhase::ThinLTOPreLink || Phase == ThinOrFullLTOPhase::FullLTOPreLink; } // TODO: Investigate the cost/benefit of tail call elimination on debugging. FunctionPassManager PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase) { FunctionPassManager FPM; // Form SSA out of local memory accesses after breaking apart aggregates into // scalars. FPM.addPass(SROAPass()); // Catch trivial redundancies FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); // Hoisting of scalars and load expressions. - FPM.addPass(SimplifyCFGPass()); + FPM.addPass( + SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); FPM.addPass(InstCombinePass()); FPM.addPass(LibCallsShrinkWrapPass()); invokePeepholeEPCallbacks(FPM, Level); - FPM.addPass(SimplifyCFGPass()); + FPM.addPass( + SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); // Form canonically associated expression trees, and simplify the trees using // basic mathematical properties. For example, this will form (nearly) // minimal multiplication trees. FPM.addPass(ReassociatePass()); // Add the primary loop simplification pipeline. // FIXME: Currently this is split into two loop pass pipelines because we run // some function passes in between them. These can and should be removed // and/or replaced by scheduling the loop pass equivalents in the correct // positions. But those equivalent passes aren't powerful enough yet. // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to // fully replace `SimplifyCFGPass`, and the closest to the other we have is // `LoopInstSimplify`. LoopPassManager LPM1, LPM2; // Simplify the loop body. We do this initially to clean up after other loop // passes run, either when iterating on a loop or on inner loops with // implications on the outer loop. LPM1.addPass(LoopInstSimplifyPass()); LPM1.addPass(LoopSimplifyCFGPass()); // Try to remove as much code from the loop header as possible, - // to reduce amount of IR that will have to be duplicated. + // to reduce amount of IR that will have to be duplicated. However, + // do not perform speculative hoisting the first time as LICM + // will destroy metadata that may not need to be destroyed if run + // after loop rotation. // TODO: Investigate promotion cap for O1. - LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/false)); LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true, isLTOPreLink(Phase))); // TODO: Investigate promotion cap for O1. 
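// For reference, LICMPass now takes an explicit AllowSpeculation flag: the
// run scheduled before loop rotation keeps it off so speculative hoisting does
// not destroy metadata that could survive if LICM ran after rotation, and the
// post-rotation run re-enables it. A sketch of scheduling LICM directly with
// the flag, mirroring the adaptor arguments used in this file:
//
//   LoopPassManager LPM;
//   LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
//                        /*AllowSpeculation=*/true));
//   FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM),
//                                               /*UseMemorySSA=*/true,
//                                               /*UseBlockFrequencyInfo=*/true));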
- LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); LPM1.addPass(SimpleLoopUnswitchPass()); if (EnableLoopFlatten) LPM1.addPass(LoopFlattenPass()); LPM2.addPass(LoopIdiomRecognizePass()); LPM2.addPass(IndVarSimplifyPass()); for (auto &C : LateLoopOptimizationsEPCallbacks) C(LPM2, Level); LPM2.addPass(LoopDeletionPass()); if (EnableLoopInterchange) LPM2.addPass(LoopInterchangePass()); // Do not enable unrolling in PreLinkThinLTO phase during sample PGO // because it changes IR to makes profile annotation in back compile // inaccurate. The normal unroller doesn't pay attention to forced full unroll // attributes so we need to make sure and allow the full unroll pass to pay // attention to it. if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || PGOOpt->Action != PGOOptions::SampleUse) LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), /* OnlyWhenForced= */ !PTO.LoopUnrolling, PTO.ForgetAllSCEVInLoopUnroll)); for (auto &C : LoopOptimizerEndEPCallbacks) C(LPM2, Level); // We provide the opt remark emitter pass for LICM to use. We only need to do // this once as it is immutable. FPM.addPass( RequireAnalysisPass()); FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); - FPM.addPass(SimplifyCFGPass()); + FPM.addPass( + SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); FPM.addPass(InstCombinePass()); // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA. // *All* loop passes must preserve it, in order to be able to use it. FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false)); // Delete small array after loop unroll. FPM.addPass(SROAPass()); // Specially optimize memory movement as it doesn't look like dataflow in SSA. FPM.addPass(MemCpyOptPass()); // Sparse conditional constant propagation. // FIXME: It isn't clear why we do this *after* loop passes rather than // before... FPM.addPass(SCCPPass()); // Delete dead bit computations (instcombine runs after to fold away the dead // computations, and then ADCE will run later to exploit any new DCE // opportunities that creates). FPM.addPass(BDCEPass()); // Run instcombine after redundancy and dead bit elimination to exploit // opportunities opened up by them. FPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(FPM, Level); FPM.addPass(CoroElidePass()); for (auto &C : ScalarOptimizerLateEPCallbacks) C(FPM, Level); // Finally, do an expensive DCE pass to catch all the dead code exposed by // the simplifications and basic cleanup after all the simplifications. // TODO: Investigate if this is too expensive. FPM.addPass(ADCEPass()); - FPM.addPass(SimplifyCFGPass()); + FPM.addPass( + SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); FPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(FPM, Level); return FPM; } FunctionPassManager PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase) { assert(Level != OptimizationLevel::O0 && "Must request optimizations!"); // The O1 pipeline has a separate pipeline creation function to simplify // construction readability. if (Level.getSpeedupLevel() == 1) return buildO1FunctionSimplificationPipeline(Level, Phase); FunctionPassManager FPM; // Form SSA out of local memory accesses after breaking apart aggregates into // scalars. 
FPM.addPass(SROAPass()); // Catch trivial redundancies FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); if (EnableKnowledgeRetention) FPM.addPass(AssumeSimplifyPass()); // Hoisting of scalars and load expressions. if (EnableGVNHoist) FPM.addPass(GVNHoistPass()); // Global value numbering based sinking. if (EnableGVNSink) { FPM.addPass(GVNSinkPass()); - FPM.addPass(SimplifyCFGPass()); + FPM.addPass( + SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); } if (EnableConstraintElimination) FPM.addPass(ConstraintEliminationPass()); // Speculative execution if the target has divergent branches; otherwise nop. FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true)); // Optimize based on known information about branches, and cleanup afterward. FPM.addPass(JumpThreadingPass()); FPM.addPass(CorrelatedValuePropagationPass()); - FPM.addPass(SimplifyCFGPass()); + FPM.addPass( + SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); FPM.addPass(InstCombinePass()); if (Level == OptimizationLevel::O3) FPM.addPass(AggressiveInstCombinePass()); if (!Level.isOptimizingForSize()) FPM.addPass(LibCallsShrinkWrapPass()); invokePeepholeEPCallbacks(FPM, Level); // For PGO use pipeline, try to optimize memory intrinsics such as memcpy // using the size value profile. Don't perform this when optimizing for size. if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse && !Level.isOptimizingForSize()) FPM.addPass(PGOMemOPSizeOpt()); FPM.addPass(TailCallElimPass()); - FPM.addPass(SimplifyCFGPass()); + FPM.addPass( + SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); // Form canonically associated expression trees, and simplify the trees using // basic mathematical properties. For example, this will form (nearly) // minimal multiplication trees. FPM.addPass(ReassociatePass()); // Add the primary loop simplification pipeline. // FIXME: Currently this is split into two loop pass pipelines because we run // some function passes in between them. These can and should be removed // and/or replaced by scheduling the loop pass equivalents in the correct // positions. But those equivalent passes aren't powerful enough yet. // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to // fully replace `SimplifyCFGPass`, and the closest to the other we have is // `LoopInstSimplify`. LoopPassManager LPM1, LPM2; // Simplify the loop body. We do this initially to clean up after other loop // passes run, either when iterating on a loop or on inner loops with // implications on the outer loop. LPM1.addPass(LoopInstSimplifyPass()); LPM1.addPass(LoopSimplifyCFGPass()); // Try to remove as much code from the loop header as possible, - // to reduce amount of IR that will have to be duplicated. + // to reduce amount of IR that will have to be duplicated. However, + // do not perform speculative hoisting the first time as LICM + // will destroy metadata that may not need to be destroyed if run + // after loop rotation. // TODO: Investigate promotion cap for O1. - LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/false)); // Disable header duplication in loop rotation at -Oz. LPM1.addPass( LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase))); // TODO: Investigate promotion cap for O1. 
- LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); LPM1.addPass( SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 && EnableO3NonTrivialUnswitching)); if (EnableLoopFlatten) LPM1.addPass(LoopFlattenPass()); LPM2.addPass(LoopIdiomRecognizePass()); LPM2.addPass(IndVarSimplifyPass()); for (auto &C : LateLoopOptimizationsEPCallbacks) C(LPM2, Level); LPM2.addPass(LoopDeletionPass()); if (EnableLoopInterchange) LPM2.addPass(LoopInterchangePass()); // Do not enable unrolling in PreLinkThinLTO phase during sample PGO // because it changes IR to makes profile annotation in back compile // inaccurate. The normal unroller doesn't pay attention to forced full unroll // attributes so we need to make sure and allow the full unroll pass to pay // attention to it. if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || PGOOpt->Action != PGOOptions::SampleUse) LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), /* OnlyWhenForced= */ !PTO.LoopUnrolling, PTO.ForgetAllSCEVInLoopUnroll)); for (auto &C : LoopOptimizerEndEPCallbacks) C(LPM2, Level); // We provide the opt remark emitter pass for LICM to use. We only need to do // this once as it is immutable. FPM.addPass( RequireAnalysisPass()); FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); - FPM.addPass(SimplifyCFGPass()); + FPM.addPass( + SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); FPM.addPass(InstCombinePass()); // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass, // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA. // *All* loop passes must preserve it, in order to be able to use it. FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false)); // Delete small array after loop unroll. FPM.addPass(SROAPass()); // The matrix extension can introduce large vector operations early, which can // benefit from running vector-combine early on. if (EnableMatrix) FPM.addPass(VectorCombinePass(/*ScalarizationOnly=*/true)); // Eliminate redundancies. FPM.addPass(MergedLoadStoreMotionPass()); if (RunNewGVN) FPM.addPass(NewGVNPass()); else FPM.addPass(GVNPass()); // Sparse conditional constant propagation. // FIXME: It isn't clear why we do this *after* loop passes rather than // before... FPM.addPass(SCCPPass()); // Delete dead bit computations (instcombine runs after to fold away the dead // computations, and then ADCE will run later to exploit any new DCE // opportunities that creates). FPM.addPass(BDCEPass()); // Run instcombine after redundancy and dead bit elimination to exploit // opportunities opened up by them. FPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(FPM, Level); // Re-consider control flow based optimizations after redundancy elimination, // redo DCE, etc. if (EnableDFAJumpThreading && Level.getSizeLevel() == 0) FPM.addPass(DFAJumpThreadingPass()); FPM.addPass(JumpThreadingPass()); FPM.addPass(CorrelatedValuePropagationPass()); // Finally, do an expensive DCE pass to catch all the dead code exposed by // the simplifications and basic cleanup after all the simplifications. // TODO: Investigate if this is too expensive. FPM.addPass(ADCEPass()); // Specially optimize memory movement as it doesn't look like dataflow in SSA. 
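// For reference, SimplifyCFGOptions is a chainable option struct, which is
// why this change can thread convertSwitchRangeToICmp(true) through every
// SimplifyCFGPass instance it touches. The late-cleanup configuration used
// just below combines several options:
//
//   FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
//                                   .convertSwitchRangeToICmp(true)
//                                   .hoistCommonInsts(true)
//                                   .sinkCommonInsts(true)));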
FPM.addPass(MemCpyOptPass()); FPM.addPass(DSEPass()); FPM.addPass(createFunctionToLoopPassAdaptor( - LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true), /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); FPM.addPass(CoroElidePass()); for (auto &C : ScalarOptimizerLateEPCallbacks) C(FPM, Level); - FPM.addPass(SimplifyCFGPass( - SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true))); + FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions() + .convertSwitchRangeToICmp(true) + .hoistCommonInsts(true) + .sinkCommonInsts(true))); FPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(FPM, Level); if (EnableCHR && Level == OptimizationLevel::O3 && PGOOpt && (PGOOpt->Action == PGOOptions::IRUse || PGOOpt->Action == PGOOptions::SampleUse)) FPM.addPass(ControlHeightReductionPass()); return FPM; } void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) { MPM.addPass(CanonicalizeAliasesPass()); MPM.addPass(NameAnonGlobalPass()); } void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, OptimizationLevel Level, bool RunProfileGen, bool IsCS, std::string ProfileFile, std::string ProfileRemappingFile) { assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!"); if (!IsCS && !DisablePreInliner) { InlineParams IP; IP.DefaultThreshold = PreInlineThreshold; // FIXME: The hint threshold has the same value used by the regular inliner // when not optimizing for size. This should probably be lowered after // performance testing. // FIXME: this comment is cargo culted from the old pass manager, revisit. IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325; ModuleInlinerWrapperPass MIWP(IP); CGSCCPassManager &CGPipeline = MIWP.getPM(); FunctionPassManager FPM; FPM.addPass(SROAPass()); FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies. - FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks. + FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp( + true))); // Merge & remove basic blocks. FPM.addPass(InstCombinePass()); // Combine silly sequences. invokePeepholeEPCallbacks(FPM, Level); CGPipeline.addPass(createCGSCCToFunctionPassAdaptor( std::move(FPM), PTO.EagerlyInvalidateAnalyses)); MPM.addPass(std::move(MIWP)); // Delete anything that is now dead to make sure that we don't instrument // dead code. Instrumentation can end up keeping dead code around and // dramatically increase code size. MPM.addPass(GlobalDCEPass()); } if (!RunProfileGen) { assert(!ProfileFile.empty() && "Profile use expecting a profile file!"); MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS)); // Cache ProfileSummaryAnalysis once to avoid the potential need to insert // RequireAnalysisPass for PSI before subsequent non-module passes. MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>()); return; } // Perform PGO instrumentation. MPM.addPass(PGOInstrumentationGen(IsCS)); FunctionPassManager FPM; // Disable header duplication in loop rotation at -Oz. FPM.addPass(createFunctionToLoopPassAdaptor( LoopRotatePass(Level != OptimizationLevel::Oz), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false)); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM), PTO.EagerlyInvalidateAnalyses)); // Add the profile lowering pass. InstrProfOptions Options; if (!ProfileFile.empty()) Options.InstrProfileOutput = ProfileFile; // Do counter promotion at Level greater than O0.
Options.DoCounterPromotion = true; Options.UseBFIInPromotion = IsCS; MPM.addPass(InstrProfiling(Options, IsCS)); } void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen, bool IsCS, std::string ProfileFile, std::string ProfileRemappingFile) { if (!RunProfileGen) { assert(!ProfileFile.empty() && "Profile use expecting a profile file!"); MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS)); // Cache ProfileSummaryAnalysis once to avoid the potential need to insert // RequireAnalysisPass for PSI before subsequent non-module passes. MPM.addPass(RequireAnalysisPass()); return; } // Perform PGO instrumentation. MPM.addPass(PGOInstrumentationGen(IsCS)); // Add the profile lowering pass. InstrProfOptions Options; if (!ProfileFile.empty()) Options.InstrProfileOutput = ProfileFile; // Do not do counter promotion at O0. Options.DoCounterPromotion = false; Options.UseBFIInPromotion = IsCS; MPM.addPass(InstrProfiling(Options, IsCS)); } static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) { return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel()); } ModuleInlinerWrapperPass PassBuilder::buildInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase) { InlineParams IP = getInlineParamsFromOptLevel(Level); if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) IP.HotCallSiteThreshold = 0; if (PGOOpt) IP.EnableDeferral = EnablePGOInlineDeferral; ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst, UseInlineAdvisor, MaxDevirtIterations); // Require the GlobalsAA analysis for the module so we can query it within // the CGSCC pipeline. MIWP.addModulePass(RequireAnalysisPass()); // Invalidate AAManager so it can be recreated and pick up the newly available // GlobalsAA. MIWP.addModulePass( createModuleToFunctionPassAdaptor(InvalidateAnalysisPass())); // Require the ProfileSummaryAnalysis for the module so we can query it within // the inliner pass. MIWP.addModulePass(RequireAnalysisPass()); // Now begin the main postorder CGSCC pipeline. // FIXME: The current CGSCC pipeline has its origins in the legacy pass // manager and trying to emulate its precise behavior. Much of this doesn't // make a lot of sense and we should revisit the core CGSCC structure. CGSCCPassManager &MainCGPipeline = MIWP.getPM(); // Note: historically, the PruneEH pass was run first to deduce nounwind and // generally clean up exception handling overhead. It isn't clear this is // valuable as the inliner doesn't currently care whether it is inlining an // invoke or a call. if (AttributorRun & AttributorRunOption::CGSCC) MainCGPipeline.addPass(AttributorCGSCCPass()); // Now deduce any function attributes based in the current code. MainCGPipeline.addPass(PostOrderFunctionAttrsPass()); // When at O3 add argument promotion to the pass pipeline. // FIXME: It isn't at all clear why this should be limited to O3. if (Level == OptimizationLevel::O3) MainCGPipeline.addPass(ArgumentPromotionPass()); // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if // there are no OpenMP runtime calls present in the module. if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3) MainCGPipeline.addPass(OpenMPOptCGSCCPass()); for (auto &C : CGSCCOptimizerLateEPCallbacks) C(MainCGPipeline, Level); // Lastly, add the core function simplification pipeline nested inside the // CGSCC walk. 
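Both addPGOInstrPasses and addPGOInstrPassesForO0 end the profile-generation path the same way: PGOInstrumentationGen inserts the counter intrinsics and InstrProfiling lowers them according to InstrProfOptions. A stripped-down sketch of that pair in the O0 configuration (helper name and output path are illustrative):

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Instrumentation.h"                    // InstrProfOptions
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"

using namespace llvm;

// Illustrative helper: a minimal IR-level profile-generation pipeline.
static ModulePassManager makePGOGenPipeline() {
  ModulePassManager MPM;
  MPM.addPass(PGOInstrumentationGen(/*IsCS=*/false));   // insert counters

  InstrProfOptions Options;
  Options.InstrProfileOutput = "default.profraw";       // illustrative path
  Options.DoCounterPromotion = false;                   // matches the O0 variant
  Options.UseBFIInPromotion = false;
  MPM.addPass(InstrProfiling(Options, /*IsCS=*/false)); // lower to runtime calls
  return MPM;
}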
MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor( buildFunctionSimplificationPipeline(Level, Phase), PTO.EagerlyInvalidateAnalyses, EnableNoRerunSimplificationPipeline)); MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0)); if (EnableNoRerunSimplificationPipeline) MIWP.addLateModulePass(createModuleToFunctionPassAdaptor( InvalidateAnalysisPass())); return MIWP; } ModulePassManager PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase) { ModulePassManager MPM; InlineParams IP = getInlineParamsFromOptLevel(Level); if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) IP.HotCallSiteThreshold = 0; if (PGOOpt) IP.EnableDeferral = EnablePGOInlineDeferral; // The inline deferral logic is used to avoid losing some // inlining chance in future. It is helpful in SCC inliner, in which // inlining is processed in bottom-up order. // While in module inliner, the inlining order is a priority-based order // by default. The inline deferral is unnecessary there. So we disable the // inline deferral logic in module inliner. IP.EnableDeferral = false; MPM.addPass(ModuleInlinerPass(IP, UseInlineAdvisor)); MPM.addPass(createModuleToFunctionPassAdaptor( buildFunctionSimplificationPipeline(Level, Phase), PTO.EagerlyInvalidateAnalyses)); MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( CoroSplitPass(Level != OptimizationLevel::O0))); return MPM; } ModulePassManager PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase Phase) { ModulePassManager MPM; // Place pseudo probe instrumentation as the first pass of the pipeline to // minimize the impact of optimization changes. if (PGOOpt && PGOOpt->PseudoProbeForProfiling && Phase != ThinOrFullLTOPhase::ThinLTOPostLink) MPM.addPass(SampleProfileProbePass(TM)); bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse); // In ThinLTO mode, when flattened profile is used, all the available // profile information will be annotated in PreLink phase so there is // no need to load the profile again in PostLink. bool LoadSampleProfile = HasSampleProfile && !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink); // During the ThinLTO backend phase we perform early indirect call promotion // here, before globalopt. Otherwise imported available_externally functions // look unreferenced and are removed. If we are going to load the sample // profile then defer until later. // TODO: See if we can move later and consolidate with the location where // we perform ICP when we are loading a sample profile. // TODO: We pass HasSampleProfile (whether there was a sample profile file // passed to the compile) to the SamplePGO flag of ICP. This is used to // determine whether the new direct calls are annotated with prof metadata. // Ideally this should be determined from whether the IR is annotated with // sample profile, and not whether the a sample profile was provided on the // command line. E.g. for flattened profiles where we will not be reloading // the sample profile in the ThinLTO backend, we ideally shouldn't have to // provide the sample profile file. if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile) MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile)); // Do basic inference of function attributes from known properties of system // libraries and other oracles. 
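Both inliner pipelines hang the function simplification work off the CGSCC walk, so each function is re-simplified right after inlining into it. A condensed sketch of that structure, with a two-pass stand-in for buildFunctionSimplificationPipeline (helper name and the chosen inline parameters are illustrative):

#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/InlineCost.h"                 // getInlineParams
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/IPO/Inliner.h"              // ModuleInlinerWrapperPass
#include "llvm/Transforms/InstCombine/InstCombine.h"
#include "llvm/Transforms/Scalar/SROA.h"

using namespace llvm;

// Illustrative: an inliner wrapper whose post-order CGSCC walk re-simplifies
// each function after inlining, mirroring the shape of buildInlinerPipeline.
static ModuleInlinerWrapperPass makeInlinerWithCleanup() {
  ModuleInlinerWrapperPass MIWP(getInlineParams(/*OptLevel=*/2,
                                                /*SizeOptLevel=*/0));
  FunctionPassManager FPM;
  FPM.addPass(SROAPass());
  FPM.addPass(InstCombinePass());
  MIWP.getPM().addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
  return MIWP;
}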
MPM.addPass(InferFunctionAttrsPass()); // Create an early function pass manager to cleanup the output of the // frontend. FunctionPassManager EarlyFPM; // Lower llvm.expect to metadata before attempting transforms. // Compare/branch metadata may alter the behavior of passes like SimplifyCFG. EarlyFPM.addPass(LowerExpectIntrinsicPass()); EarlyFPM.addPass(SimplifyCFGPass()); EarlyFPM.addPass(SROAPass()); EarlyFPM.addPass(EarlyCSEPass()); EarlyFPM.addPass(CoroEarlyPass()); if (Level == OptimizationLevel::O3) EarlyFPM.addPass(CallSiteSplittingPass()); // In SamplePGO ThinLTO backend, we need instcombine before profile annotation // to convert bitcast to direct calls so that they can be inlined during the // profile annotation prepration step. // More details about SamplePGO design can be found in: // https://research.google.com/pubs/pub45290.html // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured. if (LoadSampleProfile) EarlyFPM.addPass(InstCombinePass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses)); if (LoadSampleProfile) { // Annotate sample profile right after early FPM to ensure freshness of // the debug info. MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase)); // Cache ProfileSummaryAnalysis once to avoid the potential need to insert // RequireAnalysisPass for PSI before subsequent non-module passes. MPM.addPass(RequireAnalysisPass()); // Do not invoke ICP in the LTOPrelink phase as it makes it hard // for the profile annotation to be accurate in the LTO backend. if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink && Phase != ThinOrFullLTOPhase::FullLTOPreLink) // We perform early indirect call promotion here, before globalopt. // This is important for the ThinLTO backend phase because otherwise // imported available_externally functions look unreferenced and are // removed. MPM.addPass( PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */)); } // Try to perform OpenMP specific optimizations on the module. This is a // (quick!) no-op if there are no OpenMP runtime calls present in the module. if (Level != OptimizationLevel::O0) MPM.addPass(OpenMPOptPass()); if (AttributorRun & AttributorRunOption::MODULE) MPM.addPass(AttributorPass()); // Lower type metadata and the type.test intrinsic in the ThinLTO // post link pipeline after ICP. This is to enable usage of the type // tests in ICP sequences. if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink) MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); for (auto &C : PipelineEarlySimplificationEPCallbacks) C(MPM, Level); // Specialize functions with IPSCCP. if (EnableFunctionSpecialization && Level == OptimizationLevel::O3) MPM.addPass(FunctionSpecializationPass()); // Interprocedural constant propagation now that basic cleanup has occurred // and prior to optimizing globals. // FIXME: This position in the pipeline hasn't been carefully considered in // years, it should be re-analyzed. MPM.addPass(IPSCCPPass()); // Attach metadata to indirect call sites indicating the set of functions // they may target at run-time. This should follow IPSCCP. MPM.addPass(CalledValuePropagationPass()); // Optimize globals to try and fold them into constants. MPM.addPass(GlobalOptPass()); // Promote any localized globals to SSA registers. // FIXME: Should this instead by a run of SROA? 
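EarlyFPM above shows the standard shape for interleaving function-level cleanup with module passes: build a FunctionPassManager separately, then splice it into the module pipeline with createModuleToFunctionPassAdaptor. A trimmed sketch of that early-cleanup block (helper name illustrative):

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/Transforms/Scalar/SROA.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"

using namespace llvm;

// Illustrative: frontend-output cleanup run per function, wrapped so it can
// sit between module passes.
static ModulePassManager makeEarlyCleanup() {
  FunctionPassManager EarlyFPM;
  EarlyFPM.addPass(LowerExpectIntrinsicPass()); // lower llvm.expect first
  EarlyFPM.addPass(SimplifyCFGPass());
  EarlyFPM.addPass(SROAPass());
  EarlyFPM.addPass(EarlyCSEPass());

  ModulePassManager MPM;
  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
  return MPM;
}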
// FIXME: We should probably run instcombine and simplifycfg afterward to // delete control flows that are dead once globals have been folded to // constants. MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass())); // Remove any dead arguments exposed by cleanups and constant folding // globals. MPM.addPass(DeadArgumentEliminationPass()); // Create a small function pass pipeline to cleanup after all the global // optimizations. FunctionPassManager GlobalCleanupPM; GlobalCleanupPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(GlobalCleanupPM, Level); - GlobalCleanupPM.addPass(SimplifyCFGPass()); + GlobalCleanupPM.addPass( + SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM), PTO.EagerlyInvalidateAnalyses)); // Add all the requested passes for instrumentation PGO, if requested. if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink && (PGOOpt->Action == PGOOptions::IRInstr || PGOOpt->Action == PGOOptions::IRUse)) { addPGOInstrPasses(MPM, Level, /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr, /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile); MPM.addPass(PGOIndirectCallPromotion(false, false)); } if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink && PGOOpt->CSAction == PGOOptions::CSIRInstr) MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile)); // Synthesize function entry counts for non-PGO compilation. if (EnableSyntheticCounts && !PGOOpt) MPM.addPass(SyntheticCountsPropagation()); if (EnableModuleInliner) MPM.addPass(buildModuleInlinerPipeline(Level, Phase)); else MPM.addPass(buildInlinerPipeline(Level, Phase)); if (EnableMemProfiler && Phase != ThinOrFullLTOPhase::ThinLTOPreLink) { MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass())); MPM.addPass(ModuleMemProfilerPass()); } return MPM; } /// TODO: Should LTO cause any differences to this set of passes? void PassBuilder::addVectorPasses(OptimizationLevel Level, FunctionPassManager &FPM, bool IsFullLTO) { FPM.addPass(LoopVectorizePass( LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization))); if (IsFullLTO) { // The vectorizer may have significantly shortened a loop body; unroll // again. Unroll small loops to hide loop backedge latency and saturate any // parallel execution resources of an out-of-order processor. We also then // need to clean up redundancies and loop invariant code. // FIXME: It would be really good to use a loop-integrated instruction // combiner for cleanup here so that the unrolling and LICM can be pipelined // across the loop nests. // We do UnrollAndJam in a separate LPM to ensure it happens before unroll if (EnableUnrollAndJam && PTO.LoopUnrolling) FPM.addPass(createFunctionToLoopPassAdaptor( LoopUnrollAndJamPass(Level.getSpeedupLevel()))); FPM.addPass(LoopUnrollPass(LoopUnrollOptions( Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, PTO.ForgetAllSCEVInLoopUnroll))); FPM.addPass(WarnMissedTransformationsPass()); } if (!IsFullLTO) { // Eliminate loads by forwarding stores from the previous iteration to loads // of the current iteration. FPM.addPass(LoopLoadEliminationPass()); } // Cleanup after the loop optimization passes. FPM.addPass(InstCombinePass()); if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) { ExtraVectorPassManager ExtraPasses; // At higher optimization levels, try to clean up any runtime overlap and // alignment checks inserted by the vectorizer. 
We want to track correlated // runtime checks for two inner loops in the same outer loop, fold any // common computations, hoist loop-invariant aspects out of any outer loop, // and unswitch the runtime checks if possible. Once hoisted, we may have // dead (or speculatable) control flows or more combining opportunities. ExtraPasses.addPass(EarlyCSEPass()); ExtraPasses.addPass(CorrelatedValuePropagationPass()); ExtraPasses.addPass(InstCombinePass()); LoopPassManager LPM; - LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3)); ExtraPasses.addPass( RequireAnalysisPass()); ExtraPasses.addPass( createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); - ExtraPasses.addPass(SimplifyCFGPass()); + ExtraPasses.addPass( + SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); ExtraPasses.addPass(InstCombinePass()); FPM.addPass(std::move(ExtraPasses)); } // Now that we've formed fast to execute loop structures, we do further // optimizations. These are run afterward as they might block doing complex // analyses and transforms such as what are needed for loop vectorization. // Cleanup after loop vectorization, etc. Simplification passes like CVP and // GVN, loop transforms, and others have already run, so it's now better to // convert to more optimized IR using more aggressive simplify CFG options. // The extra sinking transform can create larger basic blocks, so do this // before SLP vectorization. FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions() .forwardSwitchCondToPhi(true) + .convertSwitchRangeToICmp(true) .convertSwitchToLookupTable(true) .needCanonicalLoops(false) .hoistCommonInsts(true) .sinkCommonInsts(true))); if (IsFullLTO) { FPM.addPass(SCCPPass()); FPM.addPass(InstCombinePass()); FPM.addPass(BDCEPass()); } // Optimize parallel scalar instruction chains into SIMD instructions. if (PTO.SLPVectorization) { FPM.addPass(SLPVectorizerPass()); if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) { FPM.addPass(EarlyCSEPass()); } } // Enhance/cleanup vector code. FPM.addPass(VectorCombinePass()); if (!IsFullLTO) { FPM.addPass(InstCombinePass()); // Unroll small loops to hide loop backedge latency and saturate any // parallel execution resources of an out-of-order processor. We also then // need to clean up redundancies and loop invariant code. // FIXME: It would be really good to use a loop-integrated instruction // combiner for cleanup here so that the unrolling and LICM can be pipelined // across the loop nests. 
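In the non-LTO branch below, unroll-and-jam is given its own loop-pass adaptor so that it is guaranteed to run before the general LoopUnrollPass, which is a function pass driven by LoopUnrollOptions. A compact sketch of that arrangement (helper name and parameters illustrative, mirroring the calls used here):

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
#include "llvm/Transforms/Scalar/LoopUnrollPass.h"

using namespace llvm;

// Illustrative: unroll-and-jam in its own adaptor, then the general unroller.
static FunctionPassManager makeUnrollFPM(unsigned SpeedupLevel,
                                         bool LoopUnrollingEnabled,
                                         bool ForgetAllSCEV) {
  FunctionPassManager FPM;
  FPM.addPass(
      createFunctionToLoopPassAdaptor(LoopUnrollAndJamPass(SpeedupLevel)));
  FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
      SpeedupLevel, /*OnlyWhenForced=*/!LoopUnrollingEnabled, ForgetAllSCEV)));
  return FPM;
}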
// We do UnrollAndJam in a separate LPM to ensure it happens before unroll if (EnableUnrollAndJam && PTO.LoopUnrolling) { FPM.addPass(createFunctionToLoopPassAdaptor( LoopUnrollAndJamPass(Level.getSpeedupLevel()))); } FPM.addPass(LoopUnrollPass(LoopUnrollOptions( Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling, PTO.ForgetAllSCEVInLoopUnroll))); FPM.addPass(WarnMissedTransformationsPass()); FPM.addPass(InstCombinePass()); FPM.addPass( RequireAnalysisPass()); FPM.addPass(createFunctionToLoopPassAdaptor( - LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true), /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); } // Now that we've vectorized and unrolled loops, we may have more refined // alignment information, try to re-derive it here. FPM.addPass(AlignmentFromAssumptionsPass()); if (IsFullLTO) FPM.addPass(InstCombinePass()); } ModulePassManager PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, bool LTOPreLink) { ModulePassManager MPM; // Optimize globals now that the module is fully simplified. MPM.addPass(GlobalOptPass()); MPM.addPass(GlobalDCEPass()); // Run partial inlining pass to partially inline functions that have // large bodies. if (RunPartialInlining) MPM.addPass(PartialInlinerPass()); // Remove avail extern fns and globals definitions since we aren't compiling // an object file for later LTO. For LTO we want to preserve these so they // are eligible for inlining at link-time. Note if they are unreferenced they // will be removed by GlobalDCE later, so this only impacts referenced // available externally globals. Eventually they will be suppressed during // codegen, but eliminating here enables more opportunity for GlobalDCE as it // may make globals referenced by available external functions dead and saves // running remaining passes on the eliminated functions. These should be // preserved during prelinking for link-time inlining decisions. if (!LTOPreLink) MPM.addPass(EliminateAvailableExternallyPass()); if (EnableOrderFileInstrumentation) MPM.addPass(InstrOrderFilePass()); // Do RPO function attribute inference across the module to forward-propagate // attributes where applicable. // FIXME: Is this really an optimization rather than a canonicalization? MPM.addPass(ReversePostOrderFunctionAttrsPass()); // Do a post inline PGO instrumentation and use pass. This is a context // sensitive PGO pass. We don't want to do this in LTOPreLink phrase as // cross-module inline has not been done yet. The context sensitive // instrumentation is after all the inlines are done. if (!LTOPreLink && PGOOpt) { if (PGOOpt->CSAction == PGOOptions::CSIRInstr) addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, /* IsCS */ true, PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile); else if (PGOOpt->CSAction == PGOOptions::CSIRUse) addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false, /* IsCS */ true, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile); } // Re-require GloblasAA here prior to function passes. This is particularly // useful as the above will have inlined, DCE'ed, and function-attr // propagated everything. We should at this point have a reasonably minimal // and richly annotated call graph. By computing aliasing and mod/ref // information for all local globals here, the late loop passes and notably // the vectorizer will be able to use them to help recognize vectorizable // memory operations. 
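RequireAnalysisPass and InvalidateAnalysisPass are templates over the analysis (and IR unit) they act on; for the GlobalsAA refresh described above, a typical spelling is the following sketch. The template arguments are inferred from the surrounding comments rather than taken verbatim from this diff, so treat them as a reconstruction:

#include "llvm/Analysis/AliasAnalysis.h"   // AAManager
#include "llvm/Analysis/GlobalsModRef.h"   // GlobalsAA
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

// Reconstruction (not verbatim from the diff): force GlobalsAA to be computed
// for the module, then drop cached AAManager results so later function passes
// re-query it and see the fresh GlobalsAA information.
static void requireFreshGlobalsAA(ModulePassManager &MPM) {
  MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
  MPM.addPass(
      createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
}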
MPM.addPass(RequireAnalysisPass()); FunctionPassManager OptimizePM; OptimizePM.addPass(Float2IntPass()); OptimizePM.addPass(LowerConstantIntrinsicsPass()); if (EnableMatrix) { OptimizePM.addPass(LowerMatrixIntrinsicsPass()); OptimizePM.addPass(EarlyCSEPass()); } // FIXME: We need to run some loop optimizations to re-rotate loops after // simplifycfg and others undo their rotation. // Optimize the loop execution. These passes operate on entire loop nests // rather than on each loop in an inside-out manner, and so they are actually // function passes. for (auto &C : VectorizerStartEPCallbacks) C(OptimizePM, Level); LoopPassManager LPM; // First rotate loops that may have been un-rotated by prior passes. // Disable header duplication at -Oz. LPM.addPass(LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink)); // Some loops may have become dead by now. Try to delete them. // FIXME: see discussion in https://reviews.llvm.org/D112851, // this may need to be revisited once we run GVN before loop deletion // in the simplification pipeline. LPM.addPass(LoopDeletionPass()); OptimizePM.addPass(createFunctionToLoopPassAdaptor( std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false)); // Distribute loops to allow partial vectorization. I.e. isolate dependences // into separate loop that would otherwise inhibit vectorization. This is // currently only performed for loops marked with the metadata // llvm.loop.distribute=true or when -enable-loop-distribute is specified. OptimizePM.addPass(LoopDistributePass()); // Populates the VFABI attribute with the scalar-to-vector mappings // from the TargetLibraryInfo. OptimizePM.addPass(InjectTLIMappings()); addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false); // LoopSink pass sinks instructions hoisted by LICM, which serves as a // canonicalization pass that enables other optimizations. As a result, // LoopSink pass needs to be a very late IR pass to avoid undoing LICM // result too early. OptimizePM.addPass(LoopSinkPass()); // And finally clean up LCSSA form before generating code. OptimizePM.addPass(InstSimplifyPass()); // This hoists/decomposes div/rem ops. It should run after other sink/hoist // passes to avoid re-sinking, but before SimplifyCFG because it can allow // flattening of blocks. OptimizePM.addPass(DivRemPairsPass()); // LoopSink (and other loop passes since the last simplifyCFG) might have // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. - OptimizePM.addPass(SimplifyCFGPass()); + OptimizePM.addPass( + SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); OptimizePM.addPass(CoroCleanupPass()); // Add the core optimizing pipeline. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM), PTO.EagerlyInvalidateAnalyses)); for (auto &C : OptimizerLastEPCallbacks) C(MPM, Level); // Split out cold code. Splitting is done late to avoid hiding context from // other optimizations and inadvertently regressing performance. The tradeoff // is that this has a higher code size cost than splitting early. if (EnableHotColdSplit && !LTOPreLink) MPM.addPass(HotColdSplittingPass()); // Search the code for similar regions of code. If enough similar regions can // be found where extracting the regions into their own function will decrease // the size of the program, we extract the regions, a deduplicate the // structurally similar regions. if (EnableIROutliner) MPM.addPass(IROutlinerPass()); // Merge functions if requested. 
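The *EPCallbacks lists iterated above are populated through the corresponding PassBuilder::register...EPCallback hooks, which is how out-of-tree plugins and frontends splice extra passes into the default pipelines. A small sketch with arbitrarily chosen passes (nothing here is mandated by the pipeline itself):

#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/Scalar/ADCE.h"
#include "llvm/Transforms/Utils/Mem2Reg.h"

using namespace llvm;

// Illustrative: hook extra passes into two of the extension points consumed
// while the default pipelines are being built.
static void registerMyCallbacks(PassBuilder &PB) {
  PB.registerVectorizerStartEPCallback(
      [](FunctionPassManager &FPM, OptimizationLevel Level) {
        FPM.addPass(ADCEPass());   // runs where VectorizerStartEPCallbacks fire
      });
  PB.registerOptimizerLastEPCallback(
      [](ModulePassManager &MPM, OptimizationLevel Level) {
        MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
      });
}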
if (PTO.MergeFunctions) MPM.addPass(MergeFunctionsPass()); if (PTO.CallGraphProfile) MPM.addPass(CGProfilePass()); // Now we need to do some global optimization transforms. // FIXME: It would seem like these should come first in the optimization // pipeline and maybe be the bottom of the canonicalization pipeline? Weird // ordering here. MPM.addPass(GlobalDCEPass()); MPM.addPass(ConstantMergePass()); // TODO: Relative look table converter pass caused an issue when full lto is // enabled. See https://reviews.llvm.org/D94355 for more details. // Until the issue fixed, disable this pass during pre-linking phase. if (!LTOPreLink) MPM.addPass(RelLookupTableConverterPass()); return MPM; } ModulePassManager PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, bool LTOPreLink) { assert(Level != OptimizationLevel::O0 && "Must request optimizations for the default pipeline!"); ModulePassManager MPM; // Convert @llvm.global.annotations to !annotation metadata. MPM.addPass(Annotation2MetadataPass()); // Force any function attributes we want the rest of the pipeline to observe. MPM.addPass(ForceFunctionAttrsPass()); // Apply module pipeline start EP callback. for (auto &C : PipelineStartEPCallbacks) C(MPM, Level); if (PGOOpt && PGOOpt->DebugInfoForProfiling) MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); // Add the core simplification pipeline. MPM.addPass(buildModuleSimplificationPipeline( Level, LTOPreLink ? ThinOrFullLTOPhase::FullLTOPreLink : ThinOrFullLTOPhase::None)); // Now add the optimization pipeline. MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink)); if (PGOOpt && PGOOpt->PseudoProbeForProfiling && PGOOpt->Action == PGOOptions::SampleUse) MPM.addPass(PseudoProbeUpdatePass()); // Emit annotation remarks. addAnnotationRemarksPass(MPM); if (LTOPreLink) addRequiredLTOPreLinkPasses(MPM); return MPM; } ModulePassManager PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) { assert(Level != OptimizationLevel::O0 && "Must request optimizations for the default pipeline!"); ModulePassManager MPM; // Convert @llvm.global.annotations to !annotation metadata. MPM.addPass(Annotation2MetadataPass()); // Force any function attributes we want the rest of the pipeline to observe. MPM.addPass(ForceFunctionAttrsPass()); if (PGOOpt && PGOOpt->DebugInfoForProfiling) MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); // Apply module pipeline start EP callback. for (auto &C : PipelineStartEPCallbacks) C(MPM, Level); // If we are planning to perform ThinLTO later, we don't bloat the code with // unrolling/vectorization/... now. Just simplify the module as much as we // can. MPM.addPass(buildModuleSimplificationPipeline( Level, ThinOrFullLTOPhase::ThinLTOPreLink)); // Run partial inlining pass to partially inline functions that have // large bodies. // FIXME: It isn't clear whether this is really the right place to run this // in ThinLTO. Because there is another canonicalization and simplification // phase that will run after the thin link, running this here ends up with // less information than will be available later and it may grow functions in // ways that aren't beneficial. if (RunPartialInlining) MPM.addPass(PartialInlinerPass()); // Reduce the size of the IR as much as possible. MPM.addPass(GlobalOptPass()); // Module simplification splits coroutines, but does not fully clean up // coroutine intrinsics. To ensure ThinLTO optimization passes don't trip up // on these, we schedule the cleanup here. 
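buildPerModuleDefaultPipeline is the entry point most embedders call to get the pipeline assembled above. A minimal driver sketch, assuming an existing Module M and a default-constructed PassBuilder (no TargetMachine, no PGO options); the boilerplate is the standard new-pass-manager setup rather than anything specific to this patch:

#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"

using namespace llvm;

// Illustrative driver: run the default -O2 module pipeline on M.
static void optimizeModule(Module &M) {
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;

  PassBuilder PB;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);   // also installs the default AA pipeline
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  ModulePassManager MPM =
      PB.buildPerModuleDefaultPipeline(OptimizationLevel::O2);
  MPM.run(M, MAM);
}

The ThinLTO and LTO entry points later in this file slot into the same driver in place of buildPerModuleDefaultPipeline.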
MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass())); if (PGOOpt && PGOOpt->PseudoProbeForProfiling && PGOOpt->Action == PGOOptions::SampleUse) MPM.addPass(PseudoProbeUpdatePass()); // Handle OptimizerLastEPCallbacks added by clang on PreLink. Actual // optimization is going to be done in PostLink stage, but clang can't // add callbacks there in case of in-process ThinLTO called by linker. for (auto &C : OptimizerLastEPCallbacks) C(MPM, Level); // Emit annotation remarks. addAnnotationRemarksPass(MPM); addRequiredLTOPreLinkPasses(MPM); return MPM; } ModulePassManager PassBuilder::buildThinLTODefaultPipeline( OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) { ModulePassManager MPM; // Convert @llvm.global.annotations to !annotation metadata. MPM.addPass(Annotation2MetadataPass()); if (ImportSummary) { // These passes import type identifier resolutions for whole-program // devirtualization and CFI. They must run early because other passes may // disturb the specific instruction patterns that these passes look for, // creating dependencies on resolutions that may not appear in the summary. // // For example, GVN may transform the pattern assume(type.test) appearing in // two basic blocks into assume(phi(type.test, type.test)), which would // transform a dependency on a WPD resolution into a dependency on a type // identifier resolution for CFI. // // Also, WPD has access to more precise information than ICP and can // devirtualize more effectively, so it should operate on the IR first. // // The WPD and LowerTypeTest passes need to run at -O0 to lower type // metadata and intrinsics. MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary)); MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary)); } if (Level == OptimizationLevel::O0) { // Run a second time to clean up any type tests left behind by WPD for use // in ICP. MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); // Drop available_externally and unreferenced globals. This is necessary // with ThinLTO in order to avoid leaving undefined references to dead // globals in the object file. MPM.addPass(EliminateAvailableExternallyPass()); MPM.addPass(GlobalDCEPass()); return MPM; } // Force any function attributes we want the rest of the pipeline to observe. MPM.addPass(ForceFunctionAttrsPass()); // Add the core simplification pipeline. MPM.addPass(buildModuleSimplificationPipeline( Level, ThinOrFullLTOPhase::ThinLTOPostLink)); // Now add the optimization pipeline. MPM.addPass(buildModuleOptimizationPipeline(Level)); // Emit annotation remarks. addAnnotationRemarksPass(MPM); return MPM; } ModulePassManager PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) { assert(Level != OptimizationLevel::O0 && "Must request optimizations for the default pipeline!"); // FIXME: We should use a customized pre-link pipeline! return buildPerModuleDefaultPipeline(Level, /* LTOPreLink */ true); } ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, ModuleSummaryIndex *ExportSummary) { ModulePassManager MPM; // Convert @llvm.global.annotations to !annotation metadata. MPM.addPass(Annotation2MetadataPass()); // Create a function that performs CFI checks for cross-DSO calls with targets // in the current module. MPM.addPass(CrossDSOCFIPass()); if (Level == OptimizationLevel::O0) { // The WPD and LowerTypeTest passes need to run at -O0 to lower type // metadata and intrinsics. 
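The same pre-link and post-link pipelines are reachable from the textual pipeline syntax used by opt and other tools. A sketch using PassBuilder::parsePassPipeline, assuming the analysis managers have been registered as in the driver above; "thinlto-pre-link<O2>" is the customary alias for the pre-link pipeline, with "thinlto<O2>", "lto-pre-link<O2>" and "lto<O2>" as the usual counterparts:

#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/Error.h"

using namespace llvm;

// Illustrative: build the ThinLTO pre-link pipeline from its textual name,
// which is roughly what `opt -passes='thinlto-pre-link<O2>'` does internally.
static Error buildThinLTOPreLinkFromText(PassBuilder &PB,
                                         ModulePassManager &MPM) {
  return PB.parsePassPipeline(MPM, "thinlto-pre-link<O2>");
}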
MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr)); MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); // Run a second time to clean up any type tests left behind by WPD for use // in ICP. MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); // Emit annotation remarks. addAnnotationRemarksPass(MPM); return MPM; } if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) { // Load sample profile before running the LTO optimization pipeline. MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, ThinOrFullLTOPhase::FullLTOPostLink)); // Cache ProfileSummaryAnalysis once to avoid the potential need to insert // RequireAnalysisPass for PSI before subsequent non-module passes. MPM.addPass(RequireAnalysisPass()); } // Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present. MPM.addPass(OpenMPOptPass()); // Remove unused virtual tables to improve the quality of code generated by // whole-program devirtualization and bitset lowering. MPM.addPass(GlobalDCEPass()); // Force any function attributes we want the rest of the pipeline to observe. MPM.addPass(ForceFunctionAttrsPass()); // Do basic inference of function attributes from known properties of system // libraries and other oracles. MPM.addPass(InferFunctionAttrsPass()); if (Level.getSpeedupLevel() > 1) { FunctionPassManager EarlyFPM; EarlyFPM.addPass(CallSiteSplittingPass()); MPM.addPass(createModuleToFunctionPassAdaptor( std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses)); // Indirect call promotion. This should promote all the targets that are // left by the earlier promotion pass that promotes intra-module targets. // This two-step promotion is to save the compile time. For LTO, it should // produce the same result as if we only do promotion here. MPM.addPass(PGOIndirectCallPromotion( true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)); if (EnableFunctionSpecialization && Level == OptimizationLevel::O3) MPM.addPass(FunctionSpecializationPass()); // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function // pointers passed as arguments to direct uses of functions. MPM.addPass(IPSCCPPass()); // Attach metadata to indirect call sites indicating the set of functions // they may target at run-time. This should follow IPSCCP. MPM.addPass(CalledValuePropagationPass()); } // Now deduce any function attributes based in the current code. MPM.addPass( createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass())); // Do RPO function attribute inference across the module to forward-propagate // attributes where applicable. // FIXME: Is this really an optimization rather than a canonicalization? MPM.addPass(ReversePostOrderFunctionAttrsPass()); // Use in-range annotations on GEP indices to split globals where beneficial. MPM.addPass(GlobalSplitPass()); // Run whole program optimization of virtual call when the list of callees // is fixed. MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr)); // Stop here at -O1. if (Level == OptimizationLevel::O1) { // The LowerTypeTestsPass needs to run to lower type metadata and the // type.test intrinsics. The pass does nothing if CFI is disabled. MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); // Run a second time to clean up any type tests left behind by WPD for use // in ICP (which is performed earlier than this in the regular LTO // pipeline). 
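The WPD / LowerTypeTests ordering above is the load-bearing constraint in the LTO pipelines: devirtualization has to see the llvm.type.test sequences before they are lowered or dropped. A condensed sketch of that pairing (function name illustrative; the constructor forms mirror the calls in this pipeline):

#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/IPO/LowerTypeTests.h"
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"

using namespace llvm;

// Illustrative: WPD first (it needs the type tests intact), then type-test
// lowering, then a cleanup run that only drops the leftover llvm.type.test
// calls, as in the second LowerTypeTestsPass runs above.
static void addWPDAndCFILowering(ModulePassManager &MPM,
                                 ModuleSummaryIndex *ExportSummary) {
  MPM.addPass(WholeProgramDevirtPass(ExportSummary, /*ImportSummary=*/nullptr));
  MPM.addPass(LowerTypeTestsPass(ExportSummary, /*ImportSummary=*/nullptr));
  MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, /*DropTypeTests=*/true));
}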
MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); // Emit annotation remarks. addAnnotationRemarksPass(MPM); return MPM; } // Optimize globals to try and fold them into constants. MPM.addPass(GlobalOptPass()); // Promote any localized globals to SSA registers. MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass())); // Linking modules together can lead to duplicate global constant, only // keep one copy of each constant. MPM.addPass(ConstantMergePass()); // Remove unused arguments from functions. MPM.addPass(DeadArgumentEliminationPass()); // Reduce the code after globalopt and ipsccp. Both can open up significant // simplification opportunities, and both can propagate functions through // function pointers. When this happens, we often have to resolve varargs // calls, etc, so let instcombine do this. FunctionPassManager PeepholeFPM; PeepholeFPM.addPass(InstCombinePass()); if (Level == OptimizationLevel::O3) PeepholeFPM.addPass(AggressiveInstCombinePass()); invokePeepholeEPCallbacks(PeepholeFPM, Level); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM), PTO.EagerlyInvalidateAnalyses)); // Note: historically, the PruneEH pass was run first to deduce nounwind and // generally clean up exception handling overhead. It isn't clear this is // valuable as the inliner doesn't currently care whether it is inlining an // invoke or a call. // Run the inliner now. MPM.addPass(ModuleInlinerWrapperPass(getInlineParamsFromOptLevel(Level))); // Optimize globals again after we ran the inliner. MPM.addPass(GlobalOptPass()); // Garbage collect dead functions. MPM.addPass(GlobalDCEPass()); // If we didn't decide to inline a function, check to see if we can // transform it to pass arguments by value instead of by reference. MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass())); FunctionPassManager FPM; // The IPO Passes may leave cruft around. Clean up after them. FPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(FPM, Level); FPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true)); // Do a post inline PGO instrumentation and use pass. This is a context // sensitive PGO pass. if (PGOOpt) { if (PGOOpt->CSAction == PGOOptions::CSIRInstr) addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true, /* IsCS */ true, PGOOpt->CSProfileGenFile, PGOOpt->ProfileRemappingFile); else if (PGOOpt->CSAction == PGOOptions::CSIRUse) addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false, /* IsCS */ true, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile); } // Break up allocas FPM.addPass(SROAPass()); // LTO provides additional opportunities for tailcall elimination due to // link-time inlining, and visibility of nocapture attribute. FPM.addPass(TailCallElimPass()); // Run a few AA driver optimizations here and now to cleanup the code. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM), PTO.EagerlyInvalidateAnalyses)); MPM.addPass( createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass())); // Require the GlobalsAA analysis for the module so we can query it within // MainFPM. MPM.addPass(RequireAnalysisPass()); // Invalidate AAManager so it can be recreated and pick up the newly available // GlobalsAA. 
MPM.addPass( createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>())); FunctionPassManager MainFPM; MainFPM.addPass(createFunctionToLoopPassAdaptor( - LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true), /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true)); if (RunNewGVN) MainFPM.addPass(NewGVNPass()); else MainFPM.addPass(GVNPass()); // Remove dead memcpy()'s. MainFPM.addPass(MemCpyOptPass()); // Nuke dead stores. MainFPM.addPass(DSEPass()); MainFPM.addPass(MergedLoadStoreMotionPass()); if (EnableConstraintElimination) MainFPM.addPass(ConstraintEliminationPass()); LoopPassManager LPM; if (EnableLoopFlatten && Level.getSpeedupLevel() > 1) LPM.addPass(LoopFlattenPass()); LPM.addPass(IndVarSimplifyPass()); LPM.addPass(LoopDeletionPass()); // FIXME: Add loop interchange. // Unroll small loops and perform peeling. LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), /* OnlyWhenForced= */ !PTO.LoopUnrolling, PTO.ForgetAllSCEVInLoopUnroll)); // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA. // *All* loop passes must preserve it, in order to be able to use it. MainFPM.addPass(createFunctionToLoopPassAdaptor( std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true)); MainFPM.addPass(LoopDistributePass()); addVectorPasses(Level, MainFPM, /* IsFullLTO */ true); // Run the OpenMPOpt CGSCC pass again late. MPM.addPass( createModuleToPostOrderCGSCCPassAdaptor(OpenMPOptCGSCCPass())); invokePeepholeEPCallbacks(MainFPM, Level); MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true)); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM), PTO.EagerlyInvalidateAnalyses)); // Lower type metadata and the type.test intrinsic. This pass supports // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs // to be run at link time if CFI is enabled. This pass does nothing if // CFI is disabled. MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr)); // Run a second time to clean up any type tests left behind by WPD for use // in ICP (which is performed earlier than this in the regular LTO pipeline). MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true)); // Enable splitting late in the FullLTO post-link pipeline. This is done in // the same stage in the old pass manager (\ref addLateLTOOptimizationPasses). if (EnableHotColdSplit) MPM.addPass(HotColdSplittingPass()); // Add late LTO optimization passes. // Delete basic blocks, which optimization passes may have killed. - MPM.addPass(createModuleToFunctionPassAdaptor( - SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true)))); + MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass( + SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts( + true)))); // Drop bodies of available externally objects to improve GlobalDCE. MPM.addPass(EliminateAvailableExternallyPass()); // Now that we have optimized the program, discard unreachable functions. MPM.addPass(GlobalDCEPass()); if (PTO.MergeFunctions) MPM.addPass(MergeFunctionsPass()); // Emit annotation remarks. addAnnotationRemarksPass(MPM); return MPM; } ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, bool LTOPreLink) { assert(Level == OptimizationLevel::O0 && "buildO0DefaultPipeline should only be used with O0"); ModulePassManager MPM; // Perform pseudo probe instrumentation in O0 mode. This is for the // consistency between different build modes.
For example, a LTO build can be // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in // the postlink will require pseudo probe instrumentation in the prelink. if (PGOOpt && PGOOpt->PseudoProbeForProfiling) MPM.addPass(SampleProfileProbePass(TM)); if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr || PGOOpt->Action == PGOOptions::IRUse)) addPGOInstrPassesForO0( MPM, /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr), /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile); for (auto &C : PipelineStartEPCallbacks) C(MPM, Level); if (PGOOpt && PGOOpt->DebugInfoForProfiling) MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); for (auto &C : PipelineEarlySimplificationEPCallbacks) C(MPM, Level); // Build a minimal pipeline based on the semantics required by LLVM, // which is just that always inlining occurs. Further, disable generating // lifetime intrinsics to avoid enabling further optimizations during // code generation. MPM.addPass(AlwaysInlinerPass( /*InsertLifetimeIntrinsics=*/false)); if (PTO.MergeFunctions) MPM.addPass(MergeFunctionsPass()); if (EnableMatrix) MPM.addPass( createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass(true))); if (!CGSCCOptimizerLateEPCallbacks.empty()) { CGSCCPassManager CGPM; for (auto &C : CGSCCOptimizerLateEPCallbacks) C(CGPM, Level); if (!CGPM.isEmpty()) MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); } if (!LateLoopOptimizationsEPCallbacks.empty()) { LoopPassManager LPM; for (auto &C : LateLoopOptimizationsEPCallbacks) C(LPM, Level); if (!LPM.isEmpty()) { MPM.addPass(createModuleToFunctionPassAdaptor( createFunctionToLoopPassAdaptor(std::move(LPM)))); } } if (!LoopOptimizerEndEPCallbacks.empty()) { LoopPassManager LPM; for (auto &C : LoopOptimizerEndEPCallbacks) C(LPM, Level); if (!LPM.isEmpty()) { MPM.addPass(createModuleToFunctionPassAdaptor( createFunctionToLoopPassAdaptor(std::move(LPM)))); } } if (!ScalarOptimizerLateEPCallbacks.empty()) { FunctionPassManager FPM; for (auto &C : ScalarOptimizerLateEPCallbacks) C(FPM, Level); if (!FPM.isEmpty()) MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } if (!VectorizerStartEPCallbacks.empty()) { FunctionPassManager FPM; for (auto &C : VectorizerStartEPCallbacks) C(FPM, Level); if (!FPM.isEmpty()) MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); } MPM.addPass(createModuleToFunctionPassAdaptor(CoroEarlyPass())); CGSCCPassManager CGPM; CGPM.addPass(CoroSplitPass()); MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM))); MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass())); for (auto &C : OptimizerLastEPCallbacks) C(MPM, Level); if (LTOPreLink) addRequiredLTOPreLinkPasses(MPM); MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass())); return MPM; } AAManager PassBuilder::buildDefaultAAPipeline() { AAManager AA; // The order in which these are registered determines their priority when // being queried. // First we register the basic alias analysis that provides the majority of // per-function local AA logic. This is a stateless, on-demand local set of // AA techniques. AA.registerFunctionAnalysis(); // Next we query fast, specialized alias analyses that wrap IR-embedded // information about aliasing. AA.registerFunctionAnalysis(); AA.registerFunctionAnalysis(); // Add support for querying global aliasing information when available. 
// Because the `AAManager` is a function analysis and `GlobalsAA` is a module // analysis, all that the `AAManager` can do is query for any *cached* // results from `GlobalsAA` through a readonly proxy. AA.registerModuleAnalysis(); // Add target-specific alias analyses. if (TM) TM->registerDefaultAliasAnalyses(AA); return AA; } diff --git a/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def b/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def index 8e0af11b854d..69d8d8c43267 100644 --- a/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def +++ b/contrib/llvm-project/llvm/lib/Passes/PassRegistry.def @@ -1,525 +1,526 @@ //===- PassRegistry.def - Registry of passes --------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file is used as the registry of passes that are part of the core LLVM // libraries. This file describes both transformation passes and analyses // Analyses are registered while transformation passes have names registered // that can be used when providing a textual pass pipeline. // //===----------------------------------------------------------------------===// // NOTE: NO INCLUDE GUARD DESIRED! #ifndef MODULE_ANALYSIS #define MODULE_ANALYSIS(NAME, CREATE_PASS) #endif MODULE_ANALYSIS("callgraph", CallGraphAnalysis()) MODULE_ANALYSIS("lcg", LazyCallGraphAnalysis()) MODULE_ANALYSIS("module-summary", ModuleSummaryIndexAnalysis()) MODULE_ANALYSIS("no-op-module", NoOpModuleAnalysis()) MODULE_ANALYSIS("profile-summary", ProfileSummaryAnalysis()) MODULE_ANALYSIS("stack-safety", StackSafetyGlobalAnalysis()) MODULE_ANALYSIS("verify", VerifierAnalysis()) MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) MODULE_ANALYSIS("asan-globals-md", ASanGlobalsMetadataAnalysis()) MODULE_ANALYSIS("inline-advisor", InlineAdvisorAnalysis()) MODULE_ANALYSIS("ir-similarity", IRSimilarityAnalysis()) #ifndef MODULE_ALIAS_ANALYSIS #define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \ MODULE_ANALYSIS(NAME, CREATE_PASS) #endif MODULE_ALIAS_ANALYSIS("globals-aa", GlobalsAA()) #undef MODULE_ALIAS_ANALYSIS #undef MODULE_ANALYSIS #ifndef MODULE_PASS #define MODULE_PASS(NAME, CREATE_PASS) #endif MODULE_PASS("always-inline", AlwaysInlinerPass()) MODULE_PASS("attributor", AttributorPass()) MODULE_PASS("annotation2metadata", Annotation2MetadataPass()) MODULE_PASS("openmp-opt", OpenMPOptPass()) MODULE_PASS("called-value-propagation", CalledValuePropagationPass()) MODULE_PASS("canonicalize-aliases", CanonicalizeAliasesPass()) MODULE_PASS("cg-profile", CGProfilePass()) MODULE_PASS("check-debugify", NewPMCheckDebugifyPass()) MODULE_PASS("constmerge", ConstantMergePass()) MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass()) MODULE_PASS("deadargelim", DeadArgumentEliminationPass()) MODULE_PASS("debugify", NewPMDebugifyPass()) MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass()) MODULE_PASS("extract-blocks", BlockExtractorPass()) MODULE_PASS("forceattrs", ForceFunctionAttrsPass()) MODULE_PASS("function-import", FunctionImportPass()) MODULE_PASS("function-specialization", FunctionSpecializationPass()) MODULE_PASS("globaldce", GlobalDCEPass()) MODULE_PASS("globalopt", GlobalOptPass()) MODULE_PASS("globalsplit", GlobalSplitPass()) MODULE_PASS("hotcoldsplit", HotColdSplittingPass()) MODULE_PASS("inferattrs", 
InferFunctionAttrsPass()) MODULE_PASS("inliner-wrapper", ModuleInlinerWrapperPass()) MODULE_PASS("print", InlineAdvisorAnalysisPrinterPass(dbgs())) MODULE_PASS("inliner-wrapper-no-mandatory-first", ModuleInlinerWrapperPass( getInlineParams(), false)) MODULE_PASS("insert-gcov-profiling", GCOVProfilerPass()) MODULE_PASS("instrorderfile", InstrOrderFilePass()) MODULE_PASS("instrprof", InstrProfiling()) MODULE_PASS("internalize", InternalizePass()) MODULE_PASS("invalidate", InvalidateAllAnalysesPass()) MODULE_PASS("ipsccp", IPSCCPPass()) MODULE_PASS("iroutliner", IROutlinerPass()) MODULE_PASS("print-ir-similarity", IRSimilarityAnalysisPrinterPass(dbgs())) MODULE_PASS("lowertypetests", LowerTypeTestsPass()) MODULE_PASS("metarenamer", MetaRenamerPass()) MODULE_PASS("mergefunc", MergeFunctionsPass()) MODULE_PASS("name-anon-globals", NameAnonGlobalPass()) MODULE_PASS("no-op-module", NoOpModulePass()) MODULE_PASS("objc-arc-apelim", ObjCARCAPElimPass()) MODULE_PASS("partial-inliner", PartialInlinerPass()) MODULE_PASS("pgo-icall-prom", PGOIndirectCallPromotion()) MODULE_PASS("pgo-instr-gen", PGOInstrumentationGen()) MODULE_PASS("pgo-instr-use", PGOInstrumentationUse()) MODULE_PASS("print-profile-summary", ProfileSummaryPrinterPass(dbgs())) MODULE_PASS("print-callgraph", CallGraphPrinterPass(dbgs())) MODULE_PASS("print", PrintModulePass(dbgs())) MODULE_PASS("print-lcg", LazyCallGraphPrinterPass(dbgs())) MODULE_PASS("print-lcg-dot", LazyCallGraphDOTPrinterPass(dbgs())) MODULE_PASS("print-must-be-executed-contexts", MustBeExecutedContextPrinterPass(dbgs())) MODULE_PASS("print-stack-safety", StackSafetyGlobalPrinterPass(dbgs())) MODULE_PASS("print", ModuleDebugInfoPrinterPass(dbgs())) MODULE_PASS("rel-lookup-table-converter", RelLookupTableConverterPass()) MODULE_PASS("rewrite-statepoints-for-gc", RewriteStatepointsForGC()) MODULE_PASS("rewrite-symbols", RewriteSymbolPass()) MODULE_PASS("rpo-function-attrs", ReversePostOrderFunctionAttrsPass()) MODULE_PASS("sample-profile", SampleProfileLoaderPass()) MODULE_PASS("scc-oz-module-inliner", buildInlinerPipeline(OptimizationLevel::Oz, ThinOrFullLTOPhase::None)) MODULE_PASS("strip", StripSymbolsPass()) MODULE_PASS("strip-dead-debug-info", StripDeadDebugInfoPass()) MODULE_PASS("pseudo-probe", SampleProfileProbePass(TM)) MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass()) MODULE_PASS("strip-debug-declare", StripDebugDeclarePass()) MODULE_PASS("strip-nondebug", StripNonDebugSymbolsPass()) MODULE_PASS("strip-nonlinetable-debuginfo", StripNonLineTableDebugInfoPass()) MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation()) MODULE_PASS("verify", VerifierPass()) MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass()) MODULE_PASS("dfsan", DataFlowSanitizerPass()) MODULE_PASS("msan-module", ModuleMemorySanitizerPass({})) MODULE_PASS("module-inline", ModuleInlinerPass()) MODULE_PASS("tsan-module", ModuleThreadSanitizerPass()) MODULE_PASS("sancov-module", ModuleSanitizerCoveragePass()) MODULE_PASS("memprof-module", ModuleMemProfilerPass()) MODULE_PASS("poison-checking", PoisonCheckingPass()) MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass()) #undef MODULE_PASS #ifndef MODULE_PASS_WITH_PARAMS #define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) #endif MODULE_PASS_WITH_PARAMS("loop-extract", "LoopExtractorPass", [](bool Single) { if (Single) return LoopExtractorPass(1); return LoopExtractorPass(); }, parseLoopExtractorPassOptions, "single") MODULE_PASS_WITH_PARAMS("hwasan", "HWAddressSanitizerPass", 
[](HWAddressSanitizerOptions Opts) { return HWAddressSanitizerPass(Opts); }, parseHWASanPassOptions, "kernel;recover") MODULE_PASS_WITH_PARAMS("asan-module", "ModuleAddressSanitizerPass", [](AddressSanitizerOptions Opts) { return ModuleAddressSanitizerPass(Opts); }, parseASanPassOptions, "kernel") #undef MODULE_PASS_WITH_PARAMS #ifndef CGSCC_ANALYSIS #define CGSCC_ANALYSIS(NAME, CREATE_PASS) #endif CGSCC_ANALYSIS("no-op-cgscc", NoOpCGSCCAnalysis()) CGSCC_ANALYSIS("fam-proxy", FunctionAnalysisManagerCGSCCProxy()) CGSCC_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) #undef CGSCC_ANALYSIS #ifndef CGSCC_PASS #define CGSCC_PASS(NAME, CREATE_PASS) #endif CGSCC_PASS("argpromotion", ArgumentPromotionPass()) CGSCC_PASS("invalidate", InvalidateAllAnalysesPass()) CGSCC_PASS("function-attrs", PostOrderFunctionAttrsPass()) CGSCC_PASS("attributor-cgscc", AttributorCGSCCPass()) CGSCC_PASS("openmp-opt-cgscc", OpenMPOptCGSCCPass()) CGSCC_PASS("coro-split", CoroSplitPass()) CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass()) #undef CGSCC_PASS #ifndef CGSCC_PASS_WITH_PARAMS #define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) #endif CGSCC_PASS_WITH_PARAMS("inline", "InlinerPass", [](bool OnlyMandatory) { return InlinerPass(OnlyMandatory); }, parseInlinerPassOptions, "only-mandatory") #undef CGSCC_PASS_WITH_PARAMS #ifndef FUNCTION_ANALYSIS #define FUNCTION_ANALYSIS(NAME, CREATE_PASS) #endif FUNCTION_ANALYSIS("aa", AAManager()) FUNCTION_ANALYSIS("assumptions", AssumptionAnalysis()) FUNCTION_ANALYSIS("block-freq", BlockFrequencyAnalysis()) FUNCTION_ANALYSIS("branch-prob", BranchProbabilityAnalysis()) FUNCTION_ANALYSIS("cycles", CycleAnalysis()) FUNCTION_ANALYSIS("domtree", DominatorTreeAnalysis()) FUNCTION_ANALYSIS("postdomtree", PostDominatorTreeAnalysis()) FUNCTION_ANALYSIS("demanded-bits", DemandedBitsAnalysis()) FUNCTION_ANALYSIS("domfrontier", DominanceFrontierAnalysis()) FUNCTION_ANALYSIS("func-properties", FunctionPropertiesAnalysis()) FUNCTION_ANALYSIS("loops", LoopAnalysis()) FUNCTION_ANALYSIS("lazy-value-info", LazyValueAnalysis()) FUNCTION_ANALYSIS("da", DependenceAnalysis()) FUNCTION_ANALYSIS("inliner-size-estimator", InlineSizeEstimatorAnalysis()) FUNCTION_ANALYSIS("memdep", MemoryDependenceAnalysis()) FUNCTION_ANALYSIS("memoryssa", MemorySSAAnalysis()) FUNCTION_ANALYSIS("phi-values", PhiValuesAnalysis()) FUNCTION_ANALYSIS("regions", RegionInfoAnalysis()) FUNCTION_ANALYSIS("no-op-function", NoOpFunctionAnalysis()) FUNCTION_ANALYSIS("opt-remark-emit", OptimizationRemarkEmitterAnalysis()) FUNCTION_ANALYSIS("scalar-evolution", ScalarEvolutionAnalysis()) FUNCTION_ANALYSIS("should-not-run-function-passes", ShouldNotRunFunctionPassesAnalysis()) FUNCTION_ANALYSIS("should-run-extra-vector-passes", ShouldRunExtraVectorPasses()) FUNCTION_ANALYSIS("stack-safety-local", StackSafetyAnalysis()) FUNCTION_ANALYSIS("targetlibinfo", TargetLibraryAnalysis()) FUNCTION_ANALYSIS("targetir", TM ? 
TM->getTargetIRAnalysis() : TargetIRAnalysis()) FUNCTION_ANALYSIS("verify", VerifierAnalysis()) FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) FUNCTION_ANALYSIS("divergence", DivergenceAnalysis()) #ifndef FUNCTION_ALIAS_ANALYSIS #define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \ FUNCTION_ANALYSIS(NAME, CREATE_PASS) #endif FUNCTION_ALIAS_ANALYSIS("basic-aa", BasicAA()) FUNCTION_ALIAS_ANALYSIS("cfl-anders-aa", CFLAndersAA()) FUNCTION_ALIAS_ANALYSIS("cfl-steens-aa", CFLSteensAA()) FUNCTION_ALIAS_ANALYSIS("objc-arc-aa", objcarc::ObjCARCAA()) FUNCTION_ALIAS_ANALYSIS("scev-aa", SCEVAA()) FUNCTION_ALIAS_ANALYSIS("scoped-noalias-aa", ScopedNoAliasAA()) FUNCTION_ALIAS_ANALYSIS("tbaa", TypeBasedAA()) #undef FUNCTION_ALIAS_ANALYSIS #undef FUNCTION_ANALYSIS #ifndef FUNCTION_PASS #define FUNCTION_PASS(NAME, CREATE_PASS) #endif FUNCTION_PASS("aa-eval", AAEvaluator()) FUNCTION_PASS("adce", ADCEPass()) FUNCTION_PASS("add-discriminators", AddDiscriminatorsPass()) FUNCTION_PASS("aggressive-instcombine", AggressiveInstCombinePass()) FUNCTION_PASS("assume-builder", AssumeBuilderPass()) FUNCTION_PASS("assume-simplify", AssumeSimplifyPass()) FUNCTION_PASS("alignment-from-assumptions", AlignmentFromAssumptionsPass()) FUNCTION_PASS("annotation-remarks", AnnotationRemarksPass()) FUNCTION_PASS("bdce", BDCEPass()) FUNCTION_PASS("bounds-checking", BoundsCheckingPass()) FUNCTION_PASS("break-crit-edges", BreakCriticalEdgesPass()) FUNCTION_PASS("callsite-splitting", CallSiteSplittingPass()) FUNCTION_PASS("consthoist", ConstantHoistingPass()) FUNCTION_PASS("constraint-elimination", ConstraintEliminationPass()) FUNCTION_PASS("chr", ControlHeightReductionPass()) FUNCTION_PASS("coro-early", CoroEarlyPass()) FUNCTION_PASS("coro-elide", CoroElidePass()) FUNCTION_PASS("coro-cleanup", CoroCleanupPass()) FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass()) FUNCTION_PASS("dce", DCEPass()) FUNCTION_PASS("dfa-jump-threading", DFAJumpThreadingPass()) FUNCTION_PASS("div-rem-pairs", DivRemPairsPass()) FUNCTION_PASS("dse", DSEPass()) FUNCTION_PASS("dot-cfg", CFGPrinterPass()) FUNCTION_PASS("dot-cfg-only", CFGOnlyPrinterPass()) FUNCTION_PASS("dot-dom", DomTreePrinterPass()) FUNCTION_PASS("dot-dom-only", DomTreeOnlyPrinterPass()) FUNCTION_PASS("fix-irreducible", FixIrreduciblePass()) FUNCTION_PASS("flattencfg", FlattenCFGPass()) FUNCTION_PASS("make-guards-explicit", MakeGuardsExplicitPass()) FUNCTION_PASS("gvn-hoist", GVNHoistPass()) FUNCTION_PASS("gvn-sink", GVNSinkPass()) FUNCTION_PASS("helloworld", HelloWorldPass()) FUNCTION_PASS("infer-address-spaces", InferAddressSpacesPass()) FUNCTION_PASS("instcombine", InstCombinePass()) FUNCTION_PASS("instcount", InstCountPass()) FUNCTION_PASS("instsimplify", InstSimplifyPass()) FUNCTION_PASS("invalidate", InvalidateAllAnalysesPass()) FUNCTION_PASS("irce", IRCEPass()) FUNCTION_PASS("float2int", Float2IntPass()) FUNCTION_PASS("no-op-function", NoOpFunctionPass()) FUNCTION_PASS("libcalls-shrinkwrap", LibCallsShrinkWrapPass()) FUNCTION_PASS("lint", LintPass()) FUNCTION_PASS("inject-tli-mappings", InjectTLIMappings()) FUNCTION_PASS("instnamer", InstructionNamerPass()) FUNCTION_PASS("loweratomic", LowerAtomicPass()) FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass()) FUNCTION_PASS("lower-guard-intrinsic", LowerGuardIntrinsicPass()) FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass()) FUNCTION_PASS("lower-widenable-condition", LowerWidenableConditionPass()) FUNCTION_PASS("guard-widening", GuardWideningPass()) 
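// For orientation: every FUNCTION_PASS entry here binds a textual pipeline
// name to a type that models the new-pass-manager function-pass concept,
// i.e. something exposing run(Function &, FunctionAnalysisManager &). A
// minimal sketch of such a type (the name MyCountPass is hypothetical and is
// not registered anywhere in this file):
//
//   #include "llvm/IR/PassManager.h"
//   struct MyCountPass : llvm::PassInfoMixin<MyCountPass> {
//     llvm::PreservedAnalyses run(llvm::Function &F,
//                                 llvm::FunctionAnalysisManager &) {
//       (void)F.size();                        // observe only, change nothing
//       return llvm::PreservedAnalyses::all(); // so all analyses stay valid
//     }
//   };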
FUNCTION_PASS("load-store-vectorizer", LoadStoreVectorizerPass()) FUNCTION_PASS("loop-simplify", LoopSimplifyPass()) FUNCTION_PASS("loop-sink", LoopSinkPass()) FUNCTION_PASS("lowerinvoke", LowerInvokePass()) FUNCTION_PASS("lowerswitch", LowerSwitchPass()) FUNCTION_PASS("mem2reg", PromotePass()) FUNCTION_PASS("memcpyopt", MemCpyOptPass()) FUNCTION_PASS("mergeicmps", MergeICmpsPass()) FUNCTION_PASS("mergereturn", UnifyFunctionExitNodesPass()) FUNCTION_PASS("nary-reassociate", NaryReassociatePass()) FUNCTION_PASS("newgvn", NewGVNPass()) FUNCTION_PASS("jump-threading", JumpThreadingPass()) FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass()) FUNCTION_PASS("lcssa", LCSSAPass()) FUNCTION_PASS("loop-data-prefetch", LoopDataPrefetchPass()) FUNCTION_PASS("loop-load-elim", LoopLoadEliminationPass()) FUNCTION_PASS("loop-fusion", LoopFusePass()) FUNCTION_PASS("loop-distribute", LoopDistributePass()) FUNCTION_PASS("loop-versioning", LoopVersioningPass()) FUNCTION_PASS("objc-arc", ObjCARCOptPass()) FUNCTION_PASS("objc-arc-contract", ObjCARCContractPass()) FUNCTION_PASS("objc-arc-expand", ObjCARCExpandPass()) FUNCTION_PASS("pgo-memop-opt", PGOMemOPSizeOpt()) FUNCTION_PASS("print", PrintFunctionPass(dbgs())) FUNCTION_PASS("print", AssumptionPrinterPass(dbgs())) FUNCTION_PASS("print", BlockFrequencyPrinterPass(dbgs())) FUNCTION_PASS("print", BranchProbabilityPrinterPass(dbgs())) FUNCTION_PASS("print", CostModelPrinterPass(dbgs())) FUNCTION_PASS("print", CycleInfoPrinterPass(dbgs())) FUNCTION_PASS("print", DependenceAnalysisPrinterPass(dbgs())) FUNCTION_PASS("print", DivergenceAnalysisPrinterPass(dbgs())) FUNCTION_PASS("print", DominatorTreePrinterPass(dbgs())) FUNCTION_PASS("print", PostDominatorTreePrinterPass(dbgs())) FUNCTION_PASS("print", DelinearizationPrinterPass(dbgs())) FUNCTION_PASS("print", DemandedBitsPrinterPass(dbgs())) FUNCTION_PASS("print", DominanceFrontierPrinterPass(dbgs())) FUNCTION_PASS("print", FunctionPropertiesPrinterPass(dbgs())) FUNCTION_PASS("print", InlineCostAnnotationPrinterPass(dbgs())) FUNCTION_PASS("print", InlineSizeEstimatorAnalysisPrinterPass(dbgs())) FUNCTION_PASS("print", LoopPrinterPass(dbgs())) FUNCTION_PASS("print", MemorySSAPrinterPass(dbgs())) FUNCTION_PASS("print", MemorySSAWalkerPrinterPass(dbgs())) FUNCTION_PASS("print", PhiValuesPrinterPass(dbgs())) FUNCTION_PASS("print", RegionInfoPrinterPass(dbgs())) FUNCTION_PASS("print", ScalarEvolutionPrinterPass(dbgs())) FUNCTION_PASS("print", StackSafetyPrinterPass(dbgs())) // TODO: rename to print after NPM switch FUNCTION_PASS("print-alias-sets", AliasSetsPrinterPass(dbgs())) FUNCTION_PASS("print-predicateinfo", PredicateInfoPrinterPass(dbgs())) FUNCTION_PASS("print-mustexecute", MustExecutePrinterPass(dbgs())) FUNCTION_PASS("print-memderefs", MemDerefPrinterPass(dbgs())) FUNCTION_PASS("reassociate", ReassociatePass()) FUNCTION_PASS("redundant-dbg-inst-elim", RedundantDbgInstEliminationPass()) FUNCTION_PASS("reg2mem", RegToMemPass()) FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass()) FUNCTION_PASS("scalarizer", ScalarizerPass()) FUNCTION_PASS("separate-const-offset-from-gep", SeparateConstOffsetFromGEPPass()) FUNCTION_PASS("sccp", SCCPPass()) FUNCTION_PASS("sink", SinkingPass()) FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass()) FUNCTION_PASS("slsr", StraightLineStrengthReducePass()) FUNCTION_PASS("speculative-execution", SpeculativeExecutionPass()) FUNCTION_PASS("sroa", SROAPass()) FUNCTION_PASS("strip-gc-relocates", StripGCRelocates()) FUNCTION_PASS("structurizecfg", 
StructurizeCFGPass()) FUNCTION_PASS("tailcallelim", TailCallElimPass()) FUNCTION_PASS("unify-loop-exits", UnifyLoopExitsPass()) FUNCTION_PASS("vector-combine", VectorCombinePass()) FUNCTION_PASS("verify", VerifierPass()) FUNCTION_PASS("verify", DominatorTreeVerifierPass()) FUNCTION_PASS("verify", LoopVerifierPass()) FUNCTION_PASS("verify", MemorySSAVerifierPass()) FUNCTION_PASS("verify", RegionInfoVerifierPass()) FUNCTION_PASS("verify", SafepointIRVerifierPass()) FUNCTION_PASS("verify", ScalarEvolutionVerifierPass()) FUNCTION_PASS("view-cfg", CFGViewerPass()) FUNCTION_PASS("view-cfg-only", CFGOnlyViewerPass()) FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass()) FUNCTION_PASS("tsan", ThreadSanitizerPass()) FUNCTION_PASS("memprof", MemProfilerPass()) #undef FUNCTION_PASS #ifndef FUNCTION_PASS_WITH_PARAMS #define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) #endif FUNCTION_PASS_WITH_PARAMS("early-cse", "EarlyCSEPass", [](bool UseMemorySSA) { return EarlyCSEPass(UseMemorySSA); }, parseEarlyCSEPassOptions, "memssa") FUNCTION_PASS_WITH_PARAMS("ee-instrument", "EntryExitInstrumenterPass", [](bool PostInlining) { return EntryExitInstrumenterPass(PostInlining); }, parseEntryExitInstrumenterPassOptions, "post-inline") FUNCTION_PASS_WITH_PARAMS("lower-matrix-intrinsics", "LowerMatrixIntrinsicsPass", [](bool Minimal) { return LowerMatrixIntrinsicsPass(Minimal); }, parseLowerMatrixIntrinsicsPassOptions, "minimal") FUNCTION_PASS_WITH_PARAMS("loop-unroll", "LoopUnrollPass", [](LoopUnrollOptions Opts) { return LoopUnrollPass(Opts); }, parseLoopUnrollOptions, "O0;O1;O2;O3;full-unroll-max=N;" "no-partial;partial;" "no-peeling;peeling;" "no-profile-peeling;profile-peeling;" "no-runtime;runtime;" "no-upperbound;upperbound") FUNCTION_PASS_WITH_PARAMS("asan", "AddressSanitizerPass", [](AddressSanitizerOptions Opts) { return AddressSanitizerPass(Opts); }, parseASanPassOptions, "kernel") FUNCTION_PASS_WITH_PARAMS("msan", "MemorySanitizerPass", [](MemorySanitizerOptions Opts) { return MemorySanitizerPass(Opts); }, parseMSanPassOptions, "recover;kernel;eager-checks;track-origins=N") FUNCTION_PASS_WITH_PARAMS("simplifycfg", "SimplifyCFGPass", [](SimplifyCFGOptions Opts) { return SimplifyCFGPass(Opts); }, parseSimplifyCFGOptions, "no-forward-switch-cond;forward-switch-cond;" + "no-switch-range-to-icmp;switch-range-to-icmp;" "no-switch-to-lookup;switch-to-lookup;" "no-keep-loops;keep-loops;" "no-hoist-common-insts;hoist-common-insts;" "no-sink-common-insts;sink-common-insts;" "bonus-inst-threshold=N" ) FUNCTION_PASS_WITH_PARAMS("loop-vectorize", "LoopVectorizePass", [](LoopVectorizeOptions Opts) { return LoopVectorizePass(Opts); }, parseLoopVectorizeOptions, "no-interleave-forced-only;interleave-forced-only;" "no-vectorize-forced-only;vectorize-forced-only") FUNCTION_PASS_WITH_PARAMS("mldst-motion", "MergedLoadStoreMotionPass", [](MergedLoadStoreMotionOptions Opts) { return MergedLoadStoreMotionPass(Opts); }, parseMergedLoadStoreMotionOptions, "no-split-footer-bb;split-footer-bb") FUNCTION_PASS_WITH_PARAMS("gvn", "GVNPass", [](GVNOptions Opts) { return GVNPass(Opts); }, parseGVNOptions, "no-pre;pre;" "no-load-pre;load-pre;" "no-split-backedge-load-pre;split-backedge-load-pre;" "no-memdep;memdep") FUNCTION_PASS_WITH_PARAMS("print", "StackLifetimePrinterPass", [](StackLifetime::LivenessType Type) { return StackLifetimePrinterPass(dbgs(), Type); }, parseStackLifetimeOptions, "may;must") #undef FUNCTION_PASS_WITH_PARAMS #ifndef LOOPNEST_PASS #define LOOPNEST_PASS(NAME, CREATE_PASS) 
#endif
LOOPNEST_PASS("lnicm", LNICMPass())
LOOPNEST_PASS("loop-flatten", LoopFlattenPass())
LOOPNEST_PASS("loop-interchange", LoopInterchangePass())
LOOPNEST_PASS("loop-unroll-and-jam", LoopUnrollAndJamPass())
LOOPNEST_PASS("no-op-loopnest", NoOpLoopNestPass())
#undef LOOPNEST_PASS
#ifndef LOOP_ANALYSIS
#define LOOP_ANALYSIS(NAME, CREATE_PASS)
#endif
LOOP_ANALYSIS("no-op-loop", NoOpLoopAnalysis())
LOOP_ANALYSIS("access-info", LoopAccessAnalysis())
LOOP_ANALYSIS("ddg", DDGAnalysis())
LOOP_ANALYSIS("iv-users", IVUsersAnalysis())
LOOP_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
#undef LOOP_ANALYSIS
#ifndef LOOP_PASS
#define LOOP_PASS(NAME, CREATE_PASS)
#endif
LOOP_PASS("canon-freeze", CanonicalizeFreezeInLoopsPass())
LOOP_PASS("dot-ddg", DDGDotPrinterPass())
LOOP_PASS("invalidate", InvalidateAllAnalysesPass())
LOOP_PASS("licm", LICMPass())
LOOP_PASS("loop-idiom", LoopIdiomRecognizePass())
LOOP_PASS("loop-instsimplify", LoopInstSimplifyPass())
LOOP_PASS("loop-rotate", LoopRotatePass())
LOOP_PASS("no-op-loop", NoOpLoopPass())
LOOP_PASS("print", PrintLoopPass(dbgs()))
LOOP_PASS("loop-deletion", LoopDeletionPass())
LOOP_PASS("loop-simplifycfg", LoopSimplifyCFGPass())
LOOP_PASS("loop-reduce", LoopStrengthReducePass())
LOOP_PASS("indvars", IndVarSimplifyPass())
LOOP_PASS("loop-unroll-full", LoopFullUnrollPass())
LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs()))
LOOP_PASS("print", DDGAnalysisPrinterPass(dbgs()))
LOOP_PASS("print", IVUsersPrinterPass(dbgs()))
LOOP_PASS("print", LoopNestPrinterPass(dbgs()))
LOOP_PASS("print", LoopCachePrinterPass(dbgs()))
LOOP_PASS("loop-predication", LoopPredicationPass())
LOOP_PASS("guard-widening", GuardWideningPass())
LOOP_PASS("loop-bound-split", LoopBoundSplitPass())
LOOP_PASS("loop-reroll", LoopRerollPass())
LOOP_PASS("loop-versioning-licm", LoopVersioningLICMPass())
#undef LOOP_PASS
#ifndef LOOP_PASS_WITH_PARAMS
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)
#endif
LOOP_PASS_WITH_PARAMS("simple-loop-unswitch", "SimpleLoopUnswitchPass",
                      [](std::pair<bool, bool> Params) {
                        return SimpleLoopUnswitchPass(Params.first, Params.second);
                      },
                      parseLoopUnswitchOptions,
                      "nontrivial;no-nontrivial;trivial;no-trivial")
#undef LOOP_PASS_WITH_PARAMS
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 4af28fc070dd..6a751da7ad55 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -1,812 +1,813 @@
//===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // //===----------------------------------------------------------------------===// #include "AArch64TargetMachine.h" #include "AArch64.h" #include "AArch64MachineFunctionInfo.h" #include "AArch64MacroFusion.h" #include "AArch64Subtarget.h" #include "AArch64TargetObjectFile.h" #include "AArch64TargetTransformInfo.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" #include "TargetInfo/AArch64TargetInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/CSEConfigBase.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h" #include "llvm/CodeGen/GlobalISel/Localizer.h" #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/CodeGen/MIRParser/MIParser.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/InitializePasses.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/CFGuard.h" #include "llvm/Transforms/Scalar.h" #include #include using namespace llvm; static cl::opt EnableCCMP("aarch64-enable-ccmp", cl::desc("Enable the CCMP formation pass"), cl::init(true), cl::Hidden); static cl::opt EnableCondBrTuning("aarch64-enable-cond-br-tune", cl::desc("Enable the conditional branch tuning pass"), cl::init(true), cl::Hidden); static cl::opt EnableMCR("aarch64-enable-mcr", cl::desc("Enable the machine combiner pass"), cl::init(true), cl::Hidden); static cl::opt EnableStPairSuppress("aarch64-enable-stp-suppress", cl::desc("Suppress STP for AArch64"), cl::init(true), cl::Hidden); static cl::opt EnableAdvSIMDScalar( "aarch64-enable-simd-scalar", cl::desc("Enable use of AdvSIMD scalar integer instructions"), cl::init(false), cl::Hidden); static cl::opt EnablePromoteConstant("aarch64-enable-promote-const", cl::desc("Enable the promote constant pass"), cl::init(true), cl::Hidden); static cl::opt EnableCollectLOH( "aarch64-enable-collect-loh", cl::desc("Enable the pass that emits the linker optimization hints (LOH)"), cl::init(true), cl::Hidden); static cl::opt EnableDeadRegisterElimination("aarch64-enable-dead-defs", cl::Hidden, cl::desc("Enable the pass that removes dead" " definitons and replaces stores to" " them with stores to the zero" " register"), cl::init(true)); static cl::opt EnableRedundantCopyElimination( "aarch64-enable-copyelim", cl::desc("Enable the redundant copy elimination pass"), cl::init(true), cl::Hidden); static cl::opt EnableLoadStoreOpt("aarch64-enable-ldst-opt", cl::desc("Enable the load/store pair" " optimization pass"), cl::init(true), cl::Hidden); static cl::opt EnableAtomicTidy( "aarch64-enable-atomic-cfg-tidy", cl::Hidden, cl::desc("Run SimplifyCFG after expanding atomic operations" " to make use of cmpxchg flow-based information"), cl::init(true)); static cl::opt EnableEarlyIfConversion("aarch64-enable-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(true)); static 
cl::opt EnableCondOpt("aarch64-enable-condopt", cl::desc("Enable the condition optimizer pass"), cl::init(true), cl::Hidden); static cl::opt EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden, cl::desc("Enable optimizations on complex GEPs"), cl::init(false)); static cl::opt BranchRelaxation("aarch64-enable-branch-relax", cl::Hidden, cl::init(true), cl::desc("Relax out of range conditional branches")); static cl::opt EnableCompressJumpTables( "aarch64-enable-compress-jump-tables", cl::Hidden, cl::init(true), cl::desc("Use smallest entry possible for jump tables")); // FIXME: Unify control over GlobalMerge. static cl::opt EnableGlobalMerge("aarch64-enable-global-merge", cl::Hidden, cl::desc("Enable the global merge pass")); static cl::opt EnableLoopDataPrefetch("aarch64-enable-loop-data-prefetch", cl::Hidden, cl::desc("Enable the loop data prefetch pass"), cl::init(true)); static cl::opt EnableGlobalISelAtO( "aarch64-enable-global-isel-at-O", cl::Hidden, cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"), cl::init(0)); static cl::opt EnableSVEIntrinsicOpts("aarch64-enable-sve-intrinsic-opts", cl::Hidden, cl::desc("Enable SVE intrinsic opts"), cl::init(true)); static cl::opt EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix", cl::init(true), cl::Hidden); static cl::opt EnableBranchTargets("aarch64-enable-branch-targets", cl::Hidden, cl::desc("Enable the AArch64 branch target pass"), cl::init(true)); static cl::opt SVEVectorBitsMaxOpt( "aarch64-sve-vector-bits-max", cl::desc("Assume SVE vector registers are at most this big, " "with zero meaning no maximum size is assumed."), cl::init(0), cl::Hidden); static cl::opt SVEVectorBitsMinOpt( "aarch64-sve-vector-bits-min", cl::desc("Assume SVE vector registers are at least this big, " "with zero meaning no minimum size is assumed."), cl::init(0), cl::Hidden); extern cl::opt EnableHomogeneousPrologEpilog; static cl::opt EnableGISelLoadStoreOptPreLegal( "aarch64-enable-gisel-ldst-prelegal", cl::desc("Enable GlobalISel's pre-legalizer load/store optimization pass"), cl::init(true), cl::Hidden); static cl::opt EnableGISelLoadStoreOptPostLegal( "aarch64-enable-gisel-ldst-postlegal", cl::desc("Enable GlobalISel's post-legalizer load/store optimization pass"), cl::init(false), cl::Hidden); extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() { // Register the target. 
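// (For reference, these registrations are what later TargetRegistry lookups
//  resolve against; a consumer side looks roughly like the following, where
//  the triple string is only an example:
//
//    std::string Error;
//    const llvm::Target *T =
//        llvm::TargetRegistry::lookupTarget("aarch64-unknown-linux-gnu", Error);
//    // T->createTargetMachine(...) then hands back an AArch64TargetMachine.
//  )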
RegisterTargetMachine X(getTheAArch64leTarget()); RegisterTargetMachine Y(getTheAArch64beTarget()); RegisterTargetMachine Z(getTheARM64Target()); RegisterTargetMachine W(getTheARM64_32Target()); RegisterTargetMachine V(getTheAArch64_32Target()); auto PR = PassRegistry::getPassRegistry(); initializeGlobalISel(*PR); initializeAArch64A53Fix835769Pass(*PR); initializeAArch64A57FPLoadBalancingPass(*PR); initializeAArch64AdvSIMDScalarPass(*PR); initializeAArch64BranchTargetsPass(*PR); initializeAArch64CollectLOHPass(*PR); initializeAArch64CompressJumpTablesPass(*PR); initializeAArch64ConditionalComparesPass(*PR); initializeAArch64ConditionOptimizerPass(*PR); initializeAArch64DeadRegisterDefinitionsPass(*PR); initializeAArch64ExpandPseudoPass(*PR); initializeAArch64LoadStoreOptPass(*PR); initializeAArch64MIPeepholeOptPass(*PR); initializeAArch64SIMDInstrOptPass(*PR); initializeAArch64O0PreLegalizerCombinerPass(*PR); initializeAArch64PreLegalizerCombinerPass(*PR); initializeAArch64PostLegalizerCombinerPass(*PR); initializeAArch64PostLegalizerLoweringPass(*PR); initializeAArch64PostSelectOptimizePass(*PR); initializeAArch64PromoteConstantPass(*PR); initializeAArch64RedundantCopyEliminationPass(*PR); initializeAArch64StorePairSuppressPass(*PR); initializeFalkorHWPFFixPass(*PR); initializeFalkorMarkStridedAccessesLegacyPass(*PR); initializeLDTLSCleanupPass(*PR); initializeSVEIntrinsicOptsPass(*PR); initializeAArch64SpeculationHardeningPass(*PR); initializeAArch64SLSHardeningPass(*PR); initializeAArch64StackTaggingPass(*PR); initializeAArch64StackTaggingPreRAPass(*PR); initializeAArch64LowerHomogeneousPrologEpilogPass(*PR); } //===----------------------------------------------------------------------===// // AArch64 Lowering public interface. //===----------------------------------------------------------------------===// static std::unique_ptr createTLOF(const Triple &TT) { if (TT.isOSBinFormatMachO()) return std::make_unique(); if (TT.isOSBinFormatCOFF()) return std::make_unique(); return std::make_unique(); } // Helper function to build a DataLayout string static std::string computeDataLayout(const Triple &TT, const MCTargetOptions &Options, bool LittleEndian) { if (TT.isOSBinFormatMachO()) { if (TT.getArch() == Triple::aarch64_32) return "e-m:o-p:32:32-i64:64-i128:128-n32:64-S128"; return "e-m:o-i64:64-i128:128-n32:64-S128"; } if (TT.isOSBinFormatCOFF()) return "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128"; std::string Endian = LittleEndian ? "e" : "E"; std::string Ptr32 = TT.getEnvironment() == Triple::GNUILP32 ? "-p:32:32" : ""; return Endian + "-m:e" + Ptr32 + "-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"; } static StringRef computeDefaultCPU(const Triple &TT, StringRef CPU) { if (CPU.empty() && TT.isArm64e()) return "apple-a12"; return CPU; } static Reloc::Model getEffectiveRelocModel(const Triple &TT, Optional RM) { // AArch64 Darwin and Windows are always PIC. if (TT.isOSDarwin() || TT.isOSWindows()) return Reloc::PIC_; // On ELF platforms the default static relocation model has a smart enough // linker to cope with referencing external symbols defined in a shared // library. Hence DynamicNoPIC doesn't need to be promoted to PIC. 
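// A rough illustration of the resulting policy, assuming RM is populated from
// llc's -relocation-model flag when one is given:
//   aarch64-*-linux, no flag                 -> Reloc::Static (branch below)
//   aarch64-*-linux, -relocation-model=pic   -> Reloc::PIC_   (returned as *RM)
//   any Darwin or Windows triple             -> Reloc::PIC_   (forced above)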
if (!RM.hasValue() || *RM == Reloc::DynamicNoPIC) return Reloc::Static; return *RM; } static CodeModel::Model getEffectiveAArch64CodeModel(const Triple &TT, Optional CM, bool JIT) { if (CM) { if (*CM != CodeModel::Small && *CM != CodeModel::Tiny && *CM != CodeModel::Large) { report_fatal_error( "Only small, tiny and large code models are allowed on AArch64"); } else if (*CM == CodeModel::Tiny && !TT.isOSBinFormatELF()) report_fatal_error("tiny code model is only supported on ELF"); return *CM; } // The default MCJIT memory managers make no guarantees about where they can // find an executable page; JITed code needs to be able to refer to globals // no matter how far away they are. // We should set the CodeModel::Small for Windows ARM64 in JIT mode, // since with large code model LLVM generating 4 MOV instructions, and // Windows doesn't support relocating these long branch (4 MOVs). if (JIT && !TT.isOSWindows()) return CodeModel::Large; return CodeModel::Small; } /// Create an AArch64 architecture model. /// AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional RM, Optional CM, CodeGenOpt::Level OL, bool JIT, bool LittleEndian) : LLVMTargetMachine(T, computeDataLayout(TT, Options.MCOptions, LittleEndian), TT, computeDefaultCPU(TT, CPU), FS, Options, getEffectiveRelocModel(TT, RM), getEffectiveAArch64CodeModel(TT, CM, JIT), OL), TLOF(createTLOF(getTargetTriple())), isLittle(LittleEndian) { initAsmInfo(); if (TT.isOSBinFormatMachO()) { this->Options.TrapUnreachable = true; this->Options.NoTrapAfterNoreturn = true; } if (getMCAsmInfo()->usesWindowsCFI()) { // Unwinding can get confused if the last instruction in an // exception-handling region (function, funclet, try block, etc.) // is a call. // // FIXME: We could elide the trap if the next instruction would be in // the same region anyway. this->Options.TrapUnreachable = true; } if (this->Options.TLSSize == 0) // default this->Options.TLSSize = 24; if ((getCodeModel() == CodeModel::Small || getCodeModel() == CodeModel::Kernel) && this->Options.TLSSize > 32) // for the small (and kernel) code model, the maximum TLS size is 4GiB this->Options.TLSSize = 32; else if (getCodeModel() == CodeModel::Tiny && this->Options.TLSSize > 24) // for the tiny code model, the maximum TLS size is 1MiB (< 16MiB) this->Options.TLSSize = 24; // Enable GlobalISel at or below EnableGlobalISelAt0, unless this is // MachO/CodeModel::Large, which GlobalISel does not support. if (getOptLevel() <= EnableGlobalISelAtO && TT.getArch() != Triple::aarch64_32 && TT.getEnvironment() != Triple::GNUILP32 && !(getCodeModel() == CodeModel::Large && TT.isOSBinFormatMachO())) { setGlobalISel(true); setGlobalISelAbort(GlobalISelAbortMode::Disable); } // AArch64 supports the MachineOutliner. setMachineOutliner(true); // AArch64 supports default outlining behaviour. setSupportsDefaultOutlining(true); // AArch64 supports the debug entry values. setSupportsDebugEntryValues(true); } AArch64TargetMachine::~AArch64TargetMachine() = default; const AArch64Subtarget * AArch64TargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); Attribute TuneAttr = F.getFnAttribute("tune-cpu"); Attribute FSAttr = F.getFnAttribute("target-features"); std::string CPU = CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU; std::string TuneCPU = TuneAttr.isValid() ? TuneAttr.getValueAsString().str() : CPU; std::string FS = FSAttr.isValid() ? 
FSAttr.getValueAsString().str() : TargetFS; SmallString<512> Key; unsigned MinSVEVectorSize = 0; unsigned MaxSVEVectorSize = 0; Attribute VScaleRangeAttr = F.getFnAttribute(Attribute::VScaleRange); if (VScaleRangeAttr.isValid()) { Optional VScaleMax = VScaleRangeAttr.getVScaleRangeMax(); MinSVEVectorSize = VScaleRangeAttr.getVScaleRangeMin() * 128; MaxSVEVectorSize = VScaleMax ? VScaleMax.getValue() * 128 : 0; } else { MinSVEVectorSize = SVEVectorBitsMinOpt; MaxSVEVectorSize = SVEVectorBitsMaxOpt; } assert(MinSVEVectorSize % 128 == 0 && "SVE requires vector length in multiples of 128!"); assert(MaxSVEVectorSize % 128 == 0 && "SVE requires vector length in multiples of 128!"); assert((MaxSVEVectorSize >= MinSVEVectorSize || MaxSVEVectorSize == 0) && "Minimum SVE vector size should not be larger than its maximum!"); // Sanitize user input in case of no asserts if (MaxSVEVectorSize == 0) MinSVEVectorSize = (MinSVEVectorSize / 128) * 128; else { MinSVEVectorSize = (std::min(MinSVEVectorSize, MaxSVEVectorSize) / 128) * 128; MaxSVEVectorSize = (std::max(MinSVEVectorSize, MaxSVEVectorSize) / 128) * 128; } Key += "SVEMin"; Key += std::to_string(MinSVEVectorSize); Key += "SVEMax"; Key += std::to_string(MaxSVEVectorSize); Key += CPU; Key += TuneCPU; Key += FS; auto &I = SubtargetMap[Key]; if (!I) { // This needs to be done before we create a new subtarget since any // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. resetTargetOptions(F); I = std::make_unique(TargetTriple, CPU, TuneCPU, FS, *this, isLittle, MinSVEVectorSize, MaxSVEVectorSize); } return I.get(); } void AArch64leTargetMachine::anchor() { } AArch64leTargetMachine::AArch64leTargetMachine( const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional RM, Optional CM, CodeGenOpt::Level OL, bool JIT) : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, true) {} void AArch64beTargetMachine::anchor() { } AArch64beTargetMachine::AArch64beTargetMachine( const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional RM, Optional CM, CodeGenOpt::Level OL, bool JIT) : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, false) {} namespace { /// AArch64 Code Generator Pass Configuration Options. class AArch64PassConfig : public TargetPassConfig { public: AArch64PassConfig(AArch64TargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) { if (TM.getOptLevel() != CodeGenOpt::None) substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); } AArch64TargetMachine &getAArch64TargetMachine() const { return getTM(); } ScheduleDAGInstrs * createMachineScheduler(MachineSchedContext *C) const override { const AArch64Subtarget &ST = C->MF->getSubtarget(); ScheduleDAGMILive *DAG = createGenericSchedLive(C); DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); if (ST.hasFusion()) DAG->addMutation(createAArch64MacroFusionDAGMutation()); return DAG; } ScheduleDAGInstrs * createPostMachineScheduler(MachineSchedContext *C) const override { const AArch64Subtarget &ST = C->MF->getSubtarget(); if (ST.hasFusion()) { // Run the Macro Fusion after RA again since literals are expanded from // pseudos then (v. addPreSched2()). 
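// (This re-applies the same fusion mutation used pre-RA so that fusable pairs
//  stay adjacent in the post-RA schedule as well; e.g. the movz/movk halves of
//  a wide immediate only exist as separate instructions once the pseudos have
//  been expanded in addPreSched2():
//    movz x0, #0x1234, lsl #16
//    movk x0, #0x5678
//  and cores that fuse literal formation benefit from keeping such a pair
//  back-to-back.)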
ScheduleDAGMI *DAG = createGenericSchedPostRA(C); DAG->addMutation(createAArch64MacroFusionDAGMutation()); return DAG; } return nullptr; } void addIRPasses() override; bool addPreISel() override; void addCodeGenPrepare() override; bool addInstSelector() override; bool addIRTranslator() override; void addPreLegalizeMachineIR() override; bool addLegalizeMachineIR() override; void addPreRegBankSelect() override; bool addRegBankSelect() override; void addPreGlobalInstructionSelect() override; bool addGlobalInstructionSelect() override; void addMachineSSAOptimization() override; bool addILPOpts() override; void addPreRegAlloc() override; void addPostRegAlloc() override; void addPreSched2() override; void addPreEmitPass() override; void addPreEmitPass2() override; std::unique_ptr getCSEConfig() const override; }; } // end anonymous namespace TargetTransformInfo AArch64TargetMachine::getTargetTransformInfo(const Function &F) { return TargetTransformInfo(AArch64TTIImpl(this, F)); } TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) { return new AArch64PassConfig(*this, PM); } std::unique_ptr AArch64PassConfig::getCSEConfig() const { return getStandardCSEConfigForOpt(TM->getOptLevel()); } void AArch64PassConfig::addIRPasses() { // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg // ourselves. addPass(createAtomicExpandPass()); // Expand any SVE vector library calls that we can't code generate directly. if (EnableSVEIntrinsicOpts && TM->getOptLevel() == CodeGenOpt::Aggressive) addPass(createSVEIntrinsicOptsPass()); // Cmpxchg instructions are often used with a subsequent comparison to // determine whether it succeeded. We can exploit existing control-flow in // ldrex/strex loops to simplify this, but it needs tidying up. if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) addPass(createCFGSimplificationPass(SimplifyCFGOptions() .forwardSwitchCondToPhi(true) + .convertSwitchRangeToICmp(true) .convertSwitchToLookupTable(true) .needCanonicalLoops(false) .hoistCommonInsts(true) .sinkCommonInsts(true))); // Run LoopDataPrefetch // // Run this before LSR to remove the multiplies involved in computing the // pointer values N iterations ahead. if (TM->getOptLevel() != CodeGenOpt::None) { if (EnableLoopDataPrefetch) addPass(createLoopDataPrefetchPass()); if (EnableFalkorHWPFFix) addPass(createFalkorMarkStridedAccessesPass()); } TargetPassConfig::addIRPasses(); addPass(createAArch64StackTaggingPass( /*IsOptNone=*/TM->getOptLevel() == CodeGenOpt::None)); // Match interleaved memory accesses to ldN/stN intrinsics. if (TM->getOptLevel() != CodeGenOpt::None) { addPass(createInterleavedLoadCombinePass()); addPass(createInterleavedAccessPass()); } if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) { // Call SeparateConstOffsetFromGEP pass to extract constants within indices // and lower a GEP with multiple indices to either arithmetic operations or // multiple GEPs with single index. addPass(createSeparateConstOffsetFromGEPPass(true)); // Call EarlyCSE pass to find and remove subexpressions in the lowered // result. addPass(createEarlyCSEPass()); // Do loop invariant code motion in case part of the lowered result is // invariant. addPass(createLICMPass()); } // Add Control Flow Guard checks. 
if (TM->getTargetTriple().isOSWindows()) addPass(createCFGuardCheckPass()); } // Pass Pipeline Configuration bool AArch64PassConfig::addPreISel() { // Run promote constant before global merge, so that the promoted constants // get a chance to be merged if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant) addPass(createAArch64PromoteConstantPass()); // FIXME: On AArch64, this depends on the type. // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(). // and the offset has to be a multiple of the related size in bytes. if ((TM->getOptLevel() != CodeGenOpt::None && EnableGlobalMerge == cl::BOU_UNSET) || EnableGlobalMerge == cl::BOU_TRUE) { bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) && (EnableGlobalMerge == cl::BOU_UNSET); // Merging of extern globals is enabled by default on non-Mach-O as we // expect it to be generally either beneficial or harmless. On Mach-O it // is disabled as we emit the .subsections_via_symbols directive which // means that merging extern globals is not safe. bool MergeExternalByDefault = !TM->getTargetTriple().isOSBinFormatMachO(); // FIXME: extern global merging is only enabled when we optimise for size // because there are some regressions with it also enabled for performance. if (!OnlyOptimizeForSize) MergeExternalByDefault = false; addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize, MergeExternalByDefault)); } return false; } void AArch64PassConfig::addCodeGenPrepare() { if (getOptLevel() != CodeGenOpt::None) addPass(createTypePromotionPass()); TargetPassConfig::addCodeGenPrepare(); } bool AArch64PassConfig::addInstSelector() { addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel())); // For ELF, cleanup any local-dynamic TLS accesses (i.e. combine as many // references to _TLS_MODULE_BASE_ as possible. if (TM->getTargetTriple().isOSBinFormatELF() && getOptLevel() != CodeGenOpt::None) addPass(createAArch64CleanupLocalDynamicTLSPass()); return false; } bool AArch64PassConfig::addIRTranslator() { addPass(new IRTranslator(getOptLevel())); return false; } void AArch64PassConfig::addPreLegalizeMachineIR() { if (getOptLevel() == CodeGenOpt::None) addPass(createAArch64O0PreLegalizerCombiner()); else { addPass(createAArch64PreLegalizerCombiner()); if (EnableGISelLoadStoreOptPreLegal) addPass(new LoadStoreOpt()); } } bool AArch64PassConfig::addLegalizeMachineIR() { addPass(new Legalizer()); return false; } void AArch64PassConfig::addPreRegBankSelect() { bool IsOptNone = getOptLevel() == CodeGenOpt::None; if (!IsOptNone) { addPass(createAArch64PostLegalizerCombiner(IsOptNone)); if (EnableGISelLoadStoreOptPostLegal) addPass(new LoadStoreOpt()); } addPass(createAArch64PostLegalizerLowering()); } bool AArch64PassConfig::addRegBankSelect() { addPass(new RegBankSelect()); return false; } void AArch64PassConfig::addPreGlobalInstructionSelect() { addPass(new Localizer()); } bool AArch64PassConfig::addGlobalInstructionSelect() { addPass(new InstructionSelect(getOptLevel())); if (getOptLevel() != CodeGenOpt::None) addPass(createAArch64PostSelectOptimize()); return false; } void AArch64PassConfig::addMachineSSAOptimization() { // Run default MachineSSAOptimization first. 
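// (This follows the usual override shape: delegate to the generic pipeline,
//  then append target-specific MI passes, roughly
//    void SomePassConfig::addMachineSSAOptimization() {
//      TargetPassConfig::addMachineSSAOptimization(); // generic MI SSA opts
//      addPass(createSomeTargetPeepholePass());       // target extras
//    }
//  where SomePassConfig/createSomeTargetPeepholePass are placeholders, not
//  names defined in this file.)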
TargetPassConfig::addMachineSSAOptimization(); if (TM->getOptLevel() != CodeGenOpt::None) addPass(createAArch64MIPeepholeOptPass()); } bool AArch64PassConfig::addILPOpts() { if (EnableCondOpt) addPass(createAArch64ConditionOptimizerPass()); if (EnableCCMP) addPass(createAArch64ConditionalCompares()); if (EnableMCR) addPass(&MachineCombinerID); if (EnableCondBrTuning) addPass(createAArch64CondBrTuning()); if (EnableEarlyIfConversion) addPass(&EarlyIfConverterID); if (EnableStPairSuppress) addPass(createAArch64StorePairSuppressPass()); addPass(createAArch64SIMDInstrOptPass()); if (TM->getOptLevel() != CodeGenOpt::None) addPass(createAArch64StackTaggingPreRAPass()); return true; } void AArch64PassConfig::addPreRegAlloc() { // Change dead register definitions to refer to the zero register. if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination) addPass(createAArch64DeadRegisterDefinitions()); // Use AdvSIMD scalar instructions whenever profitable. if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar) { addPass(createAArch64AdvSIMDScalar()); // The AdvSIMD pass may produce copies that can be rewritten to // be register coalescer friendly. addPass(&PeepholeOptimizerID); } } void AArch64PassConfig::addPostRegAlloc() { // Remove redundant copy instructions. if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination) addPass(createAArch64RedundantCopyEliminationPass()); if (TM->getOptLevel() != CodeGenOpt::None && usingDefaultRegAlloc()) // Improve performance for some FP/SIMD code for A57. addPass(createAArch64A57FPLoadBalancing()); } void AArch64PassConfig::addPreSched2() { // Lower homogeneous frame instructions if (EnableHomogeneousPrologEpilog) addPass(createAArch64LowerHomogeneousPrologEpilogPass()); // Expand some pseudo instructions to allow proper scheduling. addPass(createAArch64ExpandPseudoPass()); // Use load/store pair instructions when possible. if (TM->getOptLevel() != CodeGenOpt::None) { if (EnableLoadStoreOpt) addPass(createAArch64LoadStoreOptimizationPass()); } // The AArch64SpeculationHardeningPass destroys dominator tree and natural // loop info, which is needed for the FalkorHWPFFixPass and also later on. // Therefore, run the AArch64SpeculationHardeningPass before the // FalkorHWPFFixPass to avoid recomputing dominator tree and natural loop // info. addPass(createAArch64SpeculationHardeningPass()); addPass(createAArch64IndirectThunks()); addPass(createAArch64SLSHardeningPass()); if (TM->getOptLevel() != CodeGenOpt::None) { if (EnableFalkorHWPFFix) addPass(createFalkorHWPFFixPass()); } } void AArch64PassConfig::addPreEmitPass() { // Machine Block Placement might have created new opportunities when run // at O3, where the Tail Duplication Threshold is set to 4 instructions. // Run the load/store optimizer once more. if (TM->getOptLevel() >= CodeGenOpt::Aggressive && EnableLoadStoreOpt) addPass(createAArch64LoadStoreOptimizationPass()); addPass(createAArch64A53Fix835769()); if (EnableBranchTargets) addPass(createAArch64BranchTargetsPass()); // Relax conditional branch instructions if they're otherwise out of // range of their destination. if (BranchRelaxation) addPass(&BranchRelaxationPassID); if (TM->getTargetTriple().isOSWindows()) { // Identify valid longjmp targets for Windows Control Flow Guard. addPass(createCFGuardLongjmpPass()); // Identify valid eh continuation targets for Windows EHCont Guard. 
addPass(createEHContGuardCatchretPass()); } if (TM->getOptLevel() != CodeGenOpt::None && EnableCompressJumpTables) addPass(createAArch64CompressJumpTablesPass()); if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH && TM->getTargetTriple().isOSBinFormatMachO()) addPass(createAArch64CollectLOHPass()); } void AArch64PassConfig::addPreEmitPass2() { // SVE bundles move prefixes with destructive operations. BLR_RVMARKER pseudo // instructions are lowered to bundles as well. addPass(createUnpackMachineBundles(nullptr)); } yaml::MachineFunctionInfo * AArch64TargetMachine::createDefaultFuncInfoYAML() const { return new yaml::AArch64FunctionInfo(); } yaml::MachineFunctionInfo * AArch64TargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const { const auto *MFI = MF.getInfo(); return new yaml::AArch64FunctionInfo(*MFI); } bool AArch64TargetMachine::parseMachineFunctionInfo( const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) const { const auto &YamlMFI = reinterpret_cast(MFI); MachineFunction &MF = PFS.MF; MF.getInfo()->initializeBaseYamlFields(YamlMFI); return false; } diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index 0ba75a544c04..14b4f7c56c57 100755 --- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -1,2672 +1,2672 @@ //===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "HexagonISelLowering.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/IR/IntrinsicsHexagon.h" #include "llvm/Support/CommandLine.h" using namespace llvm; static cl::opt HvxWidenThreshold("hexagon-hvx-widen", cl::Hidden, cl::init(16), cl::desc("Lower threshold (in bytes) for widening to HVX vectors")); static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 }; static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 }; static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 }; static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 }; void HexagonTargetLowering::initializeHVXLowering() { if (Subtarget.useHVX64BOps()) { addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass); addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass); addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass); // These "short" boolean vector types should be legal because // they will appear as results of vector compares. If they were // not legal, type legalization would try to make them legal // and that would require using operations that do not use or // produce such types. That, in turn, would imply using custom // nodes, which would be unoptimizable by the DAG combiner. // The idea is to rely on target-independent operations as much // as possible. 
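// Concretely, with 64-byte HVX a vector compare produces one predicate bit
// per element, e.g. comparing two v64i8 values yields a v64i1 result. In DAG
// terms that is roughly:
//   EVT PredVT = EVT::getVectorVT(Ctx, MVT::i1, 64);         // v64i1
//   SDValue P  = DAG.getSetCC(dl, PredVT, A, B, ISD::SETEQ);  // A, B : v64i8
// so v16i1/v32i1/v64i1 have to be legal for those nodes to survive unmodified.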
addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass); addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass); addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass); } else if (Subtarget.useHVX128BOps()) { addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass); addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass); addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass); addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass); addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass); addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass); if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) { addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass); addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass); addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass); } } // Set up operation actions. bool Use64b = Subtarget.useHVX64BOps(); ArrayRef LegalV = Use64b ? LegalV64 : LegalV128; ArrayRef LegalW = Use64b ? LegalW64 : LegalW128; MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8; MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8; auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) { setOperationAction(Opc, FromTy, Promote); AddPromotedToType(Opc, FromTy, ToTy); }; // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32). // Note: v16i1 -> i16 is handled in type legalization instead of op // legalization. setOperationAction(ISD::BITCAST, MVT::i16, Custom); setOperationAction(ISD::BITCAST, MVT::i32, Custom); setOperationAction(ISD::BITCAST, MVT::i64, Custom); setOperationAction(ISD::BITCAST, MVT::v16i1, Custom); setOperationAction(ISD::BITCAST, MVT::v128i1, Custom); setOperationAction(ISD::BITCAST, MVT::i128, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal); setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) { setOperationAction(ISD::FMINNUM, MVT::v64f16, Legal); setOperationAction(ISD::FMAXNUM, MVT::v64f16, Legal); setOperationAction(ISD::FADD, MVT::v64f16, Legal); setOperationAction(ISD::FSUB, MVT::v64f16, Legal); setOperationAction(ISD::FMUL, MVT::v64f16, Legal); setOperationAction(ISD::FADD, MVT::v32f32, Legal); setOperationAction(ISD::FSUB, MVT::v32f32, Legal); setOperationAction(ISD::FMUL, MVT::v32f32, Legal); setOperationAction(ISD::FMINNUM, MVT::v32f32, Legal); setOperationAction(ISD::FMAXNUM, MVT::v32f32, Legal); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64f16, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64f16, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f32, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32f32, Custom); // Handle ISD::BUILD_VECTOR for v32f32 in a custom way to generate vsplat setOperationAction(ISD::BUILD_VECTOR, MVT::v32f32, Custom); // BUILD_VECTOR with f16 operands cannot be promoted without // promoting the result, so lower the node to vsplat or constant pool setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::f16, Custom); setOperationAction(ISD::SPLAT_VECTOR, MVT::f16, Custom); setOperationAction(ISD::SPLAT_VECTOR, MVT::v64f16, Legal); setOperationAction(ISD::SPLAT_VECTOR, MVT::v32f32, Legal); // Vector shuffle is always promoted to ByteV and a 
bitcast to f16 is // generated. - setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV); - setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW); - setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV); + setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW); + setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV); + setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW); + setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV); // Custom-lower BUILD_VECTOR for vector pairs. The standard (target- // independent) handling of it would convert it to a load, which is // not always the optimal choice. setOperationAction(ISD::BUILD_VECTOR, MVT::v64f32, Custom); // Make concat-vectors custom to handle concats of more than 2 vectors. setOperationAction(ISD::CONCAT_VECTORS, MVT::v128f16, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v64f32, Custom); setOperationAction(ISD::LOAD, MVT::v64f32, Custom); setOperationAction(ISD::STORE, MVT::v64f32, Custom); setOperationAction(ISD::FADD, MVT::v64f32, Custom); setOperationAction(ISD::FSUB, MVT::v64f32, Custom); setOperationAction(ISD::FMUL, MVT::v64f32, Custom); setOperationAction(ISD::FMINNUM, MVT::v64f32, Custom); setOperationAction(ISD::FMAXNUM, MVT::v64f32, Custom); setOperationAction(ISD::VSELECT, MVT::v64f32, Custom); if (Subtarget.useHVXQFloatOps()) { setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Custom); setOperationAction(ISD::FP_ROUND, MVT::v64f16, Legal); } else if (Subtarget.useHVXIEEEFPOps()) { setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Legal); setOperationAction(ISD::FP_ROUND, MVT::v64f16, Legal); } setOperationAction(ISD::MLOAD, MVT::v32f32, Custom); setOperationAction(ISD::MSTORE, MVT::v32f32, Custom); setOperationAction(ISD::MLOAD, MVT::v64f16, Custom); setOperationAction(ISD::MSTORE, MVT::v64f16, Custom); setOperationAction(ISD::MLOAD, MVT::v64f32, Custom); setOperationAction(ISD::MSTORE, MVT::v64f32, Custom); } for (MVT T : LegalV) { setIndexedLoadAction(ISD::POST_INC, T, Legal); setIndexedStoreAction(ISD::POST_INC, T, Legal); setOperationAction(ISD::AND, T, Legal); setOperationAction(ISD::OR, T, Legal); setOperationAction(ISD::XOR, T, Legal); setOperationAction(ISD::ADD, T, Legal); setOperationAction(ISD::SUB, T, Legal); setOperationAction(ISD::MUL, T, Legal); setOperationAction(ISD::CTPOP, T, Legal); setOperationAction(ISD::CTLZ, T, Legal); setOperationAction(ISD::SELECT, T, Legal); setOperationAction(ISD::SPLAT_VECTOR, T, Legal); if (T != ByteV) { setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal); setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal); setOperationAction(ISD::BSWAP, T, Legal); } setOperationAction(ISD::SMIN, T, Legal); setOperationAction(ISD::SMAX, T, Legal); if (T.getScalarType() != MVT::i32) { setOperationAction(ISD::UMIN, T, Legal); setOperationAction(ISD::UMAX, T, Legal); } setOperationAction(ISD::CTTZ, T, Custom); setOperationAction(ISD::LOAD, T, Custom); setOperationAction(ISD::MLOAD, T, Custom); setOperationAction(ISD::MSTORE, T, Custom); setOperationAction(ISD::MULHS, T, Custom); setOperationAction(ISD::MULHU, T, Custom); setOperationAction(ISD::BUILD_VECTOR, T, Custom); // Make concat-vectors custom to handle concats of more than 2 vectors. 
setOperationAction(ISD::CONCAT_VECTORS, T, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, T, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, T, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom); setOperationAction(ISD::ANY_EXTEND, T, Custom); setOperationAction(ISD::SIGN_EXTEND, T, Custom); setOperationAction(ISD::ZERO_EXTEND, T, Custom); if (T != ByteV) { setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom); // HVX only has shifts of words and halfwords. setOperationAction(ISD::SRA, T, Custom); setOperationAction(ISD::SHL, T, Custom); setOperationAction(ISD::SRL, T, Custom); // Promote all shuffles to operate on vectors of bytes. setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV); } if (Subtarget.useHVXQFloatOps()) { setOperationAction(ISD::SINT_TO_FP, T, Expand); setOperationAction(ISD::UINT_TO_FP, T, Expand); setOperationAction(ISD::FP_TO_SINT, T, Expand); setOperationAction(ISD::FP_TO_UINT, T, Expand); } else if (Subtarget.useHVXIEEEFPOps()) { setOperationAction(ISD::SINT_TO_FP, T, Custom); setOperationAction(ISD::UINT_TO_FP, T, Custom); setOperationAction(ISD::FP_TO_SINT, T, Custom); setOperationAction(ISD::FP_TO_UINT, T, Custom); } setCondCodeAction(ISD::SETNE, T, Expand); setCondCodeAction(ISD::SETLE, T, Expand); setCondCodeAction(ISD::SETGE, T, Expand); setCondCodeAction(ISD::SETLT, T, Expand); setCondCodeAction(ISD::SETULE, T, Expand); setCondCodeAction(ISD::SETUGE, T, Expand); setCondCodeAction(ISD::SETULT, T, Expand); } for (MVT T : LegalW) { // Custom-lower BUILD_VECTOR for vector pairs. The standard (target- // independent) handling of it would convert it to a load, which is // not always the optimal choice. setOperationAction(ISD::BUILD_VECTOR, T, Custom); // Make concat-vectors custom to handle concats of more than 2 vectors. setOperationAction(ISD::CONCAT_VECTORS, T, Custom); // Custom-lower these operations for pairs. Expand them into a concat // of the corresponding operations on individual vectors. setOperationAction(ISD::ANY_EXTEND, T, Custom); setOperationAction(ISD::SIGN_EXTEND, T, Custom); setOperationAction(ISD::ZERO_EXTEND, T, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, T, Custom); setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom); setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal); setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal); setOperationAction(ISD::SPLAT_VECTOR, T, Custom); setOperationAction(ISD::LOAD, T, Custom); setOperationAction(ISD::STORE, T, Custom); setOperationAction(ISD::MLOAD, T, Custom); setOperationAction(ISD::MSTORE, T, Custom); setOperationAction(ISD::CTLZ, T, Custom); setOperationAction(ISD::CTTZ, T, Custom); setOperationAction(ISD::CTPOP, T, Custom); setOperationAction(ISD::ADD, T, Legal); setOperationAction(ISD::SUB, T, Legal); setOperationAction(ISD::MUL, T, Custom); setOperationAction(ISD::MULHS, T, Custom); setOperationAction(ISD::MULHU, T, Custom); setOperationAction(ISD::AND, T, Custom); setOperationAction(ISD::OR, T, Custom); setOperationAction(ISD::XOR, T, Custom); setOperationAction(ISD::SETCC, T, Custom); setOperationAction(ISD::VSELECT, T, Custom); if (T != ByteW) { setOperationAction(ISD::SRA, T, Custom); setOperationAction(ISD::SHL, T, Custom); setOperationAction(ISD::SRL, T, Custom); // Promote all shuffles to operate on vectors of bytes. 
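// In effect, a shuffle of wider elements is re-expressed at byte granularity:
// each mask entry M for an element of size S bytes becomes the byte indices
// {S*M, ..., S*M+S-1}, e.g. an i16 mask <1, 0> becomes the byte mask
// <2, 3, 0, 1>. This is the same expansion getByteShuffle() later in this
// file performs when byte shuffles are built by hand, so promoting the
// operation to ByteV/ByteW loses no information.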
setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW); } setOperationAction(ISD::SMIN, T, Custom); setOperationAction(ISD::SMAX, T, Custom); if (T.getScalarType() != MVT::i32) { setOperationAction(ISD::UMIN, T, Custom); setOperationAction(ISD::UMAX, T, Custom); } setOperationAction(ISD::SINT_TO_FP, T, Custom); setOperationAction(ISD::UINT_TO_FP, T, Custom); setOperationAction(ISD::FP_TO_SINT, T, Custom); setOperationAction(ISD::FP_TO_UINT, T, Custom); } setCondCodeAction(ISD::SETNE, MVT::v64f16, Expand); setCondCodeAction(ISD::SETLE, MVT::v64f16, Expand); setCondCodeAction(ISD::SETGE, MVT::v64f16, Expand); setCondCodeAction(ISD::SETLT, MVT::v64f16, Expand); setCondCodeAction(ISD::SETONE, MVT::v64f16, Expand); setCondCodeAction(ISD::SETOLE, MVT::v64f16, Expand); setCondCodeAction(ISD::SETOGE, MVT::v64f16, Expand); setCondCodeAction(ISD::SETOLT, MVT::v64f16, Expand); setCondCodeAction(ISD::SETUNE, MVT::v64f16, Expand); setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand); setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand); setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand); setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand); setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand); setCondCodeAction(ISD::SETGE, MVT::v32f32, Expand); setCondCodeAction(ISD::SETLT, MVT::v32f32, Expand); setCondCodeAction(ISD::SETONE, MVT::v32f32, Expand); setCondCodeAction(ISD::SETOLE, MVT::v32f32, Expand); setCondCodeAction(ISD::SETOGE, MVT::v32f32, Expand); setCondCodeAction(ISD::SETOLT, MVT::v32f32, Expand); setCondCodeAction(ISD::SETUNE, MVT::v32f32, Expand); setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand); setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand); setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand); // Boolean vectors. for (MVT T : LegalW) { // Boolean types for vector pairs will overlap with the boolean // types for single vectors, e.g. // v64i8 -> v64i1 (single) // v64i16 -> v64i1 (pair) // Set these actions first, and allow the single actions to overwrite // any duplicates. MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements()); setOperationAction(ISD::SETCC, BoolW, Custom); setOperationAction(ISD::AND, BoolW, Custom); setOperationAction(ISD::OR, BoolW, Custom); setOperationAction(ISD::XOR, BoolW, Custom); // Masked load/store takes a mask that may need splitting. setOperationAction(ISD::MLOAD, BoolW, Custom); setOperationAction(ISD::MSTORE, BoolW, Custom); } for (MVT T : LegalV) { MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements()); setOperationAction(ISD::BUILD_VECTOR, BoolV, Custom); setOperationAction(ISD::CONCAT_VECTORS, BoolV, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, BoolV, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, BoolV, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, BoolV, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, BoolV, Custom); setOperationAction(ISD::SELECT, BoolV, Custom); setOperationAction(ISD::AND, BoolV, Legal); setOperationAction(ISD::OR, BoolV, Legal); setOperationAction(ISD::XOR, BoolV, Legal); } if (Use64b) { for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32}) setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal); } else { for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32}) setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal); } // Handle store widening for short vectors. 
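// Worked example, assuming a 128-byte HVX configuration (HwLen = 128): for
// ElemTy = i8 the loop below considers v2i8 through v64i8. A v64i8 access is
// 512 bits, i.e. at least half of the 1024-bit vector length, so
// getPreferredVectorAction() is expected to return TypeWidenVector for it and
// its LOAD/STORE/SETCC/extend actions are made Custom, letting the lowering
// code in this file widen such short accesses to a full HVX vector.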
unsigned HwLen = Subtarget.getVectorLength(); for (MVT ElemTy : Subtarget.getHVXElementTypes()) { if (ElemTy == MVT::i1) continue; int ElemWidth = ElemTy.getFixedSizeInBits(); int MaxElems = (8*HwLen) / ElemWidth; for (int N = 2; N < MaxElems; N *= 2) { MVT VecTy = MVT::getVectorVT(ElemTy, N); auto Action = getPreferredVectorAction(VecTy); if (Action == TargetLoweringBase::TypeWidenVector) { setOperationAction(ISD::LOAD, VecTy, Custom); setOperationAction(ISD::STORE, VecTy, Custom); setOperationAction(ISD::SETCC, VecTy, Custom); setOperationAction(ISD::TRUNCATE, VecTy, Custom); setOperationAction(ISD::ANY_EXTEND, VecTy, Custom); setOperationAction(ISD::SIGN_EXTEND, VecTy, Custom); setOperationAction(ISD::ZERO_EXTEND, VecTy, Custom); MVT BoolTy = MVT::getVectorVT(MVT::i1, N); if (!isTypeLegal(BoolTy)) setOperationAction(ISD::SETCC, BoolTy, Custom); } } } setTargetDAGCombine(ISD::SPLAT_VECTOR); setTargetDAGCombine(ISD::VSELECT); } unsigned HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const { MVT ElemTy = VecTy.getVectorElementType(); unsigned VecLen = VecTy.getVectorNumElements(); unsigned HwLen = Subtarget.getVectorLength(); // Split vectors of i1 that exceed byte vector length. if (ElemTy == MVT::i1 && VecLen > HwLen) return TargetLoweringBase::TypeSplitVector; ArrayRef Tys = Subtarget.getHVXElementTypes(); // For shorter vectors of i1, widen them if any of the corresponding // vectors of integers needs to be widened. if (ElemTy == MVT::i1) { for (MVT T : Tys) { assert(T != MVT::i1); auto A = getPreferredHvxVectorAction(MVT::getVectorVT(T, VecLen)); if (A != ~0u) return A; } return ~0u; } // If the size of VecTy is at least half of the vector length, // widen the vector. Note: the threshold was not selected in // any scientific way. if (llvm::is_contained(Tys, ElemTy)) { unsigned VecWidth = VecTy.getSizeInBits(); bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0; if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth) return TargetLoweringBase::TypeWidenVector; unsigned HwWidth = 8*HwLen; if (VecWidth >= HwWidth/2 && VecWidth < HwWidth) return TargetLoweringBase::TypeWidenVector; } // Defer to default. 
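// (~0u acts as a "no HVX-specific preference" sentinel here; the generic
//  getPreferredVectorAction() override appears to treat it as "fall back to
//  the default TargetLoweringBase policy", roughly:
//    unsigned A = getPreferredHvxVectorAction(VT);
//    if (A != ~0u)
//      return static_cast<TargetLoweringBase::LegalizeTypeAction>(A);
//    // otherwise use the generic rules
//  )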
  return ~0u;
}

SDValue
HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
                              const SDLoc &dl, SelectionDAG &DAG) const {
  SmallVector<SDValue,4> IntOps;
  IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32));
  append_range(IntOps, Ops);
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps);
}

MVT
HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
  assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());
  MVT ElemTy = Tys.first.getVectorElementType();
  return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() +
                                  Tys.second.getVectorNumElements());
}

HexagonTargetLowering::TypePair
HexagonTargetLowering::typeSplit(MVT VecTy) const {
  assert(VecTy.isVector());
  unsigned NumElem = VecTy.getVectorNumElements();
  assert((NumElem % 2) == 0 && "Expecting even-sized vector type");
  MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2);
  return { HalfTy, HalfTy };
}

MVT
HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
  MVT ElemTy = VecTy.getVectorElementType();
  MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor);
  return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
}

MVT
HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
  MVT ElemTy = VecTy.getVectorElementType();
  MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor);
  return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
}

SDValue
HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
                                  SelectionDAG &DAG) const {
  if (ty(Vec).getVectorElementType() == ElemTy)
    return Vec;
  MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy);
  return DAG.getBitcast(CastTy, Vec);
}

SDValue
HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
                              SelectionDAG &DAG) const {
  return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)),
                     Ops.second, Ops.first);
}

HexagonTargetLowering::VectorPair
HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
                               SelectionDAG &DAG) const {
  TypePair Tys = typeSplit(ty(Vec));
  if (Vec.getOpcode() == HexagonISD::QCAT)
    return VectorPair(Vec.getOperand(0), Vec.getOperand(1));
  return DAG.SplitVector(Vec, dl, Tys.first, Tys.second);
}

bool
HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
  return Subtarget.isHVXVectorType(Ty) &&
         Ty.getSizeInBits() == 8 * Subtarget.getVectorLength();
}

bool
HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
  return Subtarget.isHVXVectorType(Ty) &&
         Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
}

bool
HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
  return Subtarget.isHVXVectorType(Ty, true) &&
         Ty.getVectorElementType() == MVT::i1;
}

bool HexagonTargetLowering::allowsHvxMemoryAccess(
    MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const {
  // Bool vectors are excluded by default, but make it explicit to
  // emphasize that bool vectors cannot be loaded or stored.
  // Also, disallow double vector stores (to prevent unnecessary
  // store widening in DAG combiner).
  if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
    return false;
  if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
    return false;
  if (Fast)
    *Fast = true;
  return true;
}

bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
    MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const {
  if (!Subtarget.isHVXVectorType(VecTy))
    return false;
  // XXX Should this be false?  vmemu are a bit slower than vmem.
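  // Illustrative note (a worked example, not code from this patch): with the
  // helpers above, "single" and "pair" are defined purely by size, where
  // HwLen = Subtarget.getVectorLength() is in bytes (64 or 128):
  //
  //   bool IsSingle = Ty.getSizeInBits() == 8  * HwLen;  // one HVX register
  //   bool IsPair   = Ty.getSizeInBits() == 16 * HwLen;  // a register pair
  //
  // For the 128-byte configuration that means v128i8/v64i16/v32i32 are single
  // vectors (1024 bits) and v256i8/v128i16/v64i32 are pairs (2048 bits).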
if (Fast) *Fast = true; return true; } SDValue HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy, SelectionDAG &DAG) const { if (ElemIdx.getValueType().getSimpleVT() != MVT::i32) ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx); unsigned ElemWidth = ElemTy.getSizeInBits(); if (ElemWidth == 8) return ElemIdx; unsigned L = Log2_32(ElemWidth/8); const SDLoc &dl(ElemIdx); return DAG.getNode(ISD::SHL, dl, MVT::i32, {ElemIdx, DAG.getConstant(L, dl, MVT::i32)}); } SDValue HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy, SelectionDAG &DAG) const { unsigned ElemWidth = ElemTy.getSizeInBits(); assert(ElemWidth >= 8 && ElemWidth <= 32); if (ElemWidth == 32) return Idx; if (ty(Idx) != MVT::i32) Idx = DAG.getBitcast(MVT::i32, Idx); const SDLoc &dl(Idx); SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32); SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask}); return SubIdx; } SDValue HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0, SDValue Op1, ArrayRef Mask, SelectionDAG &DAG) const { MVT OpTy = ty(Op0); assert(OpTy == ty(Op1)); MVT ElemTy = OpTy.getVectorElementType(); if (ElemTy == MVT::i8) return DAG.getVectorShuffle(OpTy, dl, Op0, Op1, Mask); assert(ElemTy.getSizeInBits() >= 8); MVT ResTy = tyVector(OpTy, MVT::i8); unsigned ElemSize = ElemTy.getSizeInBits() / 8; SmallVector ByteMask; for (int M : Mask) { if (M < 0) { for (unsigned I = 0; I != ElemSize; ++I) ByteMask.push_back(-1); } else { int NewM = M*ElemSize; for (unsigned I = 0; I != ElemSize; ++I) ByteMask.push_back(NewM+I); } } assert(ResTy.getVectorNumElements() == ByteMask.size()); return DAG.getVectorShuffle(ResTy, dl, opCastElem(Op0, MVT::i8, DAG), opCastElem(Op1, MVT::i8, DAG), ByteMask); } SDValue HexagonTargetLowering::buildHvxVectorReg(ArrayRef Values, const SDLoc &dl, MVT VecTy, SelectionDAG &DAG) const { unsigned VecLen = Values.size(); MachineFunction &MF = DAG.getMachineFunction(); MVT ElemTy = VecTy.getVectorElementType(); unsigned ElemWidth = ElemTy.getSizeInBits(); unsigned HwLen = Subtarget.getVectorLength(); unsigned ElemSize = ElemWidth / 8; assert(ElemSize*VecLen == HwLen); SmallVector Words; if (VecTy.getVectorElementType() != MVT::i32 && !(Subtarget.useHVXFloatingPoint() && VecTy.getVectorElementType() == MVT::f32)) { assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size"); unsigned OpsPerWord = (ElemSize == 1) ? 
4 : 2; MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord); for (unsigned i = 0; i != VecLen; i += OpsPerWord) { SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG); Words.push_back(DAG.getBitcast(MVT::i32, W)); } } else { for (SDValue V : Values) Words.push_back(DAG.getBitcast(MVT::i32, V)); } auto isSplat = [] (ArrayRef Values, SDValue &SplatV) { unsigned NumValues = Values.size(); assert(NumValues > 0); bool IsUndef = true; for (unsigned i = 0; i != NumValues; ++i) { if (Values[i].isUndef()) continue; IsUndef = false; if (!SplatV.getNode()) SplatV = Values[i]; else if (SplatV != Values[i]) return false; } if (IsUndef) SplatV = Values[0]; return true; }; unsigned NumWords = Words.size(); SDValue SplatV; bool IsSplat = isSplat(Words, SplatV); if (IsSplat && isUndef(SplatV)) return DAG.getUNDEF(VecTy); if (IsSplat) { assert(SplatV.getNode()); auto *IdxN = dyn_cast(SplatV.getNode()); if (IdxN && IdxN->isZero()) return getZero(dl, VecTy, DAG); MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4); SDValue S = DAG.getNode(ISD::SPLAT_VECTOR, dl, WordTy, SplatV); return DAG.getBitcast(VecTy, S); } // Delay recognizing constant vectors until here, so that we can generate // a vsplat. SmallVector Consts(VecLen); bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts); if (AllConst) { ArrayRef Tmp((Constant**)Consts.begin(), (Constant**)Consts.end()); Constant *CV = ConstantVector::get(Tmp); Align Alignment(HwLen); SDValue CP = LowerConstantPool(DAG.getConstantPool(CV, VecTy, Alignment), DAG); return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP, MachinePointerInfo::getConstantPool(MF), Alignment); } // A special case is a situation where the vector is built entirely from // elements extracted from another vector. This could be done via a shuffle // more efficiently, but typically, the size of the source vector will not // match the size of the vector being built (which precludes the use of a // shuffle directly). // This only handles a single source vector, and the vector being built // should be of a sub-vector type of the source vector type. auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec, SmallVectorImpl &SrcIdx) { SDValue Vec; for (SDValue V : Values) { if (isUndef(V)) { SrcIdx.push_back(-1); continue; } if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) return false; // All extracts should come from the same vector. SDValue T = V.getOperand(0); if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode()) return false; Vec = T; ConstantSDNode *C = dyn_cast(V.getOperand(1)); if (C == nullptr) return false; int I = C->getSExtValue(); assert(I >= 0 && "Negative element index"); SrcIdx.push_back(I); } SrcVec = Vec; return true; }; SmallVector ExtIdx; SDValue ExtVec; if (IsBuildFromExtracts(ExtVec, ExtIdx)) { MVT ExtTy = ty(ExtVec); unsigned ExtLen = ExtTy.getVectorNumElements(); if (ExtLen == VecLen || ExtLen == 2*VecLen) { // Construct a new shuffle mask that will produce a vector with the same // number of elements as the input vector, and such that the vector we // want will be the initial subvector of it. SmallVector Mask; BitVector Used(ExtLen); for (int M : ExtIdx) { Mask.push_back(M); if (M >= 0) Used.set(M); } // Fill the rest of the mask with the unused elements of ExtVec in hopes // that it will result in a permutation of ExtVec's elements. It's still // fine if it doesn't (e.g. if undefs are present, or elements are // repeated), but permutations can always be done efficiently via vdelta // and vrdelta. 
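  // Illustrative sketch (not part of this patch): the mask completion done by
  // the loop just below, in isolation. ExtLen is the element count of ExtVec.
  //
  //   std::vector<int> completeMask(const std::vector<int> &ExtIdx,
  //                                 unsigned ExtLen) {
  //     std::vector<int> Mask(ExtIdx);            // may contain -1 (undef)
  //     std::vector<bool> Used(ExtLen, false);
  //     for (int M : ExtIdx)
  //       if (M >= 0) Used[M] = true;
  //     for (unsigned I = 0; I != ExtLen && Mask.size() != ExtLen; ++I)
  //       if (!Used[I]) Mask.push_back(I);        // append unused lanes
  //     return Mask;                              // {2,0}, ExtLen=4 -> {2,0,1,3}
  //   }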
for (unsigned I = 0; I != ExtLen; ++I) { if (Mask.size() == ExtLen) break; if (!Used.test(I)) Mask.push_back(I); } SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec, DAG.getUNDEF(ExtTy), Mask); if (ExtLen == VecLen) return S; return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, VecTy, S); } } // Find most common element to initialize vector with. This is to avoid // unnecessary vinsert/valign for cases where the same value is present // many times. Creates a histogram of the vector's elements to find the // most common element n. assert(4*Words.size() == Subtarget.getVectorLength()); int VecHist[32]; int n = 0; for (unsigned i = 0; i != NumWords; ++i) { VecHist[i] = 0; if (Words[i].isUndef()) continue; for (unsigned j = i; j != NumWords; ++j) if (Words[i] == Words[j]) VecHist[i]++; if (VecHist[i] > VecHist[n]) n = i; } SDValue HalfV = getZero(dl, VecTy, DAG); if (VecHist[n] > 1) { SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]); HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy, {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)}); } SDValue HalfV0 = HalfV; SDValue HalfV1 = HalfV; // Construct two halves in parallel, then or them together. Rn and Rm count // number of rotations needed before the next element. One last rotation is // performed post-loop to position the last element. int Rn = 0, Rm = 0; SDValue Sn, Sm; SDValue N = HalfV0; SDValue M = HalfV1; for (unsigned i = 0; i != NumWords/2; ++i) { - // Rotate by element count since last insertion. if (Words[i] != Words[n] || VecHist[n] <= 1) { Sn = DAG.getConstant(Rn, dl, MVT::i32); HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn}); N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {HalfV0, Words[i]}); Rn = 0; } if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) { Sm = DAG.getConstant(Rm, dl, MVT::i32); HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm}); M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {HalfV1, Words[i+NumWords/2]}); Rm = 0; } Rn += 4; Rm += 4; } // Perform last rotation. Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32); Sm = DAG.getConstant(Rm, dl, MVT::i32); HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn}); HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm}); SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0); SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1); SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1}); SDValue OutV = DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV); return OutV; } SDValue HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl, unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const { MVT PredTy = ty(PredV); unsigned HwLen = Subtarget.getVectorLength(); MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); if (Subtarget.isHVXVectorType(PredTy, true)) { // Move the vector predicate SubV to a vector register, and scale it // down to match the representation (bytes per type element) that VecV // uses. The scaling down will pick every 2nd or 4th (every Scale-th // in general) element and put them at the front of the resulting // vector. This subvector will then be inserted into the Q2V of VecV. // To avoid having an operation that generates an illegal type (short // vector), generate a full size vector. // SDValue T = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, PredV); SmallVector Mask(HwLen); // Scale = BitBytes(PredV) / Given BitBytes. 
unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes); unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes; for (unsigned i = 0; i != HwLen; ++i) { unsigned Num = i % Scale; unsigned Off = i / Scale; Mask[BlockLen*Num + Off] = i; } SDValue S = DAG.getVectorShuffle(ByteTy, dl, T, DAG.getUNDEF(ByteTy), Mask); if (!ZeroFill) return S; // Fill the bytes beyond BlockLen with 0s. // V6_pred_scalar2 cannot fill the entire predicate, so it only works // when BlockLen < HwLen. assert(BlockLen < HwLen && "vsetq(v1) prerequisite"); MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen); SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy, {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG); SDValue M = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Q); return DAG.getNode(ISD::AND, dl, ByteTy, S, M); } // Make sure that this is a valid scalar predicate. assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1); unsigned Bytes = 8 / PredTy.getVectorNumElements(); SmallVector Words[2]; unsigned IdxW = 0; auto Lo32 = [&DAG, &dl] (SDValue P) { return DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, P); }; auto Hi32 = [&DAG, &dl] (SDValue P) { return DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, P); }; SDValue W0 = isUndef(PredV) ? DAG.getUNDEF(MVT::i64) : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV); Words[IdxW].push_back(Hi32(W0)); Words[IdxW].push_back(Lo32(W0)); while (Bytes < BitBytes) { IdxW ^= 1; Words[IdxW].clear(); if (Bytes < 4) { for (const SDValue &W : Words[IdxW ^ 1]) { SDValue T = expandPredicate(W, dl, DAG); Words[IdxW].push_back(Hi32(T)); Words[IdxW].push_back(Lo32(T)); } } else { for (const SDValue &W : Words[IdxW ^ 1]) { Words[IdxW].push_back(W); Words[IdxW].push_back(W); } } Bytes *= 2; } assert(Bytes == BitBytes); SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy); SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32); for (const SDValue &W : Words[IdxW]) { Vec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Vec, S4); Vec = DAG.getNode(HexagonISD::VINSERTW0, dl, ByteTy, Vec, W); } return Vec; } SDValue HexagonTargetLowering::buildHvxVectorPred(ArrayRef Values, const SDLoc &dl, MVT VecTy, SelectionDAG &DAG) const { // Construct a vector V of bytes, such that a comparison V >u 0 would // produce the required vector predicate. unsigned VecLen = Values.size(); unsigned HwLen = Subtarget.getVectorLength(); assert(VecLen <= HwLen || VecLen == 8*HwLen); SmallVector Bytes; bool AllT = true, AllF = true; auto IsTrue = [] (SDValue V) { if (const auto *N = dyn_cast(V.getNode())) return !N->isZero(); return false; }; auto IsFalse = [] (SDValue V) { if (const auto *N = dyn_cast(V.getNode())) return N->isZero(); return false; }; if (VecLen <= HwLen) { // In the hardware, each bit of a vector predicate corresponds to a byte // of a vector register. Calculate how many bytes does a bit of VecTy // correspond to. assert(HwLen % VecLen == 0); unsigned BitBytes = HwLen / VecLen; for (SDValue V : Values) { AllT &= IsTrue(V); AllF &= IsFalse(V); SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8) : DAG.getUNDEF(MVT::i8); for (unsigned B = 0; B != BitBytes; ++B) Bytes.push_back(Ext); } } else { // There are as many i1 values, as there are bits in a vector register. // Divide the values into groups of 8 and check that each group consists // of the same value (ignoring undefs). for (unsigned I = 0; I != VecLen; I += 8) { unsigned B = 0; // Find the first non-undef value in this group. 
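  // Illustrative sketch (not part of this patch): in the VecLen <= HwLen case
  // handled above, each i1 element is simply replicated into BitBytes
  // consecutive bytes of the byte vector that V2Q consumes at the end:
  //
  //   // HwLen % VecLen == 0, BitBytes = HwLen / VecLen
  //   for (unsigned i = 0; i != VecLen; ++i)
  //     for (unsigned b = 0; b != BitBytes; ++b)
  //       Bytes[i*BitBytes + b] = Values[i];      // 0/1 (undef stays undef)
  //
  // e.g. a v4i1 with HwLen == 64 gives BitBytes == 16, so each predicate bit
  // owns a 16-byte block of the 64-byte vector.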
for (; B != 8; ++B) { if (!Values[I+B].isUndef()) break; } SDValue F = Values[I+B]; AllT &= IsTrue(F); AllF &= IsFalse(F); SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8) : DAG.getUNDEF(MVT::i8); Bytes.push_back(Ext); // Verify that the rest of values in the group are the same as the // first. for (; B != 8; ++B) assert(Values[I+B].isUndef() || Values[I+B] == F); } } if (AllT) return DAG.getNode(HexagonISD::QTRUE, dl, VecTy); if (AllF) return DAG.getNode(HexagonISD::QFALSE, dl, VecTy); MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); SDValue ByteVec = buildHvxVectorReg(Bytes, dl, ByteTy, DAG); return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec); } SDValue HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { MVT ElemTy = ty(VecV).getVectorElementType(); unsigned ElemWidth = ElemTy.getSizeInBits(); assert(ElemWidth >= 8 && ElemWidth <= 32); (void)ElemWidth; SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG); SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {VecV, ByteIdx}); if (ElemTy == MVT::i32) return ExWord; // Have an extracted word, need to extract the smaller element out of it. // 1. Extract the bits of (the original) IdxV that correspond to the index // of the desired element in the 32-bit word. SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG); // 2. Extract the element from the word. SDValue ExVec = DAG.getBitcast(tyVector(ty(ExWord), ElemTy), ExWord); return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG); } SDValue HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { // Implement other return types if necessary. assert(ResTy == MVT::i1); unsigned HwLen = Subtarget.getVectorLength(); MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV); unsigned Scale = HwLen / ty(VecV).getVectorNumElements(); SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32); IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV); SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG); SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32); return getInstr(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG); } SDValue HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV, SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const { MVT ElemTy = ty(VecV).getVectorElementType(); unsigned ElemWidth = ElemTy.getSizeInBits(); assert(ElemWidth >= 8 && ElemWidth <= 32); (void)ElemWidth; auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV, SDValue ByteIdxV) { MVT VecTy = ty(VecV); unsigned HwLen = Subtarget.getVectorLength(); SDValue MaskV = DAG.getNode(ISD::AND, dl, MVT::i32, {ByteIdxV, DAG.getConstant(-4, dl, MVT::i32)}); SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV}); SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV}); SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32, {DAG.getConstant(HwLen, dl, MVT::i32), MaskV}); SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV}); return TorV; }; SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG); if (ElemTy == MVT::i32) return InsertWord(VecV, ValV, ByteIdx); // If this is not inserting a 32-bit word, convert it into such a thing. // 1. Extract the existing word from the target vector. 
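  // Illustrative sketch (not part of this patch): steps 1-3 below, modelled
  // on a plain 32-bit word. ElemWidth here is 8 or 16 (the 32-bit case has
  // already been handled).
  //
  //   uint32_t insertSubWord(uint32_t Word, uint32_t Val,
  //                          unsigned SubIdx, unsigned ElemWidth) {
  //     uint32_t M  = (1u << ElemWidth) - 1;
  //     unsigned Sh = SubIdx * ElemWidth;
  //     return (Word & ~(M << Sh)) | ((Val & M) << Sh);
  //   }
  //
  // The HVX version additionally has to pull the word out of the vector
  // register first and put it back afterwards.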
SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32, {ByteIdx, DAG.getConstant(2, dl, MVT::i32)}); SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx, dl, MVT::i32, DAG); // 2. Treating the extracted word as a 32-bit vector, insert the given // value into it. SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG); MVT SubVecTy = tyVector(ty(Ext), ElemTy); SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext), ValV, SubIdx, dl, ElemTy, DAG); // 3. Insert the 32-bit word back into the original vector. return InsertWord(VecV, Ins, ByteIdx); } SDValue HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV, SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const { unsigned HwLen = Subtarget.getVectorLength(); MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV); unsigned Scale = HwLen / ty(VecV).getVectorNumElements(); SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32); IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV); ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV); SDValue InsV = insertHvxElementReg(ByteVec, IdxV, ValV, dl, DAG); return DAG.getNode(HexagonISD::V2Q, dl, ty(VecV), InsV); } SDValue HexagonTargetLowering::extractHvxSubvectorReg(SDValue VecV, SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { MVT VecTy = ty(VecV); unsigned HwLen = Subtarget.getVectorLength(); unsigned Idx = cast(IdxV.getNode())->getZExtValue(); MVT ElemTy = VecTy.getVectorElementType(); unsigned ElemWidth = ElemTy.getSizeInBits(); // If the source vector is a vector pair, get the single vector containing // the subvector of interest. The subvector will never overlap two single // vectors. if (isHvxPairTy(VecTy)) { unsigned SubIdx; if (Idx * ElemWidth >= 8*HwLen) { SubIdx = Hexagon::vsub_hi; Idx -= VecTy.getVectorNumElements() / 2; } else { SubIdx = Hexagon::vsub_lo; } VecTy = typeSplit(VecTy).first; VecV = DAG.getTargetExtractSubreg(SubIdx, dl, VecTy, VecV); if (VecTy == ResTy) return VecV; } // The only meaningful subvectors of a single HVX vector are those that // fit in a scalar register. assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64); MVT WordTy = tyVector(VecTy, MVT::i32); SDValue WordVec = DAG.getBitcast(WordTy, VecV); unsigned WordIdx = (Idx*ElemWidth) / 32; SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32); SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG); if (ResTy.getSizeInBits() == 32) return DAG.getBitcast(ResTy, W0); SDValue W1Idx = DAG.getConstant(WordIdx+1, dl, MVT::i32); SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG); SDValue WW = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64, {W1, W0}); return DAG.getBitcast(ResTy, WW); } SDValue HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { MVT VecTy = ty(VecV); unsigned HwLen = Subtarget.getVectorLength(); MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV); // IdxV is required to be a constant. unsigned Idx = cast(IdxV.getNode())->getZExtValue(); unsigned ResLen = ResTy.getVectorNumElements(); unsigned BitBytes = HwLen / VecTy.getVectorNumElements(); unsigned Offset = Idx * BitBytes; SDValue Undef = DAG.getUNDEF(ByteTy); SmallVector Mask; if (Subtarget.isHVXVectorType(ResTy, true)) { // Converting between two vector predicates. 
Since the result is shorter // than the source, it will correspond to a vector predicate with the // relevant bits replicated. The replication count is the ratio of the // source and target vector lengths. unsigned Rep = VecTy.getVectorNumElements() / ResLen; assert(isPowerOf2_32(Rep) && HwLen % Rep == 0); for (unsigned i = 0; i != HwLen/Rep; ++i) { for (unsigned j = 0; j != Rep; ++j) Mask.push_back(i + Offset); } SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask); return DAG.getNode(HexagonISD::V2Q, dl, ResTy, ShuffV); } // Converting between a vector predicate and a scalar predicate. In the // vector predicate, a group of BitBytes bits will correspond to a single // i1 element of the source vector type. Those bits will all have the same // value. The same will be true for ByteVec, where each byte corresponds // to a bit in the vector predicate. // The algorithm is to traverse the ByteVec, going over the i1 values from // the source vector, and generate the corresponding representation in an // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the // elements so that the interesting 8 bytes will be in the low end of the // vector. unsigned Rep = 8 / ResLen; // Make sure the output fill the entire vector register, so repeat the // 8-byte groups as many times as necessary. for (unsigned r = 0; r != HwLen/ResLen; ++r) { // This will generate the indexes of the 8 interesting bytes. for (unsigned i = 0; i != ResLen; ++i) { for (unsigned j = 0; j != Rep; ++j) Mask.push_back(Offset + i*BitBytes); } } SDValue Zero = getZero(dl, MVT::i32, DAG); SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask); // Combine the two low words from ShuffV into a v8i8, and byte-compare // them against 0. SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero}); SDValue W1 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, DAG.getConstant(4, dl, MVT::i32)}); SDValue Vec64 = DAG.getNode(HexagonISD::COMBINE, dl, MVT::v8i8, {W1, W0}); return getInstr(Hexagon::A4_vcmpbgtui, dl, ResTy, {Vec64, DAG.getTargetConstant(0, dl, MVT::i32)}, DAG); } SDValue HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV, SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const { MVT VecTy = ty(VecV); MVT SubTy = ty(SubV); unsigned HwLen = Subtarget.getVectorLength(); MVT ElemTy = VecTy.getVectorElementType(); unsigned ElemWidth = ElemTy.getSizeInBits(); bool IsPair = isHvxPairTy(VecTy); MVT SingleTy = MVT::getVectorVT(ElemTy, (8*HwLen)/ElemWidth); // The two single vectors that VecV consists of, if it's a pair. SDValue V0, V1; SDValue SingleV = VecV; SDValue PickHi; if (IsPair) { V0 = DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, SingleTy, VecV); V1 = DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, SingleTy, VecV); SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(), dl, MVT::i32); PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGT); if (isHvxSingleTy(SubTy)) { if (const auto *CN = dyn_cast(IdxV.getNode())) { unsigned Idx = CN->getZExtValue(); assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2); unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi; return DAG.getTargetInsertSubreg(SubIdx, dl, VecTy, VecV, SubV); } // If IdxV is not a constant, generate the two variants: with the // SubV as the high and as the low subregister, and select the right // pair based on the IdxV. 
SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SubV, V1}); SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SubV}); return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo); } // The subvector being inserted must be entirely contained in one of // the vectors V0 or V1. Set SingleV to the correct one, and update // IdxV to be the index relative to the beginning of that vector. SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV); IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV); SingleV = DAG.getNode(ISD::SELECT, dl, SingleTy, PickHi, V1, V0); } // The only meaningful subvectors of a single HVX vector are those that // fit in a scalar register. assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64); // Convert IdxV to be index in bytes. auto *IdxN = dyn_cast(IdxV.getNode()); if (!IdxN || !IdxN->isZero()) { IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, DAG.getConstant(ElemWidth/8, dl, MVT::i32)); SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV); } // When inserting a single word, the rotation back to the original position // would be by HwLen-Idx, but if two words are inserted, it will need to be // by (HwLen-4)-Idx. unsigned RolBase = HwLen; if (VecTy.getSizeInBits() == 32) { SDValue V = DAG.getBitcast(MVT::i32, SubV); SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, V); } else { SDValue V = DAG.getBitcast(MVT::i64, SubV); SDValue R0 = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, V); SDValue R1 = DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, V); SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R0); SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, DAG.getConstant(4, dl, MVT::i32)); SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R1); RolBase = HwLen-4; } // If the vector wasn't ror'ed, don't ror it back. if (RolBase != 4 || !IdxN || !IdxN->isZero()) { SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32, DAG.getConstant(RolBase, dl, MVT::i32), IdxV); SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV); } if (IsPair) { SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SingleV, V1}); SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SingleV}); return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo); } return SingleV; } SDValue HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV, SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const { MVT VecTy = ty(VecV); MVT SubTy = ty(SubV); assert(Subtarget.isHVXVectorType(VecTy, true)); // VecV is an HVX vector predicate. SubV may be either an HVX vector // predicate as well, or it can be a scalar predicate. 
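  // Illustrative sketch (not part of this patch): the VROR/VINSERTW0/VROR
  // pattern used above is "rotate the target position down to offset 0,
  // overwrite, rotate back", which on a plain byte array would be:
  //
  //   void insertAt(std::vector<uint8_t> &Vec,
  //                 const std::vector<uint8_t> &Sub, unsigned Idx) {
  //     std::rotate(Vec.begin(), Vec.begin() + Idx, Vec.end()); // Idx -> 0
  //     std::copy(Sub.begin(), Sub.end(), Vec.begin());         // insert at 0
  //     std::rotate(Vec.begin(), Vec.end() - Idx, Vec.end());   // undo
  //   }
  //
  // RolBase only differs from HwLen because inserting the second word is
  // preceded by an extra 4-byte rotation.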
unsigned VecLen = VecTy.getVectorNumElements(); unsigned HwLen = Subtarget.getVectorLength(); assert(HwLen % VecLen == 0 && "Unexpected vector type"); unsigned Scale = VecLen / SubTy.getVectorNumElements(); unsigned BitBytes = HwLen / VecLen; unsigned BlockLen = HwLen / Scale; MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV); SDValue ByteSub = createHvxPrefixPred(SubV, dl, BitBytes, false, DAG); SDValue ByteIdx; auto *IdxN = dyn_cast(IdxV.getNode()); if (!IdxN || !IdxN->isZero()) { ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, DAG.getConstant(BitBytes, dl, MVT::i32)); ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx); } // ByteVec is the target vector VecV rotated in such a way that the // subvector should be inserted at index 0. Generate a predicate mask // and use vmux to do the insertion. assert(BlockLen < HwLen && "vsetq(v1) prerequisite"); MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen); SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy, {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG); ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG); // Rotate ByteVec back, and convert to a vector predicate. if (!IdxN || !IdxN->isZero()) { SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32); SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx); ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi); } return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec); } SDValue HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl, MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const { // Sign- and any-extending of a vector predicate to a vector register is // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and // a vector of 1s (where the 1s are of type matching the vector type). assert(Subtarget.isHVXVectorType(ResTy)); if (!ZeroExt) return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV); assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements()); SDValue True = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy, DAG.getConstant(1, dl, MVT::i32)); SDValue False = getZero(dl, ResTy, DAG); return DAG.getSelect(dl, ResTy, VecV, True, False); } SDValue HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1] // (i.e. the entire predicate register) to bits [0..HwLen-1] of a // vector register. The remaining bits of the vector register are // unspecified. MachineFunction &MF = DAG.getMachineFunction(); unsigned HwLen = Subtarget.getVectorLength(); MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); MVT PredTy = ty(VecQ); unsigned PredLen = PredTy.getVectorNumElements(); assert(HwLen % PredLen == 0); MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(8*HwLen/PredLen), PredLen); Type *Int8Ty = Type::getInt8Ty(*DAG.getContext()); SmallVector Tmp; // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,... // These are bytes with the LSB rotated left with respect to their index. 
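  // Illustrative sketch (not part of this patch): per group of eight
  // predicate bits, the select + vrmpy + rotate/OR sequence below computes
  // nothing more than "pack 8 bools into one byte":
  //
  //   uint8_t packByte(const bool Q[8]) {
  //     uint8_t B = 0;
  //     for (unsigned j = 0; j != 8; ++j)
  //       B |= uint8_t(Q[j]) << j;   // select keeps 1<<j, the adds/ORs merge
  //     return B;
  //   }
  //
  // vrmpy against 0x01010101 adds four of the selected bytes at a time; since
  // every byte contributes a distinct bit, the addition is the same as an OR.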
for (unsigned i = 0; i != HwLen/8; ++i) { for (unsigned j = 0; j != 8; ++j) Tmp.push_back(ConstantInt::get(Int8Ty, 1ull << j)); } Constant *CV = ConstantVector::get(Tmp); Align Alignment(HwLen); SDValue CP = LowerConstantPool(DAG.getConstantPool(CV, ByteTy, Alignment), DAG); SDValue Bytes = DAG.getLoad(ByteTy, dl, DAG.getEntryNode(), CP, MachinePointerInfo::getConstantPool(MF), Alignment); // Select the bytes that correspond to true bits in the vector predicate. SDValue Sel = DAG.getSelect(dl, VecTy, VecQ, DAG.getBitcast(VecTy, Bytes), getZero(dl, VecTy, DAG)); // Calculate the OR of all bytes in each group of 8. That will compress // all the individual bits into a single byte. // First, OR groups of 4, via vrmpy with 0x01010101. SDValue All1 = DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(1, dl, MVT::i32)); SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG); // Then rotate the accumulated vector by 4 bytes, and do the final OR. SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy, {Vrmpy, Vrmpy, DAG.getTargetConstant(4, dl, MVT::i32)}, DAG); SDValue Vor = DAG.getNode(ISD::OR, dl, ByteTy, {Vrmpy, Rot}); // Pick every 8th byte and coalesce them at the beginning of the output. // For symmetry, coalesce every 1+8th byte after that, then every 2+8th // byte and so on. SmallVector Mask; for (unsigned i = 0; i != HwLen; ++i) Mask.push_back((8*i) % HwLen + i/(HwLen/8)); SDValue Collect = DAG.getVectorShuffle(ByteTy, dl, Vor, DAG.getUNDEF(ByteTy), Mask); return DAG.getBitcast(ResTy, Collect); } SDValue HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) const { const SDLoc &dl(Op); MVT VecTy = ty(Op); unsigned Size = Op.getNumOperands(); SmallVector Ops; for (unsigned i = 0; i != Size; ++i) Ops.push_back(Op.getOperand(i)); if (VecTy.getVectorElementType() == MVT::i1) return buildHvxVectorPred(Ops, dl, VecTy, DAG); // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is // not a legal type, just bitcast the node to use i16 // types and bitcast the result back to f16 if (VecTy.getVectorElementType() == MVT::f16) { SmallVector NewOps; for (unsigned i = 0; i != Size; i++) NewOps.push_back(DAG.getBitcast(MVT::i16, Ops[i])); SDValue T0 = DAG.getNode(ISD::BUILD_VECTOR, dl, tyVector(VecTy, MVT::i16), NewOps); return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0); } if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) { ArrayRef A(Ops); MVT SingleTy = typeSplit(VecTy).first; SDValue V0 = buildHvxVectorReg(A.take_front(Size/2), dl, SingleTy, DAG); SDValue V1 = buildHvxVectorReg(A.drop_front(Size/2), dl, SingleTy, DAG); return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1); } return buildHvxVectorReg(Ops, dl, VecTy, DAG); } SDValue HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG) const { const SDLoc &dl(Op); MVT VecTy = ty(Op); MVT ArgTy = ty(Op.getOperand(0)); if (ArgTy == MVT::f16) { MVT SplatTy = MVT::getVectorVT(MVT::i16, VecTy.getVectorNumElements()); SDValue ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(0)); SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16); SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, SplatTy, ToInt32); return DAG.getBitcast(VecTy, Splat); } return SDValue(); } SDValue HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG) const { // Vector concatenation of two integer (non-bool) vectors does not need // special lowering. Custom-lower concats of bool vectors and expand // concats of more than 2 vectors. 
MVT VecTy = ty(Op); const SDLoc &dl(Op); unsigned NumOp = Op.getNumOperands(); if (VecTy.getVectorElementType() != MVT::i1) { if (NumOp == 2) return Op; // Expand the other cases into a build-vector. SmallVector Elems; for (SDValue V : Op.getNode()->ops()) DAG.ExtractVectorElements(V, Elems); // A vector of i16 will be broken up into a build_vector of i16's. // This is a problem, since at the time of operation legalization, // all operations are expected to be type-legalized, and i16 is not // a legal type. If any of the extracted elements is not of a valid // type, sign-extend it to a valid one. for (unsigned i = 0, e = Elems.size(); i != e; ++i) { SDValue V = Elems[i]; MVT Ty = ty(V); if (!isTypeLegal(Ty)) { EVT NTy = getTypeToTransformTo(*DAG.getContext(), Ty); if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { Elems[i] = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NTy, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NTy, V.getOperand(0), V.getOperand(1)), DAG.getValueType(Ty)); continue; } // A few less complicated cases. switch (V.getOpcode()) { case ISD::Constant: Elems[i] = DAG.getSExtOrTrunc(V, dl, NTy); break; case ISD::UNDEF: Elems[i] = DAG.getUNDEF(NTy); break; case ISD::TRUNCATE: Elems[i] = V.getOperand(0); break; default: llvm_unreachable("Unexpected vector element"); } } } return DAG.getBuildVector(VecTy, dl, Elems); } assert(VecTy.getVectorElementType() == MVT::i1); unsigned HwLen = Subtarget.getVectorLength(); assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0); SDValue Op0 = Op.getOperand(0); // If the operands are HVX types (i.e. not scalar predicates), then // defer the concatenation, and create QCAT instead. if (Subtarget.isHVXVectorType(ty(Op0), true)) { if (NumOp == 2) return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1)); ArrayRef U(Op.getNode()->ops()); SmallVector SV(U.begin(), U.end()); ArrayRef Ops(SV); MVT HalfTy = typeSplit(VecTy).first; SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy, Ops.take_front(NumOp/2)); SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy, Ops.take_back(NumOp/2)); return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1); } // Count how many bytes (in a vector register) each bit in VecTy // corresponds to. unsigned BitBytes = HwLen / VecTy.getVectorNumElements(); SmallVector Prefixes; for (SDValue V : Op.getNode()->op_values()) { SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG); Prefixes.push_back(P); } unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements(); MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); SDValue S = DAG.getConstant(InpLen*BitBytes, dl, MVT::i32); SDValue Res = getZero(dl, ByteTy, DAG); for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) { Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S); Res = DAG.getNode(ISD::OR, dl, ByteTy, Res, Prefixes[e-i-1]); } return DAG.getNode(HexagonISD::V2Q, dl, VecTy, Res); } SDValue HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) const { // Change the type of the extracted element to i32. 
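  // Illustrative sketch (not part of this patch): extractHvxElementReg, used
  // below, first pulls out the containing 32-bit word with VEXTRACTW and
  // then, for i8/i16 elements, reduces to the scalar operation
  //
  //   uint32_t extractSubWord(uint32_t Word, unsigned SubIdx,
  //                           unsigned ElemWidth) {         // 8 or 16
  //     return (Word >> (SubIdx * ElemWidth)) & ((1u << ElemWidth) - 1);
  //   }
  //
  // where SubIdx = Idx & (32/ElemWidth - 1), i.e. the element's position
  // inside its word.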
SDValue VecV = Op.getOperand(0); MVT ElemTy = ty(VecV).getVectorElementType(); const SDLoc &dl(Op); SDValue IdxV = Op.getOperand(1); if (ElemTy == MVT::i1) return extractHvxElementPred(VecV, IdxV, dl, ty(Op), DAG); return extractHvxElementReg(VecV, IdxV, dl, ty(Op), DAG); } SDValue HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) const { const SDLoc &dl(Op); MVT VecTy = ty(Op); SDValue VecV = Op.getOperand(0); SDValue ValV = Op.getOperand(1); SDValue IdxV = Op.getOperand(2); MVT ElemTy = ty(VecV).getVectorElementType(); if (ElemTy == MVT::i1) return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG); if (ElemTy == MVT::f16) { SDValue T0 = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, tyVector(VecTy, MVT::i16), DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV), DAG.getBitcast(MVT::i16, ValV), IdxV); return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0); } return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG); } SDValue HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG) const { SDValue SrcV = Op.getOperand(0); MVT SrcTy = ty(SrcV); MVT DstTy = ty(Op); SDValue IdxV = Op.getOperand(1); unsigned Idx = cast(IdxV.getNode())->getZExtValue(); assert(Idx % DstTy.getVectorNumElements() == 0); (void)Idx; const SDLoc &dl(Op); MVT ElemTy = SrcTy.getVectorElementType(); if (ElemTy == MVT::i1) return extractHvxSubvectorPred(SrcV, IdxV, dl, DstTy, DAG); return extractHvxSubvectorReg(SrcV, IdxV, dl, DstTy, DAG); } SDValue HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG) const { // Idx does not need to be a constant. SDValue VecV = Op.getOperand(0); SDValue ValV = Op.getOperand(1); SDValue IdxV = Op.getOperand(2); const SDLoc &dl(Op); MVT VecTy = ty(VecV); MVT ElemTy = VecTy.getVectorElementType(); if (ElemTy == MVT::i1) return insertHvxSubvectorPred(VecV, ValV, IdxV, dl, DAG); return insertHvxSubvectorReg(VecV, ValV, IdxV, dl, DAG); } SDValue HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const { // Lower any-extends of boolean vectors to sign-extends, since they // translate directly to Q2V. Zero-extending could also be done equally // fast, but Q2V is used/recognized in more places. // For all other vectors, use zero-extend. MVT ResTy = ty(Op); SDValue InpV = Op.getOperand(0); MVT ElemTy = ty(InpV).getVectorElementType(); if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy)) return LowerHvxSignExt(Op, DAG); return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV); } SDValue HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const { MVT ResTy = ty(Op); SDValue InpV = Op.getOperand(0); MVT ElemTy = ty(InpV).getVectorElementType(); if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy)) return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), false, DAG); return Op; } SDValue HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const { MVT ResTy = ty(Op); SDValue InpV = Op.getOperand(0); MVT ElemTy = ty(InpV).getVectorElementType(); if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy)) return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), true, DAG); return Op; } SDValue HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const { // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight): // cttz(x) = bitwidth(x) - ctlz(~x & (x-1)) const SDLoc &dl(Op); MVT ResTy = ty(Op); SDValue InpV = Op.getOperand(0); assert(ResTy == ty(InpV)); // Calculate the vectors of 1 and bitwidth(x). 
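  // Illustrative worked example (not part of this patch) of the identity
  // above, for one 8-bit lane:
  //
  //   x          = 0b00001100   // 12, cttz(x) = 2
  //   x - 1      = 0b00001011
  //   ~x         = 0b11110011
  //   ~x & (x-1) = 0b00000011   // ctlz = 6
  //   8 - 6      = 2            // == cttz(x)
  //
  // x == 0 also works out: ~0 & (0-1) = 0xff, ctlz = 0, and 8 - 0 = 8.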
MVT ElemTy = ty(InpV).getVectorElementType(); unsigned ElemWidth = ElemTy.getSizeInBits(); SDValue Vec1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy, DAG.getConstant(1, dl, MVT::i32)); SDValue VecW = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy, DAG.getConstant(ElemWidth, dl, MVT::i32)); SDValue VecN1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy, DAG.getConstant(-1, dl, MVT::i32)); // Do not use DAG.getNOT, because that would create BUILD_VECTOR with // a BITCAST. Here we can skip the BITCAST (so we don't have to handle // it separately in custom combine or selection). SDValue A = DAG.getNode(ISD::AND, dl, ResTy, {DAG.getNode(ISD::XOR, dl, ResTy, {InpV, VecN1}), DAG.getNode(ISD::SUB, dl, ResTy, {InpV, Vec1})}); return DAG.getNode(ISD::SUB, dl, ResTy, {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)}); } SDValue HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const { MVT ResTy = ty(Op); assert(ResTy.isVector()); const SDLoc &dl(Op); SmallVector ShuffMask; MVT ElemTy = ResTy.getVectorElementType(); unsigned VecLen = ResTy.getVectorNumElements(); SDValue Vs = Op.getOperand(0); SDValue Vt = Op.getOperand(1); bool IsSigned = Op.getOpcode() == ISD::MULHS; if (ElemTy == MVT::i8 || ElemTy == MVT::i16) { // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...), // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo, // where Lo = (a0*b0, a2*b2, ...), Hi = (a1*b1, a3*b3, ...). // For i16, use V6_vmpyhv, which behaves in an analogous way to // V6_vmpybv: results Lo and Hi are products of even/odd elements // respectively. MVT ExtTy = typeExtElem(ResTy, 2); unsigned MpyOpc = ElemTy == MVT::i8 ? (IsSigned ? Hexagon::V6_vmpybv : Hexagon::V6_vmpyubv) : (IsSigned ? Hexagon::V6_vmpyhv : Hexagon::V6_vmpyuhv); SDValue M = getInstr(MpyOpc, dl, ExtTy, {Vs, Vt}, DAG); // Discard low halves of the resulting values, collect the high halves. for (unsigned I = 0; I < VecLen; I += 2) { ShuffMask.push_back(I+1); // Pick even element. ShuffMask.push_back(I+VecLen+1); // Pick odd element. } VectorPair P = opSplit(opCastElem(M, ElemTy, DAG), dl, DAG); SDValue BS = getByteShuffle(dl, P.first, P.second, ShuffMask, DAG); return DAG.getBitcast(ResTy, BS); } assert(ElemTy == MVT::i32); SDValue S16 = DAG.getConstant(16, dl, MVT::i32); auto MulHS_V60 = [&](SDValue Vs, SDValue Vt) { // mulhs(Vs,Vt) = // = [(Hi(Vs)*2^16 + Lo(Vs)) *s (Hi(Vt)*2^16 + Lo(Vt))] >> 32 // = [Hi(Vs)*2^16 *s Hi(Vt)*2^16 + Hi(Vs) *su Lo(Vt)*2^16 // + Lo(Vs) *us (Hi(Vt)*2^16 + Lo(Vt))] >> 32 // = [Hi(Vs) *s Hi(Vt)*2^32 + Hi(Vs) *su Lo(Vt)*2^16 // + Lo(Vs) *us Vt] >> 32 // The low half of Lo(Vs)*Lo(Vt) will be discarded (it's not added to // anything, so it cannot produce any carry over to higher bits), // so everything in [] can be shifted by 16 without loss of precision. 
// = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + Lo(Vs)*Vt >> 16] >> 16 // = [Hi(Vs) *s Hi(Vt)*2^16 + Hi(Vs)*su Lo(Vt) + V6_vmpyewuh(Vs,Vt)] >> 16 // Denote Hi(Vs) = Vs': // = [Vs'*s Hi(Vt)*2^16 + Vs' *su Lo(Vt) + V6_vmpyewuh(Vt,Vs)] >> 16 // = Vs'*s Hi(Vt) + (V6_vmpyiewuh(Vs',Vt) + V6_vmpyewuh(Vt,Vs)) >> 16 SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, ResTy, {Vt, Vs}, DAG); // Get Vs': SDValue S0 = getInstr(Hexagon::V6_vasrw, dl, ResTy, {Vs, S16}, DAG); SDValue T1 = getInstr(Hexagon::V6_vmpyiewuh_acc, dl, ResTy, {T0, S0, Vt}, DAG); // Shift by 16: SDValue S2 = getInstr(Hexagon::V6_vasrw, dl, ResTy, {T1, S16}, DAG); // Get Vs'*Hi(Vt): SDValue T2 = getInstr(Hexagon::V6_vmpyiowh, dl, ResTy, {S0, Vt}, DAG); // Add: SDValue T3 = DAG.getNode(ISD::ADD, dl, ResTy, {S2, T2}); return T3; }; auto MulHS_V62 = [&](SDValue Vs, SDValue Vt) { MVT PairTy = typeJoin({ResTy, ResTy}); SDValue T0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy, {Vs, Vt}, DAG); SDValue T1 = getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy, {T0, Vs, Vt}, DAG); return opSplit(T1, dl, DAG).second; }; if (IsSigned) { if (Subtarget.useHVXV62Ops()) return MulHS_V62(Vs, Vt); return MulHS_V60(Vs, Vt); } // Unsigned mulhw. (Would expansion using signed mulhw be better?) auto LoVec = [&DAG,ResTy,dl] (SDValue Pair) { return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, ResTy, Pair); }; auto HiVec = [&DAG,ResTy,dl] (SDValue Pair) { return DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, ResTy, Pair); }; MVT PairTy = typeJoin({ResTy, ResTy}); SDValue P = getInstr(Hexagon::V6_lvsplatw, dl, ResTy, {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG); // Multiply-unsigned halfwords: // LoVec = Vs.uh[2i] * Vt.uh[2i], // HiVec = Vs.uh[2i+1] * Vt.uh[2i+1] SDValue T0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {Vs, Vt}, DAG); // The low halves in the LoVec of the pair can be discarded. They are // not added to anything (in the full-precision product), so they cannot // produce a carry into the higher bits. SDValue T1 = getInstr(Hexagon::V6_vlsrw, dl, ResTy, {LoVec(T0), S16}, DAG); // Swap low and high halves in Vt, and do the halfword multiplication // to get products Vs.uh[2i] * Vt.uh[2i+1] and Vs.uh[2i+1] * Vt.uh[2i]. SDValue D0 = getInstr(Hexagon::V6_vdelta, dl, ResTy, {Vt, P}, DAG); SDValue T2 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {Vs, D0}, DAG); // T2 has mixed products of halfwords: Lo(Vt)*Hi(Vs) and Hi(Vt)*Lo(Vs). // These products are words, but cannot be added directly because the // sums could overflow. Add these products, by halfwords, where each sum // of a pair of halfwords gives a word. SDValue T3 = getInstr(Hexagon::V6_vadduhw, dl, PairTy, {LoVec(T2), HiVec(T2)}, DAG); // Add the high halfwords from the products of the low halfwords. 
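  // Illustrative sketch (not part of this patch): a scalar reference for the
  // unsigned-mulh expansion implemented here, using the same 16-bit
  // decomposition:
  //
  //   uint32_t mulhu32(uint32_t a, uint32_t b) {
  //     uint32_t al = a & 0xffff, ah = a >> 16;
  //     uint32_t bl = b & 0xffff, bh = b >> 16;
  //     uint32_t lo  = al * bl;
  //     uint64_t mid = (uint64_t)ah * bl + (uint64_t)al * bh + (lo >> 16);
  //     return ah * bh + (uint32_t)(mid >> 16);
  //   }
  //
  // The vector code cannot widen lanes to 64 bits, so it adds the two mixed
  // products by halfwords (V6_vadduhw) to keep the sums from overflowing.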
SDValue T4 = DAG.getNode(ISD::ADD, dl, ResTy, {T1, LoVec(T3)}); SDValue T5 = getInstr(Hexagon::V6_vlsrw, dl, ResTy, {T4, S16}, DAG); SDValue T6 = DAG.getNode(ISD::ADD, dl, ResTy, {HiVec(T0), HiVec(T3)}); SDValue T7 = DAG.getNode(ISD::ADD, dl, ResTy, {T5, T6}); return T7; } SDValue HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const { SDValue Val = Op.getOperand(0); MVT ResTy = ty(Op); MVT ValTy = ty(Val); const SDLoc &dl(Op); if (isHvxBoolTy(ValTy) && ResTy.isScalarInteger()) { unsigned HwLen = Subtarget.getVectorLength(); MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4); SDValue VQ = compressHvxPred(Val, dl, WordTy, DAG); unsigned BitWidth = ResTy.getSizeInBits(); if (BitWidth < 64) { SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32), dl, MVT::i32, DAG); if (BitWidth == 32) return W0; assert(BitWidth < 32u); return DAG.getZExtOrTrunc(W0, dl, ResTy); } // The result is >= 64 bits. The only options are 64 or 128. assert(BitWidth == 64 || BitWidth == 128); SmallVector Words; for (unsigned i = 0; i != BitWidth/32; ++i) { SDValue W = extractHvxElementReg( VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG); Words.push_back(W); } SmallVector Combines; assert(Words.size() % 2 == 0); for (unsigned i = 0, e = Words.size(); i < e; i += 2) { SDValue C = DAG.getNode( HexagonISD::COMBINE, dl, MVT::i64, {Words[i+1], Words[i]}); Combines.push_back(C); } if (BitWidth == 64) return Combines[0]; return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines); } if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) { // Handle bitcast from i128 -> v128i1 and i64 -> v64i1. unsigned BitWidth = ValTy.getSizeInBits(); unsigned HwLen = Subtarget.getVectorLength(); assert(BitWidth == HwLen); MVT ValAsVecTy = MVT::getVectorVT(MVT::i8, BitWidth / 8); SDValue ValAsVec = DAG.getBitcast(ValAsVecTy, Val); // Splat each byte of Val 8 times. // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8] // where b0, b1,..., b15 are least to most significant bytes of I. SmallVector Bytes; // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,... // These are bytes with the LSB rotated left with respect to their index. SmallVector Tmp; for (unsigned I = 0; I != HwLen / 8; ++I) { SDValue Idx = DAG.getConstant(I, dl, MVT::i32); SDValue Byte = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, ValAsVec, Idx); for (unsigned J = 0; J != 8; ++J) { Bytes.push_back(Byte); Tmp.push_back(DAG.getConstant(1ull << J, dl, MVT::i8)); } } MVT ConstantVecTy = MVT::getVectorVT(MVT::i8, HwLen); SDValue ConstantVec = DAG.getBuildVector(ConstantVecTy, dl, Tmp); SDValue I2V = buildHvxVectorReg(Bytes, dl, ConstantVecTy, DAG); // Each Byte in the I2V will be set iff corresponding bit is set in Val. I2V = DAG.getNode(ISD::AND, dl, ConstantVecTy, {I2V, ConstantVec}); return DAG.getNode(HexagonISD::V2Q, dl, ResTy, I2V); } return Op; } SDValue HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const { // Sign- and zero-extends are legal. 
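  // Illustrative note (not part of this patch): the scalar-to-predicate
  // bitcast handled above boils down to "lane k of the v<HwLen>i1 result is
  // bit k of the integer" (shown for the i64 -> v64i1 case):
  //
  //   bool lane(uint64_t Val, unsigned K) {     // K < 64
  //     return (Val >> K) & 1;                  // byte K/8, bit K%8
  //   }
  //
  // Splatting each byte 8 times, ANDing with 01,02,04,...,80 and comparing
  // against zero (V2Q) computes exactly this, one lane per byte.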
assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG); return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op), Op.getOperand(0)); } SDValue HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const { MVT ResTy = ty(Op); if (ResTy.getVectorElementType() != MVT::i1) return Op; const SDLoc &dl(Op); unsigned HwLen = Subtarget.getVectorLength(); unsigned VecLen = ResTy.getVectorNumElements(); assert(HwLen % VecLen == 0); unsigned ElemSize = HwLen / VecLen; MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(ElemSize * 8), VecLen); SDValue S = DAG.getNode(ISD::SELECT, dl, VecTy, Op.getOperand(0), DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(1)), DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(2))); return DAG.getNode(HexagonISD::V2Q, dl, ResTy, S); } SDValue HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const { if (SDValue S = getVectorShiftByInt(Op, DAG)) return S; return Op; } SDValue HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const { const SDLoc &dl(Op); MVT ResTy = ty(Op); unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); bool Use64b = Subtarget.useHVX64BOps(); unsigned IntPredCast = Use64b ? Intrinsic::hexagon_V6_pred_typecast : Intrinsic::hexagon_V6_pred_typecast_128B; if (IntNo == IntPredCast) { SDValue Vs = Op.getOperand(1); MVT OpTy = ty(Vs); if (isHvxBoolTy(ResTy) && isHvxBoolTy(OpTy)) { if (ResTy == OpTy) return Vs; return DAG.getNode(HexagonISD::TYPECAST, dl, ResTy, Vs); } } return Op; } SDValue HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const { const SDLoc &dl(Op); unsigned HwLen = Subtarget.getVectorLength(); MachineFunction &MF = DAG.getMachineFunction(); auto *MaskN = cast(Op.getNode()); SDValue Mask = MaskN->getMask(); SDValue Chain = MaskN->getChain(); SDValue Base = MaskN->getBasePtr(); auto *MemOp = MF.getMachineMemOperand(MaskN->getMemOperand(), 0, HwLen); unsigned Opc = Op->getOpcode(); assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE); if (Opc == ISD::MLOAD) { MVT ValTy = ty(Op); SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MemOp); SDValue Thru = cast(MaskN)->getPassThru(); if (isUndef(Thru)) return Load; SDValue VSel = DAG.getNode(ISD::VSELECT, dl, ValTy, Mask, Load, Thru); return DAG.getMergeValues({VSel, Load.getValue(1)}, dl); } // MSTORE // HVX only has aligned masked stores. // TODO: Fold negations of the mask into the store. unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai; SDValue Value = cast(MaskN)->getValue(); SDValue Offset0 = DAG.getTargetConstant(0, dl, ty(Base)); if (MaskN->getAlign().value() % HwLen == 0) { SDValue Store = getInstr(StoreOpc, dl, MVT::Other, {Mask, Base, Offset0, Value, Chain}, DAG); DAG.setNodeMemRefs(cast(Store.getNode()), {MemOp}); return Store; } // Unaligned case. auto StoreAlign = [&](SDValue V, SDValue A) { SDValue Z = getZero(dl, ty(V), DAG); // TODO: use funnel shifts? // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the // upper half. 
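  // Illustrative note (not part of this patch): with r = Base % HwLen, the
  // unaligned HwLen-byte masked store below straddles two aligned blocks:
  // bytes [0, HwLen-r) of the value land in the first block at offset r, and
  // bytes [HwLen-r, HwLen) land at the start of the second block. The two
  // vlalign results are these two shifted images, with the mask shifted the
  // same way so the untouched bytes stay predicated off.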
SDValue LoV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {V, Z, A}, DAG); SDValue HiV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {Z, V, A}, DAG); return std::make_pair(LoV, HiV); }; MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen); SDValue MaskV = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Mask); VectorPair Tmp = StoreAlign(MaskV, Base); VectorPair MaskU = {DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.first), DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.second)}; VectorPair ValueU = StoreAlign(Value, Base); SDValue Offset1 = DAG.getTargetConstant(HwLen, dl, MVT::i32); SDValue StoreLo = getInstr(StoreOpc, dl, MVT::Other, {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG); SDValue StoreHi = getInstr(StoreOpc, dl, MVT::Other, {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG); DAG.setNodeMemRefs(cast(StoreLo.getNode()), {MemOp}); DAG.setNodeMemRefs(cast(StoreHi.getNode()), {MemOp}); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, {StoreLo, StoreHi}); } SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op, SelectionDAG &DAG) const { // This conversion only applies to QFloat. assert(Subtarget.useHVXQFloatOps()); assert(Op->getOpcode() == ISD::FP_EXTEND); MVT VecTy = ty(Op); MVT ArgTy = ty(Op.getOperand(0)); const SDLoc &dl(Op); assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16); SDValue F16Vec = Op.getOperand(0); APFloat FloatVal = APFloat(1.0f); bool Ignored; FloatVal.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored); SDValue Fp16Ones = DAG.getConstantFP(FloatVal, dl, ArgTy); SDValue VmpyVec = getInstr(Hexagon::V6_vmpy_qf32_hf, dl, VecTy, {F16Vec, Fp16Ones}, DAG); MVT HalfTy = typeSplit(VecTy).first; VectorPair Pair = opSplit(VmpyVec, dl, DAG); SDValue LoVec = getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.first}, DAG); SDValue HiVec = getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.second}, DAG); SDValue ShuffVec = getInstr(Hexagon::V6_vshuffvdd, dl, VecTy, {HiVec, LoVec, DAG.getConstant(-4, dl, MVT::i32)}, DAG); return ShuffVec; } SDValue HexagonTargetLowering::LowerHvxConvertFpInt(SDValue Op, SelectionDAG &DAG) const { // This conversion only applies to IEEE. assert(Subtarget.useHVXIEEEFPOps()); unsigned Opc = Op.getOpcode(); // Catch invalid conversion ops (just in case). assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT || Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP); MVT ResTy = ty(Op); if (Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT) { MVT FpTy = ty(Op.getOperand(0)).getVectorElementType(); // There are only conversions of f16. if (FpTy != MVT::f16) return SDValue(); MVT IntTy = ResTy.getVectorElementType(); // Other int types aren't legal in HVX, so we shouldn't see them here. assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32); // Conversions to i8 and i16 are legal. if (IntTy == MVT::i8 || IntTy == MVT::i16) return Op; } else { // Converting int -> fp. if (ResTy.getVectorElementType() != MVT::f16) return SDValue(); MVT IntTy = ty(Op.getOperand(0)).getVectorElementType(); // Other int types aren't legal in HVX, so we shouldn't see them here. assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32); // i8, i16 -> f16 is legal. 
if (IntTy == MVT::i8 || IntTy == MVT::i16) return Op; } return SDValue(); } SDValue HexagonTargetLowering::SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const { assert(!Op.isMachineOpcode()); SmallVector OpsL, OpsH; const SDLoc &dl(Op); auto SplitVTNode = [&DAG,this] (const VTSDNode *N) { MVT Ty = typeSplit(N->getVT().getSimpleVT()).first; SDValue TV = DAG.getValueType(Ty); return std::make_pair(TV, TV); }; for (SDValue A : Op.getNode()->ops()) { VectorPair P = Subtarget.isHVXVectorType(ty(A), true) ? opSplit(A, dl, DAG) : std::make_pair(A, A); // Special case for type operand. if (Op.getOpcode() == ISD::SIGN_EXTEND_INREG) { if (const auto *N = dyn_cast(A.getNode())) P = SplitVTNode(N); } OpsL.push_back(P.first); OpsH.push_back(P.second); } MVT ResTy = ty(Op); MVT HalfTy = typeSplit(ResTy).first; SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL); SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH); SDValue S = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, L, H); return S; } SDValue HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const { auto *MemN = cast(Op.getNode()); MVT MemTy = MemN->getMemoryVT().getSimpleVT(); if (!isHvxPairTy(MemTy)) return Op; const SDLoc &dl(Op); unsigned HwLen = Subtarget.getVectorLength(); MVT SingleTy = typeSplit(MemTy).first; SDValue Chain = MemN->getChain(); SDValue Base0 = MemN->getBasePtr(); SDValue Base1 = DAG.getMemBasePlusOffset(Base0, TypeSize::Fixed(HwLen), dl); unsigned MemOpc = MemN->getOpcode(); MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr; if (MachineMemOperand *MMO = MemN->getMemOperand()) { MachineFunction &MF = DAG.getMachineFunction(); uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE) ? (uint64_t)MemoryLocation::UnknownSize : HwLen; MOp0 = MF.getMachineMemOperand(MMO, 0, MemSize); MOp1 = MF.getMachineMemOperand(MMO, HwLen, MemSize); } if (MemOpc == ISD::LOAD) { assert(cast(Op)->isUnindexed()); SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0); SDValue Load1 = DAG.getLoad(SingleTy, dl, Chain, Base1, MOp1); return DAG.getMergeValues( { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1), DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Load0.getValue(1), Load1.getValue(1)) }, dl); } if (MemOpc == ISD::STORE) { assert(cast(Op)->isUnindexed()); VectorPair Vals = opSplit(cast(Op)->getValue(), dl, DAG); SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0); SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1); } assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE); auto MaskN = cast(Op); assert(MaskN->isUnindexed()); VectorPair Masks = opSplit(MaskN->getMask(), dl, DAG); SDValue Offset = DAG.getUNDEF(MVT::i32); if (MemOpc == ISD::MLOAD) { VectorPair Thru = opSplit(cast(Op)->getPassThru(), dl, DAG); SDValue MLoad0 = DAG.getMaskedLoad(SingleTy, dl, Chain, Base0, Offset, Masks.first, Thru.first, SingleTy, MOp0, ISD::UNINDEXED, ISD::NON_EXTLOAD, false); SDValue MLoad1 = DAG.getMaskedLoad(SingleTy, dl, Chain, Base1, Offset, Masks.second, Thru.second, SingleTy, MOp1, ISD::UNINDEXED, ISD::NON_EXTLOAD, false); return DAG.getMergeValues( { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, MLoad0, MLoad1), DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MLoad0.getValue(1), MLoad1.getValue(1)) }, dl); } if (MemOpc == ISD::MSTORE) { VectorPair Vals = opSplit(cast(Op)->getValue(), dl, DAG); SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Vals.first, Base0, Offset, Masks.first, SingleTy, MOp0, ISD::UNINDEXED, 
false, false); SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Vals.second, Base1, Offset, Masks.second, SingleTy, MOp1, ISD::UNINDEXED, false, false); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MStore0, MStore1); } std::string Name = "Unexpected operation: " + Op->getOperationName(&DAG); llvm_unreachable(Name.c_str()); } SDValue HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const { const SDLoc &dl(Op); auto *LoadN = cast(Op.getNode()); assert(LoadN->isUnindexed() && "Not widening indexed loads yet"); assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 && "Not widening loads of i1 yet"); SDValue Chain = LoadN->getChain(); SDValue Base = LoadN->getBasePtr(); SDValue Offset = DAG.getUNDEF(MVT::i32); MVT ResTy = ty(Op); unsigned HwLen = Subtarget.getVectorLength(); unsigned ResLen = ResTy.getStoreSize(); assert(ResLen < HwLen && "vsetq(v1) prerequisite"); MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen); SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy, {DAG.getConstant(ResLen, dl, MVT::i32)}, DAG); MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen); MachineFunction &MF = DAG.getMachineFunction(); auto *MemOp = MF.getMachineMemOperand(LoadN->getMemOperand(), 0, HwLen); SDValue Load = DAG.getMaskedLoad(LoadTy, dl, Chain, Base, Offset, Mask, DAG.getUNDEF(LoadTy), LoadTy, MemOp, ISD::UNINDEXED, ISD::NON_EXTLOAD, false); SDValue Value = opCastElem(Load, ResTy.getVectorElementType(), DAG); return DAG.getMergeValues({Value, Chain}, dl); } SDValue HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const { const SDLoc &dl(Op); auto *StoreN = cast(Op.getNode()); assert(StoreN->isUnindexed() && "Not widening indexed stores yet"); assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 && "Not widening stores of i1 yet"); SDValue Chain = StoreN->getChain(); SDValue Base = StoreN->getBasePtr(); SDValue Offset = DAG.getUNDEF(MVT::i32); SDValue Value = opCastElem(StoreN->getValue(), MVT::i8, DAG); MVT ValueTy = ty(Value); unsigned ValueLen = ValueTy.getVectorNumElements(); unsigned HwLen = Subtarget.getVectorLength(); assert(isPowerOf2_32(ValueLen)); for (unsigned Len = ValueLen; Len < HwLen; ) { Value = opJoin({DAG.getUNDEF(ty(Value)), Value}, dl, DAG); Len = ty(Value).getVectorNumElements(); // This is Len *= 2 } assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia assert(ValueLen < HwLen && "vsetq(v1) prerequisite"); MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen); SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy, {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG); MachineFunction &MF = DAG.getMachineFunction(); auto *MemOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen); return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, Mask, ty(Value), MemOp, ISD::UNINDEXED, false, false); } SDValue HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const { const SDLoc &dl(Op); SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1); MVT ElemTy = ty(Op0).getVectorElementType(); unsigned HwLen = Subtarget.getVectorLength(); unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits(); assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen); MVT WideOpTy = MVT::getVectorVT(ElemTy, WideOpLen); if (!Subtarget.isHVXVectorType(WideOpTy, true)) return SDValue(); SDValue WideOp0 = appendUndef(Op0, WideOpTy, DAG); SDValue WideOp1 = appendUndef(Op1, WideOpTy, DAG); EVT ResTy = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), WideOpTy); SDValue SetCC = DAG.getNode(ISD::SETCC, dl, ResTy, {WideOp0, 
                              WideOp1, Op.getOperand(2)});

  EVT RetTy = getTypeToTransformTo(*DAG.getContext(), ty(Op));
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RetTy,
                     {SetCC, getZero(dl, MVT::i32, DAG)});
}

SDValue
HexagonTargetLowering::WidenHvxExtend(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  unsigned HwWidth = 8*Subtarget.getVectorLength();

  SDValue Op0 = Op.getOperand(0);
  MVT ResTy = ty(Op);
  MVT OpTy = ty(Op0);
  if (!Subtarget.isHVXElementType(OpTy) || !Subtarget.isHVXElementType(ResTy))
    return SDValue();

  //   .-res, op->  ScalarVec      Illegal         HVX
  //  Scalar        ok             -               -
  //  Illegal       widen(insert)  widen           -
  //  HVX           -              widen           ok

  auto getFactor = [HwWidth](MVT Ty) {
    unsigned Width = Ty.getSizeInBits();
    return HwWidth > Width ? HwWidth / Width : 1;
  };

  auto getWideTy = [getFactor](MVT Ty) {
    unsigned WideLen = Ty.getVectorNumElements() * getFactor(Ty);
    return MVT::getVectorVT(Ty.getVectorElementType(), WideLen);
  };

  unsigned Opcode = Op.getOpcode() == ISD::SIGN_EXTEND ? HexagonISD::VUNPACK
                                                       : HexagonISD::VUNPACKU;
  SDValue WideOp = appendUndef(Op0, getWideTy(OpTy), DAG);
  SDValue WideRes = DAG.getNode(Opcode, dl, getWideTy(ResTy), WideOp);
  return WideRes;
}

SDValue
HexagonTargetLowering::WidenHvxTruncate(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  unsigned HwWidth = 8*Subtarget.getVectorLength();

  SDValue Op0 = Op.getOperand(0);
  MVT ResTy = ty(Op);
  MVT OpTy = ty(Op0);
  if (!Subtarget.isHVXElementType(OpTy) || !Subtarget.isHVXElementType(ResTy))
    return SDValue();

  //   .-res, op->  ScalarVec      Illegal         HVX
  //  Scalar        ok             extract(widen)  -
  //  Illegal       -              widen           widen
  //  HVX           -              -               ok

  auto getFactor = [HwWidth](MVT Ty) {
    unsigned Width = Ty.getSizeInBits();
    assert(HwWidth % Width == 0);
    return HwWidth / Width;
  };

  auto getWideTy = [getFactor](MVT Ty) {
    unsigned WideLen = Ty.getVectorNumElements() * getFactor(Ty);
    return MVT::getVectorVT(Ty.getVectorElementType(), WideLen);
  };

  if (Subtarget.isHVXVectorType(OpTy))
    return DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy), Op0);

  assert(!isTypeLegal(OpTy) && "HVX-widening a truncate of scalar?");

  SDValue WideOp = appendUndef(Op0, getWideTy(OpTy), DAG);
  SDValue WideRes = DAG.getNode(HexagonISD::VPACKL, dl, getWideTy(ResTy),
                                WideOp);
  // If the original result wasn't legal and was supposed to be widened,
  // we're done.
  if (shouldWidenToHvx(ResTy, DAG))
    return WideRes;

  // The original result type wasn't meant to be widened to HVX, so
  // leave it as it is. Standard legalization should be able to deal
  // with it (since now it's a result of a target-independent ISD
  // node).
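// A small numeric sketch of the getFactor/getWideTy pair used by
// WidenHvxExtend and WidenHvxTruncate, with a plain struct standing in for
// MVT. The struct and function names are invented; the arithmetic mirrors
// the lambdas above.
struct SketchVecTy {
  unsigned NumElts;
  unsigned EltBits;
  constexpr unsigned bits() const { return NumElts * EltBits; }
};

// Widen Ty so that its total size becomes one full HVX vector of HwWidth bits.
constexpr SketchVecTy getWideTySketch(SketchVecTy Ty, unsigned HwWidth) {
  return {Ty.NumElts * (HwWidth / Ty.bits()), Ty.EltBits};
}

// In 128-byte HVX mode HwWidth = 8 * 128 = 1024 bits, so a 256-bit v32i8
// operand gets a factor of 4 and widens to v128i8, one full vector register.
static_assert(getWideTySketch({32, 8}, 1024).NumElts == 128,
              "v32i8 widens to v128i8");
// WidenHvxTruncate resumes below, extracting the original result type from
// the widened VPACKL result.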
assert(ResTy.isVector()); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResTy, {WideRes, getZero(dl, MVT::i32, DAG)}); } SDValue HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const { unsigned Opc = Op.getOpcode(); bool IsPairOp = isHvxPairTy(ty(Op)) || llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) { return isHvxPairTy(ty(V)); }); if (IsPairOp) { switch (Opc) { default: break; case ISD::LOAD: case ISD::STORE: case ISD::MLOAD: case ISD::MSTORE: return SplitHvxMemOp(Op, DAG); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: if (ty(Op).getSizeInBits() == ty(Op.getOperand(0)).getSizeInBits()) return SplitHvxPairOp(Op, DAG); break; case ISD::CTPOP: case ISD::CTLZ: case ISD::CTTZ: case ISD::MUL: case ISD::FADD: case ISD::FSUB: case ISD::FMUL: case ISD::FMINNUM: case ISD::FMAXNUM: case ISD::MULHS: case ISD::MULHU: case ISD::AND: case ISD::OR: case ISD::XOR: case ISD::SRA: case ISD::SHL: case ISD::SRL: case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: case ISD::UMAX: case ISD::SETCC: case ISD::VSELECT: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::SIGN_EXTEND_INREG: case ISD::SPLAT_VECTOR: return SplitHvxPairOp(Op, DAG); } } switch (Opc) { default: break; case ISD::BUILD_VECTOR: return LowerHvxBuildVector(Op, DAG); case ISD::SPLAT_VECTOR: return LowerHvxSplatVector(Op, DAG); case ISD::CONCAT_VECTORS: return LowerHvxConcatVectors(Op, DAG); case ISD::INSERT_SUBVECTOR: return LowerHvxInsertSubvector(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG); case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG); case ISD::BITCAST: return LowerHvxBitcast(Op, DAG); case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG); case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG); case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG); case ISD::CTTZ: return LowerHvxCttz(Op, DAG); case ISD::SELECT: return LowerHvxSelect(Op, DAG); case ISD::SRA: case ISD::SHL: case ISD::SRL: return LowerHvxShift(Op, DAG); case ISD::MULHS: case ISD::MULHU: return LowerHvxMulh(Op, DAG); case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG); case ISD::SETCC: case ISD::INTRINSIC_VOID: return Op; case ISD::INTRINSIC_WO_CHAIN: return LowerHvxIntrinsic(Op, DAG); case ISD::MLOAD: case ISD::MSTORE: return LowerHvxMaskedOp(Op, DAG); // Unaligned loads will be handled by the default lowering. 
case ISD::LOAD: return SDValue(); case ISD::FP_EXTEND: return LowerHvxFpExtend(Op, DAG); case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return LowerHvxConvertFpInt(Op, DAG); } #ifndef NDEBUG Op.dumpr(&DAG); #endif llvm_unreachable("Unhandled HVX operation"); } void HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { unsigned Opc = N->getOpcode(); SDValue Op(N, 0); switch (Opc) { case ISD::ANY_EXTEND: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: if (shouldWidenToHvx(ty(Op.getOperand(0)), DAG)) { if (SDValue T = WidenHvxExtend(Op, DAG)) Results.push_back(T); } break; case ISD::SETCC: if (shouldWidenToHvx(ty(Op.getOperand(0)), DAG)) { if (SDValue T = WidenHvxSetCC(Op, DAG)) Results.push_back(T); } break; case ISD::TRUNCATE: if (shouldWidenToHvx(ty(Op.getOperand(0)), DAG)) { if (SDValue T = WidenHvxTruncate(Op, DAG)) Results.push_back(T); } break; case ISD::STORE: { if (shouldWidenToHvx(ty(cast(N)->getValue()), DAG)) { SDValue Store = WidenHvxStore(Op, DAG); Results.push_back(Store); } break; } case ISD::MLOAD: if (isHvxPairTy(ty(Op))) { SDValue S = SplitHvxMemOp(Op, DAG); assert(S->getOpcode() == ISD::MERGE_VALUES); Results.push_back(S.getOperand(0)); Results.push_back(S.getOperand(1)); } break; case ISD::MSTORE: if (isHvxPairTy(ty(Op->getOperand(1)))) { // Stored value SDValue S = SplitHvxMemOp(Op, DAG); Results.push_back(S); } break; default: break; } } void HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { unsigned Opc = N->getOpcode(); SDValue Op(N, 0); switch (Opc) { case ISD::ANY_EXTEND: case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: if (shouldWidenToHvx(ty(Op), DAG)) { if (SDValue T = WidenHvxExtend(Op, DAG)) Results.push_back(T); } break; case ISD::SETCC: if (shouldWidenToHvx(ty(Op), DAG)) { if (SDValue T = WidenHvxSetCC(Op, DAG)) Results.push_back(T); } break; case ISD::TRUNCATE: if (shouldWidenToHvx(ty(Op), DAG)) { if (SDValue T = WidenHvxTruncate(Op, DAG)) Results.push_back(T); } break; case ISD::LOAD: { if (shouldWidenToHvx(ty(Op), DAG)) { SDValue Load = WidenHvxLoad(Op, DAG); assert(Load->getOpcode() == ISD::MERGE_VALUES); Results.push_back(Load.getOperand(0)); Results.push_back(Load.getOperand(1)); } break; } case ISD::BITCAST: if (isHvxBoolTy(ty(N->getOperand(0)))) { SDValue Op(N, 0); SDValue C = LowerHvxBitcast(Op, DAG); Results.push_back(C); } break; default: break; } } SDValue HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { const SDLoc &dl(N); SelectionDAG &DAG = DCI.DAG; SDValue Op(N, 0); unsigned Opc = Op.getOpcode(); if (DCI.isBeforeLegalizeOps()) return SDValue(); SmallVector Ops(N->ops().begin(), N->ops().end()); switch (Opc) { case ISD::VSELECT: { // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0) SDValue Cond = Ops[0]; if (Cond->getOpcode() == ISD::XOR) { SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1); if (C1->getOpcode() == HexagonISD::QTRUE) return DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, Ops[2], Ops[1]); } break; } case HexagonISD::V2Q: if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) { if (const auto *C = dyn_cast(Ops[0].getOperand(0))) return C->isZero() ? 
DAG.getNode(HexagonISD::QFALSE, dl, ty(Op)) : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op)); } break; case HexagonISD::Q2V: if (Ops[0].getOpcode() == HexagonISD::QTRUE) return DAG.getNode(ISD::SPLAT_VECTOR, dl, ty(Op), DAG.getConstant(-1, dl, MVT::i32)); if (Ops[0].getOpcode() == HexagonISD::QFALSE) return getZero(dl, ty(Op), DAG); break; case HexagonISD::VINSERTW0: if (isUndef(Ops[1])) return Ops[0];; break; case HexagonISD::VROR: { if (Ops[0].getOpcode() == HexagonISD::VROR) { SDValue Vec = Ops[0].getOperand(0); SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(1); SDValue Rot = DAG.getNode(ISD::ADD, dl, ty(Rot0), {Rot0, Rot1}); return DAG.getNode(HexagonISD::VROR, dl, ty(Op), {Vec, Rot}); } break; } } return SDValue(); } bool HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const { auto Action = getPreferredHvxVectorAction(Ty); if (Action == TargetLoweringBase::TypeWidenVector) { EVT WideTy = getTypeToTransformTo(*DAG.getContext(), Ty); assert(WideTy.isSimple()); return Subtarget.isHVXVectorType(WideTy.getSimpleVT(), true); } return false; } bool HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const { if (!Subtarget.useHVXOps()) return false; // If the type of any result, or any operand type are HVX vector types, // this is an HVX operation. auto IsHvxTy = [this](EVT Ty) { return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true); }; auto IsHvxOp = [this](SDValue Op) { return Op.getValueType().isSimple() && Subtarget.isHVXVectorType(ty(Op), true); }; if (llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp)) return true; // Check if this could be an HVX operation after type widening. auto IsWidenedToHvx = [this, &DAG](SDValue Op) { if (!Op.getValueType().isSimple()) return false; MVT ValTy = ty(Op); return ValTy.isVector() && shouldWidenToHvx(ValTy, DAG); }; for (int i = 0, e = N->getNumValues(); i != e; ++i) { if (IsWidenedToHvx(SDValue(N, i))) return true; } return llvm::any_of(N->ops(), IsWidenedToHvx); } diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index c6703bb8a62a..08acf81961a3 100644 --- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -1,460 +1,461 @@ //===-- HexagonTargetMachine.cpp - Define TargetMachine for Hexagon -------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Implements the info about Hexagon target spec. 
// //===----------------------------------------------------------------------===// #include "HexagonTargetMachine.h" #include "Hexagon.h" #include "HexagonISelLowering.h" #include "HexagonLoopIdiomRecognition.h" #include "HexagonMachineScheduler.h" #include "HexagonTargetObjectFile.h" #include "HexagonTargetTransformInfo.h" #include "HexagonVectorLoopCarriedReuse.h" #include "TargetInfo/HexagonTargetInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/VLIWMachineScheduler.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Passes/PassBuilder.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Scalar.h" using namespace llvm; static cl::opt EnableCExtOpt("hexagon-cext", cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::desc("Enable Hexagon constant-extender optimization")); static cl::opt EnableRDFOpt("rdf-opt", cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::desc("Enable RDF-based optimizations")); static cl::opt DisableHardwareLoops("disable-hexagon-hwloops", cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target")); static cl::opt DisableAModeOpt("disable-hexagon-amodeopt", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable Hexagon Addressing Mode Optimization")); static cl::opt DisableHexagonCFGOpt("disable-hexagon-cfgopt", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable Hexagon CFG Optimization")); static cl::opt DisableHCP("disable-hcp", cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::desc("Disable Hexagon constant propagation")); static cl::opt DisableStoreWidening("disable-store-widen", cl::Hidden, cl::init(false), cl::desc("Disable store widening")); static cl::opt EnableExpandCondsets("hexagon-expand-condsets", cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("Early expansion of MUX")); static cl::opt EnableEarlyIf("hexagon-eif", cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("Enable early if-conversion")); static cl::opt EnableGenInsert("hexagon-insert", cl::init(true), cl::Hidden, cl::desc("Generate \"insert\" instructions")); static cl::opt EnableCommGEP("hexagon-commgep", cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("Enable commoning of GEP instructions")); static cl::opt EnableGenExtract("hexagon-extract", cl::init(true), cl::Hidden, cl::desc("Generate \"extract\" instructions")); static cl::opt EnableGenMux("hexagon-mux", cl::init(true), cl::Hidden, cl::desc("Enable converting conditional transfers into MUX instructions")); static cl::opt EnableGenPred("hexagon-gen-pred", cl::init(true), cl::Hidden, cl::desc("Enable conversion of arithmetic operations to " "predicate instructions")); static cl::opt EnableLoopPrefetch("hexagon-loop-prefetch", cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::desc("Enable loop data prefetch on Hexagon")); static cl::opt DisableHSDR("disable-hsdr", cl::init(false), cl::Hidden, cl::desc("Disable splitting double registers")); static cl::opt EnableBitSimplify("hexagon-bit", cl::init(true), cl::Hidden, cl::desc("Bit simplification")); static cl::opt EnableLoopResched("hexagon-loop-resched", cl::init(true), cl::Hidden, cl::desc("Loop rescheduling")); static cl::opt HexagonNoOpt("hexagon-noopt", cl::init(false), cl::Hidden, cl::desc("Disable backend optimizations")); static cl::opt EnableVectorPrint("enable-hexagon-vector-print", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Enable Hexagon Vector print 
instr pass")); static cl::opt EnableVExtractOpt("hexagon-opt-vextract", cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::desc("Enable vextract optimization")); static cl::opt EnableVectorCombine("hexagon-vector-combine", cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::desc("Enable HVX vector combining")); static cl::opt EnableInitialCFGCleanup("hexagon-initial-cfg-cleanup", cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::desc("Simplify the CFG after atomic expansion pass")); static cl::opt EnableInstSimplify("hexagon-instsimplify", cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::desc("Enable instsimplify")); /// HexagonTargetMachineModule - Note that this is used on hosts that /// cannot link in a library unless there are references into the /// library. In particular, it seems that it is not possible to get /// things to work on Win32 without this. Though it is unused, do not /// remove it. extern "C" int HexagonTargetMachineModule; int HexagonTargetMachineModule = 0; static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) { ScheduleDAGMILive *DAG = new VLIWMachineScheduler( C, std::make_unique()); DAG->addMutation(std::make_unique()); DAG->addMutation(std::make_unique()); DAG->addMutation(std::make_unique()); DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); return DAG; } static MachineSchedRegistry SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler", createVLIWMachineSched); namespace llvm { extern char &HexagonExpandCondsetsID; void initializeHexagonBitSimplifyPass(PassRegistry&); void initializeHexagonConstExtendersPass(PassRegistry&); void initializeHexagonConstPropagationPass(PassRegistry&); void initializeHexagonCopyToCombinePass(PassRegistry&); void initializeHexagonEarlyIfConversionPass(PassRegistry&); void initializeHexagonExpandCondsetsPass(PassRegistry&); void initializeHexagonGenMuxPass(PassRegistry&); void initializeHexagonHardwareLoopsPass(PassRegistry&); void initializeHexagonLoopIdiomRecognizeLegacyPassPass(PassRegistry &); void initializeHexagonNewValueJumpPass(PassRegistry&); void initializeHexagonOptAddrModePass(PassRegistry&); void initializeHexagonPacketizerPass(PassRegistry&); void initializeHexagonRDFOptPass(PassRegistry&); void initializeHexagonSplitDoubleRegsPass(PassRegistry&); void initializeHexagonVectorCombineLegacyPass(PassRegistry&); void initializeHexagonVectorLoopCarriedReuseLegacyPassPass(PassRegistry &); void initializeHexagonVExtractPass(PassRegistry&); Pass *createHexagonLoopIdiomPass(); Pass *createHexagonVectorLoopCarriedReuseLegacyPass(); FunctionPass *createHexagonBitSimplify(); FunctionPass *createHexagonBranchRelaxation(); FunctionPass *createHexagonCallFrameInformation(); FunctionPass *createHexagonCFGOptimizer(); FunctionPass *createHexagonCommonGEP(); FunctionPass *createHexagonConstExtenders(); FunctionPass *createHexagonConstPropagationPass(); FunctionPass *createHexagonCopyToCombine(); FunctionPass *createHexagonEarlyIfConversion(); FunctionPass *createHexagonFixupHwLoops(); FunctionPass *createHexagonGenExtract(); FunctionPass *createHexagonGenInsert(); FunctionPass *createHexagonGenMux(); FunctionPass *createHexagonGenPredicate(); FunctionPass *createHexagonHardwareLoops(); FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, CodeGenOpt::Level OptLevel); FunctionPass *createHexagonLoopRescheduling(); FunctionPass *createHexagonNewValueJump(); FunctionPass *createHexagonOptAddrMode(); FunctionPass *createHexagonOptimizeSZextends(); FunctionPass *createHexagonPacketizer(bool Minimal); 
FunctionPass *createHexagonPeephole(); FunctionPass *createHexagonRDFOpt(); FunctionPass *createHexagonSplitConst32AndConst64(); FunctionPass *createHexagonSplitDoubleRegs(); FunctionPass *createHexagonStoreWidening(); FunctionPass *createHexagonVectorCombineLegacyPass(); FunctionPass *createHexagonVectorPrint(); FunctionPass *createHexagonVExtract(); } // end namespace llvm; static Reloc::Model getEffectiveRelocModel(Optional RM) { return RM.getValueOr(Reloc::Static); } extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeHexagonTarget() { // Register the target. RegisterTargetMachine X(getTheHexagonTarget()); PassRegistry &PR = *PassRegistry::getPassRegistry(); initializeHexagonBitSimplifyPass(PR); initializeHexagonConstExtendersPass(PR); initializeHexagonConstPropagationPass(PR); initializeHexagonCopyToCombinePass(PR); initializeHexagonEarlyIfConversionPass(PR); initializeHexagonGenMuxPass(PR); initializeHexagonHardwareLoopsPass(PR); initializeHexagonLoopIdiomRecognizeLegacyPassPass(PR); initializeHexagonNewValueJumpPass(PR); initializeHexagonOptAddrModePass(PR); initializeHexagonPacketizerPass(PR); initializeHexagonRDFOptPass(PR); initializeHexagonSplitDoubleRegsPass(PR); initializeHexagonVectorCombineLegacyPass(PR); initializeHexagonVectorLoopCarriedReuseLegacyPassPass(PR); initializeHexagonVExtractPass(PR); } HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional RM, Optional CM, CodeGenOpt::Level OL, bool JIT) // Specify the vector alignment explicitly. For v512x1, the calculated // alignment would be 512*alignment(i1), which is 512 bytes, instead of // the required minimum of 64 bytes. : LLVMTargetMachine( T, "e-m:e-p:32:32:32-a:0-n16:32-" "i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-" "v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048", TT, CPU, FS, Options, getEffectiveRelocModel(RM), getEffectiveCodeModel(CM, CodeModel::Small), (HexagonNoOpt ? CodeGenOpt::None : OL)), TLOF(std::make_unique()) { initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry()); initAsmInfo(); } const HexagonSubtarget * HexagonTargetMachine::getSubtargetImpl(const Function &F) const { AttributeList FnAttrs = F.getAttributes(); Attribute CPUAttr = FnAttrs.getFnAttr("target-cpu"); Attribute FSAttr = FnAttrs.getFnAttr("target-features"); std::string CPU = CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU; std::string FS = FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS; // Append the preexisting target features last, so that +mattr overrides // the "unsafe-fp-math" function attribute. // Creating a separate target feature is not strictly necessary, it only // exists to make "unsafe-fp-math" force creating a new subtarget. if (F.getFnAttribute("unsafe-fp-math").getValueAsBool()) FS = FS.empty() ? "+unsafe-fp" : "+unsafe-fp," + FS; auto &I = SubtargetMap[CPU + FS]; if (!I) { // This needs to be done before we create a new subtarget since any // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. 
resetTargetOptions(F); I = std::make_unique(TargetTriple, CPU, FS, *this); } return I.get(); } void HexagonTargetMachine::adjustPassManager(PassManagerBuilder &PMB) { PMB.addExtension( PassManagerBuilder::EP_LateLoopOptimizations, [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) { PM.add(createHexagonLoopIdiomPass()); }); PMB.addExtension( PassManagerBuilder::EP_LoopOptimizerEnd, [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) { PM.add(createHexagonVectorLoopCarriedReuseLegacyPass()); }); } void HexagonTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { PB.registerLateLoopOptimizationsEPCallback( [=](LoopPassManager &LPM, OptimizationLevel Level) { LPM.addPass(HexagonLoopIdiomRecognitionPass()); }); PB.registerLoopOptimizerEndEPCallback( [=](LoopPassManager &LPM, OptimizationLevel Level) { LPM.addPass(HexagonVectorLoopCarriedReusePass()); }); } TargetTransformInfo HexagonTargetMachine::getTargetTransformInfo(const Function &F) { return TargetTransformInfo(HexagonTTIImpl(this, F)); } HexagonTargetMachine::~HexagonTargetMachine() {} namespace { /// Hexagon Code Generator Pass Configuration Options. class HexagonPassConfig : public TargetPassConfig { public: HexagonPassConfig(HexagonTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} HexagonTargetMachine &getHexagonTargetMachine() const { return getTM(); } ScheduleDAGInstrs * createMachineScheduler(MachineSchedContext *C) const override { return createVLIWMachineSched(C); } void addIRPasses() override; bool addInstSelector() override; void addPreRegAlloc() override; void addPostRegAlloc() override; void addPreSched2() override; void addPreEmitPass() override; }; } // namespace TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) { return new HexagonPassConfig(*this, PM); } void HexagonPassConfig::addIRPasses() { TargetPassConfig::addIRPasses(); bool NoOpt = (getOptLevel() == CodeGenOpt::None); if (!NoOpt) { if (EnableInstSimplify) addPass(createInstSimplifyLegacyPass()); addPass(createDeadCodeEliminationPass()); } addPass(createAtomicExpandPass()); if (!NoOpt) { if (EnableInitialCFGCleanup) addPass(createCFGSimplificationPass(SimplifyCFGOptions() .forwardSwitchCondToPhi(true) + .convertSwitchRangeToICmp(true) .convertSwitchToLookupTable(true) .needCanonicalLoops(false) .hoistCommonInsts(true) .sinkCommonInsts(true))); if (EnableLoopPrefetch) addPass(createLoopDataPrefetchPass()); if (EnableVectorCombine) addPass(createHexagonVectorCombineLegacyPass()); if (EnableCommGEP) addPass(createHexagonCommonGEP()); // Replace certain combinations of shifts and ands with extracts. if (EnableGenExtract) addPass(createHexagonGenExtract()); } } bool HexagonPassConfig::addInstSelector() { HexagonTargetMachine &TM = getHexagonTargetMachine(); bool NoOpt = (getOptLevel() == CodeGenOpt::None); if (!NoOpt) addPass(createHexagonOptimizeSZextends()); addPass(createHexagonISelDag(TM, getOptLevel())); if (!NoOpt) { if (EnableVExtractOpt) addPass(createHexagonVExtract()); // Create logical operations on predicate registers. if (EnableGenPred) addPass(createHexagonGenPredicate()); // Rotate loops to expose bit-simplification opportunities. if (EnableLoopResched) addPass(createHexagonLoopRescheduling()); // Split double registers. if (!DisableHSDR) addPass(createHexagonSplitDoubleRegs()); // Bit simplification. if (EnableBitSimplify) addPass(createHexagonBitSimplify()); addPass(createHexagonPeephole()); // Constant propagation. 
if (!DisableHCP) { addPass(createHexagonConstPropagationPass()); addPass(&UnreachableMachineBlockElimID); } if (EnableGenInsert) addPass(createHexagonGenInsert()); if (EnableEarlyIf) addPass(createHexagonEarlyIfConversion()); } return false; } void HexagonPassConfig::addPreRegAlloc() { if (getOptLevel() != CodeGenOpt::None) { if (EnableCExtOpt) addPass(createHexagonConstExtenders()); if (EnableExpandCondsets) insertPass(&RegisterCoalescerID, &HexagonExpandCondsetsID); if (!DisableStoreWidening) addPass(createHexagonStoreWidening()); if (!DisableHardwareLoops) addPass(createHexagonHardwareLoops()); } if (TM->getOptLevel() >= CodeGenOpt::Default) addPass(&MachinePipelinerID); } void HexagonPassConfig::addPostRegAlloc() { if (getOptLevel() != CodeGenOpt::None) { if (EnableRDFOpt) addPass(createHexagonRDFOpt()); if (!DisableHexagonCFGOpt) addPass(createHexagonCFGOptimizer()); if (!DisableAModeOpt) addPass(createHexagonOptAddrMode()); } } void HexagonPassConfig::addPreSched2() { addPass(createHexagonCopyToCombine()); if (getOptLevel() != CodeGenOpt::None) addPass(&IfConverterID); addPass(createHexagonSplitConst32AndConst64()); } void HexagonPassConfig::addPreEmitPass() { bool NoOpt = (getOptLevel() == CodeGenOpt::None); if (!NoOpt) addPass(createHexagonNewValueJump()); addPass(createHexagonBranchRelaxation()); if (!NoOpt) { if (!DisableHardwareLoops) addPass(createHexagonFixupHwLoops()); // Generate MUX from pairs of conditional transfers. if (EnableGenMux) addPass(createHexagonGenMux()); } // Packetization is mandatory: it handles gather/scatter at all opt levels. addPass(createHexagonPacketizer(NoOpt)); if (EnableVectorPrint) addPass(createHexagonVectorPrint()); // Add CFI instructions if necessary. addPass(createHexagonCallFrameInformation()); } diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp index 0c2e129b8f1f..8534a0ad886e 100644 --- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -1,5022 +1,5023 @@ //===- MipsISelLowering.cpp - Mips DAG Lowering Implementation ------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file defines the interfaces that Mips uses to lower LLVM code into a // selection DAG. 
// //===----------------------------------------------------------------------===// #include "MipsISelLowering.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "MCTargetDesc/MipsInstPrinter.h" #include "MCTargetDesc/MipsMCTargetDesc.h" #include "MipsCCState.h" #include "MipsInstrInfo.h" #include "MipsMachineFunction.h" #include "MipsRegisterInfo.h" #include "MipsSubtarget.h" #include "MipsTargetMachine.h" #include "MipsTargetObjectFile.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include #include #include #include #include #include #include #include using namespace llvm; #define DEBUG_TYPE "mips-lower" STATISTIC(NumTailCalls, "Number of tail calls"); static cl::opt NoZeroDivCheck("mno-check-zero-division", cl::Hidden, cl::desc("MIPS: Don't trap on integer division by zero."), cl::init(false)); extern cl::opt EmitJalrReloc; static const MCPhysReg Mips64DPRegs[8] = { Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64, Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64 }; // If I is a shifted mask, set the size (Size) and the first bit of the // mask (Pos), and return true. // For example, if I is 0x003ff800, (Pos, Size) = (11, 11). static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) { if (!isShiftedMask_64(I)) return false; Size = countPopulation(I); Pos = countTrailingZeros(I); return true; } // The MIPS MSA ABI passes vector arguments in the integer register set. // The number of integer registers used is dependant on the ABI used. MVT MipsTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const { if (!VT.isVector()) return getRegisterType(Context, VT); return Subtarget.isABI_O32() || VT.getSizeInBits() == 32 ? 
MVT::i32 : MVT::i64; } unsigned MipsTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const { if (VT.isVector()) return divideCeil(VT.getSizeInBits(), Subtarget.isABI_O32() ? 32 : 64); return MipsTargetLowering::getNumRegisters(Context, VT); } unsigned MipsTargetLowering::getVectorTypeBreakdownForCallingConv( LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const { // Break down vector types to either 2 i64s or 4 i32s. RegisterVT = getRegisterTypeForCallingConv(Context, CC, VT); IntermediateVT = RegisterVT; NumIntermediates = VT.getFixedSizeInBits() < RegisterVT.getFixedSizeInBits() ? VT.getVectorNumElements() : divideCeil(VT.getSizeInBits(), RegisterVT.getSizeInBits()); return NumIntermediates; } SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const { MachineFunction &MF = DAG.getMachineFunction(); MipsFunctionInfo *FI = MF.getInfo(); return DAG.getRegister(FI->getGlobalBaseReg(MF), Ty); } SDValue MipsTargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG, unsigned Flag) const { return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag); } SDValue MipsTargetLowering::getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG, unsigned Flag) const { return DAG.getTargetExternalSymbol(N->getSymbol(), Ty, Flag); } SDValue MipsTargetLowering::getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG, unsigned Flag) const { return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag); } SDValue MipsTargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG, unsigned Flag) const { return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag); } SDValue MipsTargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG, unsigned Flag) const { return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(), N->getOffset(), Flag); } const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((MipsISD::NodeType)Opcode) { case MipsISD::FIRST_NUMBER: break; case MipsISD::JmpLink: return "MipsISD::JmpLink"; case MipsISD::TailCall: return "MipsISD::TailCall"; case MipsISD::Highest: return "MipsISD::Highest"; case MipsISD::Higher: return "MipsISD::Higher"; case MipsISD::Hi: return "MipsISD::Hi"; case MipsISD::Lo: return "MipsISD::Lo"; case MipsISD::GotHi: return "MipsISD::GotHi"; case MipsISD::TlsHi: return "MipsISD::TlsHi"; case MipsISD::GPRel: return "MipsISD::GPRel"; case MipsISD::ThreadPointer: return "MipsISD::ThreadPointer"; case MipsISD::Ret: return "MipsISD::Ret"; case MipsISD::ERet: return "MipsISD::ERet"; case MipsISD::EH_RETURN: return "MipsISD::EH_RETURN"; case MipsISD::FMS: return "MipsISD::FMS"; case MipsISD::FPBrcond: return "MipsISD::FPBrcond"; case MipsISD::FPCmp: return "MipsISD::FPCmp"; case MipsISD::FSELECT: return "MipsISD::FSELECT"; case MipsISD::MTC1_D64: return "MipsISD::MTC1_D64"; case MipsISD::CMovFP_T: return "MipsISD::CMovFP_T"; case MipsISD::CMovFP_F: return "MipsISD::CMovFP_F"; case MipsISD::TruncIntFP: return "MipsISD::TruncIntFP"; case MipsISD::MFHI: return "MipsISD::MFHI"; case MipsISD::MFLO: return "MipsISD::MFLO"; case MipsISD::MTLOHI: return "MipsISD::MTLOHI"; case MipsISD::Mult: return "MipsISD::Mult"; case MipsISD::Multu: return "MipsISD::Multu"; case MipsISD::MAdd: return "MipsISD::MAdd"; case MipsISD::MAddu: return "MipsISD::MAddu"; case MipsISD::MSub: return "MipsISD::MSub"; case MipsISD::MSubu: return "MipsISD::MSubu"; 
case MipsISD::DivRem: return "MipsISD::DivRem"; case MipsISD::DivRemU: return "MipsISD::DivRemU"; case MipsISD::DivRem16: return "MipsISD::DivRem16"; case MipsISD::DivRemU16: return "MipsISD::DivRemU16"; case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64"; case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64"; case MipsISD::Wrapper: return "MipsISD::Wrapper"; case MipsISD::DynAlloc: return "MipsISD::DynAlloc"; case MipsISD::Sync: return "MipsISD::Sync"; case MipsISD::Ext: return "MipsISD::Ext"; case MipsISD::Ins: return "MipsISD::Ins"; case MipsISD::CIns: return "MipsISD::CIns"; case MipsISD::LWL: return "MipsISD::LWL"; case MipsISD::LWR: return "MipsISD::LWR"; case MipsISD::SWL: return "MipsISD::SWL"; case MipsISD::SWR: return "MipsISD::SWR"; case MipsISD::LDL: return "MipsISD::LDL"; case MipsISD::LDR: return "MipsISD::LDR"; case MipsISD::SDL: return "MipsISD::SDL"; case MipsISD::SDR: return "MipsISD::SDR"; case MipsISD::EXTP: return "MipsISD::EXTP"; case MipsISD::EXTPDP: return "MipsISD::EXTPDP"; case MipsISD::EXTR_S_H: return "MipsISD::EXTR_S_H"; case MipsISD::EXTR_W: return "MipsISD::EXTR_W"; case MipsISD::EXTR_R_W: return "MipsISD::EXTR_R_W"; case MipsISD::EXTR_RS_W: return "MipsISD::EXTR_RS_W"; case MipsISD::SHILO: return "MipsISD::SHILO"; case MipsISD::MTHLIP: return "MipsISD::MTHLIP"; case MipsISD::MULSAQ_S_W_PH: return "MipsISD::MULSAQ_S_W_PH"; case MipsISD::MAQ_S_W_PHL: return "MipsISD::MAQ_S_W_PHL"; case MipsISD::MAQ_S_W_PHR: return "MipsISD::MAQ_S_W_PHR"; case MipsISD::MAQ_SA_W_PHL: return "MipsISD::MAQ_SA_W_PHL"; case MipsISD::MAQ_SA_W_PHR: return "MipsISD::MAQ_SA_W_PHR"; case MipsISD::DPAU_H_QBL: return "MipsISD::DPAU_H_QBL"; case MipsISD::DPAU_H_QBR: return "MipsISD::DPAU_H_QBR"; case MipsISD::DPSU_H_QBL: return "MipsISD::DPSU_H_QBL"; case MipsISD::DPSU_H_QBR: return "MipsISD::DPSU_H_QBR"; case MipsISD::DPAQ_S_W_PH: return "MipsISD::DPAQ_S_W_PH"; case MipsISD::DPSQ_S_W_PH: return "MipsISD::DPSQ_S_W_PH"; case MipsISD::DPAQ_SA_L_W: return "MipsISD::DPAQ_SA_L_W"; case MipsISD::DPSQ_SA_L_W: return "MipsISD::DPSQ_SA_L_W"; case MipsISD::DPA_W_PH: return "MipsISD::DPA_W_PH"; case MipsISD::DPS_W_PH: return "MipsISD::DPS_W_PH"; case MipsISD::DPAQX_S_W_PH: return "MipsISD::DPAQX_S_W_PH"; case MipsISD::DPAQX_SA_W_PH: return "MipsISD::DPAQX_SA_W_PH"; case MipsISD::DPAX_W_PH: return "MipsISD::DPAX_W_PH"; case MipsISD::DPSX_W_PH: return "MipsISD::DPSX_W_PH"; case MipsISD::DPSQX_S_W_PH: return "MipsISD::DPSQX_S_W_PH"; case MipsISD::DPSQX_SA_W_PH: return "MipsISD::DPSQX_SA_W_PH"; case MipsISD::MULSA_W_PH: return "MipsISD::MULSA_W_PH"; case MipsISD::MULT: return "MipsISD::MULT"; case MipsISD::MULTU: return "MipsISD::MULTU"; case MipsISD::MADD_DSP: return "MipsISD::MADD_DSP"; case MipsISD::MADDU_DSP: return "MipsISD::MADDU_DSP"; case MipsISD::MSUB_DSP: return "MipsISD::MSUB_DSP"; case MipsISD::MSUBU_DSP: return "MipsISD::MSUBU_DSP"; case MipsISD::SHLL_DSP: return "MipsISD::SHLL_DSP"; case MipsISD::SHRA_DSP: return "MipsISD::SHRA_DSP"; case MipsISD::SHRL_DSP: return "MipsISD::SHRL_DSP"; case MipsISD::SETCC_DSP: return "MipsISD::SETCC_DSP"; case MipsISD::SELECT_CC_DSP: return "MipsISD::SELECT_CC_DSP"; case MipsISD::VALL_ZERO: return "MipsISD::VALL_ZERO"; case MipsISD::VANY_ZERO: return "MipsISD::VANY_ZERO"; case MipsISD::VALL_NONZERO: return "MipsISD::VALL_NONZERO"; case MipsISD::VANY_NONZERO: return "MipsISD::VANY_NONZERO"; case MipsISD::VCEQ: return "MipsISD::VCEQ"; case MipsISD::VCLE_S: return "MipsISD::VCLE_S"; case MipsISD::VCLE_U: return "MipsISD::VCLE_U"; case 
MipsISD::VCLT_S: return "MipsISD::VCLT_S"; case MipsISD::VCLT_U: return "MipsISD::VCLT_U"; case MipsISD::VEXTRACT_SEXT_ELT: return "MipsISD::VEXTRACT_SEXT_ELT"; case MipsISD::VEXTRACT_ZEXT_ELT: return "MipsISD::VEXTRACT_ZEXT_ELT"; case MipsISD::VNOR: return "MipsISD::VNOR"; case MipsISD::VSHF: return "MipsISD::VSHF"; case MipsISD::SHF: return "MipsISD::SHF"; case MipsISD::ILVEV: return "MipsISD::ILVEV"; case MipsISD::ILVOD: return "MipsISD::ILVOD"; case MipsISD::ILVL: return "MipsISD::ILVL"; case MipsISD::ILVR: return "MipsISD::ILVR"; case MipsISD::PCKEV: return "MipsISD::PCKEV"; case MipsISD::PCKOD: return "MipsISD::PCKOD"; case MipsISD::INSVE: return "MipsISD::INSVE"; } return nullptr; } MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, const MipsSubtarget &STI) : TargetLowering(TM), Subtarget(STI), ABI(TM.getABI()) { // Mips does not have i1 type, so use i32 for // setcc operations results (slt, sgt, ...). setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // The cmp.cond.fmt instruction in MIPS32r6/MIPS64r6 uses 0 and -1 like MSA // does. Integer booleans still use 0 and 1. if (Subtarget.hasMips32r6()) setBooleanContents(ZeroOrOneBooleanContent, ZeroOrNegativeOneBooleanContent); // Load extented operations for i1 types must be promoted for (MVT VT : MVT::integer_valuetypes()) { setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); } // MIPS doesn't have extending float->double load/store. Set LoadExtAction // for f32, f16 for (MVT VT : MVT::fp_valuetypes()) { setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); } // Set LoadExtAction for f16 vectors to Expand for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) { MVT F16VT = MVT::getVectorVT(MVT::f16, VT.getVectorNumElements()); if (F16VT.isValid()) setLoadExtAction(ISD::EXTLOAD, VT, F16VT, Expand); } setTruncStoreAction(MVT::f32, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); // Used by legalize types to correctly generate the setcc result. // Without this, every float setcc comes with a AND/OR with the result, // we don't want this, since the fpcmp result goes to a flag register, // which is used implicitly by brcond and select operations. 
AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32); // Mips Custom Operations setOperationAction(ISD::BR_JT, MVT::Other, Expand); setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); setOperationAction(ISD::JumpTable, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); setOperationAction(ISD::SELECT, MVT::f64, Custom); setOperationAction(ISD::SELECT, MVT::i32, Custom); setOperationAction(ISD::SETCC, MVT::f32, Custom); setOperationAction(ISD::SETCC, MVT::f64, Custom); setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); if (!(TM.Options.NoNaNsFPMath || Subtarget.inAbs2008Mode())) { setOperationAction(ISD::FABS, MVT::f32, Custom); setOperationAction(ISD::FABS, MVT::f64, Custom); } if (Subtarget.isGP64bit()) { setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setOperationAction(ISD::BlockAddress, MVT::i64, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); setOperationAction(ISD::JumpTable, MVT::i64, Custom); setOperationAction(ISD::ConstantPool, MVT::i64, Custom); setOperationAction(ISD::SELECT, MVT::i64, Custom); setOperationAction(ISD::LOAD, MVT::i64, Custom); setOperationAction(ISD::STORE, MVT::i64, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); } if (!Subtarget.isGP64bit()) { setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); } setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom); if (Subtarget.isGP64bit()) setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom); setOperationAction(ISD::SDIV, MVT::i32, Expand); setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UDIV, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); setOperationAction(ISD::SDIV, MVT::i64, Expand); setOperationAction(ISD::SREM, MVT::i64, Expand); setOperationAction(ISD::UDIV, MVT::i64, Expand); setOperationAction(ISD::UREM, MVT::i64, Expand); // Operations not directly supported by Mips. 
setOperationAction(ISD::BR_CC, MVT::f32, Expand); setOperationAction(ISD::BR_CC, MVT::f64, Expand); setOperationAction(ISD::BR_CC, MVT::i32, Expand); setOperationAction(ISD::BR_CC, MVT::i64, Expand); setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); if (Subtarget.hasCnMips()) { setOperationAction(ISD::CTPOP, MVT::i32, Legal); setOperationAction(ISD::CTPOP, MVT::i64, Legal); } else { setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::CTPOP, MVT::i64, Expand); } setOperationAction(ISD::CTTZ, MVT::i32, Expand); setOperationAction(ISD::CTTZ, MVT::i64, Expand); setOperationAction(ISD::ROTL, MVT::i32, Expand); setOperationAction(ISD::ROTL, MVT::i64, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); if (!Subtarget.hasMips32r2()) setOperationAction(ISD::ROTR, MVT::i32, Expand); if (!Subtarget.hasMips64r2()) setOperationAction(ISD::ROTR, MVT::i64, Expand); setOperationAction(ISD::FSIN, MVT::f32, Expand); setOperationAction(ISD::FSIN, MVT::f64, Expand); setOperationAction(ISD::FCOS, MVT::f32, Expand); setOperationAction(ISD::FCOS, MVT::f64, Expand); setOperationAction(ISD::FSINCOS, MVT::f32, Expand); setOperationAction(ISD::FSINCOS, MVT::f64, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand); setOperationAction(ISD::FPOW, MVT::f64, Expand); setOperationAction(ISD::FLOG, MVT::f32, Expand); setOperationAction(ISD::FLOG2, MVT::f32, Expand); setOperationAction(ISD::FLOG10, MVT::f32, Expand); setOperationAction(ISD::FEXP, MVT::f32, Expand); setOperationAction(ISD::FMA, MVT::f32, Expand); setOperationAction(ISD::FMA, MVT::f64, Expand); setOperationAction(ISD::FREM, MVT::f32, Expand); setOperationAction(ISD::FREM, MVT::f64, Expand); // Lower f16 conversion operations into library calls setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand); setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::VAARG, MVT::Other, Custom); setOperationAction(ISD::VACOPY, MVT::Other, Expand); setOperationAction(ISD::VAEND, MVT::Other, Expand); // Use the default for now setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); if (!Subtarget.isGP64bit()) { setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); } if (!Subtarget.hasMips32r2()) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); } // MIPS16 lacks MIPS32's clz and clo instructions. 
if (!Subtarget.hasMips32() || Subtarget.inMips16Mode()) setOperationAction(ISD::CTLZ, MVT::i32, Expand); if (!Subtarget.hasMips64()) setOperationAction(ISD::CTLZ, MVT::i64, Expand); if (!Subtarget.hasMips32r2()) setOperationAction(ISD::BSWAP, MVT::i32, Expand); if (!Subtarget.hasMips64r2()) setOperationAction(ISD::BSWAP, MVT::i64, Expand); if (Subtarget.isGP64bit()) { setLoadExtAction(ISD::SEXTLOAD, MVT::i64, MVT::i32, Custom); setLoadExtAction(ISD::ZEXTLOAD, MVT::i64, MVT::i32, Custom); setLoadExtAction(ISD::EXTLOAD, MVT::i64, MVT::i32, Custom); setTruncStoreAction(MVT::i64, MVT::i32, Custom); } setOperationAction(ISD::TRAP, MVT::Other, Legal); setTargetDAGCombine(ISD::SDIVREM); setTargetDAGCombine(ISD::UDIVREM); setTargetDAGCombine(ISD::SELECT); setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::AssertZext); setTargetDAGCombine(ISD::SHL); if (ABI.IsO32()) { // These libcalls are not available in 32-bit. setLibcallName(RTLIB::SHL_I128, nullptr); setLibcallName(RTLIB::SRL_I128, nullptr); setLibcallName(RTLIB::SRA_I128, nullptr); setLibcallName(RTLIB::MUL_I128, nullptr); setLibcallName(RTLIB::MULO_I64, nullptr); setLibcallName(RTLIB::MULO_I128, nullptr); } setMinFunctionAlignment(Subtarget.isGP64bit() ? Align(8) : Align(4)); // The arguments on the stack are defined in terms of 4-byte slots on O32 // and 8-byte slots on N32/N64. setMinStackArgumentAlignment((ABI.IsN32() || ABI.IsN64()) ? Align(8) : Align(4)); setStackPointerRegisterToSaveRestore(ABI.IsN64() ? Mips::SP_64 : Mips::SP); MaxStoresPerMemcpy = 16; isMicroMips = Subtarget.inMicroMipsMode(); } const MipsTargetLowering * MipsTargetLowering::create(const MipsTargetMachine &TM, const MipsSubtarget &STI) { if (STI.inMips16Mode()) return createMips16TargetLowering(TM, STI); return createMipsSETargetLowering(TM, STI); } // Create a fast isel object. FastISel * MipsTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const { const MipsTargetMachine &TM = static_cast(funcInfo.MF->getTarget()); // We support only the standard encoding [MIPS32,MIPS32R5] ISAs. bool UseFastISel = TM.Options.EnableFastISel && Subtarget.hasMips32() && !Subtarget.hasMips32r6() && !Subtarget.inMips16Mode() && !Subtarget.inMicroMipsMode(); // Disable if either of the following is true: // We do not generate PIC, the ABI is not O32, XGOT is being used. if (!TM.isPositionIndependent() || !TM.getABI().IsO32() || Subtarget.useXGOT()) UseFastISel = false; return UseFastISel ? Mips::createFastISel(funcInfo, libInfo) : nullptr; } EVT MipsTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, EVT VT) const { if (!VT.isVector()) return MVT::i32; return VT.changeVectorElementTypeToInteger(); } static SDValue performDivRemCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); EVT Ty = N->getValueType(0); unsigned LO = (Ty == MVT::i32) ? Mips::LO0 : Mips::LO0_64; unsigned HI = (Ty == MVT::i32) ? Mips::HI0 : Mips::HI0_64; unsigned Opc = N->getOpcode() == ISD::SDIVREM ? 
MipsISD::DivRem16 : MipsISD::DivRemU16; SDLoc DL(N); SDValue DivRem = DAG.getNode(Opc, DL, MVT::Glue, N->getOperand(0), N->getOperand(1)); SDValue InChain = DAG.getEntryNode(); SDValue InGlue = DivRem; // insert MFLO if (N->hasAnyUseOfValue(0)) { SDValue CopyFromLo = DAG.getCopyFromReg(InChain, DL, LO, Ty, InGlue); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyFromLo); InChain = CopyFromLo.getValue(1); InGlue = CopyFromLo.getValue(2); } // insert MFHI if (N->hasAnyUseOfValue(1)) { SDValue CopyFromHi = DAG.getCopyFromReg(InChain, DL, HI, Ty, InGlue); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), CopyFromHi); } return SDValue(); } static Mips::CondCode condCodeToFCC(ISD::CondCode CC) { switch (CC) { default: llvm_unreachable("Unknown fp condition code!"); case ISD::SETEQ: case ISD::SETOEQ: return Mips::FCOND_OEQ; case ISD::SETUNE: return Mips::FCOND_UNE; case ISD::SETLT: case ISD::SETOLT: return Mips::FCOND_OLT; case ISD::SETGT: case ISD::SETOGT: return Mips::FCOND_OGT; case ISD::SETLE: case ISD::SETOLE: return Mips::FCOND_OLE; case ISD::SETGE: case ISD::SETOGE: return Mips::FCOND_OGE; case ISD::SETULT: return Mips::FCOND_ULT; case ISD::SETULE: return Mips::FCOND_ULE; case ISD::SETUGT: return Mips::FCOND_UGT; case ISD::SETUGE: return Mips::FCOND_UGE; case ISD::SETUO: return Mips::FCOND_UN; case ISD::SETO: return Mips::FCOND_OR; case ISD::SETNE: case ISD::SETONE: return Mips::FCOND_ONE; case ISD::SETUEQ: return Mips::FCOND_UEQ; } } /// This function returns true if the floating point conditional branches and /// conditional moves which use condition code CC should be inverted. static bool invertFPCondCodeUser(Mips::CondCode CC) { if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT) return false; assert((CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT) && "Illegal Condition Code"); return true; } // Creates and returns an FPCmp node from a setcc node. // Returns Op if setcc is not a floating point comparison. static SDValue createFPCmp(SelectionDAG &DAG, const SDValue &Op) { // must be a SETCC node if (Op.getOpcode() != ISD::SETCC) return Op; SDValue LHS = Op.getOperand(0); if (!LHS.getValueType().isFloatingPoint()) return Op; SDValue RHS = Op.getOperand(1); SDLoc DL(Op); // Assume the 3rd operand is a CondCodeSDNode. Add code to check the type of // node if necessary. ISD::CondCode CC = cast(Op.getOperand(2))->get(); return DAG.getNode(MipsISD::FPCmp, DL, MVT::Glue, LHS, RHS, DAG.getConstant(condCodeToFCC(CC), DL, MVT::i32)); } // Creates and returns a CMovFPT/F node. static SDValue createCMovFP(SelectionDAG &DAG, SDValue Cond, SDValue True, SDValue False, const SDLoc &DL) { ConstantSDNode *CC = cast(Cond.getOperand(2)); bool invert = invertFPCondCodeUser((Mips::CondCode)CC->getSExtValue()); SDValue FCC0 = DAG.getRegister(Mips::FCC0, MVT::i32); return DAG.getNode((invert ? 
                         MipsISD::CMovFP_F : MipsISD::CMovFP_T),
                     DL, True.getValueType(), True, FCC0, False, Cond);
}

static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    const MipsSubtarget &Subtarget) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  SDValue SetCC = N->getOperand(0);

  if ((SetCC.getOpcode() != ISD::SETCC) ||
      !SetCC.getOperand(0).getValueType().isInteger())
    return SDValue();

  SDValue False = N->getOperand(2);
  EVT FalseTy = False.getValueType();

  if (!FalseTy.isInteger())
    return SDValue();

  ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(False);

  // If the RHS (False) is 0, we swap the order of the operands
  // of ISD::SELECT (obviously also inverting the condition) so that we can
  // take advantage of conditional moves using the $0 register.
  // Example:
  //   return (a != 0) ? x : 0;
  //     load $reg, x
  //     movz $reg, $0, a
  if (!FalseC)
    return SDValue();

  const SDLoc DL(N);

  if (!FalseC->getZExtValue()) {
    ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
    SDValue True = N->getOperand(1);

    SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0),
                         SetCC.getOperand(1),
                         ISD::getSetCCInverse(CC, SetCC.getValueType()));

    return DAG.getNode(ISD::SELECT, DL, FalseTy, SetCC, False, True);
  }

  // If both operands are integer constants there's a possibility that we
  // can do some interesting optimizations.
  SDValue True = N->getOperand(1);
  ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(True);

  if (!TrueC || !True.getValueType().isInteger())
    return SDValue();

  // We'll also ignore MVT::i64 operands as this optimization proves
  // to be ineffective because of the required sign extensions as the result
  // of a SETCC operator is always MVT::i32 for non-vector types.
  if (True.getValueType() == MVT::i64)
    return SDValue();

  int64_t Diff = TrueC->getSExtValue() - FalseC->getSExtValue();

  // 1)  (a < x) ? y : y-1
  //  slti $reg1, a, x
  //  addiu $reg2, $reg1, y-1
  if (Diff == 1)
    return DAG.getNode(ISD::ADD, DL, SetCC.getValueType(), SetCC, False);

  // 2)  (a < x) ? y-1 : y
  //  slti $reg1, a, x
  //  xor $reg1, $reg1, 1
  //  addiu $reg2, $reg1, y-1
  if (Diff == -1) {
    ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
    SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0),
                         SetCC.getOperand(1),
                         ISD::getSetCCInverse(CC, SetCC.getValueType()));
    return DAG.getNode(ISD::ADD, DL, SetCC.getValueType(), SetCC, True);
  }

  // Could not optimize.
  return SDValue();
}

static SDValue performCMovFPCombine(SDNode *N, SelectionDAG &DAG,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    const MipsSubtarget &Subtarget) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  SDValue ValueIfTrue = N->getOperand(0),
          ValueIfFalse = N->getOperand(2);

  ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(ValueIfFalse);

  if (!FalseC || FalseC->getZExtValue())
    return SDValue();

  // Since RHS (False) is 0, we swap the order of the True/False operands
  // (obviously also inverting the condition) so that we can
  // take advantage of conditional moves using the $0 register.
  // Example:
  //   return (a != 0) ? x : 0;
  //     load $reg, x
  //     movz $reg, $0, a
  unsigned Opc = (N->getOpcode() == MipsISD::CMovFP_T) ?
MipsISD::CMovFP_F : MipsISD::CMovFP_T; SDValue FCC = N->getOperand(1), Glue = N->getOperand(3); return DAG.getNode(Opc, SDLoc(N), ValueIfFalse.getValueType(), ValueIfFalse, FCC, ValueIfTrue, Glue); } static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget) { if (DCI.isBeforeLegalizeOps() || !Subtarget.hasExtractInsert()) return SDValue(); SDValue FirstOperand = N->getOperand(0); unsigned FirstOperandOpc = FirstOperand.getOpcode(); SDValue Mask = N->getOperand(1); EVT ValTy = N->getValueType(0); SDLoc DL(N); uint64_t Pos = 0, SMPos, SMSize; ConstantSDNode *CN; SDValue NewOperand; unsigned Opc; // Op's second operand must be a shifted mask. if (!(CN = dyn_cast(Mask)) || !isShiftedMask(CN->getZExtValue(), SMPos, SMSize)) return SDValue(); if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) { // Pattern match EXT. // $dst = and ((sra or srl) $src , pos), (2**size - 1) // => ext $dst, $src, pos, size // The second operand of the shift must be an immediate. if (!(CN = dyn_cast(FirstOperand.getOperand(1)))) return SDValue(); Pos = CN->getZExtValue(); // Return if the shifted mask does not start at bit 0 or the sum of its size // and Pos exceeds the word's size. if (SMPos != 0 || Pos + SMSize > ValTy.getSizeInBits()) return SDValue(); Opc = MipsISD::Ext; NewOperand = FirstOperand.getOperand(0); } else if (FirstOperandOpc == ISD::SHL && Subtarget.hasCnMips()) { // Pattern match CINS. // $dst = and (shl $src , pos), mask // => cins $dst, $src, pos, size // mask is a shifted mask with consecutive 1's, pos = shift amount, // size = population count. // The second operand of the shift must be an immediate. if (!(CN = dyn_cast(FirstOperand.getOperand(1)))) return SDValue(); Pos = CN->getZExtValue(); if (SMPos != Pos || Pos >= ValTy.getSizeInBits() || SMSize >= 32 || Pos + SMSize > ValTy.getSizeInBits()) return SDValue(); NewOperand = FirstOperand.getOperand(0); // SMSize is 'location' (position) in this case, not size. SMSize--; Opc = MipsISD::CIns; } else { // Pattern match EXT. // $dst = and $src, (2**size - 1) , if size > 16 // => ext $dst, $src, pos, size , pos = 0 // If the mask is <= 0xffff, andi can be used instead. if (CN->getZExtValue() <= 0xffff) return SDValue(); // Return if the mask doesn't start at position 0. if (SMPos) return SDValue(); Opc = MipsISD::Ext; NewOperand = FirstOperand; } return DAG.getNode(Opc, DL, ValTy, NewOperand, DAG.getConstant(Pos, DL, MVT::i32), DAG.getConstant(SMSize, DL, MVT::i32)); } static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget) { // Pattern match INS. // $dst = or (and $src1 , mask0), (and (shl $src, pos), mask1), // where mask1 = (2**size - 1) << pos, mask0 = ~mask1 // => ins $dst, $src, size, pos, $src1 if (DCI.isBeforeLegalizeOps() || !Subtarget.hasExtractInsert()) return SDValue(); SDValue And0 = N->getOperand(0), And1 = N->getOperand(1); uint64_t SMPos0, SMSize0, SMPos1, SMSize1; ConstantSDNode *CN, *CN1; // See if Op's first operand matches (and $src1 , mask0). if (And0.getOpcode() != ISD::AND) return SDValue(); if (!(CN = dyn_cast(And0.getOperand(1))) || !isShiftedMask(~CN->getSExtValue(), SMPos0, SMSize0)) return SDValue(); // See if Op's second operand matches (and (shl $src, pos), mask1). 
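// E.g. (illustrative values only): with pos = 8 and size = 8 on an i32,
// mask1 = 0x0000ff00 and mask0 = 0xffff00ff, so
//   (or (and $src1, 0xffff00ff), (and (shl $src, 8), 0x0000ff00))
// collapses to a single "ins $dst, $src, 8, 8" with $dst starting out as a
// copy of $src1.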
if (And1.getOpcode() == ISD::AND && And1.getOperand(0).getOpcode() == ISD::SHL) { if (!(CN = dyn_cast(And1.getOperand(1))) || !isShiftedMask(CN->getZExtValue(), SMPos1, SMSize1)) return SDValue(); // The shift masks must have the same position and size. if (SMPos0 != SMPos1 || SMSize0 != SMSize1) return SDValue(); SDValue Shl = And1.getOperand(0); if (!(CN = dyn_cast(Shl.getOperand(1)))) return SDValue(); unsigned Shamt = CN->getZExtValue(); // Return if the shift amount and the first bit position of mask are not the // same. EVT ValTy = N->getValueType(0); if ((Shamt != SMPos0) || (SMPos0 + SMSize0 > ValTy.getSizeInBits())) return SDValue(); SDLoc DL(N); return DAG.getNode(MipsISD::Ins, DL, ValTy, Shl.getOperand(0), DAG.getConstant(SMPos0, DL, MVT::i32), DAG.getConstant(SMSize0, DL, MVT::i32), And0.getOperand(0)); } else { // Pattern match DINS. // $dst = or (and $src, mask0), mask1 // where mask0 = ((1 << SMSize0) -1) << SMPos0 // => dins $dst, $src, pos, size if (~CN->getSExtValue() == ((((int64_t)1 << SMSize0) - 1) << SMPos0) && ((SMSize0 + SMPos0 <= 64 && Subtarget.hasMips64r2()) || (SMSize0 + SMPos0 <= 32))) { // Check if AND instruction has constant as argument bool isConstCase = And1.getOpcode() != ISD::AND; if (And1.getOpcode() == ISD::AND) { if (!(CN1 = dyn_cast(And1->getOperand(1)))) return SDValue(); } else { if (!(CN1 = dyn_cast(N->getOperand(1)))) return SDValue(); } // Don't generate INS if constant OR operand doesn't fit into bits // cleared by constant AND operand. if (CN->getSExtValue() & CN1->getSExtValue()) return SDValue(); SDLoc DL(N); EVT ValTy = N->getOperand(0)->getValueType(0); SDValue Const1; SDValue SrlX; if (!isConstCase) { Const1 = DAG.getConstant(SMPos0, DL, MVT::i32); SrlX = DAG.getNode(ISD::SRL, DL, And1->getValueType(0), And1, Const1); } return DAG.getNode( MipsISD::Ins, DL, N->getValueType(0), isConstCase ? DAG.getConstant(CN1->getSExtValue() >> SMPos0, DL, ValTy) : SrlX, DAG.getConstant(SMPos0, DL, MVT::i32), DAG.getConstant(ValTy.getSizeInBits() / 8 < 8 ? SMSize0 & 31 : SMSize0, DL, MVT::i32), And0->getOperand(0)); } return SDValue(); } } static SDValue performMADD_MSUBCombine(SDNode *ROOTNode, SelectionDAG &CurDAG, const MipsSubtarget &Subtarget) { // ROOTNode must have a multiplication as an operand for the match to be // successful. if (ROOTNode->getOperand(0).getOpcode() != ISD::MUL && ROOTNode->getOperand(1).getOpcode() != ISD::MUL) return SDValue(); // We don't handle vector types here. if (ROOTNode->getValueType(0).isVector()) return SDValue(); // For MIPS64, madd / msub instructions are inefficent to use with 64 bit // arithmetic. E.g. // (add (mul a b) c) => // let res = (madd (mthi (drotr c 32))x(mtlo c) a b) in // MIPS64: (or (dsll (mfhi res) 32) (dsrl (dsll (mflo res) 32) 32) // or // MIPS64R2: (dins (mflo res) (mfhi res) 32 32) // // The overhead of setting up the Hi/Lo registers and reassembling the // result makes this a dubious optimzation for MIPS64. The core of the // problem is that Hi/Lo contain the upper and lower 32 bits of the // operand and result. // // It requires a chain of 4 add/mul for MIPS64R2 to get better code // density than doing it naively, 5 for MIPS64. Additionally, using // madd/msub on MIPS64 requires the operands actually be 32 bit sign // extended operands, not true 64 bit values. // // FIXME: For the moment, disable this completely for MIPS64. if (Subtarget.hasMips64()) return SDValue(); SDValue Mult = ROOTNode->getOperand(0).getOpcode() == ISD::MUL ? 
ROOTNode->getOperand(0) : ROOTNode->getOperand(1); SDValue AddOperand = ROOTNode->getOperand(0).getOpcode() == ISD::MUL ? ROOTNode->getOperand(1) : ROOTNode->getOperand(0); // Transform this to a MADD only if the user of this node is the add. // If there are other users of the mul, this function returns here. if (!Mult.hasOneUse()) return SDValue(); // maddu and madd are unusual instructions in that on MIPS64 bits 63..31 // must be in canonical form, i.e. sign extended. For MIPS32, the operands // of the multiply must have 32 or more sign bits, otherwise we cannot // perform this optimization. We have to check this here as we're performing // this optimization pre-legalization. SDValue MultLHS = Mult->getOperand(0); SDValue MultRHS = Mult->getOperand(1); bool IsSigned = MultLHS->getOpcode() == ISD::SIGN_EXTEND && MultRHS->getOpcode() == ISD::SIGN_EXTEND; bool IsUnsigned = MultLHS->getOpcode() == ISD::ZERO_EXTEND && MultRHS->getOpcode() == ISD::ZERO_EXTEND; if (!IsSigned && !IsUnsigned) return SDValue(); // Initialize accumulator. SDLoc DL(ROOTNode); SDValue TopHalf; SDValue BottomHalf; BottomHalf = CurDAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, AddOperand, CurDAG.getIntPtrConstant(0, DL)); TopHalf = CurDAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, AddOperand, CurDAG.getIntPtrConstant(1, DL)); SDValue ACCIn = CurDAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, BottomHalf, TopHalf); // Create MipsMAdd(u) / MipsMSub(u) node. bool IsAdd = ROOTNode->getOpcode() == ISD::ADD; unsigned Opcode = IsAdd ? (IsUnsigned ? MipsISD::MAddu : MipsISD::MAdd) : (IsUnsigned ? MipsISD::MSubu : MipsISD::MSub); SDValue MAddOps[3] = { CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mult->getOperand(0)), CurDAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mult->getOperand(1)), ACCIn}; EVT VTs[2] = {MVT::i32, MVT::i32}; SDValue MAdd = CurDAG.getNode(Opcode, DL, VTs, MAddOps); SDValue ResLo = CurDAG.getNode(MipsISD::MFLO, DL, MVT::i32, MAdd); SDValue ResHi = CurDAG.getNode(MipsISD::MFHI, DL, MVT::i32, MAdd); SDValue Combined = CurDAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, ResLo, ResHi); return Combined; } static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget) { // (sub v0 (mul v1, v2)) => (msub v1, v2, v0) if (DCI.isBeforeLegalizeOps()) { if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() && !Subtarget.inMips16Mode() && N->getValueType(0) == MVT::i64) return performMADD_MSUBCombine(N, DAG, Subtarget); return SDValue(); } return SDValue(); } static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget) { // (add v0 (mul v1, v2)) => (madd v1, v2, v0) if (DCI.isBeforeLegalizeOps()) { if (Subtarget.hasMips32() && !Subtarget.hasMips32r6() && !Subtarget.inMips16Mode() && N->getValueType(0) == MVT::i64) return performMADD_MSUBCombine(N, DAG, Subtarget); return SDValue(); } // (add v0, (add v1, abs_lo(tjt))) => (add (add v0, v1), abs_lo(tjt)) SDValue Add = N->getOperand(1); if (Add.getOpcode() != ISD::ADD) return SDValue(); SDValue Lo = Add.getOperand(1); if ((Lo.getOpcode() != MipsISD::Lo) || (Lo.getOperand(0).getOpcode() != ISD::TargetJumpTable)) return SDValue(); EVT ValTy = N->getValueType(0); SDLoc DL(N); SDValue Add1 = DAG.getNode(ISD::ADD, DL, ValTy, N->getOperand(0), Add.getOperand(0)); return DAG.getNode(ISD::ADD, DL, ValTy, Add1, Lo); } static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const MipsSubtarget &Subtarget) { // 
Pattern match CINS. // $dst = shl (and $src , imm), pos // => cins $dst, $src, pos, size if (DCI.isBeforeLegalizeOps() || !Subtarget.hasCnMips()) return SDValue(); SDValue FirstOperand = N->getOperand(0); unsigned FirstOperandOpc = FirstOperand.getOpcode(); SDValue SecondOperand = N->getOperand(1); EVT ValTy = N->getValueType(0); SDLoc DL(N); uint64_t Pos = 0, SMPos, SMSize; ConstantSDNode *CN; SDValue NewOperand; // The second operand of the shift must be an immediate. if (!(CN = dyn_cast(SecondOperand))) return SDValue(); Pos = CN->getZExtValue(); if (Pos >= ValTy.getSizeInBits()) return SDValue(); if (FirstOperandOpc != ISD::AND) return SDValue(); // AND's second operand must be a shifted mask. if (!(CN = dyn_cast(FirstOperand.getOperand(1))) || !isShiftedMask(CN->getZExtValue(), SMPos, SMSize)) return SDValue(); // Return if the shifted mask does not start at bit 0 or the sum of its size // and Pos exceeds the word's size. if (SMPos != 0 || SMSize > 32 || Pos + SMSize > ValTy.getSizeInBits()) return SDValue(); NewOperand = FirstOperand.getOperand(0); // SMSize is 'location' (position) in this case, not size. SMSize--; return DAG.getNode(MipsISD::CIns, DL, ValTy, NewOperand, DAG.getConstant(Pos, DL, MVT::i32), DAG.getConstant(SMSize, DL, MVT::i32)); } SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; unsigned Opc = N->getOpcode(); switch (Opc) { default: break; case ISD::SDIVREM: case ISD::UDIVREM: return performDivRemCombine(N, DAG, DCI, Subtarget); case ISD::SELECT: return performSELECTCombine(N, DAG, DCI, Subtarget); case MipsISD::CMovFP_F: case MipsISD::CMovFP_T: return performCMovFPCombine(N, DAG, DCI, Subtarget); case ISD::AND: return performANDCombine(N, DAG, DCI, Subtarget); case ISD::OR: return performORCombine(N, DAG, DCI, Subtarget); case ISD::ADD: return performADDCombine(N, DAG, DCI, Subtarget); case ISD::SHL: return performSHLCombine(N, DAG, DCI, Subtarget); case ISD::SUB: return performSUBCombine(N, DAG, DCI, Subtarget); } return SDValue(); } bool MipsTargetLowering::isCheapToSpeculateCttz() const { return Subtarget.hasMips32(); } bool MipsTargetLowering::isCheapToSpeculateCtlz() const { return Subtarget.hasMips32(); } bool MipsTargetLowering::shouldFoldConstantShiftPairToMask( const SDNode *N, CombineLevel Level) const { if (N->getOperand(0).getValueType().isVector()) return false; return true; } void MipsTargetLowering::ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { return LowerOperationWrapper(N, Results, DAG); } SDValue MipsTargetLowering:: LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { case ISD::BRCOND: return lowerBRCOND(Op, DAG); case ISD::ConstantPool: return lowerConstantPool(Op, DAG); case ISD::GlobalAddress: return lowerGlobalAddress(Op, DAG); case ISD::BlockAddress: return lowerBlockAddress(Op, DAG); case ISD::GlobalTLSAddress: return lowerGlobalTLSAddress(Op, DAG); case ISD::JumpTable: return lowerJumpTable(Op, DAG); case ISD::SELECT: return lowerSELECT(Op, DAG); case ISD::SETCC: return lowerSETCC(Op, DAG); case ISD::VASTART: return lowerVASTART(Op, DAG); case ISD::VAARG: return lowerVAARG(Op, DAG); case ISD::FCOPYSIGN: return lowerFCOPYSIGN(Op, DAG); case ISD::FABS: return lowerFABS(Op, DAG); case ISD::FRAMEADDR: return lowerFRAMEADDR(Op, DAG); case ISD::RETURNADDR: return lowerRETURNADDR(Op, DAG); case ISD::EH_RETURN: return lowerEH_RETURN(Op, DAG); case ISD::ATOMIC_FENCE: return lowerATOMIC_FENCE(Op, DAG); case 
ISD::SHL_PARTS: return lowerShiftLeftParts(Op, DAG); case ISD::SRA_PARTS: return lowerShiftRightParts(Op, DAG, true); case ISD::SRL_PARTS: return lowerShiftRightParts(Op, DAG, false); case ISD::LOAD: return lowerLOAD(Op, DAG); case ISD::STORE: return lowerSTORE(Op, DAG); case ISD::EH_DWARF_CFA: return lowerEH_DWARF_CFA(Op, DAG); case ISD::FP_TO_SINT: return lowerFP_TO_SINT(Op, DAG); } return SDValue(); } //===----------------------------------------------------------------------===// // Lower helper functions //===----------------------------------------------------------------------===// // addLiveIn - This helper function adds the specified physical register to the // MachineFunction as a live in value. It also creates a corresponding // virtual register for it. static unsigned addLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC) { Register VReg = MF.getRegInfo().createVirtualRegister(RC); MF.getRegInfo().addLiveIn(PReg, VReg); return VReg; } static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, MachineBasicBlock &MBB, const TargetInstrInfo &TII, bool Is64Bit, bool IsMicroMips) { if (NoZeroDivCheck) return &MBB; // Insert instruction "teq $divisor_reg, $zero, 7". MachineBasicBlock::iterator I(MI); MachineInstrBuilder MIB; MachineOperand &Divisor = MI.getOperand(2); MIB = BuildMI(MBB, std::next(I), MI.getDebugLoc(), TII.get(IsMicroMips ? Mips::TEQ_MM : Mips::TEQ)) .addReg(Divisor.getReg(), getKillRegState(Divisor.isKill())) .addReg(Mips::ZERO) .addImm(7); // Use the 32-bit sub-register if this is a 64-bit division. if (Is64Bit) MIB->getOperand(0).setSubReg(Mips::sub_32); // Clear Divisor's kill flag. Divisor.setIsKill(false); // We would normally delete the original instruction here but in this case // we only needed to inject an additional instruction rather than replace it. 
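// For a 32-bit signed division the block therefore ends up containing,
// roughly (register names are illustrative):
//   div  $zero, $numerator, $divisor
//   teq  $divisor, $zero, 7     # trap (code 7) only when the divisor is zero
//   mflo $result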
return &MBB; } MachineBasicBlock * MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const { switch (MI.getOpcode()) { default: llvm_unreachable("Unexpected instr type to insert"); case Mips::ATOMIC_LOAD_ADD_I8: return emitAtomicBinaryPartword(MI, BB, 1); case Mips::ATOMIC_LOAD_ADD_I16: return emitAtomicBinaryPartword(MI, BB, 2); case Mips::ATOMIC_LOAD_ADD_I32: return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_ADD_I64: return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_AND_I8: return emitAtomicBinaryPartword(MI, BB, 1); case Mips::ATOMIC_LOAD_AND_I16: return emitAtomicBinaryPartword(MI, BB, 2); case Mips::ATOMIC_LOAD_AND_I32: return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_AND_I64: return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_OR_I8: return emitAtomicBinaryPartword(MI, BB, 1); case Mips::ATOMIC_LOAD_OR_I16: return emitAtomicBinaryPartword(MI, BB, 2); case Mips::ATOMIC_LOAD_OR_I32: return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_OR_I64: return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_XOR_I8: return emitAtomicBinaryPartword(MI, BB, 1); case Mips::ATOMIC_LOAD_XOR_I16: return emitAtomicBinaryPartword(MI, BB, 2); case Mips::ATOMIC_LOAD_XOR_I32: return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_XOR_I64: return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_NAND_I8: return emitAtomicBinaryPartword(MI, BB, 1); case Mips::ATOMIC_LOAD_NAND_I16: return emitAtomicBinaryPartword(MI, BB, 2); case Mips::ATOMIC_LOAD_NAND_I32: return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_NAND_I64: return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_SUB_I8: return emitAtomicBinaryPartword(MI, BB, 1); case Mips::ATOMIC_LOAD_SUB_I16: return emitAtomicBinaryPartword(MI, BB, 2); case Mips::ATOMIC_LOAD_SUB_I32: return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_SUB_I64: return emitAtomicBinary(MI, BB); case Mips::ATOMIC_SWAP_I8: return emitAtomicBinaryPartword(MI, BB, 1); case Mips::ATOMIC_SWAP_I16: return emitAtomicBinaryPartword(MI, BB, 2); case Mips::ATOMIC_SWAP_I32: return emitAtomicBinary(MI, BB); case Mips::ATOMIC_SWAP_I64: return emitAtomicBinary(MI, BB); case Mips::ATOMIC_CMP_SWAP_I8: return emitAtomicCmpSwapPartword(MI, BB, 1); case Mips::ATOMIC_CMP_SWAP_I16: return emitAtomicCmpSwapPartword(MI, BB, 2); case Mips::ATOMIC_CMP_SWAP_I32: return emitAtomicCmpSwap(MI, BB); case Mips::ATOMIC_CMP_SWAP_I64: return emitAtomicCmpSwap(MI, BB); case Mips::ATOMIC_LOAD_MIN_I8: return emitAtomicBinaryPartword(MI, BB, 1); case Mips::ATOMIC_LOAD_MIN_I16: return emitAtomicBinaryPartword(MI, BB, 2); case Mips::ATOMIC_LOAD_MIN_I32: return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_MIN_I64: return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_MAX_I8: return emitAtomicBinaryPartword(MI, BB, 1); case Mips::ATOMIC_LOAD_MAX_I16: return emitAtomicBinaryPartword(MI, BB, 2); case Mips::ATOMIC_LOAD_MAX_I32: return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_MAX_I64: return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_UMIN_I8: return emitAtomicBinaryPartword(MI, BB, 1); case Mips::ATOMIC_LOAD_UMIN_I16: return emitAtomicBinaryPartword(MI, BB, 2); case Mips::ATOMIC_LOAD_UMIN_I32: return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_UMIN_I64: return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_UMAX_I8: return emitAtomicBinaryPartword(MI, BB, 1); case Mips::ATOMIC_LOAD_UMAX_I16: return emitAtomicBinaryPartword(MI, BB, 2); case Mips::ATOMIC_LOAD_UMAX_I32: return emitAtomicBinary(MI, BB); case Mips::ATOMIC_LOAD_UMAX_I64: 
return emitAtomicBinary(MI, BB); case Mips::PseudoSDIV: case Mips::PseudoUDIV: case Mips::DIV: case Mips::DIVU: case Mips::MOD: case Mips::MODU: return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo(), false, false); case Mips::SDIV_MM_Pseudo: case Mips::UDIV_MM_Pseudo: case Mips::SDIV_MM: case Mips::UDIV_MM: case Mips::DIV_MMR6: case Mips::DIVU_MMR6: case Mips::MOD_MMR6: case Mips::MODU_MMR6: return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo(), false, true); case Mips::PseudoDSDIV: case Mips::PseudoDUDIV: case Mips::DDIV: case Mips::DDIVU: case Mips::DMOD: case Mips::DMODU: return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo(), true, false); case Mips::PseudoSELECT_I: case Mips::PseudoSELECT_I64: case Mips::PseudoSELECT_S: case Mips::PseudoSELECT_D32: case Mips::PseudoSELECT_D64: return emitPseudoSELECT(MI, BB, false, Mips::BNE); case Mips::PseudoSELECTFP_F_I: case Mips::PseudoSELECTFP_F_I64: case Mips::PseudoSELECTFP_F_S: case Mips::PseudoSELECTFP_F_D32: case Mips::PseudoSELECTFP_F_D64: return emitPseudoSELECT(MI, BB, true, Mips::BC1F); case Mips::PseudoSELECTFP_T_I: case Mips::PseudoSELECTFP_T_I64: case Mips::PseudoSELECTFP_T_S: case Mips::PseudoSELECTFP_T_D32: case Mips::PseudoSELECTFP_T_D64: return emitPseudoSELECT(MI, BB, true, Mips::BC1T); case Mips::PseudoD_SELECT_I: case Mips::PseudoD_SELECT_I64: return emitPseudoD_SELECT(MI, BB); case Mips::LDR_W: return emitLDR_W(MI, BB); case Mips::LDR_D: return emitLDR_D(MI, BB); case Mips::STR_W: return emitSTR_W(MI, BB); case Mips::STR_D: return emitSTR_D(MI, BB); } } // This function also handles Mips::ATOMIC_SWAP_I32 (when BinOpcode == 0), and // Mips::ATOMIC_LOAD_NAND_I32 (when Nand == true) MachineBasicBlock * MipsTargetLowering::emitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB) const { MachineFunction *MF = BB->getParent(); MachineRegisterInfo &RegInfo = MF->getRegInfo(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); unsigned AtomicOp; bool NeedsAdditionalReg = false; switch (MI.getOpcode()) { case Mips::ATOMIC_LOAD_ADD_I32: AtomicOp = Mips::ATOMIC_LOAD_ADD_I32_POSTRA; break; case Mips::ATOMIC_LOAD_SUB_I32: AtomicOp = Mips::ATOMIC_LOAD_SUB_I32_POSTRA; break; case Mips::ATOMIC_LOAD_AND_I32: AtomicOp = Mips::ATOMIC_LOAD_AND_I32_POSTRA; break; case Mips::ATOMIC_LOAD_OR_I32: AtomicOp = Mips::ATOMIC_LOAD_OR_I32_POSTRA; break; case Mips::ATOMIC_LOAD_XOR_I32: AtomicOp = Mips::ATOMIC_LOAD_XOR_I32_POSTRA; break; case Mips::ATOMIC_LOAD_NAND_I32: AtomicOp = Mips::ATOMIC_LOAD_NAND_I32_POSTRA; break; case Mips::ATOMIC_SWAP_I32: AtomicOp = Mips::ATOMIC_SWAP_I32_POSTRA; break; case Mips::ATOMIC_LOAD_ADD_I64: AtomicOp = Mips::ATOMIC_LOAD_ADD_I64_POSTRA; break; case Mips::ATOMIC_LOAD_SUB_I64: AtomicOp = Mips::ATOMIC_LOAD_SUB_I64_POSTRA; break; case Mips::ATOMIC_LOAD_AND_I64: AtomicOp = Mips::ATOMIC_LOAD_AND_I64_POSTRA; break; case Mips::ATOMIC_LOAD_OR_I64: AtomicOp = Mips::ATOMIC_LOAD_OR_I64_POSTRA; break; case Mips::ATOMIC_LOAD_XOR_I64: AtomicOp = Mips::ATOMIC_LOAD_XOR_I64_POSTRA; break; case Mips::ATOMIC_LOAD_NAND_I64: AtomicOp = Mips::ATOMIC_LOAD_NAND_I64_POSTRA; break; case Mips::ATOMIC_SWAP_I64: AtomicOp = Mips::ATOMIC_SWAP_I64_POSTRA; break; case Mips::ATOMIC_LOAD_MIN_I32: AtomicOp = Mips::ATOMIC_LOAD_MIN_I32_POSTRA; NeedsAdditionalReg = true; break; case Mips::ATOMIC_LOAD_MAX_I32: AtomicOp = Mips::ATOMIC_LOAD_MAX_I32_POSTRA; NeedsAdditionalReg = true; break; case Mips::ATOMIC_LOAD_UMIN_I32: AtomicOp = Mips::ATOMIC_LOAD_UMIN_I32_POSTRA; NeedsAdditionalReg = true; break; case 
Mips::ATOMIC_LOAD_UMAX_I32: AtomicOp = Mips::ATOMIC_LOAD_UMAX_I32_POSTRA; NeedsAdditionalReg = true; break; case Mips::ATOMIC_LOAD_MIN_I64: AtomicOp = Mips::ATOMIC_LOAD_MIN_I64_POSTRA; NeedsAdditionalReg = true; break; case Mips::ATOMIC_LOAD_MAX_I64: AtomicOp = Mips::ATOMIC_LOAD_MAX_I64_POSTRA; NeedsAdditionalReg = true; break; case Mips::ATOMIC_LOAD_UMIN_I64: AtomicOp = Mips::ATOMIC_LOAD_UMIN_I64_POSTRA; NeedsAdditionalReg = true; break; case Mips::ATOMIC_LOAD_UMAX_I64: AtomicOp = Mips::ATOMIC_LOAD_UMAX_I64_POSTRA; NeedsAdditionalReg = true; break; default: llvm_unreachable("Unknown pseudo atomic for replacement!"); } Register OldVal = MI.getOperand(0).getReg(); Register Ptr = MI.getOperand(1).getReg(); Register Incr = MI.getOperand(2).getReg(); Register Scratch = RegInfo.createVirtualRegister(RegInfo.getRegClass(OldVal)); MachineBasicBlock::iterator II(MI); // The scratch registers here with the EarlyClobber | Define | Implicit // flags is used to persuade the register allocator and the machine // verifier to accept the usage of this register. This has to be a real // register which has an UNDEF value but is dead after the instruction which // is unique among the registers chosen for the instruction. // The EarlyClobber flag has the semantic properties that the operand it is // attached to is clobbered before the rest of the inputs are read. Hence it // must be unique among the operands to the instruction. // The Define flag is needed to coerce the machine verifier that an Undef // value isn't a problem. // The Dead flag is needed as the value in scratch isn't used by any other // instruction. Kill isn't used as Dead is more precise. // The implicit flag is here due to the interaction between the other flags // and the machine verifier. // For correctness purpose, a new pseudo is introduced here. We need this // new pseudo, so that FastRegisterAllocator does not see an ll/sc sequence // that is spread over >1 basic blocks. A register allocator which // introduces (or any codegen infact) a store, can violate the expectations // of the hardware. // // An atomic read-modify-write sequence starts with a linked load // instruction and ends with a store conditional instruction. The atomic // read-modify-write sequence fails if any of the following conditions // occur between the execution of ll and sc: // * A coherent store is completed by another process or coherent I/O // module into the block of synchronizable physical memory containing // the word. The size and alignment of the block is // implementation-dependent. // * A coherent store is executed between an LL and SC sequence on the // same processor to the block of synchornizable physical memory // containing the word. 
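// After register allocation the *_POSTRA pseudo is expanded into a retry
// loop of roughly this shape (illustrative, 32-bit add shown):
//
//   loop:
//     ll    $oldval, 0($ptr)
//     addu  $scratch, $oldval, $incr
//     sc    $scratch, 0($ptr)
//     beqz  $scratch, loop
//
// A store inserted between the ll and the sc (e.g. a spill) would make the
// sc fail on every iteration, which is why the whole sequence is kept as a
// single opaque pseudo until after register allocation.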
// Register PtrCopy = RegInfo.createVirtualRegister(RegInfo.getRegClass(Ptr)); Register IncrCopy = RegInfo.createVirtualRegister(RegInfo.getRegClass(Incr)); BuildMI(*BB, II, DL, TII->get(Mips::COPY), IncrCopy).addReg(Incr); BuildMI(*BB, II, DL, TII->get(Mips::COPY), PtrCopy).addReg(Ptr); MachineInstrBuilder MIB = BuildMI(*BB, II, DL, TII->get(AtomicOp)) .addReg(OldVal, RegState::Define | RegState::EarlyClobber) .addReg(PtrCopy) .addReg(IncrCopy) .addReg(Scratch, RegState::Define | RegState::EarlyClobber | RegState::Implicit | RegState::Dead); if (NeedsAdditionalReg) { Register Scratch2 = RegInfo.createVirtualRegister(RegInfo.getRegClass(OldVal)); MIB.addReg(Scratch2, RegState::Define | RegState::EarlyClobber | RegState::Implicit | RegState::Dead); } MI.eraseFromParent(); return BB; } MachineBasicBlock *MipsTargetLowering::emitSignExtendToI32InReg( MachineInstr &MI, MachineBasicBlock *BB, unsigned Size, unsigned DstReg, unsigned SrcReg) const { const TargetInstrInfo *TII = Subtarget.getInstrInfo(); const DebugLoc &DL = MI.getDebugLoc(); if (Subtarget.hasMips32r2() && Size == 1) { BuildMI(BB, DL, TII->get(Mips::SEB), DstReg).addReg(SrcReg); return BB; } if (Subtarget.hasMips32r2() && Size == 2) { BuildMI(BB, DL, TII->get(Mips::SEH), DstReg).addReg(SrcReg); return BB; } MachineFunction *MF = BB->getParent(); MachineRegisterInfo &RegInfo = MF->getRegInfo(); const TargetRegisterClass *RC = getRegClassFor(MVT::i32); Register ScrReg = RegInfo.createVirtualRegister(RC); assert(Size < 32); int64_t ShiftImm = 32 - (Size * 8); BuildMI(BB, DL, TII->get(Mips::SLL), ScrReg).addReg(SrcReg).addImm(ShiftImm); BuildMI(BB, DL, TII->get(Mips::SRA), DstReg).addReg(ScrReg).addImm(ShiftImm); return BB; } MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword( MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const { assert((Size == 1 || Size == 2) && "Unsupported size for EmitAtomicBinaryPartial."); MachineFunction *MF = BB->getParent(); MachineRegisterInfo &RegInfo = MF->getRegInfo(); const TargetRegisterClass *RC = getRegClassFor(MVT::i32); const bool ArePtrs64bit = ABI.ArePtrs64bit(); const TargetRegisterClass *RCp = getRegClassFor(ArePtrs64bit ? 
MVT::i64 : MVT::i32); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); Register Dest = MI.getOperand(0).getReg(); Register Ptr = MI.getOperand(1).getReg(); Register Incr = MI.getOperand(2).getReg(); Register AlignedAddr = RegInfo.createVirtualRegister(RCp); Register ShiftAmt = RegInfo.createVirtualRegister(RC); Register Mask = RegInfo.createVirtualRegister(RC); Register Mask2 = RegInfo.createVirtualRegister(RC); Register Incr2 = RegInfo.createVirtualRegister(RC); Register MaskLSB2 = RegInfo.createVirtualRegister(RCp); Register PtrLSB2 = RegInfo.createVirtualRegister(RC); Register MaskUpper = RegInfo.createVirtualRegister(RC); Register Scratch = RegInfo.createVirtualRegister(RC); Register Scratch2 = RegInfo.createVirtualRegister(RC); Register Scratch3 = RegInfo.createVirtualRegister(RC); unsigned AtomicOp = 0; bool NeedsAdditionalReg = false; switch (MI.getOpcode()) { case Mips::ATOMIC_LOAD_NAND_I8: AtomicOp = Mips::ATOMIC_LOAD_NAND_I8_POSTRA; break; case Mips::ATOMIC_LOAD_NAND_I16: AtomicOp = Mips::ATOMIC_LOAD_NAND_I16_POSTRA; break; case Mips::ATOMIC_SWAP_I8: AtomicOp = Mips::ATOMIC_SWAP_I8_POSTRA; break; case Mips::ATOMIC_SWAP_I16: AtomicOp = Mips::ATOMIC_SWAP_I16_POSTRA; break; case Mips::ATOMIC_LOAD_ADD_I8: AtomicOp = Mips::ATOMIC_LOAD_ADD_I8_POSTRA; break; case Mips::ATOMIC_LOAD_ADD_I16: AtomicOp = Mips::ATOMIC_LOAD_ADD_I16_POSTRA; break; case Mips::ATOMIC_LOAD_SUB_I8: AtomicOp = Mips::ATOMIC_LOAD_SUB_I8_POSTRA; break; case Mips::ATOMIC_LOAD_SUB_I16: AtomicOp = Mips::ATOMIC_LOAD_SUB_I16_POSTRA; break; case Mips::ATOMIC_LOAD_AND_I8: AtomicOp = Mips::ATOMIC_LOAD_AND_I8_POSTRA; break; case Mips::ATOMIC_LOAD_AND_I16: AtomicOp = Mips::ATOMIC_LOAD_AND_I16_POSTRA; break; case Mips::ATOMIC_LOAD_OR_I8: AtomicOp = Mips::ATOMIC_LOAD_OR_I8_POSTRA; break; case Mips::ATOMIC_LOAD_OR_I16: AtomicOp = Mips::ATOMIC_LOAD_OR_I16_POSTRA; break; case Mips::ATOMIC_LOAD_XOR_I8: AtomicOp = Mips::ATOMIC_LOAD_XOR_I8_POSTRA; break; case Mips::ATOMIC_LOAD_XOR_I16: AtomicOp = Mips::ATOMIC_LOAD_XOR_I16_POSTRA; break; case Mips::ATOMIC_LOAD_MIN_I8: AtomicOp = Mips::ATOMIC_LOAD_MIN_I8_POSTRA; NeedsAdditionalReg = true; break; case Mips::ATOMIC_LOAD_MIN_I16: AtomicOp = Mips::ATOMIC_LOAD_MIN_I16_POSTRA; NeedsAdditionalReg = true; break; case Mips::ATOMIC_LOAD_MAX_I8: AtomicOp = Mips::ATOMIC_LOAD_MAX_I8_POSTRA; NeedsAdditionalReg = true; break; case Mips::ATOMIC_LOAD_MAX_I16: AtomicOp = Mips::ATOMIC_LOAD_MAX_I16_POSTRA; NeedsAdditionalReg = true; break; case Mips::ATOMIC_LOAD_UMIN_I8: AtomicOp = Mips::ATOMIC_LOAD_UMIN_I8_POSTRA; NeedsAdditionalReg = true; break; case Mips::ATOMIC_LOAD_UMIN_I16: AtomicOp = Mips::ATOMIC_LOAD_UMIN_I16_POSTRA; NeedsAdditionalReg = true; break; case Mips::ATOMIC_LOAD_UMAX_I8: AtomicOp = Mips::ATOMIC_LOAD_UMAX_I8_POSTRA; NeedsAdditionalReg = true; break; case Mips::ATOMIC_LOAD_UMAX_I16: AtomicOp = Mips::ATOMIC_LOAD_UMAX_I16_POSTRA; NeedsAdditionalReg = true; break; default: llvm_unreachable("Unknown subword atomic pseudo for expansion!"); } // insert new blocks after the current block const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator It = ++BB->getIterator(); MF->insert(It, exitMBB); // Transfer the remainder of BB and its successor edges to exitMBB. 
exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); BB->addSuccessor(exitMBB, BranchProbability::getOne()); // thisMBB: // addiu masklsb2,$0,-4 # 0xfffffffc // and alignedaddr,ptr,masklsb2 // andi ptrlsb2,ptr,3 // sll shiftamt,ptrlsb2,3 // ori maskupper,$0,255 # 0xff // sll mask,maskupper,shiftamt // nor mask2,$0,mask // sll incr2,incr,shiftamt int64_t MaskImm = (Size == 1) ? 255 : 65535; BuildMI(BB, DL, TII->get(ABI.GetPtrAddiuOp()), MaskLSB2) .addReg(ABI.GetNullPtr()).addImm(-4); BuildMI(BB, DL, TII->get(ABI.GetPtrAndOp()), AlignedAddr) .addReg(Ptr).addReg(MaskLSB2); BuildMI(BB, DL, TII->get(Mips::ANDi), PtrLSB2) .addReg(Ptr, 0, ArePtrs64bit ? Mips::sub_32 : 0).addImm(3); if (Subtarget.isLittle()) { BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3); } else { Register Off = RegInfo.createVirtualRegister(RC); BuildMI(BB, DL, TII->get(Mips::XORi), Off) .addReg(PtrLSB2).addImm((Size == 1) ? 3 : 2); BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(Off).addImm(3); } BuildMI(BB, DL, TII->get(Mips::ORi), MaskUpper) .addReg(Mips::ZERO).addImm(MaskImm); BuildMI(BB, DL, TII->get(Mips::SLLV), Mask) .addReg(MaskUpper).addReg(ShiftAmt); BuildMI(BB, DL, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask); BuildMI(BB, DL, TII->get(Mips::SLLV), Incr2).addReg(Incr).addReg(ShiftAmt); // The purposes of the flags on the scratch registers is explained in // emitAtomicBinary. In summary, we need a scratch register which is going to // be undef, that is unique among registers chosen for the instruction. MachineInstrBuilder MIB = BuildMI(BB, DL, TII->get(AtomicOp)) .addReg(Dest, RegState::Define | RegState::EarlyClobber) .addReg(AlignedAddr) .addReg(Incr2) .addReg(Mask) .addReg(Mask2) .addReg(ShiftAmt) .addReg(Scratch, RegState::EarlyClobber | RegState::Define | RegState::Dead | RegState::Implicit) .addReg(Scratch2, RegState::EarlyClobber | RegState::Define | RegState::Dead | RegState::Implicit) .addReg(Scratch3, RegState::EarlyClobber | RegState::Define | RegState::Dead | RegState::Implicit); if (NeedsAdditionalReg) { Register Scratch4 = RegInfo.createVirtualRegister(RC); MIB.addReg(Scratch4, RegState::EarlyClobber | RegState::Define | RegState::Dead | RegState::Implicit); } MI.eraseFromParent(); // The instruction is gone now. return exitMBB; } // Lower atomic compare and swap to a pseudo instruction, taking care to // define a scratch register for the pseudo instruction's expansion. The // instruction is expanded after the register allocator as to prevent // the insertion of stores between the linked load and the store conditional. MachineBasicBlock * MipsTargetLowering::emitAtomicCmpSwap(MachineInstr &MI, MachineBasicBlock *BB) const { assert((MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I32 || MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I64) && "Unsupported atomic pseudo for EmitAtomicCmpSwap."); const unsigned Size = MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I32 ? 4 : 8; MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8)); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); unsigned AtomicOp = MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I32 ? 
Mips::ATOMIC_CMP_SWAP_I32_POSTRA : Mips::ATOMIC_CMP_SWAP_I64_POSTRA; Register Dest = MI.getOperand(0).getReg(); Register Ptr = MI.getOperand(1).getReg(); Register OldVal = MI.getOperand(2).getReg(); Register NewVal = MI.getOperand(3).getReg(); Register Scratch = MRI.createVirtualRegister(RC); MachineBasicBlock::iterator II(MI); // We need to create copies of the various registers and kill them at the // atomic pseudo. If the copies are not made, when the atomic is expanded // after fast register allocation, the spills will end up outside of the // blocks that their values are defined in, causing livein errors. Register PtrCopy = MRI.createVirtualRegister(MRI.getRegClass(Ptr)); Register OldValCopy = MRI.createVirtualRegister(MRI.getRegClass(OldVal)); Register NewValCopy = MRI.createVirtualRegister(MRI.getRegClass(NewVal)); BuildMI(*BB, II, DL, TII->get(Mips::COPY), PtrCopy).addReg(Ptr); BuildMI(*BB, II, DL, TII->get(Mips::COPY), OldValCopy).addReg(OldVal); BuildMI(*BB, II, DL, TII->get(Mips::COPY), NewValCopy).addReg(NewVal); // The purposes of the flags on the scratch registers is explained in // emitAtomicBinary. In summary, we need a scratch register which is going to // be undef, that is unique among registers chosen for the instruction. BuildMI(*BB, II, DL, TII->get(AtomicOp)) .addReg(Dest, RegState::Define | RegState::EarlyClobber) .addReg(PtrCopy, RegState::Kill) .addReg(OldValCopy, RegState::Kill) .addReg(NewValCopy, RegState::Kill) .addReg(Scratch, RegState::EarlyClobber | RegState::Define | RegState::Dead | RegState::Implicit); MI.eraseFromParent(); // The instruction is gone now. return BB; } MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword( MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const { assert((Size == 1 || Size == 2) && "Unsupported size for EmitAtomicCmpSwapPartial."); MachineFunction *MF = BB->getParent(); MachineRegisterInfo &RegInfo = MF->getRegInfo(); const TargetRegisterClass *RC = getRegClassFor(MVT::i32); const bool ArePtrs64bit = ABI.ArePtrs64bit(); const TargetRegisterClass *RCp = getRegClassFor(ArePtrs64bit ? MVT::i64 : MVT::i32); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); Register Dest = MI.getOperand(0).getReg(); Register Ptr = MI.getOperand(1).getReg(); Register CmpVal = MI.getOperand(2).getReg(); Register NewVal = MI.getOperand(3).getReg(); Register AlignedAddr = RegInfo.createVirtualRegister(RCp); Register ShiftAmt = RegInfo.createVirtualRegister(RC); Register Mask = RegInfo.createVirtualRegister(RC); Register Mask2 = RegInfo.createVirtualRegister(RC); Register ShiftedCmpVal = RegInfo.createVirtualRegister(RC); Register ShiftedNewVal = RegInfo.createVirtualRegister(RC); Register MaskLSB2 = RegInfo.createVirtualRegister(RCp); Register PtrLSB2 = RegInfo.createVirtualRegister(RC); Register MaskUpper = RegInfo.createVirtualRegister(RC); Register MaskedCmpVal = RegInfo.createVirtualRegister(RC); Register MaskedNewVal = RegInfo.createVirtualRegister(RC); unsigned AtomicOp = MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I8 ? Mips::ATOMIC_CMP_SWAP_I8_POSTRA : Mips::ATOMIC_CMP_SWAP_I16_POSTRA; // The scratch registers here with the EarlyClobber | Define | Dead | Implicit // flags are used to coerce the register allocator and the machine verifier to // accept the usage of these registers. // The EarlyClobber flag has the semantic properties that the operand it is // attached to is clobbered before the rest of the inputs are read. Hence it // must be unique among the operands to the instruction. 
// The Define flag is needed to coerce the machine verifier that an Undef // value isn't a problem. // The Dead flag is needed as the value in scratch isn't used by any other // instruction. Kill isn't used as Dead is more precise. Register Scratch = RegInfo.createVirtualRegister(RC); Register Scratch2 = RegInfo.createVirtualRegister(RC); // insert new blocks after the current block const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineFunction::iterator It = ++BB->getIterator(); MF->insert(It, exitMBB); // Transfer the remainder of BB and its successor edges to exitMBB. exitMBB->splice(exitMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); exitMBB->transferSuccessorsAndUpdatePHIs(BB); BB->addSuccessor(exitMBB, BranchProbability::getOne()); // thisMBB: // addiu masklsb2,$0,-4 # 0xfffffffc // and alignedaddr,ptr,masklsb2 // andi ptrlsb2,ptr,3 // xori ptrlsb2,ptrlsb2,3 # Only for BE // sll shiftamt,ptrlsb2,3 // ori maskupper,$0,255 # 0xff // sll mask,maskupper,shiftamt // nor mask2,$0,mask // andi maskedcmpval,cmpval,255 // sll shiftedcmpval,maskedcmpval,shiftamt // andi maskednewval,newval,255 // sll shiftednewval,maskednewval,shiftamt int64_t MaskImm = (Size == 1) ? 255 : 65535; BuildMI(BB, DL, TII->get(ArePtrs64bit ? Mips::DADDiu : Mips::ADDiu), MaskLSB2) .addReg(ABI.GetNullPtr()).addImm(-4); BuildMI(BB, DL, TII->get(ArePtrs64bit ? Mips::AND64 : Mips::AND), AlignedAddr) .addReg(Ptr).addReg(MaskLSB2); BuildMI(BB, DL, TII->get(Mips::ANDi), PtrLSB2) .addReg(Ptr, 0, ArePtrs64bit ? Mips::sub_32 : 0).addImm(3); if (Subtarget.isLittle()) { BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(PtrLSB2).addImm(3); } else { Register Off = RegInfo.createVirtualRegister(RC); BuildMI(BB, DL, TII->get(Mips::XORi), Off) .addReg(PtrLSB2).addImm((Size == 1) ? 3 : 2); BuildMI(BB, DL, TII->get(Mips::SLL), ShiftAmt).addReg(Off).addImm(3); } BuildMI(BB, DL, TII->get(Mips::ORi), MaskUpper) .addReg(Mips::ZERO).addImm(MaskImm); BuildMI(BB, DL, TII->get(Mips::SLLV), Mask) .addReg(MaskUpper).addReg(ShiftAmt); BuildMI(BB, DL, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask); BuildMI(BB, DL, TII->get(Mips::ANDi), MaskedCmpVal) .addReg(CmpVal).addImm(MaskImm); BuildMI(BB, DL, TII->get(Mips::SLLV), ShiftedCmpVal) .addReg(MaskedCmpVal).addReg(ShiftAmt); BuildMI(BB, DL, TII->get(Mips::ANDi), MaskedNewVal) .addReg(NewVal).addImm(MaskImm); BuildMI(BB, DL, TII->get(Mips::SLLV), ShiftedNewVal) .addReg(MaskedNewVal).addReg(ShiftAmt); // The purposes of the flags on the scratch registers are explained in // emitAtomicBinary. In summary, we need a scratch register which is going to // be undef, that is unique among the register chosen for the instruction. BuildMI(BB, DL, TII->get(AtomicOp)) .addReg(Dest, RegState::Define | RegState::EarlyClobber) .addReg(AlignedAddr) .addReg(Mask) .addReg(ShiftedCmpVal) .addReg(Mask2) .addReg(ShiftedNewVal) .addReg(ShiftAmt) .addReg(Scratch, RegState::EarlyClobber | RegState::Define | RegState::Dead | RegState::Implicit) .addReg(Scratch2, RegState::EarlyClobber | RegState::Define | RegState::Dead | RegState::Implicit); MI.eraseFromParent(); // The instruction is gone now. return exitMBB; } SDValue MipsTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const { // The first operand is the chain, the second is the condition, the third is // the block to branch to if the condition is true. 
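// For example (illustrative), (brcond (setcc f32 %a, %b, setolt), %bb) is
// rewritten to (FPBrcond BRANCH_T, FCC0, %bb, (FPCmp %a, %b, FCOND_OLT)),
// which eventually becomes a c.olt.s / bc1t pair.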
  SDValue Chain = Op.getOperand(0);
  SDValue Dest = Op.getOperand(2);
  SDLoc DL(Op);

  assert(!Subtarget.hasMips32r6() && !Subtarget.hasMips64r6());

  SDValue CondRes = createFPCmp(DAG, Op.getOperand(1));

  // Return if flag is not set by a floating point comparison.
  if (CondRes.getOpcode() != MipsISD::FPCmp)
    return Op;

  SDValue CCNode = CondRes.getOperand(2);
  Mips::CondCode CC =
      (Mips::CondCode)cast<ConstantSDNode>(CCNode)->getZExtValue();
  unsigned Opc = invertFPCondCodeUser(CC) ? Mips::BRANCH_F : Mips::BRANCH_T;
  SDValue BrCode = DAG.getConstant(Opc, DL, MVT::i32);
  SDValue FCC0 = DAG.getRegister(Mips::FCC0, MVT::i32);
  return DAG.getNode(MipsISD::FPBrcond, DL, Op.getValueType(), Chain, BrCode,
                     FCC0, Dest, CondRes);
}

SDValue MipsTargetLowering::
lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  assert(!Subtarget.hasMips32r6() && !Subtarget.hasMips64r6());

  SDValue Cond = createFPCmp(DAG, Op.getOperand(0));

  // Return if flag is not set by a floating point comparison.
  if (Cond.getOpcode() != MipsISD::FPCmp)
    return Op;

  return createCMovFP(DAG, Cond, Op.getOperand(1), Op.getOperand(2),
                      SDLoc(Op));
}

SDValue MipsTargetLowering::lowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  assert(!Subtarget.hasMips32r6() && !Subtarget.hasMips64r6());
  SDValue Cond = createFPCmp(DAG, Op);

  assert(Cond.getOpcode() == MipsISD::FPCmp &&
         "Floating point operand expected.");

  SDLoc DL(Op);
  SDValue True  = DAG.getConstant(1, DL, MVT::i32);
  SDValue False = DAG.getConstant(0, DL, MVT::i32);

  return createCMovFP(DAG, Cond, True, False, DL);
}

SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
  EVT Ty = Op.getValueType();
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  const GlobalValue *GV = N->getGlobal();

  if (!isPositionIndependent()) {
    const MipsTargetObjectFile *TLOF =
        static_cast<const MipsTargetObjectFile *>(
            getTargetMachine().getObjFileLowering());
    const GlobalObject *GO = GV->getAliaseeObject();
    if (GO && TLOF->IsGlobalInSmallSection(GO, getTargetMachine()))
      // %gp_rel relocation
      return getAddrGPRel(N, SDLoc(N), Ty, DAG, ABI.IsN64());

    // %hi/%lo relocation
    return Subtarget.hasSym32() ? getAddrNonPIC(N, SDLoc(N), Ty, DAG)
                                // %highest/%higher/%hi/%lo relocation
                                : getAddrNonPICSym64(N, SDLoc(N), Ty, DAG);
  }

  // Every other architecture would use shouldAssumeDSOLocal in here, but
  // mips is special.
  // * In PIC code mips requires got loads even for local statics!
  // * To save on got entries, for local statics the got entry contains the
  //   page and an additional add instruction takes care of the low bits.
  // * It is legal to access a hidden symbol with a non hidden undefined,
  //   so one cannot guarantee that all access to a hidden symbol will know
  //   it is hidden.
  // * Mips linkers don't support creating a page and a full got entry for
  //   the same symbol.
  // * Given all that, we have to use a full got entry for hidden symbols :-(
  if (GV->hasLocalLinkage())
    return getAddrLocal(N, SDLoc(N), Ty, DAG, ABI.IsN32() || ABI.IsN64());

  if (Subtarget.useXGOT())
    return getAddrGlobalLargeGOT(
        N, SDLoc(N), Ty, DAG, MipsII::MO_GOT_HI16, MipsII::MO_GOT_LO16,
        DAG.getEntryNode(),
        MachinePointerInfo::getGOT(DAG.getMachineFunction()));

  return getAddrGlobal(
      N, SDLoc(N), Ty, DAG,
      (ABI.IsN32() || ABI.IsN64()) ? MipsII::MO_GOT_DISP : MipsII::MO_GOT,
      DAG.getEntryNode(),
      MachinePointerInfo::getGOT(DAG.getMachineFunction()));
}

SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
  EVT Ty = Op.getValueType();

  if (!isPositionIndependent())
    return Subtarget.hasSym32() ?
getAddrNonPIC(N, SDLoc(N), Ty, DAG) : getAddrNonPICSym64(N, SDLoc(N), Ty, DAG); return getAddrLocal(N, SDLoc(N), Ty, DAG, ABI.IsN32() || ABI.IsN64()); } SDValue MipsTargetLowering:: lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // If the relocation model is PIC, use the General Dynamic TLS Model or // Local Dynamic TLS model, otherwise use the Initial Exec or // Local Exec TLS Model. GlobalAddressSDNode *GA = cast(Op); if (DAG.getTarget().useEmulatedTLS()) return LowerToTLSEmulatedModel(GA, DAG); SDLoc DL(GA); const GlobalValue *GV = GA->getGlobal(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); TLSModel::Model model = getTargetMachine().getTLSModel(GV); if (model == TLSModel::GeneralDynamic || model == TLSModel::LocalDynamic) { // General Dynamic and Local Dynamic TLS Model. unsigned Flag = (model == TLSModel::LocalDynamic) ? MipsII::MO_TLSLDM : MipsII::MO_TLSGD; SDValue TGA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, Flag); SDValue Argument = DAG.getNode(MipsISD::Wrapper, DL, PtrVT, getGlobalReg(DAG, PtrVT), TGA); unsigned PtrSize = PtrVT.getSizeInBits(); IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize); SDValue TlsGetAddr = DAG.getExternalSymbol("__tls_get_addr", PtrVT); ArgListTy Args; ArgListEntry Entry; Entry.Node = Argument; Entry.Ty = PtrTy; Args.push_back(Entry); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(DL) .setChain(DAG.getEntryNode()) .setLibCallee(CallingConv::C, PtrTy, TlsGetAddr, std::move(Args)); std::pair CallResult = LowerCallTo(CLI); SDValue Ret = CallResult.first; if (model != TLSModel::LocalDynamic) return Ret; SDValue TGAHi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, MipsII::MO_DTPREL_HI); SDValue Hi = DAG.getNode(MipsISD::TlsHi, DL, PtrVT, TGAHi); SDValue TGALo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, MipsII::MO_DTPREL_LO); SDValue Lo = DAG.getNode(MipsISD::Lo, DL, PtrVT, TGALo); SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Ret); return DAG.getNode(ISD::ADD, DL, PtrVT, Add, Lo); } SDValue Offset; if (model == TLSModel::InitialExec) { // Initial Exec TLS Model SDValue TGA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, MipsII::MO_GOTTPREL); TGA = DAG.getNode(MipsISD::Wrapper, DL, PtrVT, getGlobalReg(DAG, PtrVT), TGA); Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), TGA, MachinePointerInfo()); } else { // Local Exec TLS Model assert(model == TLSModel::LocalExec); SDValue TGAHi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, MipsII::MO_TPREL_HI); SDValue TGALo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, MipsII::MO_TPREL_LO); SDValue Hi = DAG.getNode(MipsISD::TlsHi, DL, PtrVT, TGAHi); SDValue Lo = DAG.getNode(MipsISD::Lo, DL, PtrVT, TGALo); Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo); } SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, DL, PtrVT); return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadPointer, Offset); } SDValue MipsTargetLowering:: lowerJumpTable(SDValue Op, SelectionDAG &DAG) const { JumpTableSDNode *N = cast(Op); EVT Ty = Op.getValueType(); if (!isPositionIndependent()) return Subtarget.hasSym32() ? 
getAddrNonPIC(N, SDLoc(N), Ty, DAG) : getAddrNonPICSym64(N, SDLoc(N), Ty, DAG); return getAddrLocal(N, SDLoc(N), Ty, DAG, ABI.IsN32() || ABI.IsN64()); } SDValue MipsTargetLowering:: lowerConstantPool(SDValue Op, SelectionDAG &DAG) const { ConstantPoolSDNode *N = cast(Op); EVT Ty = Op.getValueType(); if (!isPositionIndependent()) { const MipsTargetObjectFile *TLOF = static_cast( getTargetMachine().getObjFileLowering()); if (TLOF->IsConstantInSmallSection(DAG.getDataLayout(), N->getConstVal(), getTargetMachine())) // %gp_rel relocation return getAddrGPRel(N, SDLoc(N), Ty, DAG, ABI.IsN64()); return Subtarget.hasSym32() ? getAddrNonPIC(N, SDLoc(N), Ty, DAG) : getAddrNonPICSym64(N, SDLoc(N), Ty, DAG); } return getAddrLocal(N, SDLoc(N), Ty, DAG, ABI.IsN32() || ABI.IsN64()); } SDValue MipsTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MipsFunctionInfo *FuncInfo = MF.getInfo(); SDLoc DL(Op); SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), getPointerTy(MF.getDataLayout())); // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. const Value *SV = cast(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1), MachinePointerInfo(SV)); } SDValue MipsTargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const { SDNode *Node = Op.getNode(); EVT VT = Node->getValueType(0); SDValue Chain = Node->getOperand(0); SDValue VAListPtr = Node->getOperand(1); const Align Align = llvm::MaybeAlign(Node->getConstantOperandVal(3)).valueOrOne(); const Value *SV = cast(Node->getOperand(2))->getValue(); SDLoc DL(Node); unsigned ArgSlotSizeInBytes = (ABI.IsN32() || ABI.IsN64()) ? 8 : 4; SDValue VAListLoad = DAG.getLoad(getPointerTy(DAG.getDataLayout()), DL, Chain, VAListPtr, MachinePointerInfo(SV)); SDValue VAList = VAListLoad; // Re-align the pointer if necessary. // It should only ever be necessary for 64-bit types on O32 since the minimum // argument alignment is the same as the maximum type alignment for N32/N64. // // FIXME: We currently align too often. The code generator doesn't notice // when the pointer is still aligned from the last va_arg (or pair of // va_args for the i64 on O32 case). if (Align > getMinStackArgumentAlignment()) { VAList = DAG.getNode( ISD::ADD, DL, VAList.getValueType(), VAList, DAG.getConstant(Align.value() - 1, DL, VAList.getValueType())); VAList = DAG.getNode( ISD::AND, DL, VAList.getValueType(), VAList, DAG.getConstant(-(int64_t)Align.value(), DL, VAList.getValueType())); } // Increment the pointer, VAList, to the next vaarg. auto &TD = DAG.getDataLayout(); unsigned ArgSizeInBytes = TD.getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())); SDValue Tmp3 = DAG.getNode(ISD::ADD, DL, VAList.getValueType(), VAList, DAG.getConstant(alignTo(ArgSizeInBytes, ArgSlotSizeInBytes), DL, VAList.getValueType())); // Store the incremented VAList to the legalized pointer Chain = DAG.getStore(VAListLoad.getValue(1), DL, Tmp3, VAListPtr, MachinePointerInfo(SV)); // In big-endian mode we must adjust the pointer when the load size is smaller // than the argument slot size. We must also reduce the known alignment to // match. For example in the N64 ABI, we must add 4 bytes to the offset to get // the correct half of the slot, and reduce the alignment from 8 (slot // alignment) down to 4 (type alignment). 
if (!Subtarget.isLittle() && ArgSizeInBytes < ArgSlotSizeInBytes) { unsigned Adjustment = ArgSlotSizeInBytes - ArgSizeInBytes; VAList = DAG.getNode(ISD::ADD, DL, VAListPtr.getValueType(), VAList, DAG.getIntPtrConstant(Adjustment, DL)); } // Load the actual argument out of the pointer VAList return DAG.getLoad(VT, DL, Chain, VAList, MachinePointerInfo()); } static SDValue lowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG, bool HasExtractInsert) { EVT TyX = Op.getOperand(0).getValueType(); EVT TyY = Op.getOperand(1).getValueType(); SDLoc DL(Op); SDValue Const1 = DAG.getConstant(1, DL, MVT::i32); SDValue Const31 = DAG.getConstant(31, DL, MVT::i32); SDValue Res; // If operand is of type f64, extract the upper 32-bit. Otherwise, bitcast it // to i32. SDValue X = (TyX == MVT::f32) ? DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op.getOperand(0)) : DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0), Const1); SDValue Y = (TyY == MVT::f32) ? DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op.getOperand(1)) : DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(1), Const1); if (HasExtractInsert) { // ext E, Y, 31, 1 ; extract bit31 of Y // ins X, E, 31, 1 ; insert extracted bit at bit31 of X SDValue E = DAG.getNode(MipsISD::Ext, DL, MVT::i32, Y, Const31, Const1); Res = DAG.getNode(MipsISD::Ins, DL, MVT::i32, E, Const31, Const1, X); } else { // sll SllX, X, 1 // srl SrlX, SllX, 1 // srl SrlY, Y, 31 // sll SllY, SrlX, 31 // or Or, SrlX, SllY SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i32, X, Const1); SDValue SrlX = DAG.getNode(ISD::SRL, DL, MVT::i32, SllX, Const1); SDValue SrlY = DAG.getNode(ISD::SRL, DL, MVT::i32, Y, Const31); SDValue SllY = DAG.getNode(ISD::SHL, DL, MVT::i32, SrlY, Const31); Res = DAG.getNode(ISD::OR, DL, MVT::i32, SrlX, SllY); } if (TyX == MVT::f32) return DAG.getNode(ISD::BITCAST, DL, Op.getOperand(0).getValueType(), Res); SDValue LowX = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32)); return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res); } static SDValue lowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool HasExtractInsert) { unsigned WidthX = Op.getOperand(0).getValueSizeInBits(); unsigned WidthY = Op.getOperand(1).getValueSizeInBits(); EVT TyX = MVT::getIntegerVT(WidthX), TyY = MVT::getIntegerVT(WidthY); SDLoc DL(Op); SDValue Const1 = DAG.getConstant(1, DL, MVT::i32); // Bitcast to integer nodes. 
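// Working on the integer views lets the sign bit be moved directly: for the
// f64/f64 case the non-EXT/INS path below computes, in bit terms,
//   ((X << 1) >> 1) | ((Y >> 63) << 63)
// i.e. copysign(X, Y) = magnitude of X combined with the sign bit of Y.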
  SDValue X = DAG.getNode(ISD::BITCAST, DL, TyX, Op.getOperand(0));
  SDValue Y = DAG.getNode(ISD::BITCAST, DL, TyY, Op.getOperand(1));

  if (HasExtractInsert) {
    // ext  E, Y, width(Y) - 1, 1  ; extract bit width(Y)-1 of Y
    // ins  X, E, width(X) - 1, 1  ; insert extracted bit at bit width(X)-1 of X
    SDValue E = DAG.getNode(MipsISD::Ext, DL, TyY, Y,
                            DAG.getConstant(WidthY - 1, DL, MVT::i32), Const1);

    if (WidthX > WidthY)
      E = DAG.getNode(ISD::ZERO_EXTEND, DL, TyX, E);
    else if (WidthY > WidthX)
      E = DAG.getNode(ISD::TRUNCATE, DL, TyX, E);

    SDValue I = DAG.getNode(MipsISD::Ins, DL, TyX, E,
                            DAG.getConstant(WidthX - 1, DL, MVT::i32), Const1,
                            X);
    return DAG.getNode(ISD::BITCAST, DL, Op.getOperand(0).getValueType(), I);
  }

  // (d)sll SllX, X, 1
  // (d)srl SrlX, SllX, 1
  // (d)srl SrlY, Y, width(Y)-1
  // (d)sll SllY, SrlY, width(Y)-1
  // or     Or, SrlX, SllY
  SDValue SllX = DAG.getNode(ISD::SHL, DL, TyX, X, Const1);
  SDValue SrlX = DAG.getNode(ISD::SRL, DL, TyX, SllX, Const1);
  SDValue SrlY = DAG.getNode(ISD::SRL, DL, TyY, Y,
                             DAG.getConstant(WidthY - 1, DL, MVT::i32));

  if (WidthX > WidthY)
    SrlY = DAG.getNode(ISD::ZERO_EXTEND, DL, TyX, SrlY);
  else if (WidthY > WidthX)
    SrlY = DAG.getNode(ISD::TRUNCATE, DL, TyX, SrlY);

  SDValue SllY = DAG.getNode(ISD::SHL, DL, TyX, SrlY,
                             DAG.getConstant(WidthX - 1, DL, MVT::i32));
  SDValue Or = DAG.getNode(ISD::OR, DL, TyX, SrlX, SllY);
  return DAG.getNode(ISD::BITCAST, DL, Op.getOperand(0).getValueType(), Or);
}

SDValue
MipsTargetLowering::lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
  if (Subtarget.isGP64bit())
    return lowerFCOPYSIGN64(Op, DAG, Subtarget.hasExtractInsert());

  return lowerFCOPYSIGN32(Op, DAG, Subtarget.hasExtractInsert());
}

static SDValue lowerFABS32(SDValue Op, SelectionDAG &DAG,
                           bool HasExtractInsert) {
  SDLoc DL(Op);
  SDValue Res, Const1 = DAG.getConstant(1, DL, MVT::i32);

  // If operand is of type f64, extract the upper 32-bit. Otherwise, bitcast it
  // to i32.
  SDValue X = (Op.getValueType() == MVT::f32)
                  ? DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op.getOperand(0))
                  : DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
                                Op.getOperand(0), Const1);

  // Clear MSB.
  if (HasExtractInsert)
    Res = DAG.getNode(MipsISD::Ins, DL, MVT::i32,
                      DAG.getRegister(Mips::ZERO, MVT::i32),
                      DAG.getConstant(31, DL, MVT::i32), Const1, X);
  else {
    // TODO: Provide DAG patterns which transform (and x, cst)
    // back to a (shl (srl x (clz cst)) (clz cst)) sequence.
    SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i32, X, Const1);
    Res = DAG.getNode(ISD::SRL, DL, MVT::i32, SllX, Const1);
  }

  if (Op.getValueType() == MVT::f32)
    return DAG.getNode(ISD::BITCAST, DL, MVT::f32, Res);

  // FIXME: For mips32r2, the sequence of (BuildPairF64 (ins (ExtractElementF64
  // Op 1), $zero, 31 1) (ExtractElementF64 Op 0)) and the Op has one use, we
  // should be able to drop the usage of mfc1/mtc1 and rewrite the register in
  // place.
  SDValue LowX = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
                             Op.getOperand(0),
                             DAG.getConstant(0, DL, MVT::i32));
  return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res);
}

static SDValue lowerFABS64(SDValue Op, SelectionDAG &DAG,
                           bool HasExtractInsert) {
  SDLoc DL(Op);
  SDValue Res, Const1 = DAG.getConstant(1, DL, MVT::i32);

  // Bitcast to integer node.
  SDValue X = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Op.getOperand(0));

  // Clear MSB.
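  // Illustrative bit pattern: fabs(-2.0) only clears bit 63,
  //   0xC000000000000000 -> 0x4000000000000000 (+2.0).
  // Without ins, the same effect comes from shifting left by one and then
  // logically right by one, which discards just the sign bit.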
if (HasExtractInsert) Res = DAG.getNode(MipsISD::Ins, DL, MVT::i64, DAG.getRegister(Mips::ZERO_64, MVT::i64), DAG.getConstant(63, DL, MVT::i32), Const1, X); else { SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i64, X, Const1); Res = DAG.getNode(ISD::SRL, DL, MVT::i64, SllX, Const1); } return DAG.getNode(ISD::BITCAST, DL, MVT::f64, Res); } SDValue MipsTargetLowering::lowerFABS(SDValue Op, SelectionDAG &DAG) const { if ((ABI.IsN32() || ABI.IsN64()) && (Op.getValueType() == MVT::f64)) return lowerFABS64(Op, DAG, Subtarget.hasExtractInsert()); return lowerFABS32(Op, DAG, Subtarget.hasExtractInsert()); } SDValue MipsTargetLowering:: lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { // check the depth if (cast(Op.getOperand(0))->getZExtValue() != 0) { DAG.getContext()->emitError( "return address can be determined only for current frame"); return SDValue(); } MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); MFI.setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); SDLoc DL(Op); SDValue FrameAddr = DAG.getCopyFromReg( DAG.getEntryNode(), DL, ABI.IsN64() ? Mips::FP_64 : Mips::FP, VT); return FrameAddr; } SDValue MipsTargetLowering::lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { if (verifyReturnAddressArgumentIsConstant(Op, DAG)) return SDValue(); // check the depth if (cast(Op.getOperand(0))->getZExtValue() != 0) { DAG.getContext()->emitError( "return address can be determined only for current frame"); return SDValue(); } MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MVT VT = Op.getSimpleValueType(); unsigned RA = ABI.IsN64() ? Mips::RA_64 : Mips::RA; MFI.setReturnAddressIsTaken(true); // Return RA, which contains the return address. Mark it an implicit live-in. Register Reg = MF.addLiveIn(RA, getRegClassFor(VT)); return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, VT); } // An EH_RETURN is the result of lowering llvm.eh.return which in turn is // generated from __builtin_eh_return (offset, handler) // The effect of this is to adjust the stack pointer by "offset" // and then branch to "handler". SDValue MipsTargetLowering::lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MipsFunctionInfo *MipsFI = MF.getInfo(); MipsFI->setCallsEhReturn(); SDValue Chain = Op.getOperand(0); SDValue Offset = Op.getOperand(1); SDValue Handler = Op.getOperand(2); SDLoc DL(Op); EVT Ty = ABI.IsN64() ? MVT::i64 : MVT::i32; // Store stack offset in V1, store jump target in V0. Glue CopyToReg and // EH_RETURN nodes, so that instructions are emitted back-to-back. unsigned OffsetReg = ABI.IsN64() ? Mips::V1_64 : Mips::V1; unsigned AddrReg = ABI.IsN64() ? Mips::V0_64 : Mips::V0; Chain = DAG.getCopyToReg(Chain, DL, OffsetReg, Offset, SDValue()); Chain = DAG.getCopyToReg(Chain, DL, AddrReg, Handler, Chain.getValue(1)); return DAG.getNode(MipsISD::EH_RETURN, DL, MVT::Other, Chain, DAG.getRegister(OffsetReg, Ty), DAG.getRegister(AddrReg, getPointerTy(MF.getDataLayout())), Chain.getValue(1)); } SDValue MipsTargetLowering::lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const { // FIXME: Need pseudo-fence for 'singlethread' fences // FIXME: Set SType for weaker fences where supported/appropriate. unsigned SType = 0; SDLoc DL(Op); return DAG.getNode(MipsISD::Sync, DL, MVT::Other, Op.getOperand(0), DAG.getConstant(SType, DL, MVT::i32)); } SDValue MipsTargetLowering::lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); MVT VT = Subtarget.isGP64bit() ? 
MVT::i64 : MVT::i32; SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); SDValue Shamt = Op.getOperand(2); // if shamt < (VT.bits): // lo = (shl lo, shamt) // hi = (or (shl hi, shamt) (srl (srl lo, 1), ~shamt)) // else: // lo = 0 // hi = (shl lo, shamt[4:0]) SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt, DAG.getConstant(-1, DL, MVT::i32)); SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, DAG.getConstant(1, DL, VT)); SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, Not); SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt); SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); SDValue ShiftLeftLo = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt); SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt, DAG.getConstant(VT.getSizeInBits(), DL, MVT::i32)); Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, DAG.getConstant(0, DL, VT), ShiftLeftLo); Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftLeftLo, Or); SDValue Ops[2] = {Lo, Hi}; return DAG.getMergeValues(Ops, DL); } SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const { SDLoc DL(Op); SDValue Lo = Op.getOperand(0), Hi = Op.getOperand(1); SDValue Shamt = Op.getOperand(2); MVT VT = Subtarget.isGP64bit() ? MVT::i64 : MVT::i32; // if shamt < (VT.bits): // lo = (or (shl (shl hi, 1), ~shamt) (srl lo, shamt)) // if isSRA: // hi = (sra hi, shamt) // else: // hi = (srl hi, shamt) // else: // if isSRA: // lo = (sra hi, shamt[4:0]) // hi = (sra hi, 31) // else: // lo = (srl hi, shamt[4:0]) // hi = 0 SDValue Not = DAG.getNode(ISD::XOR, DL, MVT::i32, Shamt, DAG.getConstant(-1, DL, MVT::i32)); SDValue ShiftLeft1Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, DAG.getConstant(1, DL, VT)); SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, ShiftLeft1Hi, Not); SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt); SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo); SDValue ShiftRightHi = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shamt); SDValue Cond = DAG.getNode(ISD::AND, DL, MVT::i32, Shamt, DAG.getConstant(VT.getSizeInBits(), DL, MVT::i32)); SDValue Ext = DAG.getNode(ISD::SRA, DL, VT, Hi, DAG.getConstant(VT.getSizeInBits() - 1, DL, VT)); if (!(Subtarget.hasMips4() || Subtarget.hasMips32())) { SDVTList VTList = DAG.getVTList(VT, VT); return DAG.getNode(Subtarget.isGP64bit() ? Mips::PseudoD_SELECT_I64 : Mips::PseudoD_SELECT_I, DL, VTList, Cond, ShiftRightHi, IsSRA ? Ext : DAG.getConstant(0, DL, VT), Or, ShiftRightHi); } Lo = DAG.getNode(ISD::SELECT, DL, VT, Cond, ShiftRightHi, Or); Hi = DAG.getNode(ISD::SELECT, DL, VT, Cond, IsSRA ? Ext : DAG.getConstant(0, DL, VT), ShiftRightHi); SDValue Ops[2] = {Lo, Hi}; return DAG.getMergeValues(Ops, DL); } static SDValue createLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD, SDValue Chain, SDValue Src, unsigned Offset) { SDValue Ptr = LD->getBasePtr(); EVT VT = LD->getValueType(0), MemVT = LD->getMemoryVT(); EVT BasePtrVT = Ptr.getValueType(); SDLoc DL(LD); SDVTList VTList = DAG.getVTList(VT, MVT::Other); if (Offset) Ptr = DAG.getNode(ISD::ADD, DL, BasePtrVT, Ptr, DAG.getConstant(Offset, DL, BasePtrVT)); SDValue Ops[] = { Chain, Ptr, Src }; return DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, MemVT, LD->getMemOperand()); } // Expand an unaligned 32 or 64-bit integer load node. 
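// For example (little-endian, 32-bit load through an unaligned pointer in
// $a0), the expansion below corresponds to the classic pair
//   lwl $t0, 3($a0)
//   lwr $t0, 0($a0)
// while the 64-bit case uses ldl/ldr with offset 7 instead of 3 (register
// names here are purely illustrative).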
SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { LoadSDNode *LD = cast(Op); EVT MemVT = LD->getMemoryVT(); if (Subtarget.systemSupportsUnalignedAccess()) return Op; // Return if load is aligned or if MemVT is neither i32 nor i64. if ((LD->getAlignment() >= MemVT.getSizeInBits() / 8) || ((MemVT != MVT::i32) && (MemVT != MVT::i64))) return SDValue(); bool IsLittle = Subtarget.isLittle(); EVT VT = Op.getValueType(); ISD::LoadExtType ExtType = LD->getExtensionType(); SDValue Chain = LD->getChain(), Undef = DAG.getUNDEF(VT); assert((VT == MVT::i32) || (VT == MVT::i64)); // Expand // (set dst, (i64 (load baseptr))) // to // (set tmp, (ldl (add baseptr, 7), undef)) // (set dst, (ldr baseptr, tmp)) if ((VT == MVT::i64) && (ExtType == ISD::NON_EXTLOAD)) { SDValue LDL = createLoadLR(MipsISD::LDL, DAG, LD, Chain, Undef, IsLittle ? 7 : 0); return createLoadLR(MipsISD::LDR, DAG, LD, LDL.getValue(1), LDL, IsLittle ? 0 : 7); } SDValue LWL = createLoadLR(MipsISD::LWL, DAG, LD, Chain, Undef, IsLittle ? 3 : 0); SDValue LWR = createLoadLR(MipsISD::LWR, DAG, LD, LWL.getValue(1), LWL, IsLittle ? 0 : 3); // Expand // (set dst, (i32 (load baseptr))) or // (set dst, (i64 (sextload baseptr))) or // (set dst, (i64 (extload baseptr))) // to // (set tmp, (lwl (add baseptr, 3), undef)) // (set dst, (lwr baseptr, tmp)) if ((VT == MVT::i32) || (ExtType == ISD::SEXTLOAD) || (ExtType == ISD::EXTLOAD)) return LWR; assert((VT == MVT::i64) && (ExtType == ISD::ZEXTLOAD)); // Expand // (set dst, (i64 (zextload baseptr))) // to // (set tmp0, (lwl (add baseptr, 3), undef)) // (set tmp1, (lwr baseptr, tmp0)) // (set tmp2, (shl tmp1, 32)) // (set dst, (srl tmp2, 32)) SDLoc DL(LD); SDValue Const32 = DAG.getConstant(32, DL, MVT::i32); SDValue SLL = DAG.getNode(ISD::SHL, DL, MVT::i64, LWR, Const32); SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i64, SLL, Const32); SDValue Ops[] = { SRL, LWR.getValue(1) }; return DAG.getMergeValues(Ops, DL); } static SDValue createStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD, SDValue Chain, unsigned Offset) { SDValue Ptr = SD->getBasePtr(), Value = SD->getValue(); EVT MemVT = SD->getMemoryVT(), BasePtrVT = Ptr.getValueType(); SDLoc DL(SD); SDVTList VTList = DAG.getVTList(MVT::Other); if (Offset) Ptr = DAG.getNode(ISD::ADD, DL, BasePtrVT, Ptr, DAG.getConstant(Offset, DL, BasePtrVT)); SDValue Ops[] = { Chain, Value, Ptr }; return DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, MemVT, SD->getMemOperand()); } // Expand an unaligned 32 or 64-bit integer store node. static SDValue lowerUnalignedIntStore(StoreSDNode *SD, SelectionDAG &DAG, bool IsLittle) { SDValue Value = SD->getValue(), Chain = SD->getChain(); EVT VT = Value.getValueType(); // Expand // (store val, baseptr) or // (truncstore val, baseptr) // to // (swl val, (add baseptr, 3)) // (swr val, baseptr) if ((VT == MVT::i32) || SD->isTruncatingStore()) { SDValue SWL = createStoreLR(MipsISD::SWL, DAG, SD, Chain, IsLittle ? 3 : 0); return createStoreLR(MipsISD::SWR, DAG, SD, SWL, IsLittle ? 0 : 3); } assert(VT == MVT::i64); // Expand // (store val, baseptr) // to // (sdl val, (add baseptr, 7)) // (sdr val, baseptr) SDValue SDL = createStoreLR(MipsISD::SDL, DAG, SD, Chain, IsLittle ? 7 : 0); return createStoreLR(MipsISD::SDR, DAG, SD, SDL, IsLittle ? 0 : 7); } // Lower (store (fp_to_sint $fp) $ptr) to (store (TruncIntFP $fp), $ptr). 
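// Illustrative effect (register names are only examples): the converted value
// stays in an FPU register, so the store can come out roughly as
//   trunc.w.s $f0, $f12
//   swc1      $f0, 0($a0)
// instead of moving the result through a GPR first.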
static SDValue lowerFP_TO_SINT_STORE(StoreSDNode *SD, SelectionDAG &DAG, bool SingleFloat) { SDValue Val = SD->getValue(); if (Val.getOpcode() != ISD::FP_TO_SINT || (Val.getValueSizeInBits() > 32 && SingleFloat)) return SDValue(); EVT FPTy = EVT::getFloatingPointVT(Val.getValueSizeInBits()); SDValue Tr = DAG.getNode(MipsISD::TruncIntFP, SDLoc(Val), FPTy, Val.getOperand(0)); return DAG.getStore(SD->getChain(), SDLoc(SD), Tr, SD->getBasePtr(), SD->getPointerInfo(), SD->getAlignment(), SD->getMemOperand()->getFlags()); } SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { StoreSDNode *SD = cast(Op); EVT MemVT = SD->getMemoryVT(); // Lower unaligned integer stores. if (!Subtarget.systemSupportsUnalignedAccess() && (SD->getAlignment() < MemVT.getSizeInBits() / 8) && ((MemVT == MVT::i32) || (MemVT == MVT::i64))) return lowerUnalignedIntStore(SD, DAG, Subtarget.isLittle()); return lowerFP_TO_SINT_STORE(SD, DAG, Subtarget.isSingleFloat()); } SDValue MipsTargetLowering::lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const { // Return a fixed StackObject with offset 0 which points to the old stack // pointer. MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); EVT ValTy = Op->getValueType(0); int FI = MFI.CreateFixedObject(Op.getValueSizeInBits() / 8, 0, false); return DAG.getFrameIndex(FI, ValTy); } SDValue MipsTargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const { if (Op.getValueSizeInBits() > 32 && Subtarget.isSingleFloat()) return SDValue(); EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits()); SDValue Trunc = DAG.getNode(MipsISD::TruncIntFP, SDLoc(Op), FPTy, Op.getOperand(0)); return DAG.getNode(ISD::BITCAST, SDLoc(Op), Op.getValueType(), Trunc); } //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// // TODO: Implement a generic logic using tblgen that can support this. // Mips O32 ABI rules: // --- // i32 - Passed in A0, A1, A2, A3 and stack // f32 - Only passed in f32 registers if no int reg has been used yet to hold // an argument. Otherwise, passed in A1, A2, A3 and stack. // f64 - Only passed in two aliased f32 registers if no int reg has been used // yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is // not used, it must be shadowed. If only A3 is available, shadow it and // go to stack. // vXiX - Received as scalarized i32s, passed in A0 - A3 and the stack. // vXf32 - Passed in either a pair of registers {A0, A1}, {A2, A3} or {A0 - A3} // with the remainder spilled to the stack. // vXf64 - Passed in either {A0, A1, A2, A3} or {A2, A3} and in both cases // spilling the remainder to the stack. // // For vararg functions, all arguments are passed in A0, A1, A2, A3 and stack. //===----------------------------------------------------------------------===// static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State, ArrayRef F64Regs) { const MipsSubtarget &Subtarget = static_cast( State.getMachineFunction().getSubtarget()); static const MCPhysReg IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 }; const MipsCCState * MipsState = static_cast(&State); static const MCPhysReg F32Regs[] = { Mips::F12, Mips::F14 }; static const MCPhysReg FloatVectorIntRegs[] = { Mips::A0, Mips::A2 }; // Do not process byval args here. 
if (ArgFlags.isByVal()) return true; // Promote i8 and i16 if (ArgFlags.isInReg() && !Subtarget.isLittle()) { if (LocVT == MVT::i8 || LocVT == MVT::i16 || LocVT == MVT::i32) { LocVT = MVT::i32; if (ArgFlags.isSExt()) LocInfo = CCValAssign::SExtUpper; else if (ArgFlags.isZExt()) LocInfo = CCValAssign::ZExtUpper; else LocInfo = CCValAssign::AExtUpper; } } // Promote i8 and i16 if (LocVT == MVT::i8 || LocVT == MVT::i16) { LocVT = MVT::i32; if (ArgFlags.isSExt()) LocInfo = CCValAssign::SExt; else if (ArgFlags.isZExt()) LocInfo = CCValAssign::ZExt; else LocInfo = CCValAssign::AExt; } unsigned Reg; // f32 and f64 are allocated in A0, A1, A2, A3 when either of the following // is true: function is vararg, argument is 3rd or higher, there is previous // argument which is not f32 or f64. bool AllocateFloatsInIntReg = State.isVarArg() || ValNo > 1 || State.getFirstUnallocated(F32Regs) != ValNo; Align OrigAlign = ArgFlags.getNonZeroOrigAlign(); bool isI64 = (ValVT == MVT::i32 && OrigAlign == Align(8)); bool isVectorFloat = MipsState->WasOriginalArgVectorFloat(ValNo); // The MIPS vector ABI for floats passes them in a pair of registers if (ValVT == MVT::i32 && isVectorFloat) { // This is the start of an vector that was scalarized into an unknown number // of components. It doesn't matter how many there are. Allocate one of the // notional 8 byte aligned registers which map onto the argument stack, and // shadow the register lost to alignment requirements. if (ArgFlags.isSplit()) { Reg = State.AllocateReg(FloatVectorIntRegs); if (Reg == Mips::A2) State.AllocateReg(Mips::A1); else if (Reg == 0) State.AllocateReg(Mips::A3); } else { // If we're an intermediate component of the split, we can just attempt to // allocate a register directly. Reg = State.AllocateReg(IntRegs); } } else if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) { Reg = State.AllocateReg(IntRegs); // If this is the first part of an i64 arg, // the allocated register must be either A0 or A2. if (isI64 && (Reg == Mips::A1 || Reg == Mips::A3)) Reg = State.AllocateReg(IntRegs); LocVT = MVT::i32; } else if (ValVT == MVT::f64 && AllocateFloatsInIntReg) { LocVT = MVT::i32; // Allocate int register and shadow next int register. If first // available register is Mips::A1 or Mips::A3, shadow it too. 
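    // For example, for a vararg call such as printf("%f", d) the f64 'd' must
    // start at an even register: A0 holds the format pointer, A1 is skipped
    // (shadowed), and the two halves of 'd' go in A2/A3 (illustrative case,
    // matching the O32 rules described above).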
Reg = State.AllocateReg(IntRegs); if (Reg == Mips::A1 || Reg == Mips::A3) Reg = State.AllocateReg(IntRegs); if (Reg) { State.addLoc( CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); MCRegister HiReg = State.AllocateReg(IntRegs); assert(HiReg); State.addLoc( CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo)); return false; } } else if (ValVT.isFloatingPoint() && !AllocateFloatsInIntReg) { // we are guaranteed to find an available float register if (ValVT == MVT::f32) { Reg = State.AllocateReg(F32Regs); // Shadow int register State.AllocateReg(IntRegs); } else { Reg = State.AllocateReg(F64Regs); // Shadow int registers unsigned Reg2 = State.AllocateReg(IntRegs); if (Reg2 == Mips::A1 || Reg2 == Mips::A3) State.AllocateReg(IntRegs); State.AllocateReg(IntRegs); } } else llvm_unreachable("Cannot handle this ValVT."); if (!Reg) { unsigned Offset = State.AllocateStack(ValVT.getStoreSize(), OrigAlign); State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); } else State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } static bool CC_MipsO32_FP32(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { static const MCPhysReg F64Regs[] = { Mips::D6, Mips::D7 }; return CC_MipsO32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, F64Regs); } static bool CC_MipsO32_FP64(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { static const MCPhysReg F64Regs[] = { Mips::D12_64, Mips::D14_64 }; return CC_MipsO32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, F64Regs); } static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) LLVM_ATTRIBUTE_UNUSED; #include "MipsGenCallingConv.inc" CCAssignFn *MipsTargetLowering::CCAssignFnForCall() const{ return CC_Mips_FixedArg; } CCAssignFn *MipsTargetLowering::CCAssignFnForReturn() const{ return RetCC_Mips; } //===----------------------------------------------------------------------===// // Call Calling Convention Implementation //===----------------------------------------------------------------------===// SDValue MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset, SDValue Chain, SDValue Arg, const SDLoc &DL, bool IsTailCall, SelectionDAG &DAG) const { if (!IsTailCall) { SDValue PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(DAG.getDataLayout()), StackPtr, DAG.getIntPtrConstant(Offset, DL)); return DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()); } MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); int FI = MFI.CreateFixedObject(Arg.getValueSizeInBits() / 8, Offset, false); SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); return DAG.getStore(Chain, DL, Arg, FIN, MachinePointerInfo(), MaybeAlign(), MachineMemOperand::MOVolatile); } void MipsTargetLowering:: getOpndList(SmallVectorImpl &Ops, std::deque> &RegsToPass, bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const { // Insert node "GP copy globalreg" before call to function. // // R_MIPS_CALL* operators (emitted when non-internal functions are called // in PIC mode) allow symbols to be resolved via lazy binding. // The lazy binding stub requires GP to point to the GOT. 
// Note that we don't need GP to point to the GOT for indirect calls // (when R_MIPS_CALL* is not used for the call) because Mips linker generates // lazy binding stub for a function only when R_MIPS_CALL* are the only relocs // used for the function (that is, Mips linker doesn't generate lazy binding // stub for a function whose address is taken in the program). if (IsPICCall && !InternalLinkage && IsCallReloc) { unsigned GPReg = ABI.IsN64() ? Mips::GP_64 : Mips::GP; EVT Ty = ABI.IsN64() ? MVT::i64 : MVT::i32; RegsToPass.push_back(std::make_pair(GPReg, getGlobalReg(CLI.DAG, Ty))); } // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. // The InFlag in necessary since all emitted instructions must be // stuck together. SDValue InFlag; for (auto &R : RegsToPass) { Chain = CLI.DAG.getCopyToReg(Chain, CLI.DL, R.first, R.second, InFlag); InFlag = Chain.getValue(1); } // Add argument registers to the end of the list so that they are // known live into the call. for (auto &R : RegsToPass) Ops.push_back(CLI.DAG.getRegister(R.first, R.second.getValueType())); // Add a register mask operand representing the call-preserved registers. const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); const uint32_t *Mask = TRI->getCallPreservedMask(CLI.DAG.getMachineFunction(), CLI.CallConv); assert(Mask && "Missing call preserved mask for calling convention"); if (Subtarget.inMips16HardFloat()) { if (GlobalAddressSDNode *G = dyn_cast(CLI.Callee)) { StringRef Sym = G->getGlobal()->getName(); Function *F = G->getGlobal()->getParent()->getFunction(Sym); if (F && F->hasFnAttribute("__Mips16RetHelper")) { Mask = MipsRegisterInfo::getMips16RetHelperMask(); } } } Ops.push_back(CLI.DAG.getRegisterMask(Mask)); if (InFlag.getNode()) Ops.push_back(InFlag); } void MipsTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const { switch (MI.getOpcode()) { default: return; case Mips::JALR: case Mips::JALRPseudo: case Mips::JALR64: case Mips::JALR64Pseudo: case Mips::JALR16_MM: case Mips::JALRC16_MMR6: case Mips::TAILCALLREG: case Mips::TAILCALLREG64: case Mips::TAILCALLR6REG: case Mips::TAILCALL64R6REG: case Mips::TAILCALLREG_MM: case Mips::TAILCALLREG_MMR6: { if (!EmitJalrReloc || Subtarget.inMips16Mode() || !isPositionIndependent() || Node->getNumOperands() < 1 || Node->getOperand(0).getNumOperands() < 2) { return; } // We are after the callee address, set by LowerCall(). // If added to MI, asm printer will emit .reloc R_MIPS_JALR for the // symbol. const SDValue TargetAddr = Node->getOperand(0).getOperand(1); StringRef Sym; if (const GlobalAddressSDNode *G = dyn_cast_or_null(TargetAddr)) { // We must not emit the R_MIPS_JALR relocation against data symbols // since this will cause run-time crashes if the linker replaces the // call instruction with a relative branch to the data symbol. 
if (!isa(G->getGlobal())) { LLVM_DEBUG(dbgs() << "Not adding R_MIPS_JALR against data symbol " << G->getGlobal()->getName() << "\n"); return; } Sym = G->getGlobal()->getName(); } else if (const ExternalSymbolSDNode *ES = dyn_cast_or_null(TargetAddr)) { Sym = ES->getSymbol(); } if (Sym.empty()) return; MachineFunction *MF = MI.getParent()->getParent(); MCSymbol *S = MF->getContext().getOrCreateSymbol(Sym); LLVM_DEBUG(dbgs() << "Adding R_MIPS_JALR against " << Sym << "\n"); MI.addOperand(MachineOperand::CreateMCSymbol(S, MipsII::MO_JALR)); } } } /// LowerCall - functions arguments are copied from virtual regs to /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. SDValue MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc DL = CLI.DL; SmallVectorImpl &Outs = CLI.Outs; SmallVectorImpl &OutVals = CLI.OutVals; SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &IsTailCall = CLI.IsTailCall; CallingConv::ID CallConv = CLI.CallConv; bool IsVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetFrameLowering *TFL = Subtarget.getFrameLowering(); MipsFunctionInfo *FuncInfo = MF.getInfo(); bool IsPIC = isPositionIndependent(); // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; MipsCCState CCInfo( CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext(), MipsCCState::getSpecialCallingConvForCallee(Callee.getNode(), Subtarget)); const ExternalSymbolSDNode *ES = dyn_cast_or_null(Callee.getNode()); // There is one case where CALLSEQ_START..CALLSEQ_END can be nested, which // is during the lowering of a call with a byval argument which produces // a call to memcpy. For the O32 case, this causes the caller to allocate // stack space for the reserved argument area for the callee, then recursively // again for the memcpy call. In the NEWABI case, this doesn't occur as those // ABIs mandate that the callee allocates the reserved argument area. We do // still produce nested CALLSEQ_START..CALLSEQ_END with zero space though. // // If the callee has a byval argument and memcpy is used, we are mandated // to already have produced a reserved argument area for the callee for O32. // Therefore, the reserved argument area can be reused for both calls. // // Other cases of calling memcpy cannot have a chain with a CALLSEQ_START // present, as we have yet to hook that node onto the chain. // // Hence, the CALLSEQ_START and CALLSEQ_END nodes can be eliminated in this // case. GCC does a similar trick, in that wherever possible, it calculates // the maximum out going argument area (including the reserved area), and // preallocates the stack space on entrance to the caller. // // FIXME: We should do the same for efficiency and space. // Note: The check on the calling convention below must match // MipsABIInfo::GetCalleeAllocdArgSizeInBytes(). bool MemcpyInByVal = ES && StringRef(ES->getSymbol()) == StringRef("memcpy") && CallConv != CallingConv::Fast && Chain.getOpcode() == ISD::CALLSEQ_START; // Allocate the reserved argument area. It seems strange to do this from the // caller side but removing it breaks the frame size calculation. unsigned ReservedArgArea = MemcpyInByVal ? 0 : ABI.GetCalleeAllocdArgSizeInBytes(CallConv); CCInfo.AllocateStack(ReservedArgArea, Align(1)); CCInfo.AnalyzeCallOperands(Outs, CC_Mips, CLI.getArgs(), ES ? 
ES->getSymbol() : nullptr); // Get a count of how many bytes are to be pushed on the stack. unsigned NextStackOffset = CCInfo.getNextStackOffset(); // Call site info for function parameters tracking. MachineFunction::CallSiteInfo CSInfo; // Check if it's really possible to do a tail call. Restrict it to functions // that are part of this compilation unit. bool InternalLinkage = false; if (IsTailCall) { IsTailCall = isEligibleForTailCallOptimization( CCInfo, NextStackOffset, *MF.getInfo()); if (GlobalAddressSDNode *G = dyn_cast(Callee)) { InternalLinkage = G->getGlobal()->hasInternalLinkage(); IsTailCall &= (InternalLinkage || G->getGlobal()->hasLocalLinkage() || G->getGlobal()->hasPrivateLinkage() || G->getGlobal()->hasHiddenVisibility() || G->getGlobal()->hasProtectedVisibility()); } } if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall()) report_fatal_error("failed to perform tail call elimination on a call " "site marked musttail"); if (IsTailCall) ++NumTailCalls; // Chain is the output chain of the last Load/Store or CopyToReg node. // ByValChain is the output chain of the last Memcpy node created for copying // byval arguments to the stack. unsigned StackAlignment = TFL->getStackAlignment(); NextStackOffset = alignTo(NextStackOffset, StackAlignment); SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, DL, true); if (!(IsTailCall || MemcpyInByVal)) Chain = DAG.getCALLSEQ_START(Chain, NextStackOffset, 0, DL); SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, ABI.IsN64() ? Mips::SP_64 : Mips::SP, getPointerTy(DAG.getDataLayout())); std::deque> RegsToPass; SmallVector MemOpChains; CCInfo.rewindByValRegsInfo(); // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, e = ArgLocs.size(), OutIdx = 0; i != e; ++i, ++OutIdx) { SDValue Arg = OutVals[OutIdx]; CCValAssign &VA = ArgLocs[i]; MVT ValVT = VA.getValVT(), LocVT = VA.getLocVT(); ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags; bool UseUpperBits = false; // ByVal Arg. if (Flags.isByVal()) { unsigned FirstByValReg, LastByValReg; unsigned ByValIdx = CCInfo.getInRegsParamsProcessed(); CCInfo.getInRegsParamInfo(ByValIdx, FirstByValReg, LastByValReg); assert(Flags.getByValSize() && "ByVal args of size 0 should have been ignored by front-end."); assert(ByValIdx < CCInfo.getInRegsParamsCount()); assert(!IsTailCall && "Do not tail-call optimize if there is a byval argument."); passByValArg(Chain, DL, RegsToPass, MemOpChains, StackPtr, MFI, DAG, Arg, FirstByValReg, LastByValReg, Flags, Subtarget.isLittle(), VA); CCInfo.nextInRegsParam(); continue; } // Promote the value if needed. 
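    // e.g. an i8 or i16 argument was promoted to i32 by the calling
    // convention, so it is sign-/zero-/any-extended here before being placed
    // in its 32-bit location; an f64 assigned to an O32 register pair is
    // instead split into two i32 halves in the Full case below.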
switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: if (VA.isRegLoc()) { if ((ValVT == MVT::f32 && LocVT == MVT::i32) || (ValVT == MVT::f64 && LocVT == MVT::i64) || (ValVT == MVT::i64 && LocVT == MVT::f64)) Arg = DAG.getNode(ISD::BITCAST, DL, LocVT, Arg); else if (ValVT == MVT::f64 && LocVT == MVT::i32) { SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Arg, DAG.getConstant(0, DL, MVT::i32)); SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Arg, DAG.getConstant(1, DL, MVT::i32)); if (!Subtarget.isLittle()) std::swap(Lo, Hi); assert(VA.needsCustom()); Register LocRegLo = VA.getLocReg(); Register LocRegHigh = ArgLocs[++i].getLocReg(); RegsToPass.push_back(std::make_pair(LocRegLo, Lo)); RegsToPass.push_back(std::make_pair(LocRegHigh, Hi)); continue; } } break; case CCValAssign::BCvt: Arg = DAG.getNode(ISD::BITCAST, DL, LocVT, Arg); break; case CCValAssign::SExtUpper: UseUpperBits = true; LLVM_FALLTHROUGH; case CCValAssign::SExt: Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, LocVT, Arg); break; case CCValAssign::ZExtUpper: UseUpperBits = true; LLVM_FALLTHROUGH; case CCValAssign::ZExt: Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, LocVT, Arg); break; case CCValAssign::AExtUpper: UseUpperBits = true; LLVM_FALLTHROUGH; case CCValAssign::AExt: Arg = DAG.getNode(ISD::ANY_EXTEND, DL, LocVT, Arg); break; } if (UseUpperBits) { unsigned ValSizeInBits = Outs[OutIdx].ArgVT.getSizeInBits(); unsigned LocSizeInBits = VA.getLocVT().getSizeInBits(); Arg = DAG.getNode( ISD::SHL, DL, VA.getLocVT(), Arg, DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT())); } // Arguments that can be passed on register must be kept at // RegsToPass vector if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); // If the parameter is passed through reg $D, which splits into // two physical registers, avoid creating call site info. if (Mips::AFGR64RegClass.contains(VA.getLocReg())) continue; // Collect CSInfo about which register passes which parameter. const TargetOptions &Options = DAG.getTarget().Options; if (Options.SupportsDebugEntryValues) CSInfo.emplace_back(VA.getLocReg(), i); continue; } // Register can't get to this point... assert(VA.isMemLoc()); // emit ISD::STORE whichs stores the // parameter value to a stack Location MemOpChains.push_back(passArgOnStack(StackPtr, VA.getLocMemOffset(), Chain, Arg, DL, IsTailCall, DAG)); } // Transform all store nodes into one single node because all store // nodes are independent of each other. if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. EVT Ty = Callee.getValueType(); bool GlobalOrExternal = false, IsCallReloc = false; // The long-calls feature is ignored in case of PIC. // While we do not support -mshared / -mno-shared properly, // ignore long-calls in case of -mabicalls too. if (!Subtarget.isABICalls() && !IsPIC) { // If the function should be called using "long call", // get its address into a register to prevent using // of the `jal` instruction for the direct call. if (auto *N = dyn_cast(Callee)) { if (Subtarget.useLongCalls()) Callee = Subtarget.hasSym32() ? 
getAddrNonPIC(N, SDLoc(N), Ty, DAG) : getAddrNonPICSym64(N, SDLoc(N), Ty, DAG); } else if (auto *N = dyn_cast(Callee)) { bool UseLongCalls = Subtarget.useLongCalls(); // If the function has long-call/far/near attribute // it overrides command line switch pased to the backend. if (auto *F = dyn_cast(N->getGlobal())) { if (F->hasFnAttribute("long-call")) UseLongCalls = true; else if (F->hasFnAttribute("short-call")) UseLongCalls = false; } if (UseLongCalls) Callee = Subtarget.hasSym32() ? getAddrNonPIC(N, SDLoc(N), Ty, DAG) : getAddrNonPICSym64(N, SDLoc(N), Ty, DAG); } } if (GlobalAddressSDNode *G = dyn_cast(Callee)) { if (IsPIC) { const GlobalValue *Val = G->getGlobal(); InternalLinkage = Val->hasInternalLinkage(); if (InternalLinkage) Callee = getAddrLocal(G, DL, Ty, DAG, ABI.IsN32() || ABI.IsN64()); else if (Subtarget.useXGOT()) { Callee = getAddrGlobalLargeGOT(G, DL, Ty, DAG, MipsII::MO_CALL_HI16, MipsII::MO_CALL_LO16, Chain, FuncInfo->callPtrInfo(MF, Val)); IsCallReloc = true; } else { Callee = getAddrGlobal(G, DL, Ty, DAG, MipsII::MO_GOT_CALL, Chain, FuncInfo->callPtrInfo(MF, Val)); IsCallReloc = true; } } else Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy(DAG.getDataLayout()), 0, MipsII::MO_NO_FLAG); GlobalOrExternal = true; } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { const char *Sym = S->getSymbol(); if (!IsPIC) // static Callee = DAG.getTargetExternalSymbol( Sym, getPointerTy(DAG.getDataLayout()), MipsII::MO_NO_FLAG); else if (Subtarget.useXGOT()) { Callee = getAddrGlobalLargeGOT(S, DL, Ty, DAG, MipsII::MO_CALL_HI16, MipsII::MO_CALL_LO16, Chain, FuncInfo->callPtrInfo(MF, Sym)); IsCallReloc = true; } else { // PIC Callee = getAddrGlobal(S, DL, Ty, DAG, MipsII::MO_GOT_CALL, Chain, FuncInfo->callPtrInfo(MF, Sym)); IsCallReloc = true; } GlobalOrExternal = true; } SmallVector Ops(1, Chain); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); getOpndList(Ops, RegsToPass, IsPIC, GlobalOrExternal, InternalLinkage, IsCallReloc, CLI, Callee, Chain); if (IsTailCall) { MF.getFrameInfo().setHasTailCall(); SDValue Ret = DAG.getNode(MipsISD::TailCall, DL, MVT::Other, Ops); DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo)); return Ret; } Chain = DAG.getNode(MipsISD::JmpLink, DL, NodeTys, Ops); SDValue InFlag = Chain.getValue(1); DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); // Create the CALLSEQ_END node in the case of where it is not a call to // memcpy. if (!(MemcpyInByVal)) { Chain = DAG.getCALLSEQ_END(Chain, NextStackOffsetVal, DAG.getIntPtrConstant(0, DL, true), InFlag, DL); InFlag = Chain.getValue(1); } // Handle result values, copying them out of physregs into vregs that we // return. return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG, InVals, CLI); } /// LowerCallResult - Lower the result values of a call into the /// appropriate copies out of appropriate physical registers. SDValue MipsTargetLowering::LowerCallResult( SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl &InVals, TargetLowering::CallLoweringInfo &CLI) const { // Assign locations to each value returned by this call. SmallVector RVLocs; MipsCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); const ExternalSymbolSDNode *ES = dyn_cast_or_null(CLI.Callee.getNode()); CCInfo.AnalyzeCallResult(Ins, RetCC_Mips, CLI.RetTy, ES ? ES->getSymbol() : nullptr); // Copy all of the result registers out of their specified physreg. 
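  // For instance, a 64-bit integer return value under O32 comes back split
  // across the $v0/$v1 pair; each piece is copied out of its physical
  // register below and threaded onto the chain/glue.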
for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); SDValue Val = DAG.getCopyFromReg(Chain, DL, RVLocs[i].getLocReg(), RVLocs[i].getLocVT(), InFlag); Chain = Val.getValue(1); InFlag = Val.getValue(2); if (VA.isUpperBitsInLoc()) { unsigned ValSizeInBits = Ins[i].ArgVT.getSizeInBits(); unsigned LocSizeInBits = VA.getLocVT().getSizeInBits(); unsigned Shift = VA.getLocInfo() == CCValAssign::ZExtUpper ? ISD::SRL : ISD::SRA; Val = DAG.getNode( Shift, DL, VA.getLocVT(), Val, DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT())); } switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::BCvt: Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); break; case CCValAssign::AExt: case CCValAssign::AExtUpper: Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val); break; case CCValAssign::ZExt: case CCValAssign::ZExtUpper: Val = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Val, DAG.getValueType(VA.getValVT())); Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val); break; case CCValAssign::SExt: case CCValAssign::SExtUpper: Val = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Val, DAG.getValueType(VA.getValVT())); Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val); break; } InVals.push_back(Val); } return Chain; } static SDValue UnpackFromArgumentSlot(SDValue Val, const CCValAssign &VA, EVT ArgVT, const SDLoc &DL, SelectionDAG &DAG) { MVT LocVT = VA.getLocVT(); EVT ValVT = VA.getValVT(); // Shift into the upper bits if necessary. switch (VA.getLocInfo()) { default: break; case CCValAssign::AExtUpper: case CCValAssign::SExtUpper: case CCValAssign::ZExtUpper: { unsigned ValSizeInBits = ArgVT.getSizeInBits(); unsigned LocSizeInBits = VA.getLocVT().getSizeInBits(); unsigned Opcode = VA.getLocInfo() == CCValAssign::ZExtUpper ? ISD::SRL : ISD::SRA; Val = DAG.getNode( Opcode, DL, VA.getLocVT(), Val, DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT())); break; } } // If this is an value smaller than the argument slot size (32-bit for O32, // 64-bit for N32/N64), it has been promoted in some way to the argument slot // size. Extract the value and insert any appropriate assertions regarding // sign/zero extension. switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::AExtUpper: case CCValAssign::AExt: Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); break; case CCValAssign::SExtUpper: case CCValAssign::SExt: Val = DAG.getNode(ISD::AssertSext, DL, LocVT, Val, DAG.getValueType(ValVT)); Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); break; case CCValAssign::ZExtUpper: case CCValAssign::ZExt: Val = DAG.getNode(ISD::AssertZext, DL, LocVT, Val, DAG.getValueType(ValVT)); Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val); break; case CCValAssign::BCvt: Val = DAG.getNode(ISD::BITCAST, DL, ValVT, Val); break; } return Val; } //===----------------------------------------------------------------------===// // Formal Arguments Calling Convention Implementation //===----------------------------------------------------------------------===// /// LowerFormalArguments - transform physical registers into virtual registers /// and generate load operations for arguments places on the stack. 
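/// For example, under O32 the first four integer arguments of
///   int f(int a, int b, int c, int d, int e);
/// arrive in $a0-$a3 and are copied out of those registers here, while 'e'
/// is loaded from its fixed slot in the caller's argument area (illustrative
/// signature, not taken from a test case).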
SDValue MipsTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MipsFunctionInfo *MipsFI = MF.getInfo(); MipsFI->setVarArgsFrameIndex(0); // Used with vargs to acumulate store chains. std::vector OutChains; // Assign locations to all of the incoming arguments. SmallVector ArgLocs; MipsCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), Align(1)); const Function &Func = DAG.getMachineFunction().getFunction(); Function::const_arg_iterator FuncArg = Func.arg_begin(); if (Func.hasFnAttribute("interrupt") && !Func.arg_empty()) report_fatal_error( "Functions with the interrupt attribute cannot have arguments!"); CCInfo.AnalyzeFormalArguments(Ins, CC_Mips_FixedArg); MipsFI->setFormalArgInfo(CCInfo.getNextStackOffset(), CCInfo.getInRegsParamsCount() > 0); unsigned CurArgIdx = 0; CCInfo.rewindByValRegsInfo(); for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) { CCValAssign &VA = ArgLocs[i]; if (Ins[InsIdx].isOrigArg()) { std::advance(FuncArg, Ins[InsIdx].getOrigArgIndex() - CurArgIdx); CurArgIdx = Ins[InsIdx].getOrigArgIndex(); } EVT ValVT = VA.getValVT(); ISD::ArgFlagsTy Flags = Ins[InsIdx].Flags; bool IsRegLoc = VA.isRegLoc(); if (Flags.isByVal()) { assert(Ins[InsIdx].isOrigArg() && "Byval arguments cannot be implicit"); unsigned FirstByValReg, LastByValReg; unsigned ByValIdx = CCInfo.getInRegsParamsProcessed(); CCInfo.getInRegsParamInfo(ByValIdx, FirstByValReg, LastByValReg); assert(Flags.getByValSize() && "ByVal args of size 0 should have been ignored by front-end."); assert(ByValIdx < CCInfo.getInRegsParamsCount()); copyByValRegs(Chain, DL, OutChains, DAG, Flags, InVals, &*FuncArg, FirstByValReg, LastByValReg, VA, CCInfo); CCInfo.nextInRegsParam(); continue; } // Arguments stored on registers if (IsRegLoc) { MVT RegVT = VA.getLocVT(); Register ArgReg = VA.getLocReg(); const TargetRegisterClass *RC = getRegClassFor(RegVT); // Transform the arguments stored on // physical registers into virtual ones unsigned Reg = addLiveIn(DAG.getMachineFunction(), ArgReg, RC); SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT); ArgValue = UnpackFromArgumentSlot(ArgValue, VA, Ins[InsIdx].ArgVT, DL, DAG); // Handle floating point arguments passed in integer registers and // long double arguments passed in floating point registers. 
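      // e.g. on O32 an f64 argument that was assigned to an A-register pair
      // arrives as two i32 halves; they are recombined below with
      // BuildPairF64 (typically mtc1/mthc1) into a single f64 virtual
      // register.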
if ((RegVT == MVT::i32 && ValVT == MVT::f32) || (RegVT == MVT::i64 && ValVT == MVT::f64) || (RegVT == MVT::f64 && ValVT == MVT::i64)) ArgValue = DAG.getNode(ISD::BITCAST, DL, ValVT, ArgValue); else if (ABI.IsO32() && RegVT == MVT::i32 && ValVT == MVT::f64) { assert(VA.needsCustom() && "Expected custom argument for f64 split"); CCValAssign &NextVA = ArgLocs[++i]; unsigned Reg2 = addLiveIn(DAG.getMachineFunction(), NextVA.getLocReg(), RC); SDValue ArgValue2 = DAG.getCopyFromReg(Chain, DL, Reg2, RegVT); if (!Subtarget.isLittle()) std::swap(ArgValue, ArgValue2); ArgValue = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, ArgValue, ArgValue2); } InVals.push_back(ArgValue); } else { // VA.isRegLoc() MVT LocVT = VA.getLocVT(); assert(!VA.needsCustom() && "unexpected custom memory argument"); if (ABI.IsO32()) { // We ought to be able to use LocVT directly but O32 sets it to i32 // when allocating floating point values to integer registers. // This shouldn't influence how we load the value into registers unless // we are targeting softfloat. if (VA.getValVT().isFloatingPoint() && !Subtarget.useSoftFloat()) LocVT = VA.getValVT(); } // Only arguments pased on the stack should make it here. assert(VA.isMemLoc()); // The stack pointer offset is relative to the caller stack frame. int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, VA.getLocMemOffset(), true); // Create load nodes to retrieve arguments from the stack SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); SDValue ArgValue = DAG.getLoad( LocVT, DL, Chain, FIN, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); OutChains.push_back(ArgValue.getValue(1)); ArgValue = UnpackFromArgumentSlot(ArgValue, VA, Ins[InsIdx].ArgVT, DL, DAG); InVals.push_back(ArgValue); } } for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) { if (ArgLocs[i].needsCustom()) { ++i; continue; } // The mips ABIs for returning structs by value requires that we copy // the sret argument into $v0 for the return. Save the argument into // a virtual register so that we can access it from the return points. if (Ins[InsIdx].Flags.isSRet()) { unsigned Reg = MipsFI->getSRetReturnReg(); if (!Reg) { Reg = MF.getRegInfo().createVirtualRegister( getRegClassFor(ABI.IsN64() ? MVT::i64 : MVT::i32)); MipsFI->setSRetReturnReg(Reg); } SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[i]); Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain); break; } } if (IsVarArg) writeVarArgRegs(OutChains, Chain, DL, DAG, CCInfo); // All stores are grouped in one node to allow the matching between // the size of Ins and InVals. 
This only happens when on varg functions if (!OutChains.empty()) { OutChains.push_back(Chain); Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); } return Chain; } //===----------------------------------------------------------------------===// // Return Value Calling Convention Implementation //===----------------------------------------------------------------------===// bool MipsTargetLowering::CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl &Outs, LLVMContext &Context) const { SmallVector RVLocs; MipsCCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); return CCInfo.CheckReturn(Outs, RetCC_Mips); } bool MipsTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { if ((ABI.IsN32() || ABI.IsN64()) && Type == MVT::i32) return true; return IsSigned; } SDValue MipsTargetLowering::LowerInterruptReturn(SmallVectorImpl &RetOps, const SDLoc &DL, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MipsFunctionInfo *MipsFI = MF.getInfo(); MipsFI->setISR(); return DAG.getNode(MipsISD::ERet, DL, MVT::Other, RetOps); } SDValue MipsTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &DL, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of // the return value to a location SmallVector RVLocs; MachineFunction &MF = DAG.getMachineFunction(); // CCState - Info about the registers and stack slot. MipsCCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); // Analyze return values. CCInfo.AnalyzeReturn(Outs, RetCC_Mips); SDValue Flag; SmallVector RetOps(1, Chain); // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { SDValue Val = OutVals[i]; CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); bool UseUpperBits = false; switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: break; case CCValAssign::BCvt: Val = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Val); break; case CCValAssign::AExtUpper: UseUpperBits = true; LLVM_FALLTHROUGH; case CCValAssign::AExt: Val = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Val); break; case CCValAssign::ZExtUpper: UseUpperBits = true; LLVM_FALLTHROUGH; case CCValAssign::ZExt: Val = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Val); break; case CCValAssign::SExtUpper: UseUpperBits = true; LLVM_FALLTHROUGH; case CCValAssign::SExt: Val = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Val); break; } if (UseUpperBits) { unsigned ValSizeInBits = Outs[i].ArgVT.getSizeInBits(); unsigned LocSizeInBits = VA.getLocVT().getSizeInBits(); Val = DAG.getNode( ISD::SHL, DL, VA.getLocVT(), Val, DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT())); } Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Flag); // Guarantee that all emitted copies are stuck together with flags. Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } // The mips ABIs for returning structs by value requires that we copy // the sret argument into $v0 for the return. We saved the argument into // a virtual register in the entry block, so now we copy the value out // and into $v0. 
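  // For example, for 'struct S big(void);' the caller passes a hidden pointer
  // to the result object as the first argument; the ABI expects that same
  // pointer back in $v0, which is what the copy below arranges (illustrative
  // signature).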
if (MF.getFunction().hasStructRetAttr()) { MipsFunctionInfo *MipsFI = MF.getInfo(); unsigned Reg = MipsFI->getSRetReturnReg(); if (!Reg) llvm_unreachable("sret virtual register not created in the entry block"); SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(DAG.getDataLayout())); unsigned V0 = ABI.IsN64() ? Mips::V0_64 : Mips::V0; Chain = DAG.getCopyToReg(Chain, DL, V0, Val, Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(V0, getPointerTy(DAG.getDataLayout()))); } RetOps[0] = Chain; // Update chain. // Add the flag if we have it. if (Flag.getNode()) RetOps.push_back(Flag); // ISRs must use "eret". if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt")) return LowerInterruptReturn(RetOps, DL, DAG); // Standard return on Mips is a "jr $ra" return DAG.getNode(MipsISD::Ret, DL, MVT::Other, RetOps); } //===----------------------------------------------------------------------===// // Mips Inline Assembly Support //===----------------------------------------------------------------------===// /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. MipsTargetLowering::ConstraintType MipsTargetLowering::getConstraintType(StringRef Constraint) const { // Mips specific constraints // GCC config/mips/constraints.md // // 'd' : An address register. Equivalent to r // unless generating MIPS16 code. // 'y' : Equivalent to r; retained for // backwards compatibility. // 'c' : A register suitable for use in an indirect // jump. This will always be $25 for -mabicalls. // 'l' : The lo register. 1 word storage. // 'x' : The hilo register pair. Double word storage. if (Constraint.size() == 1) { switch (Constraint[0]) { default : break; case 'd': case 'y': case 'f': case 'c': case 'l': case 'x': return C_RegisterClass; case 'R': return C_Memory; } } if (Constraint == "ZC") return C_Memory; return TargetLowering::getConstraintType(Constraint); } /// Examine constraint type and operand type and determine a weight value. /// This object must already have been set up with the operand type /// and the current alternative constraint selected. TargetLowering::ConstraintWeight MipsTargetLowering::getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const { ConstraintWeight weight = CW_Invalid; Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); // Look at the constraint type. 
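  // Illustrative (hypothetical) uses of these constraints in inline asm:
  //   asm("addu %0, %1, %2"  : "=d"(r) : "d"(a), "d"(b));  // GPR operands
  //   asm("addiu %0, %1, %2" : "=r"(r) : "r"(a), "I"(16)); // 16-bit immediate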
switch (*constraint) { default: weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); break; case 'd': case 'y': if (type->isIntegerTy()) weight = CW_Register; break; case 'f': // FPU or MSA register if (Subtarget.hasMSA() && type->isVectorTy() && type->getPrimitiveSizeInBits().getFixedSize() == 128) weight = CW_Register; else if (type->isFloatTy()) weight = CW_Register; break; case 'c': // $25 for indirect jumps case 'l': // lo register case 'x': // hilo register pair if (type->isIntegerTy()) weight = CW_SpecificReg; break; case 'I': // signed 16 bit immediate case 'J': // integer zero case 'K': // unsigned 16 bit immediate case 'L': // signed 32 bit immediate where lower 16 bits are 0 case 'N': // immediate in the range of -65535 to -1 (inclusive) case 'O': // signed 15 bit immediate (+- 16383) case 'P': // immediate in the range of 65535 to 1 (inclusive) if (isa(CallOperandVal)) weight = CW_Constant; break; case 'R': weight = CW_Memory; break; } return weight; } /// This is a helper function to parse a physical register string and split it /// into non-numeric and numeric parts (Prefix and Reg). The first boolean flag /// that is returned indicates whether parsing was successful. The second flag /// is true if the numeric part exists. static std::pair parsePhysicalReg(StringRef C, StringRef &Prefix, unsigned long long &Reg) { if (C.front() != '{' || C.back() != '}') return std::make_pair(false, false); // Search for the first numeric character. StringRef::const_iterator I, B = C.begin() + 1, E = C.end() - 1; I = std::find_if(B, E, isdigit); Prefix = StringRef(B, I - B); // The second flag is set to false if no numeric characters were found. if (I == E) return std::make_pair(true, false); // Parse the numeric characters. return std::make_pair(!getAsUnsignedInteger(StringRef(I, E - I), 10, Reg), true); } EVT MipsTargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT, ISD::NodeType) const { bool Cond = !Subtarget.isABI_O32() && VT.getSizeInBits() == 32; EVT MinVT = getRegisterType(Context, Cond ? MVT::i64 : MVT::i32); return VT.bitsLT(MinVT) ? MinVT : VT; } std::pair MipsTargetLowering:: parseRegForInlineAsmConstraint(StringRef C, MVT VT) const { const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); const TargetRegisterClass *RC; StringRef Prefix; unsigned long long Reg; std::pair R = parsePhysicalReg(C, Prefix, Reg); if (!R.first) return std::make_pair(0U, nullptr); if ((Prefix == "hi" || Prefix == "lo")) { // Parse hi/lo. // No numeric characters follow "hi" or "lo". if (R.second) return std::make_pair(0U, nullptr); RC = TRI->getRegClass(Prefix == "hi" ? Mips::HI32RegClassID : Mips::LO32RegClassID); return std::make_pair(*(RC->begin()), RC); } else if (Prefix.startswith("$msa")) { // Parse $msa(ir|csr|access|save|modify|request|map|unmap) // No numeric characters follow the name. if (R.second) return std::make_pair(0U, nullptr); Reg = StringSwitch(Prefix) .Case("$msair", Mips::MSAIR) .Case("$msacsr", Mips::MSACSR) .Case("$msaaccess", Mips::MSAAccess) .Case("$msasave", Mips::MSASave) .Case("$msamodify", Mips::MSAModify) .Case("$msarequest", Mips::MSARequest) .Case("$msamap", Mips::MSAMap) .Case("$msaunmap", Mips::MSAUnmap) .Default(0); if (!Reg) return std::make_pair(0U, nullptr); RC = TRI->getRegClass(Mips::MSACtrlRegClassID); return std::make_pair(Reg, RC); } if (!R.second) return std::make_pair(0U, nullptr); if (Prefix == "$f") { // Parse $f0-$f31. 
// If the size of FP registers is 64-bit or Reg is an even number, select // the 64-bit register class. Otherwise, select the 32-bit register class. if (VT == MVT::Other) VT = (Subtarget.isFP64bit() || !(Reg % 2)) ? MVT::f64 : MVT::f32; RC = getRegClassFor(VT); if (RC == &Mips::AFGR64RegClass) { assert(Reg % 2 == 0); Reg >>= 1; } } else if (Prefix == "$fcc") // Parse $fcc0-$fcc7. RC = TRI->getRegClass(Mips::FCCRegClassID); else if (Prefix == "$w") { // Parse $w0-$w31. RC = getRegClassFor((VT == MVT::Other) ? MVT::v16i8 : VT); } else { // Parse $0-$31. assert(Prefix == "$"); RC = getRegClassFor((VT == MVT::Other) ? MVT::i32 : VT); } assert(Reg < RC->getNumRegs()); return std::make_pair(*(RC->begin() + Reg), RC); } /// Given a register class constraint, like 'r', if this corresponds directly /// to an LLVM register class, return a register of 0 and the register class /// pointer. std::pair MipsTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'd': // Address register. Same as 'r' unless generating MIPS16 code. case 'y': // Same as 'r'. Exists for compatibility. case 'r': if (VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8 || VT == MVT::i1) { if (Subtarget.inMips16Mode()) return std::make_pair(0U, &Mips::CPU16RegsRegClass); return std::make_pair(0U, &Mips::GPR32RegClass); } if (VT == MVT::i64 && !Subtarget.isGP64bit()) return std::make_pair(0U, &Mips::GPR32RegClass); if (VT == MVT::i64 && Subtarget.isGP64bit()) return std::make_pair(0U, &Mips::GPR64RegClass); // This will generate an error message return std::make_pair(0U, nullptr); case 'f': // FPU or MSA register if (VT == MVT::v16i8) return std::make_pair(0U, &Mips::MSA128BRegClass); else if (VT == MVT::v8i16 || VT == MVT::v8f16) return std::make_pair(0U, &Mips::MSA128HRegClass); else if (VT == MVT::v4i32 || VT == MVT::v4f32) return std::make_pair(0U, &Mips::MSA128WRegClass); else if (VT == MVT::v2i64 || VT == MVT::v2f64) return std::make_pair(0U, &Mips::MSA128DRegClass); else if (VT == MVT::f32) return std::make_pair(0U, &Mips::FGR32RegClass); else if ((VT == MVT::f64) && (!Subtarget.isSingleFloat())) { if (Subtarget.isFP64bit()) return std::make_pair(0U, &Mips::FGR64RegClass); return std::make_pair(0U, &Mips::AFGR64RegClass); } break; case 'c': // register suitable for indirect jump if (VT == MVT::i32) return std::make_pair((unsigned)Mips::T9, &Mips::GPR32RegClass); if (VT == MVT::i64) return std::make_pair((unsigned)Mips::T9_64, &Mips::GPR64RegClass); // This will generate an error message return std::make_pair(0U, nullptr); case 'l': // use the `lo` register to store values // that are no bigger than a word if (VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) return std::make_pair((unsigned)Mips::LO0, &Mips::LO32RegClass); return std::make_pair((unsigned)Mips::LO0_64, &Mips::LO64RegClass); case 'x': // use the concatenated `hi` and `lo` registers // to store doubleword values // Fixme: Not triggering the use of both hi and low // This will generate an error message return std::make_pair(0U, nullptr); } } if (!Constraint.empty()) { std::pair R; R = parseRegForInlineAsmConstraint(Constraint, VT); if (R.second) return R; } return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. 
void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector&Ops, SelectionDAG &DAG) const { SDLoc DL(Op); SDValue Result; // Only support length 1 constraints for now. if (Constraint.length() > 1) return; char ConstraintLetter = Constraint[0]; switch (ConstraintLetter) { default: break; // This will fall through to the generic implementation case 'I': // Signed 16 bit constant // If this fails, the parent routine will give an error if (ConstantSDNode *C = dyn_cast(Op)) { EVT Type = Op.getValueType(); int64_t Val = C->getSExtValue(); if (isInt<16>(Val)) { Result = DAG.getTargetConstant(Val, DL, Type); break; } } return; case 'J': // integer zero if (ConstantSDNode *C = dyn_cast(Op)) { EVT Type = Op.getValueType(); int64_t Val = C->getZExtValue(); if (Val == 0) { Result = DAG.getTargetConstant(0, DL, Type); break; } } return; case 'K': // unsigned 16 bit immediate if (ConstantSDNode *C = dyn_cast(Op)) { EVT Type = Op.getValueType(); uint64_t Val = (uint64_t)C->getZExtValue(); if (isUInt<16>(Val)) { Result = DAG.getTargetConstant(Val, DL, Type); break; } } return; case 'L': // signed 32 bit immediate where lower 16 bits are 0 if (ConstantSDNode *C = dyn_cast(Op)) { EVT Type = Op.getValueType(); int64_t Val = C->getSExtValue(); if ((isInt<32>(Val)) && ((Val & 0xffff) == 0)){ Result = DAG.getTargetConstant(Val, DL, Type); break; } } return; case 'N': // immediate in the range of -65535 to -1 (inclusive) if (ConstantSDNode *C = dyn_cast(Op)) { EVT Type = Op.getValueType(); int64_t Val = C->getSExtValue(); if ((Val >= -65535) && (Val <= -1)) { Result = DAG.getTargetConstant(Val, DL, Type); break; } } return; case 'O': // signed 15 bit immediate if (ConstantSDNode *C = dyn_cast(Op)) { EVT Type = Op.getValueType(); int64_t Val = C->getSExtValue(); if ((isInt<15>(Val))) { Result = DAG.getTargetConstant(Val, DL, Type); break; } } return; case 'P': // immediate in the range of 1 to 65535 (inclusive) if (ConstantSDNode *C = dyn_cast(Op)) { EVT Type = Op.getValueType(); int64_t Val = C->getSExtValue(); if ((Val <= 65535) && (Val >= 1)) { Result = DAG.getTargetConstant(Val, DL, Type); break; } } return; } if (Result.getNode()) { Ops.push_back(Result); return; } TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } bool MipsTargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const { // No global is ever allowed as a base. if (AM.BaseGV) return false; switch (AM.Scale) { case 0: // "r+i" or just "i", depending on HasBaseReg. break; case 1: if (!AM.HasBaseReg) // allow "r+i". break; return false; // disallow "r+r" or "r+r+i". default: return false; } return true; } bool MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The Mips target isn't yet aware of offsets. return false; } EVT MipsTargetLowering::getOptimalMemOpType( const MemOp &Op, const AttributeList &FuncAttributes) const { if (Subtarget.hasMips64()) return MVT::i64; return MVT::i32; } bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const { if (VT != MVT::f32 && VT != MVT::f64) return false; if (Imm.isNegZero()) return false; return Imm.isZero(); } unsigned MipsTargetLowering::getJumpTableEncoding() const { // FIXME: For space reasons this should be: EK_GPRel32BlockAddress. 
if (ABI.IsN64() && isPositionIndependent()) return MachineJumpTableInfo::EK_GPRel64BlockAddress; return TargetLowering::getJumpTableEncoding(); } bool MipsTargetLowering::useSoftFloat() const { return Subtarget.useSoftFloat(); } void MipsTargetLowering::copyByValRegs( SDValue Chain, const SDLoc &DL, std::vector &OutChains, SelectionDAG &DAG, const ISD::ArgFlagsTy &Flags, SmallVectorImpl &InVals, const Argument *FuncArg, unsigned FirstReg, unsigned LastReg, const CCValAssign &VA, MipsCCState &State) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned GPRSizeInBytes = Subtarget.getGPRSizeInBytes(); unsigned NumRegs = LastReg - FirstReg; unsigned RegAreaSize = NumRegs * GPRSizeInBytes; unsigned FrameObjSize = std::max(Flags.getByValSize(), RegAreaSize); int FrameObjOffset; ArrayRef ByValArgRegs = ABI.GetByValArgRegs(); if (RegAreaSize) FrameObjOffset = (int)ABI.GetCalleeAllocdArgSizeInBytes(State.getCallingConv()) - (int)((ByValArgRegs.size() - FirstReg) * GPRSizeInBytes); else FrameObjOffset = VA.getLocMemOffset(); // Create frame object. EVT PtrTy = getPointerTy(DAG.getDataLayout()); // Make the fixed object stored to mutable so that the load instructions // referencing it have their memory dependencies added. // Set the frame object as isAliased which clears the underlying objects // vector in ScheduleDAGInstrs::buildSchedGraph() resulting in addition of all // stores as dependencies for loads referencing this fixed object. int FI = MFI.CreateFixedObject(FrameObjSize, FrameObjOffset, false, true); SDValue FIN = DAG.getFrameIndex(FI, PtrTy); InVals.push_back(FIN); if (!NumRegs) return; // Copy arg registers. MVT RegTy = MVT::getIntegerVT(GPRSizeInBytes * 8); const TargetRegisterClass *RC = getRegClassFor(RegTy); for (unsigned I = 0; I < NumRegs; ++I) { unsigned ArgReg = ByValArgRegs[FirstReg + I]; unsigned VReg = addLiveIn(MF, ArgReg, RC); unsigned Offset = I * GPRSizeInBytes; SDValue StorePtr = DAG.getNode(ISD::ADD, DL, PtrTy, FIN, DAG.getConstant(Offset, DL, PtrTy)); SDValue Store = DAG.getStore(Chain, DL, DAG.getRegister(VReg, RegTy), StorePtr, MachinePointerInfo(FuncArg, Offset)); OutChains.push_back(Store); } } // Copy byVal arg to registers and stack. void MipsTargetLowering::passByValArg( SDValue Chain, const SDLoc &DL, std::deque> &RegsToPass, SmallVectorImpl &MemOpChains, SDValue StackPtr, MachineFrameInfo &MFI, SelectionDAG &DAG, SDValue Arg, unsigned FirstReg, unsigned LastReg, const ISD::ArgFlagsTy &Flags, bool isLittle, const CCValAssign &VA) const { unsigned ByValSizeInBytes = Flags.getByValSize(); unsigned OffsetInBytes = 0; // From beginning of struct unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes(); Align Alignment = std::min(Flags.getNonZeroByValAlign(), Align(RegSizeInBytes)); EVT PtrTy = getPointerTy(DAG.getDataLayout()), RegTy = MVT::getIntegerVT(RegSizeInBytes * 8); unsigned NumRegs = LastReg - FirstReg; if (NumRegs) { ArrayRef ArgRegs = ABI.GetByValArgRegs(); bool LeftoverBytes = (NumRegs * RegSizeInBytes > ByValSizeInBytes); unsigned I = 0; // Copy words to registers. 
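The loop that follows copies whole GPR-sized words of the by-value argument; any tail narrower than a register is then assembled from zero-extended sub-word loads combined with shifts and ORs. A standalone model of that tail step for the little-endian case (illustrative only; packTailLE is a hypothetical helper and the memcpy assumes a little-endian host):

#include <cassert>
#include <cstdint>
#include <cstring>

// Assemble a register-sized value from a tail of len bytes (1..3): try a
// halfword load, then a byte load, mirroring the little-endian shamt
// placement used in the sub-word loop below.
static std::uint32_t packTailLE(const unsigned char *src, unsigned len) {
  assert(len > 0 && len < 4 && "only the sub-register tail is modelled");
  std::uint32_t val = 0;
  unsigned loaded = 0;
  for (unsigned chunk = 2; loaded < len; chunk /= 2) {
    if (len - loaded < chunk)
      continue;                                  // tail too small for this width
    std::uint32_t piece = 0;
    std::memcpy(&piece, src + loaded, chunk);    // zero-extending sub-word load
    val |= piece << (loaded * 8);                // shamt = bytes already loaded
    loaded += chunk;
  }
  return val;
}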
for (; I < NumRegs - LeftoverBytes; ++I, OffsetInBytes += RegSizeInBytes) { SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, DAG.getConstant(OffsetInBytes, DL, PtrTy)); SDValue LoadVal = DAG.getLoad(RegTy, DL, Chain, LoadPtr, MachinePointerInfo(), Alignment); MemOpChains.push_back(LoadVal.getValue(1)); unsigned ArgReg = ArgRegs[FirstReg + I]; RegsToPass.push_back(std::make_pair(ArgReg, LoadVal)); } // Return if the struct has been fully copied. if (ByValSizeInBytes == OffsetInBytes) return; // Copy the remainder of the byval argument with sub-word loads and shifts. if (LeftoverBytes) { SDValue Val; for (unsigned LoadSizeInBytes = RegSizeInBytes / 2, TotalBytesLoaded = 0; OffsetInBytes < ByValSizeInBytes; LoadSizeInBytes /= 2) { unsigned RemainingSizeInBytes = ByValSizeInBytes - OffsetInBytes; if (RemainingSizeInBytes < LoadSizeInBytes) continue; // Load subword. SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, DAG.getConstant(OffsetInBytes, DL, PtrTy)); SDValue LoadVal = DAG.getExtLoad( ISD::ZEXTLOAD, DL, RegTy, Chain, LoadPtr, MachinePointerInfo(), MVT::getIntegerVT(LoadSizeInBytes * 8), Alignment); MemOpChains.push_back(LoadVal.getValue(1)); // Shift the loaded value. unsigned Shamt; if (isLittle) Shamt = TotalBytesLoaded * 8; else Shamt = (RegSizeInBytes - (TotalBytesLoaded + LoadSizeInBytes)) * 8; SDValue Shift = DAG.getNode(ISD::SHL, DL, RegTy, LoadVal, DAG.getConstant(Shamt, DL, MVT::i32)); if (Val.getNode()) Val = DAG.getNode(ISD::OR, DL, RegTy, Val, Shift); else Val = Shift; OffsetInBytes += LoadSizeInBytes; TotalBytesLoaded += LoadSizeInBytes; Alignment = std::min(Alignment, Align(LoadSizeInBytes)); } unsigned ArgReg = ArgRegs[FirstReg + I]; RegsToPass.push_back(std::make_pair(ArgReg, Val)); return; } } // Copy remainder of byval arg to it with memcpy. unsigned MemCpySize = ByValSizeInBytes - OffsetInBytes; SDValue Src = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, DAG.getConstant(OffsetInBytes, DL, PtrTy)); SDValue Dst = DAG.getNode(ISD::ADD, DL, PtrTy, StackPtr, DAG.getIntPtrConstant(VA.getLocMemOffset(), DL)); Chain = DAG.getMemcpy( Chain, DL, Dst, Src, DAG.getConstant(MemCpySize, DL, PtrTy), Align(Alignment), /*isVolatile=*/false, /*AlwaysInline=*/false, /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo()); MemOpChains.push_back(Chain); } void MipsTargetLowering::writeVarArgRegs(std::vector &OutChains, SDValue Chain, const SDLoc &DL, SelectionDAG &DAG, CCState &State) const { ArrayRef ArgRegs = ABI.GetVarArgRegs(); unsigned Idx = State.getFirstUnallocated(ArgRegs); unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes(); MVT RegTy = MVT::getIntegerVT(RegSizeInBytes * 8); const TargetRegisterClass *RC = getRegClassFor(RegTy); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); MipsFunctionInfo *MipsFI = MF.getInfo(); // Offset of the first variable argument from stack pointer. int VaArgOffset; if (ArgRegs.size() == Idx) VaArgOffset = alignTo(State.getNextStackOffset(), RegSizeInBytes); else { VaArgOffset = (int)ABI.GetCalleeAllocdArgSizeInBytes(State.getCallingConv()) - (int)(RegSizeInBytes * (ArgRegs.size() - Idx)); } // Record the frame index of the first variable argument // which is a value necessary to VASTART. int FI = MFI.CreateFixedObject(RegSizeInBytes, VaArgOffset, true); MipsFI->setVarArgsFrameIndex(FI); // Copy the integer registers that have not been used for argument passing // to the argument register save area. 
For O32, the save area is allocated // in the caller's stack frame, while for N32/64, it is allocated in the // callee's stack frame. for (unsigned I = Idx; I < ArgRegs.size(); ++I, VaArgOffset += RegSizeInBytes) { unsigned Reg = addLiveIn(MF, ArgRegs[I], RC); SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy); FI = MFI.CreateFixedObject(RegSizeInBytes, VaArgOffset, true); SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff, MachinePointerInfo()); cast(Store.getNode())->getMemOperand()->setValue( (Value *)nullptr); OutChains.push_back(Store); } } void MipsTargetLowering::HandleByVal(CCState *State, unsigned &Size, Align Alignment) const { const TargetFrameLowering *TFL = Subtarget.getFrameLowering(); assert(Size && "Byval argument's size shouldn't be 0."); Alignment = std::min(Alignment, TFL->getStackAlign()); unsigned FirstReg = 0; unsigned NumRegs = 0; if (State->getCallingConv() != CallingConv::Fast) { unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes(); ArrayRef IntArgRegs = ABI.GetByValArgRegs(); // FIXME: The O32 case actually describes no shadow registers. const MCPhysReg *ShadowRegs = ABI.IsO32() ? IntArgRegs.data() : Mips64DPRegs; // We used to check the size as well but we can't do that anymore since // CCState::HandleByVal() rounds up the size after calling this function. assert( Alignment >= Align(RegSizeInBytes) && "Byval argument's alignment should be a multiple of RegSizeInBytes."); FirstReg = State->getFirstUnallocated(IntArgRegs); // If Alignment > RegSizeInBytes, the first arg register must be even. // FIXME: This condition happens to do the right thing but it's not the // right way to test it. We want to check that the stack frame offset // of the register is aligned. if ((Alignment > RegSizeInBytes) && (FirstReg % 2)) { State->AllocateReg(IntArgRegs[FirstReg], ShadowRegs[FirstReg]); ++FirstReg; } // Mark the registers allocated. Size = alignTo(Size, RegSizeInBytes); for (unsigned I = FirstReg; Size > 0 && (I < IntArgRegs.size()); Size -= RegSizeInBytes, ++I, ++NumRegs) State->AllocateReg(IntArgRegs[I], ShadowRegs[I]); } State->addInRegsParamInfo(FirstReg, FirstReg + NumRegs); } MachineBasicBlock *MipsTargetLowering::emitPseudoSELECT(MachineInstr &MI, MachineBasicBlock *BB, bool isFPCmp, unsigned Opc) const { assert(!(Subtarget.hasMips4() || Subtarget.hasMips32()) && "Subtarget already supports SELECT nodes with the use of" "conditional-move instructions."); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); // To "insert" a SELECT instruction, we actually have to insert the // diamond control-flow pattern. The incoming instruction knows the // destination vreg to set, the condition code register to branch on, the // true/false values to select between, and a branch opcode to use. const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = ++BB->getIterator(); // thisMBB: // ... // TrueVal = ... // setcc r1, r2, r3 // bNE r1, r0, copy1MBB // fallthrough --> copy0MBB MachineBasicBlock *thisMBB = BB; MachineFunction *F = BB->getParent(); MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, copy0MBB); F->insert(It, sinkMBB); // Transfer the remainder of BB and its successor edges to sinkMBB. 
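The splice below performs that transfer. At the source level, the SELECT pseudo being expanded is just a conditional expression; on subtargets without conditional-move instructions it becomes the three-block diamond sketched in the comments (illustrative only):

// thisMBB evaluates the condition and branches to sinkMBB when it is
// non-zero; copy0MBB falls through providing the false value; a PHI in
// sinkMBB merges the two, which is what the BuildMI calls below construct.
int pick(int cond, int tval, int fval) { return cond != 0 ? tval : fval; }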
sinkMBB->splice(sinkMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(BB); // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); if (isFPCmp) { // bc1[tf] cc, sinkMBB BuildMI(BB, DL, TII->get(Opc)) .addReg(MI.getOperand(1).getReg()) .addMBB(sinkMBB); } else { // bne rs, $0, sinkMBB BuildMI(BB, DL, TII->get(Opc)) .addReg(MI.getOperand(1).getReg()) .addReg(Mips::ZERO) .addMBB(sinkMBB); } // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB BB = copy0MBB; // Update machine-CFG edges BB->addSuccessor(sinkMBB); // sinkMBB: // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] // ... BB = sinkMBB; BuildMI(*BB, BB->begin(), DL, TII->get(Mips::PHI), MI.getOperand(0).getReg()) .addReg(MI.getOperand(2).getReg()) .addMBB(thisMBB) .addReg(MI.getOperand(3).getReg()) .addMBB(copy0MBB); MI.eraseFromParent(); // The pseudo instruction is gone now. return BB; } MachineBasicBlock * MipsTargetLowering::emitPseudoD_SELECT(MachineInstr &MI, MachineBasicBlock *BB) const { assert(!(Subtarget.hasMips4() || Subtarget.hasMips32()) && "Subtarget already supports SELECT nodes with the use of" "conditional-move instructions."); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL = MI.getDebugLoc(); // D_SELECT substitutes two SELECT nodes that goes one after another and // have the same condition operand. On machines which don't have // conditional-move instruction, it reduces unnecessary branch instructions // which are result of using two diamond patterns that are result of two // SELECT pseudo instructions. const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = ++BB->getIterator(); // thisMBB: // ... // TrueVal = ... // setcc r1, r2, r3 // bNE r1, r0, copy1MBB // fallthrough --> copy0MBB MachineBasicBlock *thisMBB = BB; MachineFunction *F = BB->getParent(); MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, copy0MBB); F->insert(It, sinkMBB); // Transfer the remainder of BB and its successor edges to sinkMBB. sinkMBB->splice(sinkMBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)), BB->end()); sinkMBB->transferSuccessorsAndUpdatePHIs(BB); // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); // bne rs, $0, sinkMBB BuildMI(BB, DL, TII->get(Mips::BNE)) .addReg(MI.getOperand(2).getReg()) .addReg(Mips::ZERO) .addMBB(sinkMBB); // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB BB = copy0MBB; // Update machine-CFG edges BB->addSuccessor(sinkMBB); // sinkMBB: // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] // ... BB = sinkMBB; // Use two PHI nodes to select two reults BuildMI(*BB, BB->begin(), DL, TII->get(Mips::PHI), MI.getOperand(0).getReg()) .addReg(MI.getOperand(3).getReg()) .addMBB(thisMBB) .addReg(MI.getOperand(5).getReg()) .addMBB(copy0MBB); BuildMI(*BB, BB->begin(), DL, TII->get(Mips::PHI), MI.getOperand(1).getReg()) .addReg(MI.getOperand(4).getReg()) .addMBB(thisMBB) .addReg(MI.getOperand(6).getReg()) .addMBB(copy0MBB); MI.eraseFromParent(); // The pseudo instruction is gone now. return BB; } // FIXME? Maybe this could be a TableGen attribute on some registers and // this table could be generated automatically from RegInfo. 
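getRegisterByName, defined next, backs global register variables: clang lowers reads and writes of them to llvm.read_register / llvm.write_register, and the backend resolves the name here. A usage sketch of the case the updated comment below mentions, plus the newly accepted "sp" name (illustrative; assumes a MIPS-targeted clang or gcc):

// The MIPS Linux kernel reaches its per-thread data through $28 ($gp):
register unsigned long current_gp asm("$28");
// With the change below, the name "sp" is recognized the same way:
register unsigned long stack_ptr asm("sp");

unsigned long read_gp(void) { return current_gp; }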
Register MipsTargetLowering::getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const { - // Named registers is expected to be fairly rare. For now, just support $28 - // since the linux kernel uses it. + // The Linux kernel uses $28 and sp. if (Subtarget.isGP64bit()) { Register Reg = StringSwitch(RegName) - .Case("$28", Mips::GP_64) - .Default(Register()); + .Case("$28", Mips::GP_64) + .Case("sp", Mips::SP_64) + .Default(Register()); if (Reg) return Reg; } else { Register Reg = StringSwitch(RegName) - .Case("$28", Mips::GP) - .Default(Register()); + .Case("$28", Mips::GP) + .Case("sp", Mips::SP) + .Default(Register()); if (Reg) return Reg; } report_fatal_error("Invalid register name global variable"); } MachineBasicBlock *MipsTargetLowering::emitLDR_W(MachineInstr &MI, MachineBasicBlock *BB) const { MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); const bool IsLittle = Subtarget.isLittle(); DebugLoc DL = MI.getDebugLoc(); Register Dest = MI.getOperand(0).getReg(); Register Address = MI.getOperand(1).getReg(); unsigned Imm = MI.getOperand(2).getImm(); MachineBasicBlock::iterator I(MI); if (Subtarget.hasMips32r6() || Subtarget.hasMips64r6()) { // Mips release 6 can load from adress that is not naturally-aligned. Register Temp = MRI.createVirtualRegister(&Mips::GPR32RegClass); BuildMI(*BB, I, DL, TII->get(Mips::LW)) .addDef(Temp) .addUse(Address) .addImm(Imm); BuildMI(*BB, I, DL, TII->get(Mips::FILL_W)).addDef(Dest).addUse(Temp); } else { // Mips release 5 needs to use instructions that can load from an unaligned // memory address. Register LoadHalf = MRI.createVirtualRegister(&Mips::GPR32RegClass); Register LoadFull = MRI.createVirtualRegister(&Mips::GPR32RegClass); Register Undef = MRI.createVirtualRegister(&Mips::GPR32RegClass); BuildMI(*BB, I, DL, TII->get(Mips::IMPLICIT_DEF)).addDef(Undef); BuildMI(*BB, I, DL, TII->get(Mips::LWR)) .addDef(LoadHalf) .addUse(Address) .addImm(Imm + (IsLittle ? 0 : 3)) .addUse(Undef); BuildMI(*BB, I, DL, TII->get(Mips::LWL)) .addDef(LoadFull) .addUse(Address) .addImm(Imm + (IsLittle ? 3 : 0)) .addUse(LoadHalf); BuildMI(*BB, I, DL, TII->get(Mips::FILL_W)).addDef(Dest).addUse(LoadFull); } MI.eraseFromParent(); return BB; } MachineBasicBlock *MipsTargetLowering::emitLDR_D(MachineInstr &MI, MachineBasicBlock *BB) const { MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); const bool IsLittle = Subtarget.isLittle(); DebugLoc DL = MI.getDebugLoc(); Register Dest = MI.getOperand(0).getReg(); Register Address = MI.getOperand(1).getReg(); unsigned Imm = MI.getOperand(2).getImm(); MachineBasicBlock::iterator I(MI); if (Subtarget.hasMips32r6() || Subtarget.hasMips64r6()) { // Mips release 6 can load from adress that is not naturally-aligned. if (Subtarget.isGP64bit()) { Register Temp = MRI.createVirtualRegister(&Mips::GPR64RegClass); BuildMI(*BB, I, DL, TII->get(Mips::LD)) .addDef(Temp) .addUse(Address) .addImm(Imm); BuildMI(*BB, I, DL, TII->get(Mips::FILL_D)).addDef(Dest).addUse(Temp); } else { Register Wtemp = MRI.createVirtualRegister(&Mips::MSA128WRegClass); Register Lo = MRI.createVirtualRegister(&Mips::GPR32RegClass); Register Hi = MRI.createVirtualRegister(&Mips::GPR32RegClass); BuildMI(*BB, I, DL, TII->get(Mips::LW)) .addDef(Lo) .addUse(Address) .addImm(Imm + (IsLittle ? 
0 : 4)); BuildMI(*BB, I, DL, TII->get(Mips::LW)) .addDef(Hi) .addUse(Address) .addImm(Imm + (IsLittle ? 4 : 0)); BuildMI(*BB, I, DL, TII->get(Mips::FILL_W)).addDef(Wtemp).addUse(Lo); BuildMI(*BB, I, DL, TII->get(Mips::INSERT_W), Dest) .addUse(Wtemp) .addUse(Hi) .addImm(1); } } else { // Mips release 5 needs to use instructions that can load from an unaligned // memory address. Register LoHalf = MRI.createVirtualRegister(&Mips::GPR32RegClass); Register LoFull = MRI.createVirtualRegister(&Mips::GPR32RegClass); Register LoUndef = MRI.createVirtualRegister(&Mips::GPR32RegClass); Register HiHalf = MRI.createVirtualRegister(&Mips::GPR32RegClass); Register HiFull = MRI.createVirtualRegister(&Mips::GPR32RegClass); Register HiUndef = MRI.createVirtualRegister(&Mips::GPR32RegClass); Register Wtemp = MRI.createVirtualRegister(&Mips::MSA128WRegClass); BuildMI(*BB, I, DL, TII->get(Mips::IMPLICIT_DEF)).addDef(LoUndef); BuildMI(*BB, I, DL, TII->get(Mips::LWR)) .addDef(LoHalf) .addUse(Address) .addImm(Imm + (IsLittle ? 0 : 7)) .addUse(LoUndef); BuildMI(*BB, I, DL, TII->get(Mips::LWL)) .addDef(LoFull) .addUse(Address) .addImm(Imm + (IsLittle ? 3 : 4)) .addUse(LoHalf); BuildMI(*BB, I, DL, TII->get(Mips::IMPLICIT_DEF)).addDef(HiUndef); BuildMI(*BB, I, DL, TII->get(Mips::LWR)) .addDef(HiHalf) .addUse(Address) .addImm(Imm + (IsLittle ? 4 : 3)) .addUse(HiUndef); BuildMI(*BB, I, DL, TII->get(Mips::LWL)) .addDef(HiFull) .addUse(Address) .addImm(Imm + (IsLittle ? 7 : 0)) .addUse(HiHalf); BuildMI(*BB, I, DL, TII->get(Mips::FILL_W)).addDef(Wtemp).addUse(LoFull); BuildMI(*BB, I, DL, TII->get(Mips::INSERT_W), Dest) .addUse(Wtemp) .addUse(HiFull) .addImm(1); } MI.eraseFromParent(); return BB; } MachineBasicBlock *MipsTargetLowering::emitSTR_W(MachineInstr &MI, MachineBasicBlock *BB) const { MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); const bool IsLittle = Subtarget.isLittle(); DebugLoc DL = MI.getDebugLoc(); Register StoreVal = MI.getOperand(0).getReg(); Register Address = MI.getOperand(1).getReg(); unsigned Imm = MI.getOperand(2).getImm(); MachineBasicBlock::iterator I(MI); if (Subtarget.hasMips32r6() || Subtarget.hasMips64r6()) { // Mips release 6 can store to adress that is not naturally-aligned. Register BitcastW = MRI.createVirtualRegister(&Mips::MSA128WRegClass); Register Tmp = MRI.createVirtualRegister(&Mips::GPR32RegClass); BuildMI(*BB, I, DL, TII->get(Mips::COPY)).addDef(BitcastW).addUse(StoreVal); BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_W)) .addDef(Tmp) .addUse(BitcastW) .addImm(0); BuildMI(*BB, I, DL, TII->get(Mips::SW)) .addUse(Tmp) .addUse(Address) .addImm(Imm); } else { // Mips release 5 needs to use instructions that can store to an unaligned // memory address. Register Tmp = MRI.createVirtualRegister(&Mips::GPR32RegClass); BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_W)) .addDef(Tmp) .addUse(StoreVal) .addImm(0); BuildMI(*BB, I, DL, TII->get(Mips::SWR)) .addUse(Tmp) .addUse(Address) .addImm(Imm + (IsLittle ? 0 : 3)); BuildMI(*BB, I, DL, TII->get(Mips::SWL)) .addUse(Tmp) .addUse(Address) .addImm(Imm + (IsLittle ? 
3 : 0)); } MI.eraseFromParent(); return BB; } MachineBasicBlock *MipsTargetLowering::emitSTR_D(MachineInstr &MI, MachineBasicBlock *BB) const { MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); const bool IsLittle = Subtarget.isLittle(); DebugLoc DL = MI.getDebugLoc(); Register StoreVal = MI.getOperand(0).getReg(); Register Address = MI.getOperand(1).getReg(); unsigned Imm = MI.getOperand(2).getImm(); MachineBasicBlock::iterator I(MI); if (Subtarget.hasMips32r6() || Subtarget.hasMips64r6()) { // Mips release 6 can store to adress that is not naturally-aligned. if (Subtarget.isGP64bit()) { Register BitcastD = MRI.createVirtualRegister(&Mips::MSA128DRegClass); Register Lo = MRI.createVirtualRegister(&Mips::GPR64RegClass); BuildMI(*BB, I, DL, TII->get(Mips::COPY)) .addDef(BitcastD) .addUse(StoreVal); BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_D)) .addDef(Lo) .addUse(BitcastD) .addImm(0); BuildMI(*BB, I, DL, TII->get(Mips::SD)) .addUse(Lo) .addUse(Address) .addImm(Imm); } else { Register BitcastW = MRI.createVirtualRegister(&Mips::MSA128WRegClass); Register Lo = MRI.createVirtualRegister(&Mips::GPR32RegClass); Register Hi = MRI.createVirtualRegister(&Mips::GPR32RegClass); BuildMI(*BB, I, DL, TII->get(Mips::COPY)) .addDef(BitcastW) .addUse(StoreVal); BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_W)) .addDef(Lo) .addUse(BitcastW) .addImm(0); BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_W)) .addDef(Hi) .addUse(BitcastW) .addImm(1); BuildMI(*BB, I, DL, TII->get(Mips::SW)) .addUse(Lo) .addUse(Address) .addImm(Imm + (IsLittle ? 0 : 4)); BuildMI(*BB, I, DL, TII->get(Mips::SW)) .addUse(Hi) .addUse(Address) .addImm(Imm + (IsLittle ? 4 : 0)); } } else { // Mips release 5 needs to use instructions that can store to an unaligned // memory address. Register Bitcast = MRI.createVirtualRegister(&Mips::MSA128WRegClass); Register Lo = MRI.createVirtualRegister(&Mips::GPR32RegClass); Register Hi = MRI.createVirtualRegister(&Mips::GPR32RegClass); BuildMI(*BB, I, DL, TII->get(Mips::COPY)).addDef(Bitcast).addUse(StoreVal); BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_W)) .addDef(Lo) .addUse(Bitcast) .addImm(0); BuildMI(*BB, I, DL, TII->get(Mips::COPY_S_W)) .addDef(Hi) .addUse(Bitcast) .addImm(1); BuildMI(*BB, I, DL, TII->get(Mips::SWR)) .addUse(Lo) .addUse(Address) .addImm(Imm + (IsLittle ? 0 : 3)); BuildMI(*BB, I, DL, TII->get(Mips::SWL)) .addUse(Lo) .addUse(Address) .addImm(Imm + (IsLittle ? 3 : 0)); BuildMI(*BB, I, DL, TII->get(Mips::SWR)) .addUse(Hi) .addUse(Address) .addImm(Imm + (IsLittle ? 4 : 7)); BuildMI(*BB, I, DL, TII->get(Mips::SWL)) .addUse(Hi) .addUse(Address) .addImm(Imm + (IsLittle ? 7 : 4)); } MI.eraseFromParent(); return BB; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp index 49babc24cb82..10abea7ebd32 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/Inliner.cpp @@ -1,1161 +1,1198 @@ //===- Inliner.cpp - Code common to all inliners --------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements the mechanics required to implement inlining without // missing any calls and updating the call graph. The decisions of which calls // are profitable to inline are implemented elsewhere. // //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/Inliner.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/InlineOrder.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ReplayInlineAdvisor.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/CallPromotionUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include #include #include #include #include #include #include using namespace llvm; #define DEBUG_TYPE "inline" STATISTIC(NumInlined, "Number of functions inlined"); STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined"); STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); STATISTIC(NumMergedAllocas, "Number of allocas merged together"); /// Flag to disable manual alloca merging. /// /// Merging of allocas was originally done as a stack-size saving technique /// prior to LLVM's code generator having support for stack coloring based on /// lifetime markers. It is now in the process of being removed. To experiment /// with disabling it and relying fully on lifetime marker based stack /// coloring, you can pass this flag to LLVM. 
static cl::opt DisableInlinedAllocaMerging("disable-inlined-alloca-merging", cl::init(false), cl::Hidden); +static cl::opt IntraSCCCostMultiplier( + "intra-scc-cost-multiplier", cl::init(2), cl::Hidden, + cl::desc( + "Cost multiplier to multiply onto inlined call sites where the " + "new call was previously an intra-SCC call (not relevant when the " + "original call was already intra-SCC). This can accumulate over " + "multiple inlinings (e.g. if a call site already had a cost " + "multiplier and one of its inlined calls was also subject to " + "this, the inlined call would have the original multiplier " + "multiplied by intra-scc-cost-multiplier). This is to prevent tons of " + "inlining through a child SCC which can cause terrible compile times")); + /// A flag for test, so we can print the content of the advisor when running it /// as part of the default (e.g. -O3) pipeline. static cl::opt KeepAdvisorForPrinting("keep-inline-advisor-for-printing", cl::init(false), cl::Hidden); extern cl::opt InlinerFunctionImportStats; static cl::opt CGSCCInlineReplayFile( "cgscc-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc( "Optimization remarks file containing inline remarks to be replayed " "by cgscc inlining."), cl::Hidden); static cl::opt CGSCCInlineReplayScope( "cgscc-inline-replay-scope", cl::init(ReplayInlinerSettings::Scope::Function), cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", "Replay on functions that have remarks associated " "with them (default)"), clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", "Replay on the entire module")), cl::desc("Whether inline replay should be applied to the entire " "Module or just the Functions (default) that are present as " "callers in remarks during cgscc inlining."), cl::Hidden); static cl::opt CGSCCInlineReplayFallback( "cgscc-inline-replay-fallback", cl::init(ReplayInlinerSettings::Fallback::Original), cl::values( clEnumValN( ReplayInlinerSettings::Fallback::Original, "Original", "All decisions not in replay send to original advisor (default)"), clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, "AlwaysInline", "All decisions not in replay are inlined"), clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", "All decisions not in replay are not inlined")), cl::desc( "How cgscc inline replay treats sites that don't come from the replay. " "Original: defers to original advisor, AlwaysInline: inline all sites " "not in replay, NeverInline: inline no sites not in replay"), cl::Hidden); static cl::opt CGSCCInlineReplayFormat( "cgscc-inline-replay-format", cl::init(CallSiteFormat::Format::LineColumnDiscriminator), cl::values( clEnumValN(CallSiteFormat::Format::Line, "Line", ""), clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", ":"), clEnumValN(CallSiteFormat::Format::LineDiscriminator, "LineDiscriminator", "."), clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, "LineColumnDiscriminator", ":. (default)")), cl::desc("How cgscc inline replay file is formatted"), cl::Hidden); static cl::opt InlineEnablePriorityOrder( "inline-enable-priority-order", cl::Hidden, cl::init(false), cl::desc("Enable the priority inline order for the inliner")); LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {} LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime) : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {} /// For this class, we declare that we require and preserve the call graph. 
/// If the derived class implements this method, it should /// always explicitly call the implementation here. void LegacyInlinerBase::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); AU.addRequired(); getAAResultsAnalysisUsage(AU); CallGraphSCCPass::getAnalysisUsage(AU); } using InlinedArrayAllocasTy = DenseMap>; /// Look at all of the allocas that we inlined through this call site. If we /// have already inlined other allocas through other calls into this function, /// then we know that they have disjoint lifetimes and that we can merge them. /// /// There are many heuristics possible for merging these allocas, and the /// different options have different tradeoffs. One thing that we *really* /// don't want to hurt is SRoA: once inlining happens, often allocas are no /// longer address taken and so they can be promoted. /// /// Our "solution" for that is to only merge allocas whose outermost type is an /// array type. These are usually not promoted because someone is using a /// variable index into them. These are also often the most important ones to /// merge. /// /// A better solution would be to have real memory lifetime markers in the IR /// and not have the inliner do any merging of allocas at all. This would /// allow the backend to do proper stack slot coloring of all allocas that /// *actually make it to the backend*, which is really what we want. /// /// Because we don't have this information, we do this simple and useful hack. static void mergeInlinedArrayAllocas(Function *Caller, InlineFunctionInfo &IFI, InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory) { SmallPtrSet UsedAllocas; // When processing our SCC, check to see if the call site was inlined from // some other call site. For example, if we're processing "A" in this code: // A() { B() } // B() { x = alloca ... C() } // C() { y = alloca ... } // Assume that C was not inlined into B initially, and so we're processing A // and decide to inline B into A. Doing this makes an alloca available for // reuse and makes a callsite (C) available for inlining. When we process // the C call site we don't want to do any alloca merging between X and Y // because their scopes are not disjoint. We could make this smarter by // keeping track of the inline history for each alloca in the // InlinedArrayAllocas but this isn't likely to be a significant win. if (InlineHistory != -1) // Only do merging for top-level call sites in SCC. return; // Loop over all the allocas we have so far and see if they can be merged with // a previously inlined alloca. If not, remember that we had it. for (unsigned AllocaNo = 0, E = IFI.StaticAllocas.size(); AllocaNo != E; ++AllocaNo) { AllocaInst *AI = IFI.StaticAllocas[AllocaNo]; // Don't bother trying to merge array allocations (they will usually be // canonicalized to be an allocation *of* an array), or allocations whose // type is not itself an array (because we're afraid of pessimizing SRoA). ArrayType *ATy = dyn_cast(AI->getAllocatedType()); if (!ATy || AI->isArrayAllocation()) continue; // Get the list of all available allocas for this array type. std::vector &AllocasForType = InlinedArrayAllocas[ATy]; // Loop over the allocas in AllocasForType to see if we can reuse one. Note // that we have to be careful not to reuse the same "available" alloca for // multiple different allocas that we just inlined, we use the 'UsedAllocas' // set to keep track of which "available" allocas are being used by this // function. Also, AllocasForType can be empty of course! 
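The loop below then tries to fold the fresh alloca into one of those previously inlined array allocas. A source-level picture of when that applies (illustrative only; f, g, h are made-up names):

// After f() and g() are both inlined into h(), bufA and bufB are array
// allocas with disjoint lifetimes in the same caller, so a single stack
// slot can serve both (the job lifetime markers / stack coloring now do).
static int f(int i) { int bufA[64] = {}; return bufA[i & 63]; }
static int g(int i) { int bufB[64] = {}; return bufB[i & 63]; }
int h(int i) { return f(i) + g(i); }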
bool MergedAwayAlloca = false; for (AllocaInst *AvailableAlloca : AllocasForType) { Align Align1 = AI->getAlign(); Align Align2 = AvailableAlloca->getAlign(); // The available alloca has to be in the right function, not in some other // function in this SCC. if (AvailableAlloca->getParent() != AI->getParent()) continue; // If the inlined function already uses this alloca then we can't reuse // it. if (!UsedAllocas.insert(AvailableAlloca).second) continue; // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare // success! LLVM_DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: " << *AvailableAlloca << '\n'); // Move affected dbg.declare calls immediately after the new alloca to // avoid the situation when a dbg.declare precedes its alloca. if (auto *L = LocalAsMetadata::getIfExists(AI)) if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L)) for (User *U : MDV->users()) if (DbgDeclareInst *DDI = dyn_cast(U)) DDI->moveBefore(AvailableAlloca->getNextNode()); AI->replaceAllUsesWith(AvailableAlloca); if (Align1 > Align2) AvailableAlloca->setAlignment(AI->getAlign()); AI->eraseFromParent(); MergedAwayAlloca = true; ++NumMergedAllocas; IFI.StaticAllocas[AllocaNo] = nullptr; break; } // If we already nuked the alloca, we're done with it. if (MergedAwayAlloca) continue; // If we were unable to merge away the alloca either because there are no // allocas of the right type available or because we reused them all // already, remember that this alloca came from an inlined function and mark // it used so we don't reuse it for other allocas from this inline // operation. AllocasForType.push_back(AI); UsedAllocas.insert(AI); } } /// If it is possible to inline the specified call site, /// do so and update the CallGraph for this operation. /// /// This function also does some basic book-keeping to update the IR. The /// InlinedArrayAllocas map keeps track of any allocas that are already /// available from other functions inlined into the caller. If we are able to /// inline this call site we attempt to reuse already available allocas or add /// any new allocas to the set if not possible. static InlineResult inlineCallIfPossible( CallBase &CB, InlineFunctionInfo &IFI, InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory, bool InsertLifetime, function_ref &AARGetter, ImportedFunctionsInliningStatistics &ImportedFunctionsStats) { Function *Callee = CB.getCalledFunction(); Function *Caller = CB.getCaller(); AAResults &AAR = AARGetter(*Callee); // Try to inline the function. Get the list of static allocas that were // inlined. InlineResult IR = InlineFunction(CB, IFI, &AAR, InsertLifetime); if (!IR.isSuccess()) return IR; if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) ImportedFunctionsStats.recordInline(*Caller, *Callee); AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee); if (!DisableInlinedAllocaMerging) mergeInlinedArrayAllocas(Caller, IFI, InlinedArrayAllocas, InlineHistory); return IR; // success } /// Return true if the specified inline history ID /// indicates an inline history that includes the specified function. 
static bool inlineHistoryIncludes( Function *F, int InlineHistoryID, const SmallVectorImpl> &InlineHistory) { while (InlineHistoryID != -1) { assert(unsigned(InlineHistoryID) < InlineHistory.size() && "Invalid inline history ID"); if (InlineHistory[InlineHistoryID].first == F) return true; InlineHistoryID = InlineHistory[InlineHistoryID].second; } return false; } bool LegacyInlinerBase::doInitialization(CallGraph &CG) { if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) ImportedFunctionsStats.setModuleInfo(CG.getModule()); return false; // No changes to CallGraph. } bool LegacyInlinerBase::runOnSCC(CallGraphSCC &SCC) { if (skipSCC(SCC)) return false; return inlineCalls(SCC); } static bool inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, std::function GetAssumptionCache, ProfileSummaryInfo *PSI, std::function GetTLI, bool InsertLifetime, function_ref GetInlineCost, function_ref AARGetter, ImportedFunctionsInliningStatistics &ImportedFunctionsStats) { SmallPtrSet SCCFunctions; LLVM_DEBUG(dbgs() << "Inliner visiting SCC:"); for (CallGraphNode *Node : SCC) { Function *F = Node->getFunction(); if (F) SCCFunctions.insert(F); LLVM_DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE")); } // Scan through and identify all call sites ahead of time so that we only // inline call sites in the original functions, not call sites that result // from inlining other functions. SmallVector, 16> CallSites; // When inlining a callee produces new call sites, we want to keep track of // the fact that they were inlined from the callee. This allows us to avoid // infinite inlining in some obscure cases. To represent this, we use an // index into the InlineHistory vector. SmallVector, 8> InlineHistory; for (CallGraphNode *Node : SCC) { Function *F = Node->getFunction(); if (!F || F->isDeclaration()) continue; OptimizationRemarkEmitter ORE(F); for (BasicBlock &BB : *F) for (Instruction &I : BB) { auto *CB = dyn_cast(&I); // If this isn't a call, or it is a call to an intrinsic, it can // never be inlined. if (!CB || isa(I)) continue; // If this is a direct call to an external function, we can never inline // it. If it is an indirect call, inlining may resolve it to be a // direct call, so we keep it. if (Function *Callee = CB->getCalledFunction()) if (Callee->isDeclaration()) { using namespace ore; setInlineRemark(*CB, "unavailable definition"); ORE.emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I) << NV("Callee", Callee) << " will not be inlined into " << NV("Caller", CB->getCaller()) << " because its definition is unavailable" << setIsVerbose(); }); continue; } CallSites.push_back(std::make_pair(CB, -1)); } } LLVM_DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n"); // If there are no calls in this function, exit early. if (CallSites.empty()) return false; // Now that we have all of the call sites, move the ones to functions in the // current SCC to the end of the list. unsigned FirstCallInSCC = CallSites.size(); for (unsigned I = 0; I < FirstCallInSCC; ++I) if (Function *F = CallSites[I].first->getCalledFunction()) if (SCCFunctions.count(F)) std::swap(CallSites[I--], CallSites[--FirstCallInSCC]); InlinedArrayAllocasTy InlinedArrayAllocas; InlineFunctionInfo InlineInfo(&CG, GetAssumptionCache, PSI); // Now that we have all of the call sites, loop over them and inline them if // it looks profitable to do so. 
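That loop consults the inline history built above; in isolation, the check is a parent-pointer walk over (callee, parent-index) entries. A self-contained sketch (Fn stands in for llvm::Function):

#include <utility>
#include <vector>

struct Fn;  // stand-in for llvm::Function in this sketch

// Each entry records the callee that was inlined and the index of the entry
// that introduced the call (-1 for an original call site). Walking the
// parent links answers "was F already inlined along this chain?".
static bool historyIncludes(const Fn *F, int ID,
                            const std::vector<std::pair<const Fn *, int>> &H) {
  while (ID != -1) {
    if (H[ID].first == F)
      return true;
    ID = H[ID].second;
  }
  return false;
}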
bool Changed = false; bool LocalChange; do { LocalChange = false; // Iterate over the outer loop because inlining functions can cause indirect // calls to become direct calls. // CallSites may be modified inside so ranged for loop can not be used. for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) { auto &P = CallSites[CSi]; CallBase &CB = *P.first; const int InlineHistoryID = P.second; Function *Caller = CB.getCaller(); Function *Callee = CB.getCalledFunction(); // We can only inline direct calls to non-declarations. if (!Callee || Callee->isDeclaration()) continue; bool IsTriviallyDead = isInstructionTriviallyDead(&CB, &GetTLI(*Caller)); if (!IsTriviallyDead) { // If this call site was obtained by inlining another function, verify // that the include path for the function did not include the callee // itself. If so, we'd be recursively inlining the same function, // which would provide the same callsites, which would cause us to // infinitely inline. if (InlineHistoryID != -1 && inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) { setInlineRemark(CB, "recursive"); continue; } } // FIXME for new PM: because of the old PM we currently generate ORE and // in turn BFI on demand. With the new PM, the ORE dependency should // just become a regular analysis dependency. OptimizationRemarkEmitter ORE(Caller); auto OIC = shouldInline(CB, GetInlineCost, ORE); // If the policy determines that we should inline this function, // delete the call instead. if (!OIC) continue; // If this call site is dead and it is to a readonly function, we should // just delete the call instead of trying to inline it, regardless of // size. This happens because IPSCCP propagates the result out of the // call and then we're left with the dead call. if (IsTriviallyDead) { LLVM_DEBUG(dbgs() << " -> Deleting dead call: " << CB << "\n"); // Update the call graph by deleting the edge from Callee to Caller. setInlineRemark(CB, "trivially dead"); CG[Caller]->removeCallEdgeFor(CB); CB.eraseFromParent(); ++NumCallsDeleted; } else { // Get DebugLoc to report. CB will be invalid after Inliner. DebugLoc DLoc = CB.getDebugLoc(); BasicBlock *Block = CB.getParent(); // Attempt to inline the function. using namespace ore; InlineResult IR = inlineCallIfPossible( CB, InlineInfo, InlinedArrayAllocas, InlineHistoryID, InsertLifetime, AARGetter, ImportedFunctionsStats); if (!IR.isSuccess()) { setInlineRemark(CB, std::string(IR.getFailureReason()) + "; " + inlineCostStr(*OIC)); ORE.emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block) << NV("Callee", Callee) << " will not be inlined into " << NV("Caller", Caller) << ": " << NV("Reason", IR.getFailureReason()); }); continue; } ++NumInlined; emitInlinedIntoBasedOnCost(ORE, DLoc, Block, *Callee, *Caller, *OIC); // If inlining this function gave us any new call sites, throw them // onto our worklist to process. They are useful inline candidates. if (!InlineInfo.InlinedCalls.empty()) { // Create a new inline history entry for this, so that we remember // that these new callsites came about due to inlining Callee. int NewHistoryID = InlineHistory.size(); InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID)); #ifndef NDEBUG // Make sure no dupplicates in the inline candidates. This could // happen when a callsite is simpilfied to reusing the return value // of another callsite during function cloning, thus the other // callsite will be reconsidered here. 
DenseSet DbgCallSites; for (auto &II : CallSites) DbgCallSites.insert(II.first); #endif for (Value *Ptr : InlineInfo.InlinedCalls) { #ifndef NDEBUG assert(DbgCallSites.count(dyn_cast(Ptr)) == 0); #endif CallSites.push_back( std::make_pair(dyn_cast(Ptr), NewHistoryID)); } } } // If we inlined or deleted the last possible call site to the function, // delete the function body now. if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() && // TODO: Can remove if in SCC now. !SCCFunctions.count(Callee) && // The function may be apparently dead, but if there are indirect // callgraph references to the node, we cannot delete it yet, this // could invalidate the CGSCC iterator. CG[Callee]->getNumReferences() == 0) { LLVM_DEBUG(dbgs() << " -> Deleting dead function: " << Callee->getName() << "\n"); CallGraphNode *CalleeNode = CG[Callee]; // Remove any call graph edges from the callee to its callees. CalleeNode->removeAllCalledFunctions(); // Removing the node for callee from the call graph and delete it. delete CG.removeFunctionFromModule(CalleeNode); ++NumDeleted; } // Remove this call site from the list. If possible, use // swap/pop_back for efficiency, but do not use it if doing so would // move a call site to a function in this SCC before the // 'FirstCallInSCC' barrier. if (SCC.isSingular()) { CallSites[CSi] = CallSites.back(); CallSites.pop_back(); } else { CallSites.erase(CallSites.begin() + CSi); } --CSi; Changed = true; LocalChange = true; } } while (LocalChange); return Changed; } bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) { CallGraph &CG = getAnalysis().getCallGraph(); ACT = &getAnalysis(); PSI = &getAnalysis().getPSI(); GetTLI = [&](Function &F) -> const TargetLibraryInfo & { return getAnalysis().getTLI(F); }; auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); }; return inlineCallsImpl( SCC, CG, GetAssumptionCache, PSI, GetTLI, InsertLifetime, [&](CallBase &CB) { return getInlineCost(CB); }, LegacyAARGetter(*this), ImportedFunctionsStats); } /// Remove now-dead linkonce functions at the end of /// processing to avoid breaking the SCC traversal. bool LegacyInlinerBase::doFinalization(CallGraph &CG) { if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) ImportedFunctionsStats.dump(InlinerFunctionImportStats == InlinerFunctionImportStatsOpts::Verbose); return removeDeadFunctions(CG); } /// Remove dead functions that are not included in DNR (Do Not Remove) list. bool LegacyInlinerBase::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) { SmallVector FunctionsToRemove; SmallVector DeadFunctionsInComdats; auto RemoveCGN = [&](CallGraphNode *CGN) { // Remove any call graph edges from the function to its callees. CGN->removeAllCalledFunctions(); // Remove any edges from the external node to the function's call graph // node. These edges might have been made irrelegant due to // optimization of the program. CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN); // Removing the node for callee from the call graph and delete it. FunctionsToRemove.push_back(CGN); }; // Scan for all of the functions, looking for ones that should now be removed // from the program. Insert the dead ones in the FunctionsToRemove set. for (const auto &I : CG) { CallGraphNode *CGN = I.second.get(); Function *F = CGN->getFunction(); if (!F || F->isDeclaration()) continue; // Handle the case when this function is called and we only want to care // about always-inline functions. 
This is a bit of a hack to share code // between here and the InlineAlways pass. if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline)) continue; // If the only remaining users of the function are dead constants, remove // them. F->removeDeadConstantUsers(); if (!F->isDefTriviallyDead()) continue; // It is unsafe to drop a function with discardable linkage from a COMDAT // without also dropping the other members of the COMDAT. // The inliner doesn't visit non-function entities which are in COMDAT // groups so it is unsafe to do so *unless* the linkage is local. if (!F->hasLocalLinkage()) { if (F->hasComdat()) { DeadFunctionsInComdats.push_back(F); continue; } } RemoveCGN(CGN); } if (!DeadFunctionsInComdats.empty()) { // Filter out the functions whose comdats remain alive. filterDeadComdatFunctions(DeadFunctionsInComdats); // Remove the rest. for (Function *F : DeadFunctionsInComdats) RemoveCGN(CG[F]); } if (FunctionsToRemove.empty()) return false; // Now that we know which functions to delete, do so. We didn't want to do // this inline, because that would invalidate our CallGraph::iterator // objects. :( // // Note that it doesn't matter that we are iterating over a non-stable order // here to do this, it doesn't matter which order the functions are deleted // in. array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end()); FunctionsToRemove.erase( std::unique(FunctionsToRemove.begin(), FunctionsToRemove.end()), FunctionsToRemove.end()); for (CallGraphNode *CGN : FunctionsToRemove) { delete CG.removeFunctionFromModule(CGN); ++NumDeleted; } return true; } InlineAdvisor & InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM, FunctionAnalysisManager &FAM, Module &M) { if (OwnedAdvisor) return *OwnedAdvisor; auto *IAA = MAM.getCachedResult(M); if (!IAA) { // It should still be possible to run the inliner as a stand-alone SCC pass, // for test scenarios. In that case, we default to the // DefaultInlineAdvisor, which doesn't need to keep state between SCC pass // runs. It also uses just the default InlineParams. // In this case, we need to use the provided FAM, which is valid for the // duration of the inliner pass, and thus the lifetime of the owned advisor. // The one we would get from the MAM can be invalidated as a result of the // inliner's activity. OwnedAdvisor = std::make_unique(M, FAM, getInlineParams()); if (!CGSCCInlineReplayFile.empty()) OwnedAdvisor = getReplayInlineAdvisor( M, FAM, M.getContext(), std::move(OwnedAdvisor), ReplayInlinerSettings{CGSCCInlineReplayFile, CGSCCInlineReplayScope, CGSCCInlineReplayFallback, {CGSCCInlineReplayFormat}}, /*EmitRemarks=*/true); return *OwnedAdvisor; } assert(IAA->getAdvisor() && "Expected a present InlineAdvisorAnalysis also have an " "InlineAdvisor initialized"); return *IAA->getAdvisor(); } PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR) { const auto &MAMProxy = AM.getResult(InitialC, CG); bool Changed = false; assert(InitialC.size() > 0 && "Cannot handle an empty SCC!"); Module &M = *InitialC.begin()->getFunction().getParent(); ProfileSummaryInfo *PSI = MAMProxy.getCachedResult(M); FunctionAnalysisManager &FAM = AM.getResult(InitialC, CG) .getManager(); InlineAdvisor &Advisor = getAdvisor(MAMProxy, FAM, M); Advisor.onPassEntry(); auto AdvisorOnExit = make_scope_exit([&] { Advisor.onPassExit(&InitialC); }); // We use a single common worklist for calls across the entire SCC. 
We // process these in-order and append new calls introduced during inlining to // the end. The PriorityInlineOrder is optional here, in which the smaller // callee would have a higher priority to inline. // // Note that this particular order of processing is actually critical to // avoid very bad behaviors. Consider *highly connected* call graphs where // each function contains a small amount of code and a couple of calls to // other functions. Because the LLVM inliner is fundamentally a bottom-up // inliner, it can handle gracefully the fact that these all appear to be // reasonable inlining candidates as it will flatten things until they become // too big to inline, and then move on and flatten another batch. // // However, when processing call edges *within* an SCC we cannot rely on this // bottom-up behavior. As a consequence, with heavily connected *SCCs* of // functions we can end up incrementally inlining N calls into each of // N functions because each incremental inlining decision looks good and we // don't have a topological ordering to prevent explosions. // // To compensate for this, we don't process transitive edges made immediate // by inlining until we've done one pass of inlining across the entire SCC. // Large, highly connected SCCs still lead to some amount of code bloat in // this model, but it is uniformly spread across all the functions in the SCC // and eventually they all become too large to inline, rather than // incrementally maknig a single function grow in a super linear fashion. std::unique_ptr>> Calls; if (InlineEnablePriorityOrder) Calls = std::make_unique>(); else Calls = std::make_unique>>(); assert(Calls != nullptr && "Expected an initialized InlineOrder"); // Populate the initial list of calls in this SCC. for (auto &N : InitialC) { auto &ORE = FAM.getResult(N.getFunction()); // We want to generally process call sites top-down in order for // simplifications stemming from replacing the call with the returned value // after inlining to be visible to subsequent inlining decisions. // FIXME: Using instructions sequence is a really bad way to do this. // Instead we should do an actual RPO walk of the function body. for (Instruction &I : instructions(N.getFunction())) if (auto *CB = dyn_cast(&I)) if (Function *Callee = CB->getCalledFunction()) { if (!Callee->isDeclaration()) Calls->push({CB, -1}); else if (!isa(I)) { using namespace ore; setInlineRemark(*CB, "unavailable definition"); ORE.emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I) << NV("Callee", Callee) << " will not be inlined into " << NV("Caller", CB->getCaller()) << " because its definition is unavailable" << setIsVerbose(); }); } } } if (Calls->empty()) return PreservedAnalyses::all(); // Capture updatable variable for the current SCC. auto *C = &InitialC; // When inlining a callee produces new call sites, we want to keep track of // the fact that they were inlined from the callee. This allows us to avoid // infinite inlining in some obscure cases. To represent this, we use an // index into the InlineHistory vector. SmallVector, 16> InlineHistory; // Track a set vector of inlined callees so that we can augment the caller // with all of their edges in the call graph before pruning out the ones that // got simplified away. SmallSetVector InlinedCallees; // Track the dead functions to delete once finished with inlining calls. We // defer deleting these to make it easier to handle the call graph updates. 
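A minimal instance of the intra-SCC shape that the long comment above warns about, where bottom-up inlining has no natural stopping point (illustrative only):

// a() and b() form one SCC. Inlining b into a materializes a fresh call to
// a inside a; inlining that re-creates a call to b, and so on. The inline
// history and the intra-SCC checks below are what cut this off.
int a(int n);
int b(int n) { return n <= 0 ? 0 : a(n - 1) + 1; }
int a(int n) { return n <= 0 ? 0 : b(n - 1) + 1; }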
  // Track a set vector of inlined callees so that we can augment the caller
  // with all of their edges in the call graph before pruning out the ones that
  // got simplified away.
  SmallSetVector<Function *, 4> InlinedCallees;

  // Track the dead functions to delete once finished with inlining calls. We
  // defer deleting these to make it easier to handle the call graph updates.
  SmallVector<Function *, 4> DeadFunctions;

  // Track potentially dead non-local functions with comdats to see if they can
  // be deleted as a batch after inlining.
  SmallVector<Function *, 4> DeadFunctionsInComdats;

  // Loop forward over all of the calls.
  while (!Calls->empty()) {
    // We expect the calls to typically be batched with sequences of calls that
    // have the same caller, so we first set up some shared infrastructure for
    // this caller. We also do any pruning we can at this layer on the caller
    // alone.
    Function &F = *Calls->front().first->getCaller();
    LazyCallGraph::Node &N = *CG.lookup(F);
    if (CG.lookupSCC(N) != C) {
      Calls->pop();
      continue;
    }

    LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n"
                      << " Function size: " << F.getInstructionCount() << "\n");

    auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
      return FAM.getResult<AssumptionAnalysis>(F);
    };

    // Now process as many calls as we have within this caller in the sequence.
    // We bail out as soon as the caller has to change so we can update the
    // call graph and prepare the context of that new caller.
    bool DidInline = false;
    while (!Calls->empty() && Calls->front().first->getCaller() == &F) {
      auto P = Calls->pop();
      CallBase *CB = P.first;
      const int InlineHistoryID = P.second;
      Function &Callee = *CB->getCalledFunction();

      if (InlineHistoryID != -1 &&
          inlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) {
        LLVM_DEBUG(dbgs() << "Skipping inlining due to history: " << F.getName()
                          << " -> " << Callee.getName() << "\n");
        setInlineRemark(*CB, "recursive");
        continue;
      }

      // Check if this inlining may repeat breaking an SCC apart that has
      // already been split once before. In that case, inlining here may
      // trigger infinite inlining, much like is prevented within the inliner
      // itself by the InlineHistory above, but spread across CGSCC iterations
      // and thus hidden from the full inline history.
-     if (CG.lookupSCC(*CG.lookup(Callee)) == C &&
-         UR.InlinedInternalEdges.count({&N, C})) {
+     LazyCallGraph::SCC *CalleeSCC = CG.lookupSCC(*CG.lookup(Callee));
+     if (CalleeSCC == C && UR.InlinedInternalEdges.count({&N, C})) {
        LLVM_DEBUG(dbgs() << "Skipping inlining internal SCC edge from a node "
                             "previously split out of this SCC by inlining: "
                          << F.getName() << " -> " << Callee.getName() << "\n");
        setInlineRemark(*CB, "recursive SCC split");
        continue;
      }

      std::unique_ptr<InlineAdvice> Advice =
          Advisor.getAdvice(*CB, OnlyMandatory);

      // Check whether we want to inline this callsite.
      if (!Advice)
        continue;

      if (!Advice->isInliningRecommended()) {
        Advice->recordUnattemptedInlining();
        continue;
      }

+     int CBCostMult =
+         getStringFnAttrAsInt(
+             *CB, InlineConstants::FunctionInlineCostMultiplierAttributeName)
+             .getValueOr(1);
+
      // Setup the data structure used to plumb customization into the
      // `InlineFunction` routine.
      InlineFunctionInfo IFI(
          /*cg=*/nullptr, GetAssumptionCache, PSI,
          &FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())),
          &FAM.getResult<BlockFrequencyAnalysis>(Callee));

      InlineResult IR =
          InlineFunction(*CB, IFI, &FAM.getResult<AAManager>(*CB->getCaller()));
      if (!IR.isSuccess()) {
        Advice->recordUnsuccessfulInlining(IR);
        continue;
      }

      DidInline = true;
      InlinedCallees.insert(&Callee);

      ++NumInlined;

      LLVM_DEBUG(dbgs() << " Size after inlining: " << F.getInstructionCount()
                        << "\n");

      // Add any new callsites to defined functions to the worklist.
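      // To make the "exponentially more expensive" mechanism added below
      // concrete: the multiplier rides along on each new call site as a string
      // function attribute, so repeated inlining through the callee's SCC
      // compounds it (1, IntraSCCCostMultiplier, IntraSCCCostMultiplier^2,
      // ...). A minimal sketch of how such a call site is tagged, mirroring
      // the attribute written in the hunk that follows:
      //
      //   Attribute NewCBCostMult = Attribute::get(
      //       M.getContext(),
      //       InlineConstants::FunctionInlineCostMultiplierAttributeName,
      //       itostr(CBCostMult * IntraSCCCostMultiplier));
      //   ICB->addFnAttr(NewCBCostMult);
      //
      // The inline cost computation is expected to scale the call site's cost
      // by this factor (hence "cost multiplier"), so each additional round of
      // inlining through the same SCC becomes correspondingly less likely to
      // look profitable.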
if (!IFI.InlinedCallSites.empty()) { int NewHistoryID = InlineHistory.size(); InlineHistory.push_back({&Callee, InlineHistoryID}); for (CallBase *ICB : reverse(IFI.InlinedCallSites)) { Function *NewCallee = ICB->getCalledFunction(); assert(!(NewCallee && NewCallee->isIntrinsic()) && "Intrinsic calls should not be tracked."); if (!NewCallee) { // Try to promote an indirect (virtual) call without waiting for // the post-inline cleanup and the next DevirtSCCRepeatedPass // iteration because the next iteration may not happen and we may // miss inlining it. if (tryPromoteCall(*ICB)) NewCallee = ICB->getCalledFunction(); } - if (NewCallee) - if (!NewCallee->isDeclaration()) + if (NewCallee) { + if (!NewCallee->isDeclaration()) { Calls->push({ICB, NewHistoryID}); + // Continually inlining through an SCC can result in huge compile + // times and bloated code since we arbitrarily stop at some point + // when the inliner decides it's not profitable to inline anymore. + // We attempt to mitigate this by making these calls exponentially + // more expensive. + // This doesn't apply to calls in the same SCC since if we do + // inline through the SCC the function will end up being + // self-recursive which the inliner bails out on, and inlining + // within an SCC is necessary for performance. + if (CalleeSCC != C && + CalleeSCC == CG.lookupSCC(CG.get(*NewCallee))) { + Attribute NewCBCostMult = Attribute::get( + M.getContext(), + InlineConstants::FunctionInlineCostMultiplierAttributeName, + itostr(CBCostMult * IntraSCCCostMultiplier)); + ICB->addFnAttr(NewCBCostMult); + } + } + } } } // Merge the attributes based on the inlining. AttributeFuncs::mergeAttributesForInlining(F, Callee); // For local functions or discardable functions without comdats, check // whether this makes the callee trivially dead. In that case, we can drop // the body of the function eagerly which may reduce the number of callers // of other functions to one, changing inline cost thresholds. Non-local // discardable functions with comdats are checked later on. bool CalleeWasDeleted = false; if (Callee.isDiscardableIfUnused() && Callee.hasZeroLiveUses() && !CG.isLibFunction(Callee)) { if (Callee.hasLocalLinkage() || !Callee.hasComdat()) { Calls->erase_if([&](const std::pair &Call) { return Call.first->getCaller() == &Callee; }); // Clear the body and queue the function itself for deletion when we // finish inlining and call graph updates. // Note that after this point, it is an error to do anything other // than use the callee's address or delete it. Callee.dropAllReferences(); assert(!is_contained(DeadFunctions, &Callee) && "Cannot put cause a function to become dead twice!"); DeadFunctions.push_back(&Callee); CalleeWasDeleted = true; } else { DeadFunctionsInComdats.push_back(&Callee); } } if (CalleeWasDeleted) Advice->recordInliningWithCalleeDeleted(); else Advice->recordInlining(); } if (!DidInline) continue; Changed = true; // At this point, since we have made changes we have at least removed // a call instruction. However, in the process we do some incremental // simplification of the surrounding code. This simplification can // essentially do all of the same things as a function pass and we can // re-use the exact same logic for updating the call graph to reflect the // change. // Inside the update, we also update the FunctionAnalysisManager in the // proxy for this particular SCC. 
We do this as the SCC may have changed and // as we're going to mutate this particular function we want to make sure // the proxy is in place to forward any invalidation events. LazyCallGraph::SCC *OldC = C; C = &updateCGAndAnalysisManagerForCGSCCPass(CG, *C, N, AM, UR, FAM); LLVM_DEBUG(dbgs() << "Updated inlining SCC: " << *C << "\n"); // If this causes an SCC to split apart into multiple smaller SCCs, there // is a subtle risk we need to prepare for. Other transformations may // expose an "infinite inlining" opportunity later, and because of the SCC // mutation, we will revisit this function and potentially re-inline. If we // do, and that re-inlining also has the potentially to mutate the SCC // structure, the infinite inlining problem can manifest through infinite // SCC splits and merges. To avoid this, we capture the originating caller // node and the SCC containing the call edge. This is a slight over // approximation of the possible inlining decisions that must be avoided, // but is relatively efficient to store. We use C != OldC to know when // a new SCC is generated and the original SCC may be generated via merge // in later iterations. // // It is also possible that even if no new SCC is generated // (i.e., C == OldC), the original SCC could be split and then merged // into the same one as itself. and the original SCC will be added into // UR.CWorklist again, we want to catch such cases too. // // FIXME: This seems like a very heavyweight way of retaining the inline // history, we should look for a more efficient way of tracking it. if ((C != OldC || UR.CWorklist.count(OldC)) && llvm::any_of(InlinedCallees, [&](Function *Callee) { return CG.lookupSCC(*CG.lookup(*Callee)) == OldC; })) { LLVM_DEBUG(dbgs() << "Inlined an internal call edge and split an SCC, " "retaining this to avoid infinite inlining.\n"); UR.InlinedInternalEdges.insert({&N, OldC}); } InlinedCallees.clear(); // Invalidate analyses for this function now so that we don't have to // invalidate analyses for all functions in this SCC later. FAM.invalidate(F, PreservedAnalyses::none()); } // We must ensure that we only delete functions with comdats if every function // in the comdat is going to be deleted. if (!DeadFunctionsInComdats.empty()) { filterDeadComdatFunctions(DeadFunctionsInComdats); for (auto *Callee : DeadFunctionsInComdats) Callee->dropAllReferences(); DeadFunctions.append(DeadFunctionsInComdats); } // Now that we've finished inlining all of the calls across this SCC, delete // all of the trivially dead functions, updating the call graph and the CGSCC // pass manager in the process. // // Note that this walks a pointer set which has non-deterministic order but // that is OK as all we do is delete things and add pointers to unordered // sets. for (Function *DeadF : DeadFunctions) { // Get the necessary information out of the call graph and nuke the // function there. Also, clear out any cached analyses. auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF)); FAM.clear(*DeadF, DeadF->getName()); AM.clear(DeadC, DeadC.getName()); auto &DeadRC = DeadC.getOuterRefSCC(); CG.removeDeadFunction(*DeadF); // Mark the relevant parts of the call graph as invalid so we don't visit // them. UR.InvalidatedSCCs.insert(&DeadC); UR.InvalidatedRefSCCs.insert(&DeadRC); // If the updated SCC was the one containing the deleted function, clear it. if (&DeadC == UR.UpdatedC) UR.UpdatedC = nullptr; // And delete the actual function from the module. 
M.getFunctionList().erase(DeadF); ++NumDeleted; } if (!Changed) return PreservedAnalyses::all(); PreservedAnalyses PA; // Even if we change the IR, we update the core CGSCC data structures and so // can preserve the proxy to the function analysis manager. PA.preserve(); // We have already invalidated all analyses on modified functions. PA.preserveSet>(); return PA; } ModuleInlinerWrapperPass::ModuleInlinerWrapperPass(InlineParams Params, bool MandatoryFirst, InliningAdvisorMode Mode, unsigned MaxDevirtIterations) : Params(Params), Mode(Mode), MaxDevirtIterations(MaxDevirtIterations) { // Run the inliner first. The theory is that we are walking bottom-up and so // the callees have already been fully optimized, and we want to inline them // into the callers so that our optimizations can reflect that. // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO // because it makes profile annotation in the backend inaccurate. if (MandatoryFirst) PM.addPass(InlinerPass(/*OnlyMandatory*/ true)); PM.addPass(InlinerPass()); } PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M, ModuleAnalysisManager &MAM) { auto &IAA = MAM.getResult(M); if (!IAA.tryCreate(Params, Mode, {CGSCCInlineReplayFile, CGSCCInlineReplayScope, CGSCCInlineReplayFallback, {CGSCCInlineReplayFormat}})) { M.getContext().emitError( "Could not setup Inlining Advisor for the requested " "mode and/or options"); return PreservedAnalyses::all(); } // We wrap the CGSCC pipeline in a devirtualization repeater. This will try // to detect when we devirtualize indirect calls and iterate the SCC passes // in that case to try and catch knock-on inlining or function attrs // opportunities. Then we add it to the module pipeline by walking the SCCs // in postorder (or bottom-up). // If MaxDevirtIterations is 0, we just don't use the devirtualization // wrapper. if (MaxDevirtIterations == 0) MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(PM))); else MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor( createDevirtSCCRepeatedPass(std::move(PM), MaxDevirtIterations))); MPM.addPass(std::move(AfterCGMPM)); MPM.run(M, MAM); // Discard the InlineAdvisor, a subsequent inlining session should construct // its own. auto PA = PreservedAnalyses::all(); if (!KeepAdvisorForPrinting) PA.abandon(); return PA; } void InlinerPass::printPipeline( raw_ostream &OS, function_ref MapClassName2PassName) { static_cast *>(this)->printPipeline( OS, MapClassName2PassName); if (OnlyMandatory) OS << ""; } void ModuleInlinerWrapperPass::printPipeline( raw_ostream &OS, function_ref MapClassName2PassName) { // Print some info about passes added to the wrapper. This is however // incomplete as InlineAdvisorAnalysis part isn't included (which also depends // on Params and Mode). if (!MPM.isEmpty()) { MPM.printPipeline(OS, MapClassName2PassName); OS << ","; } OS << "cgscc("; if (MaxDevirtIterations != 0) OS << "devirt<" << MaxDevirtIterations << ">("; PM.printPipeline(OS, MapClassName2PassName); if (MaxDevirtIterations != 0) OS << ")"; OS << ")"; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 5113c0c67acc..7205ae178d21 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -1,5168 +1,5168 @@ //===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // OpenMP specific optimizations: // // - Deduplication of runtime calls, e.g., omp_get_thread_num. // - Replacing globalized device memory with stack memory. // - Replacing globalized device memory with shared memory. // - Parallel region merging. // - Transforming generic-mode device kernels to SPMD mode. // - Specializing the state machine for generic-mode device kernels. // //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/OpenMPOpt.h" #include "llvm/ADT/EnumeratedArray.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/Assumptions.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/LLVMContext.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/Attributor.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/CallGraphUpdater.h" #include "llvm/Transforms/Utils/CodeExtractor.h" #include using namespace llvm; using namespace omp; #define DEBUG_TYPE "openmp-opt" static cl::opt DisableOpenMPOptimizations( "openmp-opt-disable", cl::ZeroOrMore, cl::desc("Disable OpenMP specific optimizations."), cl::Hidden, cl::init(false)); static cl::opt EnableParallelRegionMerging( "openmp-opt-enable-merging", cl::ZeroOrMore, cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden, cl::init(false)); static cl::opt DisableInternalization("openmp-opt-disable-internalization", cl::ZeroOrMore, cl::desc("Disable function internalization."), cl::Hidden, cl::init(false)); static cl::opt PrintICVValues("openmp-print-icv-values", cl::init(false), cl::Hidden); static cl::opt PrintOpenMPKernels("openmp-print-gpu-kernels", cl::init(false), cl::Hidden); static cl::opt HideMemoryTransferLatency( "openmp-hide-memory-transfer-latency", cl::desc("[WIP] Tries to hide the latency of host to device memory" " transfers"), cl::Hidden, cl::init(false)); static cl::opt DisableOpenMPOptDeglobalization( "openmp-opt-disable-deglobalization", cl::ZeroOrMore, cl::desc("Disable OpenMP optimizations involving deglobalization."), cl::Hidden, cl::init(false)); static cl::opt DisableOpenMPOptSPMDization( "openmp-opt-disable-spmdization", cl::ZeroOrMore, cl::desc("Disable OpenMP optimizations involving SPMD-ization."), cl::Hidden, cl::init(false)); static cl::opt DisableOpenMPOptFolding( "openmp-opt-disable-folding", cl::ZeroOrMore, cl::desc("Disable OpenMP optimizations involving folding."), cl::Hidden, cl::init(false)); static cl::opt DisableOpenMPOptStateMachineRewrite( "openmp-opt-disable-state-machine-rewrite", 
cl::ZeroOrMore, cl::desc("Disable OpenMP optimizations that replace the state machine."), cl::Hidden, cl::init(false)); static cl::opt DisableOpenMPOptBarrierElimination( "openmp-opt-disable-barrier-elimination", cl::ZeroOrMore, cl::desc("Disable OpenMP optimizations that eliminate barriers."), cl::Hidden, cl::init(false)); static cl::opt PrintModuleAfterOptimizations( "openmp-opt-print-module", cl::ZeroOrMore, cl::desc("Print the current module after OpenMP optimizations."), cl::Hidden, cl::init(false)); static cl::opt AlwaysInlineDeviceFunctions( "openmp-opt-inline-device", cl::ZeroOrMore, cl::desc("Inline all applicible functions on the device."), cl::Hidden, cl::init(false)); static cl::opt EnableVerboseRemarks("openmp-opt-verbose-remarks", cl::ZeroOrMore, cl::desc("Enables more verbose remarks."), cl::Hidden, cl::init(false)); static cl::opt SetFixpointIterations("openmp-opt-max-iterations", cl::Hidden, cl::desc("Maximal number of attributor iterations."), cl::init(256)); STATISTIC(NumOpenMPRuntimeCallsDeduplicated, "Number of OpenMP runtime calls deduplicated"); STATISTIC(NumOpenMPParallelRegionsDeleted, "Number of OpenMP parallel regions deleted"); STATISTIC(NumOpenMPRuntimeFunctionsIdentified, "Number of OpenMP runtime functions identified"); STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified, "Number of OpenMP runtime function uses identified"); STATISTIC(NumOpenMPTargetRegionKernels, "Number of OpenMP target region entry points (=kernels) identified"); STATISTIC(NumOpenMPTargetRegionKernelsSPMD, "Number of OpenMP target region entry points (=kernels) executed in " "SPMD-mode instead of generic-mode"); STATISTIC(NumOpenMPTargetRegionKernelsWithoutStateMachine, "Number of OpenMP target region entry points (=kernels) executed in " "generic-mode without a state machines"); STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback, "Number of OpenMP target region entry points (=kernels) executed in " "generic-mode with customized state machines with fallback"); STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback, "Number of OpenMP target region entry points (=kernels) executed in " "generic-mode with customized state machines without fallback"); STATISTIC( NumOpenMPParallelRegionsReplacedInGPUStateMachine, "Number of OpenMP parallel regions replaced with ID in GPU state machines"); STATISTIC(NumOpenMPParallelRegionsMerged, "Number of OpenMP parallel regions merged"); STATISTIC(NumBytesMovedToSharedMemory, "Amount of memory pushed to shared memory"); STATISTIC(NumBarriersEliminated, "Number of redundant barriers eliminated"); #if !defined(NDEBUG) static constexpr auto TAG = "[" DEBUG_TYPE "]"; #endif namespace { struct AAHeapToShared; struct AAICVTracker; /// OpenMP specific information. For now, stores RFIs and ICVs also needed for /// Attributor runs. struct OMPInformationCache : public InformationCache { OMPInformationCache(Module &M, AnalysisGetter &AG, BumpPtrAllocator &Allocator, SetVector &CGSCC, KernelSet &Kernels) : InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M), Kernels(Kernels) { OMPBuilder.initialize(); initializeRuntimeFunctions(); initializeInternalControlVars(); } /// Generic information that describes an internal control variable. struct InternalControlVarInfo { /// The kind, as described by InternalControlVar enum. InternalControlVar Kind; /// The name of the ICV. StringRef Name; /// Environment variable associated with this ICV. StringRef EnvVarName; /// Initial value kind. ICVInitValue InitKind; /// Initial value. 
ConstantInt *InitValue; /// Setter RTL function associated with this ICV. RuntimeFunction Setter; /// Getter RTL function associated with this ICV. RuntimeFunction Getter; /// RTL Function corresponding to the override clause of this ICV RuntimeFunction Clause; }; /// Generic information that describes a runtime function struct RuntimeFunctionInfo { /// The kind, as described by the RuntimeFunction enum. RuntimeFunction Kind; /// The name of the function. StringRef Name; /// Flag to indicate a variadic function. bool IsVarArg; /// The return type of the function. Type *ReturnType; /// The argument types of the function. SmallVector ArgumentTypes; /// The declaration if available. Function *Declaration = nullptr; /// Uses of this runtime function per function containing the use. using UseVector = SmallVector; /// Clear UsesMap for runtime function. void clearUsesMap() { UsesMap.clear(); } /// Boolean conversion that is true if the runtime function was found. operator bool() const { return Declaration; } /// Return the vector of uses in function \p F. UseVector &getOrCreateUseVector(Function *F) { std::shared_ptr &UV = UsesMap[F]; if (!UV) UV = std::make_shared(); return *UV; } /// Return the vector of uses in function \p F or `nullptr` if there are /// none. const UseVector *getUseVector(Function &F) const { auto I = UsesMap.find(&F); if (I != UsesMap.end()) return I->second.get(); return nullptr; } /// Return how many functions contain uses of this runtime function. size_t getNumFunctionsWithUses() const { return UsesMap.size(); } /// Return the number of arguments (or the minimal number for variadic /// functions). size_t getNumArgs() const { return ArgumentTypes.size(); } /// Run the callback \p CB on each use and forget the use if the result is /// true. The callback will be fed the function in which the use was /// encountered as second argument. void foreachUse(SmallVectorImpl &SCC, function_ref CB) { for (Function *F : SCC) foreachUse(CB, F); } /// Run the callback \p CB on each use within the function \p F and forget /// the use if the result is true. void foreachUse(function_ref CB, Function *F) { SmallVector ToBeDeleted; ToBeDeleted.clear(); unsigned Idx = 0; UseVector &UV = getOrCreateUseVector(F); for (Use *U : UV) { if (CB(*U, *F)) ToBeDeleted.push_back(Idx); ++Idx; } // Remove the to-be-deleted indices in reverse order as prior // modifications will not modify the smaller indices. while (!ToBeDeleted.empty()) { unsigned Idx = ToBeDeleted.pop_back_val(); UV[Idx] = UV.back(); UV.pop_back(); } } private: /// Map from functions to all uses of this runtime function contained in /// them. DenseMap> UsesMap; public: /// Iterators for the uses of this runtime function. decltype(UsesMap)::iterator begin() { return UsesMap.begin(); } decltype(UsesMap)::iterator end() { return UsesMap.end(); } }; /// An OpenMP-IR-Builder instance OpenMPIRBuilder OMPBuilder; /// Map from runtime function kind to the runtime function description. EnumeratedArray RFIs; /// Map from function declarations/definitions to their runtime enum type. DenseMap RuntimeFunctionIDMap; /// Map from ICV kind to the ICV description. EnumeratedArray ICVs; /// Helper to initialize all internal control variable information for those /// defined in OMPKinds.def. 
void initializeInternalControlVars() { #define ICV_RT_SET(_Name, RTL) \ { \ auto &ICV = ICVs[_Name]; \ ICV.Setter = RTL; \ } #define ICV_RT_GET(Name, RTL) \ { \ auto &ICV = ICVs[Name]; \ ICV.Getter = RTL; \ } #define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init) \ { \ auto &ICV = ICVs[Enum]; \ ICV.Name = _Name; \ ICV.Kind = Enum; \ ICV.InitKind = Init; \ ICV.EnvVarName = _EnvVarName; \ switch (ICV.InitKind) { \ case ICV_IMPLEMENTATION_DEFINED: \ ICV.InitValue = nullptr; \ break; \ case ICV_ZERO: \ ICV.InitValue = ConstantInt::get( \ Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0); \ break; \ case ICV_FALSE: \ ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext()); \ break; \ case ICV_LAST: \ break; \ } \ } #include "llvm/Frontend/OpenMP/OMPKinds.def" } /// Returns true if the function declaration \p F matches the runtime /// function types, that is, return type \p RTFRetType, and argument types /// \p RTFArgTypes. static bool declMatchesRTFTypes(Function *F, Type *RTFRetType, SmallVector &RTFArgTypes) { // TODO: We should output information to the user (under debug output // and via remarks). if (!F) return false; if (F->getReturnType() != RTFRetType) return false; if (F->arg_size() != RTFArgTypes.size()) return false; auto *RTFTyIt = RTFArgTypes.begin(); for (Argument &Arg : F->args()) { if (Arg.getType() != *RTFTyIt) return false; ++RTFTyIt; } return true; } // Helper to collect all uses of the declaration in the UsesMap. unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) { unsigned NumUses = 0; if (!RFI.Declaration) return NumUses; OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration); if (CollectStats) { NumOpenMPRuntimeFunctionsIdentified += 1; NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses(); } // TODO: We directly convert uses into proper calls and unknown uses. for (Use &U : RFI.Declaration->uses()) { if (Instruction *UserI = dyn_cast(U.getUser())) { if (ModuleSlice.count(UserI->getFunction())) { RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U); ++NumUses; } } else { RFI.getOrCreateUseVector(nullptr).push_back(&U); ++NumUses; } } return NumUses; } // Helper function to recollect uses of a runtime function. void recollectUsesForFunction(RuntimeFunction RTF) { auto &RFI = RFIs[RTF]; RFI.clearUsesMap(); collectUses(RFI, /*CollectStats*/ false); } // Helper function to recollect uses of all runtime functions. void recollectUses() { for (int Idx = 0; Idx < RFIs.size(); ++Idx) recollectUsesForFunction(static_cast(Idx)); } // Helper function to inherit the calling convention of the function callee. void setCallingConvention(FunctionCallee Callee, CallInst *CI) { if (Function *Fn = dyn_cast(Callee.getCallee())) CI->setCallingConv(Fn->getCallingConv()); } /// Helper to initialize all runtime function information for those defined /// in OpenMPKinds.def. void initializeRuntimeFunctions() { Module &M = *((*ModuleSlice.begin())->getParent()); // Helper macros for handling __VA_ARGS__ in OMP_RTL #define OMP_TYPE(VarName, ...) \ Type *VarName = OMPBuilder.VarName; \ (void)VarName; #define OMP_ARRAY_TYPE(VarName, ...) \ ArrayType *VarName##Ty = OMPBuilder.VarName##Ty; \ (void)VarName##Ty; \ PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy; \ (void)VarName##PtrTy; #define OMP_FUNCTION_TYPE(VarName, ...) \ FunctionType *VarName = OMPBuilder.VarName; \ (void)VarName; \ PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \ (void)VarName##Ptr; #define OMP_STRUCT_TYPE(VarName, ...) 
\ StructType *VarName = OMPBuilder.VarName; \ (void)VarName; \ PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \ (void)VarName##Ptr; #define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...) \ { \ SmallVector ArgsTypes({__VA_ARGS__}); \ Function *F = M.getFunction(_Name); \ RTLFunctions.insert(F); \ if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) { \ RuntimeFunctionIDMap[F] = _Enum; \ auto &RFI = RFIs[_Enum]; \ RFI.Kind = _Enum; \ RFI.Name = _Name; \ RFI.IsVarArg = _IsVarArg; \ RFI.ReturnType = OMPBuilder._ReturnType; \ RFI.ArgumentTypes = std::move(ArgsTypes); \ RFI.Declaration = F; \ unsigned NumUses = collectUses(RFI); \ (void)NumUses; \ LLVM_DEBUG({ \ dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") \ << " found\n"; \ if (RFI.Declaration) \ dbgs() << TAG << "-> got " << NumUses << " uses in " \ << RFI.getNumFunctionsWithUses() \ << " different functions.\n"; \ }); \ } \ } #include "llvm/Frontend/OpenMP/OMPKinds.def" // Remove the `noinline` attribute from `__kmpc`, `_OMP::` and `omp_` // functions, except if `optnone` is present. for (Function &F : M) { for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"}) if (F.getName().startswith(Prefix) && !F.hasFnAttribute(Attribute::OptimizeNone)) F.removeFnAttr(Attribute::NoInline); } // TODO: We should attach the attributes defined in OMPKinds.def. } /// Collection of known kernels (\see Kernel) in the module. KernelSet &Kernels; /// Collection of known OpenMP runtime functions.. DenseSet RTLFunctions; }; template struct BooleanStateWithSetVector : public BooleanState { bool contains(const Ty &Elem) const { return Set.contains(Elem); } bool insert(const Ty &Elem) { if (InsertInvalidates) BooleanState::indicatePessimisticFixpoint(); return Set.insert(Elem); } const Ty &operator[](int Idx) const { return Set[Idx]; } bool operator==(const BooleanStateWithSetVector &RHS) const { return BooleanState::operator==(RHS) && Set == RHS.Set; } bool operator!=(const BooleanStateWithSetVector &RHS) const { return !(*this == RHS); } bool empty() const { return Set.empty(); } size_t size() const { return Set.size(); } /// "Clamp" this state with \p RHS. BooleanStateWithSetVector &operator^=(const BooleanStateWithSetVector &RHS) { BooleanState::operator^=(RHS); Set.insert(RHS.Set.begin(), RHS.Set.end()); return *this; } private: /// A set to keep track of elements. SetVector Set; public: typename decltype(Set)::iterator begin() { return Set.begin(); } typename decltype(Set)::iterator end() { return Set.end(); } typename decltype(Set)::const_iterator begin() const { return Set.begin(); } typename decltype(Set)::const_iterator end() const { return Set.end(); } }; template using BooleanStateWithPtrSetVector = BooleanStateWithSetVector; struct KernelInfoState : AbstractState { /// Flag to track if we reached a fixpoint. bool IsAtFixpoint = false; /// The parallel regions (identified by the outlined parallel functions) that /// can be reached from the associated function. BooleanStateWithPtrSetVector ReachedKnownParallelRegions; /// State to track what parallel region we might reach. BooleanStateWithPtrSetVector ReachedUnknownParallelRegions; /// State to track if we are in SPMD-mode, assumed or know, and why we decided /// we cannot be. If it is assumed, then RequiresFullRuntime should also be /// false. BooleanStateWithPtrSetVector SPMDCompatibilityTracker; /// The __kmpc_target_init call in this kernel, if any. If we find more than /// one we abort as the kernel is malformed. 
CallBase *KernelInitCB = nullptr; /// The __kmpc_target_deinit call in this kernel, if any. If we find more than /// one we abort as the kernel is malformed. CallBase *KernelDeinitCB = nullptr; /// Flag to indicate if the associated function is a kernel entry. bool IsKernelEntry = false; /// State to track what kernel entries can reach the associated function. BooleanStateWithPtrSetVector ReachingKernelEntries; /// State to indicate if we can track parallel level of the associated /// function. We will give up tracking if we encounter unknown caller or the /// caller is __kmpc_parallel_51. BooleanStateWithSetVector ParallelLevels; /// Abstract State interface ///{ KernelInfoState() {} KernelInfoState(bool BestState) { if (!BestState) indicatePessimisticFixpoint(); } /// See AbstractState::isValidState(...) bool isValidState() const override { return true; } /// See AbstractState::isAtFixpoint(...) bool isAtFixpoint() const override { return IsAtFixpoint; } /// See AbstractState::indicatePessimisticFixpoint(...) ChangeStatus indicatePessimisticFixpoint() override { IsAtFixpoint = true; ReachingKernelEntries.indicatePessimisticFixpoint(); SPMDCompatibilityTracker.indicatePessimisticFixpoint(); ReachedKnownParallelRegions.indicatePessimisticFixpoint(); ReachedUnknownParallelRegions.indicatePessimisticFixpoint(); return ChangeStatus::CHANGED; } /// See AbstractState::indicateOptimisticFixpoint(...) ChangeStatus indicateOptimisticFixpoint() override { IsAtFixpoint = true; ReachingKernelEntries.indicateOptimisticFixpoint(); SPMDCompatibilityTracker.indicateOptimisticFixpoint(); ReachedKnownParallelRegions.indicateOptimisticFixpoint(); ReachedUnknownParallelRegions.indicateOptimisticFixpoint(); return ChangeStatus::UNCHANGED; } /// Return the assumed state KernelInfoState &getAssumed() { return *this; } const KernelInfoState &getAssumed() const { return *this; } bool operator==(const KernelInfoState &RHS) const { if (SPMDCompatibilityTracker != RHS.SPMDCompatibilityTracker) return false; if (ReachedKnownParallelRegions != RHS.ReachedKnownParallelRegions) return false; if (ReachedUnknownParallelRegions != RHS.ReachedUnknownParallelRegions) return false; if (ReachingKernelEntries != RHS.ReachingKernelEntries) return false; return true; } /// Returns true if this kernel contains any OpenMP parallel regions. bool mayContainParallelRegion() { return !ReachedKnownParallelRegions.empty() || !ReachedUnknownParallelRegions.empty(); } /// Return empty set as the best state of potential values. static KernelInfoState getBestState() { return KernelInfoState(true); } static KernelInfoState getBestState(KernelInfoState &KIS) { return getBestState(); } /// Return full set as the worst state of potential values. static KernelInfoState getWorstState() { return KernelInfoState(false); } /// "Clamp" this state with \p KIS. KernelInfoState operator^=(const KernelInfoState &KIS) { // Do not merge two different _init and _deinit call sites. 
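    // The "clamp" operators in this file are lattice joins: the boolean part
    // can only move towards the pessimistic fixpoint and the element sets only
    // grow. For the generic helper defined above, a minimal illustration
    // (names chosen only for the example):
    //
    //   BooleanStateWithSetVector<int> A, B;
    //   A.insert(1);
    //   B.insert(2);
    //   A ^= B; // A now contains {1, 2}.
    //   assert(A.size() == 2 && A.contains(1) && A.contains(2));
    //
    // KernelInfoState follows the same pattern member-wise, with the extra
    // rule below that the _init/_deinit call sites of both states must agree.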
if (KIS.KernelInitCB) { if (KernelInitCB && KernelInitCB != KIS.KernelInitCB) llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt " "assumptions."); KernelInitCB = KIS.KernelInitCB; } if (KIS.KernelDeinitCB) { if (KernelDeinitCB && KernelDeinitCB != KIS.KernelDeinitCB) llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt " "assumptions."); KernelDeinitCB = KIS.KernelDeinitCB; } SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker; ReachedKnownParallelRegions ^= KIS.ReachedKnownParallelRegions; ReachedUnknownParallelRegions ^= KIS.ReachedUnknownParallelRegions; return *this; } KernelInfoState operator&=(const KernelInfoState &KIS) { return (*this ^= KIS); } ///} }; /// Used to map the values physically (in the IR) stored in an offload /// array, to a vector in memory. struct OffloadArray { /// Physical array (in the IR). AllocaInst *Array = nullptr; /// Mapped values. SmallVector StoredValues; /// Last stores made in the offload array. SmallVector LastAccesses; OffloadArray() = default; /// Initializes the OffloadArray with the values stored in \p Array before /// instruction \p Before is reached. Returns false if the initialization /// fails. /// This MUST be used immediately after the construction of the object. bool initialize(AllocaInst &Array, Instruction &Before) { if (!Array.getAllocatedType()->isArrayTy()) return false; if (!getValues(Array, Before)) return false; this->Array = &Array; return true; } static const unsigned DeviceIDArgNum = 1; static const unsigned BasePtrsArgNum = 3; static const unsigned PtrsArgNum = 4; static const unsigned SizesArgNum = 5; private: /// Traverses the BasicBlock where \p Array is, collecting the stores made to /// \p Array, leaving StoredValues with the values stored before the /// instruction \p Before is reached. bool getValues(AllocaInst &Array, Instruction &Before) { // Initialize container. const uint64_t NumValues = Array.getAllocatedType()->getArrayNumElements(); StoredValues.assign(NumValues, nullptr); LastAccesses.assign(NumValues, nullptr); // TODO: This assumes the instruction \p Before is in the same // BasicBlock as Array. Make it general, for any control flow graph. BasicBlock *BB = Array.getParent(); if (BB != Before.getParent()) return false; const DataLayout &DL = Array.getModule()->getDataLayout(); const unsigned int PointerSize = DL.getPointerSize(); for (Instruction &I : *BB) { if (&I == &Before) break; if (!isa(&I)) continue; auto *S = cast(&I); int64_t Offset = -1; auto *Dst = GetPointerBaseWithConstantOffset(S->getPointerOperand(), Offset, DL); if (Dst == &Array) { int64_t Idx = Offset / PointerSize; StoredValues[Idx] = getUnderlyingObject(S->getValueOperand()); LastAccesses[Idx] = S; } } return isFilled(); } /// Returns true if all values in StoredValues and /// LastAccesses are not nullptrs. 
bool isFilled() { const unsigned NumValues = StoredValues.size(); for (unsigned I = 0; I < NumValues; ++I) { if (!StoredValues[I] || !LastAccesses[I]) return false; } return true; } }; struct OpenMPOpt { using OptimizationRemarkGetter = function_ref; OpenMPOpt(SmallVectorImpl &SCC, CallGraphUpdater &CGUpdater, OptimizationRemarkGetter OREGetter, OMPInformationCache &OMPInfoCache, Attributor &A) : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater), OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {} /// Check if any remarks are enabled for openmp-opt bool remarksEnabled() { auto &Ctx = M.getContext(); return Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(DEBUG_TYPE); } /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice. bool run(bool IsModulePass) { if (SCC.empty()) return false; bool Changed = false; LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size() << " functions in a slice with " << OMPInfoCache.ModuleSlice.size() << " functions\n"); if (IsModulePass) { Changed |= runAttributor(IsModulePass); // Recollect uses, in case Attributor deleted any. OMPInfoCache.recollectUses(); // TODO: This should be folded into buildCustomStateMachine. Changed |= rewriteDeviceCodeStateMachine(); if (remarksEnabled()) analysisGlobalization(); Changed |= eliminateBarriers(); } else { if (PrintICVValues) printICVs(); if (PrintOpenMPKernels) printKernels(); Changed |= runAttributor(IsModulePass); // Recollect uses, in case Attributor deleted any. OMPInfoCache.recollectUses(); Changed |= deleteParallelRegions(); if (HideMemoryTransferLatency) Changed |= hideMemTransfersLatency(); Changed |= deduplicateRuntimeCalls(); if (EnableParallelRegionMerging) { if (mergeParallelRegions()) { deduplicateRuntimeCalls(); Changed = true; } } Changed |= eliminateBarriers(); } return Changed; } /// Print initial ICV values for testing. /// FIXME: This should be done from the Attributor once it is added. void printICVs() const { InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel, ICV_proc_bind}; for (Function *F : OMPInfoCache.ModuleSlice) { for (auto ICV : ICVs) { auto ICVInfo = OMPInfoCache.ICVs[ICV]; auto Remark = [&](OptimizationRemarkAnalysis ORA) { return ORA << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name) << " Value: " << (ICVInfo.InitValue ? toString(ICVInfo.InitValue->getValue(), 10, true) : "IMPLEMENTATION_DEFINED"); }; emitRemark(F, "OpenMPICVTracker", Remark); } } } /// Print OpenMP GPU kernels for testing. void printKernels() const { for (Function *F : SCC) { if (!OMPInfoCache.Kernels.count(F)) continue; auto Remark = [&](OptimizationRemarkAnalysis ORA) { return ORA << "OpenMP GPU kernel " << ore::NV("OpenMPGPUKernel", F->getName()) << "\n"; }; emitRemark(F, "OpenMPGPU", Remark); } } /// Return the call if \p U is a callee use in a regular call. If \p RFI is /// given it has to be the callee or a nullptr is returned. static CallInst *getCallIfRegularCall( Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) { CallInst *CI = dyn_cast(U.getUser()); if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() && (!RFI || (RFI->Declaration && CI->getCalledFunction() == RFI->Declaration))) return CI; return nullptr; } /// Return the call if \p V is a regular call. If \p RFI is given it has to be /// the callee or a nullptr is returned. 
static CallInst *getCallIfRegularCall( Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) { CallInst *CI = dyn_cast(&V); if (CI && !CI->hasOperandBundles() && (!RFI || (RFI->Declaration && CI->getCalledFunction() == RFI->Declaration))) return CI; return nullptr; } private: /// Merge parallel regions when it is safe. bool mergeParallelRegions() { const unsigned CallbackCalleeOperand = 2; const unsigned CallbackFirstArgOperand = 3; using InsertPointTy = OpenMPIRBuilder::InsertPointTy; // Check if there are any __kmpc_fork_call calls to merge. OMPInformationCache::RuntimeFunctionInfo &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call]; if (!RFI.Declaration) return false; // Unmergable calls that prevent merging a parallel region. OMPInformationCache::RuntimeFunctionInfo UnmergableCallsInfo[] = { OMPInfoCache.RFIs[OMPRTL___kmpc_push_proc_bind], OMPInfoCache.RFIs[OMPRTL___kmpc_push_num_threads], }; bool Changed = false; LoopInfo *LI = nullptr; DominatorTree *DT = nullptr; SmallDenseMap> BB2PRMap; BasicBlock *StartBB = nullptr, *EndBB = nullptr; auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, BasicBlock &ContinuationIP) { BasicBlock *CGStartBB = CodeGenIP.getBlock(); BasicBlock *CGEndBB = SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI); assert(StartBB != nullptr && "StartBB should not be null"); CGStartBB->getTerminator()->setSuccessor(0, StartBB); assert(EndBB != nullptr && "EndBB should not be null"); EndBB->getTerminator()->setSuccessor(0, CGEndBB); }; auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &, Value &Inner, Value *&ReplacementValue) -> InsertPointTy { ReplacementValue = &Inner; return CodeGenIP; }; auto FiniCB = [&](InsertPointTy CodeGenIP) {}; /// Create a sequential execution region within a merged parallel region, /// encapsulated in a master construct with a barrier for synchronization. auto CreateSequentialRegion = [&](Function *OuterFn, BasicBlock *OuterPredBB, Instruction *SeqStartI, Instruction *SeqEndI) { // Isolate the instructions of the sequential region to a separate // block. BasicBlock *ParentBB = SeqStartI->getParent(); BasicBlock *SeqEndBB = SplitBlock(ParentBB, SeqEndI->getNextNode(), DT, LI); BasicBlock *SeqAfterBB = SplitBlock(SeqEndBB, &*SeqEndBB->getFirstInsertionPt(), DT, LI); BasicBlock *SeqStartBB = SplitBlock(ParentBB, SeqStartI, DT, LI, nullptr, "seq.par.merged"); assert(ParentBB->getUniqueSuccessor() == SeqStartBB && "Expected a different CFG"); const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc(); ParentBB->getTerminator()->eraseFromParent(); auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, BasicBlock &ContinuationIP) { BasicBlock *CGStartBB = CodeGenIP.getBlock(); BasicBlock *CGEndBB = SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI); assert(SeqStartBB != nullptr && "SeqStartBB should not be null"); CGStartBB->getTerminator()->setSuccessor(0, SeqStartBB); assert(SeqEndBB != nullptr && "SeqEndBB should not be null"); SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB); }; auto FiniCB = [&](InsertPointTy CodeGenIP) {}; // Find outputs from the sequential region to outside users and // broadcast their values to them. for (Instruction &I : *SeqStartBB) { SmallPtrSet OutsideUsers; for (User *Usr : I.users()) { Instruction &UsrI = *cast(Usr); // Ignore outputs to LT intrinsics, code extraction for the merged // parallel region will fix them. 
if (UsrI.isLifetimeStartOrEnd()) continue; if (UsrI.getParent() != SeqStartBB) OutsideUsers.insert(&UsrI); } if (OutsideUsers.empty()) continue; // Emit an alloca in the outer region to store the broadcasted // value. const DataLayout &DL = M.getDataLayout(); AllocaInst *AllocaI = new AllocaInst( I.getType(), DL.getAllocaAddrSpace(), nullptr, I.getName() + ".seq.output.alloc", &OuterFn->front().front()); // Emit a store instruction in the sequential BB to update the // value. new StoreInst(&I, AllocaI, SeqStartBB->getTerminator()); // Emit a load instruction and replace the use of the output value // with it. for (Instruction *UsrI : OutsideUsers) { LoadInst *LoadI = new LoadInst( I.getType(), AllocaI, I.getName() + ".seq.output.load", UsrI); UsrI->replaceUsesOfWith(&I, LoadI); } } OpenMPIRBuilder::LocationDescription Loc( InsertPointTy(ParentBB, ParentBB->end()), DL); InsertPointTy SeqAfterIP = OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB); OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel); BranchInst::Create(SeqAfterBB, SeqAfterIP.getBlock()); LLVM_DEBUG(dbgs() << TAG << "After sequential inlining " << *OuterFn << "\n"); }; // Helper to merge the __kmpc_fork_call calls in MergableCIs. They are all // contained in BB and only separated by instructions that can be // redundantly executed in parallel. The block BB is split before the first // call (in MergableCIs) and after the last so the entire region we merge // into a single parallel region is contained in a single basic block // without any other instructions. We use the OpenMPIRBuilder to outline // that block and call the resulting function via __kmpc_fork_call. auto Merge = [&](const SmallVectorImpl &MergableCIs, BasicBlock *BB) { // TODO: Change the interface to allow single CIs expanded, e.g, to // include an outer loop. assert(MergableCIs.size() > 1 && "Assumed multiple mergable CIs"); auto Remark = [&](OptimizationRemark OR) { OR << "Parallel region merged with parallel region" << (MergableCIs.size() > 2 ? "s" : "") << " at "; for (auto *CI : llvm::drop_begin(MergableCIs)) { OR << ore::NV("OpenMPParallelMerge", CI->getDebugLoc()); if (CI != MergableCIs.back()) OR << ", "; } return OR << "."; }; emitRemark(MergableCIs.front(), "OMP150", Remark); Function *OriginalFn = BB->getParent(); LLVM_DEBUG(dbgs() << TAG << "Merge " << MergableCIs.size() << " parallel regions in " << OriginalFn->getName() << "\n"); // Isolate the calls to merge in a separate block. EndBB = SplitBlock(BB, MergableCIs.back()->getNextNode(), DT, LI); BasicBlock *AfterBB = SplitBlock(EndBB, &*EndBB->getFirstInsertionPt(), DT, LI); StartBB = SplitBlock(BB, MergableCIs.front(), DT, LI, nullptr, "omp.par.merged"); assert(BB->getUniqueSuccessor() == StartBB && "Expected a different CFG"); const DebugLoc DL = BB->getTerminator()->getDebugLoc(); BB->getTerminator()->eraseFromParent(); // Create sequential regions for sequential instructions that are // in-between mergable parallel regions. for (auto *It = MergableCIs.begin(), *End = MergableCIs.end() - 1; It != End; ++It) { Instruction *ForkCI = *It; Instruction *NextForkCI = *(It + 1); // Continue if there are not in-between instructions. 
if (ForkCI->getNextNode() == NextForkCI) continue; CreateSequentialRegion(OriginalFn, BB, ForkCI->getNextNode(), NextForkCI->getPrevNode()); } OpenMPIRBuilder::LocationDescription Loc(InsertPointTy(BB, BB->end()), DL); IRBuilder<>::InsertPoint AllocaIP( &OriginalFn->getEntryBlock(), OriginalFn->getEntryBlock().getFirstInsertionPt()); // Create the merged parallel region with default proc binding, to // avoid overriding binding settings, and without explicit cancellation. InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel( Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, /* IsCancellable */ false); BranchInst::Create(AfterBB, AfterIP.getBlock()); // Perform the actual outlining. OMPInfoCache.OMPBuilder.finalize(OriginalFn); Function *OutlinedFn = MergableCIs.front()->getCaller(); // Replace the __kmpc_fork_call calls with direct calls to the outlined // callbacks. SmallVector Args; for (auto *CI : MergableCIs) { Value *Callee = CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts(); FunctionType *FT = cast(Callee->getType()->getPointerElementType()); Args.clear(); Args.push_back(OutlinedFn->getArg(0)); Args.push_back(OutlinedFn->getArg(1)); for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E; ++U) Args.push_back(CI->getArgOperand(U)); CallInst *NewCI = CallInst::Create(FT, Callee, Args, "", CI); if (CI->getDebugLoc()) NewCI->setDebugLoc(CI->getDebugLoc()); // Forward parameter attributes from the callback to the callee. for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E; ++U) for (const Attribute &A : CI->getAttributes().getParamAttrs(U)) NewCI->addParamAttr( U - (CallbackFirstArgOperand - CallbackCalleeOperand), A); // Emit an explicit barrier to replace the implicit fork-join barrier. if (CI != MergableCIs.back()) { // TODO: Remove barrier if the merged parallel region includes the // 'nowait' clause. OMPInfoCache.OMPBuilder.createBarrier( InsertPointTy(NewCI->getParent(), NewCI->getNextNode()->getIterator()), OMPD_parallel); } CI->eraseFromParent(); } assert(OutlinedFn != OriginalFn && "Outlining failed"); CGUpdater.registerOutlinedFunction(*OriginalFn, *OutlinedFn); CGUpdater.reanalyzeFunction(*OriginalFn); NumOpenMPParallelRegionsMerged += MergableCIs.size(); return true; }; // Helper function that identifes sequences of // __kmpc_fork_call uses in a basic block. auto DetectPRsCB = [&](Use &U, Function &F) { CallInst *CI = getCallIfRegularCall(U, &RFI); BB2PRMap[CI->getParent()].insert(CI); return false; }; BB2PRMap.clear(); RFI.foreachUse(SCC, DetectPRsCB); SmallVector, 4> MergableCIsVector; // Find mergable parallel regions within a basic block that are // safe to merge, that is any in-between instructions can safely // execute in parallel after merging. // TODO: support merging across basic-blocks. for (auto &It : BB2PRMap) { auto &CIs = It.getSecond(); if (CIs.size() < 2) continue; BasicBlock *BB = It.getFirst(); SmallVector MergableCIs; /// Returns true if the instruction is mergable, false otherwise. /// A terminator instruction is unmergable by definition since merging /// works within a BB. Instructions before the mergable region are /// mergable if they are not calls to OpenMP runtime functions that may /// set different execution parameters for subsequent parallel regions. /// Instructions in-between parallel regions are mergable if they are not /// calls to any non-intrinsic function since that may call a non-mergable /// OpenMP runtime function. 
auto IsMergable = [&](Instruction &I, bool IsBeforeMergableRegion) { // We do not merge across BBs, hence return false (unmergable) if the // instruction is a terminator. if (I.isTerminator()) return false; if (!isa(&I)) return true; CallInst *CI = cast(&I); if (IsBeforeMergableRegion) { Function *CalledFunction = CI->getCalledFunction(); if (!CalledFunction) return false; // Return false (unmergable) if the call before the parallel // region calls an explicit affinity (proc_bind) or number of // threads (num_threads) compiler-generated function. Those settings // may be incompatible with following parallel regions. // TODO: ICV tracking to detect compatibility. for (const auto &RFI : UnmergableCallsInfo) { if (CalledFunction == RFI.Declaration) return false; } } else { // Return false (unmergable) if there is a call instruction // in-between parallel regions when it is not an intrinsic. It // may call an unmergable OpenMP runtime function in its callpath. // TODO: Keep track of possible OpenMP calls in the callpath. if (!isa(CI)) return false; } return true; }; // Find maximal number of parallel region CIs that are safe to merge. for (auto It = BB->begin(), End = BB->end(); It != End;) { Instruction &I = *It; ++It; if (CIs.count(&I)) { MergableCIs.push_back(cast(&I)); continue; } // Continue expanding if the instruction is mergable. if (IsMergable(I, MergableCIs.empty())) continue; // Forward the instruction iterator to skip the next parallel region // since there is an unmergable instruction which can affect it. for (; It != End; ++It) { Instruction &SkipI = *It; if (CIs.count(&SkipI)) { LLVM_DEBUG(dbgs() << TAG << "Skip parallel region " << SkipI << " due to " << I << "\n"); ++It; break; } } // Store mergable regions found. if (MergableCIs.size() > 1) { MergableCIsVector.push_back(MergableCIs); LLVM_DEBUG(dbgs() << TAG << "Found " << MergableCIs.size() << " parallel regions in block " << BB->getName() << " of function " << BB->getParent()->getName() << "\n";); } MergableCIs.clear(); } if (!MergableCIsVector.empty()) { Changed = true; for (auto &MergableCIs : MergableCIsVector) Merge(MergableCIs, BB); MergableCIsVector.clear(); } } if (Changed) { /// Re-collect use for fork calls, emitted barrier calls, and /// any emitted master/end_master calls. OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_fork_call); OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_barrier); OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_master); OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_end_master); } return Changed; } /// Try to delete parallel regions if possible. 
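  /// A fork call is only deleted when the outlined callback provably has no
  /// observable effect, i.e. (as checked below) it only reads memory and is
  /// guaranteed to return. In source terms this corresponds roughly to a
  /// region whose outlined body ends up marked `readonly willreturn`, for
  /// example (illustrative only, not taken from a test case):
  ///
  ///   #pragma omp parallel
  ///   {
  ///     int Tmp = SharedInput[omp_get_thread_num()]; // value never used
  ///     (void)Tmp;
  ///   }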
bool deleteParallelRegions() { const unsigned CallbackCalleeOperand = 2; OMPInformationCache::RuntimeFunctionInfo &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call]; if (!RFI.Declaration) return false; bool Changed = false; auto DeleteCallCB = [&](Use &U, Function &) { CallInst *CI = getCallIfRegularCall(U); if (!CI) return false; auto *Fn = dyn_cast( CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts()); if (!Fn) return false; if (!Fn->onlyReadsMemory()) return false; if (!Fn->hasFnAttribute(Attribute::WillReturn)) return false; LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in " << CI->getCaller()->getName() << "\n"); auto Remark = [&](OptimizationRemark OR) { return OR << "Removing parallel region with no side-effects."; }; emitRemark(CI, "OMP160", Remark); CGUpdater.removeCallSite(*CI); CI->eraseFromParent(); Changed = true; ++NumOpenMPParallelRegionsDeleted; return true; }; RFI.foreachUse(SCC, DeleteCallCB); return Changed; } /// Try to eliminate runtime calls by reusing existing ones. bool deduplicateRuntimeCalls() { bool Changed = false; RuntimeFunction DeduplicableRuntimeCallIDs[] = { OMPRTL_omp_get_num_threads, OMPRTL_omp_in_parallel, OMPRTL_omp_get_cancellation, OMPRTL_omp_get_thread_limit, OMPRTL_omp_get_supported_active_levels, OMPRTL_omp_get_level, OMPRTL_omp_get_ancestor_thread_num, OMPRTL_omp_get_team_size, OMPRTL_omp_get_active_level, OMPRTL_omp_in_final, OMPRTL_omp_get_proc_bind, OMPRTL_omp_get_num_places, OMPRTL_omp_get_num_procs, OMPRTL_omp_get_place_num, OMPRTL_omp_get_partition_num_places, OMPRTL_omp_get_partition_place_nums}; // Global-tid is handled separately. SmallSetVector GTIdArgs; collectGlobalThreadIdArguments(GTIdArgs); LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size() << " global thread ID arguments\n"); for (Function *F : SCC) { for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs) Changed |= deduplicateRuntimeCalls( *F, OMPInfoCache.RFIs[DeduplicableRuntimeCallID]); // __kmpc_global_thread_num is special as we can replace it with an // argument in enough cases to make it worth trying. Value *GTIdArg = nullptr; for (Argument &Arg : F->args()) if (GTIdArgs.count(&Arg)) { GTIdArg = &Arg; break; } Changed |= deduplicateRuntimeCalls( *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg); } return Changed; } /// Tries to hide the latency of runtime calls that involve host to /// device memory transfers by splitting them into their "issue" and "wait" /// versions. The "issue" is moved upwards as much as possible. The "wait" is /// moved downards as much as possible. The "issue" issues the memory transfer /// asynchronously, returning a handle. The "wait" waits in the returned /// handle for the memory transfer to finish. bool hideMemTransfersLatency() { auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper]; bool Changed = false; auto SplitMemTransfers = [&](Use &U, Function &Decl) { auto *RTCall = getCallIfRegularCall(U, &RFI); if (!RTCall) return false; OffloadArray OffloadArrays[3]; if (!getValuesInOffloadArrays(*RTCall, OffloadArrays)) return false; LLVM_DEBUG(dumpValuesInOffloadArrays(OffloadArrays)); // TODO: Check if can be moved upwards. bool WasSplit = false; Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall); if (WaitMovementPoint) WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint); Changed |= WasSplit; return WasSplit; }; RFI.foreachUse(SCC, SplitMemTransfers); return Changed; } /// Eliminates redundant, aligned barriers in OpenMP offloaded kernels. 
/// TODO: Make this an AA and expand it to work across blocks and functions. bool eliminateBarriers() { bool Changed = false; if (DisableOpenMPOptBarrierElimination) return /*Changed=*/false; if (OMPInfoCache.Kernels.empty()) return /*Changed=*/false; enum ImplicitBarrierType { IBT_ENTRY, IBT_EXIT }; class BarrierInfo { Instruction *I; enum ImplicitBarrierType Type; public: BarrierInfo(enum ImplicitBarrierType Type) : I(nullptr), Type(Type) {} BarrierInfo(Instruction &I) : I(&I) {} bool isImplicit() { return !I; } bool isImplicitEntry() { return isImplicit() && Type == IBT_ENTRY; } bool isImplicitExit() { return isImplicit() && Type == IBT_EXIT; } Instruction *getInstruction() { return I; } }; for (Function *Kernel : OMPInfoCache.Kernels) { for (BasicBlock &BB : *Kernel) { SmallVector BarriersInBlock; SmallPtrSet BarriersToBeDeleted; // Add the kernel entry implicit barrier. if (&Kernel->getEntryBlock() == &BB) BarriersInBlock.push_back(IBT_ENTRY); // Find implicit and explicit aligned barriers in the same basic block. for (Instruction &I : BB) { if (isa(I)) { // Add the implicit barrier when exiting the kernel. BarriersInBlock.push_back(IBT_EXIT); continue; } CallBase *CB = dyn_cast(&I); if (!CB) continue; auto IsAlignBarrierCB = [&](CallBase &CB) { switch (CB.getIntrinsicID()) { case Intrinsic::nvvm_barrier0: case Intrinsic::nvvm_barrier0_and: case Intrinsic::nvvm_barrier0_or: case Intrinsic::nvvm_barrier0_popc: return true; default: break; } return hasAssumption(CB, KnownAssumptionString("ompx_aligned_barrier")); }; if (IsAlignBarrierCB(*CB)) { // Add an explicit aligned barrier. BarriersInBlock.push_back(I); } } if (BarriersInBlock.size() <= 1) continue; // A barrier in a barrier pair is removeable if all instructions // between the barriers in the pair are side-effect free modulo the // barrier operation. auto IsBarrierRemoveable = [&Kernel](BarrierInfo *StartBI, BarrierInfo *EndBI) { assert( !StartBI->isImplicitExit() && "Expected start barrier to be other than a kernel exit barrier"); assert( !EndBI->isImplicitEntry() && "Expected end barrier to be other than a kernel entry barrier"); // If StarBI instructions is null then this the implicit // kernel entry barrier, so iterate from the first instruction in the // entry block. Instruction *I = (StartBI->isImplicitEntry()) ? &Kernel->getEntryBlock().front() : StartBI->getInstruction()->getNextNode(); assert(I && "Expected non-null start instruction"); Instruction *E = (EndBI->isImplicitExit()) ? I->getParent()->getTerminator() : EndBI->getInstruction(); assert(E && "Expected non-null end instruction"); for (; I != E; I = I->getNextNode()) { if (!I->mayHaveSideEffects() && !I->mayReadFromMemory()) continue; auto IsPotentiallyAffectedByBarrier = [](Optional Loc) { const Value *Obj = (Loc && Loc->Ptr) ? 
getUnderlyingObject(Loc->Ptr) : nullptr; if (!Obj) { LLVM_DEBUG( dbgs() << "Access to unknown location requires barriers\n"); return true; } if (isa(Obj)) return false; if (isa(Obj)) return false; if (auto *GV = dyn_cast(Obj)) { if (GV->isConstant()) return false; if (GV->isThreadLocal()) return false; if (GV->getAddressSpace() == (int)AddressSpace::Local) return false; if (GV->getAddressSpace() == (int)AddressSpace::Constant) return false; } LLVM_DEBUG(dbgs() << "Access to '" << *Obj << "' requires barriers\n"); return true; }; if (MemIntrinsic *MI = dyn_cast(I)) { Optional Loc = MemoryLocation::getForDest(MI); if (IsPotentiallyAffectedByBarrier(Loc)) return false; if (MemTransferInst *MTI = dyn_cast(I)) { Optional Loc = MemoryLocation::getForSource(MTI); if (IsPotentiallyAffectedByBarrier(Loc)) return false; } continue; } if (auto *LI = dyn_cast(I)) if (LI->hasMetadata(LLVMContext::MD_invariant_load)) continue; Optional Loc = MemoryLocation::getOrNone(I); if (IsPotentiallyAffectedByBarrier(Loc)) return false; } return true; }; // Iterate barrier pairs and remove an explicit barrier if analysis // deems it removeable. for (auto *It = BarriersInBlock.begin(), *End = BarriersInBlock.end() - 1; It != End; ++It) { BarrierInfo *StartBI = It; BarrierInfo *EndBI = (It + 1); // Cannot remove when both are implicit barriers, continue. if (StartBI->isImplicit() && EndBI->isImplicit()) continue; if (!IsBarrierRemoveable(StartBI, EndBI)) continue; assert(!(StartBI->isImplicit() && EndBI->isImplicit()) && "Expected at least one explicit barrier to remove."); // Remove an explicit barrier, check first, then second. if (!StartBI->isImplicit()) { LLVM_DEBUG(dbgs() << "Remove start barrier " << *StartBI->getInstruction() << "\n"); BarriersToBeDeleted.insert(StartBI->getInstruction()); } else { LLVM_DEBUG(dbgs() << "Remove end barrier " << *EndBI->getInstruction() << "\n"); BarriersToBeDeleted.insert(EndBI->getInstruction()); } } if (BarriersToBeDeleted.empty()) continue; Changed = true; for (Instruction *I : BarriersToBeDeleted) { ++NumBarriersEliminated; auto Remark = [&](OptimizationRemark OR) { return OR << "Redundant barrier eliminated."; }; if (EnableVerboseRemarks) emitRemark(I, "OMP190", Remark); I->eraseFromParent(); } } } return Changed; } void analysisGlobalization() { auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; auto CheckGlobalization = [&](Use &U, Function &Decl) { if (CallInst *CI = getCallIfRegularCall(U, &RFI)) { auto Remark = [&](OptimizationRemarkMissed ORM) { return ORM << "Found thread data sharing on the GPU. " << "Expect degraded performance due to data globalization."; }; emitRemark(CI, "OMP112", Remark); } return false; }; RFI.foreachUse(SCC, CheckGlobalization); } /// Maps the values stored in the offload arrays passed as arguments to /// \p RuntimeCall into the offload arrays in \p OAs. bool getValuesInOffloadArrays(CallInst &RuntimeCall, MutableArrayRef OAs) { assert(OAs.size() == 3 && "Need space for three offload arrays!"); // A runtime call that involves memory offloading looks something like: // call void @__tgt_target_data_begin_mapper(arg0, arg1, // i8** %offload_baseptrs, i8** %offload_ptrs, i64* %offload_sizes, // ...) // So, the idea is to access the allocas that allocate space for these // offload arrays, offload_baseptrs, offload_ptrs, offload_sizes. // Therefore: // i8** %offload_baseptrs. Value *BasePtrsArg = RuntimeCall.getArgOperand(OffloadArray::BasePtrsArgNum); // i8** %offload_ptrs. 
Value *PtrsArg = RuntimeCall.getArgOperand(OffloadArray::PtrsArgNum); // i8** %offload_sizes. Value *SizesArg = RuntimeCall.getArgOperand(OffloadArray::SizesArgNum); // Get values stored in **offload_baseptrs. auto *V = getUnderlyingObject(BasePtrsArg); if (!isa(V)) return false; auto *BasePtrsArray = cast(V); if (!OAs[0].initialize(*BasePtrsArray, RuntimeCall)) return false; // Get values stored in **offload_baseptrs. V = getUnderlyingObject(PtrsArg); if (!isa(V)) return false; auto *PtrsArray = cast(V); if (!OAs[1].initialize(*PtrsArray, RuntimeCall)) return false; // Get values stored in **offload_sizes. V = getUnderlyingObject(SizesArg); // If it's a [constant] global array don't analyze it. if (isa(V)) return isa(V); if (!isa(V)) return false; auto *SizesArray = cast(V); if (!OAs[2].initialize(*SizesArray, RuntimeCall)) return false; return true; } /// Prints the values in the OffloadArrays \p OAs using LLVM_DEBUG. /// For now this is a way to test that the function getValuesInOffloadArrays /// is working properly. /// TODO: Move this to a unittest when unittests are available for OpenMPOpt. void dumpValuesInOffloadArrays(ArrayRef OAs) { assert(OAs.size() == 3 && "There are three offload arrays to debug!"); LLVM_DEBUG(dbgs() << TAG << " Successfully got offload values:\n"); std::string ValuesStr; raw_string_ostream Printer(ValuesStr); std::string Separator = " --- "; for (auto *BP : OAs[0].StoredValues) { BP->print(Printer); Printer << Separator; } LLVM_DEBUG(dbgs() << "\t\toffload_baseptrs: " << Printer.str() << "\n"); ValuesStr.clear(); for (auto *P : OAs[1].StoredValues) { P->print(Printer); Printer << Separator; } LLVM_DEBUG(dbgs() << "\t\toffload_ptrs: " << Printer.str() << "\n"); ValuesStr.clear(); for (auto *S : OAs[2].StoredValues) { S->print(Printer); Printer << Separator; } LLVM_DEBUG(dbgs() << "\t\toffload_sizes: " << Printer.str() << "\n"); } /// Returns the instruction where the "wait" counterpart \p RuntimeCall can be /// moved. Returns nullptr if the movement is not possible, or not worth it. Instruction *canBeMovedDownwards(CallInst &RuntimeCall) { // FIXME: This traverses only the BasicBlock where RuntimeCall is. // Make it traverse the CFG. Instruction *CurrentI = &RuntimeCall; bool IsWorthIt = false; while ((CurrentI = CurrentI->getNextNode())) { // TODO: Once we detect the regions to be offloaded we should use the // alias analysis manager to check if CurrentI may modify one of // the offloaded regions. if (CurrentI->mayHaveSideEffects() || CurrentI->mayReadFromMemory()) { if (IsWorthIt) return CurrentI; return nullptr; } // FIXME: For now if we move it over anything without side effect // is worth it. IsWorthIt = true; } // Return end of BasicBlock. return RuntimeCall.getParent()->getTerminator(); } /// Splits \p RuntimeCall into its "issue" and "wait" counterparts. bool splitTargetDataBeginRTC(CallInst &RuntimeCall, Instruction &WaitMovementPoint) { // Create stack allocated handle (__tgt_async_info) at the beginning of the // function. Used for storing information of the async transfer, allowing to // wait on it later. 
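    // Illustrative host-side sketch (not part of the pass) of what the split
    // enables. The "enter data" pragma below lowers to the
    // __tgt_target_data_begin_mapper call that gets split; once split, the
    // copy of `A` is issued immediately and only waited on right before the
    // target region that needs it, so it can overlap with independent host
    // work in between. The current sinking of the "wait" is intentionally
    // conservative: it stays within one basic block and stops at the first
    // instruction that may read or write memory.
    //
    //   void overlapSketch(double *A, int N) {
    //   #pragma omp target enter data map(to: A[0:N])    // "issue" starts here
    //     ... independent host work that does not touch A ...
    //   #pragma omp target teams distribute parallel for // "wait" lands here
    //     for (int I = 0; I < N; ++I)
    //       A[I] *= 2.0;
    //   #pragma omp target exit data map(release: A[0:N])
    //   }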
auto &IRBuilder = OMPInfoCache.OMPBuilder; auto *F = RuntimeCall.getCaller(); Instruction *FirstInst = &(F->getEntryBlock().front()); AllocaInst *Handle = new AllocaInst( IRBuilder.AsyncInfo, F->getAddressSpace(), "handle", FirstInst); // Add "issue" runtime call declaration: // declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32, // i8**, i8**, i64*, i64*) FunctionCallee IssueDecl = IRBuilder.getOrCreateRuntimeFunction( M, OMPRTL___tgt_target_data_begin_mapper_issue); // Change RuntimeCall call site for its asynchronous version. SmallVector Args; for (auto &Arg : RuntimeCall.args()) Args.push_back(Arg.get()); Args.push_back(Handle); CallInst *IssueCallsite = CallInst::Create(IssueDecl, Args, /*NameStr=*/"", &RuntimeCall); OMPInfoCache.setCallingConvention(IssueDecl, IssueCallsite); RuntimeCall.eraseFromParent(); // Add "wait" runtime call declaration: // declare void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info) FunctionCallee WaitDecl = IRBuilder.getOrCreateRuntimeFunction( M, OMPRTL___tgt_target_data_begin_mapper_wait); Value *WaitParams[2] = { IssueCallsite->getArgOperand( OffloadArray::DeviceIDArgNum), // device_id. Handle // handle to wait on. }; CallInst *WaitCallsite = CallInst::Create( WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint); OMPInfoCache.setCallingConvention(WaitDecl, WaitCallsite); return true; } static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent, bool GlobalOnly, bool &SingleChoice) { if (CurrentIdent == NextIdent) return CurrentIdent; // TODO: Figure out how to actually combine multiple debug locations. For // now we just keep an existing one if there is a single choice. if (!GlobalOnly || isa(NextIdent)) { SingleChoice = !CurrentIdent; return NextIdent; } return nullptr; } /// Return an `struct ident_t*` value that represents the ones used in the /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not /// return a local `struct ident_t*`. For now, if we cannot find a suitable /// return value we create one from scratch. We also do not yet combine /// information, e.g., the source locations, see combinedIdentStruct. Value * getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI, Function &F, bool GlobalOnly) { bool SingleChoice = true; Value *Ident = nullptr; auto CombineIdentStruct = [&](Use &U, Function &Caller) { CallInst *CI = getCallIfRegularCall(U, &RFI); if (!CI || &F != &Caller) return false; Ident = combinedIdentStruct(Ident, CI->getArgOperand(0), /* GlobalOnly */ true, SingleChoice); return false; }; RFI.foreachUse(SCC, CombineIdentStruct); if (!Ident || !SingleChoice) { // The IRBuilder uses the insertion block to get to the module, this is // unfortunate but we work around it for now. if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock()) OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy( &F.getEntryBlock(), F.getEntryBlock().begin())); // Create a fallback location if non was found. // TODO: Use the debug locations of the calls instead. uint32_t SrcLocStrSize; Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc, SrcLocStrSize); } return Ident; } /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or /// \p ReplVal if given. 
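  // Illustrative only (not used by the pass): the effect of the deduplication
  // declared below, shown at the source level. The runtime queries listed in
  // DeduplicableRuntimeCallIDs are treated as constant within a function, so
  // repeated calls are collapsed into a single call whose result is reused;
  // omp_get_num_threads() stands in for any of them and is redeclared here
  // (normally it comes from <omp.h>) only to keep the sketch self-contained.
  static int deduplicationSketch(const int *V, int N) {
    int omp_get_num_threads();                    // sketch-only declaration
    const int NumThreads = omp_get_num_threads(); // the one surviving call
    int Sum = 0;
    for (int I = 0; I < N; ++I)
      if (I % NumThreads == 0)                    // reused instead of re-calling
        Sum += V[I];
    return Sum + NumThreads;                      // reused here as well
  }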
bool deduplicateRuntimeCalls(Function &F, OMPInformationCache::RuntimeFunctionInfo &RFI, Value *ReplVal = nullptr) { auto *UV = RFI.getUseVector(F); if (!UV || UV->size() + (ReplVal != nullptr) < 2) return false; LLVM_DEBUG( dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name << (ReplVal ? " with an existing value\n" : "\n") << "\n"); assert((!ReplVal || (isa(ReplVal) && cast(ReplVal)->getParent() == &F)) && "Unexpected replacement value!"); // TODO: Use dominance to find a good position instead. auto CanBeMoved = [this](CallBase &CB) { unsigned NumArgs = CB.arg_size(); if (NumArgs == 0) return true; if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr) return false; for (unsigned U = 1; U < NumArgs; ++U) if (isa(CB.getArgOperand(U))) return false; return true; }; if (!ReplVal) { for (Use *U : *UV) if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) { if (!CanBeMoved(*CI)) continue; // If the function is a kernel, dedup will move // the runtime call right after the kernel init callsite. Otherwise, // it will move it to the beginning of the caller function. if (isKernel(F)) { auto &KernelInitRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init]; auto *KernelInitUV = KernelInitRFI.getUseVector(F); if (KernelInitUV->empty()) continue; assert(KernelInitUV->size() == 1 && "Expected a single __kmpc_target_init in kernel\n"); CallInst *KernelInitCI = getCallIfRegularCall(*KernelInitUV->front(), &KernelInitRFI); assert(KernelInitCI && "Expected a call to __kmpc_target_init in kernel\n"); CI->moveAfter(KernelInitCI); } else CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt()); ReplVal = CI; break; } if (!ReplVal) return false; } // If we use a call as a replacement value we need to make sure the ident is // valid at the new location. For now we just pick a global one, either // existing and used by one of the calls, or created from scratch. if (CallBase *CI = dyn_cast(ReplVal)) { if (!CI->arg_empty() && CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) { Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F, /* GlobalOnly */ true); CI->setArgOperand(0, Ident); } } bool Changed = false; auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) { CallInst *CI = getCallIfRegularCall(U, &RFI); if (!CI || CI == ReplVal || &F != &Caller) return false; assert(CI->getCaller() == &F && "Unexpected call!"); auto Remark = [&](OptimizationRemark OR) { return OR << "OpenMP runtime call " << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated."; }; if (CI->getDebugLoc()) emitRemark(CI, "OMP170", Remark); else emitRemark(&F, "OMP170", Remark); CGUpdater.removeCallSite(*CI); CI->replaceAllUsesWith(ReplVal); CI->eraseFromParent(); ++NumOpenMPRuntimeCallsDeduplicated; Changed = true; return true; }; RFI.foreachUse(SCC, ReplaceAndDeleteCB); return Changed; } /// Collect arguments that represent the global thread id in \p GTIdArgs. void collectGlobalThreadIdArguments(SmallSetVector >IdArgs) { // TODO: Below we basically perform a fixpoint iteration with a pessimistic // initialization. We could define an AbstractAttribute instead and // run the Attributor here once it can be run as an SCC pass. // Helper to check the argument \p ArgNo at all call sites of \p F for // a GTId. 
auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) { if (!F.hasLocalLinkage()) return false; for (Use &U : F.uses()) { if (CallInst *CI = getCallIfRegularCall(U)) { Value *ArgOp = CI->getArgOperand(ArgNo); if (CI == &RefCI || GTIdArgs.count(ArgOp) || getCallIfRegularCall( *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num])) continue; } return false; } return true; }; // Helper to identify uses of a GTId as GTId arguments. auto AddUserArgs = [&](Value >Id) { for (Use &U : GTId.uses()) if (CallInst *CI = dyn_cast(U.getUser())) if (CI->isArgOperand(&U)) if (Function *Callee = CI->getCalledFunction()) if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI)) GTIdArgs.insert(Callee->getArg(U.getOperandNo())); }; // The argument users of __kmpc_global_thread_num calls are GTIds. OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]; GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) { if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI)) AddUserArgs(*CI); return false; }); // Transitively search for more arguments by looking at the users of the // ones we know already. During the search the GTIdArgs vector is extended // so we cannot cache the size nor can we use a range based for. for (unsigned U = 0; U < GTIdArgs.size(); ++U) AddUserArgs(*GTIdArgs[U]); } /// Kernel (=GPU) optimizations and utility functions /// ///{{ /// Check if \p F is a kernel, hence entry point for target offloading. bool isKernel(Function &F) { return OMPInfoCache.Kernels.count(&F); } /// Cache to remember the unique kernel for a function. DenseMap> UniqueKernelMap; /// Find the unique kernel that will execute \p F, if any. Kernel getUniqueKernelFor(Function &F); /// Find the unique kernel that will execute \p I, if any. Kernel getUniqueKernelFor(Instruction &I) { return getUniqueKernelFor(*I.getFunction()); } /// Rewrite the device (=GPU) code state machine create in non-SPMD mode in /// the cases we can avoid taking the address of a function. bool rewriteDeviceCodeStateMachine(); /// ///}} /// Emit a remark generically /// /// This template function can be used to generically emit a remark. The /// RemarkKind should be one of the following: /// - OptimizationRemark to indicate a successful optimization attempt /// - OptimizationRemarkMissed to report a failed optimization attempt /// - OptimizationRemarkAnalysis to provide additional information about an /// optimization attempt /// /// The remark is built using a callback function provided by the caller that /// takes a RemarkKind as input and returns a RemarkKind. template void emitRemark(Instruction *I, StringRef RemarkName, RemarkCallBack &&RemarkCB) const { Function *F = I->getParent()->getParent(); auto &ORE = OREGetter(F); if (RemarkName.startswith("OMP")) ORE.emit([&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, I)) << " [" << RemarkName << "]"; }); else ORE.emit( [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, I)); }); } /// Emit a remark on a function. template void emitRemark(Function *F, StringRef RemarkName, RemarkCallBack &&RemarkCB) const { auto &ORE = OREGetter(F); if (RemarkName.startswith("OMP")) ORE.emit([&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, F)) << " [" << RemarkName << "]"; }); else ORE.emit( [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, F)); }); } /// RAII struct to temporarily change an RTL function's linkage to external. /// This prevents it from being mistakenly removed by other optimizations. 
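  // Generic, self-contained sketch of the save/restore idiom the RAII struct
  // below applies to function linkage (hypothetical helper, not part of the
  // pass): the constructor stashes the old value and installs a temporary one,
  // the destructor restores it, so every exit path of the enclosing scope
  // undoes the change.
  template <typename T> struct ScopedOverrideSketch {
    T &Slot;
    T Saved;
    ScopedOverrideSketch(T &Slot, T Temporary) : Slot(Slot), Saved(Slot) {
      Slot = Temporary;
    }
    ~ScopedOverrideSketch() { Slot = Saved; }
  };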
struct ExternalizationRAII { ExternalizationRAII(OMPInformationCache &OMPInfoCache, RuntimeFunction RFKind) : Declaration(OMPInfoCache.RFIs[RFKind].Declaration) { if (!Declaration) return; LinkageType = Declaration->getLinkage(); Declaration->setLinkage(GlobalValue::ExternalLinkage); } ~ExternalizationRAII() { if (!Declaration) return; Declaration->setLinkage(LinkageType); } Function *Declaration; GlobalValue::LinkageTypes LinkageType; }; /// The underlying module. Module &M; /// The SCC we are operating on. SmallVectorImpl &SCC; /// Callback to update the call graph, the first argument is a removed call, /// the second an optional replacement call. CallGraphUpdater &CGUpdater; /// Callback to get an OptimizationRemarkEmitter from a Function * OptimizationRemarkGetter OREGetter; /// OpenMP-specific information cache. Also Used for Attributor runs. OMPInformationCache &OMPInfoCache; /// Attributor instance. Attributor &A; /// Helper function to run Attributor on SCC. bool runAttributor(bool IsModulePass) { if (SCC.empty()) return false; // Temporarily make these function have external linkage so the Attributor // doesn't remove them when we try to look them up later. ExternalizationRAII Parallel(OMPInfoCache, OMPRTL___kmpc_kernel_parallel); ExternalizationRAII EndParallel(OMPInfoCache, OMPRTL___kmpc_kernel_end_parallel); ExternalizationRAII BarrierSPMD(OMPInfoCache, OMPRTL___kmpc_barrier_simple_spmd); ExternalizationRAII BarrierGeneric(OMPInfoCache, OMPRTL___kmpc_barrier_simple_generic); ExternalizationRAII ThreadId(OMPInfoCache, OMPRTL___kmpc_get_hardware_thread_id_in_block); ExternalizationRAII NumThreads( OMPInfoCache, OMPRTL___kmpc_get_hardware_num_threads_in_block); ExternalizationRAII WarpSize(OMPInfoCache, OMPRTL___kmpc_get_warp_size); registerAAs(IsModulePass); ChangeStatus Changed = A.run(); LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size() << " functions, result: " << Changed << ".\n"); return Changed == ChangeStatus::CHANGED; } void registerFoldRuntimeCall(RuntimeFunction RF); /// Populate the Attributor with abstract attribute opportunities in the /// function. void registerAAs(bool IsModulePass); }; Kernel OpenMPOpt::getUniqueKernelFor(Function &F) { if (!OMPInfoCache.ModuleSlice.count(&F)) return nullptr; // Use a scope to keep the lifetime of the CachedKernel short. { Optional &CachedKernel = UniqueKernelMap[&F]; if (CachedKernel) return *CachedKernel; // TODO: We should use an AA to create an (optimistic and callback // call-aware) call graph. For now we stick to simple patterns that // are less powerful, basically the worst fixpoint. if (isKernel(F)) { CachedKernel = Kernel(&F); return *CachedKernel; } CachedKernel = nullptr; if (!F.hasLocalLinkage()) { // See https://openmp.llvm.org/remarks/OptimizationRemarks.html auto Remark = [&](OptimizationRemarkAnalysis ORA) { return ORA << "Potentially unknown OpenMP target region caller."; }; emitRemark(&F, "OMP100", Remark); return nullptr; } } auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel { if (auto *Cmp = dyn_cast(U.getUser())) { // Allow use in equality comparisons. if (Cmp->isEquality()) return getUniqueKernelFor(*Cmp); return nullptr; } if (auto *CB = dyn_cast(U.getUser())) { // Allow direct calls. if (CB->isCallee(&U)) return getUniqueKernelFor(*CB); OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51]; // Allow the use in __kmpc_parallel_51 calls. 
if (OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI)) return getUniqueKernelFor(*CB); return nullptr; } // Disallow every other use. return nullptr; }; // TODO: In the future we want to track more than just a unique kernel. SmallPtrSet PotentialKernels; OMPInformationCache::foreachUse(F, [&](const Use &U) { PotentialKernels.insert(GetUniqueKernelForUse(U)); }); Kernel K = nullptr; if (PotentialKernels.size() == 1) K = *PotentialKernels.begin(); // Cache the result. UniqueKernelMap[&F] = K; return K; } bool OpenMPOpt::rewriteDeviceCodeStateMachine() { OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51]; bool Changed = false; if (!KernelParallelRFI) return Changed; // If we have disabled state machine changes, exit if (DisableOpenMPOptStateMachineRewrite) return Changed; for (Function *F : SCC) { // Check if the function is a use in a __kmpc_parallel_51 call at // all. bool UnknownUse = false; bool KernelParallelUse = false; unsigned NumDirectCalls = 0; SmallVector ToBeReplacedStateMachineUses; OMPInformationCache::foreachUse(*F, [&](Use &U) { if (auto *CB = dyn_cast(U.getUser())) if (CB->isCallee(&U)) { ++NumDirectCalls; return; } if (isa(U.getUser())) { ToBeReplacedStateMachineUses.push_back(&U); return; } // Find wrapper functions that represent parallel kernels. CallInst *CI = OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI); const unsigned int WrapperFunctionArgNo = 6; if (!KernelParallelUse && CI && CI->getArgOperandNo(&U) == WrapperFunctionArgNo) { KernelParallelUse = true; ToBeReplacedStateMachineUses.push_back(&U); return; } UnknownUse = true; }); // Do not emit a remark if we haven't seen a __kmpc_parallel_51 // use. if (!KernelParallelUse) continue; // If this ever hits, we should investigate. // TODO: Checking the number of uses is not a necessary restriction and // should be lifted. if (UnknownUse || NumDirectCalls != 1 || ToBeReplacedStateMachineUses.size() > 2) { auto Remark = [&](OptimizationRemarkAnalysis ORA) { return ORA << "Parallel region is used in " << (UnknownUse ? "unknown" : "unexpected") << " ways. Will not attempt to rewrite the state machine."; }; emitRemark(F, "OMP101", Remark); continue; } // Even if we have __kmpc_parallel_51 calls, we (for now) give // up if the function is not called from a unique kernel. Kernel K = getUniqueKernelFor(*F); if (!K) { auto Remark = [&](OptimizationRemarkAnalysis ORA) { return ORA << "Parallel region is not called from a unique kernel. " "Will not attempt to rewrite the state machine."; }; emitRemark(F, "OMP102", Remark); continue; } // We now know F is a parallel body function called only from the kernel K. // We also identified the state machine uses in which we replace the // function pointer by a new global symbol for identification purposes. This // ensures only direct calls to the function are left. Module &M = *F->getParent(); Type *Int8Ty = Type::getInt8Ty(M.getContext()); auto *ID = new GlobalVariable( M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage, UndefValue::get(Int8Ty), F->getName() + ".ID"); for (Use *U : ToBeReplacedStateMachineUses) U->set(ConstantExpr::getPointerBitCastOrAddrSpaceCast( ID, U->get()->getType())); ++NumOpenMPParallelRegionsReplacedInGPUStateMachine; Changed = true; } return Changed; } /// Abstract Attribute for tracking ICV values. 
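// Illustrative only (not used by the pass): the source-level effect ICV
// tracking enables. Assuming the nthreads ICV's runtime setter/getter are
// omp_set_num_threads and omp_get_max_threads (normally declared in <omp.h>,
// redeclared here to keep the sketch self-contained), the getter call below
// can be folded to the value given to the preceding setter because nothing in
// between may change the ICV.
extern "C" void omp_set_num_threads(int);
extern "C" int omp_get_max_threads(void);
static int icvFoldingSketch(int N) {
  omp_set_num_threads(4);           // tracked nthreads value becomes 4
  int X = N * N + 1;                // plain arithmetic, no calls in between
  return X * omp_get_max_threads(); // getter is replaceable by 4
}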
struct AAICVTracker : public StateWrapper { using Base = StateWrapper; AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {} void initialize(Attributor &A) override { Function *F = getAnchorScope(); if (!F || !A.isFunctionIPOAmendable(*F)) indicatePessimisticFixpoint(); } /// Returns true if value is assumed to be tracked. bool isAssumedTracked() const { return getAssumed(); } /// Returns true if value is known to be tracked. bool isKnownTracked() const { return getAssumed(); } /// Create an abstract attribute biew for the position \p IRP. static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A); /// Return the value with which \p I can be replaced for specific \p ICV. virtual Optional getReplacementValue(InternalControlVar ICV, const Instruction *I, Attributor &A) const { return None; } /// Return an assumed unique ICV value if a single candidate is found. If /// there cannot be one, return a nullptr. If it is not clear yet, return the /// Optional::NoneType. virtual Optional getUniqueReplacementValue(InternalControlVar ICV) const = 0; // Currently only nthreads is being tracked. // this array will only grow with time. InternalControlVar TrackableICVs[1] = {ICV_nthreads}; /// See AbstractAttribute::getName() const std::string getName() const override { return "AAICVTracker"; } /// See AbstractAttribute::getIdAddr() const char *getIdAddr() const override { return &ID; } /// This function should return true if the type of the \p AA is AAICVTracker static bool classof(const AbstractAttribute *AA) { return (AA->getIdAddr() == &ID); } static const char ID; }; struct AAICVTrackerFunction : public AAICVTracker { AAICVTrackerFunction(const IRPosition &IRP, Attributor &A) : AAICVTracker(IRP, A) {} // FIXME: come up with better string. const std::string getAsStr() const override { return "ICVTrackerFunction"; } // FIXME: come up with some stats. void trackStatistics() const override {} /// We don't manifest anything for this AA. ChangeStatus manifest(Attributor &A) override { return ChangeStatus::UNCHANGED; } // Map of ICV to their values at specific program point. EnumeratedArray, InternalControlVar, InternalControlVar::ICV___last> ICVReplacementValuesMap; ChangeStatus updateImpl(Attributor &A) override { ChangeStatus HasChanged = ChangeStatus::UNCHANGED; Function *F = getAnchorScope(); auto &OMPInfoCache = static_cast(A.getInfoCache()); for (InternalControlVar ICV : TrackableICVs) { auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter]; auto &ValuesMap = ICVReplacementValuesMap[ICV]; auto TrackValues = [&](Use &U, Function &) { CallInst *CI = OpenMPOpt::getCallIfRegularCall(U); if (!CI) return false; // FIXME: handle setters with more that 1 arguments. /// Track new value. if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second) HasChanged = ChangeStatus::CHANGED; return false; }; auto CallCheck = [&](Instruction &I) { Optional ReplVal = getValueForCall(A, I, ICV); if (ReplVal.hasValue() && ValuesMap.insert(std::make_pair(&I, *ReplVal)).second) HasChanged = ChangeStatus::CHANGED; return true; }; // Track all changes of an ICV. 
SetterRFI.foreachUse(TrackValues, F); bool UsedAssumedInformation = false; A.checkForAllInstructions(CallCheck, *this, {Instruction::Call}, UsedAssumedInformation, /* CheckBBLivenessOnly */ true); /// TODO: Figure out a way to avoid adding entry in /// ICVReplacementValuesMap Instruction *Entry = &F->getEntryBlock().front(); if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry)) ValuesMap.insert(std::make_pair(Entry, nullptr)); } return HasChanged; } /// Helper to check if \p I is a call and get the value for it if it is /// unique. Optional getValueForCall(Attributor &A, const Instruction &I, InternalControlVar &ICV) const { const auto *CB = dyn_cast(&I); if (!CB || CB->hasFnAttr("no_openmp") || CB->hasFnAttr("no_openmp_routines")) return None; auto &OMPInfoCache = static_cast(A.getInfoCache()); auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter]; auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter]; Function *CalledFunction = CB->getCalledFunction(); // Indirect call, assume ICV changes. if (CalledFunction == nullptr) return nullptr; if (CalledFunction == GetterRFI.Declaration) return None; if (CalledFunction == SetterRFI.Declaration) { if (ICVReplacementValuesMap[ICV].count(&I)) return ICVReplacementValuesMap[ICV].lookup(&I); return nullptr; } // Since we don't know, assume it changes the ICV. if (CalledFunction->isDeclaration()) return nullptr; const auto &ICVTrackingAA = A.getAAFor( *this, IRPosition::callsite_returned(*CB), DepClassTy::REQUIRED); if (ICVTrackingAA.isAssumedTracked()) { Optional URV = ICVTrackingAA.getUniqueReplacementValue(ICV); if (!URV || (*URV && AA::isValidAtPosition(**URV, I, OMPInfoCache))) return URV; } // If we don't know, assume it changes. return nullptr; } // We don't check unique value for a function, so return None. Optional getUniqueReplacementValue(InternalControlVar ICV) const override { return None; } /// Return the value with which \p I can be replaced for specific \p ICV. Optional getReplacementValue(InternalControlVar ICV, const Instruction *I, Attributor &A) const override { const auto &ValuesMap = ICVReplacementValuesMap[ICV]; if (ValuesMap.count(I)) return ValuesMap.lookup(I); SmallVector Worklist; SmallPtrSet Visited; Worklist.push_back(I); Optional ReplVal; while (!Worklist.empty()) { const Instruction *CurrInst = Worklist.pop_back_val(); if (!Visited.insert(CurrInst).second) continue; const BasicBlock *CurrBB = CurrInst->getParent(); // Go up and look for all potential setters/calls that might change the // ICV. while ((CurrInst = CurrInst->getPrevNode())) { if (ValuesMap.count(CurrInst)) { Optional NewReplVal = ValuesMap.lookup(CurrInst); // Unknown value, track new. if (!ReplVal.hasValue()) { ReplVal = NewReplVal; break; } // If we found a new value, we can't know the icv value anymore. if (NewReplVal.hasValue()) if (ReplVal != NewReplVal) return nullptr; break; } Optional NewReplVal = getValueForCall(A, *CurrInst, ICV); if (!NewReplVal.hasValue()) continue; // Unknown value, track new. if (!ReplVal.hasValue()) { ReplVal = NewReplVal; break; } // if (NewReplVal.hasValue()) // We found a new value, we can't know the icv value anymore. if (ReplVal != NewReplVal) return nullptr; } // If we are in the same BB and we have a value, we are done. if (CurrBB == I->getParent() && ReplVal.hasValue()) return ReplVal; // Go through all predecessors and add terminators for analysis. 
for (const BasicBlock *Pred : predecessors(CurrBB)) if (const Instruction *Terminator = Pred->getTerminator()) Worklist.push_back(Terminator); } return ReplVal; } }; struct AAICVTrackerFunctionReturned : AAICVTracker { AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A) : AAICVTracker(IRP, A) {} // FIXME: come up with better string. const std::string getAsStr() const override { return "ICVTrackerFunctionReturned"; } // FIXME: come up with some stats. void trackStatistics() const override {} /// We don't manifest anything for this AA. ChangeStatus manifest(Attributor &A) override { return ChangeStatus::UNCHANGED; } // Map of ICV to their values at specific program point. EnumeratedArray, InternalControlVar, InternalControlVar::ICV___last> ICVReplacementValuesMap; /// Return the value with which \p I can be replaced for specific \p ICV. Optional getUniqueReplacementValue(InternalControlVar ICV) const override { return ICVReplacementValuesMap[ICV]; } ChangeStatus updateImpl(Attributor &A) override { ChangeStatus Changed = ChangeStatus::UNCHANGED; const auto &ICVTrackingAA = A.getAAFor( *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); if (!ICVTrackingAA.isAssumedTracked()) return indicatePessimisticFixpoint(); for (InternalControlVar ICV : TrackableICVs) { Optional &ReplVal = ICVReplacementValuesMap[ICV]; Optional UniqueICVValue; auto CheckReturnInst = [&](Instruction &I) { Optional NewReplVal = ICVTrackingAA.getReplacementValue(ICV, &I, A); // If we found a second ICV value there is no unique returned value. if (UniqueICVValue.hasValue() && UniqueICVValue != NewReplVal) return false; UniqueICVValue = NewReplVal; return true; }; bool UsedAssumedInformation = false; if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret}, UsedAssumedInformation, /* CheckBBLivenessOnly */ true)) UniqueICVValue = nullptr; if (UniqueICVValue == ReplVal) continue; ReplVal = UniqueICVValue; Changed = ChangeStatus::CHANGED; } return Changed; } }; struct AAICVTrackerCallSite : AAICVTracker { AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A) : AAICVTracker(IRP, A) {} void initialize(Attributor &A) override { Function *F = getAnchorScope(); if (!F || !A.isFunctionIPOAmendable(*F)) indicatePessimisticFixpoint(); // We only initialize this AA for getters, so we need to know which ICV it // gets. auto &OMPInfoCache = static_cast(A.getInfoCache()); for (InternalControlVar ICV : TrackableICVs) { auto ICVInfo = OMPInfoCache.ICVs[ICV]; auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter]; if (Getter.Declaration == getAssociatedFunction()) { AssociatedICV = ICVInfo.Kind; return; } } /// Unknown ICV. indicatePessimisticFixpoint(); } ChangeStatus manifest(Attributor &A) override { if (!ReplVal.hasValue() || !ReplVal.getValue()) return ChangeStatus::UNCHANGED; A.changeValueAfterManifest(*getCtxI(), **ReplVal); A.deleteAfterManifest(*getCtxI()); return ChangeStatus::CHANGED; } // FIXME: come up with better string. const std::string getAsStr() const override { return "ICVTrackerCallSite"; } // FIXME: come up with some stats. void trackStatistics() const override {} InternalControlVar AssociatedICV; Optional ReplVal; ChangeStatus updateImpl(Attributor &A) override { const auto &ICVTrackingAA = A.getAAFor( *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); // We don't have any information, so we assume it changes the ICV. 
if (!ICVTrackingAA.isAssumedTracked()) return indicatePessimisticFixpoint(); Optional NewReplVal = ICVTrackingAA.getReplacementValue(AssociatedICV, getCtxI(), A); if (ReplVal == NewReplVal) return ChangeStatus::UNCHANGED; ReplVal = NewReplVal; return ChangeStatus::CHANGED; } // Return the value with which associated value can be replaced for specific // \p ICV. Optional getUniqueReplacementValue(InternalControlVar ICV) const override { return ReplVal; } }; struct AAICVTrackerCallSiteReturned : AAICVTracker { AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A) : AAICVTracker(IRP, A) {} // FIXME: come up with better string. const std::string getAsStr() const override { return "ICVTrackerCallSiteReturned"; } // FIXME: come up with some stats. void trackStatistics() const override {} /// We don't manifest anything for this AA. ChangeStatus manifest(Attributor &A) override { return ChangeStatus::UNCHANGED; } // Map of ICV to their values at specific program point. EnumeratedArray, InternalControlVar, InternalControlVar::ICV___last> ICVReplacementValuesMap; /// Return the value with which associated value can be replaced for specific /// \p ICV. Optional getUniqueReplacementValue(InternalControlVar ICV) const override { return ICVReplacementValuesMap[ICV]; } ChangeStatus updateImpl(Attributor &A) override { ChangeStatus Changed = ChangeStatus::UNCHANGED; const auto &ICVTrackingAA = A.getAAFor( *this, IRPosition::returned(*getAssociatedFunction()), DepClassTy::REQUIRED); // We don't have any information, so we assume it changes the ICV. if (!ICVTrackingAA.isAssumedTracked()) return indicatePessimisticFixpoint(); for (InternalControlVar ICV : TrackableICVs) { Optional &ReplVal = ICVReplacementValuesMap[ICV]; Optional NewReplVal = ICVTrackingAA.getUniqueReplacementValue(ICV); if (ReplVal == NewReplVal) continue; ReplVal = NewReplVal; Changed = ChangeStatus::CHANGED; } return Changed; } }; struct AAExecutionDomainFunction : public AAExecutionDomain { AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A) : AAExecutionDomain(IRP, A) {} const std::string getAsStr() const override { return "[AAExecutionDomain] " + std::to_string(SingleThreadedBBs.size()) + "/" + std::to_string(NumBBs) + " BBs thread 0 only."; } /// See AbstractAttribute::trackStatistics(). void trackStatistics() const override {} void initialize(Attributor &A) override { Function *F = getAnchorScope(); for (const auto &BB : *F) SingleThreadedBBs.insert(&BB); NumBBs = SingleThreadedBBs.size(); } ChangeStatus manifest(Attributor &A) override { LLVM_DEBUG({ for (const BasicBlock *BB : SingleThreadedBBs) dbgs() << TAG << " Basic block @" << getAnchorScope()->getName() << " " << BB->getName() << " is executed by a single thread.\n"; }); return ChangeStatus::UNCHANGED; } ChangeStatus updateImpl(Attributor &A) override; /// Check if an instruction is executed by a single thread. bool isExecutedByInitialThreadOnly(const Instruction &I) const override { return isExecutedByInitialThreadOnly(*I.getParent()); } bool isExecutedByInitialThreadOnly(const BasicBlock &BB) const override { return isValidState() && SingleThreadedBBs.contains(&BB); } /// Set of basic blocks that are executed by a single thread. SmallSetVector SingleThreadedBBs; /// Total number of basic blocks in this function. 
long unsigned NumBBs; }; ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) { Function *F = getAnchorScope(); ReversePostOrderTraversal RPOT(F); auto NumSingleThreadedBBs = SingleThreadedBBs.size(); bool AllCallSitesKnown; auto PredForCallSite = [&](AbstractCallSite ACS) { const auto &ExecutionDomainAA = A.getAAFor( *this, IRPosition::function(*ACS.getInstruction()->getFunction()), DepClassTy::REQUIRED); return ACS.isDirectCall() && ExecutionDomainAA.isExecutedByInitialThreadOnly( *ACS.getInstruction()); }; if (!A.checkForAllCallSites(PredForCallSite, *this, /* RequiresAllCallSites */ true, AllCallSitesKnown)) SingleThreadedBBs.remove(&F->getEntryBlock()); auto &OMPInfoCache = static_cast(A.getInfoCache()); auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init]; // Check if the edge into the successor block contains a condition that only // lets the main thread execute it. auto IsInitialThreadOnly = [&](BranchInst *Edge, BasicBlock *SuccessorBB) { if (!Edge || !Edge->isConditional()) return false; if (Edge->getSuccessor(0) != SuccessorBB) return false; auto *Cmp = dyn_cast(Edge->getCondition()); if (!Cmp || !Cmp->isTrueWhenEqual() || !Cmp->isEquality()) return false; ConstantInt *C = dyn_cast(Cmp->getOperand(1)); if (!C) return false; // Match: -1 == __kmpc_target_init (for non-SPMD kernels only!) if (C->isAllOnesValue()) { auto *CB = dyn_cast(Cmp->getOperand(0)); CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr; if (!CB) return false; const int InitModeArgNo = 1; auto *ModeCI = dyn_cast(CB->getOperand(InitModeArgNo)); return ModeCI && (ModeCI->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC); } if (C->isZero()) { // Match: 0 == llvm.nvvm.read.ptx.sreg.tid.x() if (auto *II = dyn_cast(Cmp->getOperand(0))) if (II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_tid_x) return true; // Match: 0 == llvm.amdgcn.workitem.id.x() if (auto *II = dyn_cast(Cmp->getOperand(0))) if (II->getIntrinsicID() == Intrinsic::amdgcn_workitem_id_x) return true; } return false; }; // Merge all the predecessor states into the current basic block. A basic // block is executed by a single thread if all of its predecessors are. auto MergePredecessorStates = [&](BasicBlock *BB) { if (pred_empty(BB)) return SingleThreadedBBs.contains(BB); bool IsInitialThread = true; for (BasicBlock *PredBB : predecessors(BB)) { if (!IsInitialThreadOnly(dyn_cast(PredBB->getTerminator()), BB)) IsInitialThread &= SingleThreadedBBs.contains(PredBB); } return IsInitialThread; }; for (auto *BB : RPOT) { if (!MergePredecessorStates(BB)) SingleThreadedBBs.remove(BB); } return (NumSingleThreadedBBs == SingleThreadedBBs.size()) ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED; } /// Try to replace memory allocation calls called by a single thread with a /// static buffer of shared memory. struct AAHeapToShared : public StateWrapper { using Base = StateWrapper; AAHeapToShared(const IRPosition &IRP, Attributor &A) : Base(IRP) {} /// Create an abstract attribute view for the position \p IRP. static AAHeapToShared &createForPosition(const IRPosition &IRP, Attributor &A); /// Returns true if HeapToShared conversion is assumed to be possible. virtual bool isAssumedHeapToShared(CallBase &CB) const = 0; /// Returns true if HeapToShared conversion is assumed and the CB is a /// callsite to a free operation to be removed. virtual bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const = 0; /// See AbstractAttribute::getName(). 
const std::string getName() const override { return "AAHeapToShared"; } /// See AbstractAttribute::getIdAddr(). const char *getIdAddr() const override { return &ID; } /// This function should return true if the type of the \p AA is /// AAHeapToShared. static bool classof(const AbstractAttribute *AA) { return (AA->getIdAddr() == &ID); } /// Unique ID (due to the unique address) static const char ID; }; struct AAHeapToSharedFunction : public AAHeapToShared { AAHeapToSharedFunction(const IRPosition &IRP, Attributor &A) : AAHeapToShared(IRP, A) {} const std::string getAsStr() const override { return "[AAHeapToShared] " + std::to_string(MallocCalls.size()) + " malloc calls eligible."; } /// See AbstractAttribute::trackStatistics(). void trackStatistics() const override {} /// This functions finds free calls that will be removed by the /// HeapToShared transformation. void findPotentialRemovedFreeCalls(Attributor &A) { auto &OMPInfoCache = static_cast(A.getInfoCache()); auto &FreeRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared]; PotentialRemovedFreeCalls.clear(); // Update free call users of found malloc calls. for (CallBase *CB : MallocCalls) { SmallVector FreeCalls; for (auto *U : CB->users()) { CallBase *C = dyn_cast(U); if (C && C->getCalledFunction() == FreeRFI.Declaration) FreeCalls.push_back(C); } if (FreeCalls.size() != 1) continue; PotentialRemovedFreeCalls.insert(FreeCalls.front()); } } void initialize(Attributor &A) override { auto &OMPInfoCache = static_cast(A.getInfoCache()); auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; for (User *U : RFI.Declaration->users()) if (CallBase *CB = dyn_cast(U)) MallocCalls.insert(CB); findPotentialRemovedFreeCalls(A); } bool isAssumedHeapToShared(CallBase &CB) const override { return isValidState() && MallocCalls.count(&CB); } bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const override { return isValidState() && PotentialRemovedFreeCalls.count(&CB); } ChangeStatus manifest(Attributor &A) override { if (MallocCalls.empty()) return ChangeStatus::UNCHANGED; auto &OMPInfoCache = static_cast(A.getInfoCache()); auto &FreeCall = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared]; Function *F = getAnchorScope(); auto *HS = A.lookupAAFor(IRPosition::function(*F), this, DepClassTy::OPTIONAL); ChangeStatus Changed = ChangeStatus::UNCHANGED; for (CallBase *CB : MallocCalls) { // Skip replacing this if HeapToStack has already claimed it. if (HS && HS->isAssumedHeapToStack(*CB)) continue; // Find the unique free call to remove it. SmallVector FreeCalls; for (auto *U : CB->users()) { CallBase *C = dyn_cast(U); if (C && C->getCalledFunction() == FreeCall.Declaration) FreeCalls.push_back(C); } if (FreeCalls.size() != 1) continue; auto *AllocSize = cast(CB->getArgOperand(0)); LLVM_DEBUG(dbgs() << TAG << "Replace globalization call " << *CB << " with " << AllocSize->getZExtValue() << " bytes of shared memory\n"); // Create a new shared memory buffer of the same size as the allocation // and replace all the uses of the original allocation with it. 
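      // Illustrative device-side pattern only (`computeSeed` is hypothetical):
      // a stack value that escapes from the initial thread into a parallel
      // region is "globalized" by the device runtime through
      // __kmpc_alloc_shared/__kmpc_free_shared. When the allocation size is a
      // constant and the allocating code runs single-threaded, the rewrite
      // below serves it from a static shared-memory array instead, removing
      // the runtime round-trip.
      //
      //   #pragma omp target teams map(from: Out[0:N])
      //   {
      //     int X = computeSeed();     // written by the initial thread only
      //   #pragma omp parallel for
      //     for (int I = 0; I < N; ++I)
      //       Out[I] = X + I;          // read by every thread -> X is globalized
      //   }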
Module *M = CB->getModule(); Type *Int8Ty = Type::getInt8Ty(M->getContext()); Type *Int8ArrTy = ArrayType::get(Int8Ty, AllocSize->getZExtValue()); auto *SharedMem = new GlobalVariable( *M, Int8ArrTy, /* IsConstant */ false, GlobalValue::InternalLinkage, UndefValue::get(Int8ArrTy), CB->getName() + "_shared", nullptr, GlobalValue::NotThreadLocal, static_cast(AddressSpace::Shared)); auto *NewBuffer = ConstantExpr::getPointerCast(SharedMem, Int8Ty->getPointerTo()); auto Remark = [&](OptimizationRemark OR) { return OR << "Replaced globalized variable with " << ore::NV("SharedMemory", AllocSize->getZExtValue()) << ((AllocSize->getZExtValue() != 1) ? " bytes " : " byte ") << "of shared memory."; }; A.emitRemark(CB, "OMP111", Remark); MaybeAlign Alignment = CB->getRetAlign(); assert(Alignment && "HeapToShared on allocation without alignment attribute"); SharedMem->setAlignment(MaybeAlign(Alignment)); A.changeValueAfterManifest(*CB, *NewBuffer); A.deleteAfterManifest(*CB); A.deleteAfterManifest(*FreeCalls.front()); NumBytesMovedToSharedMemory += AllocSize->getZExtValue(); Changed = ChangeStatus::CHANGED; } return Changed; } ChangeStatus updateImpl(Attributor &A) override { auto &OMPInfoCache = static_cast(A.getInfoCache()); auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; Function *F = getAnchorScope(); auto NumMallocCalls = MallocCalls.size(); // Only consider malloc calls executed by a single thread with a constant. for (User *U : RFI.Declaration->users()) { const auto &ED = A.getAAFor( *this, IRPosition::function(*F), DepClassTy::REQUIRED); if (CallBase *CB = dyn_cast(U)) if (!isa(CB->getArgOperand(0)) || !ED.isExecutedByInitialThreadOnly(*CB)) MallocCalls.remove(CB); } findPotentialRemovedFreeCalls(A); if (NumMallocCalls != MallocCalls.size()) return ChangeStatus::CHANGED; return ChangeStatus::UNCHANGED; } /// Collection of all malloc calls in a function. SmallSetVector MallocCalls; /// Collection of potentially removed free calls in a function. SmallPtrSet PotentialRemovedFreeCalls; }; struct AAKernelInfo : public StateWrapper { using Base = StateWrapper; AAKernelInfo(const IRPosition &IRP, Attributor &A) : Base(IRP) {} /// Statistics are tracked as part of manifest for now. void trackStatistics() const override {} /// See AbstractAttribute::getAsStr() const std::string getAsStr() const override { if (!isValidState()) return ""; return std::string(SPMDCompatibilityTracker.isAssumed() ? "SPMD" : "generic") + std::string(SPMDCompatibilityTracker.isAtFixpoint() ? " [FIX]" : "") + std::string(" #PRs: ") + (ReachedKnownParallelRegions.isValidState() ? std::to_string(ReachedKnownParallelRegions.size()) : "") + ", #Unknown PRs: " + (ReachedUnknownParallelRegions.isValidState() ? std::to_string(ReachedUnknownParallelRegions.size()) : "") + ", #Reaching Kernels: " + (ReachingKernelEntries.isValidState() ? std::to_string(ReachingKernelEntries.size()) : ""); } /// Create an abstract attribute biew for the position \p IRP. 
static AAKernelInfo &createForPosition(const IRPosition &IRP, Attributor &A); /// See AbstractAttribute::getName() const std::string getName() const override { return "AAKernelInfo"; } /// See AbstractAttribute::getIdAddr() const char *getIdAddr() const override { return &ID; } /// This function should return true if the type of the \p AA is AAKernelInfo static bool classof(const AbstractAttribute *AA) { return (AA->getIdAddr() == &ID); } static const char ID; }; /// The function kernel info abstract attribute, basically, what can we say /// about a function with regards to the KernelInfoState. struct AAKernelInfoFunction : AAKernelInfo { AAKernelInfoFunction(const IRPosition &IRP, Attributor &A) : AAKernelInfo(IRP, A) {} SmallPtrSet GuardedInstructions; SmallPtrSetImpl &getGuardedInstructions() { return GuardedInstructions; } /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { // This is a high-level transform that might change the constant arguments // of the init and dinit calls. We need to tell the Attributor about this // to avoid other parts using the current constant value for simpliication. auto &OMPInfoCache = static_cast(A.getInfoCache()); Function *Fn = getAnchorScope(); if (!OMPInfoCache.Kernels.count(Fn)) return; // Add itself to the reaching kernel and set IsKernelEntry. ReachingKernelEntries.insert(Fn); IsKernelEntry = true; OMPInformationCache::RuntimeFunctionInfo &InitRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init]; OMPInformationCache::RuntimeFunctionInfo &DeinitRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_deinit]; // For kernels we perform more initialization work, first we find the init // and deinit calls. auto StoreCallBase = [](Use &U, OMPInformationCache::RuntimeFunctionInfo &RFI, CallBase *&Storage) { CallBase *CB = OpenMPOpt::getCallIfRegularCall(U, &RFI); assert(CB && "Unexpected use of __kmpc_target_init or __kmpc_target_deinit!"); assert(!Storage && "Multiple uses of __kmpc_target_init or __kmpc_target_deinit!"); Storage = CB; return false; }; InitRFI.foreachUse( [&](Use &U, Function &) { StoreCallBase(U, InitRFI, KernelInitCB); return false; }, Fn); DeinitRFI.foreachUse( [&](Use &U, Function &) { StoreCallBase(U, DeinitRFI, KernelDeinitCB); return false; }, Fn); // Ignore kernels without initializers such as global constructors. if (!KernelInitCB || !KernelDeinitCB) { indicateOptimisticFixpoint(); return; } // For kernels we might need to initialize/finalize the IsSPMD state and // we need to register a simplification callback so that the Attributor // knows the constant arguments to __kmpc_target_init and // __kmpc_target_deinit might actually change. Attributor::SimplifictionCallbackTy StateMachineSimplifyCB = [&](const IRPosition &IRP, const AbstractAttribute *AA, bool &UsedAssumedInformation) -> Optional { // IRP represents the "use generic state machine" argument of an // __kmpc_target_init call. We will answer this one with the internal // state. As long as we are not in an invalid state, we will create a // custom state machine so the value should be a `i1 false`. If we are // in an invalid state, we won't change the value that is in the IR. if (!ReachedKnownParallelRegions.isValidState()) return nullptr; // If we have disabled state machine rewrites, don't make a custom one. 
if (DisableOpenMPOptStateMachineRewrite) return nullptr; if (AA) A.recordDependence(*this, *AA, DepClassTy::OPTIONAL); UsedAssumedInformation = !isAtFixpoint(); auto *FalseVal = ConstantInt::getBool(IRP.getAnchorValue().getContext(), false); return FalseVal; }; Attributor::SimplifictionCallbackTy ModeSimplifyCB = [&](const IRPosition &IRP, const AbstractAttribute *AA, bool &UsedAssumedInformation) -> Optional { // IRP represents the "SPMDCompatibilityTracker" argument of an // __kmpc_target_init or // __kmpc_target_deinit call. We will answer this one with the internal // state. if (!SPMDCompatibilityTracker.isValidState()) return nullptr; if (!SPMDCompatibilityTracker.isAtFixpoint()) { if (AA) A.recordDependence(*this, *AA, DepClassTy::OPTIONAL); UsedAssumedInformation = true; } else { UsedAssumedInformation = false; } auto *Val = ConstantInt::getSigned( IntegerType::getInt8Ty(IRP.getAnchorValue().getContext()), SPMDCompatibilityTracker.isAssumed() ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC); return Val; }; Attributor::SimplifictionCallbackTy IsGenericModeSimplifyCB = [&](const IRPosition &IRP, const AbstractAttribute *AA, bool &UsedAssumedInformation) -> Optional { // IRP represents the "RequiresFullRuntime" argument of an // __kmpc_target_init or __kmpc_target_deinit call. We will answer this // one with the internal state of the SPMDCompatibilityTracker, so if // generic then true, if SPMD then false. if (!SPMDCompatibilityTracker.isValidState()) return nullptr; if (!SPMDCompatibilityTracker.isAtFixpoint()) { if (AA) A.recordDependence(*this, *AA, DepClassTy::OPTIONAL); UsedAssumedInformation = true; } else { UsedAssumedInformation = false; } auto *Val = ConstantInt::getBool(IRP.getAnchorValue().getContext(), !SPMDCompatibilityTracker.isAssumed()); return Val; }; constexpr const int InitModeArgNo = 1; constexpr const int DeinitModeArgNo = 1; constexpr const int InitUseStateMachineArgNo = 2; constexpr const int InitRequiresFullRuntimeArgNo = 3; constexpr const int DeinitRequiresFullRuntimeArgNo = 2; A.registerSimplificationCallback( IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo), StateMachineSimplifyCB); A.registerSimplificationCallback( IRPosition::callsite_argument(*KernelInitCB, InitModeArgNo), ModeSimplifyCB); A.registerSimplificationCallback( IRPosition::callsite_argument(*KernelDeinitCB, DeinitModeArgNo), ModeSimplifyCB); A.registerSimplificationCallback( IRPosition::callsite_argument(*KernelInitCB, InitRequiresFullRuntimeArgNo), IsGenericModeSimplifyCB); A.registerSimplificationCallback( IRPosition::callsite_argument(*KernelDeinitCB, DeinitRequiresFullRuntimeArgNo), IsGenericModeSimplifyCB); // Check if we know we are in SPMD-mode already. ConstantInt *ModeArg = dyn_cast(KernelInitCB->getArgOperand(InitModeArgNo)); if (ModeArg && (ModeArg->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD)) SPMDCompatibilityTracker.indicateOptimisticFixpoint(); // This is a generic region but SPMDization is disabled so stop tracking. else if (DisableOpenMPOptSPMDization) SPMDCompatibilityTracker.indicatePessimisticFixpoint(); } /// Sanitize the string \p S such that it is a suitable global symbol name. static std::string sanitizeForGlobalName(std::string S) { std::replace_if( S.begin(), S.end(), [](const char C) { return !((C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') || (C >= '0' && C <= '9') || C == '_'); }, '.'); return S; } /// Modify the IR based on the KernelInfoState as the fixpoint iteration is /// finished now. 
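  // Illustrative only (not used by the pass): roughly the kernel shape the
  // SPMD-ization below targets. The store before the parallel loop is a
  // sequential side effect, so after switching the kernel to SPMD mode it is
  // wrapped in a guarded region: only thread 0 executes it, followed by a
  // barrier before all threads run the loop. Whether a given kernel is
  // actually converted depends on the SPMD-compatibility analysis above;
  // `A`, `N`, and `Seed` are hypothetical.
  static void spmdizationSketch(double *A, int N, double Seed) {
#pragma omp target teams map(tofrom: A[0:N])
    {
      A[0] = Seed; // sequential side effect -> guarded by a thread-0 check
#pragma omp parallel for
      for (int I = 1; I < N; ++I)
        A[I] = A[0] * I; // parallel body, executed by all threads in SPMD mode
    }
  }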
ChangeStatus manifest(Attributor &A) override { // If we are not looking at a kernel with __kmpc_target_init and // __kmpc_target_deinit call we cannot actually manifest the information. if (!KernelInitCB || !KernelDeinitCB) return ChangeStatus::UNCHANGED; // If we can we change the execution mode to SPMD-mode otherwise we build a // custom state machine. ChangeStatus Changed = ChangeStatus::UNCHANGED; if (!changeToSPMDMode(A, Changed)) return buildCustomStateMachine(A); return Changed; } bool changeToSPMDMode(Attributor &A, ChangeStatus &Changed) { auto &OMPInfoCache = static_cast(A.getInfoCache()); if (!SPMDCompatibilityTracker.isAssumed()) { for (Instruction *NonCompatibleI : SPMDCompatibilityTracker) { if (!NonCompatibleI) continue; // Skip diagnostics on calls to known OpenMP runtime functions for now. if (auto *CB = dyn_cast(NonCompatibleI)) if (OMPInfoCache.RTLFunctions.contains(CB->getCalledFunction())) continue; auto Remark = [&](OptimizationRemarkAnalysis ORA) { ORA << "Value has potential side effects preventing SPMD-mode " "execution"; if (isa(NonCompatibleI)) { ORA << ". Add `__attribute__((assume(\"ompx_spmd_amenable\")))` to " "the called function to override"; } return ORA << "."; }; A.emitRemark(NonCompatibleI, "OMP121", Remark); LLVM_DEBUG(dbgs() << TAG << "SPMD-incompatible side-effect: " << *NonCompatibleI << "\n"); } return false; } // Check if the kernel is already in SPMD mode, if so, return success. Function *Kernel = getAnchorScope(); GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable( (Kernel->getName() + "_exec_mode").str()); assert(ExecMode && "Kernel without exec mode?"); assert(ExecMode->getInitializer() && "ExecMode doesn't have initializer!"); // Set the global exec mode flag to indicate SPMD-Generic mode. assert(isa(ExecMode->getInitializer()) && "ExecMode is not an integer!"); const int8_t ExecModeVal = cast(ExecMode->getInitializer())->getSExtValue(); if (ExecModeVal != OMP_TGT_EXEC_MODE_GENERIC) return true; // We will now unconditionally modify the IR, indicate a change. Changed = ChangeStatus::CHANGED; auto CreateGuardedRegion = [&](Instruction *RegionStartI, Instruction *RegionEndI) { LoopInfo *LI = nullptr; DominatorTree *DT = nullptr; MemorySSAUpdater *MSU = nullptr; using InsertPointTy = OpenMPIRBuilder::InsertPointTy; BasicBlock *ParentBB = RegionStartI->getParent(); Function *Fn = ParentBB->getParent(); Module &M = *Fn->getParent(); // Create all the blocks and logic. // ParentBB: // goto RegionCheckTidBB // RegionCheckTidBB: // Tid = __kmpc_hardware_thread_id() // if (Tid != 0) // goto RegionBarrierBB // RegionStartBB: // // goto RegionEndBB // RegionEndBB: // // goto RegionBarrierBB // RegionBarrierBB: // __kmpc_simple_barrier_spmd() // // second barrier is omitted if lacking escaping values. 
// // __kmpc_simple_barrier_spmd() // goto RegionExitBB // RegionExitBB: // BasicBlock *RegionEndBB = SplitBlock(ParentBB, RegionEndI->getNextNode(), DT, LI, MSU, "region.guarded.end"); BasicBlock *RegionBarrierBB = SplitBlock(RegionEndBB, &*RegionEndBB->getFirstInsertionPt(), DT, LI, MSU, "region.barrier"); BasicBlock *RegionExitBB = SplitBlock(RegionBarrierBB, &*RegionBarrierBB->getFirstInsertionPt(), DT, LI, MSU, "region.exit"); BasicBlock *RegionStartBB = SplitBlock(ParentBB, RegionStartI, DT, LI, MSU, "region.guarded"); assert(ParentBB->getUniqueSuccessor() == RegionStartBB && "Expected a different CFG"); BasicBlock *RegionCheckTidBB = SplitBlock( ParentBB, ParentBB->getTerminator(), DT, LI, MSU, "region.check.tid"); // Register basic blocks with the Attributor. A.registerManifestAddedBasicBlock(*RegionEndBB); A.registerManifestAddedBasicBlock(*RegionBarrierBB); A.registerManifestAddedBasicBlock(*RegionExitBB); A.registerManifestAddedBasicBlock(*RegionStartBB); A.registerManifestAddedBasicBlock(*RegionCheckTidBB); bool HasBroadcastValues = false; // Find escaping outputs from the guarded region to outside users and // broadcast their values to them. for (Instruction &I : *RegionStartBB) { SmallPtrSet OutsideUsers; for (User *Usr : I.users()) { Instruction &UsrI = *cast(Usr); if (UsrI.getParent() != RegionStartBB) OutsideUsers.insert(&UsrI); } if (OutsideUsers.empty()) continue; HasBroadcastValues = true; // Emit a global variable in shared memory to store the broadcasted // value. auto *SharedMem = new GlobalVariable( M, I.getType(), /* IsConstant */ false, GlobalValue::InternalLinkage, UndefValue::get(I.getType()), sanitizeForGlobalName( (I.getName() + ".guarded.output.alloc").str()), nullptr, GlobalValue::NotThreadLocal, static_cast(AddressSpace::Shared)); // Emit a store instruction to update the value. new StoreInst(&I, SharedMem, RegionEndBB->getTerminator()); LoadInst *LoadI = new LoadInst(I.getType(), SharedMem, I.getName() + ".guarded.output.load", RegionBarrierBB->getTerminator()); // Emit a load instruction and replace uses of the output value. for (Instruction *UsrI : OutsideUsers) UsrI->replaceUsesOfWith(&I, LoadI); } auto &OMPInfoCache = static_cast(A.getInfoCache()); // Go to tid check BB in ParentBB. 
const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc(); ParentBB->getTerminator()->eraseFromParent(); OpenMPIRBuilder::LocationDescription Loc( InsertPointTy(ParentBB, ParentBB->end()), DL); OMPInfoCache.OMPBuilder.updateToLocation(Loc); uint32_t SrcLocStrSize; auto *SrcLocStr = OMPInfoCache.OMPBuilder.getOrCreateSrcLocStr(Loc, SrcLocStrSize); Value *Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize); BranchInst::Create(RegionCheckTidBB, ParentBB)->setDebugLoc(DL); // Add check for Tid in RegionCheckTidBB RegionCheckTidBB->getTerminator()->eraseFromParent(); OpenMPIRBuilder::LocationDescription LocRegionCheckTid( InsertPointTy(RegionCheckTidBB, RegionCheckTidBB->end()), DL); OMPInfoCache.OMPBuilder.updateToLocation(LocRegionCheckTid); FunctionCallee HardwareTidFn = OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( M, OMPRTL___kmpc_get_hardware_thread_id_in_block); CallInst *Tid = OMPInfoCache.OMPBuilder.Builder.CreateCall(HardwareTidFn, {}); Tid->setDebugLoc(DL); OMPInfoCache.setCallingConvention(HardwareTidFn, Tid); Value *TidCheck = OMPInfoCache.OMPBuilder.Builder.CreateIsNull(Tid); OMPInfoCache.OMPBuilder.Builder .CreateCondBr(TidCheck, RegionStartBB, RegionBarrierBB) ->setDebugLoc(DL); // First barrier for synchronization, ensures main thread has updated // values. FunctionCallee BarrierFn = OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( M, OMPRTL___kmpc_barrier_simple_spmd); OMPInfoCache.OMPBuilder.updateToLocation(InsertPointTy( RegionBarrierBB, RegionBarrierBB->getFirstInsertionPt())); CallInst *Barrier = OMPInfoCache.OMPBuilder.Builder.CreateCall(BarrierFn, {Ident, Tid}); Barrier->setDebugLoc(DL); OMPInfoCache.setCallingConvention(BarrierFn, Barrier); // Second barrier ensures workers have read broadcast values. if (HasBroadcastValues) { CallInst *Barrier = CallInst::Create(BarrierFn, {Ident, Tid}, "", RegionBarrierBB->getTerminator()); Barrier->setDebugLoc(DL); OMPInfoCache.setCallingConvention(BarrierFn, Barrier); } }; auto &AllocSharedRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; SmallPtrSet Visited; for (Instruction *GuardedI : SPMDCompatibilityTracker) { BasicBlock *BB = GuardedI->getParent(); if (!Visited.insert(BB).second) continue; SmallVector> Reorders; Instruction *LastEffect = nullptr; BasicBlock::reverse_iterator IP = BB->rbegin(), IPEnd = BB->rend(); while (++IP != IPEnd) { if (!IP->mayHaveSideEffects() && !IP->mayReadFromMemory()) continue; Instruction *I = &*IP; if (OpenMPOpt::getCallIfRegularCall(*I, &AllocSharedRFI)) continue; if (!I->user_empty() || !SPMDCompatibilityTracker.contains(I)) { LastEffect = nullptr; continue; } if (LastEffect) Reorders.push_back({I, LastEffect}); LastEffect = &*IP; } for (auto &Reorder : Reorders) Reorder.first->moveBefore(Reorder.second); } SmallVector, 4> GuardedRegions; for (Instruction *GuardedI : SPMDCompatibilityTracker) { BasicBlock *BB = GuardedI->getParent(); auto *CalleeAA = A.lookupAAFor( IRPosition::function(*GuardedI->getFunction()), nullptr, DepClassTy::NONE); assert(CalleeAA != nullptr && "Expected Callee AAKernelInfo"); auto &CalleeAAFunction = *cast(CalleeAA); // Continue if instruction is already guarded. if (CalleeAAFunction.getGuardedInstructions().contains(GuardedI)) continue; Instruction *GuardedRegionStart = nullptr, *GuardedRegionEnd = nullptr; for (Instruction &I : *BB) { // If instruction I needs to be guarded update the guarded region // bounds. 
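// Consecutive to-be-guarded instructions are merged into a single region so
// that each region pays for one pair of barriers rather than one pair per
// instruction.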
if (SPMDCompatibilityTracker.contains(&I)) { CalleeAAFunction.getGuardedInstructions().insert(&I); if (GuardedRegionStart) GuardedRegionEnd = &I; else GuardedRegionStart = GuardedRegionEnd = &I; continue; } // Instruction I does not need guarding, store // any region found and reset bounds. if (GuardedRegionStart) { GuardedRegions.push_back( std::make_pair(GuardedRegionStart, GuardedRegionEnd)); GuardedRegionStart = nullptr; GuardedRegionEnd = nullptr; } } } for (auto &GR : GuardedRegions) CreateGuardedRegion(GR.first, GR.second); // Adjust the global exec mode flag that tells the runtime what mode this // kernel is executed in. assert(ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC && "Initially non-SPMD kernel has SPMD exec mode!"); ExecMode->setInitializer( ConstantInt::get(ExecMode->getInitializer()->getType(), ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD)); // Next rewrite the init and deinit calls to indicate we use SPMD-mode now. const int InitModeArgNo = 1; const int DeinitModeArgNo = 1; const int InitUseStateMachineArgNo = 2; const int InitRequiresFullRuntimeArgNo = 3; const int DeinitRequiresFullRuntimeArgNo = 2; auto &Ctx = getAnchorValue().getContext(); A.changeUseAfterManifest( KernelInitCB->getArgOperandUse(InitModeArgNo), *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx), OMP_TGT_EXEC_MODE_SPMD)); A.changeUseAfterManifest( KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo), *ConstantInt::getBool(Ctx, false)); A.changeUseAfterManifest( KernelDeinitCB->getArgOperandUse(DeinitModeArgNo), *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx), OMP_TGT_EXEC_MODE_SPMD)); A.changeUseAfterManifest( KernelInitCB->getArgOperandUse(InitRequiresFullRuntimeArgNo), *ConstantInt::getBool(Ctx, false)); A.changeUseAfterManifest( KernelDeinitCB->getArgOperandUse(DeinitRequiresFullRuntimeArgNo), *ConstantInt::getBool(Ctx, false)); ++NumOpenMPTargetRegionKernelsSPMD; auto Remark = [&](OptimizationRemark OR) { return OR << "Transformed generic-mode kernel to SPMD-mode."; }; A.emitRemark(KernelInitCB, "OMP120", Remark); return true; }; ChangeStatus buildCustomStateMachine(Attributor &A) { // If we have disabled state machine rewrites, don't make a custom one if (DisableOpenMPOptStateMachineRewrite) return ChangeStatus::UNCHANGED; // Don't rewrite the state machine if we are not in a valid state. if (!ReachedKnownParallelRegions.isValidState()) return ChangeStatus::UNCHANGED; const int InitModeArgNo = 1; const int InitUseStateMachineArgNo = 2; // Check if the current configuration is non-SPMD and generic state machine. // If we already have SPMD mode or a custom state machine we do not need to // go any further. If it is anything but a constant something is weird and // we give up. ConstantInt *UseStateMachine = dyn_cast( KernelInitCB->getArgOperand(InitUseStateMachineArgNo)); ConstantInt *Mode = dyn_cast(KernelInitCB->getArgOperand(InitModeArgNo)); // If we are stuck with generic mode, try to create a custom device (=GPU) // state machine which is specialized for the parallel regions that are // reachable by the kernel. if (!UseStateMachine || UseStateMachine->isZero() || !Mode || (Mode->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD)) return ChangeStatus::UNCHANGED; // If not SPMD mode, indicate we use a custom state machine now. auto &Ctx = getAnchorValue().getContext(); auto *FalseVal = ConstantInt::getBool(Ctx, false); A.changeUseAfterManifest( KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo), *FalseVal); // If we don't actually need a state machine we are done here. 
This can // happen if there simply are no parallel regions. In the resulting kernel // all worker threads will simply exit right away, leaving the main thread // to do the work alone. if (!mayContainParallelRegion()) { ++NumOpenMPTargetRegionKernelsWithoutStateMachine; auto Remark = [&](OptimizationRemark OR) { return OR << "Removing unused state machine from generic-mode kernel."; }; A.emitRemark(KernelInitCB, "OMP130", Remark); return ChangeStatus::CHANGED; } // Keep track in the statistics of our new shiny custom state machine. if (ReachedUnknownParallelRegions.empty()) { ++NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback; auto Remark = [&](OptimizationRemark OR) { return OR << "Rewriting generic-mode kernel with a customized state " "machine."; }; A.emitRemark(KernelInitCB, "OMP131", Remark); } else { ++NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback; auto Remark = [&](OptimizationRemarkAnalysis OR) { return OR << "Generic-mode kernel is executed with a customized state " "machine that requires a fallback."; }; A.emitRemark(KernelInitCB, "OMP132", Remark); // Tell the user why we ended up with a fallback. for (CallBase *UnknownParallelRegionCB : ReachedUnknownParallelRegions) { if (!UnknownParallelRegionCB) continue; auto Remark = [&](OptimizationRemarkAnalysis ORA) { return ORA << "Call may contain unknown parallel regions. Use " << "`__attribute__((assume(\"omp_no_parallelism\")))` to " "override."; }; A.emitRemark(UnknownParallelRegionCB, "OMP133", Remark); } } // Create all the blocks: // // InitCB = __kmpc_target_init(...) // BlockHwSize = // __kmpc_get_hardware_num_threads_in_block(); // WarpSize = __kmpc_get_warp_size(); // BlockSize = BlockHwSize - WarpSize; - // if (InitCB >= BlockSize) return; - // IsWorkerCheckBB: bool IsWorker = InitCB >= 0; + // IsWorkerCheckBB: bool IsWorker = InitCB != -1; // if (IsWorker) { + // if (InitCB >= BlockSize) return; // SMBeginBB: __kmpc_barrier_simple_generic(...); // void *WorkFn; // bool Active = __kmpc_kernel_parallel(&WorkFn); // if (!WorkFn) return; // SMIsActiveCheckBB: if (Active) { // SMIfCascadeCurrentBB: if (WorkFn == ) // ParFn0(...); // SMIfCascadeCurrentBB: else if (WorkFn == ) // ParFn1(...); // ... // SMIfCascadeCurrentBB: else // ((WorkFnTy*)WorkFn)(...); // SMEndParallelBB: __kmpc_kernel_end_parallel(...); // } // SMDoneBB: __kmpc_barrier_simple_generic(...); // goto SMBeginBB; // } // UserCodeEntryBB: // user code // __kmpc_target_deinit(...) 
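// Put differently, each worker thread ends up executing a loop of roughly
// this form (illustrative pseudocode; `dispatch` stands for the if-cascade
// over the known parallel regions with an indirect call as fallback):
//
//   for (;;) {
//     __kmpc_barrier_simple_generic(Ident, GTid);
//     void *WorkFn;
//     bool Active = __kmpc_kernel_parallel(&WorkFn);
//     if (!WorkFn)
//       return;                       // kernel finished
//     if (Active) {
//       dispatch(WorkFn);             // run the requested parallel region
//       __kmpc_kernel_end_parallel();
//     }
//     __kmpc_barrier_simple_generic(Ident, GTid);
//   }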
// Function *Kernel = getAssociatedFunction(); assert(Kernel && "Expected an associated function!"); BasicBlock *InitBB = KernelInitCB->getParent(); BasicBlock *UserCodeEntryBB = InitBB->splitBasicBlock( KernelInitCB->getNextNode(), "thread.user_code.check"); BasicBlock *IsWorkerCheckBB = BasicBlock::Create(Ctx, "is_worker_check", Kernel, UserCodeEntryBB); BasicBlock *StateMachineBeginBB = BasicBlock::Create( Ctx, "worker_state_machine.begin", Kernel, UserCodeEntryBB); BasicBlock *StateMachineFinishedBB = BasicBlock::Create( Ctx, "worker_state_machine.finished", Kernel, UserCodeEntryBB); BasicBlock *StateMachineIsActiveCheckBB = BasicBlock::Create( Ctx, "worker_state_machine.is_active.check", Kernel, UserCodeEntryBB); BasicBlock *StateMachineIfCascadeCurrentBB = BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check", Kernel, UserCodeEntryBB); BasicBlock *StateMachineEndParallelBB = BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.end", Kernel, UserCodeEntryBB); BasicBlock *StateMachineDoneBarrierBB = BasicBlock::Create( Ctx, "worker_state_machine.done.barrier", Kernel, UserCodeEntryBB); A.registerManifestAddedBasicBlock(*InitBB); A.registerManifestAddedBasicBlock(*UserCodeEntryBB); A.registerManifestAddedBasicBlock(*IsWorkerCheckBB); A.registerManifestAddedBasicBlock(*StateMachineBeginBB); A.registerManifestAddedBasicBlock(*StateMachineFinishedBB); A.registerManifestAddedBasicBlock(*StateMachineIsActiveCheckBB); A.registerManifestAddedBasicBlock(*StateMachineIfCascadeCurrentBB); A.registerManifestAddedBasicBlock(*StateMachineEndParallelBB); A.registerManifestAddedBasicBlock(*StateMachineDoneBarrierBB); const DebugLoc &DLoc = KernelInitCB->getDebugLoc(); ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc); InitBB->getTerminator()->eraseFromParent(); + Instruction *IsWorker = + ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB, + ConstantInt::get(KernelInitCB->getType(), -1), + "thread.is_worker", InitBB); + IsWorker->setDebugLoc(DLoc); + BranchInst::Create(IsWorkerCheckBB, UserCodeEntryBB, IsWorker, InitBB); + Module &M = *Kernel->getParent(); auto &OMPInfoCache = static_cast(A.getInfoCache()); FunctionCallee BlockHwSizeFn = OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( M, OMPRTL___kmpc_get_hardware_num_threads_in_block); FunctionCallee WarpSizeFn = OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( M, OMPRTL___kmpc_get_warp_size); CallInst *BlockHwSize = - CallInst::Create(BlockHwSizeFn, "block.hw_size", InitBB); + CallInst::Create(BlockHwSizeFn, "block.hw_size", IsWorkerCheckBB); OMPInfoCache.setCallingConvention(BlockHwSizeFn, BlockHwSize); BlockHwSize->setDebugLoc(DLoc); - CallInst *WarpSize = CallInst::Create(WarpSizeFn, "warp.size", InitBB); + CallInst *WarpSize = + CallInst::Create(WarpSizeFn, "warp.size", IsWorkerCheckBB); OMPInfoCache.setCallingConvention(WarpSizeFn, WarpSize); WarpSize->setDebugLoc(DLoc); - Instruction *BlockSize = - BinaryOperator::CreateSub(BlockHwSize, WarpSize, "block.size", InitBB); + Instruction *BlockSize = BinaryOperator::CreateSub( + BlockHwSize, WarpSize, "block.size", IsWorkerCheckBB); BlockSize->setDebugLoc(DLoc); - Instruction *IsMainOrWorker = - ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB, - BlockSize, "thread.is_main_or_worker", InitBB); + Instruction *IsMainOrWorker = ICmpInst::Create( + ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB, BlockSize, + "thread.is_main_or_worker", IsWorkerCheckBB); IsMainOrWorker->setDebugLoc(DLoc); - 
BranchInst::Create(IsWorkerCheckBB, StateMachineFinishedBB, IsMainOrWorker, - InitBB); - - Instruction *IsWorker = - ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB, - ConstantInt::get(KernelInitCB->getType(), -1), - "thread.is_worker", IsWorkerCheckBB); - IsWorker->setDebugLoc(DLoc); - BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker, - IsWorkerCheckBB); + BranchInst::Create(StateMachineBeginBB, StateMachineFinishedBB, + IsMainOrWorker, IsWorkerCheckBB); // Create local storage for the work function pointer. const DataLayout &DL = M.getDataLayout(); Type *VoidPtrTy = Type::getInt8PtrTy(Ctx); Instruction *WorkFnAI = new AllocaInst(VoidPtrTy, DL.getAllocaAddrSpace(), nullptr, "worker.work_fn.addr", &Kernel->getEntryBlock().front()); WorkFnAI->setDebugLoc(DLoc); OMPInfoCache.OMPBuilder.updateToLocation( OpenMPIRBuilder::LocationDescription( IRBuilder<>::InsertPoint(StateMachineBeginBB, StateMachineBeginBB->end()), DLoc)); Value *Ident = KernelInitCB->getArgOperand(0); Value *GTid = KernelInitCB; FunctionCallee BarrierFn = OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( M, OMPRTL___kmpc_barrier_simple_generic); CallInst *Barrier = CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB); OMPInfoCache.setCallingConvention(BarrierFn, Barrier); Barrier->setDebugLoc(DLoc); if (WorkFnAI->getType()->getPointerAddressSpace() != (unsigned int)AddressSpace::Generic) { WorkFnAI = new AddrSpaceCastInst( WorkFnAI, PointerType::getWithSamePointeeType( cast(WorkFnAI->getType()), (unsigned int)AddressSpace::Generic), WorkFnAI->getName() + ".generic", StateMachineBeginBB); WorkFnAI->setDebugLoc(DLoc); } FunctionCallee KernelParallelFn = OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( M, OMPRTL___kmpc_kernel_parallel); CallInst *IsActiveWorker = CallInst::Create( KernelParallelFn, {WorkFnAI}, "worker.is_active", StateMachineBeginBB); OMPInfoCache.setCallingConvention(KernelParallelFn, IsActiveWorker); IsActiveWorker->setDebugLoc(DLoc); Instruction *WorkFn = new LoadInst(VoidPtrTy, WorkFnAI, "worker.work_fn", StateMachineBeginBB); WorkFn->setDebugLoc(DLoc); FunctionType *ParallelRegionFnTy = FunctionType::get( Type::getVoidTy(Ctx), {Type::getInt16Ty(Ctx), Type::getInt32Ty(Ctx)}, false); Value *WorkFnCast = BitCastInst::CreatePointerBitCastOrAddrSpaceCast( WorkFn, ParallelRegionFnTy->getPointerTo(), "worker.work_fn.addr_cast", StateMachineBeginBB); Instruction *IsDone = ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFn, Constant::getNullValue(VoidPtrTy), "worker.is_done", StateMachineBeginBB); IsDone->setDebugLoc(DLoc); BranchInst::Create(StateMachineFinishedBB, StateMachineIsActiveCheckBB, IsDone, StateMachineBeginBB) ->setDebugLoc(DLoc); BranchInst::Create(StateMachineIfCascadeCurrentBB, StateMachineDoneBarrierBB, IsActiveWorker, StateMachineIsActiveCheckBB) ->setDebugLoc(DLoc); Value *ZeroArg = Constant::getNullValue(ParallelRegionFnTy->getParamType(0)); // Now that we have most of the CFG skeleton it is time for the if-cascade // that checks the function pointer we got from the runtime against the // parallel regions we expect, if there are any. 
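// For N known parallel regions the cascade has roughly this shape
// (illustrative; the final compare is skipped when it is the last known
// region and no unknown parallel regions can be reached):
//
//   if      (WorkFnCast == ParFn0) ParFn0(0, GTid);
//   else if (WorkFnCast == ParFn1) ParFn1(0, GTid);
//   ...
//   else    ((WorkFnTy *)WorkFn)(0, GTid);   // fallback, only present if
//                                            // unknown regions may exist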
for (int I = 0, E = ReachedKnownParallelRegions.size(); I < E; ++I) { auto *ParallelRegion = ReachedKnownParallelRegions[I]; BasicBlock *PRExecuteBB = BasicBlock::Create( Ctx, "worker_state_machine.parallel_region.execute", Kernel, StateMachineEndParallelBB); CallInst::Create(ParallelRegion, {ZeroArg, GTid}, "", PRExecuteBB) ->setDebugLoc(DLoc); BranchInst::Create(StateMachineEndParallelBB, PRExecuteBB) ->setDebugLoc(DLoc); BasicBlock *PRNextBB = BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check", Kernel, StateMachineEndParallelBB); // Check if we need to compare the pointer at all or if we can just // call the parallel region function. Value *IsPR; if (I + 1 < E || !ReachedUnknownParallelRegions.empty()) { Instruction *CmpI = ICmpInst::Create( ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFnCast, ParallelRegion, "worker.check_parallel_region", StateMachineIfCascadeCurrentBB); CmpI->setDebugLoc(DLoc); IsPR = CmpI; } else { IsPR = ConstantInt::getTrue(Ctx); } BranchInst::Create(PRExecuteBB, PRNextBB, IsPR, StateMachineIfCascadeCurrentBB) ->setDebugLoc(DLoc); StateMachineIfCascadeCurrentBB = PRNextBB; } // At the end of the if-cascade we place the indirect function pointer call // in case we might need it, that is if there can be parallel regions we // have not handled in the if-cascade above. if (!ReachedUnknownParallelRegions.empty()) { StateMachineIfCascadeCurrentBB->setName( "worker_state_machine.parallel_region.fallback.execute"); CallInst::Create(ParallelRegionFnTy, WorkFnCast, {ZeroArg, GTid}, "", StateMachineIfCascadeCurrentBB) ->setDebugLoc(DLoc); } BranchInst::Create(StateMachineEndParallelBB, StateMachineIfCascadeCurrentBB) ->setDebugLoc(DLoc); FunctionCallee EndParallelFn = OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( M, OMPRTL___kmpc_kernel_end_parallel); CallInst *EndParallel = CallInst::Create(EndParallelFn, {}, "", StateMachineEndParallelBB); OMPInfoCache.setCallingConvention(EndParallelFn, EndParallel); EndParallel->setDebugLoc(DLoc); BranchInst::Create(StateMachineDoneBarrierBB, StateMachineEndParallelBB) ->setDebugLoc(DLoc); CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineDoneBarrierBB) ->setDebugLoc(DLoc); BranchInst::Create(StateMachineBeginBB, StateMachineDoneBarrierBB) ->setDebugLoc(DLoc); return ChangeStatus::CHANGED; } /// Fixpoint iteration update function. Will be called every time a dependence /// changed its state (and in the beginning). ChangeStatus updateImpl(Attributor &A) override { KernelInfoState StateBefore = getState(); // Callback to check a read/write instruction. auto CheckRWInst = [&](Instruction &I) { // We handle calls later. if (isa(I)) return true; // We only care about write effects. if (!I.mayWriteToMemory()) return true; if (auto *SI = dyn_cast(&I)) { SmallVector Objects; getUnderlyingObjects(SI->getPointerOperand(), Objects); if (llvm::all_of(Objects, [](const Value *Obj) { return isa(Obj); })) return true; // Check for AAHeapToStack moved objects which must not be guarded. auto &HS = A.getAAFor( *this, IRPosition::function(*I.getFunction()), DepClassTy::OPTIONAL); if (llvm::all_of(Objects, [&HS](const Value *Obj) { auto *CB = dyn_cast(Obj); if (!CB) return false; return HS.isAssumedHeapToStack(*CB); })) { return true; } } // Insert instruction that needs guarding. 
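// (That is, a write whose destination may be visible to other threads; in
// SPMD mode it would have to run under the main-thread-only guard
// manifested above.)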
SPMDCompatibilityTracker.insert(&I); return true; }; bool UsedAssumedInformationInCheckRWInst = false; if (!SPMDCompatibilityTracker.isAtFixpoint()) if (!A.checkForAllReadWriteInstructions( CheckRWInst, *this, UsedAssumedInformationInCheckRWInst)) SPMDCompatibilityTracker.indicatePessimisticFixpoint(); bool UsedAssumedInformationFromReachingKernels = false; if (!IsKernelEntry) { updateParallelLevels(A); bool AllReachingKernelsKnown = true; updateReachingKernelEntries(A, AllReachingKernelsKnown); UsedAssumedInformationFromReachingKernels = !AllReachingKernelsKnown; if (!ParallelLevels.isValidState()) SPMDCompatibilityTracker.indicatePessimisticFixpoint(); else if (!ReachingKernelEntries.isValidState()) SPMDCompatibilityTracker.indicatePessimisticFixpoint(); else if (!SPMDCompatibilityTracker.empty()) { // Check if all reaching kernels agree on the mode as we can otherwise // not guard instructions. We might not be sure about the mode so we // we cannot fix the internal spmd-zation state either. int SPMD = 0, Generic = 0; for (auto *Kernel : ReachingKernelEntries) { auto &CBAA = A.getAAFor( *this, IRPosition::function(*Kernel), DepClassTy::OPTIONAL); if (CBAA.SPMDCompatibilityTracker.isValidState() && CBAA.SPMDCompatibilityTracker.isAssumed()) ++SPMD; else ++Generic; if (!CBAA.SPMDCompatibilityTracker.isAtFixpoint()) UsedAssumedInformationFromReachingKernels = true; } if (SPMD != 0 && Generic != 0) SPMDCompatibilityTracker.indicatePessimisticFixpoint(); } } // Callback to check a call instruction. bool AllParallelRegionStatesWereFixed = true; bool AllSPMDStatesWereFixed = true; auto CheckCallInst = [&](Instruction &I) { auto &CB = cast(I); auto &CBAA = A.getAAFor( *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL); getState() ^= CBAA.getState(); AllSPMDStatesWereFixed &= CBAA.SPMDCompatibilityTracker.isAtFixpoint(); AllParallelRegionStatesWereFixed &= CBAA.ReachedKnownParallelRegions.isAtFixpoint(); AllParallelRegionStatesWereFixed &= CBAA.ReachedUnknownParallelRegions.isAtFixpoint(); return true; }; bool UsedAssumedInformationInCheckCallInst = false; if (!A.checkForAllCallLikeInstructions( CheckCallInst, *this, UsedAssumedInformationInCheckCallInst)) { LLVM_DEBUG(dbgs() << TAG << "Failed to visit all call-like instructions!\n";); return indicatePessimisticFixpoint(); } // If we haven't used any assumed information for the reached parallel // region states we can fix it. if (!UsedAssumedInformationInCheckCallInst && AllParallelRegionStatesWereFixed) { ReachedKnownParallelRegions.indicateOptimisticFixpoint(); ReachedUnknownParallelRegions.indicateOptimisticFixpoint(); } // If we are sure there are no parallel regions in the kernel we do not // want SPMD mode. if (IsKernelEntry && ReachedUnknownParallelRegions.isAtFixpoint() && ReachedKnownParallelRegions.isAtFixpoint() && ReachedUnknownParallelRegions.isValidState() && ReachedKnownParallelRegions.isValidState() && !mayContainParallelRegion()) SPMDCompatibilityTracker.indicatePessimisticFixpoint(); // If we haven't used any assumed information for the SPMD state we can fix // it. if (!UsedAssumedInformationInCheckRWInst && !UsedAssumedInformationInCheckCallInst && !UsedAssumedInformationFromReachingKernels && AllSPMDStatesWereFixed) SPMDCompatibilityTracker.indicateOptimisticFixpoint(); return StateBefore == getState() ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED; } private: /// Update info regarding reaching kernels. 
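/// The `^=` below acts as the state meet: it merges the caller's
/// ReachingKernelEntries into this function's set; if any caller cannot be
/// analyzed, the set collapses to the pessimistic "any kernel may reach us"
/// state.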
void updateReachingKernelEntries(Attributor &A, bool &AllReachingKernelsKnown) { auto PredCallSite = [&](AbstractCallSite ACS) { Function *Caller = ACS.getInstruction()->getFunction(); assert(Caller && "Caller is nullptr"); auto &CAA = A.getOrCreateAAFor( IRPosition::function(*Caller), this, DepClassTy::REQUIRED); if (CAA.ReachingKernelEntries.isValidState()) { ReachingKernelEntries ^= CAA.ReachingKernelEntries; return true; } // We lost track of the caller of the associated function, any kernel // could reach now. ReachingKernelEntries.indicatePessimisticFixpoint(); return true; }; if (!A.checkForAllCallSites(PredCallSite, *this, true /* RequireAllCallSites */, AllReachingKernelsKnown)) ReachingKernelEntries.indicatePessimisticFixpoint(); } /// Update info regarding parallel levels. void updateParallelLevels(Attributor &A) { auto &OMPInfoCache = static_cast(A.getInfoCache()); OMPInformationCache::RuntimeFunctionInfo &Parallel51RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51]; auto PredCallSite = [&](AbstractCallSite ACS) { Function *Caller = ACS.getInstruction()->getFunction(); assert(Caller && "Caller is nullptr"); auto &CAA = A.getOrCreateAAFor(IRPosition::function(*Caller)); if (CAA.ParallelLevels.isValidState()) { // Any function that is called by `__kmpc_parallel_51` will not be // folded as the parallel level in the function is updated. In order to // get it right, all the analysis would depend on the implentation. That // said, if in the future any change to the implementation, the analysis // could be wrong. As a consequence, we are just conservative here. if (Caller == Parallel51RFI.Declaration) { ParallelLevels.indicatePessimisticFixpoint(); return true; } ParallelLevels ^= CAA.ParallelLevels; return true; } // We lost track of the caller of the associated function, any kernel // could reach now. ParallelLevels.indicatePessimisticFixpoint(); return true; }; bool AllCallSitesKnown = true; if (!A.checkForAllCallSites(PredCallSite, *this, true /* RequireAllCallSites */, AllCallSitesKnown)) ParallelLevels.indicatePessimisticFixpoint(); } }; /// The call site kernel info abstract attribute, basically, what can we say /// about a call site with regards to the KernelInfoState. For now this simply /// forwards the information from the callee. struct AAKernelInfoCallSite : AAKernelInfo { AAKernelInfoCallSite(const IRPosition &IRP, Attributor &A) : AAKernelInfo(IRP, A) {} /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { AAKernelInfo::initialize(A); CallBase &CB = cast(getAssociatedValue()); Function *Callee = getAssociatedFunction(); auto &AssumptionAA = A.getAAFor( *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL); // Check for SPMD-mode assumptions. if (AssumptionAA.hasAssumption("ompx_spmd_amenable")) { SPMDCompatibilityTracker.indicateOptimisticFixpoint(); indicateOptimisticFixpoint(); } // First weed out calls we do not care about, that is readonly/readnone // calls, intrinsics, and "no_openmp" calls. Neither of these can reach a // parallel region or anything else we are looking for. if (!CB.mayWriteToMemory() || isa(CB)) { indicateOptimisticFixpoint(); return; } // Next we check if we know the callee. If it is a known OpenMP function // we will handle them explicitly in the switch below. If it is not, we // will use an AAKernelInfo object on the callee to gather information and // merge that into the current state. The latter happens in the updateImpl. 
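// Calls to unknown callees are not treated as potentially containing
// parallel regions if the user attached a suitable assumption, e.g. at the
// source level (illustrative declaration):
//
//   __attribute__((assume("omp_no_parallelism")))
//   void external_helper(void);
//
// Such calls are then kept out of ReachedUnknownParallelRegions below.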
auto &OMPInfoCache = static_cast(A.getInfoCache()); const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee); if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) { // Unknown caller or declarations are not analyzable, we give up. if (!Callee || !A.isFunctionIPOAmendable(*Callee)) { // Unknown callees might contain parallel regions, except if they have // an appropriate assumption attached. if (!(AssumptionAA.hasAssumption("omp_no_openmp") || AssumptionAA.hasAssumption("omp_no_parallelism"))) ReachedUnknownParallelRegions.insert(&CB); // If SPMDCompatibilityTracker is not fixed, we need to give up on the // idea we can run something unknown in SPMD-mode. if (!SPMDCompatibilityTracker.isAtFixpoint()) { SPMDCompatibilityTracker.indicatePessimisticFixpoint(); SPMDCompatibilityTracker.insert(&CB); } // We have updated the state for this unknown call properly, there won't // be any change so we indicate a fixpoint. indicateOptimisticFixpoint(); } // If the callee is known and can be used in IPO, we will update the state // based on the callee state in updateImpl. return; } const unsigned int WrapperFunctionArgNo = 6; RuntimeFunction RF = It->getSecond(); switch (RF) { // All the functions we know are compatible with SPMD mode. case OMPRTL___kmpc_is_spmd_exec_mode: case OMPRTL___kmpc_distribute_static_fini: case OMPRTL___kmpc_for_static_fini: case OMPRTL___kmpc_global_thread_num: case OMPRTL___kmpc_get_hardware_num_threads_in_block: case OMPRTL___kmpc_get_hardware_num_blocks: case OMPRTL___kmpc_single: case OMPRTL___kmpc_end_single: case OMPRTL___kmpc_master: case OMPRTL___kmpc_end_master: case OMPRTL___kmpc_barrier: case OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2: case OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2: case OMPRTL___kmpc_nvptx_end_reduce_nowait: break; case OMPRTL___kmpc_distribute_static_init_4: case OMPRTL___kmpc_distribute_static_init_4u: case OMPRTL___kmpc_distribute_static_init_8: case OMPRTL___kmpc_distribute_static_init_8u: case OMPRTL___kmpc_for_static_init_4: case OMPRTL___kmpc_for_static_init_4u: case OMPRTL___kmpc_for_static_init_8: case OMPRTL___kmpc_for_static_init_8u: { // Check the schedule and allow static schedule in SPMD mode. unsigned ScheduleArgOpNo = 2; auto *ScheduleTypeCI = dyn_cast(CB.getArgOperand(ScheduleArgOpNo)); unsigned ScheduleTypeVal = ScheduleTypeCI ? ScheduleTypeCI->getZExtValue() : 0; switch (OMPScheduleType(ScheduleTypeVal)) { case OMPScheduleType::Static: case OMPScheduleType::StaticChunked: case OMPScheduleType::Distribute: case OMPScheduleType::DistributeChunked: break; default: SPMDCompatibilityTracker.indicatePessimisticFixpoint(); SPMDCompatibilityTracker.insert(&CB); break; }; } break; case OMPRTL___kmpc_target_init: KernelInitCB = &CB; break; case OMPRTL___kmpc_target_deinit: KernelDeinitCB = &CB; break; case OMPRTL___kmpc_parallel_51: if (auto *ParallelRegion = dyn_cast( CB.getArgOperand(WrapperFunctionArgNo)->stripPointerCasts())) { ReachedKnownParallelRegions.insert(ParallelRegion); break; } // The condition above should usually get the parallel region function // pointer and record it. In the off chance it doesn't we assume the // worst. ReachedUnknownParallelRegions.insert(&CB); break; case OMPRTL___kmpc_omp_task: // We do not look into tasks right now, just give up. 
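// A task body may itself spawn parallel regions we cannot see, so both the
// SPMD-compatibility and the parallel-region state become pessimistic.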
SPMDCompatibilityTracker.indicatePessimisticFixpoint(); SPMDCompatibilityTracker.insert(&CB); ReachedUnknownParallelRegions.insert(&CB); break; case OMPRTL___kmpc_alloc_shared: case OMPRTL___kmpc_free_shared: // Return without setting a fixpoint, to be resolved in updateImpl. return; default: // Unknown OpenMP runtime calls cannot be executed in SPMD-mode, // generally. However, they do not hide parallel regions. SPMDCompatibilityTracker.indicatePessimisticFixpoint(); SPMDCompatibilityTracker.insert(&CB); break; } // All other OpenMP runtime calls will not reach parallel regions so they // can be safely ignored for now. Since it is a known OpenMP runtime call we // have now modeled all effects and there is no need for any update. indicateOptimisticFixpoint(); } ChangeStatus updateImpl(Attributor &A) override { // TODO: Once we have call site specific value information we can provide // call site specific liveness information and then it makes // sense to specialize attributes for call sites arguments instead of // redirecting requests to the callee argument. Function *F = getAssociatedFunction(); auto &OMPInfoCache = static_cast(A.getInfoCache()); const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(F); // If F is not a runtime function, propagate the AAKernelInfo of the callee. if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) { const IRPosition &FnPos = IRPosition::function(*F); auto &FnAA = A.getAAFor(*this, FnPos, DepClassTy::REQUIRED); if (getState() == FnAA.getState()) return ChangeStatus::UNCHANGED; getState() = FnAA.getState(); return ChangeStatus::CHANGED; } // F is a runtime function that allocates or frees memory, check // AAHeapToStack and AAHeapToShared. KernelInfoState StateBefore = getState(); assert((It->getSecond() == OMPRTL___kmpc_alloc_shared || It->getSecond() == OMPRTL___kmpc_free_shared) && "Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call"); CallBase &CB = cast(getAssociatedValue()); auto &HeapToStackAA = A.getAAFor( *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL); auto &HeapToSharedAA = A.getAAFor( *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL); RuntimeFunction RF = It->getSecond(); switch (RF) { // If neither HeapToStack nor HeapToShared assume the call is removed, // assume SPMD incompatibility. case OMPRTL___kmpc_alloc_shared: if (!HeapToStackAA.isAssumedHeapToStack(CB) && !HeapToSharedAA.isAssumedHeapToShared(CB)) SPMDCompatibilityTracker.insert(&CB); break; case OMPRTL___kmpc_free_shared: if (!HeapToStackAA.isAssumedHeapToStackRemovedFree(CB) && !HeapToSharedAA.isAssumedHeapToSharedRemovedFree(CB)) SPMDCompatibilityTracker.insert(&CB); break; default: SPMDCompatibilityTracker.indicatePessimisticFixpoint(); SPMDCompatibilityTracker.insert(&CB); } return StateBefore == getState() ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED; } }; struct AAFoldRuntimeCall : public StateWrapper { using Base = StateWrapper; AAFoldRuntimeCall(const IRPosition &IRP, Attributor &A) : Base(IRP) {} /// Statistics are tracked as part of manifest for now. void trackStatistics() const override {} /// Create an abstract attribute biew for the position \p IRP. 
static AAFoldRuntimeCall &createForPosition(const IRPosition &IRP, Attributor &A); /// See AbstractAttribute::getName() const std::string getName() const override { return "AAFoldRuntimeCall"; } /// See AbstractAttribute::getIdAddr() const char *getIdAddr() const override { return &ID; } /// This function should return true if the type of the \p AA is /// AAFoldRuntimeCall static bool classof(const AbstractAttribute *AA) { return (AA->getIdAddr() == &ID); } static const char ID; }; struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall { AAFoldRuntimeCallCallSiteReturned(const IRPosition &IRP, Attributor &A) : AAFoldRuntimeCall(IRP, A) {} /// See AbstractAttribute::getAsStr() const std::string getAsStr() const override { if (!isValidState()) return ""; std::string Str("simplified value: "); if (!SimplifiedValue.hasValue()) return Str + std::string("none"); if (!SimplifiedValue.getValue()) return Str + std::string("nullptr"); if (ConstantInt *CI = dyn_cast(SimplifiedValue.getValue())) return Str + std::to_string(CI->getSExtValue()); return Str + std::string("unknown"); } void initialize(Attributor &A) override { if (DisableOpenMPOptFolding) indicatePessimisticFixpoint(); Function *Callee = getAssociatedFunction(); auto &OMPInfoCache = static_cast(A.getInfoCache()); const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee); assert(It != OMPInfoCache.RuntimeFunctionIDMap.end() && "Expected a known OpenMP runtime function"); RFKind = It->getSecond(); CallBase &CB = cast(getAssociatedValue()); A.registerSimplificationCallback( IRPosition::callsite_returned(CB), [&](const IRPosition &IRP, const AbstractAttribute *AA, bool &UsedAssumedInformation) -> Optional { assert((isValidState() || (SimplifiedValue.hasValue() && SimplifiedValue.getValue() == nullptr)) && "Unexpected invalid state!"); if (!isAtFixpoint()) { UsedAssumedInformation = true; if (AA) A.recordDependence(*this, *AA, DepClassTy::OPTIONAL); } return SimplifiedValue; }); } ChangeStatus updateImpl(Attributor &A) override { ChangeStatus Changed = ChangeStatus::UNCHANGED; switch (RFKind) { case OMPRTL___kmpc_is_spmd_exec_mode: Changed |= foldIsSPMDExecMode(A); break; case OMPRTL___kmpc_is_generic_main_thread_id: Changed |= foldIsGenericMainThread(A); break; case OMPRTL___kmpc_parallel_level: Changed |= foldParallelLevel(A); break; case OMPRTL___kmpc_get_hardware_num_threads_in_block: Changed = Changed | foldKernelFnAttribute(A, "omp_target_thread_limit"); break; case OMPRTL___kmpc_get_hardware_num_blocks: Changed = Changed | foldKernelFnAttribute(A, "omp_target_num_teams"); break; default: llvm_unreachable("Unhandled OpenMP runtime function!"); } return Changed; } ChangeStatus manifest(Attributor &A) override { ChangeStatus Changed = ChangeStatus::UNCHANGED; if (SimplifiedValue.hasValue() && SimplifiedValue.getValue()) { Instruction &I = *getCtxI(); A.changeValueAfterManifest(I, **SimplifiedValue); A.deleteAfterManifest(I); CallBase *CB = dyn_cast(&I); auto Remark = [&](OptimizationRemark OR) { if (auto *C = dyn_cast(*SimplifiedValue)) return OR << "Replacing OpenMP runtime call " << CB->getCalledFunction()->getName() << " with " << ore::NV("FoldedValue", C->getZExtValue()) << "."; return OR << "Replacing OpenMP runtime call " << CB->getCalledFunction()->getName() << "."; }; if (CB && EnableVerboseRemarks) A.emitRemark(CB, "OMP180", Remark); LLVM_DEBUG(dbgs() << TAG << "Replacing runtime call: " << I << " with " << **SimplifiedValue << "\n"); Changed = ChangeStatus::CHANGED; } return Changed; } ChangeStatus 
indicatePessimisticFixpoint() override { SimplifiedValue = nullptr; return AAFoldRuntimeCall::indicatePessimisticFixpoint(); } private: /// Fold __kmpc_is_spmd_exec_mode into a constant if possible. ChangeStatus foldIsSPMDExecMode(Attributor &A) { Optional SimplifiedValueBefore = SimplifiedValue; unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0; unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0; auto &CallerKernelInfoAA = A.getAAFor( *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState()) return indicatePessimisticFixpoint(); for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) { auto &AA = A.getAAFor(*this, IRPosition::function(*K), DepClassTy::REQUIRED); if (!AA.isValidState()) { SimplifiedValue = nullptr; return indicatePessimisticFixpoint(); } if (AA.SPMDCompatibilityTracker.isAssumed()) { if (AA.SPMDCompatibilityTracker.isAtFixpoint()) ++KnownSPMDCount; else ++AssumedSPMDCount; } else { if (AA.SPMDCompatibilityTracker.isAtFixpoint()) ++KnownNonSPMDCount; else ++AssumedNonSPMDCount; } } if ((AssumedSPMDCount + KnownSPMDCount) && (AssumedNonSPMDCount + KnownNonSPMDCount)) return indicatePessimisticFixpoint(); auto &Ctx = getAnchorValue().getContext(); if (KnownSPMDCount || AssumedSPMDCount) { assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 && "Expected only SPMD kernels!"); // All reaching kernels are in SPMD mode. Update all function calls to // __kmpc_is_spmd_exec_mode to 1. SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), true); } else if (KnownNonSPMDCount || AssumedNonSPMDCount) { assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 && "Expected only non-SPMD kernels!"); // All reaching kernels are in non-SPMD mode. Update all function // calls to __kmpc_is_spmd_exec_mode to 0. SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), false); } else { // We have empty reaching kernels, therefore we cannot tell if the // associated call site can be folded. At this moment, SimplifiedValue // must be none. assert(!SimplifiedValue.hasValue() && "SimplifiedValue should be none"); } return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED; } /// Fold __kmpc_is_generic_main_thread_id into a constant if possible. ChangeStatus foldIsGenericMainThread(Attributor &A) { Optional SimplifiedValueBefore = SimplifiedValue; CallBase &CB = cast(getAssociatedValue()); Function *F = CB.getFunction(); const auto &ExecutionDomainAA = A.getAAFor( *this, IRPosition::function(*F), DepClassTy::REQUIRED); if (!ExecutionDomainAA.isValidState()) return indicatePessimisticFixpoint(); auto &Ctx = getAnchorValue().getContext(); if (ExecutionDomainAA.isExecutedByInitialThreadOnly(CB)) SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), true); else return indicatePessimisticFixpoint(); return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED; } /// Fold __kmpc_parallel_level into a constant if possible. 
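/// The call folds to 1 when every kernel that can reach this function runs
/// in SPMD mode and to 0 when none does; a mix of SPMD and generic reaching
/// kernels (or an unknown reaching set) prevents folding.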
ChangeStatus foldParallelLevel(Attributor &A) { Optional SimplifiedValueBefore = SimplifiedValue; auto &CallerKernelInfoAA = A.getAAFor( *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); if (!CallerKernelInfoAA.ParallelLevels.isValidState()) return indicatePessimisticFixpoint(); if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState()) return indicatePessimisticFixpoint(); if (CallerKernelInfoAA.ReachingKernelEntries.empty()) { assert(!SimplifiedValue.hasValue() && "SimplifiedValue should keep none at this point"); return ChangeStatus::UNCHANGED; } unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0; unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0; for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) { auto &AA = A.getAAFor(*this, IRPosition::function(*K), DepClassTy::REQUIRED); if (!AA.SPMDCompatibilityTracker.isValidState()) return indicatePessimisticFixpoint(); if (AA.SPMDCompatibilityTracker.isAssumed()) { if (AA.SPMDCompatibilityTracker.isAtFixpoint()) ++KnownSPMDCount; else ++AssumedSPMDCount; } else { if (AA.SPMDCompatibilityTracker.isAtFixpoint()) ++KnownNonSPMDCount; else ++AssumedNonSPMDCount; } } if ((AssumedSPMDCount + KnownSPMDCount) && (AssumedNonSPMDCount + KnownNonSPMDCount)) return indicatePessimisticFixpoint(); auto &Ctx = getAnchorValue().getContext(); // If the caller can only be reached by SPMD kernel entries, the parallel // level is 1. Similarly, if the caller can only be reached by non-SPMD // kernel entries, it is 0. if (AssumedSPMDCount || KnownSPMDCount) { assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 && "Expected only SPMD kernels!"); SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 1); } else { assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 && "Expected only non-SPMD kernels!"); SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 0); } return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED; } ChangeStatus foldKernelFnAttribute(Attributor &A, llvm::StringRef Attr) { // Specialize only if all the calls agree with the attribute constant value int32_t CurrentAttrValue = -1; Optional SimplifiedValueBefore = SimplifiedValue; auto &CallerKernelInfoAA = A.getAAFor( *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState()) return indicatePessimisticFixpoint(); // Iterate over the kernels that reach this function for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) { int32_t NextAttrVal = -1; if (K->hasFnAttribute(Attr)) NextAttrVal = std::stoi(K->getFnAttribute(Attr).getValueAsString().str()); if (NextAttrVal == -1 || (CurrentAttrValue != -1 && CurrentAttrValue != NextAttrVal)) return indicatePessimisticFixpoint(); CurrentAttrValue = NextAttrVal; } if (CurrentAttrValue != -1) { auto &Ctx = getAnchorValue().getContext(); SimplifiedValue = ConstantInt::get(Type::getInt32Ty(Ctx), CurrentAttrValue); } return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED; } /// An optional value the associated value is assumed to fold to. That is, we /// assume the associated value (which is a call) can be replaced by this /// simplified value. Optional SimplifiedValue; /// The runtime function kind of the callee of the associated call site. 
RuntimeFunction RFKind; }; } // namespace /// Register folding callsite void OpenMPOpt::registerFoldRuntimeCall(RuntimeFunction RF) { auto &RFI = OMPInfoCache.RFIs[RF]; RFI.foreachUse(SCC, [&](Use &U, Function &F) { CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &RFI); if (!CI) return false; A.getOrCreateAAFor( IRPosition::callsite_returned(*CI), /* QueryingAA */ nullptr, DepClassTy::NONE, /* ForceUpdate */ false, /* UpdateAfterInit */ false); return false; }); } void OpenMPOpt::registerAAs(bool IsModulePass) { if (SCC.empty()) return; if (IsModulePass) { // Ensure we create the AAKernelInfo AAs first and without triggering an // update. This will make sure we register all value simplification // callbacks before any other AA has the chance to create an AAValueSimplify // or similar. for (Function *Kernel : OMPInfoCache.Kernels) A.getOrCreateAAFor( IRPosition::function(*Kernel), /* QueryingAA */ nullptr, DepClassTy::NONE, /* ForceUpdate */ false, /* UpdateAfterInit */ false); registerFoldRuntimeCall(OMPRTL___kmpc_is_generic_main_thread_id); registerFoldRuntimeCall(OMPRTL___kmpc_is_spmd_exec_mode); registerFoldRuntimeCall(OMPRTL___kmpc_parallel_level); registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_threads_in_block); registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_blocks); } // Create CallSite AA for all Getters. for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) { auto ICVInfo = OMPInfoCache.ICVs[static_cast(Idx)]; auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter]; auto CreateAA = [&](Use &U, Function &Caller) { CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI); if (!CI) return false; auto &CB = cast(*CI); IRPosition CBPos = IRPosition::callsite_function(CB); A.getOrCreateAAFor(CBPos); return false; }; GetterRFI.foreachUse(SCC, CreateAA); } auto &GlobalizationRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; auto CreateAA = [&](Use &U, Function &F) { A.getOrCreateAAFor(IRPosition::function(F)); return false; }; if (!DisableOpenMPOptDeglobalization) GlobalizationRFI.foreachUse(SCC, CreateAA); // Create an ExecutionDomain AA for every function and a HeapToStack AA for // every function if there is a device kernel. 
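// Deglobalization targets the team-shared allocations handed out by the
// device runtime, which in device IR look roughly like (illustrative):
//
//   %buf = call i8* @__kmpc_alloc_shared(i64 16)
//   ...
//   call void @__kmpc_free_shared(i8* %buf, i64 16)
//
// AAHeapToStack and AAHeapToShared try to turn such pairs into allocas or
// shared-memory globals, respectively.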
if (!isOpenMPDevice(M)) return; for (auto *F : SCC) { if (F->isDeclaration()) continue; A.getOrCreateAAFor(IRPosition::function(*F)); if (!DisableOpenMPOptDeglobalization) A.getOrCreateAAFor(IRPosition::function(*F)); for (auto &I : instructions(*F)) { if (auto *LI = dyn_cast(&I)) { bool UsedAssumedInformation = false; A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr, UsedAssumedInformation); } else if (auto *SI = dyn_cast(&I)) { A.getOrCreateAAFor(IRPosition::value(*SI)); } } } } const char AAICVTracker::ID = 0; const char AAKernelInfo::ID = 0; const char AAExecutionDomain::ID = 0; const char AAHeapToShared::ID = 0; const char AAFoldRuntimeCall::ID = 0; AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP, Attributor &A) { AAICVTracker *AA = nullptr; switch (IRP.getPositionKind()) { case IRPosition::IRP_INVALID: case IRPosition::IRP_FLOAT: case IRPosition::IRP_ARGUMENT: case IRPosition::IRP_CALL_SITE_ARGUMENT: llvm_unreachable("ICVTracker can only be created for function position!"); case IRPosition::IRP_RETURNED: AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A); break; case IRPosition::IRP_CALL_SITE_RETURNED: AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A); break; case IRPosition::IRP_CALL_SITE: AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A); break; case IRPosition::IRP_FUNCTION: AA = new (A.Allocator) AAICVTrackerFunction(IRP, A); break; } return *AA; } AAExecutionDomain &AAExecutionDomain::createForPosition(const IRPosition &IRP, Attributor &A) { AAExecutionDomainFunction *AA = nullptr; switch (IRP.getPositionKind()) { case IRPosition::IRP_INVALID: case IRPosition::IRP_FLOAT: case IRPosition::IRP_ARGUMENT: case IRPosition::IRP_CALL_SITE_ARGUMENT: case IRPosition::IRP_RETURNED: case IRPosition::IRP_CALL_SITE_RETURNED: case IRPosition::IRP_CALL_SITE: llvm_unreachable( "AAExecutionDomain can only be created for function position!"); case IRPosition::IRP_FUNCTION: AA = new (A.Allocator) AAExecutionDomainFunction(IRP, A); break; } return *AA; } AAHeapToShared &AAHeapToShared::createForPosition(const IRPosition &IRP, Attributor &A) { AAHeapToSharedFunction *AA = nullptr; switch (IRP.getPositionKind()) { case IRPosition::IRP_INVALID: case IRPosition::IRP_FLOAT: case IRPosition::IRP_ARGUMENT: case IRPosition::IRP_CALL_SITE_ARGUMENT: case IRPosition::IRP_RETURNED: case IRPosition::IRP_CALL_SITE_RETURNED: case IRPosition::IRP_CALL_SITE: llvm_unreachable( "AAHeapToShared can only be created for function position!"); case IRPosition::IRP_FUNCTION: AA = new (A.Allocator) AAHeapToSharedFunction(IRP, A); break; } return *AA; } AAKernelInfo &AAKernelInfo::createForPosition(const IRPosition &IRP, Attributor &A) { AAKernelInfo *AA = nullptr; switch (IRP.getPositionKind()) { case IRPosition::IRP_INVALID: case IRPosition::IRP_FLOAT: case IRPosition::IRP_ARGUMENT: case IRPosition::IRP_RETURNED: case IRPosition::IRP_CALL_SITE_RETURNED: case IRPosition::IRP_CALL_SITE_ARGUMENT: llvm_unreachable("KernelInfo can only be created for function position!"); case IRPosition::IRP_CALL_SITE: AA = new (A.Allocator) AAKernelInfoCallSite(IRP, A); break; case IRPosition::IRP_FUNCTION: AA = new (A.Allocator) AAKernelInfoFunction(IRP, A); break; } return *AA; } AAFoldRuntimeCall &AAFoldRuntimeCall::createForPosition(const IRPosition &IRP, Attributor &A) { AAFoldRuntimeCall *AA = nullptr; switch (IRP.getPositionKind()) { case IRPosition::IRP_INVALID: case IRPosition::IRP_FLOAT: case IRPosition::IRP_ARGUMENT: case IRPosition::IRP_RETURNED: case 
IRPosition::IRP_FUNCTION: case IRPosition::IRP_CALL_SITE: case IRPosition::IRP_CALL_SITE_ARGUMENT: llvm_unreachable("KernelInfo can only be created for call site position!"); case IRPosition::IRP_CALL_SITE_RETURNED: AA = new (A.Allocator) AAFoldRuntimeCallCallSiteReturned(IRP, A); break; } return *AA; } PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { if (!containsOpenMP(M)) return PreservedAnalyses::all(); if (DisableOpenMPOptimizations) return PreservedAnalyses::all(); FunctionAnalysisManager &FAM = AM.getResult(M).getManager(); KernelSet Kernels = getDeviceKernels(M); auto IsCalled = [&](Function &F) { if (Kernels.contains(&F)) return true; for (const User *U : F.users()) if (!isa(U)) return true; return false; }; auto EmitRemark = [&](Function &F) { auto &ORE = FAM.getResult(F); ORE.emit([&]() { OptimizationRemarkAnalysis ORA(DEBUG_TYPE, "OMP140", &F); return ORA << "Could not internalize function. " << "Some optimizations may not be possible. [OMP140]"; }); }; // Create internal copies of each function if this is a kernel Module. This // allows iterprocedural passes to see every call edge. DenseMap InternalizedMap; if (isOpenMPDevice(M)) { SmallPtrSet InternalizeFns; for (Function &F : M) if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F) && !DisableInternalization) { if (Attributor::isInternalizable(F)) { InternalizeFns.insert(&F); } else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) { EmitRemark(F); } } Attributor::internalizeFunctions(InternalizeFns, InternalizedMap); } // Look at every function in the Module unless it was internalized. SmallVector SCC; for (Function &F : M) if (!F.isDeclaration() && !InternalizedMap.lookup(&F)) SCC.push_back(&F); if (SCC.empty()) return PreservedAnalyses::all(); AnalysisGetter AG(FAM); auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & { return FAM.getResult(*F); }; BumpPtrAllocator Allocator; CallGraphUpdater CGUpdater; SetVector Functions(SCC.begin(), SCC.end()); OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels); unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? SetFixpointIterations : 32; Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false, MaxFixpointIterations, OREGetter, DEBUG_TYPE); OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); bool Changed = OMPOpt.run(true); // Optionally inline device functions for potentially better performance. if (AlwaysInlineDeviceFunctions && isOpenMPDevice(M)) for (Function &F : M) if (!F.isDeclaration() && !Kernels.contains(&F) && !F.hasFnAttribute(Attribute::NoInline)) F.addFnAttr(Attribute::AlwaysInline); if (PrintModuleAfterOptimizations) LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt Module Pass:\n" << M); if (Changed) return PreservedAnalyses::none(); return PreservedAnalyses::all(); } PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &CG, CGSCCUpdateResult &UR) { if (!containsOpenMP(*C.begin()->getFunction().getParent())) return PreservedAnalyses::all(); if (DisableOpenMPOptimizations) return PreservedAnalyses::all(); SmallVector SCC; // If there are kernels in the module, we have to run on all SCC's. 
for (LazyCallGraph::Node &N : C) { Function *Fn = &N.getFunction(); SCC.push_back(Fn); } if (SCC.empty()) return PreservedAnalyses::all(); Module &M = *C.begin()->getFunction().getParent(); KernelSet Kernels = getDeviceKernels(M); FunctionAnalysisManager &FAM = AM.getResult(C, CG).getManager(); AnalysisGetter AG(FAM); auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & { return FAM.getResult(*F); }; BumpPtrAllocator Allocator; CallGraphUpdater CGUpdater; CGUpdater.initialize(CG, C, AM, UR); SetVector Functions(SCC.begin(), SCC.end()); OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, /*CGSCC*/ Functions, Kernels); unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? SetFixpointIterations : 32; Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, MaxFixpointIterations, OREGetter, DEBUG_TYPE); OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); bool Changed = OMPOpt.run(false); if (PrintModuleAfterOptimizations) LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n" << M); if (Changed) return PreservedAnalyses::none(); return PreservedAnalyses::all(); } namespace { struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass { CallGraphUpdater CGUpdater; static char ID; OpenMPOptCGSCCLegacyPass() : CallGraphSCCPass(ID) { initializeOpenMPOptCGSCCLegacyPassPass(*PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override { CallGraphSCCPass::getAnalysisUsage(AU); } bool runOnSCC(CallGraphSCC &CGSCC) override { if (!containsOpenMP(CGSCC.getCallGraph().getModule())) return false; if (DisableOpenMPOptimizations || skipSCC(CGSCC)) return false; SmallVector SCC; // If there are kernels in the module, we have to run on all SCC's. for (CallGraphNode *CGN : CGSCC) { Function *Fn = CGN->getFunction(); if (!Fn || Fn->isDeclaration()) continue; SCC.push_back(Fn); } if (SCC.empty()) return false; Module &M = CGSCC.getCallGraph().getModule(); KernelSet Kernels = getDeviceKernels(M); CallGraph &CG = getAnalysis().getCallGraph(); CGUpdater.initialize(CG, CGSCC); // Maintain a map of functions to avoid rebuilding the ORE DenseMap> OREMap; auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & { std::unique_ptr &ORE = OREMap[F]; if (!ORE) ORE = std::make_unique(F); return *ORE; }; AnalysisGetter AG; SetVector Functions(SCC.begin(), SCC.end()); BumpPtrAllocator Allocator; OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, /*CGSCC*/ Functions, Kernels); unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? SetFixpointIterations : 32; Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, MaxFixpointIterations, OREGetter, DEBUG_TYPE); OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); bool Result = OMPOpt.run(false); if (PrintModuleAfterOptimizations) LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n" << M); return Result; } bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); } }; } // end anonymous namespace KernelSet llvm::omp::getDeviceKernels(Module &M) { // TODO: Create a more cross-platform way of determining device kernels. 
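// Kernels are currently discovered via "nvvm.annotations" metadata, which
// for an offloaded entry point looks roughly like (illustrative):
//
//   !nvvm.annotations = !{!0}
//   !0 = !{void ()* @__omp_offloading_<...>_main_l1, !"kernel", i32 1}
//
// Only operands whose second element is the string "kernel" are taken.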
NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations"); KernelSet Kernels; if (!MD) return Kernels; for (auto *Op : MD->operands()) { if (Op->getNumOperands() < 2) continue; MDString *KindID = dyn_cast(Op->getOperand(1)); if (!KindID || KindID->getString() != "kernel") continue; Function *KernelFn = mdconst::dyn_extract_or_null(Op->getOperand(0)); if (!KernelFn) continue; ++NumOpenMPTargetRegionKernels; Kernels.insert(KernelFn); } return Kernels; } bool llvm::omp::containsOpenMP(Module &M) { Metadata *MD = M.getModuleFlag("openmp"); if (!MD) return false; return true; } bool llvm::omp::isOpenMPDevice(Module &M) { Metadata *MD = M.getModuleFlag("openmp-device"); if (!MD) return false; return true; } char OpenMPOptCGSCCLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc", "OpenMP specific optimizations", false, false) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) INITIALIZE_PASS_END(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc", "OpenMP specific optimizations", false, false) Pass *llvm::createOpenMPOptCGSCCLegacyPass() { return new OpenMPOptCGSCCLegacyPass(); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 74f68531b89a..6e5aeb9c41f6 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -1,1331 +1,1354 @@ //===- PassManagerBuilder.cpp - Build Standard Pass -----------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file defines the PassManagerBuilder class, which is used to set up a // "standard" optimization sequence suitable for languages like C and C++. 
// //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm-c/Transforms/PassManagerBuilder.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CFLAndersAliasAnalysis.h" #include "llvm/Analysis/CFLSteensAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Target/CGPassBuilderOption.h" #include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/Attributor.h" #include "llvm/Transforms/IPO/ForceFunctionAttrs.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/IPO/InferFunctionAttrs.h" #include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/InstSimplifyPass.h" #include "llvm/Transforms/Scalar/LICM.h" #include "llvm/Transforms/Scalar/LoopUnrollPass.h" #include "llvm/Transforms/Scalar/SCCP.h" #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Vectorize.h" #include "llvm/Transforms/Vectorize/LoopVectorize.h" #include "llvm/Transforms/Vectorize/SLPVectorizer.h" #include "llvm/Transforms/Vectorize/VectorCombine.h" using namespace llvm; namespace llvm { cl::opt RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::desc("Run Partial inlinining pass")); static cl::opt UseGVNAfterVectorization("use-gvn-after-vectorization", cl::init(false), cl::Hidden, cl::desc("Run GVN instead of Early CSE after vectorization passes")); cl::opt ExtraVectorizerPasses( "extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization.")); static cl::opt RunLoopRerolling("reroll-loops", cl::Hidden, cl::desc("Run the loop rerolling pass")); cl::opt RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass")); // Experimental option to use CFL-AA static cl::opt<::CFLAAType> UseCFLAA("use-cfl-aa", cl::init(::CFLAAType::None), cl::Hidden, cl::desc("Enable the new, experimental CFL alias analysis"), cl::values(clEnumValN(::CFLAAType::None, "none", "Disable CFL-AA"), clEnumValN(::CFLAAType::Steensgaard, "steens", "Enable unification-based CFL-AA"), clEnumValN(::CFLAAType::Andersen, "anders", "Enable inclusion-based CFL-AA"), clEnumValN(::CFLAAType::Both, "both", "Enable both variants of CFL-AA"))); cl::opt EnableLoopInterchange( "enable-loopinterchange", cl::init(false), cl::Hidden, cl::desc("Enable the experimental LoopInterchange Pass")); cl::opt EnableUnrollAndJam("enable-unroll-and-jam", cl::init(false), cl::Hidden, cl::desc("Enable Unroll And Jam Pass")); cl::opt EnableLoopFlatten("enable-loop-flatten", cl::init(false), cl::Hidden, cl::desc("Enable the LoopFlatten Pass")); cl::opt EnableDFAJumpThreading("enable-dfa-jump-thread", cl::desc("Enable DFA jump threading."), 
cl::init(false), cl::Hidden); static cl::opt EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden, cl::desc("Enable preparation for ThinLTO.")); static cl::opt EnablePerformThinLTO("perform-thinlto", cl::init(false), cl::Hidden, cl::desc("Enable performing ThinLTO.")); cl::opt EnableHotColdSplit("hot-cold-split", cl::init(false), cl::ZeroOrMore, cl::desc("Enable hot-cold splitting pass")); cl::opt EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden, cl::desc("Enable ir outliner pass")); static cl::opt UseLoopVersioningLICM( "enable-loop-versioning-licm", cl::init(false), cl::Hidden, cl::desc("Enable the experimental Loop Versioning LICM pass")); cl::opt DisablePreInliner("disable-preinline", cl::init(false), cl::Hidden, cl::desc("Disable pre-instrumentation inliner")); cl::opt PreInlineThreshold( "preinline-threshold", cl::Hidden, cl::init(75), cl::ZeroOrMore, cl::desc("Control the amount of inlining in pre-instrumentation inliner " "(default = 75)")); cl::opt EnableGVNHoist("enable-gvn-hoist", cl::init(false), cl::ZeroOrMore, cl::desc("Enable the GVN hoisting pass (default = off)")); static cl::opt DisableLibCallsShrinkWrap("disable-libcalls-shrinkwrap", cl::init(false), cl::Hidden, cl::desc("Disable shrink-wrap library calls")); static cl::opt EnableSimpleLoopUnswitch( "enable-simple-loop-unswitch", cl::init(false), cl::Hidden, cl::desc("Enable the simple loop unswitch pass. Also enables independent " "cleanup passes integrated into the loop pass manager pipeline.")); cl::opt EnableGVNSink("enable-gvn-sink", cl::init(false), cl::ZeroOrMore, cl::desc("Enable the GVN sinking pass (default = off)")); // This option is used in simplifying testing SampleFDO optimizations for // profile loading. cl::opt EnableCHR("enable-chr", cl::init(true), cl::Hidden, cl::desc("Enable control height reduction optimization (CHR)")); cl::opt FlattenedProfileUsed( "flattened-profile-used", cl::init(false), cl::Hidden, cl::desc("Indicate the sample profile being used is flattened, i.e., " "no inline hierachy exists in the profile. 
")); cl::opt EnableOrderFileInstrumentation( "enable-order-file-instrumentation", cl::init(false), cl::Hidden, cl::desc("Enable order file instrumentation (default = off)")); cl::opt EnableMatrix( "enable-matrix", cl::init(false), cl::Hidden, cl::desc("Enable lowering of the matrix intrinsics")); cl::opt EnableConstraintElimination( "enable-constraint-elimination", cl::init(false), cl::Hidden, cl::desc( "Enable pass to eliminate conditions based on linear constraints.")); cl::opt EnableFunctionSpecialization( "enable-function-specialization", cl::init(false), cl::Hidden, cl::desc("Enable Function Specialization pass")); cl::opt AttributorRun( "attributor-enable", cl::Hidden, cl::init(AttributorRunOption::NONE), cl::desc("Enable the attributor inter-procedural deduction pass."), cl::values(clEnumValN(AttributorRunOption::ALL, "all", "enable all attributor runs"), clEnumValN(AttributorRunOption::MODULE, "module", "enable module-wide attributor runs"), clEnumValN(AttributorRunOption::CGSCC, "cgscc", "enable call graph SCC attributor runs"), clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs"))); extern cl::opt EnableKnowledgeRetention; } // namespace llvm PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; LibraryInfo = nullptr; Inliner = nullptr; DisableUnrollLoops = false; SLPVectorize = false; LoopVectorize = true; LoopsInterleaved = true; RerollLoops = RunLoopRerolling; NewGVN = RunNewGVN; LicmMssaOptCap = SetLicmMssaOptCap; LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; DisableGVNLoadPRE = false; ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll; VerifyInput = false; VerifyOutput = false; MergeFunctions = false; PrepareForLTO = false; EnablePGOInstrGen = false; EnablePGOCSInstrGen = false; EnablePGOCSInstrUse = false; PGOInstrGen = ""; PGOInstrUse = ""; PGOSampleUse = ""; PrepareForThinLTO = EnablePrepareForThinLTO; PerformThinLTO = EnablePerformThinLTO; DivergentTarget = false; CallGraphProfile = true; } PassManagerBuilder::~PassManagerBuilder() { delete LibraryInfo; delete Inliner; } /// Set of global extensions, automatically added as part of the standard set. static ManagedStatic< SmallVector, 8>> GlobalExtensions; static PassManagerBuilder::GlobalExtensionID GlobalExtensionsCounter; /// Check if GlobalExtensions is constructed and not empty. /// Since GlobalExtensions is a managed static, calling 'empty()' will trigger /// the construction of the object. static bool GlobalExtensionsNotEmpty() { return GlobalExtensions.isConstructed() && !GlobalExtensions->empty(); } PassManagerBuilder::GlobalExtensionID PassManagerBuilder::addGlobalExtension(PassManagerBuilder::ExtensionPointTy Ty, PassManagerBuilder::ExtensionFn Fn) { auto ExtensionID = GlobalExtensionsCounter++; GlobalExtensions->push_back(std::make_tuple(Ty, std::move(Fn), ExtensionID)); return ExtensionID; } void PassManagerBuilder::removeGlobalExtension( PassManagerBuilder::GlobalExtensionID ExtensionID) { // RegisterStandardPasses may try to call this function after GlobalExtensions // has already been destroyed; doing so should not generate an error. 
if (!GlobalExtensions.isConstructed()) return; auto GlobalExtension = llvm::find_if(*GlobalExtensions, [ExtensionID](const auto &elem) { return std::get<2>(elem) == ExtensionID; }); assert(GlobalExtension != GlobalExtensions->end() && "The extension ID to be removed should always be valid."); GlobalExtensions->erase(GlobalExtension); } void PassManagerBuilder::addExtension(ExtensionPointTy Ty, ExtensionFn Fn) { Extensions.push_back(std::make_pair(Ty, std::move(Fn))); } void PassManagerBuilder::addExtensionsToPM(ExtensionPointTy ETy, legacy::PassManagerBase &PM) const { if (GlobalExtensionsNotEmpty()) { for (auto &Ext : *GlobalExtensions) { if (std::get<0>(Ext) == ETy) std::get<1>(Ext)(*this, PM); } } for (unsigned i = 0, e = Extensions.size(); i != e; ++i) if (Extensions[i].first == ETy) Extensions[i].second(*this, PM); } void PassManagerBuilder::addInitialAliasAnalysisPasses( legacy::PassManagerBase &PM) const { switch (UseCFLAA) { case ::CFLAAType::Steensgaard: PM.add(createCFLSteensAAWrapperPass()); break; case ::CFLAAType::Andersen: PM.add(createCFLAndersAAWrapperPass()); break; case ::CFLAAType::Both: PM.add(createCFLSteensAAWrapperPass()); PM.add(createCFLAndersAAWrapperPass()); break; default: break; } // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that // BasicAliasAnalysis wins if they disagree. This is intended to help // support "obvious" type-punning idioms. PM.add(createTypeBasedAAWrapperPass()); PM.add(createScopedNoAliasAAWrapperPass()); } void PassManagerBuilder::populateFunctionPassManager( legacy::FunctionPassManager &FPM) { addExtensionsToPM(EP_EarlyAsPossible, FPM); // Add LibraryInfo if we have some. if (LibraryInfo) FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); // The backends do not handle matrix intrinsics currently. // Make sure they are also lowered in O0. // FIXME: A lightweight version of the pass should run in the backend // pipeline on demand. if (EnableMatrix && OptLevel == 0) FPM.add(createLowerMatrixIntrinsicsMinimalPass()); if (OptLevel == 0) return; addInitialAliasAnalysisPasses(FPM); // Lower llvm.expect to metadata before attempting transforms. // Compare/branch metadata may alter the behavior of passes like SimplifyCFG. FPM.add(createLowerExpectIntrinsicPass()); FPM.add(createCFGSimplificationPass()); FPM.add(createSROAPass()); FPM.add(createEarlyCSEPass()); } // Do PGO instrumentation generation or use pass as the option specified. void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM, bool IsCS = false) { if (IsCS) { if (!EnablePGOCSInstrGen && !EnablePGOCSInstrUse) return; } else if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty()) return; // Perform the preinline and cleanup passes for O1 and above. // We will not do this inline for context sensitive PGO (when IsCS is true). if (OptLevel > 0 && !DisablePreInliner && PGOSampleUse.empty() && !IsCS) { // Create preinline pass. We construct an InlineParams object and specify // the threshold here to avoid the command line options of the regular // inliner to influence pre-inlining. The only fields of InlineParams we // care about are DefaultThreshold and HintThreshold. InlineParams IP; IP.DefaultThreshold = PreInlineThreshold; // FIXME: The hint threshold has the same value used by the regular inliner // when not optimzing for size. This should probably be lowered after // performance testing. // Use PreInlineThreshold for both -Os and -Oz. Not running preinliner makes // the instrumented binary unusably large. 
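addGlobalExtension and addExtension, shown above, are the hooks out-of-tree code uses to splice extra passes into this pipeline at the EP_* extension points; the RegisterStandardPasses helper in PassManagerBuilder.h wraps the global variant. A sketch of both flavors, using the in-tree createDeadCodeEliminationPass purely as a stand-in for whatever pass a plugin would add:

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Scalar.h"

using namespace llvm;

// Global extension: the callback runs for every PassManagerBuilder instance
// that reaches the EP_Peephole extension point.
static RegisterStandardPasses
    AddExtraPeephole(PassManagerBuilder::EP_Peephole,
                     [](const PassManagerBuilder &,
                        legacy::PassManagerBase &PM) {
                       PM.add(createDeadCodeEliminationPass());
                     });

// Per-builder extension: affects only the builder it was registered on.
static void addVectorizerStartHook(PassManagerBuilder &PMB) {
  PMB.addExtension(PassManagerBuilder::EP_VectorizerStart,
                   [](const PassManagerBuilder &,
                      legacy::PassManagerBase &PM) {
                     PM.add(createDeadCodeEliminationPass());
                   });
}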
Even if PreInlineThreshold is not // correct thresold for -Oz, it is better than not running preinliner. IP.HintThreshold = SizeLevel > 0 ? PreInlineThreshold : 325; MPM.add(createFunctionInliningPass(IP)); MPM.add(createSROAPass()); MPM.add(createEarlyCSEPass()); // Catch trivial redundancies - MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createCFGSimplificationPass( + SimplifyCFGOptions().convertSwitchRangeToICmp( + true))); // Merge & remove BBs MPM.add(createInstructionCombiningPass()); // Combine silly seq's addExtensionsToPM(EP_Peephole, MPM); } if ((EnablePGOInstrGen && !IsCS) || (EnablePGOCSInstrGen && IsCS)) { MPM.add(createPGOInstrumentationGenLegacyPass(IsCS)); // Add the profile lowering pass. InstrProfOptions Options; if (!PGOInstrGen.empty()) Options.InstrProfileOutput = PGOInstrGen; Options.DoCounterPromotion = true; Options.UseBFIInPromotion = IsCS; MPM.add(createLoopRotatePass()); MPM.add(createInstrProfilingLegacyPass(Options, IsCS)); } if (!PGOInstrUse.empty()) MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse, IsCS)); // Indirect call promotion that promotes intra-module targets only. // For ThinLTO this is done earlier due to interactions with globalopt // for imported functions. We don't run this at -O0. if (OptLevel > 0 && !IsCS) MPM.add( createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty())); } void PassManagerBuilder::addFunctionSimplificationPasses( legacy::PassManagerBase &MPM) { // Start of function pass. // Break up aggregate allocas, using SSAUpdater. assert(OptLevel >= 1 && "Calling function optimizer with no optimization level!"); MPM.add(createSROAPass()); MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies if (EnableKnowledgeRetention) MPM.add(createAssumeSimplifyPass()); if (OptLevel > 1) { if (EnableGVNHoist) MPM.add(createGVNHoistPass()); if (EnableGVNSink) { MPM.add(createGVNSinkPass()); - MPM.add(createCFGSimplificationPass()); + MPM.add(createCFGSimplificationPass( + SimplifyCFGOptions().convertSwitchRangeToICmp(true))); } } if (EnableConstraintElimination) MPM.add(createConstraintEliminationPass()); if (OptLevel > 1) { // Speculative execution if the target has divergent branches; otherwise nop. MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); MPM.add(createJumpThreadingPass()); // Thread jumps. MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals } - MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add( + createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp( + true))); // Merge & remove BBs // Combine silly seq's if (OptLevel > 2) MPM.add(createAggressiveInstCombinerPass()); MPM.add(createInstructionCombiningPass()); if (SizeLevel == 0 && !DisableLibCallsShrinkWrap) MPM.add(createLibCallsShrinkWrapPass()); addExtensionsToPM(EP_Peephole, MPM); // Optimize memory intrinsic calls based on the profiled size information. if (SizeLevel == 0) MPM.add(createPGOMemOPSizeOptLegacyPass()); // TODO: Investigate the cost/benefit of tail call elimination on debugging. 
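Most of the + hunks in this file swap the default-constructed createCFGSimplificationPass() for one configured through SimplifyCFGOptions. The options object is a fluent builder whose setters return *this, so each call site can opt individual transforms in or out; a standalone sketch, with the addTunedSimplifyCFG wrapper being illustrative:

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"

using namespace llvm;

// Enable exactly the SimplifyCFG transforms wanted at this pipeline point;
// anything not set keeps its default.
static void addTunedSimplifyCFG(legacy::PassManagerBase &PM) {
  PM.add(createCFGSimplificationPass(SimplifyCFGOptions()
                                         .convertSwitchRangeToICmp(true)
                                         .hoistCommonInsts(true)
                                         .sinkCommonInsts(true)));
}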
if (OptLevel > 1) MPM.add(createTailCallEliminationPass()); // Eliminate tail calls - MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add( + createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp( + true))); // Merge & remove BBs MPM.add(createReassociatePass()); // Reassociate expressions // The matrix extension can introduce large vector operations early, which can // benefit from running vector-combine early on. if (EnableMatrix) MPM.add(createVectorCombinePass()); // Begin the loop pass pipeline. if (EnableSimpleLoopUnswitch) { // The simple loop unswitch pass relies on separate cleanup passes. Schedule // them first so when we re-process a loop they run before other loop // passes. MPM.add(createLoopInstSimplifyPass()); MPM.add(createLoopSimplifyCFGPass()); } // Try to remove as much code from the loop header as possible, - // to reduce amount of IR that will have to be duplicated. + // to reduce amount of IR that will have to be duplicated. However, + // do not perform speculative hoisting the first time as LICM + // will destroy metadata that may not need to be destroyed if run + // after loop rotation. // TODO: Investigate promotion cap for O1. - MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/false)); // Rotate Loop - disable header duplication at -Oz MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO)); // TODO: Investigate promotion cap for O1. - MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); if (EnableSimpleLoopUnswitch) MPM.add(createSimpleLoopUnswitchLegacyPass()); else MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); // FIXME: We break the loop pass pipeline here in order to do full // simplifycfg. Eventually loop-simplifycfg should be enhanced to replace the // need for this. - MPM.add(createCFGSimplificationPass()); + MPM.add(createCFGSimplificationPass( + SimplifyCFGOptions().convertSwitchRangeToICmp(true))); MPM.add(createInstructionCombiningPass()); // We resume loop passes creating a second loop pipeline here. if (EnableLoopFlatten) { MPM.add(createLoopFlattenPass()); // Flatten loops MPM.add(createLoopSimplifyCFGPass()); } MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars addExtensionsToPM(EP_LateLoopOptimizations, MPM); MPM.add(createLoopDeletionPass()); // Delete dead loops if (EnableLoopInterchange) MPM.add(createLoopInterchangePass()); // Interchange loops // Unroll small loops and perform peeling. MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, ForgetAllSCEVInLoopUnroll)); addExtensionsToPM(EP_LoopOptimizerEnd, MPM); // This ends the loop pass pipelines. // Break up allocas that may now be splittable after loop unrolling. MPM.add(createSROAPass()); if (OptLevel > 1) { MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds MPM.add(NewGVN ? createNewGVNPass() : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies } MPM.add(createSCCPPass()); // Constant prop with SCCP if (EnableConstraintElimination) MPM.add(createConstraintEliminationPass()); // Delete dead bit computations (instcombine runs after to fold away the dead // computations, and then ADCE will run later to exploit any new DCE // opportunities that creates). 
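The paired LICM hunks above split hoisting into two runs: the first keeps AllowSpeculation off so metadata on conditionally executed instructions is not dropped prematurely, loop rotation then shrinks the header, and a second run hoists speculatively. A condensed sketch of that ordering; the cap arguments stand in for the builder's LicmMssa* members and the helper name is illustrative:

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/Scalar.h"

using namespace llvm;

// Hoist without speculation, rotate the loop so the header shrinks, then
// hoist again with speculative hoisting allowed.
static void addLoopHeaderCleanup(legacy::PassManagerBase &PM,
                                 unsigned MssaOptCap,
                                 unsigned MssaNoAccForPromotionCap) {
  PM.add(createLICMPass(MssaOptCap, MssaNoAccForPromotionCap,
                        /*AllowSpeculation=*/false));
  PM.add(createLoopRotatePass(/*MaxHeaderSize=*/-1, /*PrepareForLTO=*/false));
  PM.add(createLICMPass(MssaOptCap, MssaNoAccForPromotionCap,
                        /*AllowSpeculation=*/true));
}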
MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations // Run instcombine after redundancy elimination to exploit opportunities // opened up by them. MPM.add(createInstructionCombiningPass()); addExtensionsToPM(EP_Peephole, MPM); if (OptLevel > 1) { if (EnableDFAJumpThreading && SizeLevel == 0) MPM.add(createDFAJumpThreadingPass()); MPM.add(createJumpThreadingPass()); // Thread jumps MPM.add(createCorrelatedValuePropagationPass()); } MPM.add(createAggressiveDCEPass()); // Delete dead instructions MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset // TODO: Investigate if this is too expensive at O1. if (OptLevel > 1) { MPM.add(createDeadStoreEliminationPass()); // Delete dead stores - MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); } addExtensionsToPM(EP_ScalarOptimizerLate, MPM); if (RerollLoops) MPM.add(createLoopRerollPass()); // Merge & remove BBs and sink & hoist common instructions. MPM.add(createCFGSimplificationPass( SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true))); // Clean up after everything. MPM.add(createInstructionCombiningPass()); addExtensionsToPM(EP_Peephole, MPM); if (EnableCHR && OptLevel >= 3 && (!PGOInstrUse.empty() || !PGOSampleUse.empty() || EnablePGOCSInstrGen)) MPM.add(createControlHeightReductionLegacyPass()); } /// FIXME: Should LTO cause any differences to this set of passes? void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM, bool IsFullLTO) { PM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize)); if (IsFullLTO) { // The vectorizer may have significantly shortened a loop body; unroll // again. Unroll small loops to hide loop backedge latency and saturate any // parallel execution resources of an out-of-order processor. We also then // need to clean up redundancies and loop invariant code. // FIXME: It would be really good to use a loop-integrated instruction // combiner for cleanup here so that the unrolling and LICM can be pipelined // across the loop nests. // We do UnrollAndJam in a separate LPM to ensure it happens before unroll if (EnableUnrollAndJam && !DisableUnrollLoops) PM.add(createLoopUnrollAndJamPass(OptLevel)); PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops, ForgetAllSCEVInLoopUnroll)); PM.add(createWarnMissedTransformationsPass()); } if (!IsFullLTO) { // Eliminate loads by forwarding stores from the previous iteration to loads // of the current iteration. PM.add(createLoopLoadEliminationPass()); } // Cleanup after the loop optimization passes. PM.add(createInstructionCombiningPass()); if (OptLevel > 1 && ExtraVectorizerPasses) { // At higher optimization levels, try to clean up any runtime overlap and // alignment checks inserted by the vectorizer. We want to track correlated // runtime checks for two inner loops in the same outer loop, fold any // common computations, hoist loop-invariant aspects out of any outer loop, // and unswitch the runtime checks if possible. Once hoisted, we may have // dead (or speculatable) control flows or more combining opportunities. 
PM.add(createEarlyCSEPass()); PM.add(createCorrelatedValuePropagationPass()); PM.add(createInstructionCombiningPass()); - PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); PM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); - PM.add(createCFGSimplificationPass()); + PM.add(createCFGSimplificationPass( + SimplifyCFGOptions().convertSwitchRangeToICmp(true))); PM.add(createInstructionCombiningPass()); } // Now that we've formed fast to execute loop structures, we do further // optimizations. These are run afterward as they might block doing complex // analyses and transforms such as what are needed for loop vectorization. // Cleanup after loop vectorization, etc. Simplification passes like CVP and // GVN, loop transforms, and others have already run, so it's now better to // convert to more optimized IR using more aggressive simplify CFG options. // The extra sinking transform can create larger basic blocks, so do this // before SLP vectorization. PM.add(createCFGSimplificationPass(SimplifyCFGOptions() .forwardSwitchCondToPhi(true) + .convertSwitchRangeToICmp(true) .convertSwitchToLookupTable(true) .needCanonicalLoops(false) .hoistCommonInsts(true) .sinkCommonInsts(true))); if (IsFullLTO) { PM.add(createSCCPPass()); // Propagate exposed constants PM.add(createInstructionCombiningPass()); // Clean up again PM.add(createBitTrackingDCEPass()); } // Optimize parallel scalar instruction chains into SIMD instructions. if (SLPVectorize) { PM.add(createSLPVectorizerPass()); if (OptLevel > 1 && ExtraVectorizerPasses) PM.add(createEarlyCSEPass()); } // Enhance/cleanup vector code. PM.add(createVectorCombinePass()); if (!IsFullLTO) { addExtensionsToPM(EP_Peephole, PM); PM.add(createInstructionCombiningPass()); if (EnableUnrollAndJam && !DisableUnrollLoops) { // Unroll and Jam. We do this before unroll but need to be in a separate // loop pass manager in order for the outer loop to be processed by // unroll and jam before the inner loop is unrolled. PM.add(createLoopUnrollAndJamPass(OptLevel)); } // Unroll small loops PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops, ForgetAllSCEVInLoopUnroll)); if (!DisableUnrollLoops) { // LoopUnroll may generate some redundency to cleanup. PM.add(createInstructionCombiningPass()); // Runtime unrolling will introduce runtime check in loop prologue. If the // unrolled loop is a inner loop, then the prologue will be inside the // outer loop. LICM pass can help to promote the runtime check out if the // checked value is loop invariant. - PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); } PM.add(createWarnMissedTransformationsPass()); } // After vectorization and unrolling, assume intrinsics may tell us more // about pointer alignments. PM.add(createAlignmentFromAssumptionsPass()); if (IsFullLTO) PM.add(createInstructionCombiningPass()); } void PassManagerBuilder::populateModulePassManager( legacy::PassManagerBase &MPM) { // Whether this is a default or *LTO pre-link pipeline. The FullLTO post-link // is handled separately, so just check this is not the ThinLTO post-link. 
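populateModulePassManager, which begins just below, pairs with populateFunctionPassManager defined earlier; together they are what a legacy-pass-manager client drives. A minimal sketch of such a caller, assuming it already owns an in-memory Module; runO2Pipelines and the specific flag values are illustrative rather than a definitive frontend setup:

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"

using namespace llvm;

// Roughly what a -O2 client does with the builder defined in this file.
static void runO2Pipelines(Module &M) {
  PassManagerBuilder PMB;
  PMB.OptLevel = 2;
  PMB.SizeLevel = 0;
  PMB.LoopVectorize = true;
  PMB.SLPVectorize = true;
  PMB.Inliner = createFunctionInliningPass(PMB.OptLevel, PMB.SizeLevel,
                                           /*DisableInlineHotCallSite=*/false);

  legacy::FunctionPassManager FPM(&M);
  legacy::PassManager MPM;
  PMB.populateFunctionPassManager(FPM);
  PMB.populateModulePassManager(MPM);

  FPM.doInitialization();
  for (Function &F : M)
    FPM.run(F);
  FPM.doFinalization();
  MPM.run(M);
}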
bool DefaultOrPreLinkPipeline = !PerformThinLTO; MPM.add(createAnnotation2MetadataLegacyPass()); if (!PGOSampleUse.empty()) { MPM.add(createPruneEHPass()); // In ThinLTO mode, when flattened profile is used, all the available // profile information will be annotated in PreLink phase so there is // no need to load the profile again in PostLink. if (!(FlattenedProfileUsed && PerformThinLTO)) MPM.add(createSampleProfileLoaderPass(PGOSampleUse)); } // Allow forcing function attributes as a debugging and tuning aid. MPM.add(createForceFunctionAttrsLegacyPass()); // If all optimizations are disabled, just run the always-inline pass and, // if enabled, the function merging pass. if (OptLevel == 0) { addPGOInstrPasses(MPM); if (Inliner) { MPM.add(Inliner); Inliner = nullptr; } // FIXME: The BarrierNoopPass is a HACK! The inliner pass above implicitly // creates a CGSCC pass manager, but we don't want to add extensions into // that pass manager. To prevent this we insert a no-op module pass to reset // the pass manager to get the same behavior as EP_OptimizerLast in non-O0 // builds. The function merging pass is if (MergeFunctions) MPM.add(createMergeFunctionsPass()); else if (GlobalExtensionsNotEmpty() || !Extensions.empty()) MPM.add(createBarrierNoopPass()); if (PerformThinLTO) { MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); // Drop available_externally and unreferenced globals. This is necessary // with ThinLTO in order to avoid leaving undefined references to dead // globals in the object file. MPM.add(createEliminateAvailableExternallyPass()); MPM.add(createGlobalDCEPass()); } addExtensionsToPM(EP_EnabledOnOptLevel0, MPM); if (PrepareForLTO || PrepareForThinLTO) { MPM.add(createCanonicalizeAliasesPass()); // Rename anon globals to be able to export them in the summary. // This has to be done after we add the extensions to the pass manager // as there could be passes (e.g. Adddress sanitizer) which introduce // new unnamed globals. MPM.add(createNameAnonGlobalPass()); } MPM.add(createAnnotationRemarksLegacyPass()); return; } // Add LibraryInfo if we have some. if (LibraryInfo) MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); addInitialAliasAnalysisPasses(MPM); // For ThinLTO there are two passes of indirect call promotion. The // first is during the compile phase when PerformThinLTO=false and // intra-module indirect call targets are promoted. The second is during // the ThinLTO backend when PerformThinLTO=true, when we promote imported // inter-module indirect calls. For that we perform indirect call promotion // earlier in the pass pipeline, here before globalopt. Otherwise imported // available_externally functions look unreferenced and are removed. if (PerformThinLTO) { MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true, !PGOSampleUse.empty())); MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); } // For SamplePGO in ThinLTO compile phase, we do not want to unroll loops // as it will change the CFG too much to make the 2nd profile annotation // in backend more difficult. bool PrepareForThinLTOUsingPGOSampleProfile = PrepareForThinLTO && !PGOSampleUse.empty(); if (PrepareForThinLTOUsingPGOSampleProfile) DisableUnrollLoops = true; // Infer attributes about declarations if possible. MPM.add(createInferFunctionAttrsLegacyPass()); // Infer attributes on declarations, call sites, arguments, etc. 
if (AttributorRun & AttributorRunOption::MODULE) MPM.add(createAttributorLegacyPass()); addExtensionsToPM(EP_ModuleOptimizerEarly, MPM); if (OptLevel > 2) MPM.add(createCallSiteSplittingPass()); // Propage constant function arguments by specializing the functions. if (OptLevel > 2 && EnableFunctionSpecialization) MPM.add(createFunctionSpecializationPass()); MPM.add(createIPSCCPPass()); // IP SCCP MPM.add(createCalledValuePropagationPass()); MPM.add(createGlobalOptimizerPass()); // Optimize out global vars // Promote any localized global vars. MPM.add(createPromoteMemoryToRegisterPass()); MPM.add(createDeadArgEliminationPass()); // Dead argument elimination MPM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE addExtensionsToPM(EP_Peephole, MPM); - MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE + MPM.add( + createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp( + true))); // Clean up after IPCP & DAE // For SamplePGO in ThinLTO compile phase, we do not want to do indirect // call promotion as it will change the CFG too much to make the 2nd // profile annotation in backend more difficult. // PGO instrumentation is added during the compile phase for ThinLTO, do // not run it a second time if (DefaultOrPreLinkPipeline && !PrepareForThinLTOUsingPGOSampleProfile) addPGOInstrPasses(MPM); // Create profile COMDAT variables. Lld linker wants to see all variables // before the LTO/ThinLTO link since it needs to resolve symbols/comdats. if (!PerformThinLTO && EnablePGOCSInstrGen) MPM.add(createPGOInstrumentationGenCreateVarLegacyPass(PGOInstrGen)); // We add a module alias analysis pass here. In part due to bugs in the // analysis infrastructure this "works" in that the analysis stays alive // for the entire SCC pass run below. MPM.add(createGlobalsAAWrapperPass()); // Start of CallGraph SCC passes. MPM.add(createPruneEHPass()); // Remove dead EH info bool RunInliner = false; if (Inliner) { MPM.add(Inliner); Inliner = nullptr; RunInliner = true; } // Infer attributes on declarations, call sites, arguments, etc. for an SCC. if (AttributorRun & AttributorRunOption::CGSCC) MPM.add(createAttributorCGSCCLegacyPass()); // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if // there are no OpenMP runtime calls present in the module. if (OptLevel > 1) MPM.add(createOpenMPOptCGSCCLegacyPass()); MPM.add(createPostOrderFunctionAttrsLegacyPass()); if (OptLevel > 2) MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args addExtensionsToPM(EP_CGSCCOptimizerLate, MPM); addFunctionSimplificationPasses(MPM); // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC // pass manager that we are specifically trying to avoid. To prevent this // we must insert a no-op module pass to reset the pass manager. MPM.add(createBarrierNoopPass()); if (RunPartialInlining) MPM.add(createPartialInliningPass()); if (OptLevel > 1 && !PrepareForLTO && !PrepareForThinLTO) // Remove avail extern fns and globals definitions if we aren't // compiling an object file for later LTO. For LTO we want to preserve // these so they are eligible for inlining at link-time. Note if they // are unreferenced they will be removed by GlobalDCE later, so // this only impacts referenced available externally globals. 
// Eventually they will be suppressed during codegen, but eliminating // here enables more opportunity for GlobalDCE as it may make // globals referenced by available external functions dead // and saves running remaining passes on the eliminated functions. MPM.add(createEliminateAvailableExternallyPass()); // CSFDO instrumentation and use pass. Don't invoke this for Prepare pass // for LTO and ThinLTO -- The actual pass will be called after all inlines // are performed. // Need to do this after COMDAT variables have been eliminated, // (i.e. after EliminateAvailableExternallyPass). if (!(PrepareForLTO || PrepareForThinLTO)) addPGOInstrPasses(MPM, /* IsCS */ true); if (EnableOrderFileInstrumentation) MPM.add(createInstrOrderFilePass()); MPM.add(createReversePostOrderFunctionAttrsPass()); // The inliner performs some kind of dead code elimination as it goes, // but there are cases that are not really caught by it. We might // at some point consider teaching the inliner about them, but it // is OK for now to run GlobalOpt + GlobalDCE in tandem as their // benefits generally outweight the cost, making the whole pipeline // faster. if (RunInliner) { MPM.add(createGlobalOptimizerPass()); MPM.add(createGlobalDCEPass()); } // If we are planning to perform ThinLTO later, let's not bloat the code with // unrolling/vectorization/... now. We'll first run the inliner + CGSCC passes // during ThinLTO and perform the rest of the optimizations afterward. if (PrepareForThinLTO) { // Ensure we perform any last passes, but do so before renaming anonymous // globals in case the passes add any. addExtensionsToPM(EP_OptimizerLast, MPM); MPM.add(createCanonicalizeAliasesPass()); // Rename anon globals to be able to export them in the summary. MPM.add(createNameAnonGlobalPass()); return; } if (PerformThinLTO) // Optimize globals now when performing ThinLTO, this enables more // optimizations later. MPM.add(createGlobalOptimizerPass()); // Scheduling LoopVersioningLICM when inlining is over, because after that // we may see more accurate aliasing. Reason to run this late is that too // early versioning may prevent further inlining due to increase of code // size. By placing it just after inlining other optimizations which runs // later might get benefit of no-alias assumption in clone loop. if (UseLoopVersioningLICM) { MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM - MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); } // We add a fresh GlobalsModRef run at this point. This is particularly // useful as the above will have inlined, DCE'ed, and function-attr // propagated everything. We should at this point have a reasonably minimal // and richly annotated call graph. By computing aliasing and mod/ref // information for all local globals here, the late loop passes and notably // the vectorizer will be able to use them to help recognize vectorizable // memory operations. // // Note that this relies on a bug in the pass manager which preserves // a module analysis into a function pass pipeline (and throughout it) so // long as the first function pass doesn't invalidate the module analysis. // Thus both Float2Int and LoopRotate have to preserve AliasAnalysis for // this to work. Fortunately, it is trivial to preserve AliasAnalysis // (doing nothing preserves it as it is required to be conservatively // correct in the face of IR changes). 
MPM.add(createGlobalsAAWrapperPass()); MPM.add(createFloat2IntPass()); MPM.add(createLowerConstantIntrinsicsPass()); if (EnableMatrix) { MPM.add(createLowerMatrixIntrinsicsPass()); // CSE the pointer arithmetic of the column vectors. This allows alias // analysis to establish no-aliasing between loads and stores of different // columns of the same matrix. MPM.add(createEarlyCSEPass(false)); } addExtensionsToPM(EP_VectorizerStart, MPM); // Re-rotate loops in all our loop nests. These may have fallout out of // rotated form due to GVN or other transformations, and the vectorizer relies // on the rotated form. Disable header duplication at -Oz. MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO)); // Distribute loops to allow partial vectorization. I.e. isolate dependences // into separate loop that would otherwise inhibit vectorization. This is // currently only performed for loops marked with the metadata // llvm.loop.distribute=true or when -enable-loop-distribute is specified. MPM.add(createLoopDistributePass()); addVectorPasses(MPM, /* IsFullLTO */ false); // FIXME: We shouldn't bother with this anymore. MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes // GlobalOpt already deletes dead functions and globals, at -O2 try a // late pass of GlobalDCE. It is capable of deleting dead cycles. if (OptLevel > 1) { MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. MPM.add(createConstantMergePass()); // Merge dup global constants } // See comment in the new PM for justification of scheduling splitting at // this stage (\ref buildModuleSimplificationPipeline). if (EnableHotColdSplit && !(PrepareForLTO || PrepareForThinLTO)) MPM.add(createHotColdSplittingPass()); if (EnableIROutliner) MPM.add(createIROutlinerPass()); if (MergeFunctions) MPM.add(createMergeFunctionsPass()); // Add Module flag "CG Profile" based on Branch Frequency Information. if (CallGraphProfile) MPM.add(createCGProfileLegacyPass()); // LoopSink pass sinks instructions hoisted by LICM, which serves as a // canonicalization pass that enables other optimizations. As a result, // LoopSink pass needs to be a very late IR pass to avoid undoing LICM // result too early. MPM.add(createLoopSinkPass()); // Get rid of LCSSA nodes. MPM.add(createInstSimplifyLegacyPass()); // This hoists/decomposes div/rem ops. It should run after other sink/hoist // passes to avoid re-sinking, but before SimplifyCFG because it can allow // flattening of blocks. MPM.add(createDivRemPairsPass()); // LoopSink (and other loop passes since the last simplifyCFG) might have // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. - MPM.add(createCFGSimplificationPass()); + MPM.add(createCFGSimplificationPass( + SimplifyCFGOptions().convertSwitchRangeToICmp(true))); addExtensionsToPM(EP_OptimizerLast, MPM); if (PrepareForLTO) { MPM.add(createCanonicalizeAliasesPass()); // Rename anon globals to be able to handle them in the summary MPM.add(createNameAnonGlobalPass()); } MPM.add(createAnnotationRemarksLegacyPass()); } void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // Load sample profile before running the LTO optimization pipeline. if (!PGOSampleUse.empty()) { PM.add(createPruneEHPass()); PM.add(createSampleProfileLoaderPass(PGOSampleUse)); } // Remove unused virtual tables to improve the quality of code generated by // whole-program devirtualization and bitset lowering. PM.add(createGlobalDCEPass()); // Provide AliasAnalysis services for optimizations. 
addInitialAliasAnalysisPasses(PM); // Allow forcing function attributes as a debugging and tuning aid. PM.add(createForceFunctionAttrsLegacyPass()); // Infer attributes about declarations if possible. PM.add(createInferFunctionAttrsLegacyPass()); if (OptLevel > 1) { // Split call-site with more constrained arguments. PM.add(createCallSiteSplittingPass()); // Indirect call promotion. This should promote all the targets that are // left by the earlier promotion pass that promotes intra-module targets. // This two-step promotion is to save the compile time. For LTO, it should // produce the same result as if we only do promotion here. PM.add( createPGOIndirectCallPromotionLegacyPass(true, !PGOSampleUse.empty())); // Propage constant function arguments by specializing the functions. if (EnableFunctionSpecialization && OptLevel > 2) PM.add(createFunctionSpecializationPass()); // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function // pointers passed as arguments to direct uses of functions. PM.add(createIPSCCPPass()); // Attach metadata to indirect call sites indicating the set of functions // they may target at run-time. This should follow IPSCCP. PM.add(createCalledValuePropagationPass()); // Infer attributes on declarations, call sites, arguments, etc. if (AttributorRun & AttributorRunOption::MODULE) PM.add(createAttributorLegacyPass()); } // Infer attributes about definitions. The readnone attribute in particular is // required for virtual constant propagation. PM.add(createPostOrderFunctionAttrsLegacyPass()); PM.add(createReversePostOrderFunctionAttrsPass()); // Split globals using inrange annotations on GEP indices. This can help // improve the quality of generated code when virtual constant propagation or // control flow integrity are enabled. PM.add(createGlobalSplitPass()); // Apply whole-program devirtualization and virtual constant propagation. PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr)); // That's all we need at opt level 1. if (OptLevel == 1) return; // Now that we internalized some globals, see if we can hack on them! PM.add(createGlobalOptimizerPass()); // Promote any localized global vars. PM.add(createPromoteMemoryToRegisterPass()); // Linking modules together can lead to duplicated global constants, only // keep one copy of each constant. PM.add(createConstantMergePass()); // Remove unused arguments from functions. PM.add(createDeadArgEliminationPass()); // Reduce the code after globalopt and ipsccp. Both can open up significant // simplification opportunities, and both can propagate functions through // function pointers. When this happens, we often have to resolve varargs // calls, etc, so let instcombine do this. if (OptLevel > 2) PM.add(createAggressiveInstCombinerPass()); PM.add(createInstructionCombiningPass()); addExtensionsToPM(EP_Peephole, PM); // Inline small functions bool RunInliner = Inliner; if (RunInliner) { PM.add(Inliner); Inliner = nullptr; } PM.add(createPruneEHPass()); // Remove dead EH info. // CSFDO instrumentation and use pass. addPGOInstrPasses(PM, /* IsCS */ true); // Infer attributes on declarations, call sites, arguments, etc. for an SCC. if (AttributorRun & AttributorRunOption::CGSCC) PM.add(createAttributorCGSCCLegacyPass()); // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if // there are no OpenMP runtime calls present in the module. 
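addLTOOptimizationPasses is reached through populateLTOPassManager, defined further below. At link time, a client that has already merged its bitcode into a single module can drive it as sketched here; runFullLTOPipeline and the flag choices are illustrative:

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"

using namespace llvm;

// Full LTO link-time pipeline over the merged module M.
static void runFullLTOPipeline(Module &M) {
  PassManagerBuilder PMB;
  PMB.OptLevel = 2;
  PMB.Inliner = createFunctionInliningPass();
  PMB.VerifyInput = true;
  PMB.VerifyOutput = true;

  legacy::PassManager PM;
  PMB.populateLTOPassManager(PM);
  PM.run(M);
}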
if (OptLevel > 1) PM.add(createOpenMPOptCGSCCLegacyPass()); // Optimize globals again if we ran the inliner. if (RunInliner) PM.add(createGlobalOptimizerPass()); PM.add(createGlobalDCEPass()); // Remove dead functions. // If we didn't decide to inline a function, check to see if we can // transform it to pass arguments by value instead of by reference. PM.add(createArgumentPromotionPass()); // The IPO passes may leave cruft around. Clean up after them. PM.add(createInstructionCombiningPass()); addExtensionsToPM(EP_Peephole, PM); PM.add(createJumpThreadingPass(/*FreezeSelectCond*/ true)); // Break up allocas PM.add(createSROAPass()); // LTO provides additional opportunities for tailcall elimination due to // link-time inlining, and visibility of nocapture attribute. if (OptLevel > 1) PM.add(createTailCallEliminationPass()); // Infer attributes on declarations, call sites, arguments, etc. PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture. // Run a few AA driven optimizations here and now, to cleanup the code. PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. - PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); PM.add(NewGVN ? createNewGVNPass() : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies. PM.add(createMemCpyOptPass()); // Remove dead memcpys. // Nuke dead stores. PM.add(createDeadStoreEliminationPass()); PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds. // More loops are countable; try to optimize them. if (EnableLoopFlatten) PM.add(createLoopFlattenPass()); PM.add(createIndVarSimplifyPass()); PM.add(createLoopDeletionPass()); if (EnableLoopInterchange) PM.add(createLoopInterchangePass()); if (EnableConstraintElimination) PM.add(createConstraintEliminationPass()); // Unroll small loops and perform peeling. PM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, ForgetAllSCEVInLoopUnroll)); PM.add(createLoopDistributePass()); addVectorPasses(PM, /* IsFullLTO */ true); addExtensionsToPM(EP_Peephole, PM); PM.add(createJumpThreadingPass(/*FreezeSelectCond*/ true)); } void PassManagerBuilder::addLateLTOOptimizationPasses( legacy::PassManagerBase &PM) { // See comment in the new PM for justification of scheduling splitting at // this stage (\ref buildLTODefaultPipeline). if (EnableHotColdSplit) PM.add(createHotColdSplittingPass()); // Delete basic blocks, which optimization passes may have killed. PM.add( createCFGSimplificationPass(SimplifyCFGOptions().hoistCommonInsts(true))); // Drop bodies of available externally objects to improve GlobalDCE. PM.add(createEliminateAvailableExternallyPass()); // Now that we have optimized the program, discard unreachable functions. PM.add(createGlobalDCEPass()); // FIXME: this is profitable (for compiler time) to do at -O0 too, but // currently it damages debug info. if (MergeFunctions) PM.add(createMergeFunctionsPass()); } void PassManagerBuilder::populateThinLTOPassManager( legacy::PassManagerBase &PM) { PerformThinLTO = true; if (LibraryInfo) PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); if (VerifyInput) PM.add(createVerifierPass()); if (ImportSummary) { // This pass imports type identifier resolutions for whole-program // devirtualization and CFI. It must run early because other passes may // disturb the specific instruction patterns that these passes look for, // creating dependencies on resolutions that may not appear in the summary. 
// // For example, GVN may transform the pattern assume(type.test) appearing in // two basic blocks into assume(phi(type.test, type.test)), which would // transform a dependency on a WPD resolution into a dependency on a type // identifier resolution for CFI. // // Also, WPD has access to more precise information than ICP and can // devirtualize more effectively, so it should operate on the IR first. PM.add(createWholeProgramDevirtPass(nullptr, ImportSummary)); PM.add(createLowerTypeTestsPass(nullptr, ImportSummary)); } populateModulePassManager(PM); if (VerifyOutput) PM.add(createVerifierPass()); PerformThinLTO = false; } void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { if (LibraryInfo) PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); if (VerifyInput) PM.add(createVerifierPass()); addExtensionsToPM(EP_FullLinkTimeOptimizationEarly, PM); if (OptLevel != 0) addLTOOptimizationPasses(PM); else { // The whole-program-devirt pass needs to run at -O0 because only it knows // about the llvm.type.checked.load intrinsic: it needs to both lower the // intrinsic itself and handle it in the summary. PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr)); } // Create a function that performs CFI checks for cross-DSO calls with targets // in the current module. PM.add(createCrossDSOCFIPass()); // Lower type metadata and the type.test intrinsic. This pass supports Clang's // control flow integrity mechanisms (-fsanitize=cfi*) and needs to run at // link time if CFI is enabled. The pass does nothing if CFI is disabled. PM.add(createLowerTypeTestsPass(ExportSummary, nullptr)); // Run a second time to clean up any type tests left behind by WPD for use // in ICP (which is performed earlier than this in the regular LTO pipeline). PM.add(createLowerTypeTestsPass(nullptr, nullptr, true)); if (OptLevel != 0) addLateLTOOptimizationPasses(PM); addExtensionsToPM(EP_FullLinkTimeOptimizationLast, PM); PM.add(createAnnotationRemarksLegacyPass()); if (VerifyOutput) PM.add(createVerifierPass()); } LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() { PassManagerBuilder *PMB = new PassManagerBuilder(); return wrap(PMB); } void LLVMPassManagerBuilderDispose(LLVMPassManagerBuilderRef PMB) { PassManagerBuilder *Builder = unwrap(PMB); delete Builder; } void LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB, unsigned OptLevel) { PassManagerBuilder *Builder = unwrap(PMB); Builder->OptLevel = OptLevel; } void LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB, unsigned SizeLevel) { PassManagerBuilder *Builder = unwrap(PMB); Builder->SizeLevel = SizeLevel; } void LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB, LLVMBool Value) { // NOTE: The DisableUnitAtATime switch has been removed. } void LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB, LLVMBool Value) { PassManagerBuilder *Builder = unwrap(PMB); Builder->DisableUnrollLoops = Value; } void LLVMPassManagerBuilderSetDisableSimplifyLibCalls(LLVMPassManagerBuilderRef PMB, LLVMBool Value) { // NOTE: The simplify-libcalls pass has been removed. 
} void LLVMPassManagerBuilderUseInlinerWithThreshold(LLVMPassManagerBuilderRef PMB, unsigned Threshold) { PassManagerBuilder *Builder = unwrap(PMB); Builder->Inliner = createFunctionInliningPass(Threshold); } void LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB, LLVMPassManagerRef PM) { PassManagerBuilder *Builder = unwrap(PMB); legacy::FunctionPassManager *FPM = unwrap(PM); Builder->populateFunctionPassManager(*FPM); } void LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB, LLVMPassManagerRef PM) { PassManagerBuilder *Builder = unwrap(PMB); legacy::PassManagerBase *MPM = unwrap(PM); Builder->populateModulePassManager(*MPM); } void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB, LLVMPassManagerRef PM, LLVMBool Internalize, LLVMBool RunInliner) { PassManagerBuilder *Builder = unwrap(PMB); legacy::PassManagerBase *LPM = unwrap(PM); // A small backwards compatibility hack. populateLTOPassManager used to take // an RunInliner option. if (RunInliner && !Builder->Inliner) Builder->Inliner = createFunctionInliningPass(); Builder->populateLTOPassManager(*LPM); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp index 7fb1a25bdf13..6372ce19f8ee 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/LICM.cpp @@ -1,2357 +1,2364 @@ //===-- LICM.cpp - Loop Invariant Code Motion Pass ------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This pass performs loop invariant code motion, attempting to remove as much // code from the body of a loop as possible. It does this by either hoisting // code into the preheader block, or by sinking code to the exit blocks if it is // safe. This pass also promotes must-aliased memory locations in the loop to // live in registers, thus hoisting and sinking "invariant" loads and stores. // // Hoisting operations out of loops is a canonicalization transform. It // enables and simplifies subsequent optimizations in the middle-end. // Rematerialization of hoisted instructions to reduce register pressure is the // responsibility of the back-end, which has more accurate information about // register pressure and also handles other optimizations than LICM that // increase live-ranges. // // This pass uses alias analysis for two purposes: // // 1. Moving loop invariant loads and calls out of loops. If we can determine // that a load or call inside of a loop never aliases anything stored to, // we can hoist it or sink it like any other instruction. // 2. Scalar Promotion of Memory - If there is a store instruction inside of // the loop, we try to move the store to happen AFTER the loop instead of // inside of the loop. This can only happen if a few conditions are true: // A. The pointer stored through is loop invariant // B. There are no stores or loads in the loop which _may_ alias the // pointer. There are no calls in the loop which mod/ref the pointer. // If these conditions are true, we can promote the loads and stores in the // loop of the pointer to use a temporary alloca'd variable. We then use // the SSAUpdater to construct the appropriate SSA form for the value. 
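The llvm-c wrappers that close the PassManagerBuilder hunk above (LLVMPassManagerBuilderCreate through LLVMPassManagerBuilderPopulateLTOPassManager) mirror the C++ populate* entry points one-to-one. A sketch of driving them through the C surface; runO2PipelinesC and the 225 inline threshold are illustrative, and Mod is assumed to be a module the caller already created:

#include "llvm-c/Core.h"
#include "llvm-c/Transforms/PassManagerBuilder.h"

// Build and run -O2-style function and module pipelines via the C bindings.
static void runO2PipelinesC(LLVMModuleRef Mod) {
  LLVMPassManagerBuilderRef PMB = LLVMPassManagerBuilderCreate();
  LLVMPassManagerBuilderSetOptLevel(PMB, 2);
  LLVMPassManagerBuilderUseInlinerWithThreshold(PMB, 225);

  LLVMPassManagerRef FPM = LLVMCreateFunctionPassManagerForModule(Mod);
  LLVMPassManagerRef MPM = LLVMCreatePassManager();
  LLVMPassManagerBuilderPopulateFunctionPassManager(PMB, FPM);
  LLVMPassManagerBuilderPopulateModulePassManager(PMB, MPM);

  LLVMInitializeFunctionPassManager(FPM);
  for (LLVMValueRef F = LLVMGetFirstFunction(Mod); F;
       F = LLVMGetNextFunction(F))
    LLVMRunFunctionPassManager(FPM, F);
  LLVMFinalizeFunctionPassManager(FPM);
  LLVMRunPassManager(MPM, Mod);

  LLVMDisposePassManager(FPM);
  LLVMDisposePassManager(MPM);
  LLVMPassManagerBuilderDispose(PMB);
}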
// //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/LICM.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/GuardUtils.h" #include "llvm/Analysis/LazyBlockFrequencyInfo.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/PredIteratorCache.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include #include using namespace llvm; #define DEBUG_TYPE "licm" STATISTIC(NumCreatedBlocks, "Number of blocks created"); STATISTIC(NumClonedBranches, "Number of branches cloned"); STATISTIC(NumSunk, "Number of instructions sunk out of loop"); STATISTIC(NumHoisted, "Number of instructions hoisted out of loop"); STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk"); STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk"); STATISTIC(NumPromoted, "Number of memory locations promoted to registers"); /// Memory promotion is enabled by default. static cl::opt DisablePromotion("disable-licm-promotion", cl::Hidden, cl::init(false), cl::desc("Disable memory promotion in LICM pass")); static cl::opt ControlFlowHoisting( "licm-control-flow-hoisting", cl::Hidden, cl::init(false), cl::desc("Enable control flow (and PHI) hoisting in LICM")); static cl::opt MaxNumUsesTraversed( "licm-max-num-uses-traversed", cl::Hidden, cl::init(8), cl::desc("Max num uses visited for identifying load " "invariance in loop using invariant start (default = 8)")); // Experimental option to allow imprecision in LICM in pathological cases, in // exchange for faster compile. This is to be removed if MemorySSA starts to // address the same issue. This flag applies only when LICM uses MemorySSA // instead on AliasSetTracker. LICM calls MemorySSAWalker's // getClobberingMemoryAccess, up to the value of the Cap, getting perfect // accuracy. 
Afterwards, LICM will call into MemorySSA's getDefiningAccess, // which may not be precise, since optimizeUses is capped. The result is // correct, but we may not get as "far up" as possible to get which access is // clobbering the one queried. cl::opt llvm::SetLicmMssaOptCap( "licm-mssa-optimization-cap", cl::init(100), cl::Hidden, cl::desc("Enable imprecision in LICM in pathological cases, in exchange " "for faster compile. Caps the MemorySSA clobbering calls.")); // Experimentally, memory promotion carries less importance than sinking and // hoisting. Limit when we do promotion when using MemorySSA, in order to save // compile time. cl::opt llvm::SetLicmMssaNoAccForPromotionCap( "licm-mssa-max-acc-promotion", cl::init(250), cl::Hidden, cl::desc("[LICM & MemorySSA] When MSSA in LICM is disabled, this has no " "effect. When MSSA in LICM is enabled, then this is the maximum " "number of accesses allowed to be present in a loop in order to " "enable memory promotion.")); static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, TargetTransformInfo *TTI, bool &FreeInLoop, bool LoopNestMode); static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE); static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, BlockFrequencyInfo *BFI, const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE); -static bool isSafeToExecuteUnconditionally(Instruction &Inst, - const DominatorTree *DT, - const TargetLibraryInfo *TLI, - const Loop *CurLoop, - const LoopSafetyInfo *SafetyInfo, - OptimizationRemarkEmitter *ORE, - const Instruction *CtxI = nullptr); +static bool isSafeToExecuteUnconditionally( + Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI, + const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, + OptimizationRemarkEmitter *ORE, const Instruction *CtxI, + bool AllowSpeculation); static bool pointerInvalidatedByLoop(MemoryLocation MemLoc, AliasSetTracker *CurAST, Loop *CurLoop, AAResults *AA); static bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, Loop *CurLoop, Instruction &I, SinkAndHoistLICMFlags &Flags); static bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA, MemoryUse &MU); static Instruction *cloneInstructionInExitBlock( Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU); static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater *MSSAU); static void moveInstructionBefore(Instruction &I, Instruction &Dest, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater *MSSAU, ScalarEvolution *SE); static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L, function_ref Fn); static SmallVector, 0> collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L); namespace { struct LoopInvariantCodeMotion { bool runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT, BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool LoopNestMode = false); LoopInvariantCodeMotion(unsigned LicmMssaOptCap, - unsigned LicmMssaNoAccForPromotionCap) + unsigned LicmMssaNoAccForPromotionCap, + bool LicmAllowSpeculation) 
: LicmMssaOptCap(LicmMssaOptCap), - LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {} + LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap), + LicmAllowSpeculation(LicmAllowSpeculation) {} private: unsigned LicmMssaOptCap; unsigned LicmMssaNoAccForPromotionCap; + bool LicmAllowSpeculation; }; struct LegacyLICMPass : public LoopPass { static char ID; // Pass identification, replacement for typeid LegacyLICMPass( unsigned LicmMssaOptCap = SetLicmMssaOptCap, - unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap) - : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap) { + unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap, + bool LicmAllowSpeculation = true) + : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + LicmAllowSpeculation) { initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry()); } bool runOnLoop(Loop *L, LPPassManager &LPM) override { if (skipLoop(L)) return false; LLVM_DEBUG(dbgs() << "Perform LICM on Loop with header at block " << L->getHeader()->getNameOrAsOperand() << "\n"); auto *SE = getAnalysisIfAvailable(); MemorySSA *MSSA = &getAnalysis().getMSSA(); bool hasProfileData = L->getHeader()->getParent()->hasProfileData(); BlockFrequencyInfo *BFI = hasProfileData ? &getAnalysis().getBFI() : nullptr; // For the old PM, we can't use OptimizationRemarkEmitter as an analysis // pass. Function analyses need to be preserved across loop transformations // but ORE cannot be preserved (see comment before the pass definition). OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); return LICM.runOnLoop( L, &getAnalysis().getAAResults(), &getAnalysis().getLoopInfo(), &getAnalysis().getDomTree(), BFI, &getAnalysis().getTLI( *L->getHeader()->getParent()), &getAnalysis().getTTI( *L->getHeader()->getParent()), SE ? &SE->getSE() : nullptr, MSSA, &ORE); } /// This transformation requires natural loop information & requires that /// loop preheaders be inserted into the CFG... /// void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addPreserved(); AU.addPreserved(); AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); getLoopAnalysisUsage(AU); LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); AU.addPreserved(); AU.addPreserved(); } private: LoopInvariantCodeMotion LICM; }; } // namespace PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &) { if (!AR.MSSA) report_fatal_error("LICM requires MemorySSA (loop-mssa)"); // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis // pass. Function analyses need to be preserved across loop transformations // but ORE cannot be preserved (see comment before the pass definition). OptimizationRemarkEmitter ORE(L.getHeader()->getParent()); - LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap); + LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + LicmAllowSpeculation); if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI, &AR.SE, AR.MSSA, &ORE)) return PreservedAnalyses::all(); auto PA = getLoopPassPreservedAnalyses(); PA.preserve(); PA.preserve(); PA.preserve(); return PA; } PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &) { if (!AR.MSSA) report_fatal_error("LNICM requires MemorySSA (loop-mssa)"); // For the new PM, we also can't use OptimizationRemarkEmitter as an analysis // pass. 
Function analyses need to be preserved across loop transformations // but ORE cannot be preserved (see comment before the pass definition). OptimizationRemarkEmitter ORE(LN.getParent()); - LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap); + LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + LicmAllowSpeculation); Loop &OutermostLoop = LN.getOutermostLoop(); bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI, &AR.SE, AR.MSSA, &ORE, true); if (!Changed) return PreservedAnalyses::all(); auto PA = getLoopPassPreservedAnalyses(); PA.preserve(); PA.preserve(); PA.preserve(); return PA; } char LegacyLICMPass::ID = 0; INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, false) INITIALIZE_PASS_DEPENDENCY(LoopPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_DEPENDENCY(LazyBFIPass) INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, false) Pass *llvm::createLICMPass() { return new LegacyLICMPass(); } Pass *llvm::createLICMPass(unsigned LicmMssaOptCap, - unsigned LicmMssaNoAccForPromotionCap) { - return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap); + unsigned LicmMssaNoAccForPromotionCap, + bool LicmAllowSpeculation) { + return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, + LicmAllowSpeculation); } llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop *L, MemorySSA *MSSA) : SinkAndHoistLICMFlags(SetLicmMssaOptCap, SetLicmMssaNoAccForPromotionCap, IsSink, L, MSSA) {} llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags( unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink, Loop *L, MemorySSA *MSSA) : LicmMssaOptCap(LicmMssaOptCap), LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap), IsSink(IsSink) { assert(((L != nullptr) == (MSSA != nullptr)) && "Unexpected values for SinkAndHoistLICMFlags"); if (!MSSA) return; unsigned AccessCapCount = 0; for (auto *BB : L->getBlocks()) if (const auto *Accesses = MSSA->getBlockAccesses(BB)) for (const auto &MA : *Accesses) { (void)MA; ++AccessCapCount; if (AccessCapCount > LicmMssaNoAccForPromotionCap) { NoOfMemAccTooLarge = true; return; } } } /// Hoist expressions out of the specified loop. Note, alias info for inner /// loop is not preserved so it is not a good idea to run LICM multiple /// times on one loop. bool LoopInvariantCodeMotion::runOnLoop( Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT, BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA, OptimizationRemarkEmitter *ORE, bool LoopNestMode) { bool Changed = false; assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); // If this loop has metadata indicating that LICM is not to be performed then // just exit. if (hasDisableLICMTransformsHint(L)) { return false; } // Don't sink stores from loops with coroutine suspend instructions. // LICM would sink instructions into the default destination of // the coroutine switch. The default destination of the switch is to // handle the case where the coroutine is suspended, by which point the // coroutine frame may have been destroyed. No instruction can be sunk there. // FIXME: This would unfortunately hurt the performance of coroutines, however // there is currently no general solution for this. 
Similar issues could also // potentially happen in other passes where instructions are being moved // across that edge. bool HasCoroSuspendInst = llvm::any_of(L->getBlocks(), [](BasicBlock *BB) { return llvm::any_of(*BB, [](Instruction &I) { IntrinsicInst *II = dyn_cast(&I); return II && II->getIntrinsicID() == Intrinsic::coro_suspend; }); }); MemorySSAUpdater MSSAU(MSSA); SinkAndHoistLICMFlags Flags(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*IsSink=*/true, L, MSSA); // Get the preheader block to move instructions into... BasicBlock *Preheader = L->getLoopPreheader(); // Compute loop safety information. ICFLoopSafetyInfo SafetyInfo; SafetyInfo.computeLoopSafetyInfo(L); // We want to visit all of the instructions in this loop... that are not parts // of our subloops (they have already had their invariants hoisted out of // their loop, into this loop, so there is no need to process the BODIES of // the subloops). // // Traverse the body of the loop in depth first order on the dominator tree so // that we are guaranteed to see definitions before we see uses. This allows // us to sink instructions in one pass, without iteration. After sinking // instructions, we perform another pass to hoist them out of the loop. if (L->hasDedicatedExits()) Changed |= LoopNestMode ? sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L, &MSSAU, &SafetyInfo, Flags, ORE) : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L, &MSSAU, &SafetyInfo, Flags, ORE); Flags.setIsSink(false); if (Preheader) Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L, - &MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode); + &MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode, + LicmAllowSpeculation); // Now that all loop invariants have been removed from the loop, promote any // memory references to scalars that we can. // Don't sink stores from loops without dedicated block exits. Exits // containing indirect branches are not transformed by loop simplify, // make sure we catch that. An additional load may be generated in the // preheader for SSA updater, so also avoid sinking when no preheader // is available. if (!DisablePromotion && Preheader && L->hasDedicatedExits() && !Flags.tooManyMemoryAccesses() && !HasCoroSuspendInst) { // Figure out the loop exits and their insertion points SmallVector ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); // We can't insert into a catchswitch. bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) { return isa(Exit->getTerminator()); }); if (!HasCatchSwitch) { SmallVector InsertPts; SmallVector MSSAInsertPts; InsertPts.reserve(ExitBlocks.size()); MSSAInsertPts.reserve(ExitBlocks.size()); for (BasicBlock *ExitBlock : ExitBlocks) { InsertPts.push_back(&*ExitBlock->getFirstInsertionPt()); MSSAInsertPts.push_back(nullptr); } PredIteratorCache PIC; // Promoting one set of accesses may make the pointers for another set // loop invariant, so run this in a loop (with the MaybePromotable set // decreasing in size over time). 
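// The comment above describes a fixed-point iteration: promoting one set of
// accesses can make the pointers of another set loop invariant, so promotion
// is retried until a whole round changes nothing. A standalone sketch of that
// pattern (Candidate, tryPromote and runToFixedPoint are hypothetical
// stand-ins, not the LLVM APIs used below):
#include <vector>

struct Candidate {
  bool Promotable = false; // whether this round can still promote it
};

// Stand-in for one promoteLoopAccessesToScalars() call: reports whether it
// changed anything, and consumes the candidate so the iteration terminates.
static bool tryPromote(Candidate &C) {
  bool Changed = C.Promotable;
  C.Promotable = false;
  return Changed;
}

static bool runToFixedPoint(std::vector<Candidate> &Candidates) {
  bool Promoted = false;
  bool LocalPromoted;
  do {
    LocalPromoted = false;
    for (Candidate &C : Candidates) // the promotable subset shrinks each round
      LocalPromoted |= tryPromote(C);
    Promoted |= LocalPromoted;      // remember whether any round made progress
  } while (LocalPromoted);          // stop once a full round is a no-op
  return Promoted;
}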
bool Promoted = false; bool LocalPromoted; do { LocalPromoted = false; for (const SmallSetVector &PointerMustAliases : collectPromotionCandidates(MSSA, AA, L)) { LocalPromoted |= promoteLoopAccessesToScalars( - PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, - LI, DT, TLI, L, &MSSAU, &SafetyInfo, ORE); + PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI, + DT, TLI, L, &MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation); } Promoted |= LocalPromoted; } while (LocalPromoted); // Once we have promoted values across the loop body we have to // recursively reform LCSSA as any nested loop may now have values defined // within the loop used in the outer loop. // FIXME: This is really heavy handed. It would be a bit better to use an // SSAUpdater strategy during promotion that was LCSSA aware and reformed // it as it went. if (Promoted) formLCSSARecursively(*L, *DT, LI, SE); Changed |= Promoted; } } // Check that neither this loop nor its parent have had LCSSA broken. LICM is // specifically moving instructions across the loop boundary and so it is // especially in need of basic functional correctness checking here. assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!"); assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) && "Parent loop not left in LCSSA form after LICM!"); if (VerifyMemorySSA) MSSA->verifyMemorySSA(); if (Changed && SE) SE->forgetLoopDispositions(L); return Changed; } /// Walk the specified region of the CFG (defined by all blocks dominated by /// the specified block, and that are in the current loop) in reverse depth /// first order w.r.t the DominatorTree. This allows us to visit uses before /// definitions, allowing us to sink a loop body in one pass without iteration. /// bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, DominatorTree *DT, BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo, SinkAndHoistLICMFlags &Flags, OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr && "Unexpected input to sinkRegion."); // We want to visit children before parents. We will enque all the parents // before their children in the worklist and process the worklist in reverse // order. SmallVector Worklist = collectChildrenInLoop(N, CurLoop); bool Changed = false; for (DomTreeNode *DTN : reverse(Worklist)) { BasicBlock *BB = DTN->getBlock(); // Only need to process the contents of this block if it is not part of a // subloop (which would already have been processed). if (inSubLoop(BB, CurLoop, LI)) continue; for (BasicBlock::iterator II = BB->end(); II != BB->begin();) { Instruction &I = *--II; // The instruction is not used in the loop if it is dead. In this case, // we just delete it instead of sinking it. if (isInstructionTriviallyDead(&I, TLI)) { LLVM_DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n'); salvageKnowledge(&I); salvageDebugInfo(I); ++II; eraseInstruction(I, *SafetyInfo, MSSAU); Changed = true; continue; } // Check to see if we can sink this instruction to the exit blocks // of the loop. We can do this if the all users of the instruction are // outside of the loop. In this case, it doesn't even matter if the // operands of the instruction are loop invariant. 
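// The criterion just stated, that every user of the instruction lives outside
// the loop, is the heart of the sinking test performed below by
// isNotUsedOrFreeInLoop(). A simplified sketch against the LLVM API, ignoring
// the FreeInLoop and funclet/catchswitch subtleties the real helper handles
// (allUsersOutsideLoop is a hypothetical name):
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Instruction.h"

static bool allUsersOutsideLoop(const llvm::Instruction &I, const llvm::Loop &L) {
  for (const llvm::User *U : I.users())
    if (const auto *UI = llvm::dyn_cast<llvm::Instruction>(U))
      if (L.contains(UI))
        return false; // a use inside the loop blocks plain sinking
  return true;        // every use is past an exit block, sinking is possible
}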
// bool FreeInLoop = false; bool LoopNestMode = OutermostLoop != nullptr; if (!I.mayHaveSideEffects() && isNotUsedOrFreeInLoop(I, LoopNestMode ? OutermostLoop : CurLoop, SafetyInfo, TTI, FreeInLoop, LoopNestMode) && canSinkOrHoistInst(I, AA, DT, CurLoop, /*CurAST*/nullptr, MSSAU, true, &Flags, ORE)) { if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) { if (!FreeInLoop) { ++II; salvageDebugInfo(I); eraseInstruction(I, *SafetyInfo, MSSAU); } Changed = true; } } } } if (VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); return Changed; } bool llvm::sinkRegionForLoopNest( DomTreeNode *N, AAResults *AA, LoopInfo *LI, DominatorTree *DT, BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo, SinkAndHoistLICMFlags &Flags, OptimizationRemarkEmitter *ORE) { bool Changed = false; SmallPriorityWorklist Worklist; Worklist.insert(CurLoop); appendLoopsToWorklist(*CurLoop, Worklist); while (!Worklist.empty()) { Loop *L = Worklist.pop_back_val(); Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L, MSSAU, SafetyInfo, Flags, ORE, CurLoop); } return Changed; } namespace { // This is a helper class for hoistRegion to make it able to hoist control flow // in order to be able to hoist phis. The way this works is that we initially // start hoisting to the loop preheader, and when we see a loop invariant branch // we make note of this. When we then come to hoist an instruction that's // conditional on such a branch we duplicate the branch and the relevant control // flow, then hoist the instruction into the block corresponding to its original // block in the duplicated control flow. class ControlFlowHoister { private: // Information about the loop we are hoisting from LoopInfo *LI; DominatorTree *DT; Loop *CurLoop; MemorySSAUpdater *MSSAU; // A map of blocks in the loop to the block their instructions will be hoisted // to. DenseMap HoistDestinationMap; // The branches that we can hoist, mapped to the block that marks a // convergence point of their control flow. DenseMap HoistableBranches; public: ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop, MemorySSAUpdater *MSSAU) : LI(LI), DT(DT), CurLoop(CurLoop), MSSAU(MSSAU) {} void registerPossiblyHoistableBranch(BranchInst *BI) { // We can only hoist conditional branches with loop invariant operands. if (!ControlFlowHoisting || !BI->isConditional() || !CurLoop->hasLoopInvariantOperands(BI)) return; // The branch destinations need to be in the loop, and we don't gain // anything by duplicating conditional branches with duplicate successors, // as it's essentially the same as an unconditional branch. BasicBlock *TrueDest = BI->getSuccessor(0); BasicBlock *FalseDest = BI->getSuccessor(1); if (!CurLoop->contains(TrueDest) || !CurLoop->contains(FalseDest) || TrueDest == FalseDest) return; // We can hoist BI if one branch destination is the successor of the other, // or both have common successor which we check by seeing if the // intersection of their successors is non-empty. // TODO: This could be expanded to allowing branches where both ends // eventually converge to a single block. 
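// The ControlFlowHoister introduced above exists so that values guarded by a
// loop-invariant conditional branch can still be hoisted: the branch, and the
// blocks needed to merge its results, are replicated in front of the loop. A
// source-level analogue of the effect, as an illustrative sketch only
// (sumBefore/sumAfter are made-up names, not code produced by the pass):
static int sumBefore(const int *A, int N, bool Inv, int X, int Y) {
  int Sum = 0;
  for (int I = 0; I < N; ++I) {
    int T = Inv ? X * 2 : Y * 3; // invariant condition, invariant operands
    Sum += A[I] + T;
  }
  return Sum;
}

static int sumAfter(const int *A, int N, bool Inv, int X, int Y) {
  int T = Inv ? X * 2 : Y * 3;   // branch and both arms hoisted out of the loop
  int Sum = 0;
  for (int I = 0; I < N; ++I)
    Sum += A[I] + T;
  return Sum;
}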
SmallPtrSet TrueDestSucc, FalseDestSucc; TrueDestSucc.insert(succ_begin(TrueDest), succ_end(TrueDest)); FalseDestSucc.insert(succ_begin(FalseDest), succ_end(FalseDest)); BasicBlock *CommonSucc = nullptr; if (TrueDestSucc.count(FalseDest)) { CommonSucc = FalseDest; } else if (FalseDestSucc.count(TrueDest)) { CommonSucc = TrueDest; } else { set_intersect(TrueDestSucc, FalseDestSucc); // If there's one common successor use that. if (TrueDestSucc.size() == 1) CommonSucc = *TrueDestSucc.begin(); // If there's more than one pick whichever appears first in the block list // (we can't use the value returned by TrueDestSucc.begin() as it's // unpredicatable which element gets returned). else if (!TrueDestSucc.empty()) { Function *F = TrueDest->getParent(); auto IsSucc = [&](BasicBlock &BB) { return TrueDestSucc.count(&BB); }; auto It = llvm::find_if(*F, IsSucc); assert(It != F->end() && "Could not find successor in function"); CommonSucc = &*It; } } // The common successor has to be dominated by the branch, as otherwise // there will be some other path to the successor that will not be // controlled by this branch so any phi we hoist would be controlled by the // wrong condition. This also takes care of avoiding hoisting of loop back // edges. // TODO: In some cases this could be relaxed if the successor is dominated // by another block that's been hoisted and we can guarantee that the // control flow has been replicated exactly. if (CommonSucc && DT->dominates(BI, CommonSucc)) HoistableBranches[BI] = CommonSucc; } bool canHoistPHI(PHINode *PN) { // The phi must have loop invariant operands. if (!ControlFlowHoisting || !CurLoop->hasLoopInvariantOperands(PN)) return false; // We can hoist phis if the block they are in is the target of hoistable // branches which cover all of the predecessors of the block. SmallPtrSet PredecessorBlocks; BasicBlock *BB = PN->getParent(); for (BasicBlock *PredBB : predecessors(BB)) PredecessorBlocks.insert(PredBB); // If we have less predecessor blocks than predecessors then the phi will // have more than one incoming value for the same block which we can't // handle. // TODO: This could be handled be erasing some of the duplicate incoming // values. if (PredecessorBlocks.size() != pred_size(BB)) return false; for (auto &Pair : HoistableBranches) { if (Pair.second == BB) { // Which blocks are predecessors via this branch depends on if the // branch is triangle-like or diamond-like. if (Pair.first->getSuccessor(0) == BB) { PredecessorBlocks.erase(Pair.first->getParent()); PredecessorBlocks.erase(Pair.first->getSuccessor(1)); } else if (Pair.first->getSuccessor(1) == BB) { PredecessorBlocks.erase(Pair.first->getParent()); PredecessorBlocks.erase(Pair.first->getSuccessor(0)); } else { PredecessorBlocks.erase(Pair.first->getSuccessor(0)); PredecessorBlocks.erase(Pair.first->getSuccessor(1)); } } } // PredecessorBlocks will now be empty if for every predecessor of BB we // found a hoistable branch source. 
return PredecessorBlocks.empty(); } BasicBlock *getOrCreateHoistedBlock(BasicBlock *BB) { if (!ControlFlowHoisting) return CurLoop->getLoopPreheader(); // If BB has already been hoisted, return that if (HoistDestinationMap.count(BB)) return HoistDestinationMap[BB]; // Check if this block is conditional based on a pending branch auto HasBBAsSuccessor = [&](DenseMap::value_type &Pair) { return BB != Pair.second && (Pair.first->getSuccessor(0) == BB || Pair.first->getSuccessor(1) == BB); }; auto It = llvm::find_if(HoistableBranches, HasBBAsSuccessor); // If not involved in a pending branch, hoist to preheader BasicBlock *InitialPreheader = CurLoop->getLoopPreheader(); if (It == HoistableBranches.end()) { LLVM_DEBUG(dbgs() << "LICM using " << InitialPreheader->getNameOrAsOperand() << " as hoist destination for " << BB->getNameOrAsOperand() << "\n"); HoistDestinationMap[BB] = InitialPreheader; return InitialPreheader; } BranchInst *BI = It->first; assert(std::find_if(++It, HoistableBranches.end(), HasBBAsSuccessor) == HoistableBranches.end() && "BB is expected to be the target of at most one branch"); LLVMContext &C = BB->getContext(); BasicBlock *TrueDest = BI->getSuccessor(0); BasicBlock *FalseDest = BI->getSuccessor(1); BasicBlock *CommonSucc = HoistableBranches[BI]; BasicBlock *HoistTarget = getOrCreateHoistedBlock(BI->getParent()); // Create hoisted versions of blocks that currently don't have them auto CreateHoistedBlock = [&](BasicBlock *Orig) { if (HoistDestinationMap.count(Orig)) return HoistDestinationMap[Orig]; BasicBlock *New = BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent()); HoistDestinationMap[Orig] = New; DT->addNewBlock(New, HoistTarget); if (CurLoop->getParentLoop()) CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI); ++NumCreatedBlocks; LLVM_DEBUG(dbgs() << "LICM created " << New->getName() << " as hoist destination for " << Orig->getName() << "\n"); return New; }; BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest); BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest); BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc); // Link up these blocks with branches. if (!HoistCommonSucc->getTerminator()) { // The new common successor we've generated will branch to whatever that // hoist target branched to. BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor(); assert(TargetSucc && "Expected hoist target to have a single successor"); HoistCommonSucc->moveBefore(TargetSucc); BranchInst::Create(TargetSucc, HoistCommonSucc); } if (!HoistTrueDest->getTerminator()) { HoistTrueDest->moveBefore(HoistCommonSucc); BranchInst::Create(HoistCommonSucc, HoistTrueDest); } if (!HoistFalseDest->getTerminator()) { HoistFalseDest->moveBefore(HoistCommonSucc); BranchInst::Create(HoistCommonSucc, HoistFalseDest); } // If BI is being cloned to what was originally the preheader then // HoistCommonSucc will now be the new preheader. if (HoistTarget == InitialPreheader) { // Phis in the loop header now need to use the new preheader. InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc); MSSAU->wireOldPredecessorsToNewImmediatePredecessor( HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget}); // The new preheader dominates the loop header. DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc); DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader()); DT->changeImmediateDominator(HeaderNode, PreheaderNode); // The preheader hoist destination is now the new preheader, with the // exception of the hoist destination of this branch. 
for (auto &Pair : HoistDestinationMap) if (Pair.second == InitialPreheader && Pair.first != BI->getParent()) Pair.second = HoistCommonSucc; } // Now finally clone BI. ReplaceInstWithInst( HoistTarget->getTerminator(), BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition())); ++NumClonedBranches; assert(CurLoop->getLoopPreheader() && "Hoisting blocks should not have destroyed preheader"); return HoistDestinationMap[BB]; } }; } // namespace /// Walk the specified region of the CFG (defined by all blocks dominated by /// the specified block, and that are in the current loop) in depth first /// order w.r.t the DominatorTree. This allows us to visit definitions before /// uses, allowing us to hoist a loop body in one pass without iteration. /// bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, DominatorTree *DT, BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, Loop *CurLoop, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, ICFLoopSafetyInfo *SafetyInfo, SinkAndHoistLICMFlags &Flags, - OptimizationRemarkEmitter *ORE, bool LoopNestMode) { + OptimizationRemarkEmitter *ORE, bool LoopNestMode, + bool AllowSpeculation) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr && "Unexpected input to hoistRegion."); ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU); // Keep track of instructions that have been hoisted, as they may need to be // re-hoisted if they end up not dominating all of their uses. SmallVector HoistedInstructions; // For PHI hoisting to work we need to hoist blocks before their successors. // We can do this by iterating through the blocks in the loop in reverse // post-order. LoopBlocksRPO Worklist(CurLoop); Worklist.perform(LI); bool Changed = false; for (BasicBlock *BB : Worklist) { // Only need to process the contents of this block if it is not part of a // subloop (which would already have been processed). if (!LoopNestMode && inSubLoop(BB, CurLoop, LI)) continue; for (Instruction &I : llvm::make_early_inc_range(*BB)) { // Try constant folding this instruction. If all the operands are // constants, it is technically hoistable, but it would be better to // just fold it. if (Constant *C = ConstantFoldInstruction( &I, I.getModule()->getDataLayout(), TLI)) { LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n'); // FIXME MSSA: Such replacements may make accesses unoptimized (D51960). I.replaceAllUsesWith(C); if (isInstructionTriviallyDead(&I, TLI)) eraseInstruction(I, *SafetyInfo, MSSAU); Changed = true; continue; } // Try hoisting the instruction out to the preheader. We can only do // this if all of the operands of the instruction are loop invariant and // if it is safe to hoist the instruction. We also check block frequency // to make sure instruction only gets hoisted into colder blocks. // TODO: It may be safe to hoist if we are hoisting to a conditional block // and we have accurately duplicated the control flow from the loop header // to that block. 
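// The hoisting test that follows asks three questions: are the operands loop
// invariant, is the instruction mechanically movable (canSinkOrHoistInst), and
// may it execute unconditionally in the preheader. The AllowSpeculation flag
// threaded through this patch only affects the last question. A condensed
// sketch that mirrors the static isSafeToExecuteUnconditionally() defined
// later in this file (safeToHoistUnconditionally is a hypothetical name, and
// the missed-optimization diagnostics are left out):
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

static bool safeToHoistUnconditionally(Instruction &Inst, const DominatorTree *DT,
                                       const TargetLibraryInfo *TLI,
                                       const Loop *CurLoop,
                                       const LoopSafetyInfo *SafetyInfo,
                                       const Instruction *CtxI,
                                       bool AllowSpeculation) {
  // Speculation (running Inst on paths or iterations that never executed it)
  // is only considered when the caller opted in via AllowSpeculation.
  if (AllowSpeculation && isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))
    return true;
  // Otherwise the instruction must be guaranteed to execute whenever the loop
  // is entered, so moving it to the preheader cannot introduce a trap.
  return SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop);
}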
if (CurLoop->hasLoopInvariantOperands(&I) && canSinkOrHoistInst(I, AA, DT, CurLoop, /*CurAST*/ nullptr, MSSAU, true, &Flags, ORE) && isSafeToExecuteUnconditionally( I, DT, TLI, CurLoop, SafetyInfo, ORE, - CurLoop->getLoopPreheader()->getTerminator())) { + CurLoop->getLoopPreheader()->getTerminator(), AllowSpeculation)) { hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, MSSAU, SE, ORE); HoistedInstructions.push_back(&I); Changed = true; continue; } // Attempt to remove floating point division out of the loop by // converting it to a reciprocal multiplication. if (I.getOpcode() == Instruction::FDiv && I.hasAllowReciprocal() && CurLoop->isLoopInvariant(I.getOperand(1))) { auto Divisor = I.getOperand(1); auto One = llvm::ConstantFP::get(Divisor->getType(), 1.0); auto ReciprocalDivisor = BinaryOperator::CreateFDiv(One, Divisor); ReciprocalDivisor->setFastMathFlags(I.getFastMathFlags()); SafetyInfo->insertInstructionTo(ReciprocalDivisor, I.getParent()); ReciprocalDivisor->insertBefore(&I); auto Product = BinaryOperator::CreateFMul(I.getOperand(0), ReciprocalDivisor); Product->setFastMathFlags(I.getFastMathFlags()); SafetyInfo->insertInstructionTo(Product, I.getParent()); Product->insertAfter(&I); I.replaceAllUsesWith(Product); eraseInstruction(I, *SafetyInfo, MSSAU); hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, MSSAU, SE, ORE); HoistedInstructions.push_back(ReciprocalDivisor); Changed = true; continue; } auto IsInvariantStart = [&](Instruction &I) { using namespace PatternMatch; return I.use_empty() && match(&I, m_Intrinsic()); }; auto MustExecuteWithoutWritesBefore = [&](Instruction &I) { return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) && SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop); }; if ((IsInvariantStart(I) || isGuard(&I)) && CurLoop->hasLoopInvariantOperands(&I) && MustExecuteWithoutWritesBefore(I)) { hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, MSSAU, SE, ORE); HoistedInstructions.push_back(&I); Changed = true; continue; } if (PHINode *PN = dyn_cast(&I)) { if (CFH.canHoistPHI(PN)) { // Redirect incoming blocks first to ensure that we create hoisted // versions of those blocks before we hoist the phi. for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i) PN->setIncomingBlock( i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i))); hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, MSSAU, SE, ORE); assert(DT->dominates(PN, BB) && "Conditional PHIs not expected"); Changed = true; continue; } } // Remember possibly hoistable branches so we can actually hoist them // later if needed. if (BranchInst *BI = dyn_cast(&I)) CFH.registerPossiblyHoistableBranch(BI); } } // If we hoisted instructions to a conditional block they may not dominate // their uses that weren't hoisted (such as phis where some operands are not // loop invariant). If so make them unconditional by moving them to their // immediate dominator. We iterate through the instructions in reverse order // which ensures that when we rehoist an instruction we rehoist its operands, // and also keep track of where in the block we are rehoisting to to make sure // that we rehoist instructions before the instructions that use them. 
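// The FDiv rewrite in the body above trades one division per iteration for a
// single hoisted reciprocal plus a multiply per iteration. It is only applied
// when the instruction carries the allow-reciprocal fast-math flag, since the
// result may differ in the last bits. A standalone source-level sketch of the
// same transformation (scaleBefore/scaleAfter are made-up names):
static void scaleBefore(float *Out, const float *In, int N, float Divisor) {
  for (int I = 0; I < N; ++I)
    Out[I] = In[I] / Divisor;              // one fdiv on every iteration
}

static void scaleAfter(float *Out, const float *In, int N, float Divisor) {
  const float Reciprocal = 1.0f / Divisor; // hoisted: computed once
  for (int I = 0; I < N; ++I)
    Out[I] = In[I] * Reciprocal;           // cheaper fmul per iteration
}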
Instruction *HoistPoint = nullptr; if (ControlFlowHoisting) { for (Instruction *I : reverse(HoistedInstructions)) { if (!llvm::all_of(I->uses(), [&](Use &U) { return DT->dominates(I, U); })) { BasicBlock *Dominator = DT->getNode(I->getParent())->getIDom()->getBlock(); if (!HoistPoint || !DT->dominates(HoistPoint->getParent(), Dominator)) { if (HoistPoint) assert(DT->dominates(Dominator, HoistPoint->getParent()) && "New hoist point expected to dominate old hoist point"); HoistPoint = Dominator->getTerminator(); } LLVM_DEBUG(dbgs() << "LICM rehoisting to " << HoistPoint->getParent()->getNameOrAsOperand() << ": " << *I << "\n"); moveInstructionBefore(*I, *HoistPoint, *SafetyInfo, MSSAU, SE); HoistPoint = I; Changed = true; } } } if (VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); // Now that we've finished hoisting make sure that LI and DT are still // valid. #ifdef EXPENSIVE_CHECKS if (Changed) { assert(DT->verify(DominatorTree::VerificationLevel::Fast) && "Dominator tree verification failed"); LI->verify(*DT); } #endif return Changed; } // Return true if LI is invariant within scope of the loop. LI is invariant if // CurLoop is dominated by an invariant.start representing the same memory // location and size as the memory location LI loads from, and also the // invariant.start has no uses. static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT, Loop *CurLoop) { Value *Addr = LI->getOperand(0); const DataLayout &DL = LI->getModule()->getDataLayout(); const TypeSize LocSizeInBits = DL.getTypeSizeInBits(LI->getType()); // It is not currently possible for clang to generate an invariant.start // intrinsic with scalable vector types because we don't support thread local // sizeless types and we don't permit sizeless types in structs or classes. // Furthermore, even if support is added for this in future the intrinsic // itself is defined to have a size of -1 for variable sized objects. This // makes it impossible to verify if the intrinsic envelops our region of // interest. For example, both and // types would have a -1 parameter, but the former is clearly double the size // of the latter. if (LocSizeInBits.isScalable()) return false; // if the type is i8 addrspace(x)*, we know this is the type of // llvm.invariant.start operand auto *PtrInt8Ty = PointerType::get(Type::getInt8Ty(LI->getContext()), LI->getPointerAddressSpace()); unsigned BitcastsVisited = 0; // Look through bitcasts until we reach the i8* type (this is invariant.start // operand type). while (Addr->getType() != PtrInt8Ty) { auto *BC = dyn_cast(Addr); // Avoid traversing high number of bitcast uses. if (++BitcastsVisited > MaxNumUsesTraversed || !BC) return false; Addr = BC->getOperand(0); } // If we've ended up at a global/constant, bail. We shouldn't be looking at // uselists for non-local Values in a loop pass. if (isa(Addr)) return false; unsigned UsesVisited = 0; // Traverse all uses of the load operand value, to see if invariant.start is // one of the uses, and whether it dominates the load instruction. for (auto *U : Addr->users()) { // Avoid traversing for Load operand with high number of users. if (++UsesVisited > MaxNumUsesTraversed) return false; IntrinsicInst *II = dyn_cast(U); // If there are escaping uses of invariant.start instruction, the load maybe // non-invariant. 
if (!II || II->getIntrinsicID() != Intrinsic::invariant_start || !II->use_empty()) continue; ConstantInt *InvariantSize = cast(II->getArgOperand(0)); // The intrinsic supports having a -1 argument for variable sized objects // so we should check for that here. if (InvariantSize->isNegative()) continue; uint64_t InvariantSizeInBits = InvariantSize->getSExtValue() * 8; // Confirm the invariant.start location size contains the load operand size // in bits. Also, the invariant.start should dominate the load, and we // should not hoist the load out of a loop that contains this dominating // invariant.start. if (LocSizeInBits.getFixedSize() <= InvariantSizeInBits && DT->properlyDominates(II->getParent(), CurLoop->getHeader())) return true; } return false; } namespace { /// Return true if-and-only-if we know how to (mechanically) both hoist and /// sink a given instruction out of a loop. Does not address legality /// concerns such as aliasing or speculation safety. bool isHoistableAndSinkableInst(Instruction &I) { // Only these instructions are hoistable/sinkable. return (isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I) || isa(I)); } /// Return true if all of the alias sets within this AST are known not to /// contain a Mod, or if MSSA knows there are no MemoryDefs in the loop. bool isReadOnly(AliasSetTracker *CurAST, const MemorySSAUpdater *MSSAU, const Loop *L) { if (CurAST) { for (AliasSet &AS : *CurAST) { if (!AS.isForwardingAliasSet() && AS.isMod()) { return false; } } return true; } else { /*MSSAU*/ for (auto *BB : L->getBlocks()) if (MSSAU->getMemorySSA()->getBlockDefs(BB)) return false; return true; } } /// Return true if I is the only Instruction with a MemoryAccess in L. bool isOnlyMemoryAccess(const Instruction *I, const Loop *L, const MemorySSAUpdater *MSSAU) { for (auto *BB : L->getBlocks()) if (auto *Accs = MSSAU->getMemorySSA()->getBlockAccesses(BB)) { int NotAPhi = 0; for (const auto &Acc : *Accs) { if (isa(&Acc)) continue; const auto *MUD = cast(&Acc); if (MUD->getMemoryInst() != I || NotAPhi++ == 1) return false; } } return true; } } bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop, SinkAndHoistLICMFlags *Flags, OptimizationRemarkEmitter *ORE) { assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) && "Either AliasSetTracker or MemorySSA should be initialized."); // If we don't understand the instruction, bail early. if (!isHoistableAndSinkableInst(I)) return false; MemorySSA *MSSA = MSSAU ? MSSAU->getMemorySSA() : nullptr; if (MSSA) assert(Flags != nullptr && "Flags cannot be null."); // Loads have extra constraints we have to verify before we can hoist them. if (LoadInst *LI = dyn_cast(&I)) { if (!LI->isUnordered()) return false; // Don't sink/hoist volatile or ordered atomic loads! // Loads from constant memory are always safe to move, even if they end up // in the same alias set as something that ends up being modified. if (AA->pointsToConstantMemory(LI->getOperand(0))) return true; if (LI->hasMetadata(LLVMContext::MD_invariant_load)) return true; if (LI->isAtomic() && !TargetExecutesOncePerLoop) return false; // Don't risk duplicating unordered loads // This checks for an invariant.start dominating the load. 
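// isLoadInvariantInLoop() above only accepts an llvm.invariant.start whose
// size operand (given in bytes, with -1 meaning a variable-sized object)
// covers the loaded location (measured in bits), and whose block properly
// dominates the loop header. A standalone sketch of just that unit handling
// and comparison, with the dominance query abstracted into a boolean
// parameter (invariantStartCoversLoad is a hypothetical name):
#include <cstdint>

static bool invariantStartCoversLoad(int64_t InvariantSizeInBytes, // -1: unknown
                                     uint64_t LoadSizeInBits,
                                     bool StartProperlyDominatesHeader) {
  if (InvariantSizeInBytes < 0)
    return false; // variable-sized region: nothing can be concluded
  uint64_t InvariantSizeInBits = uint64_t(InvariantSizeInBytes) * 8;
  return LoadSizeInBits <= InvariantSizeInBits && StartProperlyDominatesHeader;
}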
if (isLoadInvariantInLoop(LI, DT, CurLoop)) return true; bool Invalidated; if (CurAST) Invalidated = pointerInvalidatedByLoop(MemoryLocation::get(LI), CurAST, CurLoop, AA); else Invalidated = pointerInvalidatedByLoopWithMSSA( MSSA, cast(MSSA->getMemoryAccess(LI)), CurLoop, I, *Flags); // Check loop-invariant address because this may also be a sinkable load // whose address is not necessarily loop-invariant. if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand())) ORE->emit([&]() { return OptimizationRemarkMissed( DEBUG_TYPE, "LoadWithLoopInvariantAddressInvalidated", LI) << "failed to move load with loop-invariant address " "because the loop may invalidate its value"; }); return !Invalidated; } else if (CallInst *CI = dyn_cast(&I)) { // Don't sink or hoist dbg info; it's legal, but not useful. if (isa(I)) return false; // Don't sink calls which can throw. if (CI->mayThrow()) return false; // Convergent attribute has been used on operations that involve // inter-thread communication which results are implicitly affected by the // enclosing control flows. It is not safe to hoist or sink such operations // across control flow. if (CI->isConvergent()) return false; using namespace PatternMatch; if (match(CI, m_Intrinsic())) // Assumes don't actually alias anything or throw return true; if (match(CI, m_Intrinsic())) // Widenable conditions don't actually alias anything or throw return true; // Handle simple cases by querying alias analysis. FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI); if (Behavior == FMRB_DoesNotAccessMemory) return true; if (AAResults::onlyReadsMemory(Behavior)) { // A readonly argmemonly function only reads from memory pointed to by // it's arguments with arbitrary offsets. If we can prove there are no // writes to this memory in the loop, we can hoist or sink. if (AAResults::onlyAccessesArgPointees(Behavior)) { // TODO: expand to writeable arguments for (Value *Op : CI->args()) if (Op->getType()->isPointerTy()) { bool Invalidated; if (CurAST) Invalidated = pointerInvalidatedByLoop( MemoryLocation::getBeforeOrAfter(Op), CurAST, CurLoop, AA); else Invalidated = pointerInvalidatedByLoopWithMSSA( MSSA, cast(MSSA->getMemoryAccess(CI)), CurLoop, I, *Flags); if (Invalidated) return false; } return true; } // If this call only reads from memory and there are no writes to memory // in the loop, we can hoist or sink the call as appropriate. if (isReadOnly(CurAST, MSSAU, CurLoop)) return true; } // FIXME: This should use mod/ref information to see if we can hoist or // sink the call. return false; } else if (auto *FI = dyn_cast(&I)) { // Fences alias (most) everything to provide ordering. For the moment, // just give up if there are any other memory operations in the loop. if (CurAST) { auto Begin = CurAST->begin(); assert(Begin != CurAST->end() && "must contain FI"); if (std::next(Begin) != CurAST->end()) // constant memory for instance, TODO: handle better return false; auto *UniqueI = Begin->getUniqueInstruction(); if (!UniqueI) // other memory op, give up return false; (void)FI; // suppress unused variable warning assert(UniqueI == FI && "AS must contain FI"); return true; } else // MSSAU return isOnlyMemoryAccess(FI, CurLoop, MSSAU); } else if (auto *SI = dyn_cast(&I)) { if (!SI->isUnordered()) return false; // Don't sink/hoist volatile or ordered atomic store! // We can only hoist a store that we can prove writes a value which is not // read or overwritten within the loop. For those cases, we fallback to // load store promotion instead. 
TODO: We can extend this to cases where // there is exactly one write to the location and that write dominates an // arbitrary number of reads in the loop. if (CurAST) { auto &AS = CurAST->getAliasSetFor(MemoryLocation::get(SI)); if (AS.isRef() || !AS.isMustAlias()) // Quick exit test, handled by the full path below as well. return false; auto *UniqueI = AS.getUniqueInstruction(); if (!UniqueI) // other memory op, give up return false; assert(UniqueI == SI && "AS must contain SI"); return true; } else { // MSSAU if (isOnlyMemoryAccess(SI, CurLoop, MSSAU)) return true; // If there are more accesses than the Promotion cap or no "quota" to // check clobber, then give up as we're not walking a list that long. if (Flags->tooManyMemoryAccesses() || Flags->tooManyClobberingCalls()) return false; // If there are interfering Uses (i.e. their defining access is in the // loop), or ordered loads (stored as Defs!), don't move this store. // Could do better here, but this is conservatively correct. // TODO: Cache set of Uses on the first walk in runOnLoop, update when // moving accesses. Can also extend to dominating uses. auto *SIMD = MSSA->getMemoryAccess(SI); for (auto *BB : CurLoop->getBlocks()) if (auto *Accesses = MSSA->getBlockAccesses(BB)) { for (const auto &MA : *Accesses) if (const auto *MU = dyn_cast(&MA)) { auto *MD = MU->getDefiningAccess(); if (!MSSA->isLiveOnEntryDef(MD) && CurLoop->contains(MD->getBlock())) return false; // Disable hoisting past potentially interfering loads. Optimized // Uses may point to an access outside the loop, as getClobbering // checks the previous iteration when walking the backedge. // FIXME: More precise: no Uses that alias SI. if (!Flags->getIsSink() && !MSSA->dominates(SIMD, MU)) return false; } else if (const auto *MD = dyn_cast(&MA)) { if (auto *LI = dyn_cast(MD->getMemoryInst())) { (void)LI; // Silence warning. assert(!LI->isUnordered() && "Expected unordered load"); return false; } // Any call, while it may not be clobbering SI, it may be a use. if (auto *CI = dyn_cast(MD->getMemoryInst())) { // Check if the call may read from the memory location written // to by SI. Check CI's attributes and arguments; the number of // such checks performed is limited above by NoOfMemAccTooLarge. ModRefInfo MRI = AA->getModRefInfo(CI, MemoryLocation::get(SI)); if (isModOrRefSet(MRI)) return false; } } } auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI); Flags->incrementClobberingCalls(); // If there are no clobbering Defs in the loop, store is safe to hoist. return MSSA->isLiveOnEntryDef(Source) || !CurLoop->contains(Source->getBlock()); } } assert(!I.mayReadOrWriteMemory() && "unhandled aliasing"); // We've established mechanical ability and aliasing, it's up to the caller // to check fault safety return true; } /// Returns true if a PHINode is a trivially replaceable with an /// Instruction. /// This is true when all incoming values are that instruction. /// This pattern occurs most often with LCSSA PHI nodes. /// static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) { for (const Value *IncValue : PN.incoming_values()) if (IncValue != &I) return false; return true; } /// Return true if the instruction is free in the loop. 
static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop, const TargetTransformInfo *TTI) { if (const GetElementPtrInst *GEP = dyn_cast(&I)) { if (TTI->getUserCost(GEP, TargetTransformInfo::TCK_SizeAndLatency) != TargetTransformInfo::TCC_Free) return false; // For a GEP, we cannot simply use getUserCost because currently it // optimistically assumes that a GEP will fold into addressing mode // regardless of its users. const BasicBlock *BB = GEP->getParent(); for (const User *U : GEP->users()) { const Instruction *UI = cast(U); if (CurLoop->contains(UI) && (BB != UI->getParent() || (!isa(UI) && !isa(UI)))) return false; } return true; } else return TTI->getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == TargetTransformInfo::TCC_Free; } /// Return true if the only users of this instruction are outside of /// the loop. If this is true, we can sink the instruction to the exit /// blocks of the loop. /// /// We also return true if the instruction could be folded away in lowering. /// (e.g., a GEP can be folded into a load as an addressing mode in the loop). static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, TargetTransformInfo *TTI, bool &FreeInLoop, bool LoopNestMode) { const auto &BlockColors = SafetyInfo->getBlockColors(); bool IsFree = isFreeInLoop(I, CurLoop, TTI); for (const User *U : I.users()) { const Instruction *UI = cast(U); if (const PHINode *PN = dyn_cast(UI)) { const BasicBlock *BB = PN->getParent(); // We cannot sink uses in catchswitches. if (isa(BB->getTerminator())) return false; // We need to sink a callsite to a unique funclet. Avoid sinking if the // phi use is too muddled. if (isa(I)) if (!BlockColors.empty() && BlockColors.find(const_cast(BB))->second.size() != 1) return false; if (LoopNestMode) { while (isa(UI) && UI->hasOneUser() && UI->getNumOperands() == 1) { if (!CurLoop->contains(UI)) break; UI = cast(UI->user_back()); } } } if (CurLoop->contains(UI)) { if (IsFree) { FreeInLoop = true; continue; } return false; } } return true; } static Instruction *cloneInstructionInExitBlock( Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI, const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU) { Instruction *New; if (auto *CI = dyn_cast(&I)) { const auto &BlockColors = SafetyInfo->getBlockColors(); // Sinking call-sites need to be handled differently from other // instructions. The cloned call-site needs a funclet bundle operand // appropriate for its location in the CFG. SmallVector OpBundles; for (unsigned BundleIdx = 0, BundleEnd = CI->getNumOperandBundles(); BundleIdx != BundleEnd; ++BundleIdx) { OperandBundleUse Bundle = CI->getOperandBundleAt(BundleIdx); if (Bundle.getTagID() == LLVMContext::OB_funclet) continue; OpBundles.emplace_back(Bundle); } if (!BlockColors.empty()) { const ColorVector &CV = BlockColors.find(&ExitBlock)->second; assert(CV.size() == 1 && "non-unique color for exit block!"); BasicBlock *BBColor = CV.front(); Instruction *EHPad = BBColor->getFirstNonPHI(); if (EHPad->isEHPad()) OpBundles.emplace_back("funclet", EHPad); } New = CallInst::Create(CI, OpBundles); } else { New = I.clone(); } ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New); if (!I.getName().empty()) New->setName(I.getName() + ".le"); if (MSSAU && MSSAU->getMemorySSA()->getMemoryAccess(&I)) { // Create a new MemoryAccess and let MemorySSA set its defining access. 
MemoryAccess *NewMemAcc = MSSAU->createMemoryAccessInBB( New, nullptr, New->getParent(), MemorySSA::Beginning); if (NewMemAcc) { if (auto *MemDef = dyn_cast(NewMemAcc)) MSSAU->insertDef(MemDef, /*RenameUses=*/true); else { auto *MemUse = cast(NewMemAcc); MSSAU->insertUse(MemUse, /*RenameUses=*/true); } } } // Build LCSSA PHI nodes for any in-loop operands (if legal). Note that // this is particularly cheap because we can rip off the PHI node that we're // replacing for the number and blocks of the predecessors. // OPT: If this shows up in a profile, we can instead finish sinking all // invariant instructions, and then walk their operands to re-establish // LCSSA. That will eliminate creating PHI nodes just to nuke them when // sinking bottom-up. for (Use &Op : New->operands()) if (LI->wouldBeOutOfLoopUseRequiringLCSSA(Op.get(), PN.getParent())) { auto *OInst = cast(Op.get()); PHINode *OpPN = PHINode::Create(OInst->getType(), PN.getNumIncomingValues(), OInst->getName() + ".lcssa", &ExitBlock.front()); for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) OpPN->addIncoming(OInst, PN.getIncomingBlock(i)); Op = OpPN; } return New; } static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater *MSSAU) { if (MSSAU) MSSAU->removeMemoryAccess(&I); SafetyInfo.removeInstruction(&I); I.eraseFromParent(); } static void moveInstructionBefore(Instruction &I, Instruction &Dest, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater *MSSAU, ScalarEvolution *SE) { SafetyInfo.removeInstruction(&I); SafetyInfo.insertInstructionTo(&I, Dest.getParent()); I.moveBefore(&Dest); if (MSSAU) if (MemoryUseOrDef *OldMemAcc = cast_or_null( MSSAU->getMemorySSA()->getMemoryAccess(&I))) MSSAU->moveToPlace(OldMemAcc, Dest.getParent(), MemorySSA::BeforeTerminator); if (SE) SE->forgetValue(&I); } static Instruction *sinkThroughTriviallyReplaceablePHI( PHINode *TPN, Instruction *I, LoopInfo *LI, SmallDenseMap &SunkCopies, const LoopSafetyInfo *SafetyInfo, const Loop *CurLoop, MemorySSAUpdater *MSSAU) { assert(isTriviallyReplaceablePHI(*TPN, *I) && "Expect only trivially replaceable PHI"); BasicBlock *ExitBlock = TPN->getParent(); Instruction *New; auto It = SunkCopies.find(ExitBlock); if (It != SunkCopies.end()) New = It->second; else New = SunkCopies[ExitBlock] = cloneInstructionInExitBlock( *I, *ExitBlock, *TPN, LI, SafetyInfo, MSSAU); return New; } static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) { BasicBlock *BB = PN->getParent(); if (!BB->canSplitPredecessors()) return false; // It's not impossible to split EHPad blocks, but if BlockColors already exist // it require updating BlockColors for all offspring blocks accordingly. By // skipping such corner case, we can make updating BlockColors after splitting // predecessor fairly simple. 
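// Sinking rewrites an LCSSA PHI in an exit block only when it is trivially
// replaceable, meaning every incoming value is the instruction being sunk;
// splitPredecessorsOfLoopExit() below exists precisely to manufacture such
// PHIs when an exit block merges several loop predecessors. A sketch of the
// predicate, same idea as isTriviallyReplaceablePHI() earlier in this file
// (allIncomingValuesAre is a hypothetical name):
#include "llvm/IR/Instructions.h"

static bool allIncomingValuesAre(const llvm::PHINode &PN,
                                 const llvm::Instruction &I) {
  for (const llvm::Value *Incoming : PN.incoming_values())
    if (Incoming != &I)
      return false; // another value reaches this exit PHI
  return true;      // PN can be replaced by a sunk clone of I
}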
if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad()) return false; for (BasicBlock *BBPred : predecessors(BB)) { if (isa(BBPred->getTerminator()) || isa(BBPred->getTerminator())) return false; } return true; } static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, LoopInfo *LI, const Loop *CurLoop, LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU) { #ifndef NDEBUG SmallVector ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); SmallPtrSet ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end()); #endif BasicBlock *ExitBB = PN->getParent(); assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block."); // Split predecessors of the loop exit to make instructions in the loop are // exposed to exit blocks through trivially replaceable PHIs while keeping the // loop in the canonical form where each predecessor of each exit block should // be contained within the loop. For example, this will convert the loop below // from // // LB1: // %v1 = // br %LE, %LB2 // LB2: // %v2 = // br %LE, %LB1 // LE: // %p = phi [%v1, %LB1], [%v2, %LB2] <-- non-trivially replaceable // // to // // LB1: // %v1 = // br %LE.split, %LB2 // LB2: // %v2 = // br %LE.split2, %LB1 // LE.split: // %p1 = phi [%v1, %LB1] <-- trivially replaceable // br %LE // LE.split2: // %p2 = phi [%v2, %LB2] <-- trivially replaceable // br %LE // LE: // %p = phi [%p1, %LE.split], [%p2, %LE.split2] // const auto &BlockColors = SafetyInfo->getBlockColors(); SmallSetVector PredBBs(pred_begin(ExitBB), pred_end(ExitBB)); while (!PredBBs.empty()) { BasicBlock *PredBB = *PredBBs.begin(); assert(CurLoop->contains(PredBB) && "Expect all predecessors are in the loop"); if (PN->getBasicBlockIndex(PredBB) >= 0) { BasicBlock *NewPred = SplitBlockPredecessors( ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true); // Since we do not allow splitting EH-block with BlockColors in // canSplitPredecessors(), we can simply assign predecessor's color to // the new block. if (!BlockColors.empty()) // Grab a reference to the ColorVector to be inserted before getting the // reference to the vector we are copying because inserting the new // element in BlockColors might cause the map to be reallocated. SafetyInfo->copyColors(NewPred, PredBB); } PredBBs.remove(PredBB); } } /// When an instruction is found to only be used outside of the loop, this /// function moves it to the exit blocks and patches up SSA form as needed. /// This method is guaranteed to remove the original instruction from its /// position, and may either delete it or move it to outside of the loop. /// static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, BlockFrequencyInfo *BFI, const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, OptimizationRemarkEmitter *ORE) { bool Changed = false; LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); // Iterate over users to be ready for actual sinking. Replace users via // unreachable blocks with undef and make all user PHIs trivially replaceable. SmallPtrSet VisitedUsers; for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) { auto *User = cast(*UI); Use &U = UI.getUse(); ++UI; if (VisitedUsers.count(User) || CurLoop->contains(User)) continue; if (!DT->isReachableFromEntry(User->getParent())) { U = UndefValue::get(I.getType()); Changed = true; continue; } // The user must be a PHI node. PHINode *PN = cast(User); // Surprisingly, instructions can be used outside of loops without any // exits. 
This can only happen in PHI nodes if the incoming block is // unreachable. BasicBlock *BB = PN->getIncomingBlock(U); if (!DT->isReachableFromEntry(BB)) { U = UndefValue::get(I.getType()); Changed = true; continue; } VisitedUsers.insert(PN); if (isTriviallyReplaceablePHI(*PN, I)) continue; if (!canSplitPredecessors(PN, SafetyInfo)) return Changed; // Split predecessors of the PHI so that we can make users trivially // replaceable. splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo, MSSAU); // Should rebuild the iterators, as they may be invalidated by // splitPredecessorsOfLoopExit(). UI = I.user_begin(); UE = I.user_end(); } if (VisitedUsers.empty()) return Changed; ORE->emit([&]() { return OptimizationRemark(DEBUG_TYPE, "InstSunk", &I) << "sinking " << ore::NV("Inst", &I); }); if (isa(I)) ++NumMovedLoads; else if (isa(I)) ++NumMovedCalls; ++NumSunk; #ifndef NDEBUG SmallVector ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); SmallPtrSet ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end()); #endif // Clones of this instruction. Don't create more than one per exit block! SmallDenseMap SunkCopies; // If this instruction is only used outside of the loop, then all users are // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of // the instruction. // First check if I is worth sinking for all uses. Sink only when it is worth // across all uses. SmallSetVector Users(I.user_begin(), I.user_end()); for (auto *UI : Users) { auto *User = cast(UI); if (CurLoop->contains(User)) continue; PHINode *PN = cast(User); assert(ExitBlockSet.count(PN->getParent()) && "The LCSSA PHI is not in an exit block!"); // The PHI must be trivially replaceable. Instruction *New = sinkThroughTriviallyReplaceablePHI( PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU); PN->replaceAllUsesWith(New); eraseInstruction(*PN, *SafetyInfo, nullptr); Changed = true; } return Changed; } /// When an instruction is found to only use loop invariant operands that /// is safe to hoist, this instruction is called to do the dirty work. /// static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE) { LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getNameOrAsOperand() << ": " << I << "\n"); ORE->emit([&]() { return OptimizationRemark(DEBUG_TYPE, "Hoisted", &I) << "hoisting " << ore::NV("Inst", &I); }); // Metadata can be dependent on conditions we are hoisting above. // Conservatively strip all metadata on the instruction unless we were // guaranteed to execute I if we entered the loop, in which case the metadata // is valid in the loop preheader. // Similarly, If I is a call and it is not guaranteed to execute in the loop, // then moving to the preheader means we should strip attributes on the call // that can cause UB since we may be hoisting above conditions that allowed // inferring those attributes. They may not be valid at the preheader. if ((I.hasMetadataOtherThanDebugLoc() || isa(I)) && // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning // time in isGuaranteedToExecute if we don't actually have anything to // drop. It is a compile time optimization, not required for correctness. !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop)) I.dropUndefImplyingAttrsAndUnknownMetadata(); if (isa(I)) // Move the new node to the end of the phi list in the destination block. 
moveInstructionBefore(I, *Dest->getFirstNonPHI(), *SafetyInfo, MSSAU, SE); else // Move the new node to the destination block, before its terminator. moveInstructionBefore(I, *Dest->getTerminator(), *SafetyInfo, MSSAU, SE); I.updateLocationAfterHoist(); if (isa(I)) ++NumMovedLoads; else if (isa(I)) ++NumMovedCalls; ++NumHoisted; } /// Only sink or hoist an instruction if it is not a trapping instruction, /// or if the instruction is known not to trap when moved to the preheader. /// or if it is a trapping instruction and is guaranteed to execute. -static bool isSafeToExecuteUnconditionally(Instruction &Inst, - const DominatorTree *DT, - const TargetLibraryInfo *TLI, - const Loop *CurLoop, - const LoopSafetyInfo *SafetyInfo, - OptimizationRemarkEmitter *ORE, - const Instruction *CtxI) { - if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI)) +static bool isSafeToExecuteUnconditionally( + Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI, + const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, + OptimizationRemarkEmitter *ORE, const Instruction *CtxI, + bool AllowSpeculation) { + if (AllowSpeculation && isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI)) return true; bool GuaranteedToExecute = SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop); if (!GuaranteedToExecute) { auto *LI = dyn_cast(&Inst); if (LI && CurLoop->isLoopInvariant(LI->getPointerOperand())) ORE->emit([&]() { return OptimizationRemarkMissed( DEBUG_TYPE, "LoadWithLoopInvariantAddressCondExecuted", LI) << "failed to hoist load with loop-invariant address " "because load is conditionally executed"; }); } return GuaranteedToExecute; } namespace { class LoopPromoter : public LoadAndStorePromoter { Value *SomePtr; // Designated pointer to store to. const SmallSetVector &PointerMustAliases; SmallVectorImpl &LoopExitBlocks; SmallVectorImpl &LoopInsertPts; SmallVectorImpl &MSSAInsertPts; PredIteratorCache &PredCache; MemorySSAUpdater *MSSAU; LoopInfo &LI; DebugLoc DL; Align Alignment; bool UnorderedAtomic; AAMDNodes AATags; ICFLoopSafetyInfo &SafetyInfo; bool CanInsertStoresInExitBlocks; // We're about to add a use of V in a loop exit block. Insert an LCSSA phi // (if legal) if doing so would add an out-of-loop use to an instruction // defined in-loop. Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const { if (!LI.wouldBeOutOfLoopUseRequiringLCSSA(V, BB)) return V; Instruction *I = cast(V); // We need to create an LCSSA PHI node for the incoming value and // store that. 
PHINode *PN = PHINode::Create(I->getType(), PredCache.size(BB), I->getName() + ".lcssa", &BB->front()); for (BasicBlock *Pred : PredCache.get(BB)) PN->addIncoming(I, Pred); return PN; } public: LoopPromoter(Value *SP, ArrayRef Insts, SSAUpdater &S, const SmallSetVector &PMA, SmallVectorImpl &LEB, SmallVectorImpl &LIP, SmallVectorImpl &MSSAIP, PredIteratorCache &PIC, MemorySSAUpdater *MSSAU, LoopInfo &li, DebugLoc dl, Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags, ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks) : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP), PredCache(PIC), MSSAU(MSSAU), LI(li), DL(std::move(dl)), Alignment(Alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags), SafetyInfo(SafetyInfo), CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks) {} bool isInstInList(Instruction *I, const SmallVectorImpl &) const override { Value *Ptr; if (LoadInst *LI = dyn_cast(I)) Ptr = LI->getOperand(0); else Ptr = cast(I)->getPointerOperand(); return PointerMustAliases.count(Ptr); } void insertStoresInLoopExitBlocks() { // Insert stores after in the loop exit blocks. Each exit block gets a // store of the live-out values that feed them. Since we've already told // the SSA updater about the defs in the loop and the preheader // definition, it is all set and we can start using it. for (unsigned i = 0, e = LoopExitBlocks.size(); i != e; ++i) { BasicBlock *ExitBlock = LoopExitBlocks[i]; Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); LiveInValue = maybeInsertLCSSAPHI(LiveInValue, ExitBlock); Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock); Instruction *InsertPos = LoopInsertPts[i]; StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos); if (UnorderedAtomic) NewSI->setOrdering(AtomicOrdering::Unordered); NewSI->setAlignment(Alignment); NewSI->setDebugLoc(DL); if (AATags) NewSI->setAAMetadata(AATags); MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i]; MemoryAccess *NewMemAcc; if (!MSSAInsertPoint) { NewMemAcc = MSSAU->createMemoryAccessInBB( NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning); } else { NewMemAcc = MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint); } MSSAInsertPts[i] = NewMemAcc; MSSAU->insertDef(cast(NewMemAcc), true); // FIXME: true for safety, false may still be correct. } } void doExtraRewritesBeforeFinalDeletion() override { if (CanInsertStoresInExitBlocks) insertStoresInLoopExitBlocks(); } void instructionDeleted(Instruction *I) const override { SafetyInfo.removeInstruction(I); MSSAU->removeMemoryAccess(I); } bool shouldDelete(Instruction *I) const override { if (isa(I)) return CanInsertStoresInExitBlocks; return true; } }; bool isNotCapturedBeforeOrInLoop(const Value *V, const Loop *L, DominatorTree *DT) { // We can perform the captured-before check against any instruction in the // loop header, as the loop header is reachable from any instruction inside // the loop. // TODO: ReturnCaptures=true shouldn't be necessary here. return !PointerMayBeCapturedBefore(V, /* ReturnCaptures */ true, /* StoreCaptures */ true, L->getHeader()->getTerminator(), DT); } /// Return true if we can prove that a caller cannot inspect the object if an /// unwind occurs inside the loop. 
bool isNotVisibleOnUnwindInLoop(const Value *Object, const Loop *L, DominatorTree *DT) { bool RequiresNoCaptureBeforeUnwind; if (!isNotVisibleOnUnwind(Object, RequiresNoCaptureBeforeUnwind)) return false; return !RequiresNoCaptureBeforeUnwind || isNotCapturedBeforeOrInLoop(Object, L, DT); } } // namespace /// Try to promote memory values to scalars by sinking stores out of the /// loop and moving loads to before the loop. We do this by looping over /// the stores in the loop, looking for stores to Must pointers which are /// loop invariant. /// bool llvm::promoteLoopAccessesToScalars( const SmallSetVector &PointerMustAliases, SmallVectorImpl &ExitBlocks, SmallVectorImpl &InsertPts, SmallVectorImpl &MSSAInsertPts, PredIteratorCache &PIC, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo, - OptimizationRemarkEmitter *ORE) { + OptimizationRemarkEmitter *ORE, bool AllowSpeculation) { // Verify inputs. assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && "Unexpected Input to promoteLoopAccessesToScalars"); Value *SomePtr = *PointerMustAliases.begin(); BasicBlock *Preheader = CurLoop->getLoopPreheader(); // It is not safe to promote a load/store from the loop if the load/store is // conditional. For example, turning: // // for () { if (c) *P += 1; } // // into: // // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; // // is not safe, because *P may only be valid to access if 'c' is true. // // The safety property divides into two parts: // p1) The memory may not be dereferenceable on entry to the loop. In this // case, we can't insert the required load in the preheader. // p2) The memory model does not allow us to insert a store along any dynamic // path which did not originally have one. // // If at least one store is guaranteed to execute, both properties are // satisfied, and promotion is legal. // // This, however, is not a necessary condition. Even if no store/load is // guaranteed to execute, we can still establish these properties. // We can establish (p1) by proving that hoisting the load into the preheader // is safe (i.e. proving dereferenceability on all paths through the loop). We // can use any access within the alias set to prove dereferenceability, // since they're all must alias. // // There are two ways establish (p2): // a) Prove the location is thread-local. In this case the memory model // requirement does not apply, and stores are safe to insert. // b) Prove a store dominates every exit block. In this case, if an exit // blocks is reached, the original dynamic path would have taken us through // the store, so inserting a store into the exit block is safe. Note that this // is different from the store being guaranteed to execute. For instance, // if an exception is thrown on the first iteration of the loop, the original // store is never executed, but the exit blocks are not executed either. bool DereferenceableInPH = false; bool SafeToInsertStore = false; bool FoundLoadToPromote = false; SmallVector LoopUses; // We start with an alignment of one and try to find instructions that allow // us to prove better alignment. 
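Putting the two safety properties above together, the end-to-end effect of promotion on an unconditional access looks roughly like this (a source-level sketch; the rewritten form is shown in comments):

void count(int *p, int n) {
  for (int i = 0; i < n; ++i)
    *p += 1;              // before: a load and a store of *p per iteration
}
// Conceptual result of promotion; p1 and p2 hold because the store executes
// on every trip through the loop body:
//   int tmp = *p;        // load hoisted into the preheader
//   for (int i = 0; i < n; ++i)
//     tmp += 1;          // the body now operates on a scalar
//   *p = tmp;            // store sunk into the exit block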
Align Alignment; // Keep track of which types of access we see bool SawUnorderedAtomic = false; bool SawNotAtomic = false; AAMDNodes AATags; const DataLayout &MDL = Preheader->getModule()->getDataLayout(); bool IsKnownThreadLocalObject = false; if (SafetyInfo->anyBlockMayThrow()) { // If a loop can throw, we have to insert a store along each unwind edge. // That said, we can't actually make the unwind edge explicit. Therefore, // we have to prove that the store is dead along the unwind edge. We do // this by proving that the caller can't have a reference to the object // after return and thus can't possibly load from the object. Value *Object = getUnderlyingObject(SomePtr); if (!isNotVisibleOnUnwindInLoop(Object, CurLoop, DT)) return false; // Subtlety: Alloca's aren't visible to callers, but *are* potentially // visible to other threads if captured and used during their lifetimes. IsKnownThreadLocalObject = !isa(Object); } // Check that all accesses to pointers in the aliass set use the same type. // We cannot (yet) promote a memory location that is loaded and stored in // different sizes. While we are at it, collect alignment and AA info. Type *AccessTy = nullptr; for (Value *ASIV : PointerMustAliases) { for (User *U : ASIV->users()) { // Ignore instructions that are outside the loop. Instruction *UI = dyn_cast(U); if (!UI || !CurLoop->contains(UI)) continue; // If there is an non-load/store instruction in the loop, we can't promote // it. if (LoadInst *Load = dyn_cast(UI)) { if (!Load->isUnordered()) return false; SawUnorderedAtomic |= Load->isAtomic(); SawNotAtomic |= !Load->isAtomic(); FoundLoadToPromote = true; Align InstAlignment = Load->getAlign(); // Note that proving a load safe to speculate requires proving // sufficient alignment at the target location. Proving it guaranteed // to execute does as well. Thus we can increase our guaranteed // alignment as well. if (!DereferenceableInPH || (InstAlignment > Alignment)) - if (isSafeToExecuteUnconditionally(*Load, DT, TLI, CurLoop, - SafetyInfo, ORE, - Preheader->getTerminator())) { + if (isSafeToExecuteUnconditionally( + *Load, DT, TLI, CurLoop, SafetyInfo, ORE, + Preheader->getTerminator(), AllowSpeculation)) { DereferenceableInPH = true; Alignment = std::max(Alignment, InstAlignment); } } else if (const StoreInst *Store = dyn_cast(UI)) { // Stores *of* the pointer are not interesting, only stores *to* the // pointer. if (UI->getOperand(1) != ASIV) continue; if (!Store->isUnordered()) return false; SawUnorderedAtomic |= Store->isAtomic(); SawNotAtomic |= !Store->isAtomic(); // If the store is guaranteed to execute, both properties are satisfied. // We may want to check if a store is guaranteed to execute even if we // already know that promotion is safe, since it may have higher // alignment than any other guaranteed stores, in which case we can // raise the alignment on the promoted store. Align InstAlignment = Store->getAlign(); if (!DereferenceableInPH || !SafeToInsertStore || (InstAlignment > Alignment)) { if (SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop)) { DereferenceableInPH = true; SafeToInsertStore = true; Alignment = std::max(Alignment, InstAlignment); } } // If a store dominates all exit blocks, it is safe to sink. // As explained above, if an exit block was executed, a dominating // store must have been executed at least once, so we are not // introducing stores on paths that did not have them. // Note that this only looks at explicit exit blocks. 
If we ever // start sinking stores into unwind edges (see above), this will break. if (!SafeToInsertStore) SafeToInsertStore = llvm::all_of(ExitBlocks, [&](BasicBlock *Exit) { return DT->dominates(Store->getParent(), Exit); }); // If the store is not guaranteed to execute, we may still get // deref info through it. if (!DereferenceableInPH) { DereferenceableInPH = isDereferenceableAndAlignedPointer( Store->getPointerOperand(), Store->getValueOperand()->getType(), Store->getAlign(), MDL, Preheader->getTerminator(), DT, TLI); } } else return false; // Not a load or store. if (!AccessTy) AccessTy = getLoadStoreType(UI); else if (AccessTy != getLoadStoreType(UI)) return false; // Merge the AA tags. if (LoopUses.empty()) { // On the first load/store, just take its AA tags. AATags = UI->getAAMetadata(); } else if (AATags) { AATags = AATags.merge(UI->getAAMetadata()); } LoopUses.push_back(UI); } } // If we found both an unordered atomic instruction and a non-atomic memory // access, bail. We can't blindly promote non-atomic to atomic since we // might not be able to lower the result. We can't downgrade since that // would violate memory model. Also, align 0 is an error for atomics. if (SawUnorderedAtomic && SawNotAtomic) return false; // If we're inserting an atomic load in the preheader, we must be able to // lower it. We're only guaranteed to be able to lower naturally aligned // atomics. if (SawUnorderedAtomic && Alignment < MDL.getTypeStoreSize(AccessTy)) return false; // If we couldn't prove we can hoist the load, bail. if (!DereferenceableInPH) return false; // We know we can hoist the load, but don't have a guaranteed store. // Check whether the location is thread-local. If it is, then we can insert // stores along paths which originally didn't have them without violating the // memory model. if (!SafeToInsertStore) { if (IsKnownThreadLocalObject) SafeToInsertStore = true; else { Value *Object = getUnderlyingObject(SomePtr); SafeToInsertStore = (isNoAliasCall(Object) || isa(Object)) && isNotCapturedBeforeOrInLoop(Object, CurLoop, DT); } } // If we've still failed to prove we can sink the store, hoist the load // only, if possible. if (!SafeToInsertStore && !FoundLoadToPromote) // If we cannot hoist the load either, give up. return false; // Lets do the promotion! if (SafeToInsertStore) LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr << '\n'); else LLVM_DEBUG(dbgs() << "LICM: Promoting load of the value: " << *SomePtr << '\n'); ORE->emit([&]() { return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar", LoopUses[0]) << "Moving accesses to memory location out of the loop"; }); ++NumPromoted; // Look at all the loop uses, and try to merge their locations. std::vector LoopUsesLocs; for (auto U : LoopUses) LoopUsesLocs.push_back(U->getDebugLoc().get()); auto DL = DebugLoc(DILocation::getMergedLocations(LoopUsesLocs)); // We use the SSAUpdater interface to insert phi nodes as required. SmallVector NewPHIs; SSAUpdater SSA(&NewPHIs); LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL, Alignment, SawUnorderedAtomic, AATags, *SafetyInfo, SafeToInsertStore); // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. 
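The escape-based fallback above (a noalias call or an alloca that is not captured before or in the loop) in practice, as a sketch that assumes the allocator call is recognized as noalias:

#include <cstdlib>

int sumOdds(int n) {
  int *buf = (int *)std::malloc(sizeof(int));  // noalias result, never captured
  if (!buf)
    return 0;
  *buf = 0;
  for (int i = 0; i < n; ++i)
    if (i & 1)
      *buf += i;   // the store is conditional, yet inserting one in the exit
                   // block is fine: no other thread or caller can observe buf
  int r = *buf;
  std::free(buf);
  return r;
}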
LoadInst *PreheaderLoad = new LoadInst( AccessTy, SomePtr, SomePtr->getName() + ".promoted", Preheader->getTerminator()); if (SawUnorderedAtomic) PreheaderLoad->setOrdering(AtomicOrdering::Unordered); PreheaderLoad->setAlignment(Alignment); PreheaderLoad->setDebugLoc(DebugLoc()); if (AATags) PreheaderLoad->setAAMetadata(AATags); SSA.AddAvailableValue(Preheader, PreheaderLoad); MemoryAccess *PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB( PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End); MemoryUse *NewMemUse = cast(PreheaderLoadMemoryAccess); MSSAU->insertUse(NewMemUse, /*RenameUses=*/true); if (VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); // Rewrite all the loads in the loop and remember all the definitions from // stores in the loop. Promoter.run(LoopUses); if (VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); // If the SSAUpdater didn't use the load in the preheader, just zap it now. if (PreheaderLoad->use_empty()) eraseInstruction(*PreheaderLoad, *SafetyInfo, MSSAU); return true; } static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L, function_ref Fn) { for (const BasicBlock *BB : L->blocks()) if (const auto *Accesses = MSSA->getBlockAccesses(BB)) for (const auto &Access : *Accesses) if (const auto *MUD = dyn_cast(&Access)) Fn(MUD->getMemoryInst()); } static SmallVector, 0> collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) { AliasSetTracker AST(*AA); auto IsPotentiallyPromotable = [L](const Instruction *I) { if (const auto *SI = dyn_cast(I)) return L->isLoopInvariant(SI->getPointerOperand()); if (const auto *LI = dyn_cast(I)) return L->isLoopInvariant(LI->getPointerOperand()); return false; }; // Populate AST with potentially promotable accesses and remove them from // MaybePromotable, so they will not be checked again on the next iteration. SmallPtrSet AttemptingPromotion; foreachMemoryAccess(MSSA, L, [&](Instruction *I) { if (IsPotentiallyPromotable(I)) { AttemptingPromotion.insert(I); AST.add(I); } }); // We're only interested in must-alias sets that contain a mod. SmallVector Sets; for (AliasSet &AS : AST) if (!AS.isForwardingAliasSet() && AS.isMod() && AS.isMustAlias()) Sets.push_back(&AS); if (Sets.empty()) return {}; // Nothing to promote... // Discard any sets for which there is an aliasing non-promotable access. foreachMemoryAccess(MSSA, L, [&](Instruction *I) { if (AttemptingPromotion.contains(I)) return; llvm::erase_if(Sets, [&](const AliasSet *AS) { return AS->aliasesUnknownInst(I, *AA); }); }); SmallVector, 0> Result; for (const AliasSet *Set : Sets) { SmallSetVector PointerMustAliases; for (const auto &ASI : *Set) PointerMustAliases.insert(ASI.getValue()); Result.push_back(std::move(PointerMustAliases)); } return Result; } static bool pointerInvalidatedByLoop(MemoryLocation MemLoc, AliasSetTracker *CurAST, Loop *CurLoop, AAResults *AA) { return CurAST->getAliasSetFor(MemLoc).isMod(); } bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU, Loop *CurLoop, Instruction &I, SinkAndHoistLICMFlags &Flags) { // For hoisting, use the walker to determine safety if (!Flags.getIsSink()) { MemoryAccess *Source; // See declaration of SetLicmMssaOptCap for usage details. if (Flags.tooManyClobberingCalls()) Source = MU->getDefiningAccess(); else { Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU); Flags.incrementClobberingCalls(); } return !MSSA->isLiveOnEntryDef(Source) && CurLoop->contains(Source->getBlock()); } // For sinking, we'd need to check all Defs below this use. 
The getClobbering // call will look on the backedge of the loop, but will check aliasing with // the instructions on the previous iteration. // For example: // for (i ... ) // load a[i] ( Use (LoE) // store a[i] ( 1 = Def (2), with 2 = Phi for the loop. // i++; // The load sees no clobbering inside the loop, as the backedge alias check // does phi translation, and will check aliasing against store a[i-1]. // However sinking the load outside the loop, below the store is incorrect. // For now, only sink if there are no Defs in the loop, and the existing ones // precede the use and are in the same block. // FIXME: Increase precision: Safe to sink if Use post dominates the Def; // needs PostDominatorTreeAnalysis. // FIXME: More precise: no Defs that alias this Use. if (Flags.tooManyMemoryAccesses()) return true; for (auto *BB : CurLoop->getBlocks()) if (pointerInvalidatedByBlockWithMSSA(*BB, *MSSA, *MU)) return true; // When sinking, the source block may not be part of the loop so check it. if (!CurLoop->contains(&I)) return pointerInvalidatedByBlockWithMSSA(*I.getParent(), *MSSA, *MU); return false; } bool pointerInvalidatedByBlockWithMSSA(BasicBlock &BB, MemorySSA &MSSA, MemoryUse &MU) { if (const auto *Accesses = MSSA.getBlockDefs(&BB)) for (const auto &MA : *Accesses) if (const auto *MD = dyn_cast(&MA)) if (MU.getBlock() != MD->getBlock() || !MSSA.locallyDominates(MD, &MU)) return true; return false; } /// Little predicate that returns true if the specified basic block is in /// a subloop of the current one, not the current one itself. /// static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI) { assert(CurLoop->contains(BB) && "Only valid if BB is IN the loop"); return LI->getLoopFor(BB) != CurLoop; } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp index ee17da1875e5..b8972751066d 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -1,429 +1,438 @@ //===- SimplifyCFGPass.cpp - CFG Simplification Pass ----------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements dead code elimination and basic block merging, along // with a collection of other peephole control flow optimizations. For example: // // * Removes basic blocks with no predecessors. // * Merges a basic block into its predecessor if there is only one and the // predecessor only has one successor. // * Eliminates PHI nodes for basic blocks with a single predecessor. // * Eliminates a basic block that only contains an unconditional branch. // * Changes invoke instructions to nounwind functions to be calls. // * Change things like "if (x) if (y)" into "if (x&y)". // * etc.. 
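One of the peepholes listed in the file header above, shown at the source level (work is a hypothetical callee):

void work();

void nested(bool x, bool y) {
  if (x)
    if (y)
      work();   // combined into a single branch: if (x & y) work();
}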
// //===----------------------------------------------------------------------===// #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/ValueHandle.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/SimplifyCFG.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SimplifyCFGOptions.h" #include using namespace llvm; #define DEBUG_TYPE "simplifycfg" static cl::opt UserBonusInstThreshold( "bonus-inst-threshold", cl::Hidden, cl::init(1), cl::desc("Control the number of bonus instructions (default = 1)")); static cl::opt UserKeepLoops( "keep-loops", cl::Hidden, cl::init(true), cl::desc("Preserve canonical loop structure (default = true)")); +static cl::opt UserSwitchRangeToICmp( + "switch-range-to-icmp", cl::Hidden, cl::init(false), + cl::desc( + "Convert switches into an integer range comparison (default = false)")); + static cl::opt UserSwitchToLookup( "switch-to-lookup", cl::Hidden, cl::init(false), cl::desc("Convert switches to lookup tables (default = false)")); static cl::opt UserForwardSwitchCond( "forward-switch-cond", cl::Hidden, cl::init(false), cl::desc("Forward switch condition to phi ops (default = false)")); static cl::opt UserHoistCommonInsts( "hoist-common-insts", cl::Hidden, cl::init(false), cl::desc("hoist common instructions (default = false)")); static cl::opt UserSinkCommonInsts( "sink-common-insts", cl::Hidden, cl::init(false), cl::desc("Sink common instructions (default = false)")); STATISTIC(NumSimpl, "Number of blocks simplified"); static bool performBlockTailMerging(Function &F, ArrayRef BBs, std::vector *Updates) { SmallVector NewOps; // We don't want to change IR just because we can. // Only do that if there are at least two blocks we'll tail-merge. if (BBs.size() < 2) return false; if (Updates) Updates->reserve(Updates->size() + BBs.size()); BasicBlock *CanonicalBB; Instruction *CanonicalTerm; { auto *Term = BBs[0]->getTerminator(); // Create a canonical block for this function terminator type now, // placing it *before* the first block that will branch to it. CanonicalBB = BasicBlock::Create( F.getContext(), Twine("common.") + Term->getOpcodeName(), &F, BBs[0]); // We'll also need a PHI node per each operand of the terminator. NewOps.resize(Term->getNumOperands()); for (auto I : zip(Term->operands(), NewOps)) { std::get<1>(I) = PHINode::Create(std::get<0>(I)->getType(), /*NumReservedValues=*/BBs.size(), CanonicalBB->getName() + ".op"); CanonicalBB->getInstList().push_back(std::get<1>(I)); } // Make it so that this canonical block actually has the right // terminator. CanonicalTerm = Term->clone(); CanonicalBB->getInstList().push_back(CanonicalTerm); // If the canonical terminator has operands, rewrite it to take PHI's. 
for (auto I : zip(NewOps, CanonicalTerm->operands())) std::get<1>(I) = std::get<0>(I); } // Now, go through each block (with the current terminator type) // we've recorded, and rewrite it to branch to the new common block. const DILocation *CommonDebugLoc = nullptr; for (BasicBlock *BB : BBs) { auto *Term = BB->getTerminator(); assert(Term->getOpcode() == CanonicalTerm->getOpcode() && "All blocks to be tail-merged must be the same " "(function-terminating) terminator type."); // Aha, found a new non-canonical function terminator. If it has operands, // forward them to the PHI nodes in the canonical block. for (auto I : zip(Term->operands(), NewOps)) std::get<1>(I)->addIncoming(std::get<0>(I), BB); // Compute the debug location common to all the original terminators. if (!CommonDebugLoc) CommonDebugLoc = Term->getDebugLoc(); else CommonDebugLoc = DILocation::getMergedLocation(CommonDebugLoc, Term->getDebugLoc()); // And turn BB into a block that just unconditionally branches // to the canonical block. Term->eraseFromParent(); BranchInst::Create(CanonicalBB, BB); if (Updates) Updates->push_back({DominatorTree::Insert, BB, CanonicalBB}); } CanonicalTerm->setDebugLoc(CommonDebugLoc); return true; } static bool tailMergeBlocksWithSimilarFunctionTerminators(Function &F, DomTreeUpdater *DTU) { SmallMapVector, 4> Structure; // Scan all the blocks in the function, record the interesting-ones. for (BasicBlock &BB : F) { if (DTU && DTU->isBBPendingDeletion(&BB)) continue; // We are only interested in function-terminating blocks. if (!succ_empty(&BB)) continue; auto *Term = BB.getTerminator(); // Fow now only support `ret`/`resume` function terminators. // FIXME: lift this restriction. switch (Term->getOpcode()) { case Instruction::Ret: case Instruction::Resume: break; default: continue; } // We can't tail-merge block that contains a musttail call. if (BB.getTerminatingMustTailCall()) continue; // Calls to experimental_deoptimize must be followed by a return // of the value computed by experimental_deoptimize. // I.e., we can not change `ret` to `br` for this block. if (auto *CI = dyn_cast_or_null(Term->getPrevNonDebugInstruction())) { if (Function *F = CI->getCalledFunction()) if (Intrinsic::ID ID = F->getIntrinsicID()) if (ID == Intrinsic::experimental_deoptimize) continue; } // PHI nodes cannot have token type, so if the terminator has an operand // with token type, we can not tail-merge this kind of function terminators. if (any_of(Term->operands(), [](Value *Op) { return Op->getType()->isTokenTy(); })) continue; // Canonical blocks are uniqued based on the terminator type (opcode). Structure[Term->getOpcode()].emplace_back(&BB); } bool Changed = false; std::vector Updates; for (ArrayRef BBs : make_second_range(Structure)) Changed |= performBlockTailMerging(F, BBs, DTU ? &Updates : nullptr); if (DTU) DTU->applyUpdates(Updates); return Changed; } /// Call SimplifyCFG on all the blocks in the function, /// iterating until no more changes are made. 
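What the block tail merging above amounts to at the source level (sketch): every ret-terminated block is rewired to branch to one canonical common.ret block whose PHI node carries the returned value.

int sign(int x) {
  if (x < 0)
    return -1;   // ret block #1
  if (x == 0)
    return 0;    // ret block #2
  return 1;      // ret block #3; after merging, all three branch to a single
                 // common.ret block with a phi over {-1, 0, 1}
}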
static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, const SimplifyCFGOptions &Options) { bool Changed = false; bool LocalChange = true; SmallVector, 32> Edges; FindFunctionBackedges(F, Edges); SmallPtrSet UniqueLoopHeaders; for (unsigned i = 0, e = Edges.size(); i != e; ++i) UniqueLoopHeaders.insert(const_cast(Edges[i].second)); SmallVector LoopHeaders(UniqueLoopHeaders.begin(), UniqueLoopHeaders.end()); unsigned IterCnt = 0; (void)IterCnt; while (LocalChange) { assert(IterCnt++ < 1000 && "Iterative simplification didn't converge!"); LocalChange = false; // Loop over all of the basic blocks and remove them if they are unneeded. for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) { BasicBlock &BB = *BBIt++; if (DTU) { assert( !DTU->isBBPendingDeletion(&BB) && "Should not end up trying to simplify blocks marked for removal."); // Make sure that the advanced iterator does not point at the blocks // that are marked for removal, skip over all such blocks. while (BBIt != F.end() && DTU->isBBPendingDeletion(&*BBIt)) ++BBIt; } if (simplifyCFG(&BB, TTI, DTU, Options, LoopHeaders)) { LocalChange = true; ++NumSimpl; } } Changed |= LocalChange; } return Changed; } static bool simplifyFunctionCFGImpl(Function &F, const TargetTransformInfo &TTI, DominatorTree *DT, const SimplifyCFGOptions &Options) { DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); bool EverChanged = removeUnreachableBlocks(F, DT ? &DTU : nullptr); EverChanged |= tailMergeBlocksWithSimilarFunctionTerminators(F, DT ? &DTU : nullptr); EverChanged |= iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options); // If neither pass changed anything, we're done. if (!EverChanged) return false; // iterativelySimplifyCFG can (rarely) make some loops dead. If this happens, // removeUnreachableBlocks is needed to nuke them, which means we should // iterate between the two optimizations. We structure the code like this to // avoid rerunning iterativelySimplifyCFG if the second pass of // removeUnreachableBlocks doesn't do anything. if (!removeUnreachableBlocks(F, DT ? &DTU : nullptr)) return true; do { EverChanged = iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options); EverChanged |= removeUnreachableBlocks(F, DT ? &DTU : nullptr); } while (EverChanged); return true; } static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI, DominatorTree *DT, const SimplifyCFGOptions &Options) { assert((!RequireAndPreserveDomTree || (DT && DT->verify(DominatorTree::VerificationLevel::Full))) && "Original domtree is invalid?"); bool Changed = simplifyFunctionCFGImpl(F, TTI, DT, Options); assert((!RequireAndPreserveDomTree || (DT && DT->verify(DominatorTree::VerificationLevel::Full))) && "Failed to maintain validity of domtree!"); return Changed; } // Command-line settings override compile-time settings. 
static void applyCommandLineOverridesToOptions(SimplifyCFGOptions &Options) { if (UserBonusInstThreshold.getNumOccurrences()) Options.BonusInstThreshold = UserBonusInstThreshold; if (UserForwardSwitchCond.getNumOccurrences()) Options.ForwardSwitchCondToPhi = UserForwardSwitchCond; + if (UserSwitchRangeToICmp.getNumOccurrences()) + Options.ConvertSwitchRangeToICmp = UserSwitchRangeToICmp; if (UserSwitchToLookup.getNumOccurrences()) Options.ConvertSwitchToLookupTable = UserSwitchToLookup; if (UserKeepLoops.getNumOccurrences()) Options.NeedCanonicalLoop = UserKeepLoops; if (UserHoistCommonInsts.getNumOccurrences()) Options.HoistCommonInsts = UserHoistCommonInsts; if (UserSinkCommonInsts.getNumOccurrences()) Options.SinkCommonInsts = UserSinkCommonInsts; } SimplifyCFGPass::SimplifyCFGPass() { applyCommandLineOverridesToOptions(Options); } SimplifyCFGPass::SimplifyCFGPass(const SimplifyCFGOptions &Opts) : Options(Opts) { applyCommandLineOverridesToOptions(Options); } void SimplifyCFGPass::printPipeline( raw_ostream &OS, function_ref MapClassName2PassName) { static_cast *>(this)->printPipeline( OS, MapClassName2PassName); OS << "<"; OS << "bonus-inst-threshold=" << Options.BonusInstThreshold << ";"; OS << (Options.ForwardSwitchCondToPhi ? "" : "no-") << "forward-switch-cond;"; + OS << (Options.ConvertSwitchRangeToICmp ? "" : "no-") + << "switch-range-to-icmp;"; OS << (Options.ConvertSwitchToLookupTable ? "" : "no-") << "switch-to-lookup;"; OS << (Options.NeedCanonicalLoop ? "" : "no-") << "keep-loops;"; OS << (Options.HoistCommonInsts ? "" : "no-") << "hoist-common-insts;"; OS << (Options.SinkCommonInsts ? "" : "no-") << "sink-common-insts"; OS << ">"; } PreservedAnalyses SimplifyCFGPass::run(Function &F, FunctionAnalysisManager &AM) { auto &TTI = AM.getResult(F); Options.AC = &AM.getResult(F); DominatorTree *DT = nullptr; if (RequireAndPreserveDomTree) DT = &AM.getResult(F); if (F.hasFnAttribute(Attribute::OptForFuzzing)) { Options.setSimplifyCondBranch(false).setFoldTwoEntryPHINode(false); } else { Options.setSimplifyCondBranch(true).setFoldTwoEntryPHINode(true); } if (!simplifyFunctionCFG(F, TTI, DT, Options)) return PreservedAnalyses::all(); PreservedAnalyses PA; if (RequireAndPreserveDomTree) PA.preserve(); return PA; } namespace { struct CFGSimplifyPass : public FunctionPass { static char ID; SimplifyCFGOptions Options; std::function PredicateFtor; CFGSimplifyPass(SimplifyCFGOptions Options_ = SimplifyCFGOptions(), std::function Ftor = nullptr) : FunctionPass(ID), Options(Options_), PredicateFtor(std::move(Ftor)) { initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry()); // Check for command-line overrides of options for debug/customization. 
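A minimal sketch of setting the new option programmatically under the new pass manager (it assumes an LLVM build providing these headers; the surrounding PassBuilder setup is omitted):

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"

void scheduleSimplifyCFG(llvm::FunctionPassManager &FPM) {
  llvm::SimplifyCFGOptions Opts;
  Opts.ConvertSwitchRangeToICmp = true;      // field assigned in the hunk above
  FPM.addPass(llvm::SimplifyCFGPass(Opts));  // constructor shown earlier
}
// With the printPipeline change above, this instance reports
// "switch-range-to-icmp" rather than "no-switch-range-to-icmp" in its printed
// parameter list.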
applyCommandLineOverridesToOptions(Options); } bool runOnFunction(Function &F) override { if (skipFunction(F) || (PredicateFtor && !PredicateFtor(F))) return false; Options.AC = &getAnalysis().getAssumptionCache(F); DominatorTree *DT = nullptr; if (RequireAndPreserveDomTree) DT = &getAnalysis().getDomTree(); if (F.hasFnAttribute(Attribute::OptForFuzzing)) { Options.setSimplifyCondBranch(false) .setFoldTwoEntryPHINode(false); } else { Options.setSimplifyCondBranch(true) .setFoldTwoEntryPHINode(true); } auto &TTI = getAnalysis().getTTI(F); return simplifyFunctionCFG(F, TTI, DT, Options); } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); if (RequireAndPreserveDomTree) AU.addRequired(); AU.addRequired(); if (RequireAndPreserveDomTree) AU.addPreserved(); AU.addPreserved(); } }; } char CFGSimplifyPass::ID = 0; INITIALIZE_PASS_BEGIN(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false, false) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false, false) // Public interface to the CFGSimplification pass FunctionPass * llvm::createCFGSimplificationPass(SimplifyCFGOptions Options, std::function Ftor) { return new CFGSimplifyPass(Options, std::move(Ftor)); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 335ac03ccb52..8c4e1b381b4d 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1,6795 +1,6797 @@ //===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Peephole optimize the CFG. 
// //===----------------------------------------------------------------------===// #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/Sequence.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/GuardUtils.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/NoFolder.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/PseudoProbe.h" #include "llvm/IR/Type.h" #include "llvm/IR/Use.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include #include #include #include #include #include #include #include #include #include #include using namespace llvm; using namespace PatternMatch; #define DEBUG_TYPE "simplifycfg" cl::opt llvm::RequireAndPreserveDomTree( "simplifycfg-require-and-preserve-domtree", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Temorary development switch used to gradually uplift SimplifyCFG " "into preserving DomTree,")); // Chosen as 2 so as to be cheap, but still to have enough power to fold // a select, so the "clamp" idiom (of a min followed by a max) will be caught. // To catch this, we need to fold a compare and a select, hence '2' being the // minimum reasonable default. 
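The "clamp" idiom that motivates the default of 2 in the threshold declared next, at the source level:

int clamp(int x, int lo, int hi) {
  if (x < lo) x = lo;   // min half: a compare plus, once flattened, a select
  if (x > hi) x = hi;   // max half: the second compare/select pair
  return x;
}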
static cl::opt PHINodeFoldingThreshold( "phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc( "Control the amount of phi node folding to perform (default = 2)")); static cl::opt TwoEntryPHINodeFoldingThreshold( "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)")); static cl::opt HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block")); static cl::opt SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block")); static cl::opt HoistCondStores( "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes")); static cl::opt MergeCondStores( "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store")); static cl::opt MergeCondStoresAggressively( "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result")); static cl::opt SpeculateOneExpensiveInst( "speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed")); static cl::opt MaxSpeculationDepth( "max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions")); static cl::opt MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through")); // Two is chosen to allow one negation and a logical combine. 
static cl::opt BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches")); static cl::opt BranchFoldToCommonDestVectorMultiplier( "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present")); STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping"); STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); STATISTIC( NumLookupTablesHoles, "Number of switch instructions turned into lookup tables (holes checked)"); STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares"); STATISTIC(NumFoldValueComparisonIntoPredecessors, "Number of value comparisons folded into predecessor basic blocks"); STATISTIC(NumFoldBranchToCommonDest, "Number of branches folded into predecessor basic block"); STATISTIC( NumHoistCommonCode, "Number of common instruction 'blocks' hoisted up to the begin block"); STATISTIC(NumHoistCommonInstrs, "Number of common instructions hoisted up to the begin block"); STATISTIC(NumSinkCommonCode, "Number of common instruction 'blocks' sunk down to the end block"); STATISTIC(NumSinkCommonInstrs, "Number of common instructions sunk down to the end block"); STATISTIC(NumSpeculations, "Number of speculative executed instructions"); STATISTIC(NumInvokes, "Number of invokes with empty resume blocks simplified into calls"); namespace { // The first field contains the value that the switch produces when a certain // case group is selected, and the second field is a vector containing the // cases composing the case group. using SwitchCaseResultVectorTy = SmallVector>, 2>; // The first field contains the phi node that generates a result of the switch // and the second field contains the value generated for a certain case in the // switch for that PHI. using SwitchCaseResultsTy = SmallVector, 4>; /// ValueEqualityComparisonCase - Represents a case of a switch. struct ValueEqualityComparisonCase { ConstantInt *Value; BasicBlock *Dest; ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest) : Value(Value), Dest(Dest) {} bool operator<(ValueEqualityComparisonCase RHS) const { // Comparing pointers is ok as we only rely on the order for uniquing. 
return Value < RHS.Value; } bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; } }; class SimplifyCFGOpt { const TargetTransformInfo &TTI; DomTreeUpdater *DTU; const DataLayout &DL; ArrayRef LoopHeaders; const SimplifyCFGOptions &Options; bool Resimplify; Value *isValueEqualityComparison(Instruction *TI); BasicBlock *GetValueEqualityComparisonCases( Instruction *TI, std::vector &Cases); bool SimplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder); bool PerformValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder); bool FoldValueComparisonIntoPredecessors(Instruction *TI, IRBuilder<> &Builder); bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder); bool simplifySingleResume(ResumeInst *RI); bool simplifyCommonResume(ResumeInst *RI); bool simplifyCleanupReturn(CleanupReturnInst *RI); bool simplifyUnreachable(UnreachableInst *UI); bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder); bool simplifyIndirectBr(IndirectBrInst *IBI); bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder); bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder); bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder); bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, IRBuilder<> &Builder); bool HoistThenElseCodeToIf(BranchInst *BI, const TargetTransformInfo &TTI, bool EqTermsOnly); bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, const TargetTransformInfo &TTI); bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond, BasicBlock *TrueBB, BasicBlock *FalseBB, uint32_t TrueWeight, uint32_t FalseWeight); bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, const DataLayout &DL); bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select); bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI); bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder); public: SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU, const DataLayout &DL, ArrayRef LoopHeaders, const SimplifyCFGOptions &Opts) : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) { assert((!DTU || !DTU->hasPostDomTree()) && "SimplifyCFG is not yet capable of maintaining validity of a " "PostDomTree, so don't ask for it."); } bool simplifyOnce(BasicBlock *BB); bool run(BasicBlock *BB); // Helper to set Resimplify and return change indication. bool requestResimplify() { Resimplify = true; return true; } }; } // end anonymous namespace /// Return true if it is safe to merge these two /// terminator instructions together. static bool SafeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector *FailBlocks = nullptr) { if (SI1 == SI2) return false; // Can't merge with self! // It is not safe to merge these two switch instructions if they have a common // successor, and if that successor has a PHI node, and if *that* PHI node has // conflicting incoming values from the two switch blocks. 
BasicBlock *SI1BB = SI1->getParent(); BasicBlock *SI2BB = SI2->getParent(); SmallPtrSet SI1Succs(succ_begin(SI1BB), succ_end(SI1BB)); bool Fail = false; for (BasicBlock *Succ : successors(SI2BB)) if (SI1Succs.count(Succ)) for (BasicBlock::iterator BBI = Succ->begin(); isa(BBI); ++BBI) { PHINode *PN = cast(BBI); if (PN->getIncomingValueForBlock(SI1BB) != PN->getIncomingValueForBlock(SI2BB)) { if (FailBlocks) FailBlocks->insert(Succ); Fail = true; } } return !Fail; } /// Update PHI nodes in Succ to indicate that there will now be entries in it /// from the 'NewPred' block. The values that will be flowing into the PHI nodes /// will be the same as those coming in from ExistPred, an existing predecessor /// of Succ. static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU = nullptr) { for (PHINode &PN : Succ->phis()) PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred); if (MSSAU) if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ)) MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred); } /// Compute an abstract "cost" of speculating the given instruction, /// which is assumed to be safe to speculate. TCC_Free means cheap, /// TCC_Basic means less cheap, and TCC_Expensive means prohibitively /// expensive. static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI) { assert(isSafeToSpeculativelyExecute(I) && "Instruction is not safe to speculatively execute!"); return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency); } /// If we have a merge point of an "if condition" as accepted above, /// return true if the specified value dominates the block. We /// don't handle the true generality of domination here, just a special case /// which works well enough for us. /// /// If AggressiveInsts is non-null, and if V does not dominate BB, we check to /// see if V (which must be an instruction) and its recursive operands /// that do not dominate BB have a combined cost lower than Budget and /// are non-trapping. If both are true, the instruction is inserted into the /// set and true is returned. /// /// The cost for most non-trapping instructions is defined as 1 except for /// Select whose cost is 2. /// /// After this function returns, Cost is increased by the cost of /// V plus its non-dominating operands. If that cost is greater than /// Budget, false is returned and Cost is undefined. static bool dominatesMergePoint(Value *V, BasicBlock *BB, SmallPtrSetImpl &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, unsigned Depth = 0) { // It is possible to hit a zero-cost cycle (phi/gep instructions for example), // so limit the recursion depth. // TODO: While this recursion limit does prevent pathological behavior, it // would be better to track visited instructions to avoid cycles. if (Depth == MaxSpeculationDepth) return false; Instruction *I = dyn_cast(V); if (!I) { // Non-instructions all dominate instructions, but not all constantexprs // can be executed unconditionally. if (ConstantExpr *C = dyn_cast(V)) if (C->canTrap()) return false; return true; } BasicBlock *PBB = I->getParent(); // We don't want to allow weird loops that might have the "if condition" in // the bottom of this block. if (PBB == BB) return false; // If this instruction is defined in a block that contains an unconditional // branch to BB, then it must be in the 'conditional' part of the "if // statement". 
If not, it definitely dominates the region. BranchInst *BI = dyn_cast(PBB->getTerminator()); if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB) return true; // If we have seen this instruction before, don't count it again. if (AggressiveInsts.count(I)) return true; // Okay, it looks like the instruction IS in the "condition". Check to // see if it's a cheap instruction to unconditionally compute, and if it // only uses stuff defined outside of the condition. If so, hoist it out. if (!isSafeToSpeculativelyExecute(I)) return false; Cost += computeSpeculationCost(I, TTI); // Allow exactly one instruction to be speculated regardless of its cost // (as long as it is safe to do so). // This is intended to flatten the CFG even if the instruction is a division // or other expensive operation. The speculation of an expensive instruction // is expected to be undone in CodeGenPrepare if the speculation has not // enabled further IR optimizations. if (Cost > Budget && (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 || !Cost.isValid())) return false; // Okay, we can only really hoist these out if their operands do // not take us over the cost threshold. for (Use &Op : I->operands()) if (!dominatesMergePoint(Op, BB, AggressiveInsts, Cost, Budget, TTI, Depth + 1)) return false; // Okay, it's safe to do this! Remember this instruction. AggressiveInsts.insert(I); return true; } /// Extract ConstantInt from value, looking through IntToPtr /// and PointerNullValue. Return NULL if value is not a constant int. static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) { // Normal constant int. ConstantInt *CI = dyn_cast(V); if (CI || !isa(V) || !V->getType()->isPointerTy()) return CI; // This is some kind of pointer constant. Turn it into a pointer-sized // ConstantInt if possible. IntegerType *PtrTy = cast(DL.getIntPtrType(V->getType())); // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*). if (isa(V)) return ConstantInt::get(PtrTy, 0); // IntToPtr const int. if (ConstantExpr *CE = dyn_cast(V)) if (CE->getOpcode() == Instruction::IntToPtr) if (ConstantInt *CI = dyn_cast(CE->getOperand(0))) { // The constant is very likely to have the right type already. if (CI->getType() == PtrTy) return CI; else return cast( ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false)); } return nullptr; } namespace { /// Given a chain of or (||) or and (&&) comparison of a value against a /// constant, this will try to recover the information required for a switch /// structure. /// It will depth-first traverse the chain of comparison, seeking for patterns /// like %a == 12 or %a < 4 and combine them to produce a set of integer /// representing the different cases for the switch. /// Note that if the chain is composed of '||' it will build the set of elements /// that matches the comparisons (i.e. any of this value validate the chain) /// while for a chain of '&&' it will build the set elements that make the test /// fail. 
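The comparison chains the gatherer declared next recovers, illustrated at the source level:

bool isVowel(char c) {
  // An ||-chain of equality tests of one value against constants; the set
  // {'a','e','i','o','u'} is collected so the chain can be rebuilt as a
  // switch on c.
  return c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u';
}
// A bounded compare such as (unsigned)x < 4u contributes the whole range
// {0,1,2,3}, matching the "%a < 4" pattern mentioned above.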
struct ConstantComparesGatherer { const DataLayout &DL; /// Value found for the switch comparison Value *CompValue = nullptr; /// Extra clause to be checked before the switch Value *Extra = nullptr; /// Set of integers to match in switch SmallVector Vals; /// Number of comparisons matched in the and/or chain unsigned UsedICmps = 0; /// Construct and compute the result for the comparison instruction Cond ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) { gather(Cond); } ConstantComparesGatherer(const ConstantComparesGatherer &) = delete; ConstantComparesGatherer & operator=(const ConstantComparesGatherer &) = delete; private: /// Try to set the current value used for the comparison, it succeeds only if /// it wasn't set before or if the new value is the same as the old one bool setValueOnce(Value *NewVal) { if (CompValue && CompValue != NewVal) return false; CompValue = NewVal; return (CompValue != nullptr); } /// Try to match Instruction "I" as a comparison against a constant and /// populates the array Vals with the set of values that match (or do not /// match depending on isEQ). /// Return false on failure. On success, the Value the comparison matched /// against is placed in CompValue. /// If CompValue is already set, the function is expected to fail if a match /// is found but the value compared to is different. bool matchInstruction(Instruction *I, bool isEQ) { // If this is an icmp against a constant, handle this as one of the cases. ICmpInst *ICI; ConstantInt *C; if (!((ICI = dyn_cast(I)) && (C = GetConstantInt(I->getOperand(1), DL)))) { return false; } Value *RHSVal; const APInt *RHSC; // Pattern match a special case // (x & ~2^z) == y --> x == y || x == y|2^z // This undoes a transformation done by instcombine to fuse 2 compares. if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) { // It's a little bit hard to see why the following transformations are // correct. Here is a CVC3 program to verify them for 64-bit values: /* ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63); x : BITVECTOR(64); y : BITVECTOR(64); z : BITVECTOR(64); mask : BITVECTOR(64) = BVSHL(ONE, z); QUERY( (y & ~mask = y) => ((x & ~mask = y) <=> (x = y OR x = (y | mask))) ); QUERY( (y | mask = y) => ((x | mask = y) <=> (x = y OR x = (y & ~mask))) ); */ // Please note that each pattern must be a dual implication (<--> or // iff). One directional implication can create spurious matches. If the // implication is only one-way, an unsatisfiable condition on the left // side can imply a satisfiable condition on the right side. Dual // implication ensures that satisfiable conditions are transformed to // other satisfiable conditions and unsatisfiable conditions are // transformed to other unsatisfiable conditions. // Here is a concrete example of a unsatisfiable condition on the left // implying a satisfiable condition on the right: // // mask = (1 << z) // (x & ~mask) == y --> (x == y || x == (y | mask)) // // Substituting y = 3, z = 0 yields: // (x & -2) == 3 --> (x == 3 || x == 2) // Pattern match a special case: /* QUERY( (y & ~mask = y) => ((x & ~mask = y) <=> (x = y OR x = (y | mask))) ); */ if (match(ICI->getOperand(0), m_And(m_Value(RHSVal), m_APInt(RHSC)))) { APInt Mask = ~*RHSC; if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) { // If we already have a value for the switch, it has to match! 
if (!setValueOnce(RHSVal)) return false; Vals.push_back(C); Vals.push_back( ConstantInt::get(C->getContext(), C->getValue() | Mask)); UsedICmps++; return true; } } // Pattern match a special case: /* QUERY( (y | mask = y) => ((x | mask = y) <=> (x = y OR x = (y & ~mask))) ); */ if (match(ICI->getOperand(0), m_Or(m_Value(RHSVal), m_APInt(RHSC)))) { APInt Mask = *RHSC; if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) { // If we already have a value for the switch, it has to match! if (!setValueOnce(RHSVal)) return false; Vals.push_back(C); Vals.push_back(ConstantInt::get(C->getContext(), C->getValue() & ~Mask)); UsedICmps++; return true; } } // If we already have a value for the switch, it has to match! if (!setValueOnce(ICI->getOperand(0))) return false; UsedICmps++; Vals.push_back(C); return ICI->getOperand(0); } // If we have "x ult 3", for example, then we can add 0,1,2 to the set. ConstantRange Span = ConstantRange::makeExactICmpRegion(ICI->getPredicate(), C->getValue()); // Shift the range if the compare is fed by an add. This is the range // compare idiom as emitted by instcombine. Value *CandidateVal = I->getOperand(0); if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) { Span = Span.subtract(*RHSC); CandidateVal = RHSVal; } // If this is an and/!= check, then we are looking to build the set of // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into // x != 0 && x != 1. if (!isEQ) Span = Span.inverse(); // If there are a ton of values, we don't want to make a ginormous switch. if (Span.isSizeLargerThan(8) || Span.isEmptySet()) { return false; } // If we already have a value for the switch, it has to match! if (!setValueOnce(CandidateVal)) return false; // Add all values from the range to the set for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp) Vals.push_back(ConstantInt::get(I->getContext(), Tmp)); UsedICmps++; return true; } /// Given a potentially 'or'd or 'and'd together collection of icmp /// eq/ne/lt/gt instructions that compare a value against a constant, extract /// the value being compared, and stick the list constants into the Vals /// vector. /// One "Extra" case is allowed to differ from the other. void gather(Value *V) { bool isEQ = match(V, m_LogicalOr(m_Value(), m_Value())); // Keep a stack (SmallVector for efficiency) for depth-first traversal SmallVector DFT; SmallPtrSet Visited; // Initialize Visited.insert(V); DFT.push_back(V); while (!DFT.empty()) { V = DFT.pop_back_val(); if (Instruction *I = dyn_cast(V)) { // If it is a || (or && depending on isEQ), process the operands. Value *Op0, *Op1; if (isEQ ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1))) : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) { if (Visited.insert(Op1).second) DFT.push_back(Op1); if (Visited.insert(Op0).second) DFT.push_back(Op0); continue; } // Try to match the current instruction if (matchInstruction(I, isEQ)) // Match succeed, continue the loop continue; } // One element of the sequence of || (or &&) could not be match as a // comparison against the same value as the others. 
// We allow only one "Extra" case to be checked before the switch if (!Extra) { Extra = V; continue; } // Failed to parse a proper sequence, abort now CompValue = nullptr; break; } } }; } // end anonymous namespace static void EraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU = nullptr) { Instruction *Cond = nullptr; if (SwitchInst *SI = dyn_cast(TI)) { Cond = dyn_cast(SI->getCondition()); } else if (BranchInst *BI = dyn_cast(TI)) { if (BI->isConditional()) Cond = dyn_cast(BI->getCondition()); } else if (IndirectBrInst *IBI = dyn_cast(TI)) { Cond = dyn_cast(IBI->getAddress()); } TI->eraseFromParent(); if (Cond) RecursivelyDeleteTriviallyDeadInstructions(Cond, nullptr, MSSAU); } /// Return true if the specified terminator checks /// to see if a value is equal to constant integer value. Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) { Value *CV = nullptr; if (SwitchInst *SI = dyn_cast(TI)) { // Do not permit merging of large switch instructions into their // predecessors unless there is only one predecessor. if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors())) CV = SI->getCondition(); } else if (BranchInst *BI = dyn_cast(TI)) if (BI->isConditional() && BI->getCondition()->hasOneUse()) if (ICmpInst *ICI = dyn_cast(BI->getCondition())) { if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL)) CV = ICI->getOperand(0); } // Unwrap any lossless ptrtoint cast. if (CV) { if (PtrToIntInst *PTII = dyn_cast(CV)) { Value *Ptr = PTII->getPointerOperand(); if (PTII->getType() == DL.getIntPtrType(Ptr->getType())) CV = Ptr; } } return CV; } /// Given a value comparison instruction, /// decode all of the 'cases' that it represents and return the 'default' block. BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases( Instruction *TI, std::vector &Cases) { if (SwitchInst *SI = dyn_cast(TI)) { Cases.reserve(SI->getNumCases()); for (auto Case : SI->cases()) Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(), Case.getCaseSuccessor())); return SI->getDefaultDest(); } BranchInst *BI = cast(TI); ICmpInst *ICI = cast(BI->getCondition()); BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE); Cases.push_back(ValueEqualityComparisonCase( GetConstantInt(ICI->getOperand(1), DL), Succ)); return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ); } /// Given a vector of bb/value pairs, remove any entries /// in the list that match the specified block. static void EliminateBlockCases(BasicBlock *BB, std::vector &Cases) { llvm::erase_value(Cases, BB); } /// Return true if there are any keys in C1 that exist in C2 as well. static bool ValuesOverlap(std::vector &C1, std::vector &C2) { std::vector *V1 = &C1, *V2 = &C2; // Make V1 be smaller than V2. if (V1->size() > V2->size()) std::swap(V1, V2); if (V1->empty()) return false; if (V1->size() == 1) { // Just scan V2. ConstantInt *TheVal = (*V1)[0].Value; for (unsigned i = 0, e = V2->size(); i != e; ++i) if (TheVal == (*V2)[i].Value) return true; } // Otherwise, just sort both lists and compare element by element. array_pod_sort(V1->begin(), V1->end()); array_pod_sort(V2->begin(), V2->end()); unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size(); while (i1 != e1 && i2 != e2) { if ((*V1)[i1].Value == (*V2)[i2].Value) return true; if ((*V1)[i1].Value < (*V2)[i2].Value) ++i1; else ++i2; } return false; } // Set branch weights on SwitchInst. This sets the metadata if there is at // least one non-zero weight. 
static void setBranchWeights(SwitchInst *SI, ArrayRef Weights) { // Check that there is at least one non-zero weight. Otherwise, pass // nullptr to setMetadata which will erase the existing metadata. MDNode *N = nullptr; if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; })) N = MDBuilder(SI->getParent()->getContext()).createBranchWeights(Weights); SI->setMetadata(LLVMContext::MD_prof, N); } // Similar to the above, but for branch and select instructions that take // exactly 2 weights. static void setBranchWeights(Instruction *I, uint32_t TrueWeight, uint32_t FalseWeight) { assert(isa(I) || isa(I)); // Check that there is at least one non-zero weight. Otherwise, pass // nullptr to setMetadata which will erase the existing metadata. MDNode *N = nullptr; if (TrueWeight || FalseWeight) N = MDBuilder(I->getParent()->getContext()) .createBranchWeights(TrueWeight, FalseWeight); I->setMetadata(LLVMContext::MD_prof, N); } /// If TI is known to be a terminator instruction and its block is known to /// only have a single predecessor block, check to see if that predecessor is /// also a value comparison with the same value, and if that comparison /// determines the outcome of this comparison. If so, simplify TI. This does a /// very limited form of jump threading. bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) { Value *PredVal = isValueEqualityComparison(Pred->getTerminator()); if (!PredVal) return false; // Not a value comparison in predecessor. Value *ThisVal = isValueEqualityComparison(TI); assert(ThisVal && "This isn't a value comparison!!"); if (ThisVal != PredVal) return false; // Different predicates. // TODO: Preserve branch weight metadata, similarly to how // FoldValueComparisonIntoPredecessors preserves it. // Find out information about when control will move from Pred to TI's block. std::vector PredCases; BasicBlock *PredDef = GetValueEqualityComparisonCases(Pred->getTerminator(), PredCases); EliminateBlockCases(PredDef, PredCases); // Remove default from cases. // Find information about how control leaves this block. std::vector ThisCases; BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases); EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases. // If TI's block is the default block from Pred's comparison, potentially // simplify TI based on this knowledge. if (PredDef == TI->getParent()) { // If we are here, we know that the value is none of those cases listed in // PredCases. If there are any cases in ThisCases that are in PredCases, we // can simplify TI. if (!ValuesOverlap(PredCases, ThisCases)) return false; if (isa(TI)) { // Okay, one of the successors of this condbr is dead. Convert it to a // uncond br. assert(ThisCases.size() == 1 && "Branch can only have one case!"); // Insert the new branch. Instruction *NI = Builder.CreateBr(ThisDef); (void)NI; // Remove PHI node entries for the dead edge. ThisCases[0].Dest->removePredecessor(PredDef); LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); EraseTerminatorAndDCECond(TI); if (DTU) DTU->applyUpdates( {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}}); return true; } SwitchInstProfUpdateWrapper SI = *cast(TI); // Okay, TI has cases that are statically dead, prune them away. 
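// Standalone sketch (illustrative names, plain containers) of the pruning
// performed below: when control reached this switch through the
// predecessor's default edge, every value the predecessor dispatched
// explicitly is impossible here, so those cases are statically dead. The
// real code additionally updates PHI nodes and DomTree edges for successors
// that lose their last incoming case.
#include <algorithm>
#include <cstdint>
#include <set>
#include <string>
#include <utility>
#include <vector>

using CaseEntry = std::pair<uint64_t, std::string>; // case value -> successor

void pruneDeadCases(std::vector<CaseEntry> &Cases,
                    const std::set<uint64_t> &HandledInPred) {
  Cases.erase(std::remove_if(Cases.begin(), Cases.end(),
                             [&](const CaseEntry &C) {
                               return HandledInPred.count(C.first) != 0;
                             }),
              Cases.end());
}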
SmallPtrSet DeadCases; for (unsigned i = 0, e = PredCases.size(); i != e; ++i) DeadCases.insert(PredCases[i].Value); LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() << "Through successor TI: " << *TI); SmallDenseMap NumPerSuccessorCases; for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) { --i; auto *Successor = i->getCaseSuccessor(); if (DTU) ++NumPerSuccessorCases[Successor]; if (DeadCases.count(i->getCaseValue())) { Successor->removePredecessor(PredDef); SI.removeCase(i); if (DTU) --NumPerSuccessorCases[Successor]; } } if (DTU) { std::vector Updates; for (const std::pair &I : NumPerSuccessorCases) if (I.second == 0) Updates.push_back({DominatorTree::Delete, PredDef, I.first}); DTU->applyUpdates(Updates); } LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n"); return true; } // Otherwise, TI's block must correspond to some matched value. Find out // which value (or set of values) this is. ConstantInt *TIV = nullptr; BasicBlock *TIBB = TI->getParent(); for (unsigned i = 0, e = PredCases.size(); i != e; ++i) if (PredCases[i].Dest == TIBB) { if (TIV) return false; // Cannot handle multiple values coming to this block. TIV = PredCases[i].Value; } assert(TIV && "No edge from pred to succ?"); // Okay, we found the one constant that our value can be if we get into TI's // BB. Find out which successor will unconditionally be branched to. BasicBlock *TheRealDest = nullptr; for (unsigned i = 0, e = ThisCases.size(); i != e; ++i) if (ThisCases[i].Value == TIV) { TheRealDest = ThisCases[i].Dest; break; } // If not handled by any explicit cases, it is handled by the default case. if (!TheRealDest) TheRealDest = ThisDef; SmallPtrSet RemovedSuccs; // Remove PHI node entries for dead edges. BasicBlock *CheckEdge = TheRealDest; for (BasicBlock *Succ : successors(TIBB)) if (Succ != CheckEdge) { if (Succ != TheRealDest) RemovedSuccs.insert(Succ); Succ->removePredecessor(TIBB); } else CheckEdge = nullptr; // Insert the new branch. Instruction *NI = Builder.CreateBr(TheRealDest); (void)NI; LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() << "Through successor TI: " << *TI << "Leaving: " << *NI << "\n"); EraseTerminatorAndDCECond(TI); if (DTU) { SmallVector Updates; Updates.reserve(RemovedSuccs.size()); for (auto *RemovedSucc : RemovedSuccs) Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc}); DTU->applyUpdates(Updates); } return true; } namespace { /// This class implements a stable ordering of constant /// integers that does not depend on their address. This is important for /// applications that sort ConstantInt's to ensure uniqueness. struct ConstantIntOrdering { bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const { return LHS->getValue().ult(RHS->getValue()); } }; } // end anonymous namespace static int ConstantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2) { const ConstantInt *LHS = *P1; const ConstantInt *RHS = *P2; if (LHS == RHS) return 0; return LHS->getValue().ult(RHS->getValue()) ? 1 : -1; } static inline bool HasBranchWeights(const Instruction *I) { MDNode *ProfMD = I->getMetadata(LLVMContext::MD_prof); if (ProfMD && ProfMD->getOperand(0)) if (MDString *MDS = dyn_cast(ProfMD->getOperand(0))) return MDS->getString().equals("branch_weights"); return false; } /// Get Weights of a given terminator, the default weight is at the front /// of the vector. If TI is a conditional eq, we need to swap the branch-weight /// metadata. 
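// Standalone illustration of the normalization described above: raw !prof
// weights are stored in successor order, and for a two-way branch on
// "x == c" the not-equal (default) successor is the second one, so moving
// the default weight to the front means swapping the two entries; for
// "x != c" the default is already first. normalizeDefaultFirst is an
// illustrative name only.
#include <cstdint>
#include <utility>
#include <vector>

std::vector<uint64_t> normalizeDefaultFirst(std::vector<uint64_t> Weights,
                                            bool PredicateIsEQ) {
  if (Weights.size() == 2 && PredicateIsEQ)
    std::swap(Weights.front(), Weights.back());
  return Weights;
}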
static void GetBranchWeights(Instruction *TI, SmallVectorImpl &Weights) { MDNode *MD = TI->getMetadata(LLVMContext::MD_prof); assert(MD); for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) { ConstantInt *CI = mdconst::extract(MD->getOperand(i)); Weights.push_back(CI->getValue().getZExtValue()); } // If TI is a conditional eq, the default case is the false case, // and the corresponding branch-weight data is at index 2. We swap the // default weight to be the first entry. if (BranchInst *BI = dyn_cast(TI)) { assert(Weights.size() == 2); ICmpInst *ICI = cast(BI->getCondition()); if (ICI->getPredicate() == ICmpInst::ICMP_EQ) std::swap(Weights.front(), Weights.back()); } } /// Keep halving the weights until all can fit in uint32_t. static void FitWeights(MutableArrayRef Weights) { uint64_t Max = *std::max_element(Weights.begin(), Weights.end()); if (Max > UINT_MAX) { unsigned Offset = 32 - countLeadingZeros(Max); for (uint64_t &I : Weights) I >>= Offset; } } static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses( BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) { Instruction *PTI = PredBlock->getTerminator(); // If we have bonus instructions, clone them into the predecessor block. // Note that there may be multiple predecessor blocks, so we cannot move // bonus instructions to a predecessor block. for (Instruction &BonusInst : *BB) { if (isa(BonusInst) || BonusInst.isTerminator()) continue; Instruction *NewBonusInst = BonusInst.clone(); if (PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) { // Unless the instruction has the same !dbg location as the original // branch, drop it. When we fold the bonus instructions we want to make // sure we reset their debug locations in order to avoid stepping on // dead code caused by folding dead branches. NewBonusInst->setDebugLoc(DebugLoc()); } RemapInstruction(NewBonusInst, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); VMap[&BonusInst] = NewBonusInst; // If we moved a load, we cannot any longer claim any knowledge about // its potential value. The previous information might have been valid // only given the branch precondition. // For an analogous reason, we must also drop all the metadata whose // semantics we don't understand. We *can* preserve !annotation, because // it is tied to the instruction itself, not the value or position. // Similarly strip attributes on call parameters that may cause UB in // location the call is moved to. NewBonusInst->dropUndefImplyingAttrsAndUnknownMetadata( LLVMContext::MD_annotation); PredBlock->getInstList().insert(PTI->getIterator(), NewBonusInst); NewBonusInst->takeName(&BonusInst); BonusInst.setName(NewBonusInst->getName() + ".old"); // Update (liveout) uses of bonus instructions, // now that the bonus instruction has been cloned into predecessor. // Note that we expect to be in a block-closed SSA form for this to work! for (Use &U : make_early_inc_range(BonusInst.uses())) { auto *UI = cast(U.getUser()); auto *PN = dyn_cast(UI); if (!PN) { assert(UI->getParent() == BB && BonusInst.comesBefore(UI) && "If the user is not a PHI node, then it should be in the same " "block as, and come after, the original bonus instruction."); continue; // Keep using the original bonus instruction. } // Is this the block-closed SSA form PHI node? if (PN->getIncomingBlock(U) == BB) continue; // Great, keep using the original bonus instruction. // The only other alternative is an "use" when coming from // the predecessor block - here we should refer to the cloned bonus instr. 
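// To recap the use-rewriting rules applied in this loop: a non-PHI user
// must already sit in BB after the original bonus instruction, so it keeps
// using the original; a PHI use whose incoming block is BB also keeps the
// original; only a PHI use incoming from the predecessor block is
// retargeted to the freshly cloned instruction, which is exactly the case
// the assert below verifies.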
assert(PN->getIncomingBlock(U) == PredBlock && "Not in block-closed SSA form?"); U.set(NewBonusInst); } } } bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding( Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) { BasicBlock *BB = TI->getParent(); BasicBlock *Pred = PTI->getParent(); SmallVector Updates; // Figure out which 'cases' to copy from SI to PSI. std::vector BBCases; BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases); std::vector PredCases; BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases); // Based on whether the default edge from PTI goes to BB or not, fill in // PredCases and PredDefault with the new switch cases we would like to // build. SmallMapVector NewSuccessors; // Update the branch weight metadata along the way SmallVector Weights; bool PredHasWeights = HasBranchWeights(PTI); bool SuccHasWeights = HasBranchWeights(TI); if (PredHasWeights) { GetBranchWeights(PTI, Weights); // branch-weight metadata is inconsistent here. if (Weights.size() != 1 + PredCases.size()) PredHasWeights = SuccHasWeights = false; } else if (SuccHasWeights) // If there are no predecessor weights but there are successor weights, // populate Weights with 1, which will later be scaled to the sum of // successor's weights Weights.assign(1 + PredCases.size(), 1); SmallVector SuccWeights; if (SuccHasWeights) { GetBranchWeights(TI, SuccWeights); // branch-weight metadata is inconsistent here. if (SuccWeights.size() != 1 + BBCases.size()) PredHasWeights = SuccHasWeights = false; } else if (PredHasWeights) SuccWeights.assign(1 + BBCases.size(), 1); if (PredDefault == BB) { // If this is the default destination from PTI, only the edges in TI // that don't occur in PTI, or that branch to BB will be activated. std::set PTIHandled; for (unsigned i = 0, e = PredCases.size(); i != e; ++i) if (PredCases[i].Dest != BB) PTIHandled.insert(PredCases[i].Value); else { // The default destination is BB, we don't need explicit targets. std::swap(PredCases[i], PredCases.back()); if (PredHasWeights || SuccHasWeights) { // Increase weight for the default case. Weights[0] += Weights[i + 1]; std::swap(Weights[i + 1], Weights.back()); Weights.pop_back(); } PredCases.pop_back(); --i; --e; } // Reconstruct the new switch statement we will be building. if (PredDefault != BBDefault) { PredDefault->removePredecessor(Pred); if (DTU && PredDefault != BB) Updates.push_back({DominatorTree::Delete, Pred, PredDefault}); PredDefault = BBDefault; ++NewSuccessors[BBDefault]; } unsigned CasesFromPred = Weights.size(); uint64_t ValidTotalSuccWeight = 0; for (unsigned i = 0, e = BBCases.size(); i != e; ++i) if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) { PredCases.push_back(BBCases[i]); ++NewSuccessors[BBCases[i].Dest]; if (SuccHasWeights || PredHasWeights) { // The default weight is at index 0, so weight for the ith case // should be at index i+1. Scale the cases from successor by // PredDefaultWeight (Weights[0]). Weights.push_back(Weights[0] * SuccWeights[i + 1]); ValidTotalSuccWeight += SuccWeights[i + 1]; } } if (SuccHasWeights || PredHasWeights) { ValidTotalSuccWeight += SuccWeights[0]; // Scale the cases from predecessor by ValidTotalSuccWeight. for (unsigned i = 1; i < CasesFromPred; ++i) Weights[i] *= ValidTotalSuccWeight; // Scale the default weight by SuccDefaultWeight (SuccWeights[0]). 
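// The net effect of the scaling in this branch, sketched standalone below
// with plain vectors (all names illustrative). The sketch assumes the
// predecessor cases that already targeted BB have been folded into the
// default weight, as done earlier in this function, and folds in the
// FitWeights-style shift that keeps the products within 32 bits.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<uint64_t>
mergeWeightsThroughDefault(std::vector<uint64_t> PredWeights,        // default first
                           const std::vector<uint64_t> &SuccWeights, // default first
                           const std::vector<unsigned> &SuccCasesToCopy) {
  const uint64_t PredDefault = PredWeights[0];
  uint64_t ValidTotalSuccWeight = SuccWeights[0]; // successor default
  // Cases copied from the successor fire only when the predecessor took its
  // default edge, so they get weight PredDefault * SuccCase.
  for (unsigned Idx : SuccCasesToCopy) {
    PredWeights.push_back(PredDefault * SuccWeights[Idx + 1]);
    ValidTotalSuccWeight += SuccWeights[Idx + 1];
  }
  // Weights kept from the predecessor are rescaled so both sides end up on
  // a comparable scale; the default is scaled by the successor's default.
  const std::size_t CasesFromPred = PredWeights.size() - SuccCasesToCopy.size();
  for (std::size_t I = 1; I < CasesFromPred; ++I)
    PredWeights[I] *= ValidTotalSuccWeight;
  PredWeights[0] *= SuccWeights[0];
  // If any product no longer fits in 32 bits, shift everything down.
  uint64_t Max = *std::max_element(PredWeights.begin(), PredWeights.end());
  unsigned Shift = 0;
  while ((Max >> Shift) > UINT32_MAX)
    ++Shift;
  for (uint64_t &W : PredWeights)
    W >>= Shift;
  return PredWeights;
}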
Weights[0] *= SuccWeights[0]; } } else { // If this is not the default destination from PSI, only the edges // in SI that occur in PSI with a destination of BB will be // activated. std::set PTIHandled; std::map WeightsForHandled; for (unsigned i = 0, e = PredCases.size(); i != e; ++i) if (PredCases[i].Dest == BB) { PTIHandled.insert(PredCases[i].Value); if (PredHasWeights || SuccHasWeights) { WeightsForHandled[PredCases[i].Value] = Weights[i + 1]; std::swap(Weights[i + 1], Weights.back()); Weights.pop_back(); } std::swap(PredCases[i], PredCases.back()); PredCases.pop_back(); --i; --e; } // Okay, now we know which constants were sent to BB from the // predecessor. Figure out where they will all go now. for (unsigned i = 0, e = BBCases.size(); i != e; ++i) if (PTIHandled.count(BBCases[i].Value)) { // If this is one we are capable of getting... if (PredHasWeights || SuccHasWeights) Weights.push_back(WeightsForHandled[BBCases[i].Value]); PredCases.push_back(BBCases[i]); ++NewSuccessors[BBCases[i].Dest]; PTIHandled.erase(BBCases[i].Value); // This constant is taken care of } // If there are any constants vectored to BB that TI doesn't handle, // they must go to the default destination of TI. for (ConstantInt *I : PTIHandled) { if (PredHasWeights || SuccHasWeights) Weights.push_back(WeightsForHandled[I]); PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault)); ++NewSuccessors[BBDefault]; } } // Okay, at this point, we know which new successor Pred will get. Make // sure we update the number of entries in the PHI nodes for these // successors. SmallPtrSet SuccsOfPred; if (DTU) { SuccsOfPred = {succ_begin(Pred), succ_end(Pred)}; Updates.reserve(Updates.size() + NewSuccessors.size()); } for (const std::pair &NewSuccessor : NewSuccessors) { for (auto I : seq(0, NewSuccessor.second)) { (void)I; AddPredecessorToBlock(NewSuccessor.first, Pred, BB); } if (DTU && !SuccsOfPred.contains(NewSuccessor.first)) Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first}); } Builder.SetInsertPoint(PTI); // Convert pointer to int before we switch. if (CV->getType()->isPointerTy()) { CV = Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr"); } // Now that the successors are updated, create the new Switch instruction. SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size()); NewSI->setDebugLoc(PTI->getDebugLoc()); for (ValueEqualityComparisonCase &V : PredCases) NewSI->addCase(V.Value, V.Dest); if (PredHasWeights || SuccHasWeights) { // Halve the weights if any of them cannot fit in an uint32_t FitWeights(Weights); SmallVector MDWeights(Weights.begin(), Weights.end()); setBranchWeights(NewSI, MDWeights); } EraseTerminatorAndDCECond(PTI); // Okay, last check. If BB is still a successor of PSI, then we must // have an infinite loop case. If so, add an infinitely looping block // to handle the case to preserve the behavior of the code. BasicBlock *InfLoopBlock = nullptr; for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i) if (NewSI->getSuccessor(i) == BB) { if (!InfLoopBlock) { // Insert it at the end of the function, because it's either code, // or it won't matter if it's hot. 
:) InfLoopBlock = BasicBlock::Create(BB->getContext(), "infloop", BB->getParent()); BranchInst::Create(InfLoopBlock, InfLoopBlock); if (DTU) Updates.push_back( {DominatorTree::Insert, InfLoopBlock, InfLoopBlock}); } NewSI->setSuccessor(i, InfLoopBlock); } if (DTU) { if (InfLoopBlock) Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock}); Updates.push_back({DominatorTree::Delete, Pred, BB}); DTU->applyUpdates(Updates); } ++NumFoldValueComparisonIntoPredecessors; return true; } /// The specified terminator is a value equality comparison instruction /// (either a switch or a branch on "X == c"). /// See if any of the predecessors of the terminator block are value comparisons /// on the same value. If so, and if safe to do so, fold them together. bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI, IRBuilder<> &Builder) { BasicBlock *BB = TI->getParent(); Value *CV = isValueEqualityComparison(TI); // CondVal assert(CV && "Not a comparison?"); bool Changed = false; SmallSetVector Preds(pred_begin(BB), pred_end(BB)); while (!Preds.empty()) { BasicBlock *Pred = Preds.pop_back_val(); Instruction *PTI = Pred->getTerminator(); // Don't try to fold into itself. if (Pred == BB) continue; // See if the predecessor is a comparison with the same value. Value *PCV = isValueEqualityComparison(PTI); // PredCondVal if (PCV != CV) continue; SmallSetVector FailBlocks; if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) { for (auto *Succ : FailBlocks) { if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU)) return false; } } PerformValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder); Changed = true; } return Changed; } // If we would need to insert a select that uses the value of this invoke // (comments in HoistThenElseCodeToIf explain why we would need to do this), we // can't hoist the invoke, as there is nowhere to put the select in this case. static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2) { for (BasicBlock *Succ : successors(BB1)) { for (const PHINode &PN : Succ->phis()) { Value *BB1V = PN.getIncomingValueForBlock(BB1); Value *BB2V = PN.getIncomingValueForBlock(BB2); if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) { return false; } } } return true; } static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false); /// Given a conditional branch that goes to BB1 and BB2, hoist any common code /// in the two blocks up into the branch block. The caller of this function /// guarantees that BI's block dominates BB1 and BB2. If EqTermsOnly is given, /// only perform hoisting in case both blocks only contain a terminator. In that /// case, only the original BI will be replaced and selects for PHIs are added. bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, const TargetTransformInfo &TTI, bool EqTermsOnly) { // This does very trivial matching, with limited scanning, to find identical // instructions in the two blocks. In particular, we don't want to get into // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As // such, we currently just scan for obviously identical instructions in an // identical order. BasicBlock *BB1 = BI->getSuccessor(0); // The true destination. BasicBlock *BB2 = BI->getSuccessor(1); // The false destination // If either of the blocks has it's address taken, then we can't do this fold, // because the code we'd hoist would no longer run when we jump into the block // by it's address. 
if (BB1->hasAddressTaken() || BB2->hasAddressTaken()) return false; BasicBlock::iterator BB1_Itr = BB1->begin(); BasicBlock::iterator BB2_Itr = BB2->begin(); Instruction *I1 = &*BB1_Itr++, *I2 = &*BB2_Itr++; // Skip debug info if it is not identical. DbgInfoIntrinsic *DBI1 = dyn_cast(I1); DbgInfoIntrinsic *DBI2 = dyn_cast(I2); if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { while (isa(I1)) I1 = &*BB1_Itr++; while (isa(I2)) I2 = &*BB2_Itr++; } // FIXME: Can we define a safety predicate for CallBr? if (isa(I1) || !I1->isIdenticalToWhenDefined(I2) || (isa(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) || isa(I1)) return false; BasicBlock *BIParent = BI->getParent(); bool Changed = false; auto _ = make_scope_exit([&]() { if (Changed) ++NumHoistCommonCode; }); // Check if only hoisting terminators is allowed. This does not add new // instructions to the hoist location. if (EqTermsOnly) { // Skip any debug intrinsics, as they are free to hoist. auto *I1NonDbg = &*skipDebugIntrinsics(I1->getIterator()); auto *I2NonDbg = &*skipDebugIntrinsics(I2->getIterator()); if (!I1NonDbg->isIdenticalToWhenDefined(I2NonDbg)) return false; if (!I1NonDbg->isTerminator()) return false; // Now we know that we only need to hoist debug instrinsics and the // terminator. Let the loop below handle those 2 cases. } do { // If we are hoisting the terminator instruction, don't move one (making a // broken BB), instead clone it, and remove BI. if (I1->isTerminator()) goto HoistTerminator; // If we're going to hoist a call, make sure that the two instructions we're // commoning/hoisting are both marked with musttail, or neither of them is // marked as such. Otherwise, we might end up in a situation where we hoist // from a block where the terminator is a `ret` to a block where the terminator // is a `br`, and `musttail` calls expect to be followed by a return. auto *C1 = dyn_cast(I1); auto *C2 = dyn_cast(I2); if (C1 && C2) if (C1->isMustTailCall() != C2->isMustTailCall()) return Changed; if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2)) return Changed; // If any of the two call sites has nomerge attribute, stop hoisting. if (const auto *CB1 = dyn_cast(I1)) if (CB1->cannotMerge()) return Changed; if (const auto *CB2 = dyn_cast(I2)) if (CB2->cannotMerge()) return Changed; if (isa(I1) || isa(I2)) { assert (isa(I1) && isa(I2)); // The debug location is an integral part of a debug info intrinsic // and can't be separated from it or replaced. Instead of attempting // to merge locations, simply hoist both copies of the intrinsic. BIParent->getInstList().splice(BI->getIterator(), BB1->getInstList(), I1); BIParent->getInstList().splice(BI->getIterator(), BB2->getInstList(), I2); Changed = true; } else { // For a normal instruction, we just move one to right before the branch, // then replace all uses of the other with the first. Finally, we remove // the now redundant second instruction. 
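// The loop around this code hoists only a common prefix of pairwise
// identical instructions; as noted at the top of the function, no O(M*N)
// matching is attempted. A standalone sketch of that scanning strategy over
// plain sequences (commonHoistablePrefix is an illustrative name; real
// instructions are compared with isIdenticalToWhenDefined and debug
// intrinsics are skipped):
#include <cstddef>
#include <string>
#include <vector>

std::size_t commonHoistablePrefix(const std::vector<std::string> &BB1,
                                  const std::vector<std::string> &BB2) {
  std::size_t N = 0;
  while (N < BB1.size() && N < BB2.size() && BB1[N] == BB2[N])
    ++N;
  return N; // Number of leading instructions safe to hoist into the branch block.
}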
BIParent->getInstList().splice(BI->getIterator(), BB1->getInstList(), I1); if (!I2->use_empty()) I2->replaceAllUsesWith(I1); I1->andIRFlags(I2); unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_range, LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull, LLVMContext::MD_invariant_group, LLVMContext::MD_align, LLVMContext::MD_dereferenceable, LLVMContext::MD_dereferenceable_or_null, LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group, LLVMContext::MD_preserve_access_index}; combineMetadata(I1, I2, KnownIDs, true); // I1 and I2 are being combined into a single instruction. Its debug // location is the merged locations of the original instructions. I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); I2->eraseFromParent(); Changed = true; } ++NumHoistCommonInstrs; I1 = &*BB1_Itr++; I2 = &*BB2_Itr++; // Skip debug info if it is not identical. DbgInfoIntrinsic *DBI1 = dyn_cast(I1); DbgInfoIntrinsic *DBI2 = dyn_cast(I2); if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { while (isa(I1)) I1 = &*BB1_Itr++; while (isa(I2)) I2 = &*BB2_Itr++; } } while (I1->isIdenticalToWhenDefined(I2)); return true; HoistTerminator: // It may not be possible to hoist an invoke. // FIXME: Can we define a safety predicate for CallBr? if (isa(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) return Changed; // TODO: callbr hoisting currently disabled pending further study. if (isa(I1)) return Changed; for (BasicBlock *Succ : successors(BB1)) { for (PHINode &PN : Succ->phis()) { Value *BB1V = PN.getIncomingValueForBlock(BB1); Value *BB2V = PN.getIncomingValueForBlock(BB2); if (BB1V == BB2V) continue; // Check for passingValueIsAlwaysUndefined here because we would rather // eliminate undefined control flow then converting it to a select. if (passingValueIsAlwaysUndefined(BB1V, &PN) || passingValueIsAlwaysUndefined(BB2V, &PN)) return Changed; if (isa(BB1V) && !isSafeToSpeculativelyExecute(BB1V)) return Changed; if (isa(BB2V) && !isSafeToSpeculativelyExecute(BB2V)) return Changed; } } // Okay, it is safe to hoist the terminator. Instruction *NT = I1->clone(); BIParent->getInstList().insert(BI->getIterator(), NT); if (!NT->getType()->isVoidTy()) { I1->replaceAllUsesWith(NT); I2->replaceAllUsesWith(NT); NT->takeName(I1); } Changed = true; ++NumHoistCommonInstrs; // Ensure terminator gets a debug location, even an unknown one, in case // it involves inlinable calls. NT->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); // PHIs created below will adopt NT's merged DebugLoc. IRBuilder Builder(NT); // Hoisting one of the terminators from our successor is a great thing. // Unfortunately, the successors of the if/else blocks may have PHI nodes in // them. If they do, all PHI entries for BB1/BB2 must agree for all PHI // nodes, so we insert select instruction to compute the final result. std::map, SelectInst *> InsertedSelects; for (BasicBlock *Succ : successors(BB1)) { for (PHINode &PN : Succ->phis()) { Value *BB1V = PN.getIncomingValueForBlock(BB1); Value *BB2V = PN.getIncomingValueForBlock(BB2); if (BB1V == BB2V) continue; // These values do not agree. Insert a select instruction before NT // that determines the right value. SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; if (!SI) { // Propagate fast-math-flags from phi node to its replacement select. 
IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); if (isa(PN)) Builder.setFastMathFlags(PN.getFastMathFlags()); SI = cast( Builder.CreateSelect(BI->getCondition(), BB1V, BB2V, BB1V->getName() + "." + BB2V->getName(), BI)); } // Make the PHI node use the select for all incoming values for BB1/BB2 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2) PN.setIncomingValue(i, SI); } } SmallVector Updates; // Update any PHI nodes in our new successors. for (BasicBlock *Succ : successors(BB1)) { AddPredecessorToBlock(Succ, BIParent, BB1); if (DTU) Updates.push_back({DominatorTree::Insert, BIParent, Succ}); } if (DTU) for (BasicBlock *Succ : successors(BI)) Updates.push_back({DominatorTree::Delete, BIParent, Succ}); EraseTerminatorAndDCECond(BI); if (DTU) DTU->applyUpdates(Updates); return Changed; } // Check lifetime markers. static bool isLifeTimeMarker(const Instruction *I) { if (auto II = dyn_cast(I)) { switch (II->getIntrinsicID()) { default: break; case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: return true; } } return false; } // TODO: Refine this. This should avoid cases like turning constant memcpy sizes // into variables. static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx) { return !isa(I); } // All instructions in Insts belong to different blocks that all unconditionally // branch to a common successor. Analyze each instruction and return true if it // would be possible to sink them into their successor, creating one common // instruction instead. For every value that would be required to be provided by // PHI node (because an operand varies in each input block), add to PHIOperands. static bool canSinkInstructions( ArrayRef Insts, DenseMap> &PHIOperands) { // Prune out obviously bad instructions to move. Each instruction must have // exactly zero or one use, and we check later that use is by a single, common // PHI instruction in the successor. bool HasUse = !Insts.front()->user_empty(); for (auto *I : Insts) { // These instructions may change or break semantics if moved. if (isa(I) || I->isEHPad() || isa(I) || I->getType()->isTokenTy()) return false; // Do not try to sink an instruction in an infinite loop - it can cause // this algorithm to infinite loop. if (I->getParent()->getSingleSuccessor() == I->getParent()) return false; // Conservatively return false if I is an inline-asm instruction. Sinking // and merging inline-asm instructions can potentially create arguments // that cannot satisfy the inline-asm constraints. // If the instruction has nomerge attribute, return false. if (const auto *C = dyn_cast(I)) if (C->isInlineAsm() || C->cannotMerge()) return false; // Each instruction must have zero or one use. if (HasUse && !I->hasOneUse()) return false; if (!HasUse && !I->user_empty()) return false; } const Instruction *I0 = Insts.front(); for (auto *I : Insts) if (!I->isSameOperationAs(I0)) return false; // All instructions in Insts are known to be the same opcode. If they have a // use, check that the only user is a PHI or in the same block as the // instruction, because if a user is in the same block as an instruction we're // contemplating sinking, it must already be determined to be sinkable. 
if (HasUse) { auto *PNUse = dyn_cast(*I0->user_begin()); auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0); if (!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool { auto *U = cast(*I->user_begin()); return (PNUse && PNUse->getParent() == Succ && PNUse->getIncomingValueForBlock(I->getParent()) == I) || U->getParent() == I->getParent(); })) return false; } // Because SROA can't handle speculating stores of selects, try not to sink // loads, stores or lifetime markers of allocas when we'd have to create a // PHI for the address operand. Also, because it is likely that loads or // stores of allocas will disappear when Mem2Reg/SROA is run, don't sink // them. // This can cause code churn which can have unintended consequences down // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244. // FIXME: This is a workaround for a deficiency in SROA - see // https://llvm.org/bugs/show_bug.cgi?id=30188 if (isa(I0) && any_of(Insts, [](const Instruction *I) { return isa(I->getOperand(1)->stripPointerCasts()); })) return false; if (isa(I0) && any_of(Insts, [](const Instruction *I) { return isa(I->getOperand(0)->stripPointerCasts()); })) return false; if (isLifeTimeMarker(I0) && any_of(Insts, [](const Instruction *I) { return isa(I->getOperand(1)->stripPointerCasts()); })) return false; // For calls to be sinkable, they must all be indirect, or have same callee. // I.e. if we have two direct calls to different callees, we don't want to // turn that into an indirect call. Likewise, if we have an indirect call, // and a direct call, we don't actually want to have a single indirect call. if (isa(I0)) { auto IsIndirectCall = [](const Instruction *I) { return cast(I)->isIndirectCall(); }; bool HaveIndirectCalls = any_of(Insts, IsIndirectCall); bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall); if (HaveIndirectCalls) { if (!AllCallsAreIndirect) return false; } else { // All callees must be identical. Value *Callee = nullptr; for (const Instruction *I : Insts) { Value *CurrCallee = cast(I)->getCalledOperand(); if (!Callee) Callee = CurrCallee; else if (Callee != CurrCallee) return false; } } } for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) { Value *Op = I0->getOperand(OI); if (Op->getType()->isTokenTy()) // Don't touch any operand of token type. return false; auto SameAsI0 = [&I0, OI](const Instruction *I) { assert(I->getNumOperands() == I0->getNumOperands()); return I->getOperand(OI) == I0->getOperand(OI); }; if (!all_of(Insts, SameAsI0)) { if ((isa(Op) && !replacingOperandWithVariableIsCheap(I0, OI)) || !canReplaceOperandWithVariable(I0, OI)) // We can't create a PHI from this GEP. return false; for (auto *I : Insts) PHIOperands[I].push_back(I->getOperand(OI)); } } return true; } // Assuming canSinkInstructions(Blocks) has returned true, sink the last // instruction of every block in Blocks to their common successor, commoning // into one instruction. static bool sinkLastInstruction(ArrayRef Blocks) { auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0); // canSinkInstructions returning true guarantees that every block has at // least one non-terminator instruction. SmallVector Insts; for (auto *BB : Blocks) { Instruction *I = BB->getTerminator(); do { I = I->getPrevNode(); } while (isa(I) && I != &BB->front()); if (!isa(I)) Insts.push_back(I); } // The only checking we need to do now is that all users of all instructions // are the same PHI node. 
canSinkInstructions should have checked this but // it is slightly over-aggressive - it gets confused by commutative // instructions so double-check it here. Instruction *I0 = Insts.front(); if (!I0->user_empty()) { auto *PNUse = dyn_cast(*I0->user_begin()); if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool { auto *U = cast(*I->user_begin()); return U == PNUse; })) return false; } // We don't need to do any more checking here; canSinkInstructions should // have done it all for us. SmallVector NewOperands; for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) { // This check is different to that in canSinkInstructions. There, we // cared about the global view once simplifycfg (and instcombine) have // completed - it takes into account PHIs that become trivially // simplifiable. However here we need a more local view; if an operand // differs we create a PHI and rely on instcombine to clean up the very // small mess we may make. bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) { return I->getOperand(O) != I0->getOperand(O); }); if (!NeedPHI) { NewOperands.push_back(I0->getOperand(O)); continue; } // Create a new PHI in the successor block and populate it. auto *Op = I0->getOperand(O); assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!"); auto *PN = PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink", &BBEnd->front()); for (auto *I : Insts) PN->addIncoming(I->getOperand(O), I->getParent()); NewOperands.push_back(PN); } // Arbitrarily use I0 as the new "common" instruction; remap its operands // and move it to the start of the successor block. for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) I0->getOperandUse(O).set(NewOperands[O]); I0->moveBefore(&*BBEnd->getFirstInsertionPt()); // Update metadata and IR flags, and merge debug locations. for (auto *I : Insts) if (I != I0) { // The debug location for the "common" instruction is the merged locations // of all the commoned instructions. We start with the original location // of the "common" instruction and iteratively merge each location in the // loop below. // This is an N-way merge, which will be inefficient if I0 is a CallInst. // However, as N-way merge for CallInst is rare, so we use simplified API // instead of using complex API for N-way merge. I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc()); combineMetadataForCSE(I0, I, true); I0->andIRFlags(I); } if (!I0->user_empty()) { // canSinkLastInstruction checked that all instructions were used by // one and only one PHI node. Find that now, RAUW it to our common // instruction and nuke it. auto *PN = cast(*I0->user_begin()); PN->replaceAllUsesWith(I0); PN->eraseFromParent(); } // Finally nuke all instructions apart from the common instruction. for (auto *I : Insts) if (I != I0) I->eraseFromParent(); return true; } namespace { // LockstepReverseIterator - Iterates through instructions // in a set of blocks in reverse order from the first non-terminator. // For example (assume all blocks have size n): // LockstepReverseIterator I([B1, B2, B3]); // *I-- = [B1[n], B2[n], B3[n]]; // *I-- = [B1[n-1], B2[n-1], B3[n-1]]; // *I-- = [B1[n-2], B2[n-2], B3[n-2]]; // ... 
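// A standalone analogue of the iterator documented above, written over
// plain vectors: visit the last elements of all sequences together, then
// the second-to-last, and so on, stopping as soon as any sequence runs out.
// The real class additionally skips debug intrinsics and supports stepping
// forward again. forEachLockstepReverse is an illustrative name.
#include <algorithm>
#include <cstddef>
#include <vector>

template <typename T, typename Visitor>
void forEachLockstepReverse(const std::vector<std::vector<T>> &Seqs,
                            Visitor Visit) {
  if (Seqs.empty())
    return;
  std::size_t Steps = Seqs.front().size();
  for (const auto &S : Seqs)
    Steps = std::min(Steps, S.size());
  for (std::size_t I = 1; I <= Steps; ++I) {
    std::vector<T> Row;
    for (const auto &S : Seqs)
      Row.push_back(S[S.size() - I]); // last, then second-to-last, ...
    Visit(Row);
  }
}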
class LockstepReverseIterator { ArrayRef Blocks; SmallVector Insts; bool Fail; public: LockstepReverseIterator(ArrayRef Blocks) : Blocks(Blocks) { reset(); } void reset() { Fail = false; Insts.clear(); for (auto *BB : Blocks) { Instruction *Inst = BB->getTerminator(); for (Inst = Inst->getPrevNode(); Inst && isa(Inst);) Inst = Inst->getPrevNode(); if (!Inst) { // Block wasn't big enough. Fail = true; return; } Insts.push_back(Inst); } } bool isValid() const { return !Fail; } void operator--() { if (Fail) return; for (auto *&Inst : Insts) { for (Inst = Inst->getPrevNode(); Inst && isa(Inst);) Inst = Inst->getPrevNode(); // Already at beginning of block. if (!Inst) { Fail = true; return; } } } void operator++() { if (Fail) return; for (auto *&Inst : Insts) { for (Inst = Inst->getNextNode(); Inst && isa(Inst);) Inst = Inst->getNextNode(); // Already at end of block. if (!Inst) { Fail = true; return; } } } ArrayRef operator * () const { return Insts; } }; } // end anonymous namespace /// Check whether BB's predecessors end with unconditional branches. If it is /// true, sink any common code from the predecessors to BB. static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU) { // We support two situations: // (1) all incoming arcs are unconditional // (2) there are non-unconditional incoming arcs // // (2) is very common in switch defaults and // else-if patterns; // // if (a) f(1); // else if (b) f(2); // // produces: // // [if] // / \ // [f(1)] [if] // | | \ // | | | // | [f(2)]| // \ | / // [ end ] // // [end] has two unconditional predecessor arcs and one conditional. The // conditional refers to the implicit empty 'else' arc. This conditional // arc can also be caused by an empty default block in a switch. // // In this case, we attempt to sink code from all *unconditional* arcs. // If we can sink instructions from these arcs (determined during the scan // phase below) we insert a common successor for all unconditional arcs and // connect that to [end], to enable sinking: // // [if] // / \ // [x(1)] [if] // | | \ // | | \ // | [x(2)] | // \ / | // [sink.split] | // \ / // [ end ] // SmallVector UnconditionalPreds; bool HaveNonUnconditionalPredecessors = false; for (auto *PredBB : predecessors(BB)) { auto *PredBr = dyn_cast(PredBB->getTerminator()); if (PredBr && PredBr->isUnconditional()) UnconditionalPreds.push_back(PredBB); else HaveNonUnconditionalPredecessors = true; } if (UnconditionalPreds.size() < 2) return false; // We take a two-step approach to tail sinking. First we scan from the end of // each block upwards in lockstep. If the n'th instruction from the end of each // block can be sunk, those instructions are added to ValuesToSink and we // carry on. If we can sink an instruction but need to PHI-merge some operands // (because they're not identical in each instruction) we add these to // PHIOperands. int ScanIdx = 0; SmallPtrSet InstructionsToSink; DenseMap> PHIOperands; LockstepReverseIterator LRI(UnconditionalPreds); while (LRI.isValid() && canSinkInstructions(*LRI, PHIOperands)) { LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0] << "\n"); InstructionsToSink.insert((*LRI).begin(), (*LRI).end()); ++ScanIdx; --LRI; } // If no instructions can be sunk, early-return. if (ScanIdx == 0) return false; bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB); if (!followedByDeoptOrUnreachable) { // Okay, we *could* sink last ScanIdx instructions. 
But how many can we // actually sink before encountering instruction that is unprofitable to // sink? auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) { unsigned NumPHIdValues = 0; for (auto *I : *LRI) for (auto *V : PHIOperands[I]) { if (!InstructionsToSink.contains(V)) ++NumPHIdValues; // FIXME: this check is overly optimistic. We may end up not sinking // said instruction, due to the very same profitability check. // See @creating_too_many_phis in sink-common-code.ll. } LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n"); unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size(); if ((NumPHIdValues % UnconditionalPreds.size()) != 0) NumPHIInsts++; return NumPHIInsts <= 1; }; // We've determined that we are going to sink last ScanIdx instructions, // and recorded them in InstructionsToSink. Now, some instructions may be // unprofitable to sink. But that determination depends on the instructions // that we are going to sink. // First, forward scan: find the first instruction unprofitable to sink, // recording all the ones that are profitable to sink. // FIXME: would it be better, after we detect that not all are profitable. // to either record the profitable ones, or erase the unprofitable ones? // Maybe we need to choose (at runtime) the one that will touch least // instrs? LRI.reset(); int Idx = 0; SmallPtrSet InstructionsProfitableToSink; while (Idx < ScanIdx) { if (!ProfitableToSinkInstruction(LRI)) { // Too many PHIs would be created. LLVM_DEBUG( dbgs() << "SINK: stopping here, too many PHIs would be created!\n"); break; } InstructionsProfitableToSink.insert((*LRI).begin(), (*LRI).end()); --LRI; ++Idx; } // If no instructions can be sunk, early-return. if (Idx == 0) return false; // Did we determine that (only) some instructions are unprofitable to sink? if (Idx < ScanIdx) { // Okay, some instructions are unprofitable. ScanIdx = Idx; InstructionsToSink = InstructionsProfitableToSink; // But, that may make other instructions unprofitable, too. // So, do a backward scan, do any earlier instructions become // unprofitable? assert( !ProfitableToSinkInstruction(LRI) && "We already know that the last instruction is unprofitable to sink"); ++LRI; --Idx; while (Idx >= 0) { // If we detect that an instruction becomes unprofitable to sink, // all earlier instructions won't be sunk either, // so preemptively keep InstructionsProfitableToSink in sync. // FIXME: is this the most performant approach? for (auto *I : *LRI) InstructionsProfitableToSink.erase(I); if (!ProfitableToSinkInstruction(LRI)) { // Everything starting with this instruction won't be sunk. ScanIdx = Idx; InstructionsToSink = InstructionsProfitableToSink; } ++LRI; --Idx; } } // If no instructions can be sunk, early-return. if (ScanIdx == 0) return false; } bool Changed = false; if (HaveNonUnconditionalPredecessors) { if (!followedByDeoptOrUnreachable) { // It is always legal to sink common instructions from unconditional // predecessors. However, if not all predecessors are unconditional, // this transformation might be pessimizing. So as a rule of thumb, // don't do it unless we'd sink at least one non-speculatable instruction. 
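// Standalone sketch of the PHI-count bound used by
// ProfitableToSinkInstruction above: a sunk instruction whose operands
// differ between the predecessor blocks needs one PHI per differing operand
// position, and at most one such PHI per sunk instruction is tolerated.
// profitableToSink is an illustrative name.
#include <cstddef>

bool profitableToSink(std::size_t NumPHIdValues, std::size_t NumPreds) {
  std::size_t NumPHIInsts = NumPHIdValues / NumPreds;
  if (NumPHIdValues % NumPreds != 0)
    ++NumPHIInsts; // round up
  return NumPHIInsts <= 1;
}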
// See https://bugs.llvm.org/show_bug.cgi?id=30244 LRI.reset(); int Idx = 0; bool Profitable = false; while (Idx < ScanIdx) { if (!isSafeToSpeculativelyExecute((*LRI)[0])) { Profitable = true; break; } --LRI; ++Idx; } if (!Profitable) return false; } LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n"); // We have a conditional edge and we're going to sink some instructions. // Insert a new block postdominating all blocks we're going to sink from. if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU)) // Edges couldn't be split. return false; Changed = true; } // Now that we've analyzed all potential sinking candidates, perform the // actual sink. We iteratively sink the last non-terminator of the source // blocks into their common successor unless doing so would require too // many PHI instructions to be generated (currently only one PHI is allowed // per sunk instruction). // // We can use InstructionsToSink to discount values needing PHI-merging that will // actually be sunk in a later iteration. This allows us to be more // aggressive in what we sink. This does allow a false positive where we // sink presuming a later value will also be sunk, but stop half way through // and never actually sink it which means we produce more PHIs than intended. // This is unlikely in practice though. int SinkIdx = 0; for (; SinkIdx != ScanIdx; ++SinkIdx) { LLVM_DEBUG(dbgs() << "SINK: Sink: " << *UnconditionalPreds[0]->getTerminator()->getPrevNode() << "\n"); // Because we've sunk every instruction in turn, the current instruction to // sink is always at index 0. LRI.reset(); if (!sinkLastInstruction(UnconditionalPreds)) { LLVM_DEBUG( dbgs() << "SINK: stopping here, failed to actually sink instruction!\n"); break; } NumSinkCommonInstrs++; Changed = true; } if (SinkIdx != 0) ++NumSinkCommonCode; return Changed; } /// Determine if we can hoist sink a sole store instruction out of a /// conditional block. /// /// We are looking for code like the following: /// BrBB: /// store i32 %add, i32* %arrayidx2 /// ... // No other stores or function calls (we could be calling a memory /// ... // function). /// %cmp = icmp ult %x, %y /// br i1 %cmp, label %EndBB, label %ThenBB /// ThenBB: /// store i32 %add5, i32* %arrayidx2 /// br label EndBB /// EndBB: /// ... /// We are going to transform this into: /// BrBB: /// store i32 %add, i32* %arrayidx2 /// ... // /// %cmp = icmp ult %x, %y /// %add.add5 = select i1 %cmp, i32 %add, %add5 /// store i32 %add.add5, i32* %arrayidx2 /// ... /// /// \return The pointer to the value of the previous store if the store can be /// hoisted into the predecessor block. 0 otherwise. static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB) { StoreInst *StoreToHoist = dyn_cast(I); if (!StoreToHoist) return nullptr; // Volatile or atomic. if (!StoreToHoist->isSimple()) return nullptr; Value *StorePtr = StoreToHoist->getPointerOperand(); Type *StoreTy = StoreToHoist->getValueOperand()->getType(); // Look for a store to the same pointer in BrBB. unsigned MaxNumInstToLookAt = 9; // Skip pseudo probe intrinsic calls which are not really killing any memory // accesses. for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) { if (!MaxNumInstToLookAt) break; --MaxNumInstToLookAt; // Could be calling an instruction that affects memory like free(). if (CurI.mayWriteToMemory() && !isa(CurI)) return nullptr; if (auto *SI = dyn_cast(&CurI)) { // Found the previous store to same location and type. 
Make sure it is // simple, to avoid introducing a spurious non-atomic write after an // atomic write. if (SI->getPointerOperand() == StorePtr && SI->getValueOperand()->getType() == StoreTy && SI->isSimple()) // Found the previous store, return its value operand. return SI->getValueOperand(); return nullptr; // Unknown store. } if (auto *LI = dyn_cast(&CurI)) { if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy && LI->isSimple()) { // Local objects (created by an `alloca` instruction) are always // writable, so once we are past a read from a location it is valid to // also write to that same location. // If the address of the local object never escapes the function, that // means it's never concurrently read or written, hence moving the store // from under the condition will not introduce a data race. auto *AI = dyn_cast(getUnderlyingObject(StorePtr)); if (AI && !PointerMayBeCaptured(AI, false, true)) // Found a previous load, return it. return LI; } // The load didn't work out, but we may still find a store. } } return nullptr; } /// Estimate the cost of the insertion(s) and check that the PHI nodes can be /// converted to selects. static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI) { TargetTransformInfo::TargetCostKind CostKind = BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize : TargetTransformInfo::TCK_SizeAndLatency; bool HaveRewritablePHIs = false; for (PHINode &PN : EndBB->phis()) { Value *OrigV = PN.getIncomingValueForBlock(BB); Value *ThenV = PN.getIncomingValueForBlock(ThenBB); // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf. // Skip PHIs which are trivial. if (ThenV == OrigV) continue; Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr, CmpInst::BAD_ICMP_PREDICATE, CostKind); // Don't convert to selects if we could remove undefined behavior instead. if (passingValueIsAlwaysUndefined(OrigV, &PN) || passingValueIsAlwaysUndefined(ThenV, &PN)) return false; HaveRewritablePHIs = true; ConstantExpr *OrigCE = dyn_cast(OrigV); ConstantExpr *ThenCE = dyn_cast(ThenV); if (!OrigCE && !ThenCE) continue; // Known safe and cheap. if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) || (OrigCE && !isSafeToSpeculativelyExecute(OrigCE))) return false; InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0; InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0; InstructionCost MaxCost = 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; if (OrigCost + ThenCost > MaxCost) return false; // Account for the cost of an unfolded ConstantExpr which could end up // getting expanded into Instructions. // FIXME: This doesn't account for how many operations are combined in the // constant expression. ++SpeculatedInstructions; if (SpeculatedInstructions > 1) return false; } return HaveRewritablePHIs; } /// Speculate a conditional basic block flattening the CFG. /// /// Note that this is a very risky transform currently. Speculating /// instructions like this is most often not desirable. Instead, there is an MI /// pass which can do it with full awareness of the resource constraints. /// However, some cases are "obvious" and we should do directly. An example of /// this is speculating a single, reasonably cheap instruction. 
/// /// There is only one distinct advantage to flattening the CFG at the IR level: /// it makes very common but simplistic optimizations such as are common in /// instcombine and the DAG combiner more powerful by removing CFG edges and /// modeling their effects with easier to reason about SSA value graphs. /// /// /// An illustration of this transform is turning this IR: /// \code /// BB: /// %cmp = icmp ult %x, %y /// br i1 %cmp, label %EndBB, label %ThenBB /// ThenBB: /// %sub = sub %x, %y /// br label BB2 /// EndBB: /// %phi = phi [ %sub, %ThenBB ], [ 0, %EndBB ] /// ... /// \endcode /// /// Into this IR: /// \code /// BB: /// %cmp = icmp ult %x, %y /// %sub = sub %x, %y /// %cond = select i1 %cmp, 0, %sub /// ... /// \endcode /// /// \returns true if the conditional block is removed. bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, const TargetTransformInfo &TTI) { // Be conservative for now. FP select instruction can often be expensive. Value *BrCond = BI->getCondition(); if (isa(BrCond)) return false; BasicBlock *BB = BI->getParent(); BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0); InstructionCost Budget = PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; // If ThenBB is actually on the false edge of the conditional branch, remember // to swap the select operands later. bool Invert = false; if (ThenBB != BI->getSuccessor(0)) { assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?"); Invert = true; } assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block"); // If the branch is non-unpredictable, and is predicted to *not* branch to // the `then` block, then avoid speculating it. if (!BI->getMetadata(LLVMContext::MD_unpredictable)) { uint64_t TWeight, FWeight; if (BI->extractProfMetadata(TWeight, FWeight) && (TWeight + FWeight) != 0) { uint64_t EndWeight = Invert ? TWeight : FWeight; BranchProbability BIEndProb = BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight); BranchProbability Likely = TTI.getPredictableBranchThreshold(); if (BIEndProb >= Likely) return false; } } // Keep a count of how many times instructions are used within ThenBB when // they are candidates for sinking into ThenBB. Specifically: // - They are defined in BB, and // - They have no side effects, and // - All of their uses are in ThenBB. SmallDenseMap SinkCandidateUseCounts; SmallVector SpeculatedDbgIntrinsics; unsigned SpeculatedInstructions = 0; Value *SpeculatedStoreValue = nullptr; StoreInst *SpeculatedStore = nullptr; for (BasicBlock::iterator BBI = ThenBB->begin(), BBE = std::prev(ThenBB->end()); BBI != BBE; ++BBI) { Instruction *I = &*BBI; // Skip debug info. if (isa(I)) { SpeculatedDbgIntrinsics.push_back(I); continue; } // Skip pseudo probes. The consequence is we lose track of the branch // probability for ThenBB, which is fine since the optimization here takes // place regardless of the branch probability. if (isa(I)) { // The probe should be deleted so that it will not be over-counted when // the samples collected on the non-conditional path are counted towards // the conditional path. We leave it for the counts inference algorithm to // figure out a proper count for an unknown probe. SpeculatedDbgIntrinsics.push_back(I); continue; } // Only speculatively execute a single instruction (not counting the // terminator) for now. ++SpeculatedInstructions; if (SpeculatedInstructions > 1) return false; // Don't hoist the instruction if it's unsafe or expensive. 
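// The HoistCondStores path checked next rewrites a store that is guarded by
// the branch into an unconditional store of a select, as shown in the
// comment above isSafeToSpeculateStore. A C-level analogue (illustrative
// names): this is only sound when the location is known to be writable and
// not accessed concurrently, e.g. a non-escaping alloca, which is exactly
// what that analysis checks.
void guardedStore(bool Cond, int NewV, int &Slot) {
  if (Cond)
    Slot = NewV;
}
void speculatedStore(bool Cond, int NewV, int &Slot) {
  Slot = Cond ? NewV : Slot; // same effect, but no branch and no extra block
}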
if (!isSafeToSpeculativelyExecute(I) && !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore( I, BB, ThenBB, EndBB)))) return false; if (!SpeculatedStoreValue && computeSpeculationCost(I, TTI) > PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic) return false; // Store the store speculation candidate. if (SpeculatedStoreValue) SpeculatedStore = cast(I); // Do not hoist the instruction if any of its operands are defined but not // used in BB. The transformation will prevent the operand from // being sunk into the use block. for (Use &Op : I->operands()) { Instruction *OpI = dyn_cast(Op); if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects()) continue; // Not a candidate for sinking. ++SinkCandidateUseCounts[OpI]; } } // Consider any sink candidates which are only used in ThenBB as costs for // speculation. Note, while we iterate over a DenseMap here, we are summing // and so iteration order isn't significant. for (SmallDenseMap::iterator I = SinkCandidateUseCounts.begin(), E = SinkCandidateUseCounts.end(); I != E; ++I) if (I->first->hasNUses(I->second)) { ++SpeculatedInstructions; if (SpeculatedInstructions > 1) return false; } // Check that we can insert the selects and that it's not too expensive to do // so. bool Convert = SpeculatedStore != nullptr; InstructionCost Cost = 0; Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB, SpeculatedInstructions, Cost, TTI); if (!Convert || Cost > Budget) return false; // If we get here, we can hoist the instruction and if-convert. LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";); // Insert a select of the value of the speculated store. if (SpeculatedStoreValue) { IRBuilder Builder(BI); Value *TrueV = SpeculatedStore->getValueOperand(); Value *FalseV = SpeculatedStoreValue; if (Invert) std::swap(TrueV, FalseV); Value *S = Builder.CreateSelect( BrCond, TrueV, FalseV, "spec.store.select", BI); SpeculatedStore->setOperand(0, S); SpeculatedStore->applyMergedLocation(BI->getDebugLoc(), SpeculatedStore->getDebugLoc()); } // Metadata can be dependent on the condition we are hoisting above. // Conservatively strip all metadata on the instruction. Drop the debug loc // to avoid making it appear as if the condition is a constant, which would // be misleading while debugging. // Similarly strip attributes that maybe dependent on condition we are // hoisting above. for (auto &I : *ThenBB) { if (!SpeculatedStoreValue || &I != SpeculatedStore) I.setDebugLoc(DebugLoc()); I.dropUndefImplyingAttrsAndUnknownMetadata(); } // Hoist the instructions. BB->getInstList().splice(BI->getIterator(), ThenBB->getInstList(), ThenBB->begin(), std::prev(ThenBB->end())); // Insert selects and rewrite the PHI operands. IRBuilder Builder(BI); for (PHINode &PN : EndBB->phis()) { unsigned OrigI = PN.getBasicBlockIndex(BB); unsigned ThenI = PN.getBasicBlockIndex(ThenBB); Value *OrigV = PN.getIncomingValue(OrigI); Value *ThenV = PN.getIncomingValue(ThenI); // Skip PHIs which are trivial. if (OrigV == ThenV) continue; // Create a select whose true value is the speculatively executed value and // false value is the pre-existing value. Swap them if the branch // destinations were inverted. Value *TrueV = ThenV, *FalseV = OrigV; if (Invert) std::swap(TrueV, FalseV); Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI); PN.setIncomingValue(OrigI, V); PN.setIncomingValue(ThenI, V); } // Remove speculated dbg intrinsics. // FIXME: Is it possible to do this in a more elegant way? 
Moving/merging the // dbg value for the different flows and inserting it after the select. for (Instruction *I : SpeculatedDbgIntrinsics) I->eraseFromParent(); ++NumSpeculations; return true; } /// Return true if we can thread a branch across this block. static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { int Size = 0; SmallPtrSet EphValues; auto IsEphemeral = [&](const Instruction *I) { if (isa(I)) return true; return !I->mayHaveSideEffects() && !I->isTerminator() && all_of(I->users(), [&](const User *U) { return EphValues.count(U); }); }; // Walk the loop in reverse so that we can identify ephemeral values properly // (values only feeding assumes). for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) { // Can't fold blocks that contain noduplicate or convergent calls. if (CallInst *CI = dyn_cast(&I)) if (CI->cannotDuplicate() || CI->isConvergent()) return false; // Ignore ephemeral values which are deleted during codegen. if (IsEphemeral(&I)) EphValues.insert(&I); // We will delete Phis while threading, so Phis should not be accounted in // block's size. else if (!isa(I)) { if (Size++ > MaxSmallBlockSize) return false; // Don't clone large BB's. } // We can only support instructions that do not define values that are // live outside of the current basic block. for (User *U : I.users()) { Instruction *UI = cast(U); if (UI->getParent() != BB || isa(UI)) return false; } // Looks ok, continue checking. } return true; } /// If we have a conditional branch on a PHI node value that is defined in the /// same block as the branch and if any PHI entries are constants, thread edges /// corresponding to that entry to be branches to their ultimate destination. static Optional FoldCondBranchOnPHIImpl(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC) { BasicBlock *BB = BI->getParent(); PHINode *PN = dyn_cast(BI->getCondition()); // NOTE: we currently cannot transform this case if the PHI node is used // outside of the block. if (!PN || PN->getParent() != BB || !PN->hasOneUse()) return false; // Degenerate case of a single entry PHI. if (PN->getNumIncomingValues() == 1) { FoldSingleEntryPHINodes(PN->getParent()); return true; } // Now we know that this block has multiple preds and two succs. if (!BlockIsSimpleEnoughToThreadThrough(BB)) return false; // Okay, this is a simple enough basic block. See if any phi values are // constants. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantInt *CB = dyn_cast(PN->getIncomingValue(i)); if (!CB || !CB->getType()->isIntegerTy(1)) continue; // Okay, we now know that all edges from PredBB should be revectored to // branch to RealDest. BasicBlock *PredBB = PN->getIncomingBlock(i); BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue()); if (RealDest == BB) continue; // Skip self loops. // Skip if the predecessor's terminator is an indirect branch. if (isa(PredBB->getTerminator())) continue; SmallVector Updates; // The dest block might have PHI nodes, other predecessors and other // difficult cases. Instead of being smart about this, just insert a new // block that jumps to the destination block, effectively splitting // the edge we are about to create. BasicBlock *EdgeBB = BasicBlock::Create(BB->getContext(), RealDest->getName() + ".critedge", RealDest->getParent(), RealDest); BranchInst *CritEdgeBranch = BranchInst::Create(RealDest, EdgeBB); if (DTU) Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest}); CritEdgeBranch->setDebugLoc(BI->getDebugLoc()); // Update PHI nodes. 
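      // Illustrative note: AddPredecessorToBlock below gives every PHI in
      // RealDest an entry for the new EdgeBB predecessor, reusing the value
      // those PHIs already receive from BB, so the rerouted edge stays
      // consistent with the existing incoming values.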
AddPredecessorToBlock(RealDest, EdgeBB, BB); // BB may have instructions that are being threaded over. Clone these // instructions into EdgeBB. We know that there will be no uses of the // cloned instructions outside of EdgeBB. BasicBlock::iterator InsertPt = EdgeBB->begin(); DenseMap TranslateMap; // Track translated values. for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { if (PHINode *PN = dyn_cast(BBI)) { TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB); continue; } // Clone the instruction. Instruction *N = BBI->clone(); if (BBI->hasName()) N->setName(BBI->getName() + ".c"); // Update operands due to translation. for (Use &Op : N->operands()) { DenseMap::iterator PI = TranslateMap.find(Op); if (PI != TranslateMap.end()) Op = PI->second; } // Check for trivial simplification. if (Value *V = SimplifyInstruction(N, {DL, nullptr, nullptr, AC})) { if (!BBI->use_empty()) TranslateMap[&*BBI] = V; if (!N->mayHaveSideEffects()) { N->deleteValue(); // Instruction folded away, don't need actual inst N = nullptr; } } else { if (!BBI->use_empty()) TranslateMap[&*BBI] = N; } if (N) { // Insert the new instruction into its new home. EdgeBB->getInstList().insert(InsertPt, N); // Register the new instruction with the assumption cache if necessary. if (auto *Assume = dyn_cast(N)) if (AC) AC->registerAssumption(Assume); } } // Loop over all of the edges from PredBB to BB, changing them to branch // to EdgeBB instead. Instruction *PredBBTI = PredBB->getTerminator(); for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i) if (PredBBTI->getSuccessor(i) == BB) { BB->removePredecessor(PredBB); PredBBTI->setSuccessor(i, EdgeBB); } if (DTU) { Updates.push_back({DominatorTree::Insert, PredBB, EdgeBB}); Updates.push_back({DominatorTree::Delete, PredBB, BB}); DTU->applyUpdates(Updates); } // Signal repeat, simplifying any other constants. return None; } return false; } static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC) { Optional Result; bool EverChanged = false; do { // Note that None means "we changed things, but recurse further." Result = FoldCondBranchOnPHIImpl(BI, DTU, DL, AC); EverChanged |= Result == None || *Result; } while (Result == None); return EverChanged; } /// Given a BB that starts with the specified two-entry PHI node, /// see if we can eliminate it. static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, const DataLayout &DL) { // Ok, this is a two entry PHI node. Check to see if this is a simple "if // statement", which has a very simple dominance structure. Basically, we // are trying to find the condition that is being branched on, which // subsequently causes this merge to happen. We really want control // dependence information for this check, but simplifycfg can't keep it up // to date, and this catches most of the cases we care about anyway. BasicBlock *BB = PN->getParent(); BasicBlock *IfTrue, *IfFalse; BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse); if (!DomBI) return false; Value *IfCond = DomBI->getCondition(); // Don't bother if the branch will be constant folded trivially. 
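  // Illustrative note: a dominating branch such as `br i1 true, ...` will be
  // folded away by other simplifications anyway, so building selects for it
  // here would be wasted work.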
if (isa(IfCond)) return false; BasicBlock *DomBlock = DomBI->getParent(); SmallVector IfBlocks; llvm::copy_if( PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) { return cast(IfBlock->getTerminator())->isUnconditional(); }); assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) && "Will have either one or two blocks to speculate."); // If the branch is non-unpredictable, see if we either predictably jump to // the merge bb (if we have only a single 'then' block), or if we predictably // jump to one specific 'then' block (if we have two of them). // It isn't beneficial to speculatively execute the code // from the block that we know is predictably not entered. if (!DomBI->getMetadata(LLVMContext::MD_unpredictable)) { uint64_t TWeight, FWeight; if (DomBI->extractProfMetadata(TWeight, FWeight) && (TWeight + FWeight) != 0) { BranchProbability BITrueProb = BranchProbability::getBranchProbability(TWeight, TWeight + FWeight); BranchProbability Likely = TTI.getPredictableBranchThreshold(); BranchProbability BIFalseProb = BITrueProb.getCompl(); if (IfBlocks.size() == 1) { BranchProbability BIBBProb = DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb; if (BIBBProb >= Likely) return false; } else { if (BITrueProb >= Likely || BIFalseProb >= Likely) return false; } } } // Don't try to fold an unreachable block. For example, the phi node itself // can't be the candidate if-condition for a select that we want to form. if (auto *IfCondPhiInst = dyn_cast(IfCond)) if (IfCondPhiInst->getParent() == BB) return false; // Okay, we found that we can merge this two-entry phi node into a select. // Doing so would require us to fold *all* two entry phi nodes in this block. // At some point this becomes non-profitable (particularly if the target // doesn't support cmov's). Only do this transformation if there are two or // fewer PHI nodes in this block. unsigned NumPhis = 0; for (BasicBlock::iterator I = BB->begin(); isa(I); ++NumPhis, ++I) if (NumPhis > 2) return false; // Loop over the PHI's seeing if we can promote them all to select // instructions. While we are at it, keep track of the instructions // that need to be moved to the dominating block. SmallPtrSet AggressiveInsts; InstructionCost Cost = 0; InstructionCost Budget = TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; bool Changed = false; for (BasicBlock::iterator II = BB->begin(); isa(II);) { PHINode *PN = cast(II++); if (Value *V = SimplifyInstruction(PN, {DL, PN})) { PN->replaceAllUsesWith(V); PN->eraseFromParent(); Changed = true; continue; } if (!dominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts, Cost, Budget, TTI) || !dominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts, Cost, Budget, TTI)) return Changed; } // If we folded the first phi, PN dangles at this point. Refresh it. If // we ran out of PHIs then we simplified them all. PN = dyn_cast(BB->begin()); if (!PN) return true; // Return true if at least one of these is a 'not', and another is either // a 'not' too, or a constant. auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) { if (!match(V0, m_Not(m_Value()))) std::swap(V0, V1); auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant()); return match(V0, m_Not(m_Value())) && match(V1, Invertible); }; // Don't fold i1 branches on PHIs which contain binary operators or // (possibly inverted) select form of or/ands, unless one of // the incoming values is an 'not' and another one is freely invertible. 
// These can often be turned into switches and other things. auto IsBinOpOrAnd = [](Value *V) { return match( V, m_CombineOr( m_BinOp(), m_CombineOr(m_Select(m_Value(), m_ImmConstant(), m_Value()), m_Select(m_Value(), m_Value(), m_ImmConstant())))); }; if (PN->getType()->isIntegerTy(1) && (IsBinOpOrAnd(PN->getIncomingValue(0)) || IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) && !CanHoistNotFromBothValues(PN->getIncomingValue(0), PN->getIncomingValue(1))) return Changed; // If all PHI nodes are promotable, check to make sure that all instructions // in the predecessor blocks can be promoted as well. If not, we won't be able // to get rid of the control flow, so it's not worth promoting to select // instructions. for (BasicBlock *IfBlock : IfBlocks) for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I) if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) { // This is not an aggressive instruction that we can promote. // Because of this, we won't be able to get rid of the control flow, so // the xform is not worth it. return Changed; } // If either of the blocks has it's address taken, we can't do this fold. if (any_of(IfBlocks, [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); })) return Changed; LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond << " T: " << IfTrue->getName() << " F: " << IfFalse->getName() << "\n"); // If we can still promote the PHI nodes after this gauntlet of tests, // do all of the PHI's now. // Move all 'aggressive' instructions, which are defined in the // conditional parts of the if's up to the dominating block. for (BasicBlock *IfBlock : IfBlocks) hoistAllInstructionsInto(DomBlock, DomBI, IfBlock); IRBuilder Builder(DomBI); // Propagate fast-math-flags from phi nodes to replacement selects. IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); while (PHINode *PN = dyn_cast(BB->begin())) { if (isa(PN)) Builder.setFastMathFlags(PN->getFastMathFlags()); // Change the PHI node into a select instruction. Value *TrueVal = PN->getIncomingValueForBlock(IfTrue); Value *FalseVal = PN->getIncomingValueForBlock(IfFalse); Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", DomBI); PN->replaceAllUsesWith(Sel); Sel->takeName(PN); PN->eraseFromParent(); } // At this point, all IfBlocks are empty, so our if statement // has been flattened. Change DomBlock to jump directly to our new block to // avoid other simplifycfg's kicking in on the diamond. Builder.CreateBr(BB); SmallVector Updates; if (DTU) { Updates.push_back({DominatorTree::Insert, DomBlock, BB}); for (auto *Successor : successors(DomBlock)) Updates.push_back({DominatorTree::Delete, DomBlock, Successor}); } DomBI->eraseFromParent(); if (DTU) DTU->applyUpdates(Updates); return true; } static Value *createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name = "") { // Try to relax logical op to binary op. if (impliesPoison(RHS, LHS)) return Builder.CreateBinOp(Opc, LHS, RHS, Name); if (Opc == Instruction::And) return Builder.CreateLogicalAnd(LHS, RHS, Name); if (Opc == Instruction::Or) return Builder.CreateLogicalOr(LHS, RHS, Name); llvm_unreachable("Invalid logical opcode"); } /// Return true if either PBI or BI has branch weight available, and store /// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does /// not have branch weight, use 1:1 as its weight. 
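/// (Illustrative example: if PBI carries !prof weights 7:1 and BI carries no
/// weights, the outputs are PredTrue=7, PredFalse=1, SuccTrue=1, SuccFalse=1
/// and the function returns true.)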
static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight) { bool PredHasWeights = PBI->extractProfMetadata(PredTrueWeight, PredFalseWeight); bool SuccHasWeights = BI->extractProfMetadata(SuccTrueWeight, SuccFalseWeight); if (PredHasWeights || SuccHasWeights) { if (!PredHasWeights) PredTrueWeight = PredFalseWeight = 1; if (!SuccHasWeights) SuccTrueWeight = SuccFalseWeight = 1; return true; } else { return false; } } /// Determine if the two branches share a common destination and deduce a glue /// that joins the branches' conditions to arrive at the common destination if /// that would be profitable. static Optional> shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI, const TargetTransformInfo *TTI) { assert(BI && PBI && BI->isConditional() && PBI->isConditional() && "Both blocks must end with a conditional branches."); assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) && "PredBB must be a predecessor of BB."); // We have the potential to fold the conditions together, but if the // predecessor branch is predictable, we may not want to merge them. uint64_t PTWeight, PFWeight; BranchProbability PBITrueProb, Likely; if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) && PBI->extractProfMetadata(PTWeight, PFWeight) && (PTWeight + PFWeight) != 0) { PBITrueProb = BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight); Likely = TTI->getPredictableBranchThreshold(); } if (PBI->getSuccessor(0) == BI->getSuccessor(0)) { // Speculate the 2nd condition unless the 1st is probably true. if (PBITrueProb.isUnknown() || PBITrueProb < Likely) return {{Instruction::Or, false}}; } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) { // Speculate the 2nd condition unless the 1st is probably false. if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely) return {{Instruction::And, false}}; } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) { // Speculate the 2nd condition unless the 1st is probably true. if (PBITrueProb.isUnknown() || PBITrueProb < Likely) return {{Instruction::And, true}}; } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) { // Speculate the 2nd condition unless the 1st is probably false. if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely) return {{Instruction::Or, true}}; } return None; } static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI) { BasicBlock *BB = BI->getParent(); BasicBlock *PredBlock = PBI->getParent(); // Determine if the two branches share a common destination. Instruction::BinaryOps Opc; bool InvertPredCond; std::tie(Opc, InvertPredCond) = *shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI); LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); IRBuilder<> Builder(PBI); // The builder is used to create instructions to eliminate the branch in BB. // If BB's terminator has !annotation metadata, add it to the new // instructions. Builder.CollectMetadataToCopy(BB->getTerminator(), {LLVMContext::MD_annotation}); // If we need to invert the condition in the pred block to match, do so now. 
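  // Illustrative note: when the predecessor's condition is a single-use
  // compare, the code below simply flips its predicate (e.g. `icmp eq`
  // becomes `icmp ne`); otherwise it materializes an explicit not of the
  // condition. In either case the branch successors are then swapped.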
if (InvertPredCond) { Value *NewCond = PBI->getCondition(); if (NewCond->hasOneUse() && isa(NewCond)) { CmpInst *CI = cast(NewCond); CI->setPredicate(CI->getInversePredicate()); } else { NewCond = Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not"); } PBI->setCondition(NewCond); PBI->swapSuccessors(); } BasicBlock *UniqueSucc = PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1); // Before cloning instructions, notify the successor basic block that it // is about to have a new predecessor. This will update PHI nodes, // which will allow us to update live-out uses of bonus instructions. AddPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU); // Try to update branch weights. uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight)) { SmallVector NewWeights; if (PBI->getSuccessor(0) == BB) { // PBI: br i1 %x, BB, FalseDest // BI: br i1 %y, UniqueSucc, FalseDest // TrueWeight is TrueWeight for PBI * TrueWeight for BI. NewWeights.push_back(PredTrueWeight * SuccTrueWeight); // FalseWeight is FalseWeight for PBI * TotalWeight for BI + // TrueWeight for PBI * FalseWeight for BI. // We assume that total weights of a BranchInst can fit into 32 bits. // Therefore, we will not have overflow using 64-bit arithmetic. NewWeights.push_back(PredFalseWeight * (SuccFalseWeight + SuccTrueWeight) + PredTrueWeight * SuccFalseWeight); } else { // PBI: br i1 %x, TrueDest, BB // BI: br i1 %y, TrueDest, UniqueSucc // TrueWeight is TrueWeight for PBI * TotalWeight for BI + // FalseWeight for PBI * TrueWeight for BI. NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) + PredFalseWeight * SuccTrueWeight); // FalseWeight is FalseWeight for PBI * FalseWeight for BI. NewWeights.push_back(PredFalseWeight * SuccFalseWeight); } // Halve the weights if any of them cannot fit in an uint32_t FitWeights(NewWeights); SmallVector MDWeights(NewWeights.begin(), NewWeights.end()); setBranchWeights(PBI, MDWeights[0], MDWeights[1]); // TODO: If BB is reachable from all paths through PredBlock, then we // could replace PBI's branch probabilities with BI's. } else PBI->setMetadata(LLVMContext::MD_prof, nullptr); // Now, update the CFG. PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc); if (DTU) DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc}, {DominatorTree::Delete, PredBlock, BB}}); // If BI was a loop latch, it may have had associated loop metadata. // We need to copy it to the new latch, that is, PBI. if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop)) PBI->setMetadata(LLVMContext::MD_loop, LoopMD); ValueToValueMapTy VMap; // maps original values to cloned values CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap); // Now that the Cond was cloned into the predecessor basic block, // or/and the two conditions together. Value *BICond = VMap[BI->getCondition()]; PBI->setCondition( createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond")); // Copy any debug value intrinsics into the end of PredBlock. for (Instruction &I : *BB) { if (isa(I)) { Instruction *NewI = I.clone(); RemapInstruction(NewI, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); NewI->insertBefore(PBI); } } ++NumFoldBranchToCommonDest; return true; } /// Return if an instruction's type or any of its operands' types are a vector /// type. 
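/// (Illustrative: both `add <4 x i32> %a, %b` and
/// `extractelement <4 x i32> %v, i64 0` count as vector ops here, the latter
/// because of its vector operand even though its result is scalar.)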
static bool isVectorOp(Instruction &I) { return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) { return U->getType()->isVectorTy(); }); } /// If this basic block is simple enough, and if a predecessor branches to us /// and one of our successors, fold the block into the predecessor and use /// logical operations to pick the right destination. bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI, unsigned BonusInstThreshold) { // If this block ends with an unconditional branch, // let SpeculativelyExecuteBB() deal with it. if (!BI->isConditional()) return false; BasicBlock *BB = BI->getParent(); TargetTransformInfo::TargetCostKind CostKind = BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize : TargetTransformInfo::TCK_SizeAndLatency; Instruction *Cond = dyn_cast(BI->getCondition()); if (!Cond || (!isa(Cond) && !isa(Cond)) || Cond->getParent() != BB || !Cond->hasOneUse()) return false; // Cond is known to be a compare or binary operator. Check to make sure that // neither operand is a potentially-trapping constant expression. if (ConstantExpr *CE = dyn_cast(Cond->getOperand(0))) if (CE->canTrap()) return false; if (ConstantExpr *CE = dyn_cast(Cond->getOperand(1))) if (CE->canTrap()) return false; // Finally, don't infinitely unroll conditional loops. if (is_contained(successors(BB), BB)) return false; // With which predecessors will we want to deal with? SmallVector Preds; for (BasicBlock *PredBlock : predecessors(BB)) { BranchInst *PBI = dyn_cast(PredBlock->getTerminator()); // Check that we have two conditional branches. If there is a PHI node in // the common successor, verify that the same value flows in from both // blocks. if (!PBI || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI)) continue; // Determine if the two branches share a common destination. Instruction::BinaryOps Opc; bool InvertPredCond; if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI)) std::tie(Opc, InvertPredCond) = *Recipe; else continue; // Check the cost of inserting the necessary logic before performing the // transformation. if (TTI) { Type *Ty = BI->getCondition()->getType(); InstructionCost Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind); if (InvertPredCond && (!PBI->getCondition()->hasOneUse() || !isa(PBI->getCondition()))) Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind); if (Cost > BranchFoldThreshold) continue; } // Ok, we do want to deal with this predecessor. Record it. Preds.emplace_back(PredBlock); } // If there aren't any predecessors into which we can fold, // don't bother checking the cost. if (Preds.empty()) return false; // Only allow this transformation if computing the condition doesn't involve // too many instructions and these involved instructions can be executed // unconditionally. We denote all involved instructions except the condition // as "bonus instructions", and only allow this transformation when the // number of the bonus instructions we'll need to create when cloning into // each predecessor does not exceed a certain threshold. unsigned NumBonusInsts = 0; bool SawVectorOp = false; const unsigned PredCount = Preds.size(); for (Instruction &I : *BB) { // Don't check the branch condition comparison itself. if (&I == Cond) continue; // Ignore dbg intrinsics, and the terminator. if (isa(I) || isa(I)) continue; // I must be safe to execute unconditionally. 
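    // Illustrative note: e.g. a `udiv` whose divisor is not known to be
    // non-zero, or a load that is not known dereferenceable, fails this check,
    // since the bonus instructions end up executing unconditionally in every
    // predecessor they are cloned into.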
if (!isSafeToSpeculativelyExecute(&I)) return false; SawVectorOp |= isVectorOp(I); // Account for the cost of duplicating this instruction into each // predecessor. Ignore free instructions. if (!TTI || TTI->getUserCost(&I, CostKind) != TargetTransformInfo::TCC_Free) { NumBonusInsts += PredCount; // Early exits once we reach the limit. if (NumBonusInsts > BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier) return false; } auto IsBCSSAUse = [BB, &I](Use &U) { auto *UI = cast(U.getUser()); if (auto *PN = dyn_cast(UI)) return PN->getIncomingBlock(U) == BB; return UI->getParent() == BB && I.comesBefore(UI); }; // Does this instruction require rewriting of uses? if (!all_of(I.uses(), IsBCSSAUse)) return false; } if (NumBonusInsts > BonusInstThreshold * (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1)) return false; // Ok, we have the budget. Perform the transformation. for (BasicBlock *PredBlock : Preds) { auto *PBI = cast(PredBlock->getTerminator()); return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI); } return false; } // If there is only one store in BB1 and BB2, return it, otherwise return // nullptr. static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) { StoreInst *S = nullptr; for (auto *BB : {BB1, BB2}) { if (!BB) continue; for (auto &I : *BB) if (auto *SI = dyn_cast(&I)) { if (S) // Multiple stores seen. return nullptr; else S = SI; } } return S; } static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV = nullptr) { // PHI is going to be a PHI node that allows the value V that is defined in // BB to be referenced in BB's only successor. // // If AlternativeV is nullptr, the only value we care about in PHI is V. It // doesn't matter to us what the other operand is (it'll never get used). We // could just create a new PHI with an undef incoming value, but that could // increase register pressure if EarlyCSE/InstCombine can't fold it with some // other PHI. So here we directly look for some PHI in BB's successor with V // as an incoming operand. If we find one, we use it, else we create a new // one. // // If AlternativeV is not nullptr, we care about both incoming values in PHI. // PHI must be exactly: phi [ %BB, %V ], [ %OtherBB, %AlternativeV] // where OtherBB is the single other predecessor of BB's only successor. PHINode *PHI = nullptr; BasicBlock *Succ = BB->getSingleSuccessor(); for (auto I = Succ->begin(); isa(I); ++I) if (cast(I)->getIncomingValueForBlock(BB) == V) { PHI = cast(I); if (!AlternativeV) break; assert(Succ->hasNPredecessors(2)); auto PredI = pred_begin(Succ); BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI; if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV) break; PHI = nullptr; } if (PHI) return PHI; // If V is not an instruction defined in BB, just return it. if (!AlternativeV && (!isa(V) || cast(V)->getParent() != BB)) return V; PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge", &Succ->front()); PHI->addIncoming(V, BB); for (BasicBlock *PredBB : predecessors(Succ)) if (PredBB != BB) PHI->addIncoming( AlternativeV ? 
AlternativeV : UndefValue::get(V->getType()), PredBB); return PHI; } static bool mergeConditionalStoreToAddress( BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { // For every pointer, there must be exactly two stores, one coming from // PTB or PFB, and the other from QTB or QFB. We don't support more than one // store (to any address) in PTB,PFB or QTB,QFB. // FIXME: We could relax this restriction with a bit more work and performance // testing. StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB); StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB); if (!PStore || !QStore) return false; // Now check the stores are compatible. if (!QStore->isUnordered() || !PStore->isUnordered()) return false; // Check that sinking the store won't cause program behavior changes. Sinking // the store out of the Q blocks won't change any behavior as we're sinking // from a block to its unconditional successor. But we're moving a store from // the P blocks down through the middle block (QBI) and past both QFB and QTB. // So we need to check that there are no aliasing loads or stores in // QBI, QTB and QFB. We also need to check there are no conflicting memory // operations between PStore and the end of its parent block. // // The ideal way to do this is to query AliasAnalysis, but we don't // preserve AA currently so that is dangerous. Be super safe and just // check there are no other memory operations at all. for (auto &I : *QFB->getSinglePredecessor()) if (I.mayReadOrWriteMemory()) return false; for (auto &I : *QFB) if (&I != QStore && I.mayReadOrWriteMemory()) return false; if (QTB) for (auto &I : *QTB) if (&I != QStore && I.mayReadOrWriteMemory()) return false; for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end(); I != E; ++I) if (&*I != PStore && I->mayReadOrWriteMemory()) return false; // If we're not in aggressive mode, we only optimize if we have some // confidence that by optimizing we'll allow P and/or Q to be if-converted. auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef FreeStores) { if (!BB) return true; // Heuristic: if the block can be if-converted/phi-folded and the // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to // thread this store. InstructionCost Cost = 0; InstructionCost Budget = PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; for (auto &I : BB->instructionsWithoutDebug(false)) { // Consider terminator instruction to be free. if (I.isTerminator()) continue; // If this is one the stores that we want to speculate out of this BB, // then don't count it's cost, consider it to be free. if (auto *S = dyn_cast(&I)) if (llvm::find(FreeStores, S)) continue; // Else, we have a white-list of instructions that we are ak speculating. if (!isa(I) && !isa(I)) return false; // Not in white-list - not worthwhile folding. // And finally, if this is a non-free instruction that we are okay // speculating, ensure that we consider the speculation budget. Cost += TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency); if (Cost > Budget) return false; // Eagerly refuse to fold as soon as we're out of budget. 
} assert(Cost <= Budget && "When we run out of budget we will eagerly return from within the " "per-instruction loop."); return true; }; const std::array FreeStores = {PStore, QStore}; if (!MergeCondStoresAggressively && (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) || !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores))) return false; // If PostBB has more than two predecessors, we need to split it so we can // sink the store. if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) { // We know that QFB's only successor is PostBB. And QFB has a single // predecessor. If QTB exists, then its only successor is also PostBB. // If QTB does not exist, then QFB's only predecessor has a conditional // branch to QFB and PostBB. BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor(); BasicBlock *NewBB = SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU); if (!NewBB) return false; PostBB = NewBB; } // OK, we're going to sink the stores to PostBB. The store has to be // conditional though, so first create the predicate. Value *PCond = cast(PFB->getSinglePredecessor()->getTerminator()) ->getCondition(); Value *QCond = cast(QFB->getSinglePredecessor()->getTerminator()) ->getCondition(); Value *PPHI = ensureValueAvailableInSuccessor(PStore->getValueOperand(), PStore->getParent()); Value *QPHI = ensureValueAvailableInSuccessor(QStore->getValueOperand(), QStore->getParent(), PPHI); IRBuilder<> QB(&*PostBB->getFirstInsertionPt()); Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond); Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond); if (InvertPCond) PPred = QB.CreateNot(PPred); if (InvertQCond) QPred = QB.CreateNot(QPred); Value *CombinedPred = QB.CreateOr(PPred, QPred); auto *T = SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(), /*Unreachable=*/false, /*BranchWeights=*/nullptr, DTU); QB.SetInsertPoint(T); StoreInst *SI = cast(QB.CreateStore(QPHI, Address)); SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata())); // Choose the minimum alignment. If we could prove both stores execute, we // could use biggest one. In this case, though, we only know that one of the // stores executes. And we don't know it's safe to take the alignment from a // store that doesn't execute. SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign())); QStore->eraseFromParent(); PStore->eraseFromParent(); return true; } static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { // The intention here is to find diamonds or triangles (see below) where each // conditional block contains a store to the same address. Both of these // stores are conditional, so they can't be unconditionally sunk. But it may // be profitable to speculatively sink the stores into one merged store at the // end, and predicate the merged store on the union of the two conditions of // PBI and QBI. // // This can reduce the number of stores executed if both of the conditions are // true, and can allow the blocks to become small enough to be if-converted. // This optimization will also chain, so that ladders of test-and-set // sequences can be if-converted away. 
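  // Illustrative source-level sketch (names are made up):
  //
  //   if (a) *p = x;
  //   if (b) *p = y;
  //
  // becomes a single store to *p guarded by (a | b), with the stored value
  // merged through PHI nodes.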
// // We only deal with simple diamonds or triangles: // // PBI or PBI or a combination of the two // / \ | \ // PTB PFB | PFB // \ / | / // QBI QBI // / \ | \ // QTB QFB | QFB // \ / | / // PostBB PostBB // // We model triangles as a type of diamond with a nullptr "true" block. // Triangles are canonicalized so that the fallthrough edge is represented by // a true condition, as in the diagram above. BasicBlock *PTB = PBI->getSuccessor(0); BasicBlock *PFB = PBI->getSuccessor(1); BasicBlock *QTB = QBI->getSuccessor(0); BasicBlock *QFB = QBI->getSuccessor(1); BasicBlock *PostBB = QFB->getSingleSuccessor(); // Make sure we have a good guess for PostBB. If QTB's only successor is // QFB, then QFB is a better PostBB. if (QTB->getSingleSuccessor() == QFB) PostBB = QFB; // If we couldn't find a good PostBB, stop. if (!PostBB) return false; bool InvertPCond = false, InvertQCond = false; // Canonicalize fallthroughs to the true branches. if (PFB == QBI->getParent()) { std::swap(PFB, PTB); InvertPCond = true; } if (QFB == PostBB) { std::swap(QFB, QTB); InvertQCond = true; } // From this point on we can assume PTB or QTB may be fallthroughs but PFB // and QFB may not. Model fallthroughs as a nullptr block. if (PTB == QBI->getParent()) PTB = nullptr; if (QTB == PostBB) QTB = nullptr; // Legality bailouts. We must have at least the non-fallthrough blocks and // the post-dominating block, and the non-fallthroughs must only have one // predecessor. auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) { return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S; }; if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) || !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB)) return false; if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) || (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB))) return false; if (!QBI->getParent()->hasNUses(2)) return false; // OK, this is a sequence of two diamonds or triangles. // Check if there are stores in PTB or PFB that are repeated in QTB or QFB. SmallPtrSet PStoreAddresses, QStoreAddresses; for (auto *BB : {PTB, PFB}) { if (!BB) continue; for (auto &I : *BB) if (StoreInst *SI = dyn_cast(&I)) PStoreAddresses.insert(SI->getPointerOperand()); } for (auto *BB : {QTB, QFB}) { if (!BB) continue; for (auto &I : *BB) if (StoreInst *SI = dyn_cast(&I)) QStoreAddresses.insert(SI->getPointerOperand()); } set_intersect(PStoreAddresses, QStoreAddresses); // set_intersect mutates PStoreAddresses in place. Rename it here to make it // clear what it contains. auto &CommonAddresses = PStoreAddresses; bool Changed = false; for (auto *Address : CommonAddresses) Changed |= mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond, DTU, DL, TTI); return Changed; } /// If the previous block ended with a widenable branch, determine if reusing /// the target block is profitable and legal. This will have the effect of /// "widening" PBI, but doesn't require us to reason about hosting safety. static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU) { // TODO: This can be generalized in two important ways: // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input // values from the PBI edge. // 2) We can sink side effecting instructions into BI's fallthrough // successor provided they doesn't contribute to computation of // BI's condition. 
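  // Illustrative note: a widenable branch is roughly of the form
  //   %wc = call i1 @llvm.experimental.widenable.condition()
  //   %c  = and i1 %cond, %wc
  //   br i1 %c, label %guarded, label %deopt
  // which is what parseWidenableBranch() matches below.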
Value *CondWB, *WC; BasicBlock *IfTrueBB, *IfFalseBB; if (!parseWidenableBranch(PBI, CondWB, WC, IfTrueBB, IfFalseBB) || IfTrueBB != BI->getParent() || !BI->getParent()->getSinglePredecessor()) return false; if (!IfFalseBB->phis().empty()) return false; // TODO // Use lambda to lazily compute expensive condition after cheap ones. auto NoSideEffects = [](BasicBlock &BB) { return llvm::none_of(BB, [](const Instruction &I) { return I.mayWriteToMemory() || I.mayHaveSideEffects(); }); }; if (BI->getSuccessor(1) != IfFalseBB && // no inf looping BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability NoSideEffects(*BI->getParent())) { auto *OldSuccessor = BI->getSuccessor(1); OldSuccessor->removePredecessor(BI->getParent()); BI->setSuccessor(1, IfFalseBB); if (DTU) DTU->applyUpdates( {{DominatorTree::Insert, BI->getParent(), IfFalseBB}, {DominatorTree::Delete, BI->getParent(), OldSuccessor}}); return true; } if (BI->getSuccessor(0) != IfFalseBB && // no inf looping BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability NoSideEffects(*BI->getParent())) { auto *OldSuccessor = BI->getSuccessor(0); OldSuccessor->removePredecessor(BI->getParent()); BI->setSuccessor(0, IfFalseBB); if (DTU) DTU->applyUpdates( {{DominatorTree::Insert, BI->getParent(), IfFalseBB}, {DominatorTree::Delete, BI->getParent(), OldSuccessor}}); return true; } return false; } /// If we have a conditional branch as a predecessor of another block, /// this function tries to simplify it. We know /// that PBI and BI are both conditional branches, and BI is in one of the /// successor blocks of PBI - PBI branches to BI. static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { assert(PBI->isConditional() && BI->isConditional()); BasicBlock *BB = BI->getParent(); // If this block ends with a branch instruction, and if there is a // predecessor that ends on a branch of the same condition, make // this conditional branch redundant. if (PBI->getCondition() == BI->getCondition() && PBI->getSuccessor(0) != PBI->getSuccessor(1)) { // Okay, the outcome of this conditional branch is statically // knowable. If this block had a single pred, handle specially. if (BB->getSinglePredecessor()) { // Turn this into a branch on constant. bool CondIsTrue = PBI->getSuccessor(0) == BB; BI->setCondition( ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue)); return true; // Nuke the branch on constant. } // Otherwise, if there are multiple predecessors, insert a PHI that merges // in the constant and simplify the block result. Subsequent passes of // simplifycfg will thread the block. if (BlockIsSimpleEnoughToThreadThrough(BB)) { pred_iterator PB = pred_begin(BB), PE = pred_end(BB); PHINode *NewPN = PHINode::Create( Type::getInt1Ty(BB->getContext()), std::distance(PB, PE), BI->getCondition()->getName() + ".pr", &BB->front()); // Okay, we're going to insert the PHI node. Since PBI is not the only // predecessor, compute the PHI'd conditional value for all of the preds. // Any predecessor where the condition is not computable we keep symbolic. 
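      // Illustrative note: for a predecessor P that also branches on the same
      // condition, the value reaching BB from P is statically known, so the
      // PHI gets the matching `i1 true`/`i1 false` entry; for any other
      // predecessor the PHI simply receives the original condition value.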
for (pred_iterator PI = PB; PI != PE; ++PI) { BasicBlock *P = *PI; if ((PBI = dyn_cast(P->getTerminator())) && PBI != BI && PBI->isConditional() && PBI->getCondition() == BI->getCondition() && PBI->getSuccessor(0) != PBI->getSuccessor(1)) { bool CondIsTrue = PBI->getSuccessor(0) == BB; NewPN->addIncoming( ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue), P); } else { NewPN->addIncoming(BI->getCondition(), P); } } BI->setCondition(NewPN); return true; } } // If the previous block ended with a widenable branch, determine if reusing // the target block is profitable and legal. This will have the effect of // "widening" PBI, but doesn't require us to reason about hosting safety. if (tryWidenCondBranchToCondBranch(PBI, BI, DTU)) return true; if (auto *CE = dyn_cast(BI->getCondition())) if (CE->canTrap()) return false; // If both branches are conditional and both contain stores to the same // address, remove the stores from the conditionals and create a conditional // merged store at the end. if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI)) return true; // If this is a conditional branch in an empty block, and if any // predecessors are a conditional branch to one of our destinations, // fold the conditions into logical ops and one cond br. // Ignore dbg intrinsics. if (&*BB->instructionsWithoutDebug(false).begin() != BI) return false; int PBIOp, BIOp; if (PBI->getSuccessor(0) == BI->getSuccessor(0)) { PBIOp = 0; BIOp = 0; } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) { PBIOp = 0; BIOp = 1; } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) { PBIOp = 1; BIOp = 0; } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) { PBIOp = 1; BIOp = 1; } else { return false; } // Check to make sure that the other destination of this branch // isn't BB itself. If so, this is an infinite loop that will // keep getting unwound. if (PBI->getSuccessor(PBIOp) == BB) return false; // Do not perform this transformation if it would require // insertion of a large number of select instructions. For targets // without predication/cmovs, this is a big pessimization. // Also do not perform this transformation if any phi node in the common // destination block can trap when reached by BB or PBB (PR17073). In that // case, it would be unsafe to hoist the operation into a select instruction. BasicBlock *CommonDest = PBI->getSuccessor(PBIOp); BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1); unsigned NumPhis = 0; for (BasicBlock::iterator II = CommonDest->begin(); isa(II); ++II, ++NumPhis) { if (NumPhis > 2) // Disable this xform. return false; PHINode *PN = cast(II); Value *BIV = PN->getIncomingValueForBlock(BB); if (ConstantExpr *CE = dyn_cast(BIV)) if (CE->canTrap()) return false; unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent()); Value *PBIV = PN->getIncomingValue(PBBIdx); if (ConstantExpr *CE = dyn_cast(PBIV)) if (CE->canTrap()) return false; } // Finally, if everything is ok, fold the branches to logical ops. BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1); LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent() << "AND: " << *BI->getParent()); SmallVector Updates; // If OtherDest *is* BB, then BB is a basic block with a single conditional // branch in it, where one edge (OtherDest) goes back to itself but the other // exits. We don't *know* that the program avoids the infinite loop // (even though that seems likely). If we do this xform naively, we'll end up // recursively unpeeling the loop. 
Since we know that (after the xform is // done) that the block *is* infinite if reached, we just make it an obviously // infinite loop with no cond branch. if (OtherDest == BB) { // Insert it at the end of the function, because it's either code, // or it won't matter if it's hot. :) BasicBlock *InfLoopBlock = BasicBlock::Create(BB->getContext(), "infloop", BB->getParent()); BranchInst::Create(InfLoopBlock, InfLoopBlock); if (DTU) Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock}); OtherDest = InfLoopBlock; } LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent()); // BI may have other predecessors. Because of this, we leave // it alone, but modify PBI. // Make sure we get to CommonDest on True&True directions. Value *PBICond = PBI->getCondition(); IRBuilder Builder(PBI); if (PBIOp) PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not"); Value *BICond = BI->getCondition(); if (BIOp) BICond = Builder.CreateNot(BICond, BICond->getName() + ".not"); // Merge the conditions. Value *Cond = createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge"); // Modify PBI to branch on the new condition to the new dests. PBI->setCondition(Cond); PBI->setSuccessor(0, CommonDest); PBI->setSuccessor(1, OtherDest); if (DTU) { Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest}); Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest}); DTU->applyUpdates(Updates); } // Update branch weight for PBI. uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight; uint64_t PredCommon, PredOther, SuccCommon, SuccOther; bool HasWeights = extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight); if (HasWeights) { PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight; PredOther = PBIOp ? PredTrueWeight : PredFalseWeight; SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight; SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight; // The weight to CommonDest should be PredCommon * SuccTotal + // PredOther * SuccCommon. // The weight to OtherDest should be PredOther * SuccOther. uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) + PredOther * SuccCommon, PredOther * SuccOther}; // Halve the weights if any of them cannot fit in an uint32_t FitWeights(NewWeights); setBranchWeights(PBI, NewWeights[0], NewWeights[1]); } // OtherDest may have phi nodes. If so, add an entry from PBI's // block that are identical to the entries for BI's block. AddPredecessorToBlock(OtherDest, PBI->getParent(), BB); // We know that the CommonDest already had an edge from PBI to // it. If it has PHIs though, the PHIs may have different // entries for BB and PBI's BB. If so, insert a select to make // them agree. for (PHINode &PN : CommonDest->phis()) { Value *BIV = PN.getIncomingValueForBlock(BB); unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent()); Value *PBIV = PN.getIncomingValue(PBBIdx); if (BIV != PBIV) { // Insert a select in PBI to pick the right value. SelectInst *NV = cast( Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux")); PN.setIncomingValue(PBBIdx, NV); // Although the select has the same condition as PBI, the original branch // weights for PBI do not apply to the new select because the select's // 'logical' edges are incoming edges of the phi that is eliminated, not // the outgoing edges of PBI. if (HasWeights) { uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight; uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight; uint64_t SuccCommon = BIOp ? 
SuccFalseWeight : SuccTrueWeight; uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight; // The weight to PredCommonDest should be PredCommon * SuccTotal. // The weight to PredOtherDest should be PredOther * SuccCommon. uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther), PredOther * SuccCommon}; FitWeights(NewWeights); setBranchWeights(NV, NewWeights[0], NewWeights[1]); } } } LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent()); LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent()); // This basic block is probably dead. We know it has at least // one fewer predecessor. return true; } // Simplifies a terminator by replacing it with a branch to TrueBB if Cond is // true or to FalseBB if Cond is false. // Takes care of updating the successors and removing the old terminator. // Also makes sure not to introduce new successors by assuming that edges to // non-successor TrueBBs and FalseBBs aren't reachable. bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond, BasicBlock *TrueBB, BasicBlock *FalseBB, uint32_t TrueWeight, uint32_t FalseWeight) { auto *BB = OldTerm->getParent(); // Remove any superfluous successor edges from the CFG. // First, figure out which successors to preserve. // If TrueBB and FalseBB are equal, only try to preserve one copy of that // successor. BasicBlock *KeepEdge1 = TrueBB; BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr; SmallSetVector RemovedSuccessors; // Then remove the rest. for (BasicBlock *Succ : successors(OldTerm)) { // Make sure only to keep exactly one copy of each edge. if (Succ == KeepEdge1) KeepEdge1 = nullptr; else if (Succ == KeepEdge2) KeepEdge2 = nullptr; else { Succ->removePredecessor(BB, /*KeepOneInputPHIs=*/true); if (Succ != TrueBB && Succ != FalseBB) RemovedSuccessors.insert(Succ); } } IRBuilder<> Builder(OldTerm); Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc()); // Insert an appropriate new terminator. if (!KeepEdge1 && !KeepEdge2) { if (TrueBB == FalseBB) { // We were only looking for one successor, and it was present. // Create an unconditional branch to it. Builder.CreateBr(TrueBB); } else { // We found both of the successors we were looking for. // Create a conditional branch sharing the condition of the select. BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB); if (TrueWeight != FalseWeight) setBranchWeights(NewBI, TrueWeight, FalseWeight); } } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) { // Neither of the selected blocks were successors, so this // terminator must be unreachable. new UnreachableInst(OldTerm->getContext(), OldTerm); } else { // One of the selected values was a successor, but the other wasn't. // Insert an unconditional branch to the one that was found; // the edge to the one that wasn't must be unreachable. if (!KeepEdge1) { // Only TrueBB was found. Builder.CreateBr(TrueBB); } else { // Only FalseBB was found. Builder.CreateBr(FalseBB); } } EraseTerminatorAndDCECond(OldTerm); if (DTU) { SmallVector Updates; Updates.reserve(RemovedSuccessors.size()); for (auto *RemovedSuccessor : RemovedSuccessors) Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor}); DTU->applyUpdates(Updates); } return true; } // Replaces // (switch (select cond, X, Y)) on constant X, Y // with a branch - conditional if X and Y lead to distinct BBs, // unconditional otherwise. bool SimplifyCFGOpt::SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) { // Check for constant integer values in the select. 
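  // Illustrative example: for `switch i32 (select i1 %c, i32 1, i32 4)`, the
  // switch is replaced by `br i1 %c, label %case1, label %case4` when 1 and 4
  // map to different successors, or by an unconditional branch when they map
  // to the same block.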
ConstantInt *TrueVal = dyn_cast(Select->getTrueValue()); ConstantInt *FalseVal = dyn_cast(Select->getFalseValue()); if (!TrueVal || !FalseVal) return false; // Find the relevant condition and destinations. Value *Condition = Select->getCondition(); BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor(); BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor(); // Get weight for TrueBB and FalseBB. uint32_t TrueWeight = 0, FalseWeight = 0; SmallVector Weights; bool HasWeights = HasBranchWeights(SI); if (HasWeights) { GetBranchWeights(SI, Weights); if (Weights.size() == 1 + SI->getNumCases()) { TrueWeight = (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()]; FalseWeight = (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()]; } } // Perform the actual simplification. return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight, FalseWeight); } // Replaces // (indirectbr (select cond, blockaddress(@fn, BlockA), // blockaddress(@fn, BlockB))) // with // (br cond, BlockA, BlockB). bool SimplifyCFGOpt::SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI) { // Check that both operands of the select are block addresses. BlockAddress *TBA = dyn_cast(SI->getTrueValue()); BlockAddress *FBA = dyn_cast(SI->getFalseValue()); if (!TBA || !FBA) return false; // Extract the actual blocks. BasicBlock *TrueBB = TBA->getBasicBlock(); BasicBlock *FalseBB = FBA->getBasicBlock(); // Perform the actual simplification. return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0, 0); } /// This is called when we find an icmp instruction /// (a seteq/setne with a constant) as the only instruction in a /// block that ends with an uncond branch. We are looking for a very specific /// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In /// this case, we merge the first two "or's of icmp" into a switch, but then the /// default value goes to an uncond block with a seteq in it, we get something /// like: /// /// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ] /// DEFAULT: /// %tmp = icmp eq i8 %A, 92 /// br label %end /// end: /// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ] /// /// We prefer to split the edge to 'end' so that there is a true/false entry to /// the PHI, merging the third icmp into the switch. bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt( ICmpInst *ICI, IRBuilder<> &Builder) { BasicBlock *BB = ICI->getParent(); // If the block has any PHIs in it or the icmp has multiple uses, it is too // complex. if (isa(BB->begin()) || !ICI->hasOneUse()) return false; Value *V = ICI->getOperand(0); ConstantInt *Cst = cast(ICI->getOperand(1)); // The pattern we're looking for is where our only predecessor is a switch on // 'V' and this block is the default case for the switch. In this case we can // fold the compared value into the switch to simplify things. BasicBlock *Pred = BB->getSinglePredecessor(); if (!Pred || !isa(Pred->getTerminator())) return false; SwitchInst *SI = cast(Pred->getTerminator()); if (SI->getCondition() != V) return false; // If BB is reachable on a non-default case, then we simply know the value of // V in this block. Substitute it and constant fold the icmp instruction // away. 
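  // Illustrative note: if the only way into BB is the `i8 2` case of the
  // switch, then V is known to be 2 here, so an `icmp eq i8 %V, 92` in BB
  // folds to false and disappears.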
if (SI->getDefaultDest() != BB) { ConstantInt *VVal = SI->findCaseDest(BB); assert(VVal && "Should have a unique destination value"); ICI->setOperand(0, VVal); if (Value *V = SimplifyInstruction(ICI, {DL, ICI})) { ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); } // BB is now empty, so it is likely to simplify away. return requestResimplify(); } // Ok, the block is reachable from the default dest. If the constant we're // comparing exists in one of the other edges, then we can constant fold ICI // and zap it. if (SI->findCaseValue(Cst) != SI->case_default()) { Value *V; if (ICI->getPredicate() == ICmpInst::ICMP_EQ) V = ConstantInt::getFalse(BB->getContext()); else V = ConstantInt::getTrue(BB->getContext()); ICI->replaceAllUsesWith(V); ICI->eraseFromParent(); // BB is now empty, so it is likely to simplify away. return requestResimplify(); } // The use of the icmp has to be in the 'end' block, by the only PHI node in // the block. BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0); PHINode *PHIUse = dyn_cast(ICI->user_back()); if (PHIUse == nullptr || PHIUse != &SuccBlock->front() || isa(++BasicBlock::iterator(PHIUse))) return false; // If the icmp is a SETEQ, then the default dest gets false, the new edge gets // true in the PHI. Constant *DefaultCst = ConstantInt::getTrue(BB->getContext()); Constant *NewCst = ConstantInt::getFalse(BB->getContext()); if (ICI->getPredicate() == ICmpInst::ICMP_EQ) std::swap(DefaultCst, NewCst); // Replace ICI (which is used by the PHI for the default value) with true or // false depending on if it is EQ or NE. ICI->replaceAllUsesWith(DefaultCst); ICI->eraseFromParent(); SmallVector Updates; // Okay, the switch goes to this block on a default value. Add an edge from // the switch to the merge point on the compared value. BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB); { SwitchInstProfUpdateWrapper SIW(*SI); auto W0 = SIW.getSuccessorWeight(0); SwitchInstProfUpdateWrapper::CaseWeightOpt NewW; if (W0) { NewW = ((uint64_t(*W0) + 1) >> 1); SIW.setSuccessorWeight(0, *NewW); } SIW.addCase(Cst, NewBB, NewW); if (DTU) Updates.push_back({DominatorTree::Insert, Pred, NewBB}); } // NewBB branches to the phi block, add the uncond branch and the phi entry. Builder.SetInsertPoint(NewBB); Builder.SetCurrentDebugLocation(SI->getDebugLoc()); Builder.CreateBr(SuccBlock); PHIUse->addIncoming(NewCst, NewBB); if (DTU) { Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock}); DTU->applyUpdates(Updates); } return true; } /// The specified branch is a conditional branch. /// Check to see if it is branching on an or/and chain of icmp instructions, and /// fold it into a switch instruction if so. bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, const DataLayout &DL) { Instruction *Cond = dyn_cast(BI->getCondition()); if (!Cond) return false; // Change br (X == 0 | X == 1), T, F into a switch instruction. // If this is a bunch of seteq's or'd together, or if it's a bunch of // 'setne's and'ed together, collect them. // Try to gather values from a chain of and/or to be turned into a switch ConstantComparesGatherer ConstantCompare(Cond, DL); // Unpack the result SmallVectorImpl &Values = ConstantCompare.Vals; Value *CompVal = ConstantCompare.CompValue; unsigned UsedICmps = ConstantCompare.UsedICmps; Value *ExtraCase = ConstantCompare.Extra; // If we didn't have a multiply compared value, fail. if (!CompVal) return false; // Avoid turning single icmps into a switch. 
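  // Illustrative note: a lone `%x == 0` is already a plain conditional branch;
  // only chains such as `A == 1 || A == 2 || A == 3` gain anything from
  // becoming a switch.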
if (UsedICmps <= 1) return false; bool TrueWhenEqual = match(Cond, m_LogicalOr(m_Value(), m_Value())); // There might be duplicate constants in the list, which the switch // instruction can't handle, remove them now. array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate); Values.erase(std::unique(Values.begin(), Values.end()), Values.end()); // If Extra was used, we require at least two switch values to do the // transformation. A switch with one value is just a conditional branch. if (ExtraCase && Values.size() < 2) return false; // TODO: Preserve branch weight metadata, similarly to how // FoldValueComparisonIntoPredecessors preserves it. // Figure out which block is which destination. BasicBlock *DefaultBB = BI->getSuccessor(1); BasicBlock *EdgeBB = BI->getSuccessor(0); if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB); BasicBlock *BB = BI->getParent(); LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size() << " cases into SWITCH. BB is:\n" << *BB); SmallVector Updates; // If there are any extra values that couldn't be folded into the switch // then we evaluate them with an explicit branch first. Split the block // right before the condbr to handle it. if (ExtraCase) { BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr, /*MSSAU=*/nullptr, "switch.early.test"); // Remove the uncond branch added to the old block. Instruction *OldTI = BB->getTerminator(); Builder.SetInsertPoint(OldTI); // There can be an unintended UB if extra values are Poison. Before the // transformation, extra values may not be evaluated according to the // condition, and it will not raise UB. But after transformation, we are // evaluating extra values before checking the condition, and it will raise // UB. It can be solved by adding freeze instruction to extra values. AssumptionCache *AC = Options.AC; if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr)) ExtraCase = Builder.CreateFreeze(ExtraCase); if (TrueWhenEqual) Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB); else Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB); OldTI->eraseFromParent(); if (DTU) Updates.push_back({DominatorTree::Insert, BB, EdgeBB}); // If there are PHI nodes in EdgeBB, then we need to add a new entry to them // for the edge we just added. AddPredecessorToBlock(EdgeBB, BB, NewBB); LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase << "\nEXTRABB = " << *BB); BB = NewBB; } Builder.SetInsertPoint(BI); // Convert pointer to int before we switch. if (CompVal->getType()->isPointerTy()) { CompVal = Builder.CreatePtrToInt( CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr"); } // Create the new switch instruction now. SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size()); // Add all of the 'cases' to the switch instruction. for (unsigned i = 0, e = Values.size(); i != e; ++i) New->addCase(Values[i], EdgeBB); // We added edges from PI to the EdgeBB. As such, if there were any // PHI nodes in EdgeBB, they need entries to be added corresponding to // the number of edges added. for (BasicBlock::iterator BBI = EdgeBB->begin(); isa(BBI); ++BBI) { PHINode *PN = cast(BBI); Value *InVal = PN->getIncomingValueForBlock(BB); for (unsigned i = 0, e = Values.size() - 1; i != e; ++i) PN->addIncoming(InVal, BB); } // Erase the old branch instruction. 
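// Illustrative sketch: the source-level equivalence SimplifyBranchOnICmpChain
// relies on. The standalone functions below are hypothetical examples, not
// part of this pass; they only show that an or'd chain of equality tests picks
// the same successor as a switch over the collected constants.
#if 0
#include <cassert>
static bool viaIcmpChain(int X) { return X == 0 || X == 1 || X == 4 || X == 9; }
static bool viaSwitch(int X) {
  switch (X) {
  case 0: case 1: case 4: case 9:
    return true;  // EdgeBB
  default:
    return false; // DefaultBB
  }
}
static void checkEquivalence() {
  for (int X = -32; X <= 32; ++X)
    assert(viaIcmpChain(X) == viaSwitch(X));
}
#endif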
EraseTerminatorAndDCECond(BI); if (DTU) DTU->applyUpdates(Updates); LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n'); return true; } bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { if (isa(RI->getValue())) return simplifyCommonResume(RI); else if (isa(RI->getParent()->getFirstNonPHI()) && RI->getValue() == RI->getParent()->getFirstNonPHI()) // The resume must unwind the exception that caused control to branch here. return simplifySingleResume(RI); return false; } // Check if cleanup block is empty static bool isCleanupBlockEmpty(iterator_range R) { for (Instruction &I : R) { auto *II = dyn_cast(&I); if (!II) return false; Intrinsic::ID IntrinsicID = II->getIntrinsicID(); switch (IntrinsicID) { case Intrinsic::dbg_declare: case Intrinsic::dbg_value: case Intrinsic::dbg_label: case Intrinsic::lifetime_end: break; default: return false; } } return true; } // Simplify resume that is shared by several landing pads (phi of landing pad). bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) { BasicBlock *BB = RI->getParent(); // Check that there are no other instructions except for debug and lifetime // intrinsics between the phi's and resume instruction. if (!isCleanupBlockEmpty( make_range(RI->getParent()->getFirstNonPHI(), BB->getTerminator()))) return false; SmallSetVector TrivialUnwindBlocks; auto *PhiLPInst = cast(RI->getValue()); // Check incoming blocks to see if any of them are trivial. for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End; Idx++) { auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx); auto *IncomingValue = PhiLPInst->getIncomingValue(Idx); // If the block has other successors, we can not delete it because // it has other dependents. if (IncomingBB->getUniqueSuccessor() != BB) continue; auto *LandingPad = dyn_cast(IncomingBB->getFirstNonPHI()); // Not the landing pad that caused the control to branch here. if (IncomingValue != LandingPad) continue; if (isCleanupBlockEmpty( make_range(LandingPad->getNextNode(), IncomingBB->getTerminator()))) TrivialUnwindBlocks.insert(IncomingBB); } // If no trivial unwind blocks, don't do any simplifications. if (TrivialUnwindBlocks.empty()) return false; // Turn all invokes that unwind here into calls. for (auto *TrivialBB : TrivialUnwindBlocks) { // Blocks that will be simplified should be removed from the phi node. // Note there could be multiple edges to the resume block, and we need // to remove them all. while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1) BB->removePredecessor(TrivialBB, true); for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(TrivialBB))) { removeUnwindEdge(Pred, DTU); ++NumInvokes; } // In each SimplifyCFG run, only the current processed block can be erased. // Otherwise, it will break the iteration of SimplifyCFG pass. So instead // of erasing TrivialBB, we only remove the branch to the common resume // block so that we can later erase the resume block since it has no // predecessors. TrivialBB->getTerminator()->eraseFromParent(); new UnreachableInst(RI->getContext(), TrivialBB); if (DTU) DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}}); } // Delete the resume block if all its predecessors have been removed. if (pred_empty(BB)) DeleteDeadBlock(BB, DTU); return !TrivialUnwindBlocks.empty(); } // Simplify resume that is only used by a single (non-phi) landing pad. 
bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) { BasicBlock *BB = RI->getParent(); auto *LPInst = cast(BB->getFirstNonPHI()); assert(RI->getValue() == LPInst && "Resume must unwind the exception that caused control to here"); // Check that there are no other instructions except for debug intrinsics. if (!isCleanupBlockEmpty( make_range(LPInst->getNextNode(), RI))) return false; // Turn all invokes that unwind here into calls and delete the basic block. for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) { removeUnwindEdge(Pred, DTU); ++NumInvokes; } // The landingpad is now unreachable. Zap it. DeleteDeadBlock(BB, DTU); return true; } static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) { // If this is a trivial cleanup pad that executes no instructions, it can be // eliminated. If the cleanup pad continues to the caller, any predecessor // that is an EH pad will be updated to continue to the caller and any // predecessor that terminates with an invoke instruction will have its invoke // instruction converted to a call instruction. If the cleanup pad being // simplified does not continue to the caller, each predecessor will be // updated to continue to the unwind destination of the cleanup pad being // simplified. BasicBlock *BB = RI->getParent(); CleanupPadInst *CPInst = RI->getCleanupPad(); if (CPInst->getParent() != BB) // This isn't an empty cleanup. return false; // We cannot kill the pad if it has multiple uses. This typically arises // from unreachable basic blocks. if (!CPInst->hasOneUse()) return false; // Check that there are no other instructions except for benign intrinsics. if (!isCleanupBlockEmpty( make_range(CPInst->getNextNode(), RI))) return false; // If the cleanup return we are simplifying unwinds to the caller, this will // set UnwindDest to nullptr. BasicBlock *UnwindDest = RI->getUnwindDest(); Instruction *DestEHPad = UnwindDest ? UnwindDest->getFirstNonPHI() : nullptr; // We're about to remove BB from the control flow. Before we do, sink any // PHINodes into the unwind destination. Doing this before changing the // control flow avoids some potentially slow checks, since we can currently // be certain that UnwindDest and BB have no common predecessors (since they // are both EH pads). if (UnwindDest) { // First, go through the PHI nodes in UnwindDest and update any nodes that // reference the block we are removing for (PHINode &DestPN : UnwindDest->phis()) { int Idx = DestPN.getBasicBlockIndex(BB); // Since BB unwinds to UnwindDest, it has to be in the PHI node. assert(Idx != -1); // This PHI node has an incoming value that corresponds to a control // path through the cleanup pad we are removing. If the incoming // value is in the cleanup pad, it must be a PHINode (because we // verified above that the block is otherwise empty). Otherwise, the // value is either a constant or a value that dominates the cleanup // pad being removed. // // Because BB and UnwindDest are both EH pads, all of their // predecessors must unwind to these blocks, and since no instruction // can have multiple unwind destinations, there will be no overlap in // incoming blocks between SrcPN and DestPN. Value *SrcVal = DestPN.getIncomingValue(Idx); PHINode *SrcPN = dyn_cast(SrcVal); bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB; for (auto *Pred : predecessors(BB)) { Value *Incoming = NeedPHITranslation ? 
SrcPN->getIncomingValueForBlock(Pred) : SrcVal; DestPN.addIncoming(Incoming, Pred); } } // Sink any remaining PHI nodes directly into UnwindDest. Instruction *InsertPt = DestEHPad; for (PHINode &PN : make_early_inc_range(BB->phis())) { if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB)) // If the PHI node has no uses or all of its uses are in this basic // block (meaning they are debug or lifetime intrinsics), just leave // it. It will be erased when we erase BB below. continue; // Otherwise, sink this PHI node into UnwindDest. // Any predecessors to UnwindDest which are not already represented // must be back edges which inherit the value from the path through // BB. In this case, the PHI value must reference itself. for (auto *pred : predecessors(UnwindDest)) if (pred != BB) PN.addIncoming(&PN, pred); PN.moveBefore(InsertPt); // Also, add a dummy incoming value for the original BB itself, // so that the PHI is well-formed until we drop said predecessor. PN.addIncoming(UndefValue::get(PN.getType()), BB); } } std::vector Updates; // We use make_early_inc_range here because we will remove all predecessors. for (BasicBlock *PredBB : llvm::make_early_inc_range(predecessors(BB))) { if (UnwindDest == nullptr) { if (DTU) { DTU->applyUpdates(Updates); Updates.clear(); } removeUnwindEdge(PredBB, DTU); ++NumInvokes; } else { BB->removePredecessor(PredBB); Instruction *TI = PredBB->getTerminator(); TI->replaceUsesOfWith(BB, UnwindDest); if (DTU) { Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest}); Updates.push_back({DominatorTree::Delete, PredBB, BB}); } } } if (DTU) DTU->applyUpdates(Updates); DeleteDeadBlock(BB, DTU); return true; } // Try to merge two cleanuppads together. static bool mergeCleanupPad(CleanupReturnInst *RI) { // Skip any cleanuprets which unwind to caller, there is nothing to merge // with. BasicBlock *UnwindDest = RI->getUnwindDest(); if (!UnwindDest) return false; // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't // be safe to merge without code duplication. if (UnwindDest->getSinglePredecessor() != RI->getParent()) return false; // Verify that our cleanuppad's unwind destination is another cleanuppad. auto *SuccessorCleanupPad = dyn_cast(&UnwindDest->front()); if (!SuccessorCleanupPad) return false; CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad(); // Replace any uses of the successor cleanupad with the predecessor pad // The only cleanuppad uses should be this cleanupret, it's cleanupret and // funclet bundle operands. SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad); // Remove the old cleanuppad. SuccessorCleanupPad->eraseFromParent(); // Now, we simply replace the cleanupret with a branch to the unwind // destination. BranchInst::Create(UnwindDest, RI->getParent()); RI->eraseFromParent(); return true; } bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) { // It is possible to transiantly have an undef cleanuppad operand because we // have deleted some, but not all, dead blocks. // Eventually, this block will be deleted. if (isa(RI->getOperand(0))) return false; if (mergeCleanupPad(RI)) return true; if (removeEmptyCleanup(RI, DTU)) return true; return false; } // WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()! bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { BasicBlock *BB = UI->getParent(); bool Changed = false; // If there are any instructions immediately before the unreachable that can // be removed, do so. 
while (UI->getIterator() != BB->begin()) { BasicBlock::iterator BBI = UI->getIterator(); --BBI; if (!isGuaranteedToTransferExecutionToSuccessor(&*BBI)) break; // Can not drop any more instructions. We're done here. // Otherwise, this instruction can be freely erased, // even if it is not side-effect free. // Note that deleting EH's here is in fact okay, although it involves a bit // of subtle reasoning. If this inst is an EH, all the predecessors of this // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn, // and we can therefore guarantee this block will be erased. // Delete this instruction (any uses are guaranteed to be dead) BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType())); BBI->eraseFromParent(); Changed = true; } // If the unreachable instruction is the first in the block, take a gander // at all of the predecessors of this instruction, and simplify them. if (&BB->front() != UI) return Changed; std::vector Updates; SmallSetVector Preds(pred_begin(BB), pred_end(BB)); for (unsigned i = 0, e = Preds.size(); i != e; ++i) { auto *Predecessor = Preds[i]; Instruction *TI = Predecessor->getTerminator(); IRBuilder<> Builder(TI); if (auto *BI = dyn_cast(TI)) { // We could either have a proper unconditional branch, // or a degenerate conditional branch with matching destinations. if (all_of(BI->successors(), [BB](auto *Successor) { return Successor == BB; })) { new UnreachableInst(TI->getContext(), TI); TI->eraseFromParent(); Changed = true; } else { assert(BI->isConditional() && "Can't get here with an uncond branch."); Value* Cond = BI->getCondition(); assert(BI->getSuccessor(0) != BI->getSuccessor(1) && "The destinations are guaranteed to be different here."); if (BI->getSuccessor(0) == BB) { Builder.CreateAssumption(Builder.CreateNot(Cond)); Builder.CreateBr(BI->getSuccessor(1)); } else { assert(BI->getSuccessor(1) == BB && "Incorrect CFG"); Builder.CreateAssumption(Cond); Builder.CreateBr(BI->getSuccessor(0)); } EraseTerminatorAndDCECond(BI); Changed = true; } if (DTU) Updates.push_back({DominatorTree::Delete, Predecessor, BB}); } else if (auto *SI = dyn_cast(TI)) { SwitchInstProfUpdateWrapper SU(*SI); for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) { if (i->getCaseSuccessor() != BB) { ++i; continue; } BB->removePredecessor(SU->getParent()); i = SU.removeCase(i); e = SU->case_end(); Changed = true; } // Note that the default destination can't be removed! if (DTU && SI->getDefaultDest() != BB) Updates.push_back({DominatorTree::Delete, Predecessor, BB}); } else if (auto *II = dyn_cast(TI)) { if (II->getUnwindDest() == BB) { if (DTU) { DTU->applyUpdates(Updates); Updates.clear(); } removeUnwindEdge(TI->getParent(), DTU); Changed = true; } } else if (auto *CSI = dyn_cast(TI)) { if (CSI->getUnwindDest() == BB) { if (DTU) { DTU->applyUpdates(Updates); Updates.clear(); } removeUnwindEdge(TI->getParent(), DTU); Changed = true; continue; } for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(), E = CSI->handler_end(); I != E; ++I) { if (*I == BB) { CSI->removeHandler(I); --I; --E; Changed = true; } } if (DTU) Updates.push_back({DominatorTree::Delete, Predecessor, BB}); if (CSI->getNumHandlers() == 0) { if (CSI->hasUnwindDest()) { // Redirect all predecessors of the block containing CatchSwitchInst // to instead branch to the CatchSwitchInst's unwind destination. 
if (DTU) { for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) { Updates.push_back({DominatorTree::Insert, PredecessorOfPredecessor, CSI->getUnwindDest()}); Updates.push_back({DominatorTree::Delete, PredecessorOfPredecessor, Predecessor}); } } Predecessor->replaceAllUsesWith(CSI->getUnwindDest()); } else { // Rewrite all preds to unwind to caller (or from invoke to call). if (DTU) { DTU->applyUpdates(Updates); Updates.clear(); } SmallVector EHPreds(predecessors(Predecessor)); for (BasicBlock *EHPred : EHPreds) removeUnwindEdge(EHPred, DTU); } // The catchswitch is no longer reachable. new UnreachableInst(CSI->getContext(), CSI); CSI->eraseFromParent(); Changed = true; } } else if (auto *CRI = dyn_cast(TI)) { (void)CRI; assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB && "Expected to always have an unwind to BB."); if (DTU) Updates.push_back({DominatorTree::Delete, Predecessor, BB}); new UnreachableInst(TI->getContext(), TI); TI->eraseFromParent(); Changed = true; } } if (DTU) DTU->applyUpdates(Updates); // If this block is now dead, remove it. if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) { DeleteDeadBlock(BB, DTU); return true; } return Changed; } static bool CasesAreContiguous(SmallVectorImpl &Cases) { assert(Cases.size() >= 1); array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate); for (size_t I = 1, E = Cases.size(); I != E; ++I) { if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1) return false; } return true; } static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU) { LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); auto *BB = Switch->getParent(); auto *OrigDefaultBlock = Switch->getDefaultDest(); OrigDefaultBlock->removePredecessor(BB); BasicBlock *NewDefaultBlock = BasicBlock::Create( BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(), OrigDefaultBlock); new UnreachableInst(Switch->getContext(), NewDefaultBlock); Switch->setDefaultDest(&*NewDefaultBlock); if (DTU) { SmallVector Updates; Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock}); if (!is_contained(successors(BB), OrigDefaultBlock)) Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock}); DTU->applyUpdates(Updates); } } /// Turn a switch with two reachable destinations into an integer range /// comparison and branch. bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); bool HasDefault = !isa(SI->getDefaultDest()->getFirstNonPHIOrDbg()); auto *BB = SI->getParent(); // Partition the cases into two sets with different destinations. BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr; BasicBlock *DestB = nullptr; SmallVector CasesA; SmallVector CasesB; for (auto Case : SI->cases()) { BasicBlock *Dest = Case.getCaseSuccessor(); if (!DestA) DestA = Dest; if (Dest == DestA) { CasesA.push_back(Case.getCaseValue()); continue; } if (!DestB) DestB = Dest; if (Dest == DestB) { CasesB.push_back(Case.getCaseValue()); continue; } return false; // More than two destinations. } assert(DestA && DestB && "Single-destination switch should have been folded."); assert(DestA != DestB); assert(DestB != SI->getDefaultDest()); assert(!CasesB.empty() && "There must be non-default cases."); assert(!CasesA.empty() || HasDefault); // Figure out if one of the sets of cases form a contiguous range. 
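// Illustrative sketch: the unsigned range check that TurnSwitchRangeIntoICmp
// emits for a contiguous set of cases. The helper below is standalone and
// hypothetical; it only demonstrates the subtract-then-unsigned-compare idiom.
#if 0
#include <cstdint>
// Membership of X in {Lo, Lo+1, ..., Lo+N-1} is one subtract plus an unsigned
// compare: values below Lo wrap around to huge numbers and fail the test.
constexpr bool inContiguousRange(uint64_t X, uint64_t Lo, uint64_t N) {
  return X - Lo < N;
}
static_assert(inContiguousRange(5, 5, 4) && inContiguousRange(8, 5, 4), "");
static_assert(!inContiguousRange(4, 5, 4) && !inContiguousRange(9, 5, 4), "");
#endif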
SmallVectorImpl *ContiguousCases = nullptr; BasicBlock *ContiguousDest = nullptr; BasicBlock *OtherDest = nullptr; if (!CasesA.empty() && CasesAreContiguous(CasesA)) { ContiguousCases = &CasesA; ContiguousDest = DestA; OtherDest = DestB; } else if (CasesAreContiguous(CasesB)) { ContiguousCases = &CasesB; ContiguousDest = DestB; OtherDest = DestA; } else return false; // Start building the compare and branch. Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back()); Constant *NumCases = ConstantInt::get(Offset->getType(), ContiguousCases->size()); Value *Sub = SI->getCondition(); if (!Offset->isNullValue()) Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off"); Value *Cmp; // If NumCases overflowed, then all possible values jump to the successor. if (NumCases->isNullValue() && !ContiguousCases->empty()) Cmp = ConstantInt::getTrue(SI->getContext()); else Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch"); BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest); // Update weight for the newly-created conditional branch. if (HasBranchWeights(SI)) { SmallVector Weights; GetBranchWeights(SI, Weights); if (Weights.size() == 1 + SI->getNumCases()) { uint64_t TrueWeight = 0; uint64_t FalseWeight = 0; for (size_t I = 0, E = Weights.size(); I != E; ++I) { if (SI->getSuccessor(I) == ContiguousDest) TrueWeight += Weights[I]; else FalseWeight += Weights[I]; } while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) { TrueWeight /= 2; FalseWeight /= 2; } setBranchWeights(NewBI, TrueWeight, FalseWeight); } } // Prune obsolete incoming values off the successors' PHI nodes. for (auto BBI = ContiguousDest->begin(); isa(BBI); ++BBI) { unsigned PreviousEdges = ContiguousCases->size(); if (ContiguousDest == SI->getDefaultDest()) ++PreviousEdges; for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I) cast(BBI)->removeIncomingValue(SI->getParent()); } for (auto BBI = OtherDest->begin(); isa(BBI); ++BBI) { unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size(); if (OtherDest == SI->getDefaultDest()) ++PreviousEdges; for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I) cast(BBI)->removeIncomingValue(SI->getParent()); } // Clean up the default block - it may have phis or other instructions before // the unreachable terminator. if (!HasDefault) createUnreachableSwitchDefault(SI, DTU); auto *UnreachableDefault = SI->getDefaultDest(); // Drop the switch. SI->eraseFromParent(); if (!HasDefault && DTU) DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}}); return true; } /// Compute masked bits for the condition of a switch /// and use it to remove dead cases. static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL) { Value *Cond = SI->getCondition(); KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI); // We can also eliminate cases by determining that their values are outside of // the limited range of the condition based on how many significant (non-sign) // bits are in the condition value. unsigned MaxSignificantBitsInCond = ComputeMaxSignificantBits(Cond, DL, 0, AC, SI); // Gather dead cases. 
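// Illustrative sketch: the known-bits test used below to mark a case as dead,
// modelled on plain 64-bit masks instead of APInt/KnownBits. The helper is
// hypothetical and standalone, not used by this pass.
#if 0
#include <cstdint>
// A case value is unreachable if it sets a bit the condition is known to have
// clear, or clears a bit the condition is known to have set.
constexpr bool caseIsDead(uint64_t CaseVal, uint64_t KnownZero,
                          uint64_t KnownOne) {
  return (CaseVal & KnownZero) != 0 || (KnownOne & ~CaseVal) != 0;
}
// If the low bit of the condition is known zero, every odd case is dead.
static_assert(caseIsDead(/*CaseVal=*/5, /*KnownZero=*/1, /*KnownOne=*/0), "");
static_assert(!caseIsDead(/*CaseVal=*/4, /*KnownZero=*/1, /*KnownOne=*/0), "");
#endif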
SmallVector DeadCases; SmallDenseMap NumPerSuccessorCases; SmallVector UniqueSuccessors; for (auto &Case : SI->cases()) { auto *Successor = Case.getCaseSuccessor(); if (DTU) { if (!NumPerSuccessorCases.count(Successor)) UniqueSuccessors.push_back(Successor); ++NumPerSuccessorCases[Successor]; } const APInt &CaseVal = Case.getCaseValue()->getValue(); if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) || (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) { DeadCases.push_back(Case.getCaseValue()); if (DTU) --NumPerSuccessorCases[Successor]; LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal << " is dead.\n"); } } // If we can prove that the cases must cover all possible values, the // default destination becomes dead and we can remove it. If we know some // of the bits in the value, we can use that to more precisely compute the // number of possible unique case values. bool HasDefault = !isa(SI->getDefaultDest()->getFirstNonPHIOrDbg()); const unsigned NumUnknownBits = Known.getBitWidth() - (Known.Zero | Known.One).countPopulation(); assert(NumUnknownBits <= Known.getBitWidth()); if (HasDefault && DeadCases.empty() && NumUnknownBits < 64 /* avoid overflow */ && SI->getNumCases() == (1ULL << NumUnknownBits)) { createUnreachableSwitchDefault(SI, DTU); return true; } if (DeadCases.empty()) return false; SwitchInstProfUpdateWrapper SIW(*SI); for (ConstantInt *DeadCase : DeadCases) { SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase); assert(CaseI != SI->case_default() && "Case was not found. Probably mistake in DeadCases forming."); // Prune unused values from PHI nodes. CaseI->getCaseSuccessor()->removePredecessor(SI->getParent()); SIW.removeCase(CaseI); } if (DTU) { std::vector Updates; for (auto *Successor : UniqueSuccessors) if (NumPerSuccessorCases[Successor] == 0) Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor}); DTU->applyUpdates(Updates); } return true; } /// If BB would be eligible for simplification by /// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated /// by an unconditional branch), look at the phi node for BB in the successor /// block and see if the incoming value is equal to CaseValue. If so, return /// the phi node, and set PhiIndex to BB's index in the phi node. static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex) { if (BB->getFirstNonPHIOrDbg() != BB->getTerminator()) return nullptr; // BB must be empty to be a candidate for simplification. if (!BB->getSinglePredecessor()) return nullptr; // BB must be dominated by the switch. BranchInst *Branch = dyn_cast(BB->getTerminator()); if (!Branch || !Branch->isUnconditional()) return nullptr; // Terminator must be unconditional branch. BasicBlock *Succ = Branch->getSuccessor(0); for (PHINode &PHI : Succ->phis()) { int Idx = PHI.getBasicBlockIndex(BB); assert(Idx >= 0 && "PHI has no entry for predecessor?"); Value *InValue = PHI.getIncomingValue(Idx); if (InValue != CaseValue) continue; *PhiIndex = Idx; return &PHI; } return nullptr; } /// Try to forward the condition of a switch instruction to a phi node /// dominated by the switch, if that would mean that some of the destination /// blocks of the switch can be folded away. Return true if a change is made. 
static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { using ForwardingNodesMap = DenseMap>; ForwardingNodesMap ForwardingNodes; BasicBlock *SwitchBlock = SI->getParent(); bool Changed = false; for (auto &Case : SI->cases()) { ConstantInt *CaseValue = Case.getCaseValue(); BasicBlock *CaseDest = Case.getCaseSuccessor(); // Replace phi operands in successor blocks that are using the constant case // value rather than the switch condition variable: // switchbb: // switch i32 %x, label %default [ // i32 17, label %succ // ... // succ: // %r = phi i32 ... [ 17, %switchbb ] ... // --> // %r = phi i32 ... [ %x, %switchbb ] ... for (PHINode &Phi : CaseDest->phis()) { // This only works if there is exactly 1 incoming edge from the switch to // a phi. If there is >1, that means multiple cases of the switch map to 1 // value in the phi, and that phi value is not the switch condition. Thus, // this transform would not make sense (the phi would be invalid because // a phi can't have different incoming values from the same block). int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock); if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue && count(Phi.blocks(), SwitchBlock) == 1) { Phi.setIncomingValue(SwitchBBIdx, SI->getCondition()); Changed = true; } } // Collect phi nodes that are indirectly using this switch's case constants. int PhiIdx; if (auto *Phi = FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx)) ForwardingNodes[Phi].push_back(PhiIdx); } for (auto &ForwardingNode : ForwardingNodes) { PHINode *Phi = ForwardingNode.first; SmallVectorImpl &Indexes = ForwardingNode.second; if (Indexes.size() < 2) continue; for (int Index : Indexes) Phi->setIncomingValue(Index, SI->getCondition()); Changed = true; } return Changed; } /// Return true if the backend will be able to handle /// initializing an array of constants like C. static bool ValidLookupTableConstant(Constant *C, const TargetTransformInfo &TTI) { if (C->isThreadDependent()) return false; if (C->isDLLImportDependent()) return false; if (!isa(C) && !isa(C) && !isa(C) && !isa(C) && !isa(C) && !isa(C)) return false; if (ConstantExpr *CE = dyn_cast(C)) { // Pointer casts and in-bounds GEPs will not prohibit the backend from // materializing the array of constants. Constant *StrippedC = cast(CE->stripInBoundsConstantOffsets()); if (StrippedC == C || !ValidLookupTableConstant(StrippedC, TTI)) return false; } if (!TTI.shouldBuildLookupTablesForConstant(C)) return false; return true; } /// If V is a Constant, return it. Otherwise, try to look up /// its constant value in ConstantPool, returning 0 if it's not there. static Constant * LookupConstant(Value *V, const SmallDenseMap &ConstantPool) { if (Constant *C = dyn_cast(V)) return C; return ConstantPool.lookup(V); } /// Try to fold instruction I into a constant. This works for /// simple instructions such as binary operations where both operands are /// constant or can be replaced by constants from the ConstantPool. Returns the /// resulting constant on success, 0 otherwise. 
static Constant * ConstantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap &ConstantPool) { if (SelectInst *Select = dyn_cast(I)) { Constant *A = LookupConstant(Select->getCondition(), ConstantPool); if (!A) return nullptr; if (A->isAllOnesValue()) return LookupConstant(Select->getTrueValue(), ConstantPool); if (A->isNullValue()) return LookupConstant(Select->getFalseValue(), ConstantPool); return nullptr; } SmallVector COps; for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) { if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool)) COps.push_back(A); else return nullptr; } if (CmpInst *Cmp = dyn_cast(I)) { return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0], COps[1], DL); } return ConstantFoldInstOperands(I, COps, DL); } /// Try to determine the resulting constant values in phi nodes /// at the common destination basic block, *CommonDest, for one of the case /// destionations CaseDest corresponding to value CaseVal (0 for the default /// case), of a switch instruction SI. static bool GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl> &Res, const DataLayout &DL, const TargetTransformInfo &TTI) { // The block from which we enter the common destination. BasicBlock *Pred = SI->getParent(); // If CaseDest is empty except for some side-effect free instructions through // which we can constant-propagate the CaseVal, continue to its successor. SmallDenseMap ConstantPool; ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal)); for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) { if (I.isTerminator()) { // If the terminator is a simple branch, continue to the next block. if (I.getNumSuccessors() != 1 || I.isExceptionalTerminator()) return false; Pred = CaseDest; CaseDest = I.getSuccessor(0); } else if (Constant *C = ConstantFold(&I, DL, ConstantPool)) { // Instruction is side-effect free and constant. // If the instruction has uses outside this block or a phi node slot for // the block, it is not safe to bypass the instruction since it would then // no longer dominate all its uses. for (auto &Use : I.uses()) { User *User = Use.getUser(); if (Instruction *I = dyn_cast(User)) if (I->getParent() == CaseDest) continue; if (PHINode *Phi = dyn_cast(User)) if (Phi->getIncomingBlock(Use) == CaseDest) continue; return false; } ConstantPool.insert(std::make_pair(&I, C)); } else { break; } } // If we did not have a CommonDest before, use the current one. if (!*CommonDest) *CommonDest = CaseDest; // If the destination isn't the common one, abort. if (CaseDest != *CommonDest) return false; // Get the values for this case from phi nodes in the destination block. for (PHINode &PHI : (*CommonDest)->phis()) { int Idx = PHI.getBasicBlockIndex(Pred); if (Idx == -1) continue; Constant *ConstVal = LookupConstant(PHI.getIncomingValue(Idx), ConstantPool); if (!ConstVal) return false; // Be conservative about which kinds of constants we support. if (!ValidLookupTableConstant(ConstVal, TTI)) return false; Res.push_back(std::make_pair(&PHI, ConstVal)); } return Res.size() > 0; } // Helper function used to add CaseVal to the list of cases that generate // Result. Returns the updated number of cases that generate this result. 
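// Illustrative sketch: the case-to-result bucketing performed by
// MapCaseToResult/InitializeUniqueCases, with plain integers and standard
// containers standing in for ConstantInt and the LLVM vectors. Hypothetical
// helper, not used by this pass.
#if 0
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>
using Bucket = std::pair<int64_t /*Result*/, std::vector<int64_t> /*Cases*/>;
// Append CaseVal to the bucket for Result (creating it if needed) and return
// the bucket's new size so a caller can enforce a MaxCasesPerResult limit.
static std::size_t mapCaseToResult(int64_t CaseVal,
                                   std::vector<Bucket> &Buckets,
                                   int64_t Result) {
  for (Bucket &B : Buckets)
    if (B.first == Result) {
      B.second.push_back(CaseVal);
      return B.second.size();
    }
  Buckets.push_back({Result, {CaseVal}});
  return 1;
}
#endif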
static uintptr_t MapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result) { for (auto &I : UniqueResults) { if (I.first == Result) { I.second.push_back(CaseVal); return I.second.size(); } } UniqueResults.push_back( std::make_pair(Result, SmallVector(1, CaseVal))); return 1; } // Helper function that initializes a map containing // results for the PHI node of the common destination block for a switch // instruction. Returns false if multiple PHI nodes have been found or if // there is not a common destination block for the switch. static bool InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults, uintptr_t MaxCasesPerResult) { for (auto &I : SI->cases()) { ConstantInt *CaseVal = I.getCaseValue(); // Resulting value at phi nodes for this case value. SwitchCaseResultsTy Results; if (!GetCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results, DL, TTI)) return false; // Only one value per case is permitted. if (Results.size() > 1) return false; // Add the case->result mapping to UniqueResults. const uintptr_t NumCasesForResult = MapCaseToResult(CaseVal, UniqueResults, Results.begin()->second); // Early out if there are too many cases for this result. if (NumCasesForResult > MaxCasesPerResult) return false; // Early out if there are too many unique results. if (UniqueResults.size() > MaxUniqueResults) return false; // Check the PHI consistency. if (!PHI) PHI = Results[0].first; else if (PHI != Results[0].first) return false; } // Find the default result value. SmallVector, 1> DefaultResults; BasicBlock *DefaultDest = SI->getDefaultDest(); GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults, DL, TTI); // If the default value is not found abort unless the default destination // is unreachable. DefaultResult = DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr; if ((!DefaultResult && !isa(DefaultDest->getFirstNonPHIOrDbg()))) return false; return true; } // Helper function that checks if it is possible to transform a switch with only // two cases (or two cases + default) that produces a result into a select. // Example: // switch (a) { // case 10: %0 = icmp eq i32 %a, 10 // return 10; %1 = select i1 %0, i32 10, i32 4 // case 20: ----> %2 = icmp eq i32 %a, 20 // return 2; %3 = select i1 %2, i32 2, i32 %1 // default: // return 4; // } static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder) { // If we are selecting between only two cases transform into a simple // select or a two-way select if default is possible. 
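// Illustrative sketch: the source-level shape of the two-case-plus-default
// conversion described above. Both standalone functions are hypothetical
// examples, not part of this pass; they compute the same result.
#if 0
#include <cassert>
static int viaSwitch(int A) {
  switch (A) {
  case 10: return 10;
  case 20: return 2;
  default: return 4;
  }
}
static int viaSelects(int A) {
  int Inner = (A == 20) ? 2 : 4; // second case vs. the default
  return (A == 10) ? 10 : Inner; // first case vs. everything else
}
static void checkEquivalence() {
  for (int A = -50; A <= 50; ++A)
    assert(viaSwitch(A) == viaSelects(A));
}
#endif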
if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 && ResultVector[1].second.size() == 1) { ConstantInt *const FirstCase = ResultVector[0].second[0]; ConstantInt *const SecondCase = ResultVector[1].second[0]; bool DefaultCanTrigger = DefaultResult; Value *SelectValue = ResultVector[1].first; if (DefaultCanTrigger) { Value *const ValueCompare = Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp"); SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first, DefaultResult, "switch.select"); } Value *const ValueCompare = Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp"); return Builder.CreateSelect(ValueCompare, ResultVector[0].first, SelectValue, "switch.select"); } // Handle the degenerate case where two cases have the same value. if (ResultVector.size() == 1 && ResultVector[0].second.size() == 2 && DefaultResult) { Value *Cmp1 = Builder.CreateICmpEQ( Condition, ResultVector[0].second[0], "switch.selectcmp.case1"); Value *Cmp2 = Builder.CreateICmpEQ( Condition, ResultVector[0].second[1], "switch.selectcmp.case2"); Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp"); return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult); } return nullptr; } // Helper function to cleanup a switch instruction that has been converted into // a select, fixing up PHI nodes and basic blocks. static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU) { std::vector Updates; BasicBlock *SelectBB = SI->getParent(); BasicBlock *DestBB = PHI->getParent(); if (DTU && !is_contained(predecessors(DestBB), SelectBB)) Updates.push_back({DominatorTree::Insert, SelectBB, DestBB}); Builder.CreateBr(DestBB); // Remove the switch. while (PHI->getBasicBlockIndex(SelectBB) >= 0) PHI->removeIncomingValue(SelectBB); PHI->addIncoming(SelectValue, SelectBB); SmallPtrSet RemovedSuccessors; for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) { BasicBlock *Succ = SI->getSuccessor(i); if (Succ == DestBB) continue; Succ->removePredecessor(SelectBB); if (DTU && RemovedSuccessors.insert(Succ).second) Updates.push_back({DominatorTree::Delete, SelectBB, Succ}); } SI->eraseFromParent(); if (DTU) DTU->applyUpdates(Updates); } /// If the switch is only used to initialize one or more /// phi nodes in a common successor block with only two different /// constant values, replace the switch with select. static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { Value *const Cond = SI->getCondition(); PHINode *PHI = nullptr; BasicBlock *CommonDest = nullptr; Constant *DefaultResult; SwitchCaseResultVectorTy UniqueResults; // Collect all the cases that will deliver the same value from the switch. if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult, DL, TTI, /*MaxUniqueResults*/2, /*MaxCasesPerResult*/2)) return false; assert(PHI != nullptr && "PHI for value select not found"); Builder.SetInsertPoint(SI); Value *SelectValue = ConvertTwoCaseSwitch(UniqueResults, DefaultResult, Cond, Builder); if (SelectValue) { RemoveSwitchAfterSelectConversion(SI, PHI, SelectValue, Builder, DTU); return true; } // The switch couldn't be converted into a select. return false; } namespace { /// This class represents a lookup table that can be used to replace a switch. 
class SwitchLookupTable { public: /// Create a lookup table to use as a switch replacement with the contents /// of Values, using DefaultValue to fill any holes in the table. SwitchLookupTable( Module &M, uint64_t TableSize, ConstantInt *Offset, const SmallVectorImpl> &Values, Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName); /// Build instructions with Builder to retrieve the value at /// the position given by Index in the lookup table. Value *BuildLookup(Value *Index, IRBuilder<> &Builder); /// Return true if a table with TableSize elements of /// type ElementType would fit in a target-legal register. static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize, Type *ElementType); private: // Depending on the contents of the table, it can be represented in // different ways. enum { // For tables where each element contains the same value, we just have to // store that single value and return it for each lookup. SingleValueKind, // For tables where there is a linear relationship between table index // and values. We calculate the result with a simple multiplication // and addition instead of a table lookup. LinearMapKind, // For small tables with integer elements, we can pack them into a bitmap // that fits into a target-legal register. Values are retrieved by // shift and mask operations. BitMapKind, // The table is stored as an array of values. Values are retrieved by load // instructions from the table. ArrayKind } Kind; // For SingleValueKind, this is the single value. Constant *SingleValue = nullptr; // For BitMapKind, this is the bitmap. ConstantInt *BitMap = nullptr; IntegerType *BitMapElementTy = nullptr; // For LinearMapKind, these are the constants used to derive the value. ConstantInt *LinearOffset = nullptr; ConstantInt *LinearMultiplier = nullptr; // For ArrayKind, this is the array. GlobalVariable *Array = nullptr; }; } // end anonymous namespace SwitchLookupTable::SwitchLookupTable( Module &M, uint64_t TableSize, ConstantInt *Offset, const SmallVectorImpl> &Values, Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) { assert(Values.size() && "Can't build lookup table without values!"); assert(TableSize >= Values.size() && "Can't fit values in table!"); // If all values in the table are equal, this is that value. SingleValue = Values.begin()->second; Type *ValueType = Values.begin()->second->getType(); // Build up the table contents. SmallVector TableContents(TableSize); for (size_t I = 0, E = Values.size(); I != E; ++I) { ConstantInt *CaseVal = Values[I].first; Constant *CaseRes = Values[I].second; assert(CaseRes->getType() == ValueType); uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue(); TableContents[Idx] = CaseRes; if (CaseRes != SingleValue) SingleValue = nullptr; } // Fill in any holes in the table with the default result. if (Values.size() < TableSize) { assert(DefaultValue && "Need a default value to fill the lookup table holes."); assert(DefaultValue->getType() == ValueType); for (uint64_t I = 0; I < TableSize; ++I) { if (!TableContents[I]) TableContents[I] = DefaultValue; } if (DefaultValue != SingleValue) SingleValue = nullptr; } // If each element in the table contains the same value, we only need to store // that single value. if (SingleValue) { Kind = SingleValueKind; return; } // Check if we can derive the value with a linear transformation from the // table index. 
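// Illustrative sketch: the LinearMapKind detection that follows. Plain
// integers stand in for ConstantInt; the helper below is hypothetical and
// standalone, not used by this class.
#if 0
#include <cstddef>
#include <cstdint>
#include <vector>
struct LinearMap { bool Valid; int64_t Offset; int64_t Stride; };
// If consecutive table entries differ by a constant stride, every entry is
// Offset + Index * Stride and no in-memory table is needed at all.
static LinearMap findLinearMap(const std::vector<int64_t> &Table) {
  if (Table.size() < 2)
    return {false, 0, 0};
  int64_t Stride = Table[1] - Table[0];
  for (std::size_t I = 2; I < Table.size(); ++I)
    if (Table[I] - Table[I - 1] != Stride)
      return {false, 0, 0};
  return {true, Table[0], Stride};
}
// For {3, 5, 7, 9} this yields Offset = 3, Stride = 2, so looking up Index
// becomes the arithmetic 3 + Index * 2 instead of a load.
#endif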
if (isa(ValueType)) { bool LinearMappingPossible = true; APInt PrevVal; APInt DistToPrev; assert(TableSize >= 2 && "Should be a SingleValue table."); // Check if there is the same distance between two consecutive values. for (uint64_t I = 0; I < TableSize; ++I) { ConstantInt *ConstVal = dyn_cast(TableContents[I]); if (!ConstVal) { // This is an undef. We could deal with it, but undefs in lookup tables // are very seldom. It's probably not worth the additional complexity. LinearMappingPossible = false; break; } const APInt &Val = ConstVal->getValue(); if (I != 0) { APInt Dist = Val - PrevVal; if (I == 1) { DistToPrev = Dist; } else if (Dist != DistToPrev) { LinearMappingPossible = false; break; } } PrevVal = Val; } if (LinearMappingPossible) { LinearOffset = cast(TableContents[0]); LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev); Kind = LinearMapKind; ++NumLinearMaps; return; } } // If the type is integer and the table fits in a register, build a bitmap. if (WouldFitInRegister(DL, TableSize, ValueType)) { IntegerType *IT = cast(ValueType); APInt TableInt(TableSize * IT->getBitWidth(), 0); for (uint64_t I = TableSize; I > 0; --I) { TableInt <<= IT->getBitWidth(); // Insert values into the bitmap. Undef values are set to zero. if (!isa(TableContents[I - 1])) { ConstantInt *Val = cast(TableContents[I - 1]); TableInt |= Val->getValue().zext(TableInt.getBitWidth()); } } BitMap = ConstantInt::get(M.getContext(), TableInt); BitMapElementTy = IT; Kind = BitMapKind; ++NumBitMaps; return; } // Store the table in an array. ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize); Constant *Initializer = ConstantArray::get(ArrayTy, TableContents); Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true, GlobalVariable::PrivateLinkage, Initializer, "switch.table." + FuncName); Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Set the alignment to that of an array items. We will be only loading one // value out of it. Array->setAlignment(Align(DL.getPrefTypeAlignment(ValueType))); Kind = ArrayKind; } Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { switch (Kind) { case SingleValueKind: return SingleValue; case LinearMapKind: { // Derive the result value from the input value. Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(), false, "switch.idx.cast"); if (!LinearMultiplier->isOne()) Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult"); if (!LinearOffset->isZero()) Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset"); return Result; } case BitMapKind: { // Type of the bitmap (e.g. i59). IntegerType *MapTy = BitMap->getType(); // Cast Index to the same type as the bitmap. // Note: The Index is <= the number of elements in the table, so // truncating it to the width of the bitmask is safe. Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast"); // Multiply the shift amount by the element width. ShiftAmt = Builder.CreateMul( ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()), "switch.shiftamt"); // Shift down. Value *DownShifted = Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift"); // Mask off. return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked"); } case ArrayKind: { // Make sure the table index will not overflow when treated as signed. 
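// Illustrative sketch: the BitMapKind representation, with fixed 8-bit
// elements packed into a single 64-bit word. The helpers are hypothetical and
// standalone; the real code sizes the bitmap from the element type and the
// target's legal integer width.
#if 0
#include <cstdint>
constexpr uint64_t packByte(uint64_t Map, unsigned Index, uint8_t Value) {
  return Map | (uint64_t(Value) << (Index * 8));
}
// Element I is recovered with a shift and a truncation, which mirrors the
// lshr + trunc sequence BuildLookup emits for BitMapKind.
constexpr uint8_t lookupByte(uint64_t Map, unsigned Index) {
  return uint8_t(Map >> (Index * 8));
}
static_assert(lookupByte(packByte(packByte(0, 0, 42), 3, 7), 3) == 7, "");
static_assert(lookupByte(packByte(packByte(0, 0, 42), 3, 7), 0) == 42, "");
#endif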
IntegerType *IT = cast(Index->getType()); uint64_t TableSize = Array->getInitializer()->getType()->getArrayNumElements(); if (TableSize > (1ULL << (IT->getBitWidth() - 1))) Index = Builder.CreateZExt( Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1), "switch.tableidx.zext"); Value *GEPIndices[] = {Builder.getInt32(0), Index}; Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array, GEPIndices, "switch.gep"); return Builder.CreateLoad( cast(Array->getValueType())->getElementType(), GEP, "switch.load"); } } llvm_unreachable("Unknown lookup table kind!"); } bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL, uint64_t TableSize, Type *ElementType) { auto *IT = dyn_cast(ElementType); if (!IT) return false; // FIXME: If the type is wider than it needs to be, e.g. i8 but all values // are <= 15, we could try to narrow the type. // Avoid overflow, fitsInLegalInteger uses unsigned int for the width. if (TableSize >= UINT_MAX / IT->getBitWidth()) return false; return DL.fitsInLegalInteger(TableSize * IT->getBitWidth()); } static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL) { // Allow any legal type. if (TTI.isTypeLegal(Ty)) return true; auto *IT = dyn_cast(Ty); if (!IT) return false; // Also allow power of 2 integer types that have at least 8 bits and fit in // a register. These types are common in frontend languages and targets // usually support loads of these types. // TODO: We could relax this to any integer that fits in a register and rely // on ABI alignment and padding in the table to allow the load to be widened. // Or we could widen the constants and truncate the load. unsigned BitWidth = IT->getBitWidth(); return BitWidth >= 8 && isPowerOf2_32(BitWidth) && DL.fitsInLegalInteger(IT->getBitWidth()); } /// Determine whether a lookup table should be built for this switch, based on /// the number of cases, size of the table, and the types of the results. // TODO: We could support larger than legal types by limiting based on the // number of loads required and/or table size. If the constants are small we // could use smaller table entries and extend after the load. static bool ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallDenseMap &ResultTypes) { if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10) return false; // TableSize overflowed, or mul below might overflow. bool AllTablesFitInRegister = true; bool HasIllegalType = false; for (const auto &I : ResultTypes) { Type *Ty = I.second; // Saturate this flag to true. HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL); // Saturate this flag to false. AllTablesFitInRegister = AllTablesFitInRegister && SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty); // If both flags saturate, we're done. NOTE: This *only* works with // saturating flags, and all flags have to saturate first due to the // non-deterministic behavior of iterating over a dense map. if (HasIllegalType && !AllTablesFitInRegister) break; } // If each table would fit in a register, we should build it anyway. if (AllTablesFitInRegister) return true; // Don't build a table that doesn't fit in-register if it has illegal types. if (HasIllegalType) return false; // The table density should be at least 40%. This is the same criterion as for // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. // FIXME: Find the best cut-off. 
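// Illustrative sketch: the 40% density cut-off checked in the return just
// below, written with the same integer-only comparison. The helper is
// hypothetical and standalone, not used by this pass.
#if 0
#include <cstdint>
// NumCases / TableSize >= 0.4 is rewritten as NumCases * 10 >= TableSize * 4
// to stay in integer arithmetic; the earlier bound on TableSize keeps the
// multiplication from overflowing.
constexpr bool denseEnough(uint64_t NumCases, uint64_t TableSize) {
  return NumCases * 10 >= TableSize * 4;
}
static_assert(denseEnough(4, 10), "");  // exactly 40%
static_assert(!denseEnough(3, 10), ""); // 30% is too sparse
#endif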
return SI->getNumCases() * 10 >= TableSize * 4; } /// Try to reuse the switch table index compare. Following pattern: /// \code /// if (idx < tablesize) /// r = table[idx]; // table does not contain default_value /// else /// r = default_value; /// if (r != default_value) /// ... /// \endcode /// Is optimized to: /// \code /// cond = idx < tablesize; /// if (cond) /// r = table[idx]; /// else /// r = default_value; /// if (cond) /// ... /// \endcode /// Jump threading will then eliminate the second if(cond). static void reuseTableCompare( User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl> &Values) { ICmpInst *CmpInst = dyn_cast(PhiUser); if (!CmpInst) return; // We require that the compare is in the same block as the phi so that jump // threading can do its work afterwards. if (CmpInst->getParent() != PhiBlock) return; Constant *CmpOp1 = dyn_cast(CmpInst->getOperand(1)); if (!CmpOp1) return; Value *RangeCmp = RangeCheckBranch->getCondition(); Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType()); Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType()); // Check if the compare with the default value is constant true or false. Constant *DefaultConst = ConstantExpr::getICmp(CmpInst->getPredicate(), DefaultValue, CmpOp1, true); if (DefaultConst != TrueConst && DefaultConst != FalseConst) return; // Check if the compare with the case values is distinct from the default // compare result. for (auto ValuePair : Values) { Constant *CaseConst = ConstantExpr::getICmp(CmpInst->getPredicate(), ValuePair.second, CmpOp1, true); if (!CaseConst || CaseConst == DefaultConst || (CaseConst != TrueConst && CaseConst != FalseConst)) return; } // Check if the branch instruction dominates the phi node. It's a simple // dominance check, but sufficient for our needs. // Although this check is invariant in the calling loops, it's better to do it // at this late stage. Practically we do it at most once for a switch. BasicBlock *BranchBlock = RangeCheckBranch->getParent(); for (BasicBlock *Pred : predecessors(PhiBlock)) { if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock) return; } if (DefaultConst == FalseConst) { // The compare yields the same result. We can replace it. CmpInst->replaceAllUsesWith(RangeCmp); ++NumTableCmpReuses; } else { // The compare yields the same result, just inverted. We can replace it. Value *InvertedTableCmp = BinaryOperator::CreateXor( RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp", RangeCheckBranch); CmpInst->replaceAllUsesWith(InvertedTableCmp); ++NumTableCmpReuses; } } /// If the switch is only used to initialize one or more phi nodes in a common /// successor block with different constant values, replace the switch with /// lookup tables. static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); BasicBlock *BB = SI->getParent(); Function *Fn = BB->getParent(); // Only build lookup table when we have a target that supports it or the // attribute is not set. if (!TTI.shouldBuildLookupTables() || (Fn->getFnAttribute("no-jump-tables").getValueAsBool())) return false; // FIXME: If the switch is too sparse for a lookup table, perhaps we could // split off a dense part and build a lookup table for that. // FIXME: This creates arrays of GEPs to constant strings, which means each // GEP needs a runtime relocation in PIC code. 
We should just build one big // string and lookup indices into that. // Ignore switches with less than three cases. Lookup tables will not make // them faster, so we don't analyze them. if (SI->getNumCases() < 3) return false; // Figure out the corresponding result for each case value and phi node in the // common destination, as well as the min and max case values. assert(!SI->cases().empty()); SwitchInst::CaseIt CI = SI->case_begin(); ConstantInt *MinCaseVal = CI->getCaseValue(); ConstantInt *MaxCaseVal = CI->getCaseValue(); BasicBlock *CommonDest = nullptr; using ResultListTy = SmallVector, 4>; SmallDenseMap ResultLists; SmallDenseMap DefaultResults; SmallDenseMap ResultTypes; SmallVector PHIs; for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) { ConstantInt *CaseVal = CI->getCaseValue(); if (CaseVal->getValue().slt(MinCaseVal->getValue())) MinCaseVal = CaseVal; if (CaseVal->getValue().sgt(MaxCaseVal->getValue())) MaxCaseVal = CaseVal; // Resulting value at phi nodes for this case value. using ResultsTy = SmallVector, 4>; ResultsTy Results; if (!GetCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest, Results, DL, TTI)) return false; // Append the result from this case to the list for each phi. for (const auto &I : Results) { PHINode *PHI = I.first; Constant *Value = I.second; if (!ResultLists.count(PHI)) PHIs.push_back(PHI); ResultLists[PHI].push_back(std::make_pair(CaseVal, Value)); } } // Keep track of the result types. for (PHINode *PHI : PHIs) { ResultTypes[PHI] = ResultLists[PHI][0].second->getType(); } uint64_t NumResults = ResultLists[PHIs[0]].size(); APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); uint64_t TableSize = RangeSpread.getLimitedValue() + 1; bool TableHasHoles = (NumResults < TableSize); // If the table has holes, we need a constant result for the default case // or a bitmask that fits in a register. SmallVector, 4> DefaultResultsList; bool HasDefaultResults = GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResultsList, DL, TTI); bool NeedMask = (TableHasHoles && !HasDefaultResults); if (NeedMask) { // As an extra penalty for the validity test we require more cases. if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark). return false; if (!DL.fitsInLegalInteger(TableSize)) return false; } for (const auto &I : DefaultResultsList) { PHINode *PHI = I.first; Constant *Result = I.second; DefaultResults[PHI] = Result; } if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes)) return false; std::vector Updates; // Create the BB that does the lookups. Module &Mod = *CommonDest->getParent()->getParent(); BasicBlock *LookupBB = BasicBlock::Create( Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest); // Compute the table index value. Builder.SetInsertPoint(SI); Value *TableIndex; if (MinCaseVal->isNullValue()) TableIndex = SI->getCondition(); else TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal, "switch.tableidx"); // Compute the maximum table size representable by the integer type we are // switching upon. unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits(); uint64_t MaxTableSize = CaseSize > 63 ? 
UINT64_MAX : 1ULL << CaseSize; assert(MaxTableSize >= TableSize && "It is impossible for a switch to have more entries than the max " "representable value of its input integer type's size."); // If the default destination is unreachable, or if the lookup table covers // all values of the conditional variable, branch directly to the lookup table // BB. Otherwise, check that the condition is within the case range. const bool DefaultIsReachable = !isa(SI->getDefaultDest()->getFirstNonPHIOrDbg()); const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize); BranchInst *RangeCheckBranch = nullptr; if (!DefaultIsReachable || GeneratingCoveredLookupTable) { Builder.CreateBr(LookupBB); if (DTU) Updates.push_back({DominatorTree::Insert, BB, LookupBB}); // Note: We call removeProdecessor later since we need to be able to get the // PHI value for the default case in case we're using a bit mask. } else { Value *Cmp = Builder.CreateICmpULT( TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize)); RangeCheckBranch = Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); if (DTU) Updates.push_back({DominatorTree::Insert, BB, LookupBB}); } // Populate the BB that does the lookups. Builder.SetInsertPoint(LookupBB); if (NeedMask) { // Before doing the lookup, we do the hole check. The LookupBB is therefore // re-purposed to do the hole check, and we create a new LookupBB. BasicBlock *MaskBB = LookupBB; MaskBB->setName("switch.hole_check"); LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest); // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid // unnecessary illegal types. uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL)); APInt MaskInt(TableSizePowOf2, 0); APInt One(TableSizePowOf2, 1); // Build bitmask; fill in a 1 bit for every case. const ResultListTy &ResultList = ResultLists[PHIs[0]]; for (size_t I = 0, E = ResultList.size(); I != E; ++I) { uint64_t Idx = (ResultList[I].first->getValue() - MinCaseVal->getValue()) .getLimitedValue(); MaskInt |= One << Idx; } ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt); // Get the TableIndex'th bit of the bitmask. // If this bit is 0 (meaning hole) jump to the default destination, // else continue with table lookup. IntegerType *MapTy = TableMask->getType(); Value *MaskIndex = Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex"); Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted"); Value *LoBit = Builder.CreateTrunc( Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit"); Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest()); if (DTU) { Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB}); Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()}); } Builder.SetInsertPoint(LookupBB); AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB); } if (!DefaultIsReachable || GeneratingCoveredLookupTable) { // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later, // do not delete PHINodes here. SI->getDefaultDest()->removePredecessor(BB, /*KeepOneInputPHIs=*/true); if (DTU) Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()}); } for (PHINode *PHI : PHIs) { const ResultListTy &ResultList = ResultLists[PHI]; // If using a bitmask, use any value to fill the lookup table holes. Constant *DV = NeedMask ? 
        ResultLists[PHI][0].second : DefaultResults[PHI];
    StringRef FuncName = Fn->getName();
    SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL,
                            FuncName);

    Value *Result = Table.BuildLookup(TableIndex, Builder);

    // Do a small peephole optimization: re-use the switch table compare if
    // possible.
    if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
      BasicBlock *PhiBlock = PHI->getParent();
      // Search for compare instructions which use the phi.
      for (auto *User : PHI->users()) {
        reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList);
      }
    }

    PHI->addIncoming(Result, LookupBB);
  }

  Builder.CreateBr(CommonDest);
  if (DTU)
    Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});

  // Remove the switch.
  SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
  for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
    BasicBlock *Succ = SI->getSuccessor(i);

    if (Succ == SI->getDefaultDest())
      continue;
    Succ->removePredecessor(BB);
    if (DTU && RemovedSuccessors.insert(Succ).second)
      Updates.push_back({DominatorTree::Delete, BB, Succ});
  }
  SI->eraseFromParent();

  if (DTU)
    DTU->applyUpdates(Updates);
  ++NumLookupTables;
  if (NeedMask)
    ++NumLookupTablesHoles;
  return true;
}

static bool isSwitchDense(ArrayRef<int64_t> Values) {
  // See also SelectionDAGBuilder::isDense(), which this function was based on.
  uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
  uint64_t Range = Diff + 1;
  uint64_t NumCases = Values.size();
  // 40% is the default density for building a jump table in optsize/minsize
  // mode.
  uint64_t MinDensity = 40;

  return NumCases * 100 >= Range * MinDensity;
}

/// Try to transform a switch that has "holes" in it to a contiguous sequence
/// of cases.
///
/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
///
/// This converts a sparse switch into a dense switch which allows better
/// lowering and could also allow transforming into a lookup table.
static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
                              const DataLayout &DL,
                              const TargetTransformInfo &TTI) {
  auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
  if (CondTy->getIntegerBitWidth() > 64 ||
      !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
    return false;

  // Only bother with this optimization if there are more than 3 switch cases;
  // SDAG will only bother creating jump tables for 4 or more cases.
  if (SI->getNumCases() < 4)
    return false;

  // This transform is agnostic to the signedness of the input or case values.
  // We can treat the case values as signed or unsigned. We can optimize more
  // common cases such as a sequence crossing zero {-4,0,4,8} if we interpret
  // case values as signed.
  SmallVector<int64_t, 4> Values;
  for (auto &C : SI->cases())
    Values.push_back(C.getCaseValue()->getValue().getSExtValue());
  llvm::sort(Values);

  // If the switch is already dense, there's nothing useful to do here.
  if (isSwitchDense(Values))
    return false;

  // First, transform the values such that they start at zero and ascend.
  int64_t Base = Values[0];
  for (auto &V : Values)
    V -= (uint64_t)(Base);

  // Now we have signed numbers that have been shifted so that, given enough
  // precision, there are no negative values. Since the rest of the transform
  // is bitwise only, we switch now to an unsigned representation.

  // This transform can be done speculatively because it is so cheap - it
  // results in a single rotate operation being inserted.
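  // [Editorial illustration, not part of the patch] A worked example of the
  // rotate trick implemented below, assuming the switch from the doc comment
  // above, switch(i) {case 5: case 9: case 13: case 17:}: Base is 5, the
  // rebased values {0, 4, 8, 12} share two trailing zero bits, so Shift
  // becomes 2. The code then emits, schematically, for bit width w:
  //   Sub = i - 5
  //   Rot = (Sub >> 2) | (Sub << (w - 2))
  // and rewrites the cases to {0, 1, 2, 3}. Any condition whose low two bits
  // are not zero after the subtraction gets those bits rotated into the top,
  // producing a huge value that misses every case and falls through to the
  // default destination, which is why no explicit divisibility check is
  // needed.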
  // FIXME: It's possible that optimizing a switch on powers of two might also
  // be beneficial - flag values are often powers of two and we could use a CLZ
  // as the key function.

  // countTrailingZeros(0) returns 64. As Values is guaranteed to have more
  // than one element and LLVM disallows duplicate cases, Shift is guaranteed
  // to be less than 64.
  unsigned Shift = 64;
  for (auto &V : Values)
    Shift = std::min(Shift, countTrailingZeros((uint64_t)V));
  assert(Shift < 64);
  if (Shift > 0)
    for (auto &V : Values)
      V = (int64_t)((uint64_t)V >> Shift);

  if (!isSwitchDense(Values))
    // Transform didn't create a dense switch.
    return false;

  // The obvious transform is to shift the switch condition right and emit a
  // check that the condition actually cleanly divided by GCD, i.e.
  //   C & (1 << Shift - 1) == 0
  // inserting a new CFG edge to handle the case where it didn't divide cleanly.
  //
  // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
  // shift and puts the shifted-off bits in the uppermost bits. If any of these
  // are nonzero then the switch condition will be very large and will hit the
  // default case.
  auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
  Builder.SetInsertPoint(SI);
  auto *ShiftC = ConstantInt::get(Ty, Shift);
  auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
  auto *LShr = Builder.CreateLShr(Sub, ShiftC);
  auto *Shl = Builder.CreateShl(Sub, Ty->getBitWidth() - Shift);
  auto *Rot = Builder.CreateOr(LShr, Shl);
  SI->replaceUsesOfWith(SI->getCondition(), Rot);

  for (auto Case : SI->cases()) {
    auto *Orig = Case.getCaseValue();
    auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
    Case.setValue(
        cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue()))));
  }
  return true;
}

bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
  BasicBlock *BB = SI->getParent();

  if (isValueEqualityComparison(SI)) {
    // If we only have one predecessor, and if it is a branch on this value,
    // see if that predecessor totally determines the outcome of this switch.
    if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
      if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
        return requestResimplify();

    Value *Cond = SI->getCondition();
    if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
      if (SimplifySwitchOnSelect(SI, Select))
        return requestResimplify();

    // If the block only contains the switch, see if we can fold the block
    // away into any preds.
    if (SI == &*BB->instructionsWithoutDebug(false).begin())
      if (FoldValueComparisonIntoPredecessors(SI, Builder))
        return requestResimplify();
  }

  // Try to transform the switch into an icmp and a branch.
-  if (TurnSwitchRangeIntoICmp(SI, Builder))
+  // The conversion from switch to comparison may lose information on
+  // impossible switch values, so disable it early in the pipeline.
+  if (Options.ConvertSwitchRangeToICmp && TurnSwitchRangeIntoICmp(SI, Builder))
    return requestResimplify();

  // Remove unreachable cases.
  if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
    return requestResimplify();

  if (switchToSelect(SI, Builder, DTU, DL, TTI))
    return requestResimplify();

  if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
    return requestResimplify();

  // The conversion from switch to lookup tables results in difficult-to-analyze
  // code and makes pruning branches much harder. This is a problem if the
  // switch expression itself can still be restricted as a result of inlining or
  // CVP. Therefore, only apply this transformation during late stages of the
  // optimisation pipeline.
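  // [Editorial illustration, not part of the patch] Schematically, the
  // lookup-table rewrite performed by SwitchToLookupTable() turns
  //   switch (x) { case 1: r = 10; break; case 2: r = 20; break;
  //                case 3: r = 30; break; default: r = 0; }
  // into IR along the lines of (names and table representation are
  // illustrative; SwitchLookupTable may pick a bitmap, a linear mapping, or a
  // constant array instead):
  //   %idx = sub i32 %x, 1
  //   %inrange = icmp ult i32 %idx, 3
  //   br i1 %inrange, label %switch.lookup, label %default
  // switch.lookup:
  //   %gep = getelementptr [3 x i32], [3 x i32]* @switch.table, i32 0, i32 %idx
  //   %r = load i32, i32* %gep
  // Once the per-case values are hidden behind a load, later passes can no
  // longer prune cases based on the possible values of x, which is why the
  // transform is gated on Options.ConvertSwitchToLookupTable and only enabled
  // late in the pipeline.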
if (Options.ConvertSwitchToLookupTable && SwitchToLookupTable(SI, Builder, DTU, DL, TTI)) return requestResimplify(); if (ReduceSwitchRange(SI, Builder, DL, TTI)) return requestResimplify(); return false; } bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) { BasicBlock *BB = IBI->getParent(); bool Changed = false; // Eliminate redundant destinations. SmallPtrSet Succs; SmallSetVector RemovedSuccs; for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { BasicBlock *Dest = IBI->getDestination(i); if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) { if (!Dest->hasAddressTaken()) RemovedSuccs.insert(Dest); Dest->removePredecessor(BB); IBI->removeDestination(i); --i; --e; Changed = true; } } if (DTU) { std::vector Updates; Updates.reserve(RemovedSuccs.size()); for (auto *RemovedSucc : RemovedSuccs) Updates.push_back({DominatorTree::Delete, BB, RemovedSucc}); DTU->applyUpdates(Updates); } if (IBI->getNumDestinations() == 0) { // If the indirectbr has no successors, change it to unreachable. new UnreachableInst(IBI->getContext(), IBI); EraseTerminatorAndDCECond(IBI); return true; } if (IBI->getNumDestinations() == 1) { // If the indirectbr has one successor, change it to a direct branch. BranchInst::Create(IBI->getDestination(0), IBI); EraseTerminatorAndDCECond(IBI); return true; } if (SelectInst *SI = dyn_cast(IBI->getAddress())) { if (SimplifyIndirectBrOnSelect(IBI, SI)) return requestResimplify(); } return Changed; } /// Given an block with only a single landing pad and a unconditional branch /// try to find another basic block which this one can be merged with. This /// handles cases where we have multiple invokes with unique landing pads, but /// a shared handler. /// /// We specifically choose to not worry about merging non-empty blocks /// here. That is a PRE/scheduling problem and is best solved elsewhere. In /// practice, the optimizer produces empty landing pad blocks quite frequently /// when dealing with exception dense code. (see: instcombine, gvn, if-else /// sinking in this file) /// /// This is primarily a code size optimization. We need to avoid performing /// any transform which might inhibit optimization (such as our ability to /// specialize a particular handler via tail commoning). We do this by not /// merging any blocks which require us to introduce a phi. Since the same /// values are flowing through both blocks, we don't lose any ability to /// specialize. If anything, we make such specialization more likely. /// /// TODO - This transformation could remove entries from a phi in the target /// block when the inputs in the phi are the same for the two blocks being /// merged. In some cases, this could result in removal of the PHI entirely. static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, BasicBlock *BB, DomTreeUpdater *DTU) { auto Succ = BB->getUniqueSuccessor(); assert(Succ); // If there's a phi in the successor block, we'd likely have to introduce // a phi into the merged landing pad block. if (isa(*Succ->begin())) return false; for (BasicBlock *OtherPred : predecessors(Succ)) { if (BB == OtherPred) continue; BasicBlock::iterator I = OtherPred->begin(); LandingPadInst *LPad2 = dyn_cast(I); if (!LPad2 || !LPad2->isIdenticalTo(LPad)) continue; for (++I; isa(I); ++I) ; BranchInst *BI2 = dyn_cast(I); if (!BI2 || !BI2->isIdenticalTo(BI)) continue; std::vector Updates; // We've found an identical block. Update our predecessors to take that // path instead and make ourselves dead. 
SmallSetVector UniquePreds(pred_begin(BB), pred_end(BB)); for (BasicBlock *Pred : UniquePreds) { InvokeInst *II = cast(Pred->getTerminator()); assert(II->getNormalDest() != BB && II->getUnwindDest() == BB && "unexpected successor"); II->setUnwindDest(OtherPred); if (DTU) { Updates.push_back({DominatorTree::Insert, Pred, OtherPred}); Updates.push_back({DominatorTree::Delete, Pred, BB}); } } // The debug info in OtherPred doesn't cover the merged control flow that // used to go through BB. We need to delete it or update it. for (Instruction &Inst : llvm::make_early_inc_range(*OtherPred)) if (isa(Inst)) Inst.eraseFromParent(); SmallSetVector UniqueSuccs(succ_begin(BB), succ_end(BB)); for (BasicBlock *Succ : UniqueSuccs) { Succ->removePredecessor(BB); if (DTU) Updates.push_back({DominatorTree::Delete, BB, Succ}); } IRBuilder<> Builder(BI); Builder.CreateUnreachable(); BI->eraseFromParent(); if (DTU) DTU->applyUpdates(Updates); return true; } return false; } bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) { return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder) : simplifyCondBranch(Branch, Builder); } bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder) { BasicBlock *BB = BI->getParent(); BasicBlock *Succ = BI->getSuccessor(0); // If the Terminator is the only non-phi instruction, simplify the block. // If LoopHeader is provided, check if the block or its successor is a loop // header. (This is for early invocations before loop simplify and // vectorization to keep canonical loop forms for nested loops. These blocks // can be eliminated when the pass is invoked later in the back-end.) // Note that if BB has only one predecessor then we do not introduce new // backedge, so we can eliminate BB. bool NeedCanonicalLoop = Options.NeedCanonicalLoop && (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) && (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ))); BasicBlock::iterator I = BB->getFirstNonPHIOrDbg(true)->getIterator(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU)) return true; // If the only instruction in the block is a seteq/setne comparison against a // constant, try to simplify the block. if (ICmpInst *ICI = dyn_cast(I)) if (ICI->isEquality() && isa(ICI->getOperand(1))) { for (++I; isa(I); ++I) ; if (I->isTerminator() && tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder)) return true; } // See if we can merge an empty landing pad block with another which is // equivalent. if (LandingPadInst *LPad = dyn_cast(I)) { for (++I; isa(I); ++I) ; if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB, DTU)) return true; } // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value // for PHI nodes in common successor. 
if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, Options.BonusInstThreshold)) return requestResimplify(); return false; } static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) { BasicBlock *PredPred = nullptr; for (auto *P : predecessors(BB)) { BasicBlock *PPred = P->getSinglePredecessor(); if (!PPred || (PredPred && PredPred != PPred)) return nullptr; PredPred = PPred; } return PredPred; } bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { assert( !isa(BI->getCondition()) && BI->getSuccessor(0) != BI->getSuccessor(1) && "Tautological conditional branch should have been eliminated already."); BasicBlock *BB = BI->getParent(); if (!Options.SimplifyCondBranch) return false; // Conditional branch if (isValueEqualityComparison(BI)) { // If we only have one predecessor, and if it is a branch on this value, // see if that predecessor totally determines the outcome of this // switch. if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder)) return requestResimplify(); // This block must be empty, except for the setcond inst, if it exists. // Ignore dbg and pseudo intrinsics. auto I = BB->instructionsWithoutDebug(true).begin(); if (&*I == BI) { if (FoldValueComparisonIntoPredecessors(BI, Builder)) return requestResimplify(); } else if (&*I == cast(BI->getCondition())) { ++I; if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder)) return requestResimplify(); } } // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction. if (SimplifyBranchOnICmpChain(BI, Builder, DL)) return true; // If this basic block has dominating predecessor blocks and the dominating // blocks' conditions imply BI's condition, we know the direction of BI. Optional Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL); if (Imp) { // Turn this into a branch on constant. auto *OldCond = BI->getCondition(); ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext()) : ConstantInt::getFalse(BB->getContext()); BI->setCondition(TorF); RecursivelyDeleteTriviallyDeadInstructions(OldCond); return requestResimplify(); } // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. if (FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, Options.BonusInstThreshold)) return requestResimplify(); // We have a conditional branch to two blocks that are only reachable // from BI. We know that the condbr dominates the two blocks, so see if // there is any identical code in the "then" and "else" blocks. If so, we // can hoist it up to the branching block. if (BI->getSuccessor(0)->getSinglePredecessor()) { if (BI->getSuccessor(1)->getSinglePredecessor()) { if (HoistCommon && HoistThenElseCodeToIf(BI, TTI, !Options.HoistCommonInsts)) return requestResimplify(); } else { // If Successor #1 has multiple preds, we may be able to conditionally // execute Successor #0 if it branches to Successor #1. Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator(); if (Succ0TI->getNumSuccessors() == 1 && Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI)) return requestResimplify(); } } else if (BI->getSuccessor(1)->getSinglePredecessor()) { // If Successor #0 has multiple preds, we may be able to conditionally // execute Successor #1 if it branches to Successor #0. 
Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator(); if (Succ1TI->getNumSuccessors() == 1 && Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI)) return requestResimplify(); } // If this is a branch on a phi node in the current block, thread control // through this block if any PHI node entries are constants. if (PHINode *PN = dyn_cast(BI->getCondition())) if (PN->getParent() == BI->getParent()) if (FoldCondBranchOnPHI(BI, DTU, DL, Options.AC)) return requestResimplify(); // Scan predecessor blocks for conditional branches. for (BasicBlock *Pred : predecessors(BB)) if (BranchInst *PBI = dyn_cast(Pred->getTerminator())) if (PBI != BI && PBI->isConditional()) if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI)) return requestResimplify(); // Look for diamond patterns. if (MergeCondStores) if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB)) if (BranchInst *PBI = dyn_cast(PrevBB->getTerminator())) if (PBI != BI && PBI->isConditional()) if (mergeConditionalStores(PBI, BI, DTU, DL, TTI)) return requestResimplify(); return false; } /// Check if passing a value to an instruction will cause undefined behavior. static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) { Constant *C = dyn_cast(V); if (!C) return false; if (I->use_empty()) return false; if (C->isNullValue() || isa(C)) { // Only look at the first use, avoid hurting compile time with long uselists auto *Use = cast(*I->user_begin()); // Bail out if Use is not in the same BB as I or Use == I or Use comes // before I in the block. The latter two can be the case if Use is a PHI // node. if (Use->getParent() != I->getParent() || Use == I || Use->comesBefore(I)) return false; // Now make sure that there are no instructions in between that can alter // control flow (eg. calls) auto InstrRange = make_range(std::next(I->getIterator()), Use->getIterator()); if (any_of(InstrRange, [](Instruction &I) { return !isGuaranteedToTransferExecutionToSuccessor(&I); })) return false; // Look through GEPs. A load from a GEP derived from NULL is still undefined if (GetElementPtrInst *GEP = dyn_cast(Use)) if (GEP->getPointerOperand() == I) { if (!GEP->isInBounds() || !GEP->hasAllZeroIndices()) PtrValueMayBeModified = true; return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified); } // Look through bitcasts. if (BitCastInst *BC = dyn_cast(Use)) return passingValueIsAlwaysUndefined(V, BC, PtrValueMayBeModified); // Load from null is undefined. if (LoadInst *LI = dyn_cast(Use)) if (!LI->isVolatile()) return !NullPointerIsDefined(LI->getFunction(), LI->getPointerAddressSpace()); // Store to null is undefined. if (StoreInst *SI = dyn_cast(Use)) if (!SI->isVolatile()) return (!NullPointerIsDefined(SI->getFunction(), SI->getPointerAddressSpace())) && SI->getPointerOperand() == I; if (auto *CB = dyn_cast(Use)) { if (C->isNullValue() && NullPointerIsDefined(CB->getFunction())) return false; // A call to null is undefined. if (CB->getCalledOperand() == I) return true; if (C->isNullValue()) { for (const llvm::Use &Arg : CB->args()) if (Arg == I) { unsigned ArgIdx = CB->getArgOperandNo(&Arg); if (CB->isPassingUndefUB(ArgIdx) && CB->paramHasAttr(ArgIdx, Attribute::NonNull)) { // Passing null to a nonnnull+noundef argument is undefined. return !PtrValueMayBeModified; } } } else if (isa(C)) { // Passing undef to a noundef argument is undefined. 
for (const llvm::Use &Arg : CB->args()) if (Arg == I) { unsigned ArgIdx = CB->getArgOperandNo(&Arg); if (CB->isPassingUndefUB(ArgIdx)) { // Passing undef to a noundef argument is undefined. return true; } } } } } return false; } /// If BB has an incoming value that will always trigger undefined behavior /// (eg. null pointer dereference), remove the branch leading here. static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU) { for (PHINode &PHI : BB->phis()) for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i) if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) { BasicBlock *Predecessor = PHI.getIncomingBlock(i); Instruction *T = Predecessor->getTerminator(); IRBuilder<> Builder(T); if (BranchInst *BI = dyn_cast(T)) { BB->removePredecessor(Predecessor); // Turn uncoditional branches into unreachables and remove the dead // destination from conditional branches. if (BI->isUnconditional()) Builder.CreateUnreachable(); else { // Preserve guarding condition in assume, because it might not be // inferrable from any dominating condition. Value *Cond = BI->getCondition(); if (BI->getSuccessor(0) == BB) Builder.CreateAssumption(Builder.CreateNot(Cond)); else Builder.CreateAssumption(Cond); Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1) : BI->getSuccessor(0)); } BI->eraseFromParent(); if (DTU) DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}}); return true; } else if (SwitchInst *SI = dyn_cast(T)) { // Redirect all branches leading to UB into // a newly created unreachable block. BasicBlock *Unreachable = BasicBlock::Create( Predecessor->getContext(), "unreachable", BB->getParent(), BB); Builder.SetInsertPoint(Unreachable); // The new block contains only one instruction: Unreachable Builder.CreateUnreachable(); for (auto &Case : SI->cases()) if (Case.getCaseSuccessor() == BB) { BB->removePredecessor(Predecessor); Case.setSuccessor(Unreachable); } if (SI->getDefaultDest() == BB) { BB->removePredecessor(Predecessor); SI->setDefaultDest(Unreachable); } if (DTU) DTU->applyUpdates( { { DominatorTree::Insert, Predecessor, Unreachable }, { DominatorTree::Delete, Predecessor, BB } }); return true; } } return false; } bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) { bool Changed = false; assert(BB && BB->getParent() && "Block not embedded in function!"); assert(BB->getTerminator() && "Degenerate basic block encountered!"); // Remove basic blocks that have no predecessors (except the entry block)... // or that just have themself as a predecessor. These are unreachable. if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) || BB->getSinglePredecessor() == BB) { LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB); DeleteDeadBlock(BB, DTU); return true; } // Check to see if we can constant propagate this terminator instruction // away... Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true, /*TLI=*/nullptr, DTU); // Check for and eliminate duplicate PHI nodes in this block. Changed |= EliminateDuplicatePHINodes(BB); // Check for and remove branches that will always cause undefined behavior. if (removeUndefIntroducingPredecessor(BB, DTU)) return requestResimplify(); // Merge basic blocks into their predecessor if there is only one distinct // pred, and if there is only one distinct successor of the predecessor, and // if there are no PHI nodes. 
  if (MergeBlockIntoPredecessor(BB, DTU))
    return true;

  if (SinkCommon && Options.SinkCommonInsts)
    if (SinkCommonCodeFromPredecessors(BB, DTU)) {
      // SinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
      // so we may now have duplicate PHI's.
      // Let's rerun EliminateDuplicatePHINodes() first,
      // before FoldTwoEntryPHINode() potentially converts them into select's,
      // after which we'd need a whole EarlyCSE pass run to cleanup them.
      return true;
    }

  IRBuilder<> Builder(BB);

  if (Options.FoldTwoEntryPHINode) {
    // If there is a trivial two-entry PHI node in this basic block, and we can
    // eliminate it, do so now.
    if (auto *PN = dyn_cast<PHINode>(BB->begin()))
      if (PN->getNumIncomingValues() == 2)
        if (FoldTwoEntryPHINode(PN, TTI, DTU, DL))
          return true;
  }

  Instruction *Terminator = BB->getTerminator();
  Builder.SetInsertPoint(Terminator);
  switch (Terminator->getOpcode()) {
  case Instruction::Br:
    Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
    break;
  case Instruction::Resume:
    Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
    break;
  case Instruction::CleanupRet:
    Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
    break;
  case Instruction::Switch:
    Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
    break;
  case Instruction::Unreachable:
    Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
    break;
  case Instruction::IndirectBr:
    Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
    break;
  }

  return Changed;
}

bool SimplifyCFGOpt::run(BasicBlock *BB) {
  bool Changed = false;

  // Repeatedly simplify BB as long as resimplification is requested.
  do {
    Resimplify = false;

    // Perform one round of simplification. Resimplify flag will be set if
    // another iteration is requested.
    Changed |= simplifyOnce(BB);
  } while (Resimplify);

  return Changed;
}

bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
                       DomTreeUpdater *DTU, const SimplifyCFGOptions &Options,
                       ArrayRef<WeakVH> LoopHeaders) {
  return SimplifyCFGOpt(TTI, DTU, BB->getModule()->getDataLayout(), LoopHeaders,
                        Options)
      .run(BB);
}
diff --git a/lib/clang/include/VCSVersion.inc b/lib/clang/include/VCSVersion.inc
index 073becaaedab..f5ca305f872e 100644
--- a/lib/clang/include/VCSVersion.inc
+++ b/lib/clang/include/VCSVersion.inc
@@ -1,10 +1,10 @@
 // $FreeBSD$

-#define LLVM_REVISION "llvmorg-14.0.0-rc2-12-g09546e1b5103"
+#define LLVM_REVISION "llvmorg-14.0.0-rc4-2-gadd3ab7f4c8a"
 #define LLVM_REPOSITORY "https://github.com/llvm/llvm-project.git"

-#define CLANG_REVISION "llvmorg-14.0.0-rc2-12-g09546e1b5103"
+#define CLANG_REVISION "llvmorg-14.0.0-rc4-2-gadd3ab7f4c8a"
 #define CLANG_REPOSITORY "https://github.com/llvm/llvm-project.git"

-#define LLDB_REVISION "llvmorg-14.0.0-rc2-12-g09546e1b5103"
+#define LLDB_REVISION "llvmorg-14.0.0-rc4-2-gadd3ab7f4c8a"
 #define LLDB_REPOSITORY "https://github.com/llvm/llvm-project.git"
diff --git a/lib/clang/include/lld/Common/Version.inc b/lib/clang/include/lld/Common/Version.inc
index 10526b338fbc..29771a0900ff 100644
--- a/lib/clang/include/lld/Common/Version.inc
+++ b/lib/clang/include/lld/Common/Version.inc
@@ -1,4 +1,4 @@
 // Local identifier in __FreeBSD_version style
 #define LLD_FREEBSD_VERSION 1400003

-#define LLD_VERSION_STRING "14.0.0 (FreeBSD llvmorg-14.0.0-rc2-12-g09546e1b5103-" __XSTRING(LLD_FREEBSD_VERSION) ")"
+#define LLD_VERSION_STRING "14.0.0 (FreeBSD llvmorg-14.0.0-rc4-2-gadd3ab7f4c8a-" __XSTRING(LLD_FREEBSD_VERSION) ")"
diff --git a/lib/clang/include/llvm/Support/VCSRevision.h b/lib/clang/include/llvm/Support/VCSRevision.h
index 7444d5650fab..fd42c33018a7 100644
--- a/lib/clang/include/llvm/Support/VCSRevision.h
+++ b/lib/clang/include/llvm/Support/VCSRevision.h
@@ -1,3 +1,3 @@
 /* $FreeBSD$ */
-#define LLVM_REVISION "llvmorg-14.0.0-rc2-12-g09546e1b5103"
+#define LLVM_REVISION "llvmorg-14.0.0-rc4-2-gadd3ab7f4c8a"
 #define LLVM_REPOSITORY "https://github.com/llvm/llvm-project.git"
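
Editorial addendum (not part of the patch): a minimal standalone sketch of the
density test used by isSwitchDense() in the SimplifyCFG.cpp hunk above, assuming
only the 40% threshold visible there. The function and variable names are ours,
not LLVM's; this is for checking the arithmetic in isolation, not a drop-in
replacement.

// density_sketch.cpp - restates the "NumCases * 100 >= Range * MinDensity"
// heuristic from isSwitchDense() so it can be exercised on concrete case sets.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// Returns true when at least 40% of the value range [min, max] spanned by the
// cases is actually covered by a case, mirroring the test in the patch.
static bool isDenseEnoughForJumpTable(std::vector<int64_t> Values) {
  assert(!Values.empty() && "need at least one case value");
  std::sort(Values.begin(), Values.end());
  uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
  uint64_t Range = Diff + 1;
  uint64_t NumCases = Values.size();
  uint64_t MinDensity = 40; // percent, as in the optsize/minsize default above
  return NumCases * 100 >= Range * MinDensity;
}

// Example: {5, 9, 13, 17} spans a range of 13 with only 4 cases (~31% dense),
// so it fails the test; after ReduceSwitchRange rewrites it to {0, 1, 2, 3}
// the range shrinks to 4 and the density becomes 100%, so it passes.
int main() {
  assert(!isDenseEnoughForJumpTable({5, 9, 13, 17}));
  assert(isDenseEnoughForJumpTable({0, 1, 2, 3}));
  return 0;
}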